/++

	MD4C: Markdown parser for C
	(http://github.com/mity/md4c)

	Copyright:

	Copyright (c) 2016-2019 Martin Mitas
	Copyright (c) 2019 Guillaume Piolat (D translation as commonmarkd package: https://github.com/AuburnSounds/commonmark-d )
	Somewhat modified by Adam D. Ruppe in 2024.

	Permission is hereby granted, free of charge, to any person obtaining a
	copy of this software and associated documentation files (the "Software"),
	to deal in the Software without restriction, including without limitation
	the rights to use, copy, modify, merge, publish, distribute, sublicense,
	and/or sell copies of the Software, and to permit persons to whom the
	Software is furnished to do so, subject to the following conditions:

	The above copyright notice and this permission notice shall be included in
	all copies or substantial portions of the Software.

	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
	OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
	FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
	IN THE SOFTWARE.
+/
module arsd.markdown;

/// Options for Markdown parsing.
///
/// The single-purpose members are distinct bits; the composite members
/// (`permissiveAutoLinks`, `noHTML`, `dialectGitHub`) are defined below by
/// OR-ing those bits together.
enum MarkdownFlag : int
{
	collapseWhitespace       = 0x0001,  /** Collapse non-trivial whitespace into a single ' ' */
	permissiveATXHeaders     = 0x0002,  /** Do not require a space in ATX headers ( ###header ) */
	permissiveURLAutoLinks   = 0x0004,  /** Recognize URLs as autolinks even without '<', '>' */
	permissiveEmailAutoLinks = 0x0008,  /** Recognize e-mails as autolinks even without '<', '>' and 'mailto:' */
	noIndentedCodeBlocks     = 0x0010,  /** Disable indented code blocks. (Only fenced code works.) */
	noHTMLBlocks             = 0x0020,  /** Disable raw HTML blocks. */
	noHTMLSpans              = 0x0040,  /** Disable raw HTML (inline). */
	tablesExtension          = 0x0100,  /** Enable tables extension. */
	enableStrikeThrough      = 0x0200,  /** Enable strikethrough extension. */
	permissiveWWWAutoLinks   = 0x0400,  /** Enable WWW autolinks (even without any scheme prefix, if they begin with 'www.') */
	enableTaskLists          = 0x0800,  /** Enable task list extension. */
	latexMathSpans           = 0x1000,  /** Enable $ and $$ containing LaTeX equations. */

	permissiveAutoLinks      = permissiveEmailAutoLinks | permissiveURLAutoLinks | permissiveWWWAutoLinks, /** Recognize e-mails, URL and WWW links */
	noHTML                   = noHTMLBlocks | noHTMLSpans, /** Disable raw HTML. */

	/* Convenient sets of flags corresponding to well-known Markdown dialects.
	 *
	 * Note we may only support a subset of the features of the referred dialect.
	 * The constant just enables those extensions which bring us as close as
	 * possible given what features we implement.
	 *
	 * ABI compatibility note: Meaning of these can change in time as new
	 * extensions, bringing the dialect closer to the original, are implemented.
	 */
	dialectCommonMark        = 0, /** CommonMark */
	dialectGitHub            = (permissiveAutoLinks | tablesExtension | enableStrikeThrough | enableTaskLists), /** GitHub Flavored Markdown */
}

/// Parses a Markdown input, returns HTML. `flags` set the particular Markdown dialect that is used.
/++
	Params:
		input = Markdown source text. Its length must fit in a `uint`
		        because the parser addresses the input with 32-bit offsets.
		flags = Bitmask of [MarkdownFlag] values selecting the dialect.

	Returns:
		A freshly GC-allocated string holding the rendered HTML.

	Throws:
		`Exception` if `input` is longer than `uint.max` bytes (previously
		such input was silently truncated by a narrowing cast).
+/
string convertMarkdownToHTML(const(char)[] input, MarkdownFlag flags = MarkdownFlag.dialectCommonMark)
{
	import core.exception : onOutOfMemoryError;
	import core.stdc.stdlib;

	/* The parser takes the input size as a 32-bit value; refuse anything
	 * larger instead of rendering only `input.length % 2^32` bytes. */
	if (input.length > uint.max)
		throw new Exception("Markdown input is too large: the parser is limited to uint.max bytes");

	/* malloc-backed output accumulator fed by the renderer callback. */
	static struct GrowableBuffer
	{
	nothrow:
	@nogc:
		char* buf = null;
		size_t size = 0;
		size_t allocated = 0;

		/* The buffer owns malloc'ed memory released in the destructor;
		 * a copy would free it twice. */
		@disable this(this);

		/* Makes sure at least `atLeastthisSize` bytes are allocated. */
		void ensureSize(size_t atLeastthisSize)
		{
			if (atLeastthisSize > allocated)
			{
				size_t newAllocated = 2 * allocated + atLeastthisSize + 1; // TODO: enhancing this estimation probably beneficial to performance
				char* newBuf = cast(char*) realloc(buf, newAllocated);

				/* On failure realloc() leaves the old block untouched; never
				 * overwrite `buf` with null (leak + null write in append). */
				if (newBuf is null)
					onOutOfMemoryError();

				buf = newBuf;
				allocated = newAllocated;
			}
		}

		~this()
		{
			if (buf)
			{
				free(buf);
				buf = null;
				size = 0;
				allocated = 0;
			}
		}

		/* Appends `suffix` to the accumulated data, growing as needed. */
		void append(const(char)[] suffix)
		{
			size_t L = suffix.length;
			if (L == 0)
				return; // nothing to copy; also avoids touching a still-null buf
			ensureSize(size + L);
			buf[size..size+L] = suffix[0..L];
			size += L;
		}

		/* View of the bytes accumulated so far (valid until the next append). */
		const(char)[] getData()
		{
			return buf[0..size];
		}

		/* Renderer output callback; `userData` is the GrowableBuffer to append to. */
		static void appendCallback(const(char)* chars, uint size, void* userData)
		{
			GrowableBuffer* gb = cast(GrowableBuffer*) userData;
			gb.append(chars[0..size]);
		}
	}

	GrowableBuffer gb;
	gb.ensureSize(input.length); // TODO: enhancing this estimation probably beneficial to performance

	//int renderFlags = MD_RENDER_FLAG_DEBUG;
	int renderFlags = 0;

	/* NOTE(review): the result of md_render_html() is not acted upon, so
	 * whatever output was produced is returned even if rendering reported a
	 * failure. Presumably non-zero means failure -- confirm against
	 * md_render_html() before changing this. */
	int ret = md_render_html(input.ptr,
	                         cast(uint) input.length,
	                         &GrowableBuffer.appendCallback,
	                         &gb, flags, renderFlags);
	return gb.getData.idup; // Note: apart from the size check above, this is the only GC-using stuff
}




import core.stdc.string;
import core.stdc.stdio;
import core.stdc.stdlib: malloc, free;

nothrow:
@nogc:
@system:

// Compatibility with older DMDFE
static if (__VERSION__ < 2079)
{
	import core.stdc.stdlib: _compare_fp_t;
	// Provide @nogc nothrow bsearch and qsort for older compilers
	extern (C):
	@system:
	inout(void)* bsearch(scope const void* key, scope inout(void)* base, size_t nmemb, size_t size, _compare_fp_t compar);
	void qsort(scope void* base, size_t nmemb, size_t size, _compare_fp_t compar);
}
else
{
	import core.stdc.stdlib: qsort, bsearch;
}

alias MD_CHAR = char;
alias MD_SIZE = uint;
alias MD_OFFSET = uint;

/* Block represents a part of document hierarchy structure like a paragraph
 * or list item.
 */
alias MD_BLOCKTYPE = int;
enum : MD_BLOCKTYPE
{
	/* <body>...</body> */
	MD_BLOCK_DOC = 0,

	/* <blockquote>...</blockquote> */
	MD_BLOCK_QUOTE,

	/* <ul>...</ul>
	 * Detail: Structure MD_BLOCK_UL_DETAIL. */
	MD_BLOCK_UL,

	/* <ol>...</ol>
	 * Detail: Structure MD_BLOCK_OL_DETAIL. */
	MD_BLOCK_OL,

	/* <li>...</li>
	 * Detail: Structure MD_BLOCK_LI_DETAIL. */
	MD_BLOCK_LI,

	/* <hr> */
	MD_BLOCK_HR,

	/* <h1>...</h1> (for levels up to 6)
	 * Detail: Structure MD_BLOCK_H_DETAIL. */
	MD_BLOCK_H,

	/* <pre><code>...</code></pre>
	 * Note the text lines within code blocks are terminated with '\n'
	 * instead of explicit MD_TEXT_BR. */
	MD_BLOCK_CODE,

	/* Raw HTML block. This itself does not correspond to any particular HTML
	 * tag. The contents of it _is_ raw HTML source intended to be put
	 * in verbatim form to the HTML output. */
	MD_BLOCK_HTML,

	/* <p>...</p> */
	MD_BLOCK_P,

	/* <table>...</table> and its contents.
	 * Detail: Structure MD_BLOCK_TD_DETAIL (used with MD_BLOCK_TH and MD_BLOCK_TD)
	 * Note all of these are used only if extension MD_FLAG_TABLES is enabled. */
	MD_BLOCK_TABLE,
	MD_BLOCK_THEAD,
	MD_BLOCK_TBODY,
	MD_BLOCK_TR,
	MD_BLOCK_TH,
	MD_BLOCK_TD
}

/* Span represents an in-line piece of a document which should be rendered with
 * the same font, color and other attributes. A sequence of spans forms a block
 * like paragraph or list item.
*/
alias MD_SPANTYPE = int;
enum : MD_SPANTYPE
{
	/* <em>...</em> */
	MD_SPAN_EM,

	/* <strong>...</strong> */
	MD_SPAN_STRONG,

	/* <a href="xxx">...</a>
	 * Detail: Structure MD_SPAN_A_DETAIL. */
	MD_SPAN_A,

	/* <img src="xxx" alt="...">
	 * Detail: Structure MD_SPAN_IMG_DETAIL.
	 * Note: Image text can contain nested spans and even nested images.
	 * If rendered into the ALT attribute of the HTML <IMG> tag, it is the
	 * responsibility of the renderer to deal with it.
	 */
	MD_SPAN_IMG,

	/* <code>...</code> */
	MD_SPAN_CODE,

	/* <del>...</del>
	 * Note: Recognized only when MD_FLAG_STRIKETHROUGH is enabled.
	 */
	MD_SPAN_DEL,

	/* For recognizing inline ($) and display ($$) equations
	 * Note: Recognized only when MD_FLAG_LATEXMATHSPANS is enabled.
	 */
	MD_SPAN_LATEXMATH,
	MD_SPAN_LATEXMATH_DISPLAY
}

/* Text is the actual textual contents of span. */
alias MD_TEXTTYPE = int;
enum : MD_TEXTTYPE
{
	/* Normal text. */
	MD_TEXT_NORMAL = 0,

	/* null character. CommonMark requires replacing null character with
	 * the replacement char U+FFFD, so this allows caller to do that easily. */
	MD_TEXT_NULLCHAR,

	/* Line breaks.
	 * Note these are not sent from blocks with verbatim output (MD_BLOCK_CODE
	 * or MD_BLOCK_HTML). In such cases, '\n' is part of the text itself. */
	MD_TEXT_BR,         /* <br> (hard break) */
	MD_TEXT_SOFTBR,     /* '\n' in source text where it is not semantically meaningful (soft break) */

	/* Entity.
	 * (a) Named entity, e.g. &nbsp;
	 *     (Note MD4C does not have a list of known entities.
	 *     Anything matching the regexp /&[A-Za-z][A-Za-z0-9]{1,47};/ is
	 *     treated as a named entity.)
	 * (b) Numerical entity, e.g. &#1234;
	 * (c) Hexadecimal entity, e.g. &#x12AB;
	 *
	 * As MD4C is mostly encoding agnostic, application gets the verbatim
	 * entity text into the MD_PARSER.text callback. */
	MD_TEXT_ENTITY,

	/* Text in a code block (inside MD_BLOCK_CODE) or inlined code (`code`).
	 * If it is inside MD_BLOCK_CODE, it includes spaces for indentation and
	 * '\n' for new lines. MD_TEXT_BR and MD_TEXT_SOFTBR are not sent for this
	 * kind of text. */
	MD_TEXT_CODE,

	/* Text is a raw HTML. If it is contents of a raw HTML block (i.e. not
	 * an inline raw HTML), then MD_TEXT_BR and MD_TEXT_SOFTBR are not used.
	 * The text contains verbatim '\n' for the new lines. */
	MD_TEXT_HTML,

	/* Text is inside an equation. This is processed the same way as inlined code
	 * spans (`code`). */
	MD_TEXT_LATEXMATH
}


/* Alignment enumeration. */

alias MD_ALIGN = int;
enum : MD_ALIGN
{
	MD_ALIGN_DEFAULT = 0,   /* When unspecified. */
	MD_ALIGN_LEFT,
	MD_ALIGN_CENTER,
	MD_ALIGN_RIGHT
}


/* String attribute.
 *
 * This wraps strings which are outside of a normal text flow and which are
 * propagated within various detailed structures, but which still may contain
 * string portions of different types like e.g. entities.
 *
 * So, for example, let's consider an image has a title attribute string
 * set to "foo &quot; bar". (Note the string size is 14.)
 *
 * Then the attribute MD_SPAN_IMG_DETAIL::title shall provide the following:
 *  -- [0]: "foo "   (substr_types[0] == MD_TEXT_NORMAL; substr_offsets[0] == 0)
 *  -- [1]: "&quot;" (substr_types[1] == MD_TEXT_ENTITY; substr_offsets[1] == 4)
 *  -- [2]: " bar"   (substr_types[2] == MD_TEXT_NORMAL; substr_offsets[2] == 10)
 *  -- [3]: (n/a)    (n/a                              ; substr_offsets[3] == 14)
 *
 * Note that these conditions are guaranteed:
 *  -- substr_offsets[0] == 0
 *  -- substr_offsets[LAST+1] == size
 *  -- Only MD_TEXT_NORMAL, MD_TEXT_ENTITY, MD_TEXT_NULLCHAR substrings can appear.
*/
struct MD_ATTRIBUTE
{
	const (MD_CHAR)* text;              /* Attribute text; not zero-terminated, see `size`. */
	MD_SIZE size;                       /* Length of `text`. */
	const (MD_TEXTTYPE)* substr_types;  /* Type of each substring of `text`. */
	const (MD_OFFSET)* substr_offsets;  /* Start offset of each substring; one extra trailing entry equals `size`. */
}


/* Detailed info for MD_BLOCK_UL. */
struct MD_BLOCK_UL_DETAIL
{
	int is_tight;           /* Non-zero if tight list, zero if loose. */
	MD_CHAR mark;           /* Item bullet character in MarkDown source of the list, e.g. '-', '+', '*'. */
}

/* Detailed info for MD_BLOCK_OL. */
struct MD_BLOCK_OL_DETAIL
{
	uint start;             /* Start index of the ordered list. */
	int is_tight;           /* Non-zero if tight list, zero if loose. */
	MD_CHAR mark_delimiter; /* Character delimiting the item marks in MarkDown source, e.g. '.' or ')' */
}

/* Detailed info for MD_BLOCK_LI. */
struct MD_BLOCK_LI_DETAIL
{
	int is_task;                /* Can be non-zero only with MD_FLAG_TASKLISTS */
	MD_CHAR task_mark;          /* If is_task, then one of 'x', 'X' or ' '. Undefined otherwise. */
	MD_OFFSET task_mark_offset; /* If is_task, then offset in the input of the char between '[' and ']'. */
}

/* Detailed info for MD_BLOCK_H. */
struct MD_BLOCK_H_DETAIL
{
	uint level;         /* Header level (1 - 6) */
}

/* Detailed info for MD_BLOCK_CODE. */
struct MD_BLOCK_CODE_DETAIL
{
	MD_ATTRIBUTE info;
	MD_ATTRIBUTE lang;
	MD_CHAR fence_char;     /* The character used for fenced code block; or zero for indented code block. */
}

/* Detailed info for MD_BLOCK_TH and MD_BLOCK_TD. */
struct MD_BLOCK_TD_DETAIL
{
	MD_ALIGN align_;
}

/* Detailed info for MD_SPAN_A. */
struct MD_SPAN_A_DETAIL
{
	MD_ATTRIBUTE href;
	MD_ATTRIBUTE title;
}

/* Detailed info for MD_SPAN_IMG. */
struct MD_SPAN_IMG_DETAIL
{
	MD_ATTRIBUTE src;
	MD_ATTRIBUTE title;
}


/* Flags specifying extensions/deviations from CommonMark specification.
*
 * By default (when MD_PARSER.flags == 0), we follow CommonMark specification.
 * The following flags may allow some extensions or deviations from it.
 */
enum
{
	MD_FLAG_COLLAPSEWHITESPACE          = 0x0001,  /* In MD_TEXT_NORMAL, collapse non-trivial whitespace into single ' ' */
	MD_FLAG_PERMISSIVEATXHEADERS        = 0x0002,  /* Do not require space in ATX headers ( ###header ) */
	MD_FLAG_PERMISSIVEURLAUTOLINKS      = 0x0004,  /* Recognize URLs as autolinks even without '<', '>' */
	MD_FLAG_PERMISSIVEEMAILAUTOLINKS    = 0x0008,  /* Recognize e-mails as autolinks even without '<', '>' and 'mailto:' */
	MD_FLAG_NOINDENTEDCODEBLOCKS        = 0x0010,  /* Disable indented code blocks. (Only fenced code works.) */
	MD_FLAG_NOHTMLBLOCKS                = 0x0020,  /* Disable raw HTML blocks. */
	MD_FLAG_NOHTMLSPANS                 = 0x0040,  /* Disable raw HTML (inline). */
	MD_FLAG_TABLES                      = 0x0100,  /* Enable tables extension. */
	MD_FLAG_STRIKETHROUGH               = 0x0200,  /* Enable strikethrough extension. */
	MD_FLAG_PERMISSIVEWWWAUTOLINKS      = 0x0400,  /* Enable WWW autolinks (even without any scheme prefix, if they begin with 'www.') */
	MD_FLAG_TASKLISTS                   = 0x0800,  /* Enable task list extension. */
	MD_FLAG_LATEXMATHSPANS              = 0x1000,  /* Enable $ and $$ containing LaTeX equations. */

	MD_FLAG_PERMISSIVEAUTOLINKS         = MD_FLAG_PERMISSIVEEMAILAUTOLINKS | MD_FLAG_PERMISSIVEURLAUTOLINKS | MD_FLAG_PERMISSIVEWWWAUTOLINKS,
	MD_FLAG_NOHTML                      = MD_FLAG_NOHTMLBLOCKS | MD_FLAG_NOHTMLSPANS,

	/* Convenient sets of flags corresponding to well-known Markdown dialects.
	 *
	 * Note we may only support subset of features of the referred dialect.
	 * The constant just enables those extensions which bring us as close as
	 * possible given what features we implement.
	 *
	 * ABI compatibility note: Meaning of these can change in time as new
	 * extensions, bringing the dialect closer to the original, are implemented.
	 */
	MD_DIALECT_COMMONMARK               = 0,
	MD_DIALECT_GITHUB                   = (MD_FLAG_PERMISSIVEAUTOLINKS | MD_FLAG_TABLES | MD_FLAG_STRIKETHROUGH | MD_FLAG_TASKLISTS),
}

/* Renderer structure.
 */
struct MD_PARSER
{
nothrow:
@nogc:
	/* Reserved. Set to zero.
	 */
	uint abi_version;

	/* Dialect options. Bitmask of MD_FLAG_xxxx values.
	 */
	uint flags;

	/* Caller-provided rendering callbacks.
	 *
	 * For some block/span types, more detailed information is provided in a
	 * type-specific structure pointed by the argument 'detail'.
	 *
	 * The last argument of all callbacks, 'userdata', is just propagated from
	 * md_parse() and is available for any use by the application.
	 *
	 * Note any strings provided to the callbacks as their arguments or as
	 * members of any detail structure are generally not zero-terminated.
	 * Application has to take the respective size information into account.
	 *
	 * Callbacks may abort further parsing of the document by returning non-zero.
	 */
	int function(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/) enter_block;
	int function(MD_BLOCKTYPE /*type*/, void* /*detail*/, void* /*userdata*/) leave_block;

	int function(MD_SPANTYPE /*type*/, void* /*detail*/, void* /*userdata*/) enter_span;
	int function(MD_SPANTYPE /*type*/, void* /*detail*/, void* /*userdata*/) leave_span;

	int function(MD_TEXTTYPE /*type*/, const(MD_CHAR)* /*text*/, MD_SIZE /*size*/, void* /*userdata*/) text;

	/* Debug callback. Optional (may be null).
	 *
	 * If provided and something goes wrong, this function gets called.
	 * This is intended for debugging and problem diagnosis for developers;
	 * it is not intended to provide any errors suitable for displaying to an
	 * end user.
	 */
	void function(const(char)* /*msg*/, void* /*userdata*/) debug_log;

	/* Reserved. Set to null.
	 */
	void function() syntax;
}


/*****************************
 ***  Miscellaneous Stuff  ***
 *****************************/


/* Misc. macros. */

enum TRUE = 1;
enum FALSE = 0;


/************************
 ***  Internal Types  ***
 ************************/

/* These are omnipresent so lets save some typing. */
alias CHAR = MD_CHAR;
alias SZ = MD_SIZE;
alias OFF = MD_OFFSET;

/* During analysis of inline marks, we need to manage some "mark chains",
 * of (yet unresolved) openers. This structure holds start/end of the chain.
 * The chain internals are then realized through MD_MARK::prev and ::next.
 */
struct MD_MARKCHAIN
{
	int head;   /* Index of first mark in the chain, or -1 if empty. */
	int tail;   /* Index of last mark in the chain, or -1 if empty. */
}

/* Index range, within MD_CTX.mark_chains, of the chains holding openers
 * (ASTERISK_OPENERS_extraword_mod3_0 .. DOLLAR_OPENERS). */
enum OPENERS_CHAIN_FIRST = 2;
enum OPENERS_CHAIN_LAST = 11;

/* Context propagated through all the parsing. */
struct MD_CTX
{
nothrow:
@nogc:

	/* Immutable stuff (parameters of md_parse()). */
	const(CHAR)* text;
	SZ size;
	MD_PARSER parser;
	void* userdata;

	/* When this is true, it allows some optimizations. */
	int doc_ends_with_newline;

	/* Helper temporary growing buffer. */
	CHAR* buffer;
	uint alloc_buffer;

	/* Reference definitions. */
	MD_REF_DEF* ref_defs;
	int n_ref_defs;
	int alloc_ref_defs;
	void** ref_def_hashtable;
	int ref_def_hashtable_size;

	/* Stack of inline/span markers.
	 * This is only used for parsing a single block contents but by storing it
	 * here we may reuse the stack for subsequent blocks; i.e. we have fewer
	 * (re)allocations. */
	MD_MARK* marks;
	int n_marks;
	int alloc_marks;

	ubyte[256] mark_char_map;
	/* For resolving of inline spans. */
	MD_MARKCHAIN[12] mark_chains;

	/* Named accessors for the individual entries of mark_chains. */
	MD_MARKCHAIN* PTR_CHAIN() return                         { return &mark_chains[0]; }
	MD_MARKCHAIN* TABLECELLBOUNDARIES() return               { return &mark_chains[1]; }
	MD_MARKCHAIN* ASTERISK_OPENERS_extraword_mod3_0() return { return &mark_chains[2]; }
	MD_MARKCHAIN* ASTERISK_OPENERS_extraword_mod3_1() return { return &mark_chains[3]; }
	MD_MARKCHAIN* ASTERISK_OPENERS_extraword_mod3_2() return { return &mark_chains[4]; }
	MD_MARKCHAIN* ASTERISK_OPENERS_intraword_mod3_0() return { return &mark_chains[5]; }
	MD_MARKCHAIN* ASTERISK_OPENERS_intraword_mod3_1() return { return &mark_chains[6]; }
	MD_MARKCHAIN* ASTERISK_OPENERS_intraword_mod3_2() return { return &mark_chains[7]; }
	MD_MARKCHAIN* UNDERSCORE_OPENERS() return                { return &mark_chains[8]; }
	MD_MARKCHAIN* TILDE_OPENERS() return                     { return &mark_chains[9]; }
	MD_MARKCHAIN* BRACKET_OPENERS() return                   { return &mark_chains[10]; }
	MD_MARKCHAIN* DOLLAR_OPENERS() return                    { return &mark_chains[11]; }

	int n_table_cell_boundaries;

	/* For resolving links. */
	int unresolved_link_head;
	int unresolved_link_tail;

	/* For resolving raw HTML. */
	OFF html_comment_horizon;
	OFF html_proc_instr_horizon;
	OFF html_decl_horizon;
	OFF html_cdata_horizon;

	/* For block analysis.
	 * Notes:
	 *   -- It holds MD_BLOCK as well as MD_LINE structures. After each
	 *      MD_BLOCK, its (multiple) MD_LINE(s) follow.
	 *   -- For MD_BLOCK_HTML and MD_BLOCK_CODE, MD_VERBATIMLINE(s) are used
	 *      instead of MD_LINE(s).
	 */
	void* block_bytes;
	MD_BLOCK* current_block;
	int n_block_bytes;
	int alloc_block_bytes;

	/* For container block analysis. */
	MD_CONTAINER* containers;
	int n_containers;
	int alloc_containers;

	/* Minimal indentation to call the block "indented code block". */
	uint code_indent_offset;

	/* Contextual info for line analysis. */
	SZ code_fence_length;   /* For checking closing fence length. */
	int html_block_type;    /* For checking closing raw HTML condition. */
	int last_line_has_list_loosening_effect;
	int last_list_item_starts_with_two_blank_lines;

	/* Forwards `msg` to the optional debug callback, if one is installed. */
	void MD_LOG(const(char)* msg)
	{
		if(parser.debug_log != null)
			parser.debug_log(msg, userdata);
	}

	/* Character accessors. No bounds checking is performed on `off`. */
	CHAR CH(OFF off)
	{
		return text[off];
	}

	const(CHAR)* STR(OFF off)
	{
		return text + off;
	}

	/* Character classification of the input character at offset `off`. */
	bool ISANYOF(OFF off, const(CHAR)* palette) { return ISANYOF_(CH(off), palette); }
	bool ISANYOF2(OFF off, CHAR ch1, CHAR ch2) { return ISANYOF2_(CH(off), ch1, ch2); }
	bool ISANYOF3(OFF off, CHAR ch1, CHAR ch2, CHAR ch3) { return ISANYOF3_(CH(off), ch1, ch2, ch3); }
	bool ISASCII(OFF off) { return ISASCII_(CH(off)); }
	bool ISBLANK(OFF off) { return ISBLANK_(CH(off)); }
	bool ISNEWLINE(OFF off) { return ISNEWLINE_(CH(off)); }
	bool ISWHITESPACE(OFF off) { return ISWHITESPACE_(CH(off)); }
	bool ISCNTRL(OFF off) { return ISCNTRL_(CH(off)); }
	bool ISPUNCT(OFF off) { return ISPUNCT_(CH(off)); }
	bool ISUPPER(OFF off) { return ISUPPER_(CH(off)); }
	bool ISLOWER(OFF off) { return ISLOWER_(CH(off)); }
	bool ISALPHA(OFF off) { return ISALPHA_(CH(off)); }
	bool ISDIGIT(OFF off) { return ISDIGIT_(CH(off)); }
	bool ISXDIGIT(OFF off) { return ISXDIGIT_(CH(off)); }
	bool ISALNUM(OFF off) { return ISALNUM_(CH(off)); }
}

alias MD_LINETYPE = int;
enum : MD_LINETYPE
{
	MD_LINE_BLANK,
	MD_LINE_HR,
	MD_LINE_ATXHEADER,
	MD_LINE_SETEXTHEADER,
	MD_LINE_SETEXTUNDERLINE,
	MD_LINE_INDENTEDCODE,
	MD_LINE_FENCEDCODE,
	MD_LINE_HTML,
	MD_LINE_TEXT,
	MD_LINE_TABLE,
	MD_LINE_TABLEUNDERLINE
}

struct MD_LINE_ANALYSIS
{
nothrow:
@nogc:
	/* Stored in 16 bits each; use the accessors below. */
	short type_;
	ushort data_;

	MD_LINETYPE type() const
	{
		return type_;
	}

	void type(MD_LINETYPE value)
	{
		type_ = cast(short)value;
	}

	int data() const
	{
		return data_;
	}

	/* Note: only the low 16 bits of `value` are kept. */
	void data(uint value)
	{
		data_ = cast(ushort)value;
	}

	OFF beg;
	OFF end;
	uint indent;        /* Indentation level. */
}

struct MD_LINE
{
	OFF beg;
	OFF end;
}

struct MD_VERBATIMLINE
{
	OFF beg;
	OFF end;
	OFF indent;
}


/*****************
 ***  Helpers  ***
 *****************/

pure
{
	/* Character classification.
	 * Note we assume ASCII compatibility of code points < 128 here. */
	bool ISIN_(CHAR ch, CHAR ch_min, CHAR ch_max)
	{
		return (ch_min <= cast(uint)(ch) && cast(uint)(ch) <= ch_max);
	}

	/* True if `ch` occurs in the zero-terminated `palette`. */
	bool ISANYOF_(CHAR ch, const(CHAR)* palette)
	{
		return md_strchr(palette, ch) != null;
	}

	bool ISANYOF2_(CHAR ch, CHAR ch1, CHAR ch2)
	{
		return (ch == ch1) || (ch == ch2);
	}

	bool ISANYOF3_(CHAR ch, CHAR ch1, CHAR ch2, CHAR ch3)
	{
		return (ch == ch1) || (ch == ch2) || (ch == ch3);
	}

	bool ISASCII_(CHAR ch)
	{
		return (cast(uint)ch) <= 127;
	}

	bool ISBLANK_(CHAR ch)
	{
		return ISANYOF2_(ch, ' ', '\t');
	}

	bool ISNEWLINE_(CHAR ch)
	{
		return ISANYOF2_(ch, '\r', '\n');
	}

	bool ISWHITESPACE_(CHAR ch)
	{
		return ISBLANK_(ch) || ISANYOF2_(ch, '\v', '\f');
	}

	bool ISCNTRL_(CHAR ch)
	{
		return (cast(uint)(ch) <= 31 || cast(uint)(ch) == 127);
	}

	bool ISPUNCT_(CHAR ch)
	{
		return ISIN_(ch, 33, 47) || ISIN_(ch, 58, 64) || ISIN_(ch, 91, 96) || ISIN_(ch, 123, 126);
	}

	bool ISUPPER_(CHAR ch)
	{
		return ISIN_(ch, 'A', 'Z');
	}

	bool ISLOWER_(CHAR ch)
	{
		return ISIN_(ch, 'a', 'z');
	}

	bool ISALPHA_(CHAR ch)
	{
		return ISUPPER_(ch) || ISLOWER_(ch);
	}

	bool ISDIGIT_(CHAR ch)
	{
		return ISIN_(ch, '0', '9');
	}

	bool ISXDIGIT_(CHAR ch)
	{
		return ISDIGIT_(ch) || ISIN_(ch, 'A', 'F') || ISIN_(ch, 'a', 'f');
	}

	bool ISALNUM_(CHAR ch)
	{
		return ISALPHA_(ch) || ISDIGIT_(ch);
	}
}

/* Returns a pointer to the first occurrence of `ch` in the zero-terminated
 * string `str`, or null if there is none. Unlike C strchr(), the terminator
 * itself is never matched, so searching for '\0' yields null. */
const(CHAR)* md_strchr(const(CHAR)* str, CHAR ch) pure
{
	OFF i;
	for(i = 0; str[i] != '\0'; i++) {
		if(ch == str[i])
			return (str + i);
	}
	return null;
}

/* Case insensitive check of string equality (ASCII letters only).
 * Compares the first `n` characters; returns TRUE or FALSE. */
int md_ascii_case_eq(const(CHAR)* s1, const(CHAR)* s2, SZ n)
{
	OFF i;
	for(i = 0; i < n; i++) {
		CHAR ch1 = s1[i];
		CHAR ch2 = s2[i];

		if(ISLOWER_(ch1))
			ch1 += ('A'-'a');
		if(ISLOWER_(ch2))
			ch2 += ('A'-'a');
		if(ch1 != ch2)
			return FALSE;
	}
	return TRUE;
}

/* Exact comparison of the first `n` characters; non-zero if they are equal. */
int md_ascii_eq(const(CHAR)* s1, const(CHAR)* s2, SZ n)
{
	return memcmp(s1, s2, n * CHAR.sizeof) == 0;
}

/* Sends `str[0 .. size]` to the text callback, reporting every NUL character
 * as a separate MD_TEXT_NULLCHAR event instead of passing it through.
 *
 * Runs of ordinary characters are delivered with the given `type`.
 * Returns 0 on success, or the first non-zero value returned by the callback.
 *
 * Fix: after reporting a NUL the input is now advanced past it. Previously
 * only the scan offset was bumped, so the following ordinary-text callback
 * began at the NUL byte and leaked it into the output again.
 */
int md_text_with_null_replacement(MD_CTX* ctx, MD_TEXTTYPE type, const(CHAR)* str, SZ size)
{
	int ret = 0;

	while(size > 0) {
		/* Length of the NUL-free run at the start of the remaining input. */
		OFF run = 0;
		while(run < size && str[run] != '\0')
			run++;

		if(run > 0) {
			ret = ctx.parser.text(type, str, run, ctx.userdata);
			if(ret != 0)
				return ret;

			str += run;
			size -= run;
		}

		if(size == 0)
			break;

		/* Here str[0] is a NUL: report it and step over it. */
		ret = ctx.parser.text(MD_TEXT_NULLCHAR, "", 1, ctx.userdata);
		if(ret != 0)
			return ret;

		str++;
		size--;
	}

	return 0;
}

/* Ensures ctx.buffer can hold at least `sz` characters.
 * Returns 0 on success, -1 if the (re)allocation failed; on failure the
 * previous buffer and its recorded size are left as they were. */
int MD_TEMP_BUFFER(MD_CTX* ctx, SZ sz)
{
	if(sz > ctx.alloc_buffer)
	{
		/* Grow to about 1.5x the request plus slack, aligned down to 128.
		 * The estimate is computed in 64 bits: in 32 bits it could wrap
		 * around for very large `sz` and yield a buffer smaller than asked
		 * for. If it does not fit SZ, fall back to the exact size. */
		ulong wanted = (cast(ulong) sz + sz / 2 + 128) & ~127UL;
		SZ new_size = (wanted > SZ.max) ? sz : cast(SZ) wanted;

		CHAR* new_buffer = cast(CHAR*) realloc_safe(ctx.buffer, new_size);
		if (new_buffer == null)
		{
			ctx.MD_LOG("realloc() failed.");
			return -1;
		}
		ctx.buffer = new_buffer;
		ctx.alloc_buffer = new_size;
	}
	return 0;
}

/* The wrappers below invoke the respective parser callback, log through
 * MD_LOG when the callback asks to abort (non-zero), and return its result. */

int MD_ENTER_BLOCK(MD_CTX* ctx, MD_BLOCKTYPE type, void* arg)
{
	int ret = ctx.parser.enter_block(type, arg, ctx.userdata);
	if(ret != 0)
	{
		ctx.MD_LOG("Aborted from enter_block() callback.");
		return ret;
	}
	return 0;
}

int MD_LEAVE_BLOCK(MD_CTX* ctx, MD_BLOCKTYPE type, void* arg)
{
	int ret = ctx.parser.leave_block(type, arg, ctx.userdata);
	if(ret != 0)
	{
		ctx.MD_LOG("Aborted from leave_block() callback.");
		return ret;
	}
	return 0;
}

int MD_ENTER_SPAN(MD_CTX* ctx, MD_SPANTYPE type, void* arg)
{
	int ret = ctx.parser.enter_span(type, arg, ctx.userdata);
	if(ret != 0)
	{
		ctx.MD_LOG("Aborted from enter_span() callback.");
		return ret;
	}
	return 0;
}

int MD_LEAVE_SPAN(MD_CTX* ctx, MD_SPANTYPE type, void* arg)
{
	int ret = ctx.parser.leave_span(type, arg, ctx.userdata);
	if(ret != 0)
	{
		ctx.MD_LOG("Aborted from leave_span() callback.");
		return ret;
	}
	return 0;
}

/* Sends non-empty text straight to the text callback (empty text is skipped). */
int MD_TEXT(MD_CTX* ctx, MD_TEXTTYPE type, const(MD_CHAR)* str, MD_SIZE size)
{
	if(size > 0)
	{
		int ret = ctx.parser.text((type), (str), (size), ctx.userdata);
		if (ret != 0)
		{
			ctx.MD_LOG("Aborted from text() callback.");
			return ret;
		}
	}
	return 0;
}

/* Like MD_TEXT, but for text that may contain NUL characters: those are
 * reported separately via md_text_with_null_replacement(). */
int MD_TEXT_INSECURE(MD_CTX* ctx, MD_TEXTTYPE type, const(MD_CHAR)* str, MD_SIZE size)
{
	if(size > 0)
	{
		int ret = md_text_with_null_replacement(ctx, type, str, size);
		if(ret != 0)
		{
			ctx.MD_LOG("Aborted from text() callback.");
			return ret;
		}
	}
	return 0;
}

/*************************
 ***  Unicode Support  ***
 *************************/

struct MD_UNICODE_FOLD_INFO
{
	uint[3] codepoints;
	int n_codepoints;
}



/* Binary search over sorted "map" of codepoints. Consecutive sequences
 * of codepoints may be encoded in the map by just using the
 * (MIN_CODEPOINT | 0x40000000) and (MAX_CODEPOINT | 0x80000000).
 *
 * Returns index of the found record in the map (in the case of ranges,
 * the minimal value is used); or -1 on failure.
*/
int md_unicode_bsearch__(uint codepoint, const(uint)* map, size_t map_size)
{
	/* Marker bits carried by map entries, and the mask that strips them. */
	enum uint RANGE_FIRST = 0x40000000;
	enum uint RANGE_LAST = 0x80000000;
	enum uint VALUE_MASK = 0x00ffffff;

	int lo = 0;
	int hi = cast(int) map_size-1;

	while(lo <= hi) {
		/* The probed record may be a two-entry range rather than a single
		 * value; widen [first, last] so that it covers both entries. */
		const int mid = (lo + hi) / 2;
		int first = mid;
		int last = mid;

		if((map[mid] & RANGE_FIRST) != 0)
			last++;
		if((map[mid] & RANGE_LAST) != 0)
			first--;

		const uint lowest = map[first] & VALUE_MASK;
		if(codepoint < lowest) {
			hi = first - 1;
			continue;
		}

		const uint highest = map[last] & VALUE_MASK;
		if(codepoint > highest) {
			lo = last + 1;
			continue;
		}

		return first;
	}

	return -1;
}

/* Tells whether `codepoint` counts as whitespace: for the ASCII range the
 * answer comes from ISWHITESPACE_, everything else is looked up in the map
 * of the Unicode "Zs" category. */
bool md_is_unicode_whitespace__(uint codepoint)
{
	/* Unicode "Zs" category.
	 * (generated by scripts/build_whitespace_map.py) */
	static immutable uint[] WHITESPACE_MAP =
	[
		0x0020, 0x00a0, 0x1680, 0x2000| 0x40000000, 0x200a | 0x80000000, 0x202f, 0x205f, 0x3000
	];

	const bool beyondAscii = codepoint > 0x7f;
	if(beyondAscii)
		return (md_unicode_bsearch__(codepoint, WHITESPACE_MAP.ptr, WHITESPACE_MAP.length) >= 0);

	/* The ASCII ones are the most frequently used ones, also CommonMark
	 * specification requests few more in this range. */
	return ISWHITESPACE_(cast(CHAR)codepoint);
}

bool md_is_unicode_punct__(uint codepoint)
{
	/* Unicode "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps" categories.
1010 * (generated by scripts/build_punct_map.py) */ 1011 static immutable uint[] PUNCT_MAP = 1012 [ 1013 0x0021 | 0x40000000,0x0023 | 0x80000000, 0x0025 | 0x40000000,0x002a | 0x80000000, 0x002c | 0x40000000,0x002f | 0x80000000, 0x003a | 0x40000000,0x003b | 0x80000000, 0x003f | 0x40000000,0x0040 | 0x80000000, 1014 0x005b | 0x40000000,0x005d | 0x80000000, 0x005f, 0x007b, 0x007d, 0x00a1, 0x00a7, 0x00ab, 0x00b6 | 0x40000000,0x00b7 | 0x80000000, 1015 0x00bb, 0x00bf, 0x037e, 0x0387, 0x055a | 0x40000000,0x055f | 0x80000000, 0x0589 | 0x40000000,0x058a | 0x80000000, 0x05be, 0x05c0, 1016 0x05c3, 0x05c6, 0x05f3 | 0x40000000,0x05f4 | 0x80000000, 0x0609 | 0x40000000,0x060a | 0x80000000, 0x060c | 0x40000000,0x060d | 0x80000000, 0x061b, 0x061e | 0x40000000,0x061f | 0x80000000, 1017 0x066a | 0x40000000,0x066d | 0x80000000, 0x06d4, 0x0700 | 0x40000000,0x070d | 0x80000000, 0x07f7 | 0x40000000,0x07f9 | 0x80000000, 0x0830 | 0x40000000,0x083e | 0x80000000, 0x085e, 1018 0x0964 | 0x40000000,0x0965 | 0x80000000, 0x0970, 0x09fd, 0x0a76, 0x0af0, 0x0c77, 0x0c84, 0x0df4, 0x0e4f, 1019 0x0e5a | 0x40000000,0x0e5b | 0x80000000, 0x0f04 | 0x40000000,0x0f12 | 0x80000000, 0x0f14, 0x0f3a | 0x40000000,0x0f3d | 0x80000000, 0x0f85, 0x0fd0 | 0x40000000,0x0fd4 | 0x80000000, 1020 0x0fd9 | 0x40000000,0x0fda | 0x80000000, 0x104a | 0x40000000,0x104f | 0x80000000, 0x10fb, 0x1360 | 0x40000000,0x1368 | 0x80000000, 0x1400, 0x166e, 0x169b | 0x40000000,0x169c | 0x80000000, 1021 0x16eb | 0x40000000,0x16ed | 0x80000000, 0x1735 | 0x40000000,0x1736 | 0x80000000, 0x17d4 | 0x40000000,0x17d6 | 0x80000000, 0x17d8 | 0x40000000,0x17da | 0x80000000, 0x1800 | 0x40000000,0x180a | 0x80000000, 1022 0x1944 | 0x40000000,0x1945 | 0x80000000, 0x1a1e | 0x40000000,0x1a1f | 0x80000000, 0x1aa0 | 0x40000000,0x1aa6 | 0x80000000, 0x1aa8 | 0x40000000,0x1aad | 0x80000000, 0x1b5a | 0x40000000,0x1b60 | 0x80000000, 1023 0x1bfc | 0x40000000,0x1bff | 0x80000000, 0x1c3b | 0x40000000,0x1c3f | 0x80000000, 0x1c7e | 0x40000000,0x1c7f | 0x80000000, 
0x1cc0 | 0x40000000,0x1cc7 | 0x80000000, 0x1cd3, 0x2010 | 0x40000000,0x2027 | 0x80000000, 1024 0x2030 | 0x40000000,0x2043 | 0x80000000, 0x2045 | 0x40000000,0x2051 | 0x80000000, 0x2053 | 0x40000000,0x205e | 0x80000000, 0x207d | 0x40000000,0x207e | 0x80000000, 0x208d | 0x40000000,0x208e | 0x80000000, 1025 0x2308 | 0x40000000,0x230b | 0x80000000, 0x2329 | 0x40000000,0x232a | 0x80000000, 0x2768 | 0x40000000,0x2775 | 0x80000000, 0x27c5 | 0x40000000,0x27c6 | 0x80000000, 0x27e6 | 0x40000000,0x27ef | 0x80000000, 1026 0x2983 | 0x40000000,0x2998 | 0x80000000, 0x29d8 | 0x40000000,0x29db | 0x80000000, 0x29fc | 0x40000000,0x29fd | 0x80000000, 0x2cf9 | 0x40000000,0x2cfc | 0x80000000, 0x2cfe | 0x40000000,0x2cff | 0x80000000, 0x2d70, 1027 0x2e00 | 0x40000000,0x2e2e | 0x80000000, 0x2e30 | 0x40000000,0x2e4f | 0x80000000, 0x3001 | 0x40000000,0x3003 | 0x80000000, 0x3008 | 0x40000000,0x3011 | 0x80000000, 0x3014 | 0x40000000,0x301f | 0x80000000, 0x3030, 1028 0x303d, 0x30a0, 0x30fb, 0xa4fe | 0x40000000,0xa4ff | 0x80000000, 0xa60d | 0x40000000,0xa60f | 0x80000000, 0xa673, 0xa67e, 1029 0xa6f2 | 0x40000000,0xa6f7 | 0x80000000, 0xa874 | 0x40000000,0xa877 | 0x80000000, 0xa8ce | 0x40000000,0xa8cf | 0x80000000, 0xa8f8 | 0x40000000,0xa8fa | 0x80000000, 0xa8fc, 0xa92e | 0x40000000,0xa92f | 0x80000000, 1030 0xa95f, 0xa9c1 | 0x40000000,0xa9cd | 0x80000000, 0xa9de | 0x40000000,0xa9df | 0x80000000, 0xaa5c | 0x40000000,0xaa5f | 0x80000000, 0xaade | 0x40000000,0xaadf | 0x80000000, 0xaaf0 | 0x40000000,0xaaf1 | 0x80000000, 1031 0xabeb, 0xfd3e | 0x40000000,0xfd3f | 0x80000000, 0xfe10 | 0x40000000,0xfe19 | 0x80000000, 0xfe30 | 0x40000000,0xfe52 | 0x80000000, 0xfe54 | 0x40000000,0xfe61 | 0x80000000, 0xfe63, 0xfe68, 1032 0xfe6a | 0x40000000,0xfe6b | 0x80000000, 0xff01 | 0x40000000,0xff03 | 0x80000000, 0xff05 | 0x40000000,0xff0a | 0x80000000, 0xff0c | 0x40000000,0xff0f | 0x80000000, 0xff1a | 0x40000000,0xff1b | 0x80000000, 1033 0xff1f | 0x40000000,0xff20 | 0x80000000, 0xff3b | 0x40000000,0xff3d | 0x80000000, 
0xff3f, 0xff5b, 0xff5d, 0xff5f | 0x40000000,0xff65 | 0x80000000, 0x10100 | 0x40000000,0x10102 | 0x80000000, 1034 0x1039f, 0x103d0, 0x1056f, 0x10857, 0x1091f, 0x1093f, 0x10a50 | 0x40000000,0x10a58 | 0x80000000, 0x10a7f, 1035 0x10af0 | 0x40000000,0x10af6 | 0x80000000, 0x10b39 | 0x40000000,0x10b3f | 0x80000000, 0x10b99 | 0x40000000,0x10b9c | 0x80000000, 0x10f55 | 0x40000000,0x10f59 | 0x80000000, 0x11047 | 0x40000000,0x1104d | 0x80000000, 1036 0x110bb | 0x40000000,0x110bc | 0x80000000, 0x110be | 0x40000000,0x110c1 | 0x80000000, 0x11140 | 0x40000000,0x11143 | 0x80000000, 0x11174 | 0x40000000,0x11175 | 0x80000000, 0x111c5 | 0x40000000,0x111c8 | 0x80000000, 1037 0x111cd, 0x111db, 0x111dd | 0x40000000,0x111df | 0x80000000, 0x11238 | 0x40000000,0x1123d | 0x80000000, 0x112a9, 0x1144b | 0x40000000,0x1144f | 0x80000000, 1038 0x1145b, 0x1145d, 0x114c6, 0x115c1 | 0x40000000,0x115d7 | 0x80000000, 0x11641 | 0x40000000,0x11643 | 0x80000000, 0x11660 | 0x40000000,0x1166c | 0x80000000, 1039 0x1173c | 0x40000000,0x1173e | 0x80000000, 0x1183b, 0x119e2, 0x11a3f | 0x40000000,0x11a46 | 0x80000000, 0x11a9a | 0x40000000,0x11a9c | 0x80000000, 0x11a9e | 0x40000000,0x11aa2 | 0x80000000, 1040 0x11c41 | 0x40000000,0x11c45 | 0x80000000, 0x11c70 | 0x40000000,0x11c71 | 0x80000000, 0x11ef7 | 0x40000000,0x11ef8 | 0x80000000, 0x11fff, 0x12470 | 0x40000000,0x12474 | 0x80000000, 1041 0x16a6e | 0x40000000,0x16a6f | 0x80000000, 0x16af5, 0x16b37 | 0x40000000,0x16b3b | 0x80000000, 0x16b44, 0x16e97 | 0x40000000,0x16e9a | 0x80000000, 0x16fe2, 1042 0x1bc9f, 0x1da87 | 0x40000000,0x1da8b | 0x80000000, 0x1e95e | 0x40000000,0x1e95f | 0x80000000 1043 ]; 1044 1045 /* The ASCII ones are the most frequently used ones, also CommonMark 1046 * specification requests few more in this range. 
*/
    if(codepoint <= 0x7f)
        return ISPUNCT_(cast(CHAR)codepoint);

    /* Everything above the ASCII range is decided by the map lookup. */
    return (md_unicode_bsearch__(codepoint, PUNCT_MAP.ptr, PUNCT_MAP.length) >= 0);
}

/* Compute the case-folded form of the codepoint and store it into info:
 * info.codepoints receives the folded sequence and info.n_codepoints its
 * length (1, 2 or 3). Codepoints not found in any of the folding maps are
 * mapped to themselves.
 */
void md_get_unicode_fold_info(uint codepoint, MD_UNICODE_FOLD_INFO* info)
{
    /* Codepoints whose folded form is a single codepoint; the folded values
     * are kept in FOLD_MAP_1_DATA, one value per element of this map.
     *
     * NOTE(review): the comment originally placed here was a verbatim copy of
     * the PUNCT_MAP one (punctuation categories, build_punct_map.py); the
     * generator of this table is not visible from this file - confirm.
     * NOTE(review): some codepoints with a simple case folding seem to be
     * absent (e.g. U+0184, U+038F) - verify against Unicode CaseFolding.txt. */
    static immutable uint[] FOLD_MAP_1 =
    [
        0x0041 | 0x40000000, 0x005a | 0x80000000, 0x00b5, 0x00c0 | 0x40000000, 0x00d6 | 0x80000000, 0x00d8 | 0x40000000, 0x00de | 0x80000000, 0x0100 | 0x40000000, 0x012e | 0x80000000, 0x0132 | 0x40000000, 0x0136 | 0x80000000,
        0x0139 | 0x40000000, 0x0147 | 0x80000000, 0x014a | 0x40000000, 0x0176 | 0x80000000, 0x0178, 0x0179 | 0x40000000, 0x017d | 0x80000000, 0x017f, 0x0181, 0x0182,
        0x0186, 0x0187, 0x0189, 0x018b, 0x018e, 0x018f, 0x0190, 0x0191, 0x0193,
        0x0194, 0x0196, 0x0197, 0x0198, 0x019c, 0x019d, 0x019f, 0x01a0 | 0x40000000, 0x01a4 | 0x80000000, 0x01a6,
        0x01a7, 0x01a9, 0x01ac, 0x01ae, 0x01af, 0x01b1, 0x01b3, 0x01b7, 0x01b8,
        0x01bc, 0x01c4, 0x01c5, 0x01c7, 0x01c8, 0x01ca, 0x01cb | 0x40000000, 0x01db | 0x80000000, 0x01de | 0x40000000, 0x01ee | 0x80000000,
        0x01f1, 0x01f2, 0x01f6, 0x01f7, 0x01f8 | 0x40000000, 0x021e | 0x80000000, 0x0220, 0x0222 | 0x40000000, 0x0232 | 0x80000000, 0x023a,
        0x023b, 0x023d, 0x023e, 0x0241, 0x0243, 0x0244, 0x0245, 0x0246 | 0x40000000, 0x024e | 0x80000000, 0x0345,
        0x0370, 0x0376, 0x037f, 0x0386, 0x0388 | 0x40000000, 0x038a | 0x80000000, 0x038c, 0x038e, 0x0391 | 0x40000000, 0x03a1 | 0x80000000,
        0x03a3 | 0x40000000, 0x03ab | 0x80000000, 0x03c2, 0x03cf, 0x03d0, 0x03d1, 0x03d5, 0x03d6, 0x03d8 | 0x40000000, 0x03ee | 0x80000000,
        0x03f0, 0x03f1, 0x03f4, 0x03f5, 0x03f7, 0x03f9, 0x03fa, 0x03fd | 0x40000000, 0x03ff | 0x80000000,
        0x0400 | 0x40000000, 0x040f | 0x80000000, 0x0410 | 0x40000000, 0x042f | 0x80000000, 0x0460 | 0x40000000, 0x0480 | 0x80000000, 0x048a | 0x40000000,
0x04be | 0x80000000, 0x04c0, 0x04c1 | 0x40000000, 0x04cd | 0x80000000, 1071 0x04d0 | 0x40000000, 0x052e | 0x80000000, 0x0531 | 0x40000000, 0x0556 | 0x80000000, 0x10a0 | 0x40000000, 0x10c5 | 0x80000000, 0x10c7, 0x10cd, 0x13f8 | 0x40000000, 0x13fd | 0x80000000, 0x1c80, 1072 0x1c81, 0x1c82, 0x1c83, 0x1c85, 0x1c86, 0x1c87, 0x1c88, 0x1c90 | 0x40000000, 0x1cba | 0x80000000, 1073 0x1cbd | 0x40000000, 0x1cbf | 0x80000000, 0x1e00 | 0x40000000, 0x1e94 | 0x80000000, 0x1e9b, 0x1ea0 | 0x40000000, 0x1efe | 0x80000000, 0x1f08 | 0x40000000, 0x1f0f | 0x80000000, 0x1f18 | 0x40000000, 0x1f1d | 0x80000000, 1074 0x1f28 | 0x40000000, 0x1f2f | 0x80000000, 0x1f38 | 0x40000000, 0x1f3f | 0x80000000, 0x1f48 | 0x40000000, 0x1f4d | 0x80000000, 0x1f59, 0x1f5b, 0x1f5d, 0x1f5f, 1075 0x1f68 | 0x40000000, 0x1f6f | 0x80000000, 0x1fb8, 0x1fba, 0x1fbe, 0x1fc8 | 0x40000000, 0x1fcb | 0x80000000, 0x1fd8, 0x1fda, 0x1fe8, 1076 0x1fea, 0x1fec, 0x1ff8, 0x1ffa, 0x2126, 0x212a, 0x212b, 0x2132, 0x2160 | 0x40000000, 0x216f | 0x80000000, 1077 0x2183, 0x24b6 | 0x40000000, 0x24cf | 0x80000000, 0x2c00 | 0x40000000, 0x2c2e | 0x80000000, 0x2c60, 0x2c62, 0x2c63, 0x2c64, 1078 0x2c67 | 0x40000000, 0x2c6b | 0x80000000, 0x2c6d, 0x2c6e, 0x2c6f, 0x2c70, 0x2c72, 0x2c75, 0x2c7e, 1079 0x2c80 | 0x40000000, 0x2ce2 | 0x80000000, 0x2ceb, 0x2cf2, 0xa640 | 0x40000000, 0xa66c | 0x80000000, 0xa680 | 0x40000000, 0xa69a | 0x80000000, 0xa722 | 0x40000000, 0xa72e | 0x80000000, 1080 0xa732 | 0x40000000, 0xa76e | 0x80000000, 0xa779, 0xa77d, 0xa77e | 0x40000000, 0xa786 | 0x80000000, 0xa78b, 0xa78d, 0xa790, 1081 0xa796 | 0x40000000, 0xa7a8 | 0x80000000, 0xa7aa, 0xa7ab, 0xa7ac, 0xa7ad, 0xa7ae, 0xa7b0, 0xa7b1, 0xa7b2, 1082 0xa7b3, 0xa7b4 | 0x40000000, 0xa7be | 0x80000000, 0xa7c2, 0xa7c4, 0xa7c5, 0xa7c6, 0xab70 | 0x40000000, 0xabbf | 0x80000000, 1083 0xff21 | 0x40000000, 0xff3a | 0x80000000, 0x10400 | 0x40000000, 0x10427 | 0x80000000, 0x104b0 | 0x40000000, 0x104d3 | 0x80000000, 0x10c80 | 0x40000000, 0x10cb2 | 0x80000000, 0x118a0 | 0x40000000, 
0x118bf | 0x80000000, 1084 0x16e40 | 0x40000000, 0x16e5f | 0x80000000, 0x1e900 | 0x40000000, 0x1e921 | 0x80000000 1085 ]; 1086 1087 static immutable uint[] FOLD_MAP_1_DATA = 1088 [ 1089 0x0061, 0x007a, 0x03bc, 0x00e0, 0x00f6, 0x00f8, 0x00fe, 0x0101, 0x012f, 0x0133, 0x0137, 0x013a, 0x0148, 1090 0x014b, 0x0177, 0x00ff, 0x017a, 0x017e, 0x0073, 0x0253, 0x0183, 0x0254, 0x0188, 0x0256, 0x018c, 0x01dd, 1091 0x0259, 0x025b, 0x0192, 0x0260, 0x0263, 0x0269, 0x0268, 0x0199, 0x026f, 0x0272, 0x0275, 0x01a1, 0x01a5, 1092 0x0280, 0x01a8, 0x0283, 0x01ad, 0x0288, 0x01b0, 0x028a, 0x01b4, 0x0292, 0x01b9, 0x01bd, 0x01c6, 0x01c6, 1093 0x01c9, 0x01c9, 0x01cc, 0x01cc, 0x01dc, 0x01df, 0x01ef, 0x01f3, 0x01f3, 0x0195, 0x01bf, 0x01f9, 0x021f, 1094 0x019e, 0x0223, 0x0233, 0x2c65, 0x023c, 0x019a, 0x2c66, 0x0242, 0x0180, 0x0289, 0x028c, 0x0247, 0x024f, 1095 0x03b9, 0x0371, 0x0377, 0x03f3, 0x03ac, 0x03ad, 0x03af, 0x03cc, 0x03cd, 0x03b1, 0x03c1, 0x03c3, 0x03cb, 1096 0x03c3, 0x03d7, 0x03b2, 0x03b8, 0x03c6, 0x03c0, 0x03d9, 0x03ef, 0x03ba, 0x03c1, 0x03b8, 0x03b5, 0x03f8, 1097 0x03f2, 0x03fb, 0x037b, 0x037d, 0x0450, 0x045f, 0x0430, 0x044f, 0x0461, 0x0481, 0x048b, 0x04bf, 0x04cf, 1098 0x04c2, 0x04ce, 0x04d1, 0x052f, 0x0561, 0x0586, 0x2d00, 0x2d25, 0x2d27, 0x2d2d, 0x13f0, 0x13f5, 0x0432, 1099 0x0434, 0x043e, 0x0441, 0x0442, 0x044a, 0x0463, 0xa64b, 0x10d0, 0x10fa, 0x10fd, 0x10ff, 0x1e01, 0x1e95, 1100 0x1e61, 0x1ea1, 0x1eff, 0x1f00, 0x1f07, 0x1f10, 0x1f15, 0x1f20, 0x1f27, 0x1f30, 0x1f37, 0x1f40, 0x1f45, 1101 0x1f51, 0x1f53, 0x1f55, 0x1f57, 0x1f60, 0x1f67, 0x1fb0, 0x1f70, 0x03b9, 0x1f72, 0x1f75, 0x1fd0, 0x1f76, 1102 0x1fe0, 0x1f7a, 0x1fe5, 0x1f78, 0x1f7c, 0x03c9, 0x006b, 0x00e5, 0x214e, 0x2170, 0x217f, 0x2184, 0x24d0, 1103 0x24e9, 0x2c30, 0x2c5e, 0x2c61, 0x026b, 0x1d7d, 0x027d, 0x2c68, 0x2c6c, 0x0251, 0x0271, 0x0250, 0x0252, 1104 0x2c73, 0x2c76, 0x023f, 0x2c81, 0x2ce3, 0x2cec, 0x2cf3, 0xa641, 0xa66d, 0xa681, 0xa69b, 0xa723, 0xa72f, 1105 0xa733, 0xa76f, 0xa77a, 0x1d79, 0xa77f, 0xa787, 0xa78c, 0x0265, 
0xa791, 0xa797, 0xa7a9, 0x0266, 0x025c, 1106 0x0261, 0x026c, 0x026a, 0x029e, 0x0287, 0x029d, 0xab53, 0xa7b5, 0xa7bf, 0xa7c3, 0xa794, 0x0282, 0x1d8e, 1107 0x13a0, 0x13ef, 0xff41, 0xff5a, 0x10428, 0x1044f, 0x104d8, 0x104fb, 0x10cc0, 0x10cf2, 0x118c0, 0x118df, 1108 0x16e60, 0x16e7f, 0x1e922, 0x1e943 1109 ]; 1110 1111 static immutable uint[] FOLD_MAP_2 = 1112 [ 1113 0x00df, 0x0130, 0x0149, 0x01f0, 0x0587, 0x1e96, 0x1e97, 0x1e98, 0x1e99, 1114 0x1e9a, 0x1e9e, 0x1f50, 0x1f80 | 0x40000000, 0x1f87 | 0x80000000, 0x1f88 | 0x40000000, 0x1f8f | 0x80000000, 0x1f90 | 0x40000000, 0x1f97 | 0x80000000, 0x1f98 | 0x40000000, 0x1f9f | 0x80000000, 1115 0x1fa0 | 0x40000000, 0x1fa7 | 0x80000000, 0x1fa8 | 0x40000000, 0x1faf | 0x80000000, 0x1fb2, 0x1fb3, 0x1fb4, 0x1fb6, 0x1fbc, 0x1fc2, 1116 0x1fc3, 0x1fc4, 0x1fc6, 0x1fcc, 0x1fd6, 0x1fe4, 0x1fe6, 0x1ff2, 0x1ff3, 1117 0x1ff4, 0x1ff6, 0x1ffc, 0xfb00, 0xfb01, 0xfb02, 0xfb05, 0xfb06, 0xfb13, 1118 0xfb14, 0xfb15, 0xfb16, 0xfb17 1119 ]; 1120 1121 static immutable uint[] FOLD_MAP_2_DATA = 1122 [ 1123 0x0073,0x0073, 0x0069,0x0307, 0x02bc,0x006e, 0x006a,0x030c, 0x0565,0x0582, 0x0068,0x0331, 0x0074,0x0308, 1124 0x0077,0x030a, 0x0079,0x030a, 0x0061,0x02be, 0x0073,0x0073, 0x03c5,0x0313, 0x1f00,0x03b9, 0x1f07,0x03b9, 1125 0x1f00,0x03b9, 0x1f07,0x03b9, 0x1f20,0x03b9, 0x1f27,0x03b9, 0x1f20,0x03b9, 0x1f27,0x03b9, 0x1f60,0x03b9, 1126 0x1f67,0x03b9, 0x1f60,0x03b9, 0x1f67,0x03b9, 0x1f70,0x03b9, 0x03b1,0x03b9, 0x03ac,0x03b9, 0x03b1,0x0342, 1127 0x03b1,0x03b9, 0x1f74,0x03b9, 0x03b7,0x03b9, 0x03ae,0x03b9, 0x03b7,0x0342, 0x03b7,0x03b9, 0x03b9,0x0342, 1128 0x03c1,0x0313, 0x03c5,0x0342, 0x1f7c,0x03b9, 0x03c9,0x03b9, 0x03ce,0x03b9, 0x03c9,0x0342, 0x03c9,0x03b9, 1129 0x0066,0x0066, 0x0066,0x0069, 0x0066,0x006c, 0x0073,0x0074, 0x0073,0x0074, 0x0574,0x0576, 0x0574,0x0565, 1130 0x0574,0x056b, 0x057e,0x0576, 0x0574,0x056d 1131 ]; 1132 1133 static immutable uint[] FOLD_MAP_3 = 1134 [ 1135 0x0390, 0x03b0, 0x1f52, 0x1f54, 0x1f56, 0x1fb7, 0x1fc7, 0x1fd2, 0x1fd3, 1136 0x1fd7, 
0x1fe2, 0x1fe3, 0x1fe7, 0x1ff7, 0xfb03, 0xfb04
    ];

    /* Folded forms for FOLD_MAP_3: three consecutive values per element of
     * the map (it is registered with n_codepoints == 3 below). */
    static immutable uint[] FOLD_MAP_3_DATA =
    [
        0x03b9,0x0308,0x0301, 0x03c5,0x0308,0x0301, 0x03c5,0x0313,0x0300, 0x03c5,0x0313,0x0301,
        0x03c5,0x0313,0x0342, 0x03b1,0x0342,0x03b9, 0x03b7,0x0342,0x03b9, 0x03b9,0x0308,0x0300,
        0x03b9,0x0308,0x0301, 0x03b9,0x0308,0x0342, 0x03c5,0x0308,0x0300, 0x03c5,0x0308,0x0301,
        0x03c5,0x0308,0x0342, 0x03c9,0x0342,0x03b9, 0x0066,0x0066,0x0069, 0x0066,0x0066,0x006c
    ];

    /* Descriptor of one folding table: 'map' (with 'map_size' elements) is
     * what md_unicode_bsearch__() searches, 'data' holds 'n_codepoints'
     * output values for every element of 'map'. */
    static struct FOLD_MAP
    {
        const(uint)* map;
        const(uint)* data;
        size_t map_size;
        int n_codepoints;
    }

    /* NOTE(review): with 'static immutable' commented out this is an ordinary
     * local array, i.e. it is initialized anew on every call. */
    /*static immutable*/ FOLD_MAP[3] FOLD_MAP_LIST =
    [
        FOLD_MAP(FOLD_MAP_1.ptr, FOLD_MAP_1_DATA.ptr, FOLD_MAP_1.length, 1),
        FOLD_MAP(FOLD_MAP_2.ptr, FOLD_MAP_2_DATA.ptr, FOLD_MAP_2.length, 2),
        FOLD_MAP(FOLD_MAP_3.ptr, FOLD_MAP_3_DATA.ptr, FOLD_MAP_3.length, 3),
    ];

    int i;

    /* Fast path for ASCII characters: upper-case letters are shifted to
     * lower case, everything else folds to itself. */
    if(codepoint <= 0x7f) {
        info.codepoints[0] = codepoint;
        if(ISUPPER_(cast(CHAR)codepoint))
            info.codepoints[0] += 'a' - 'A';
        info.n_codepoints = 1;
        return;
    }

    /* Try to locate the codepoint in any of the maps. */
    for(i = 0; i < cast(int) (FOLD_MAP_LIST.length); i++) {
        int index;

        index = md_unicode_bsearch__(codepoint, FOLD_MAP_LIST[i].map, FOLD_MAP_LIST[i].map_size);
        if(index >= 0) {
            /* Found the mapping. The data array is indexed in step with the
             * map, n_codepoints values per map element. */
            int n_codepoints = FOLD_MAP_LIST[i].n_codepoints;
            const uint* map = FOLD_MAP_LIST[i].map;
            const uint* codepoints = FOLD_MAP_LIST[i].data + (index * n_codepoints);

            memcpy(info.codepoints.ptr, codepoints, uint.sizeof * n_codepoints);
            info.n_codepoints = n_codepoints;

            /* The raw map element (flag bits included) equals the codepoint
             * only for untagged single-codepoint elements, so this branch is
             * taken for every hit inside a range record. */
            if(FOLD_MAP_LIST[i].map[index] != codepoint) {
                /* The found mapping maps whole range of codepoints,
                 * i.e. we have to offset info.codepoints[0] accordingly.
*/
                if((map[index] & 0x00ffffff)+1 == codepoints[0]) {
                    /* Alternating type of the range. */
                    info.codepoints[0] = codepoint + ((codepoint & 0x1) == (map[index] & 0x1) ? 1 : 0);
                } else {
                    /* Range to range kind of mapping. */
                    info.codepoints[0] += (codepoint - (map[index] & 0x00ffffff));
                }
            }

            return;
        }
    }

    /* No mapping found. Map the codepoint to itself. */
    info.codepoints[0] = codepoint;
    info.n_codepoints = 1;
}


/* Byte classifiers for UTF-8: LEADn matches the first byte of an n-byte
 * sequence, TAIL matches a continuation byte. */
bool IS_UTF8_LEAD1(CHAR ch)
{
    const ubyte b = cast(ubyte)(ch);
    return b <= 0x7f;
}

bool IS_UTF8_LEAD2(CHAR ch)
{
    const ubyte b = cast(ubyte)(ch);
    return (b & 0xe0) == 0xc0;
}

bool IS_UTF8_LEAD3(CHAR ch)
{
    const ubyte b = cast(ubyte)(ch);
    return (b & 0xf0) == 0xe0;
}

bool IS_UTF8_LEAD4(CHAR ch)
{
    const ubyte b = cast(ubyte)(ch);
    return (b & 0xf8) == 0xf0;
}

bool IS_UTF8_TAIL(CHAR ch)
{
    const ubyte b = cast(ubyte)(ch);
    return (b & 0xc0) == 0x80;
}

/* Decode the UTF-8 sequence starting at str[0]; str_size is the count of
 * units available at str. If p_size is not null, it receives the length of
 * the decoded sequence.
 *
 * A lead byte whose continuation bytes are missing or do not fit into
 * str_size, as well as any byte which is no lead byte at all, is returned
 * as it is with the length of 1.
 */
uint md_decode_utf8__(const(CHAR)* str, SZ str_size, SZ* p_size)
{
    const CHAR lead = str[0];

    /* Fallback result; it also covers plain ASCII, which matches none of the
     * multi-byte lead patterns below. */
    uint decoded = cast(uint) lead;
    SZ consumed = 1;

    if(IS_UTF8_LEAD2(lead)) {
        if(1 < str_size && IS_UTF8_TAIL(str[1])) {
            consumed = 2;
            decoded = ((cast(uint)lead & 0x1f) << 6) |
                      ((cast(uint)str[1] & 0x3f) << 0);
        }
    } else if(IS_UTF8_LEAD3(lead)) {
        if(2 < str_size && IS_UTF8_TAIL(str[1]) && IS_UTF8_TAIL(str[2])) {
            consumed = 3;
            decoded = ((cast(uint)lead & 0x0f) << 12) |
                      ((cast(uint)str[1] & 0x3f) << 6) |
                      ((cast(uint)str[2] & 0x3f) << 0);
        }
    } else if(IS_UTF8_LEAD4(lead)) {
        if(3 < str_size && IS_UTF8_TAIL(str[1]) && IS_UTF8_TAIL(str[2]) && IS_UTF8_TAIL(str[3])) {
            consumed = 4;
            decoded = ((cast(uint)lead & 0x07) << 18) |
                      ((cast(uint)str[1] & 0x3f) << 12) |
                      ((cast(uint)str[2] & 0x3f) << 6) |
                      ((cast(uint)str[3] & 0x3f) << 0);
        }
    }

    if(p_size != null)
        *p_size = consumed;
    return decoded;
}

/* Decode the codepoint which ends right before the offset off, i.e. whose
 * last byte is at off-1. If the preceding bytes do not form a 2-, 3- or
 * 4-byte sequence, the byte at off-1 alone is returned.
 */
uint md_decode_utf8_before__(MD_CTX* ctx, OFF off)
{
    const CHAR last = ctx.CH(off-1);

    if(IS_UTF8_LEAD1(last))
        return cast(uint) last;

    /* Look back for a lead byte two, three and four bytes before off. */
    if(off > 1 && IS_UTF8_LEAD2(ctx.CH(off-2)) && IS_UTF8_TAIL(last)) {
        return ((cast(uint)ctx.CH(off-2) & 0x1f) << 6) |
               ((cast(uint)last & 0x3f) << 0);
    }

    if(off > 2 && IS_UTF8_LEAD3(ctx.CH(off-3)) && IS_UTF8_TAIL(ctx.CH(off-2)) && IS_UTF8_TAIL(last)) {
        return ((cast(uint)ctx.CH(off-3) & 0x0f) << 12) |
               ((cast(uint)ctx.CH(off-2) & 0x3f) << 6) |
               ((cast(uint)last & 0x3f) << 0);
    }

    if(off > 3 && IS_UTF8_LEAD4(ctx.CH(off-4)) && IS_UTF8_TAIL(ctx.CH(off-3)) && IS_UTF8_TAIL(ctx.CH(off-2)) && IS_UTF8_TAIL(last)) {
        return ((cast(uint)ctx.CH(off-4) & 0x07) << 18) |
               ((cast(uint)ctx.CH(off-3) & 0x3f) << 12) |
               ((cast(uint)ctx.CH(off-2) & 0x3f) << 6) |
               ((cast(uint)last & 0x3f) << 0);
    }

    return cast(uint) last;
}

/* Classification helpers: the variants taking (ctx, off) decode the
 * codepoint starting at the offset, the ...BEFORE variants the codepoint
 * ending right before it. */
bool ISUNICODEWHITESPACE_(uint codepoint)
{
    return md_is_unicode_whitespace__(codepoint);
}

bool ISUNICODEWHITESPACE(MD_CTX* ctx, OFF off)
{
    const uint codepoint = md_decode_utf8__(ctx.STR(off), ctx.size - (off), null);
    return md_is_unicode_whitespace__(codepoint);
}

bool ISUNICODEWHITESPACEBEFORE(MD_CTX* ctx, OFF off)
{
    const uint codepoint = md_decode_utf8_before__(ctx, off);
    return md_is_unicode_whitespace__(codepoint);
}

bool ISUNICODEPUNCT(MD_CTX* ctx, OFF off)
{
    const uint codepoint = md_decode_utf8__(ctx.STR(off), ctx.size - (off), null);
    return md_is_unicode_punct__(codepoint);
}

bool ISUNICODEPUNCTBEFORE(MD_CTX* ctx, OFF off)
{
    const uint codepoint = md_decode_utf8_before__(ctx, off);
    return md_is_unicode_punct__(codepoint);
}

/* Decode the codepoint at the offset off of the string str whose total size
 * is str_size; *p_char_size receives the length of the decoded sequence. */
uint md_decode_unicode(const(CHAR)* str, OFF off, SZ str_size, SZ* p_char_size)
{
    const(CHAR)* start = str+off;
    const SZ remaining = str_size-off;
    return md_decode_utf8__(start, remaining, p_char_size);
}

/*************************************
 *** Helper
string manipulations ***
 *************************************/

/* Copy the text between the offsets 'beg' and 'end' into 'buffer', walking
 * the given lines: the gap between the end of one line and the beginning of
 * the next one is replaced with a single 'line_break_replacement_char'.
 * The count of characters written is stored into '*p_size'.
 *
 * NOTE: It is the caller who has to provide a buffer which is large enough.
 * (The original note says the output is always shorter than the input, so
 * (end - beg) characters is what the caller should allocate.)
 *
 * 'n_lines' is not consulted here; the walk over 'lines' stops only when
 * 'end' is reached.
 */
void md_merge_lines(MD_CTX* ctx, OFF beg, OFF end, const(MD_LINE)* lines, int n_lines,
                    CHAR line_break_replacement_char, CHAR* buffer, SZ* p_size)
{
    CHAR* dst = buffer;
    OFF pos = beg;

    for(int idx = 0; ; idx++) {
        /* Copy the part of the current line which lies before 'end'. */
        const OFF stop = (end < lines[idx].end) ? end : lines[idx].end;
        for(; pos < stop; pos++)
            *dst++ = ctx.CH(pos);

        if(pos >= end)
            break;

        /* More lines to come: emit the replacement and jump to the beginning
         * of the next line. */
        *dst++ = line_break_replacement_char;
        pos = lines[idx + 1].beg;
    }

    *p_size = cast(uint)(dst - buffer);
}

/* Wrapper of md_merge_lines() which allocates new buffer for the output string.
1368 */ 1369 int md_merge_lines_alloc(MD_CTX* ctx, OFF beg, OFF end, const(MD_LINE)* lines, int n_lines, 1370 CHAR line_break_replacement_char, const(CHAR)** p_str, SZ* p_size) 1371 { 1372 CHAR* buffer; 1373 1374 buffer = cast(CHAR*) malloc(CHAR.sizeof * (end - beg)); 1375 if(buffer == null) { 1376 ctx.MD_LOG("malloc() failed."); 1377 return -1; 1378 } 1379 1380 md_merge_lines(ctx, beg, end, lines, n_lines, 1381 line_break_replacement_char, buffer, p_size); 1382 1383 *p_str = buffer; 1384 return 0; 1385 } 1386 1387 OFF md_skip_unicode_whitespace(const(CHAR)* label, OFF off, SZ size) 1388 { 1389 SZ char_size; 1390 uint codepoint; 1391 1392 while(off < size) { 1393 codepoint = md_decode_unicode(label, off, size, &char_size); 1394 if(!ISUNICODEWHITESPACE_(codepoint) && !ISNEWLINE_(label[off])) 1395 break; 1396 off += char_size; 1397 } 1398 1399 return off; 1400 } 1401 1402 1403 /****************************** 1404 *** Recognizing raw HTML *** 1405 ******************************/ 1406 1407 /* md_is_html_tag() may be called when processing inlines (inline raw HTML) 1408 * or when breaking document to blocks (checking for start of HTML block type 7). 1409 * 1410 * When breaking document to blocks, we do not yet know line boundaries, but 1411 * in that case the whole tag has to live on a single line. We distinguish this 1412 * by n_lines == 0. 1413 */ 1414 int md_is_html_tag(MD_CTX* ctx, const(MD_LINE)* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end) 1415 { 1416 int attr_state; 1417 OFF off = beg; 1418 OFF line_end = (n_lines > 0) ? lines[0].end : ctx.size; 1419 int i = 0; 1420 1421 assert(ctx.CH(beg) == '<'); 1422 1423 if(off + 1 >= line_end) 1424 return FALSE; 1425 off++; 1426 1427 /* For parsing attributes, we need a little state automaton below. 1428 * State -1: no attributes are allowed. 1429 * State 0: attribute could follow after some whitespace. 1430 * State 1: after a whitespace (attribute name may follow). 
* State 2: after attribute name ('=' MAY follow).
     * State 3: after '=' (value specification MUST follow).
     * State 41: in middle of unquoted attribute value.
     * State 42: in middle of single-quoted attribute value.
     * State 43: in middle of double-quoted attribute value.
     */
    attr_state = 0;

    if(ctx.CH(off) == '/') {
        /* Closer tag "</ ... >". No attributes may be present. */
        attr_state = -1;
        off++;
    }

    /* Tag name: a letter followed by any count of alphanumerics and '-'. */
    if(off >= line_end || !ctx.ISALPHA(off))
        return FALSE;
    off++;
    while(off < line_end && (ctx.ISALNUM(off) || ctx.CH(off) == '-'))
        off++;

    /* (Optional) attributes (if not closer), (optional) '/' (if not closer)
     * and final '>'. The outer loop advances line by line, the inner one
     * character by character within the current line. */
    while(1) {
        while(off < line_end && !ctx.ISNEWLINE(off)) {
            if(attr_state > 40) {
                /* Inside an attribute value (states 41, 42, 43). */
                if(attr_state == 41 && (ctx.ISBLANK(off) || ctx.ISANYOF(off, "\"'=<>`"))) {
                    attr_state = 0;
                    off--;  /* Put the char back for re-inspection in the new state. */
                } else if(attr_state == 42 && ctx.CH(off) == '\'') {
                    attr_state = 0;
                } else if(attr_state == 43 && ctx.CH(off) == '"') {
                    attr_state = 0;
                }
                off++;
            } else if(ctx.ISWHITESPACE(off)) {
                /* Only state 0 is changed by whitespace; in any other state
                 * the whitespace is just skipped. */
                if(attr_state == 0)
                    attr_state = 1;
                off++;
            } else if(attr_state <= 2 && ctx.CH(off) == '>') {
                /* End.
 */
                goto done;
            } else if(attr_state <= 2 && ctx.CH(off) == '/' && off+1 < line_end && ctx.CH(off+1) == '>') {
                /* End with digraph '/>' */
                off++;
                goto done;
            } else if((attr_state == 1 || attr_state == 2) && (ctx.ISALPHA(off) || ctx.CH(off) == '_' || ctx.CH(off) == ':')) {
                off++;
                /* Attribute name */
                while(off < line_end && (ctx.ISALNUM(off) || ctx.ISANYOF(off, "_.:-")))
                    off++;
                attr_state = 2;
            } else if(attr_state == 2 && ctx.CH(off) == '=') {
                /* Attribute assignment sign */
                off++;
                attr_state = 3;
            } else if(attr_state == 3) {
                /* Expecting start of attribute value. */
                if(ctx.CH(off) == '"')
                    attr_state = 43;
                else if(ctx.CH(off) == '\'')
                    attr_state = 42;
                else if(!ctx.ISANYOF(off, "\"'=<>`") && !ctx.ISNEWLINE(off))
                    attr_state = 41;
                else
                    return FALSE;
                off++;
            } else {
                /* Anything unexpected. */
                return FALSE;
            }
        }

        /* We have to be on a single line. See definition of start condition
         * of HTML block, type 7. */
        if(n_lines == 0)
            return FALSE;

        /* Continue on the next line, if there is any. */
        i++;
        if(i >= n_lines)
            return FALSE;

        off = lines[i].beg;
        line_end = lines[i].end;

        /* The line break counts as a whitespace: it ends an unquoted value
         * (state 41) and allows an attribute name to follow (state 1). */
        if(attr_state == 0 || attr_state == 41)
            attr_state = 1;

        if(off >= max_end)
            return FALSE;
    }

done:
    /* Here off is at the final '>'; it has to lie before max_end. */
    if(off >= max_end)
        return FALSE;

    *p_end = off+1;
    return TRUE;
}

/* Scan the lines, from the offset beg on, for the string str of the length
 * len. A match must end at or before both the end of its line and max_end.
 *
 * On success TRUE is returned and *p_end is set to the offset just after the
 * match. On failure FALSE is returned and the offset where the scan stopped
 * is remembered in *p_scan_horizon; the check at the top uses that value to
 * refuse a repeated scan of a range already known to hold no match.
 */
static int
md_scan_for_html_closer(MD_CTX* ctx, const MD_CHAR* str, MD_SIZE len,
                        const MD_LINE* lines, int n_lines,
                        OFF beg, OFF max_end, OFF* p_end,
                        OFF* p_scan_horizon)
{
    OFF off = beg;
    int i = 0;

    if(off < *p_scan_horizon && *p_scan_horizon >= max_end - len) {
        /* We have already scanned the range up to the max_end so we know
         * there is nothing to see.
*/
        return FALSE;
    }

    while(TRUE) {
        while(off + len <= lines[i].end && off + len <= max_end) {
            if(md_ascii_eq(ctx.STR(off), str, len)) {
                /* Success. */
                *p_end = off + len;
                return TRUE;
            }
            off++;
        }

        i++;
        if(off >= max_end || i >= n_lines) {
            /* Failure. */
            *p_scan_horizon = off;
            return FALSE;
        }

        off = lines[i].beg;
    }
}

/* Recognize an HTML comment starting at beg. On success TRUE is returned and
 * *p_end is set to the offset just after the closing "-->". */
static int
md_is_html_comment(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
{
    const OFF first_line_end = lines[0].end;

    assert(ctx.CH(beg) == '<');

    /* The opener "<!--" and at least one more character have to fit on the
     * first line. */
    if(beg + 4 >= first_line_end)
        return FALSE;

    const bool has_opener = ctx.CH(beg+1) == '!' && ctx.CH(beg+2) == '-' && ctx.CH(beg+3) == '-';
    if(!has_opener)
        return FALSE;

    const OFF contents_beg = beg + 4;

    /* ">" and "->" must not follow the opening. */
    if(contents_beg < first_line_end && ctx.CH(contents_beg) == '>')
        return FALSE;
    if(contents_beg+1 < first_line_end && ctx.CH(contents_beg) == '-' && ctx.CH(contents_beg+1) == '>')
        return FALSE;

    /* HTML comment must not contain "--", so we scan just for "--" instead
     * of "-->" and verify manually that '>' follows. */
    if(!md_scan_for_html_closer(ctx, "--", 2,
                lines, n_lines, contents_beg, max_end, p_end, &ctx.html_comment_horizon))
        return FALSE;

    if(*p_end >= max_end || ctx.CH(*p_end) != '>')
        return FALSE;

    *p_end = *p_end + 1;
    return TRUE;
}

/* Recognize an HTML processing instruction ("<?" ... "?>") starting at beg.
 * On success *p_end is set to the offset just after the closing "?>". */
static int
md_is_html_processing_instruction(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
{
    /* "<?" and at least one more character have to fit on the first line. */
    if(beg + 2 >= lines[0].end || ctx.CH(beg+1) != '?')
        return FALSE;

    return md_scan_for_html_closer(ctx, "?>", 2,
                lines, n_lines, beg + 2, max_end, p_end, &ctx.html_proc_instr_horizon);
}

static int
md_is_html_declaration(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
{
    OFF off = beg;

    if(off + 2 >= lines[0].end)
        return FALSE;
    if(ctx.CH(off+1) != '!')
        return FALSE;
    off += 2;

    /* Declaration name.
*/
    if(off >= lines[0].end || !ctx.ISALPHA(off))
        return FALSE;
    off++;
    while(off < lines[0].end && ctx.ISALPHA(off))
        off++;
    if(off < lines[0].end && !ctx.ISWHITESPACE(off))
        return FALSE;

    return md_scan_for_html_closer(ctx, ">", 1,
                lines, n_lines, off, max_end, p_end, &ctx.html_decl_horizon);
}

/* Recognize a CDATA section ("<![CDATA[" ... "]]>") starting at beg. On
 * success *p_end is set to the offset just after the closing "]]>". */
static int
md_is_html_cdata(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
{
    string opener = "<![CDATA[";
    OFF off = beg;

    /* The opener and at least one more character have to fit on the first
     * line. */
    if(off + opener.length >= lines[0].end)
        return FALSE;
    if(memcmp(ctx.STR(off), opener.ptr, opener.length) != 0)
        return FALSE;
    off += opener.length;

    /* When the last line ends before max_end, the scan limit is pulled back
     * to two characters before the end of that line. */
    const OFF last_line_end = lines[n_lines-1].end;
    if(last_line_end < max_end)
        max_end = last_line_end - 2;

    return md_scan_for_html_closer(ctx, "]]>", 3,
                lines, n_lines, off, max_end, p_end, &ctx.html_cdata_horizon);
}

/* Try all the raw HTML recognizers in turn (tag, comment, processing
 * instruction, declaration, CDATA) and stop at the first one which matches.
 * Returns non-zero if any of them matched. */
static int
md_is_html_any(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
{
    assert(ctx.CH(beg) == '<');

    bool matched = md_is_html_tag(ctx, lines, n_lines, beg, max_end, p_end) != 0;
    if(!matched)
        matched = md_is_html_comment(ctx, lines, n_lines, beg, max_end, p_end) != 0;
    if(!matched)
        matched = md_is_html_processing_instruction(ctx, lines, n_lines, beg, max_end, p_end) != 0;
    if(!matched)
        matched = md_is_html_declaration(ctx, lines, n_lines, beg, max_end, p_end) != 0;
    if(!matched)
        matched = md_is_html_cdata(ctx, lines, n_lines, beg, max_end, p_end) != 0;

    return matched;
}


/****************************
 ***  Recognizing Entity  ***
 ****************************/

/* Contents of a hexadecimal entity (what follows "&#x"): 1 to 6 hexadecimal
 * digits. On success *p_end is set to the offset after the last digit. */
static int
md_is_hex_entity_contents(MD_CTX* ctx, const(CHAR)* text, OFF beg, OFF max_end, OFF* p_end)
{
    OFF digits_end = beg;

    /* The run is followed a bit beyond the accepted length so that an
     * overlong one is seen and rejected below. */
    while(digits_end < max_end && ISXDIGIT_(text[digits_end]) && digits_end - beg <= 8)
        digits_end++;

    const n_digits = digits_end - beg;
    if(n_digits < 1 || n_digits > 6)
        return FALSE;

    *p_end = digits_end;
    return TRUE;
}

/* Contents of a decimal entity (what follows "&#"): 1 to 7 decimal digits.
 * On success *p_end is set to the offset after the last digit. */
static int
md_is_dec_entity_contents(MD_CTX* ctx, const(CHAR)* text, OFF beg, OFF max_end, OFF* p_end)
{
    OFF digits_end = beg;

    while(digits_end < max_end && ISDIGIT_(text[digits_end]) && digits_end - beg <= 8)
        digits_end++;

    const n_digits = digits_end - beg;
    if(n_digits < 1 || n_digits > 7)
        return FALSE;

    *p_end = digits_end;
    return TRUE;
}

/* Contents of a named entity (what follows "&"): a letter followed by
 * alphanumerics, 2 to 48 characters altogether. On success *p_end is set to
 * the offset after the name. */
static int
md_is_named_entity_contents(MD_CTX* ctx, const(CHAR)* text, OFF beg, OFF max_end, OFF* p_end)
{
    if(beg >= max_end || !ISALPHA_(text[beg]))
        return FALSE;

    OFF name_end = beg + 1;
    while(name_end < max_end && ISALNUM_(text[name_end]) && name_end - beg <= 48)
        name_end++;

    const name_len = name_end - beg;
    if(name_len < 2 || name_len > 48)
        return FALSE;

    *p_end = name_end;
    return TRUE;
}

/* Recognize an entity ("&name;", "&#123;" or "&#x1f;") at text[beg], which
 * has to be the '&'. On success *p_end is set to the offset just after the
 * terminating ';'. */
static int
md_is_entity_str(MD_CTX* ctx, const(CHAR)* text, OFF beg, OFF max_end, OFF* p_end)
{
    assert(text[beg] == '&');

    const OFF contents = beg + 1;
    OFF contents_end = contents;
    int recognized;

    if(contents+2 < max_end && text[contents] == '#' && (text[contents+1] == 'x' || text[contents+1] == 'X')) {
        recognized = md_is_hex_entity_contents(ctx, text, contents+2, max_end, &contents_end);
    } else if(contents+1 < max_end && text[contents] == '#') {
        recognized = md_is_dec_entity_contents(ctx, text, contents+1, max_end, &contents_end);
    } else {
        recognized = md_is_named_entity_contents(ctx, text, contents, max_end, &contents_end);
    }

    /* Valid contents have to be terminated with ';' before max_end. */
    if(!recognized || contents_end >= max_end || text[contents_end] != ';')
        return FALSE;

    *p_end = contents_end+1;
    return TRUE;
}

/* The same as md_is_entity_str(), applied to the text of the document. */
static int
md_is_entity(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
{
    return md_is_entity_str(ctx, ctx.text, beg, max_end, p_end);
}


/******************************
 ***  Attribute Management  ***
 ******************************/

struct MD_ATTRIBUTE_BUILD
{
    CHAR* text = null;
    MD_TEXTTYPE* substr_types = null;
    OFF* substr_offsets = null;
    int substr_count = 0;
int substr_alloc = 0; 1770 MD_TEXTTYPE[1] trivial_types = [0]; 1771 OFF[2] trivial_offsets = [0, 0]; 1772 } 1773 1774 1775 enum MD_BUILD_ATTR_NO_ESCAPES = 0x0001; 1776 1777 void* realloc_safe(void* ptr, size_t newSize) 1778 { 1779 import core.stdc.stdlib : free, realloc; 1780 1781 if (newSize == 0) 1782 { 1783 free(ptr); 1784 return null; 1785 } 1786 1787 return realloc(ptr, newSize); 1788 } 1789 1790 1791 int md_build_attr_append_substr(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build, 1792 MD_TEXTTYPE type, OFF off) 1793 { 1794 if(build.substr_count >= build.substr_alloc) { 1795 MD_TEXTTYPE* new_substr_types; 1796 OFF* new_substr_offsets; 1797 1798 build.substr_alloc = (build.substr_alloc == 0 ? 8 : build.substr_alloc * 2); 1799 1800 new_substr_types = cast(MD_TEXTTYPE*) realloc_safe(build.substr_types, 1801 build.substr_alloc * MD_TEXTTYPE.sizeof); 1802 if(new_substr_types == null) { 1803 ctx.MD_LOG("realloc() failed."); 1804 return -1; 1805 } 1806 /* Note +1 to reserve space for final offset (== raw_size). 
*/ 1807 new_substr_offsets = cast(OFF*) realloc_safe(build.substr_offsets, 1808 (build.substr_alloc+1) * OFF.sizeof); 1809 if(new_substr_offsets == null) { 1810 ctx.MD_LOG("realloc() failed."); 1811 free(new_substr_types); 1812 return -1; 1813 } 1814 1815 build.substr_types = new_substr_types; 1816 build.substr_offsets = new_substr_offsets; 1817 } 1818 1819 build.substr_types[build.substr_count] = type; 1820 build.substr_offsets[build.substr_count] = off; 1821 build.substr_count++; 1822 return 0; 1823 } 1824 1825 void md_free_attribute(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build) 1826 { 1827 if(build.substr_alloc > 0) { 1828 free(build.text); 1829 free(build.substr_types); 1830 free(build.substr_offsets); 1831 } 1832 } 1833 1834 int md_build_attribute(MD_CTX* ctx, const(CHAR)* raw_text, SZ raw_size, 1835 uint flags, MD_ATTRIBUTE* attr, MD_ATTRIBUTE_BUILD* build) 1836 { 1837 OFF raw_off, off; 1838 int is_trivial; 1839 int ret = 0; 1840 1841 memset(build, 0, MD_ATTRIBUTE_BUILD.sizeof); 1842 1843 /* If there is no backslash and no ampersand, build trivial attribute 1844 * without any malloc(). */ 1845 is_trivial = TRUE; 1846 for(raw_off = 0; raw_off < raw_size; raw_off++) { 1847 if(ISANYOF3_(raw_text[raw_off], '\\', '&', '\0')) { 1848 is_trivial = FALSE; 1849 break; 1850 } 1851 } 1852 1853 if(is_trivial) { 1854 build.text = cast(CHAR*) (raw_size ? 
raw_text : null); 1855 build.substr_types = build.trivial_types.ptr; 1856 build.substr_offsets = build.trivial_offsets.ptr; 1857 build.substr_count = 1; 1858 build.substr_alloc = 0; 1859 build.trivial_types[0] = MD_TEXT_NORMAL; 1860 build.trivial_offsets[0] = 0; 1861 build.trivial_offsets[1] = raw_size; 1862 off = raw_size; 1863 } else { 1864 build.text = cast(CHAR*) malloc(raw_size * CHAR.sizeof); 1865 if(build.text == null) { 1866 ctx.MD_LOG("malloc() failed."); 1867 goto abort; 1868 } 1869 1870 raw_off = 0; 1871 off = 0; 1872 1873 while(raw_off < raw_size) { 1874 if(raw_text[raw_off] == '\0') { 1875 ret = (md_build_attr_append_substr(ctx, build, MD_TEXT_NULLCHAR, off)); 1876 if (ret < 0) goto abort; 1877 memcpy(build.text + off, raw_text + raw_off, 1); 1878 off++; 1879 raw_off++; 1880 continue; 1881 } 1882 1883 if(raw_text[raw_off] == '&') { 1884 OFF ent_end; 1885 1886 if(md_is_entity_str(ctx, raw_text, raw_off, raw_size, &ent_end)) { 1887 ret = (md_build_attr_append_substr(ctx, build, MD_TEXT_ENTITY, off)); 1888 if (ret < 0) goto abort; 1889 memcpy(build.text + off, raw_text + raw_off, ent_end - raw_off); 1890 off += ent_end - raw_off; 1891 raw_off = ent_end; 1892 continue; 1893 } 1894 } 1895 1896 if(build.substr_count == 0 || build.substr_types[build.substr_count-1] != MD_TEXT_NORMAL) 1897 { 1898 ret = (md_build_attr_append_substr(ctx, build, MD_TEXT_NORMAL, off)); 1899 if (ret < 0) goto abort; 1900 } 1901 1902 if(!(flags & MD_BUILD_ATTR_NO_ESCAPES) && 1903 raw_text[raw_off] == '\\' && raw_off+1 < raw_size && 1904 (ISPUNCT_(raw_text[raw_off+1]) || ISNEWLINE_(raw_text[raw_off+1]))) 1905 raw_off++; 1906 1907 build.text[off++] = raw_text[raw_off++]; 1908 } 1909 build.substr_offsets[build.substr_count] = off; 1910 } 1911 1912 attr.text = build.text; 1913 attr.size = off; 1914 attr.substr_offsets = build.substr_offsets; 1915 attr.substr_types = build.substr_types; 1916 return 0; 1917 1918 abort: 1919 md_free_attribute(ctx, build); 1920 return -1; 1921 } 1922 1923 

/*********************************************
 ***  Dictionary of Reference Definitions  ***
 *********************************************/

enum MD_FNV1A_BASE = 2166136261;
enum MD_FNV1A_PRIME = 16777619;

/* FNV-1a hash over 'n' bytes at 'data', continuing from 'base' so several
 * chunks can be chained (start the chain with MD_FNV1A_BASE). */
uint md_fnv1a(uint base, const(void)* data, size_t n)
{
    const(ubyte)* buf = cast(const(ubyte)*) data;
    uint hash = base;
    size_t i;

    for(i = 0; i < n; i++) {
        hash ^= buf[i];
        hash *= MD_FNV1A_PRIME;
    }

    return hash;
}


/* One link reference definition ([label]: destination "title"). */
struct MD_REF_DEF
{
    const(CHAR)* label;
    const(CHAR)* title;
    uint hash;              /* md_link_label_hash() of the label. */
    SZ label_size;
    bool label_needs_free;  /* Label is a heap copy owned by this definition. */
    bool title_needs_free;  /* Title is a heap copy owned by this definition. */
    SZ title_size;
    OFF dest_beg;           /* Destination as offsets into the document text. */
    OFF dest_end;
};

/* Label equivalence is quite complicated with regards to whitespace and case
 * folding. This complicates computing a hash of it as well as direct comparison
 * of two labels. */

/* Hash of a link label: leading whitespace is skipped, every later run of
 * whitespace (including newlines) is hashed as a single ' ', and all other
 * characters are hashed in their case-folded form. */
uint md_link_label_hash(const(CHAR)* label, SZ size)
{
    uint hash = MD_FNV1A_BASE;
    OFF off;
    uint codepoint;
    int is_whitespace = FALSE;

    off = md_skip_unicode_whitespace(label, 0, size);
    while(off < size) {
        SZ char_size;

        codepoint = md_decode_unicode(label, off, size, &char_size);
        is_whitespace = ISUNICODEWHITESPACE_(codepoint) || ISNEWLINE_(label[off]);

        if(is_whitespace) {
            codepoint = ' ';
            hash = md_fnv1a(hash, &codepoint, uint.sizeof);
            off = md_skip_unicode_whitespace(label, off, size);
        } else {
            MD_UNICODE_FOLD_INFO fold_info;

            md_get_unicode_fold_info(codepoint, &fold_info);
            hash = md_fnv1a(hash, fold_info.codepoints.ptr, fold_info.n_codepoints * uint.sizeof);
            off += char_size;
        }
    }

    return hash;
}

/* Loads the folded form of the next label character into 'fold_info' and
 * returns the offset at which the following character starts.
 *
 * A whole run of whitespace (newlines included) as well as the end of the
 * label is reported as one ' ', which matches what md_link_label_hash()
 * feeds into the hash.
 */
OFF md_link_label_cmp_load_fold_info(const(CHAR)* label, OFF off, SZ size,
                                     MD_UNICODE_FOLD_INFO* fold_info)
{
    uint codepoint;
    SZ char_size;

    if(off >= size) {
        /* Treat end of link label as a whitespace. */
        goto whitespace;
    }

    if(ISNEWLINE_(label[off])) {
        /* Treat new lines as a whitespace. */
        off++;
        goto whitespace;
    }

    codepoint = md_decode_unicode(label, off, size, &char_size);
    off += char_size;
    if(ISUNICODEWHITESPACE_(codepoint)) {
        /* Treat all whitespace as equivalent */
        goto whitespace;
    }

    /* Get real folding info. */
    md_get_unicode_fold_info(codepoint, fold_info);
    return off;

whitespace:
    fold_info.codepoints[0] = ' ';
    fold_info.n_codepoints = 1;
    /* Collapse the rest of the whitespace run so that "a  b" equals "a b". */
    return md_skip_unicode_whitespace(label, off, size);
}

/* Compares two link labels modulo case folding and whitespace.
 *
 * Returns 0 when the labels are equivalent, otherwise the difference of the
 * first mismatching folded codepoints (b minus a).
 */
static int
md_link_label_cmp(const(CHAR)* a_label, SZ a_size, const(CHAR)* b_label, SZ b_size)
{
    OFF a_off;
    OFF b_off;
    MD_UNICODE_FOLD_INFO a_fi = { 0 };
    MD_UNICODE_FOLD_INFO b_fi = { 0 };
    OFF a_fi_off = 0;
    OFF b_fi_off = 0;
    int cmp;

    a_off = md_skip_unicode_whitespace(a_label, 0, a_size);
    b_off = md_skip_unicode_whitespace(b_label, 0, b_size);

    /* Keep going until BOTH labels and both pending fold buffers are used
     * up. Stopping as soon as one side ends would make a label equal to any
     * longer label it is a prefix of, and would ignore the tail of a
     * multi-codepoint fold. The exhausted side yields ' ' from the loader,
     * so only trailing whitespace on the other side still compares equal. */
    while(a_off < a_size  ||  a_fi_off < a_fi.n_codepoints  ||
          b_off < b_size  ||  b_fi_off < b_fi.n_codepoints)
    {
        /* If needed, load fold info for next char. */
        if(a_fi_off >= a_fi.n_codepoints) {
            a_fi_off = 0;
            a_off = md_link_label_cmp_load_fold_info(a_label, a_off, a_size, &a_fi);
        }
        if(b_fi_off >= b_fi.n_codepoints) {
            b_fi_off = 0;
            b_off = md_link_label_cmp_load_fold_info(b_label, b_off, b_size, &b_fi);
        }

        cmp = b_fi.codepoints[b_fi_off] - a_fi.codepoints[a_fi_off];
        if(cmp != 0)
            return cmp;

        a_fi_off++;
        b_fi_off++;
    }

    return 0;
}

/* Hash table bucket holding several reference definitions. It is allocated
 * with SIZEOF(n) bytes so that n pointers fit starting at ref_defs_space. */
struct MD_REF_DEF_LIST
{
nothrow:
@nogc:

    int n_ref_defs;
    int alloc_ref_defs;

    /* Valid items always point into ctx.ref_defs[] */
    MD_REF_DEF* ref_defs_space; // Starting here, a list of pointers at the end of the struct

    // Number of bytes to allocate for a MD_REF_DEF_LIST with room for numDefRefs pointers
    // (8 == the two int fields preceding the pointer array).
    static size_t SIZEOF(int numDefRefs)
    {
        return 8 + (MD_REF_DEF*).sizeof * numDefRefs;
    }

    // Returns: a slice of the n_ref_defs pointers embedded at the end of the struct
    static MD_REF_DEF*[] refDefs(MD_REF_DEF_LIST* list)
    {
        return (&(list.ref_defs_space))[0..list.n_ref_defs];
    }

    // Returns: a reference to the index-th embedded pointer (no bounds checking)
    ref MD_REF_DEF* ref_defs_nth(size_t index)
    {
        MD_REF_DEF** base = &ref_defs_space;
        return base[index];
    }
}

/* qsort()/bsearch() comparator over elements of type MD_REF_DEF*: orders by
 * hash first and by md_link_label_cmp() for equal hashes. */
extern(C) int md_ref_def_cmp(scope const(void)* a, scope const void* b)
{
    const(MD_REF_DEF)* a_ref = *cast(const(MD_REF_DEF*)*)a;
    const(MD_REF_DEF)* b_ref = *cast(const(MD_REF_DEF*)*)b;

    if(a_ref.hash < b_ref.hash)
        return -1;
    else if(a_ref.hash > b_ref.hash)
        return +1;
    else
        return md_link_label_cmp(a_ref.label, a_ref.label_size, b_ref.label, b_ref.label_size);
}

/* Like md_ref_def_cmp(), but equivalent definitions are ordered by their
 * address, so the earliest one in ctx.ref_defs[] sorts first. */
extern(C) int md_ref_def_cmp_stable(scope const(void)* a, scope const(void)* b)
{
    int cmp;

    cmp = md_ref_def_cmp(a, b);

    /* Ensure stability of the sorting. */
    if(cmp == 0) {
        const(MD_REF_DEF)* a_ref = *cast(const(MD_REF_DEF*)*)a;
        const(MD_REF_DEF)* b_ref = *cast(const(MD_REF_DEF*)*)b;

        if(a_ref < b_ref)
            cmp = -1;
        else if(a_ref > b_ref)
            cmp = +1;
        else
            cmp = 0;
    }

    return cmp;
}

/* Builds ctx.ref_def_hashtable from ctx.ref_defs[0 .. ctx.n_ref_defs].
 *
 * Returns 0 on success (also when there are no definitions), -1 on an
 * allocation failure. A partially built table is left in the context and is
 * released by md_free_ref_def_hashtable().
 */
int md_build_ref_def_hashtable(MD_CTX* ctx)
{
    int i, j;

    if(ctx.n_ref_defs == 0)
        return 0;

    ctx.ref_def_hashtable_size = (ctx.n_ref_defs * 5) / 4;
    ctx.ref_def_hashtable = cast(void**) malloc(ctx.ref_def_hashtable_size * (void*).sizeof);
    if(ctx.ref_def_hashtable == null) {
        ctx.MD_LOG("malloc() failed.");
        /* md_lookup_ref_def() only checks the size, so never leave a
         * non-zero size paired with a null table. */
        ctx.ref_def_hashtable_size = 0;
        goto abort;
    }
    memset(ctx.ref_def_hashtable, 0, ctx.ref_def_hashtable_size * (void*).sizeof);

    /* Each member of ctx.ref_def_hashtable[] can be:
     *  -- null,
     *  -- pointer to the MD_REF_DEF in ctx.ref_defs[], or
     *  -- pointer to a MD_REF_DEF_LIST, which holds multiple pointers to
     *     such MD_REF_DEFs.
     */
    for(i = 0; i < ctx.n_ref_defs; i++) {
        MD_REF_DEF* def = &ctx.ref_defs[i];
        void* bucket;
        MD_REF_DEF_LIST* list;

        def.hash = md_link_label_hash(def.label, def.label_size);
        bucket = ctx.ref_def_hashtable[def.hash % ctx.ref_def_hashtable_size];

        if(bucket == null) {
            ctx.ref_def_hashtable[def.hash % ctx.ref_def_hashtable_size] = def;
            continue;
        }

        /* A bucket pointing into ctx.ref_defs[] is a single definition;
         * anything else is a MD_REF_DEF_LIST. */
        if(ctx.ref_defs <= cast(MD_REF_DEF*) bucket  &&  cast(MD_REF_DEF*) bucket < ctx.ref_defs + ctx.n_ref_defs) {
            /* The bucket already contains one ref. def. Let's see whether it
             * is the same label (ref. def. duplicate) or different one
             * (hash conflict). */
            MD_REF_DEF* old_def = cast(MD_REF_DEF*) bucket;

            if(md_link_label_cmp(def.label, def.label_size, old_def.label, old_def.label_size) == 0) {
                /* Ignore this ref. def. */
                continue;
            }

            /* Make the bucket capable of holding more ref. defs. */
            list = cast(MD_REF_DEF_LIST*) malloc(MD_REF_DEF_LIST.SIZEOF(4));
            if(list == null) {
                ctx.MD_LOG("malloc() failed.");
                goto abort;
            }
            list.ref_defs_nth(0) = old_def;
            list.ref_defs_nth(1) = def;
            list.n_ref_defs = 2;
            list.alloc_ref_defs = 4;
            ctx.ref_def_hashtable[def.hash % ctx.ref_def_hashtable_size] = list;
            continue;
        }

        /* Append the def to the bucket list. */
        list = cast(MD_REF_DEF_LIST*) bucket;
        if(list.n_ref_defs >= list.alloc_ref_defs) {
            MD_REF_DEF_LIST* list_tmp = cast(MD_REF_DEF_LIST*) realloc_safe(list, MD_REF_DEF_LIST.SIZEOF( 2 * list.alloc_ref_defs ));
            if(list_tmp == null) {
                ctx.MD_LOG("realloc() failed.");
                goto abort;
            }
            list = list_tmp;
            list.alloc_ref_defs *= 2;
            ctx.ref_def_hashtable[def.hash % ctx.ref_def_hashtable_size] = list;
        }

        list.ref_defs_nth(list.n_ref_defs) = def;
        list.n_ref_defs++;
    }

    /* Sort the complex buckets so we can use bsearch() with them. */
    for(i = 0; i < ctx.ref_def_hashtable_size; i++) {
        void* bucket = ctx.ref_def_hashtable[i];
        MD_REF_DEF_LIST* list;

        if(bucket == null)
            continue;
        if(ctx.ref_defs <= cast(MD_REF_DEF*) bucket  &&  cast(MD_REF_DEF*) bucket < ctx.ref_defs + ctx.n_ref_defs)
            continue;

        list = cast(MD_REF_DEF_LIST*) bucket;
        qsort(MD_REF_DEF_LIST.refDefs(list).ptr, list.n_ref_defs, (MD_REF_DEF*).sizeof, &md_ref_def_cmp_stable);

        /* Disable duplicates: every later equivalent entry is redirected to
         * the first one of its group. */
        for(j = 1; j < list.n_ref_defs; j++) {
            if(md_ref_def_cmp(&list.ref_defs_nth(j-1), &list.ref_defs_nth(j)) == 0)
                list.ref_defs_nth(j) = list.ref_defs_nth(j-1);
        }
    }

    return 0;

abort:
    return -1;
}

/* Frees the hash table and every MD_REF_DEF_LIST bucket. The definitions
 * themselves live in ctx.ref_defs[] and are not touched. */
static void
md_free_ref_def_hashtable(MD_CTX* ctx)
{
    if(ctx.ref_def_hashtable != null) {
        int i;

        for(i = 0; i < ctx.ref_def_hashtable_size; i++) {
            void* bucket = ctx.ref_def_hashtable[i];
            if(bucket == null)
                continue;
            if(ctx.ref_defs <= cast(MD_REF_DEF*) bucket  &&  cast(MD_REF_DEF*) bucket < ctx.ref_defs + ctx.n_ref_defs)
                continue;
            free(bucket);
        }

        free(ctx.ref_def_hashtable);
    }
}

/* Looks up the reference definition whose label is equivalent to
 * label[0 .. label_size]. Returns null when there is none. */
const(MD_REF_DEF)* md_lookup_ref_def(MD_CTX* ctx, const(CHAR)* label, SZ label_size)
{
    uint hash;
    void* bucket;

    if(ctx.ref_def_hashtable_size == 0)
        return null;

    hash = md_link_label_hash(label, label_size);
    bucket = ctx.ref_def_hashtable[hash % ctx.ref_def_hashtable_size];

    if(bucket == null) {
        return null;
    } else if(ctx.ref_defs <= cast(MD_REF_DEF*) bucket  &&  cast(MD_REF_DEF*) bucket < ctx.ref_defs + ctx.n_ref_defs) {
        const MD_REF_DEF* def = cast(MD_REF_DEF*) bucket;

        if(md_link_label_cmp(def.label, def.label_size, label, label_size) == 0)
            return def;
        else
            return null;
    } else {
        MD_REF_DEF_LIST* list = cast(MD_REF_DEF_LIST*) bucket;
        MD_REF_DEF key_buf;
        const MD_REF_DEF* key = &key_buf;
        const(MD_REF_DEF*)* ret;

        /* Temporary definition serving as the bsearch() key; only the
         * fields read by md_ref_def_cmp() are filled in. */
        key_buf.label = cast(CHAR*) label;
        key_buf.label_size = label_size;
        key_buf.hash = hash;    /* Already computed above for this label. */

        ret = cast(const(MD_REF_DEF*)*) bsearch(&key, MD_REF_DEF_LIST.refDefs(list).ptr,
                    list.n_ref_defs, (MD_REF_DEF*).sizeof, &md_ref_def_cmp);
        if(ret != null)
            return *ret;
        else
            return null;
    }
}


/*************************** 2293 *** Recognizing Links *** 2294 ***************************/ 2295 2296 /* Note this code is partially shared between processing inlines and blocks 2297 * as reference definitions and links share some helper parser functions. 2298 */ 2299 2300 struct MD_LINK_ATTR 2301 { 2302 OFF dest_beg; 2303 OFF dest_end; 2304 2305 const(CHAR)* title; 2306 SZ title_size; 2307 bool title_needs_free; 2308 } 2309 2310 2311 static int 2312 md_is_link_label(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, 2313 OFF* p_end, int* p_beg_line_index, int* p_end_line_index, 2314 OFF* p_contents_beg, OFF* p_contents_end) 2315 { 2316 OFF off = beg; 2317 OFF contents_beg = 0; 2318 OFF contents_end = 0; 2319 int line_index = 0; 2320 int len = 0; 2321 2322 if(ctx.CH(off) != '[') 2323 return FALSE; 2324 off++; 2325 2326 while(1) { 2327 OFF line_end = lines[line_index].end; 2328 2329 while(off < line_end) { 2330 if(ctx.CH(off) == '\\' && off+1 < ctx.size && (ctx.ISPUNCT(off+1) || ctx.ISNEWLINE(off+1))) { 2331 if(contents_end == 0) { 2332 contents_beg = off; 2333 *p_beg_line_index = line_index; 2334 } 2335 contents_end = off + 2; 2336 off += 2; 2337 } else if(ctx.CH(off) == '[') { 2338 return FALSE; 2339 } else if(ctx.CH(off) == ']') { 2340 if(contents_beg < contents_end) { 2341 /* Success. */ 2342 *p_contents_beg = contents_beg; 2343 *p_contents_end = contents_end; 2344 *p_end = off+1; 2345 *p_end_line_index = line_index; 2346 return TRUE; 2347 } else { 2348 /* Link label must have some non-whitespace contents. 
*/ 2349 return FALSE; 2350 } 2351 } else { 2352 uint codepoint; 2353 SZ char_size; 2354 2355 codepoint = md_decode_unicode(ctx.text, off, ctx.size, &char_size); 2356 if(!ISUNICODEWHITESPACE_(codepoint)) { 2357 if(contents_end == 0) { 2358 contents_beg = off; 2359 *p_beg_line_index = line_index; 2360 } 2361 contents_end = off + char_size; 2362 } 2363 2364 off += char_size; 2365 } 2366 2367 len++; 2368 if(len > 999) 2369 return FALSE; 2370 } 2371 2372 line_index++; 2373 len++; 2374 if(line_index < n_lines) 2375 off = lines[line_index].beg; 2376 else 2377 break; 2378 } 2379 2380 return FALSE; 2381 } 2382 2383 static int 2384 md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, 2385 OFF* p_contents_beg, OFF* p_contents_end) 2386 { 2387 OFF off = beg; 2388 2389 if(off >= max_end || ctx.CH(off) != '<') 2390 return FALSE; 2391 off++; 2392 2393 while(off < max_end) { 2394 if(ctx.CH(off) == '\\' && off+1 < max_end && ctx.ISPUNCT(off+1)) { 2395 off += 2; 2396 continue; 2397 } 2398 2399 if(ctx.ISNEWLINE(off) || ctx.CH(off) == '<') 2400 return FALSE; 2401 2402 if(ctx.CH(off) == '>') { 2403 /* Success. */ 2404 *p_contents_beg = beg+1; 2405 *p_contents_end = off; 2406 *p_end = off+1; 2407 return TRUE; 2408 } 2409 2410 off++; 2411 } 2412 2413 return FALSE; 2414 } 2415 2416 static int 2417 md_is_link_destination_B(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, 2418 OFF* p_contents_beg, OFF* p_contents_end) 2419 { 2420 OFF off = beg; 2421 int parenthesis_level = 0; 2422 2423 while(off < max_end) { 2424 if(ctx.CH(off) == '\\' && off+1 < max_end && ctx.ISPUNCT(off+1)) { 2425 off += 2; 2426 continue; 2427 } 2428 2429 if(ctx.ISWHITESPACE(off) || ctx.ISCNTRL(off)) 2430 break; 2431 2432 /* Link destination may include balanced pairs of unescaped '(' ')'. 
2433 * Note we limit the maximal nesting level by 32 to protect us from 2434 * https://github.com/jgm/cmark/issues/214 */ 2435 if(ctx.CH(off) == '(') { 2436 parenthesis_level++; 2437 if(parenthesis_level > 32) 2438 return FALSE; 2439 } else if(ctx.CH(off) == ')') { 2440 if(parenthesis_level == 0) 2441 break; 2442 parenthesis_level--; 2443 } 2444 2445 off++; 2446 } 2447 2448 if(parenthesis_level != 0 || off == beg) 2449 return FALSE; 2450 2451 /* Success. */ 2452 *p_contents_beg = beg; 2453 *p_contents_end = off; 2454 *p_end = off; 2455 return TRUE; 2456 } 2457 2458 static int 2459 md_is_link_destination(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, 2460 OFF* p_contents_beg, OFF* p_contents_end) 2461 { 2462 if(ctx.CH(beg) == '<') 2463 return md_is_link_destination_A(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end); 2464 else 2465 return md_is_link_destination_B(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end); 2466 } 2467 2468 static int 2469 md_is_link_title(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, 2470 OFF* p_end, int* p_beg_line_index, int* p_end_line_index, 2471 OFF* p_contents_beg, OFF* p_contents_end) 2472 { 2473 OFF off = beg; 2474 CHAR closer_char; 2475 int line_index = 0; 2476 2477 /* White space with up to one line break. */ 2478 while(off < lines[line_index].end && ctx.ISWHITESPACE(off)) 2479 off++; 2480 if(off >= lines[line_index].end) { 2481 line_index++; 2482 if(line_index >= n_lines) 2483 return FALSE; 2484 off = lines[line_index].beg; 2485 } 2486 if(off == beg) 2487 return FALSE; 2488 2489 *p_beg_line_index = line_index; 2490 2491 /* First char determines how to detect end of it. 
*/ 2492 switch(ctx.CH(off)) { 2493 case '"': closer_char = '"'; break; 2494 case '\'': closer_char = '\''; break; 2495 case '(': closer_char = ')'; break; 2496 default: return FALSE; 2497 } 2498 off++; 2499 2500 *p_contents_beg = off; 2501 2502 while(line_index < n_lines) { 2503 OFF line_end = lines[line_index].end; 2504 2505 while(off < line_end) { 2506 if(ctx.CH(off) == '\\' && off+1 < ctx.size && (ctx.ISPUNCT(off+1) || ctx.ISNEWLINE(off+1))) { 2507 off++; 2508 } else if(ctx.CH(off) == closer_char) { 2509 /* Success. */ 2510 *p_contents_end = off; 2511 *p_end = off+1; 2512 *p_end_line_index = line_index; 2513 return TRUE; 2514 } else if(closer_char == ')' && ctx.CH(off) == '(') { 2515 /* ()-style title cannot contain (unescaped '(')) */ 2516 return FALSE; 2517 } 2518 2519 off++; 2520 } 2521 2522 line_index++; 2523 } 2524 2525 return FALSE; 2526 } 2527 2528 /* Returns 0 if it is not a reference definition. 2529 * 2530 * Returns N > 0 if it is a reference definition. N then corresponds to the 2531 * number of lines forming it). In this case the definition is stored for 2532 * resolving any links referring to it. 2533 * 2534 * Returns -1 in case of an error (out of memory). 2535 */ 2536 int md_is_link_reference_definition(MD_CTX* ctx, const(MD_LINE)* lines, int n_lines) 2537 { 2538 OFF label_contents_beg; 2539 OFF label_contents_end; 2540 int label_contents_line_index = -1; 2541 int label_is_multiline; 2542 const(CHAR)* label; 2543 SZ label_size; 2544 bool label_needs_free = false; 2545 OFF dest_contents_beg; 2546 OFF dest_contents_end; 2547 OFF title_contents_beg; 2548 OFF title_contents_end; 2549 int title_contents_line_index; 2550 int title_is_multiline; 2551 OFF off; 2552 int line_index = 0; 2553 int tmp_line_index; 2554 MD_REF_DEF* def; 2555 int ret; 2556 2557 /* Link label. 
*/ 2558 if(!md_is_link_label(ctx, lines, n_lines, lines[0].beg, 2559 &off, &label_contents_line_index, &line_index, 2560 &label_contents_beg, &label_contents_end)) 2561 return FALSE; 2562 label_is_multiline = (label_contents_line_index != line_index); 2563 2564 /* Colon. */ 2565 if(off >= lines[line_index].end || ctx.CH(off) != ':') 2566 return FALSE; 2567 off++; 2568 2569 /* Optional white space with up to one line break. */ 2570 while(off < lines[line_index].end && ctx.ISWHITESPACE(off)) 2571 off++; 2572 if(off >= lines[line_index].end) { 2573 line_index++; 2574 if(line_index >= n_lines) 2575 return FALSE; 2576 off = lines[line_index].beg; 2577 } 2578 2579 /* Link destination. */ 2580 if(!md_is_link_destination(ctx, off, lines[line_index].end, 2581 &off, &dest_contents_beg, &dest_contents_end)) 2582 return FALSE; 2583 2584 /* (Optional) title. Note we interpret it as an title only if nothing 2585 * more follows on its last line. */ 2586 if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off, 2587 &off, &title_contents_line_index, &tmp_line_index, 2588 &title_contents_beg, &title_contents_end) 2589 && off >= lines[line_index + tmp_line_index].end) 2590 { 2591 title_is_multiline = (tmp_line_index != title_contents_line_index); 2592 title_contents_line_index += line_index; 2593 line_index += tmp_line_index; 2594 } else { 2595 /* Not a title. */ 2596 title_is_multiline = FALSE; 2597 title_contents_beg = off; 2598 title_contents_end = off; 2599 title_contents_line_index = 0; 2600 } 2601 2602 /* Nothing more can follow on the last line. */ 2603 if(off < lines[line_index].end) 2604 return FALSE; 2605 2606 /* Construct label. 
*/ 2607 if(!label_is_multiline) { 2608 label = cast(CHAR*) ctx.STR(label_contents_beg); 2609 label_size = label_contents_end - label_contents_beg; 2610 label_needs_free = false; 2611 } else { 2612 ret = (md_merge_lines_alloc(ctx, label_contents_beg, label_contents_end, 2613 lines + label_contents_line_index, n_lines - label_contents_line_index, 2614 ' ', &label, &label_size)); 2615 if (ret < 0) goto abort; 2616 label_needs_free = true; 2617 } 2618 2619 /* Store the reference definition. */ 2620 if(ctx.n_ref_defs >= ctx.alloc_ref_defs) { 2621 MD_REF_DEF* new_defs; 2622 2623 ctx.alloc_ref_defs = (ctx.alloc_ref_defs > 0 ? ctx.alloc_ref_defs * 2 : 16); 2624 new_defs = cast(MD_REF_DEF*) realloc_safe(ctx.ref_defs, ctx.alloc_ref_defs * MD_REF_DEF.sizeof); 2625 if(new_defs == null) { 2626 ctx.MD_LOG("realloc() failed."); 2627 ret = -1; 2628 goto abort; 2629 } 2630 2631 ctx.ref_defs = new_defs; 2632 } 2633 2634 def = &ctx.ref_defs[ctx.n_ref_defs]; 2635 memset(def, 0, MD_REF_DEF.sizeof); 2636 2637 def.label = label; 2638 def.label_size = label_size; 2639 def.label_needs_free = label_needs_free; 2640 2641 def.dest_beg = dest_contents_beg; 2642 def.dest_end = dest_contents_end; 2643 2644 if(title_contents_beg >= title_contents_end) { 2645 def.title = null; 2646 def.title_size = 0; 2647 } else if(!title_is_multiline) { 2648 def.title = cast(CHAR*) ctx.STR(title_contents_beg); 2649 def.title_size = title_contents_end - title_contents_beg; 2650 } else { 2651 ret = (md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end, 2652 lines + title_contents_line_index, n_lines - title_contents_line_index, 2653 '\n', &def.title, &def.title_size)); 2654 if (ret < 0) goto abort; 2655 def.title_needs_free = true; 2656 } 2657 2658 /* Success. */ 2659 ctx.n_ref_defs++; 2660 return line_index + 1; 2661 2662 abort: 2663 /* Failure. 
*/ 2664 if(label_needs_free) 2665 free(cast(void*)label); // Note: const_cast here 2666 return -1; 2667 } 2668 2669 static int 2670 md_is_link_reference(MD_CTX* ctx, const(MD_LINE)* lines, int n_lines, 2671 OFF beg, OFF end, MD_LINK_ATTR* attr) 2672 { 2673 const(MD_REF_DEF)* def; 2674 const(MD_LINE)* beg_line; 2675 const(MD_LINE)* end_line; 2676 const(CHAR)* label; 2677 SZ label_size; 2678 int ret; 2679 2680 assert(ctx.CH(beg) == '[' || ctx.CH(beg) == '!'); 2681 assert(ctx.CH(end-1) == ']'); 2682 2683 beg += (ctx.CH(beg) == '!' ? 2 : 1); 2684 end--; 2685 2686 /* Find lines corresponding to the beg and end positions. */ 2687 assert(lines[0].beg <= beg); 2688 beg_line = lines; 2689 while(beg >= beg_line.end) 2690 beg_line++; 2691 2692 assert(end <= lines[n_lines-1].end); 2693 end_line = beg_line; 2694 while(end >= end_line.end) 2695 end_line++; 2696 2697 if(beg_line != end_line) { 2698 ret = (md_merge_lines_alloc(ctx, beg, end, beg_line, 2699 cast(int)(n_lines - (beg_line - lines)), ' ', &label, &label_size)); 2700 if (ret < 0) goto abort; 2701 } else { 2702 label = cast(CHAR*) ctx.STR(beg); 2703 label_size = end - beg; 2704 } 2705 2706 def = md_lookup_ref_def(ctx, label, label_size); 2707 if(def != null) { 2708 attr.dest_beg = def.dest_beg; 2709 attr.dest_end = def.dest_end; 2710 attr.title = def.title; 2711 attr.title_size = def.title_size; 2712 attr.title_needs_free = false; 2713 } 2714 2715 if(beg_line != end_line) 2716 free(cast(void*)label); // Note: const_cast here 2717 2718 ret = (def != null); 2719 2720 abort: 2721 return ret; 2722 } 2723 2724 static int 2725 md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, int n_lines, 2726 OFF beg, OFF* p_end, MD_LINK_ATTR* attr) 2727 { 2728 int line_index = 0; 2729 int tmp_line_index; 2730 OFF title_contents_beg; 2731 OFF title_contents_end; 2732 int title_contents_line_index; 2733 int title_is_multiline; 2734 OFF off = beg; 2735 int ret = FALSE; 2736 2737 while(off >= lines[line_index].end) 2738 line_index++; 
2739 2740 assert(ctx.CH(off) == '('); 2741 off++; 2742 2743 /* Optional white space with up to one line break. */ 2744 while(off < lines[line_index].end && ctx.ISWHITESPACE(off)) 2745 off++; 2746 if(off >= lines[line_index].end && ctx.ISNEWLINE(off)) { 2747 line_index++; 2748 if(line_index >= n_lines) 2749 return FALSE; 2750 off = lines[line_index].beg; 2751 } 2752 2753 /* Link destination may be omitted, but only when not also having a title. */ 2754 if(off < ctx.size && ctx.CH(off) == ')') { 2755 attr.dest_beg = off; 2756 attr.dest_end = off; 2757 attr.title = null; 2758 attr.title_size = 0; 2759 attr.title_needs_free = false; 2760 off++; 2761 *p_end = off; 2762 return TRUE; 2763 } 2764 2765 /* Link destination. */ 2766 if(!md_is_link_destination(ctx, off, lines[line_index].end, 2767 &off, &attr.dest_beg, &attr.dest_end)) 2768 return FALSE; 2769 2770 /* (Optional) title. */ 2771 if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off, 2772 &off, &title_contents_line_index, &tmp_line_index, 2773 &title_contents_beg, &title_contents_end)) 2774 { 2775 title_is_multiline = (tmp_line_index != title_contents_line_index); 2776 title_contents_line_index += line_index; 2777 line_index += tmp_line_index; 2778 } else { 2779 /* Not a title. */ 2780 title_is_multiline = FALSE; 2781 title_contents_beg = off; 2782 title_contents_end = off; 2783 title_contents_line_index = 0; 2784 } 2785 2786 /* Optional whitespace followed with final ')'. 
*/ 2787 while(off < lines[line_index].end && ctx.ISWHITESPACE(off)) 2788 off++; 2789 if(off >= lines[line_index].end && ctx.ISNEWLINE(off)) { 2790 line_index++; 2791 if(line_index >= n_lines) 2792 return FALSE; 2793 off = lines[line_index].beg; 2794 } 2795 if(ctx.CH(off) != ')') 2796 goto abort; 2797 off++; 2798 2799 if(title_contents_beg >= title_contents_end) { 2800 attr.title = null; 2801 attr.title_size = 0; 2802 attr.title_needs_free = false; 2803 } else if(!title_is_multiline) { 2804 attr.title = cast(CHAR*) ctx.STR(title_contents_beg); // Note: const_cast here! 2805 attr.title_size = title_contents_end - title_contents_beg; 2806 attr.title_needs_free = false; 2807 } else { 2808 ret = (md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end, 2809 lines + title_contents_line_index, n_lines - title_contents_line_index, 2810 '\n', &attr.title, &attr.title_size)); 2811 if (ret < 0) goto abort; 2812 attr.title_needs_free = true; 2813 } 2814 2815 *p_end = off; 2816 ret = TRUE; 2817 2818 abort: 2819 return ret; 2820 } 2821 2822 void md_free_ref_defs(MD_CTX* ctx) 2823 { 2824 int i; 2825 2826 for(i = 0; i < ctx.n_ref_defs; i++) { 2827 MD_REF_DEF* def = &ctx.ref_defs[i]; 2828 2829 if(def.label_needs_free) 2830 free(cast(void*)def.label); // Note: const_cast here 2831 if(def.title_needs_free) 2832 free(cast(void*)def.title); // Note: const_cast here 2833 } 2834 2835 free(ctx.ref_defs); 2836 } 2837 2838 2839 /****************************************** 2840 *** Processing Inlines (a.k.a Spans) *** 2841 ******************************************/ 2842 2843 /* We process inlines in few phases: 2844 * 2845 * (1) We go through the block text and collect all significant characters 2846 * which may start/end a span or some other significant position into 2847 * ctx.marks[]. Core of this is what md_collect_marks() does. 2848 * 2849 * We also do some very brief preliminary context-less analysis, whether 2850 * it might be opener or closer (e.g. of an emphasis span). 
2851 * 2852 * This speeds the other steps as we do not need to re-iterate over all 2853 * characters anymore. 2854 * 2855 * (2) We analyze each potential mark types, in order by their precedence. 2856 * 2857 * In each md_analyze_XXX() function, we re-iterate list of the marks, 2858 * skipping already resolved regions (in preceding precedences) and try to 2859 * resolve them. 2860 * 2861 * (2.1) For trivial marks, which are single (e.g. HTML entity), we just mark 2862 * them as resolved. 2863 * 2864 * (2.2) For range-type marks, we analyze whether the mark could be closer 2865 * and, if yes, whether there is some preceding opener it could satisfy. 2866 * 2867 * If not we check whether it could be really an opener and if yes, we 2868 * remember it so subsequent closers may resolve it. 2869 * 2870 * (3) Finally, when all marks were analyzed, we render the block contents 2871 * by calling MD_RENDERER::text() callback, interrupting by ::enter_span() 2872 * or ::close_span() whenever we reach a resolved mark. 2873 */ 2874 2875 2876 /* The mark structure. 2877 * 2878 * '\\': Maybe escape sequence. 2879 * '\0': null char. 2880 * '*': Maybe (strong) emphasis start/end. 2881 * '_': Maybe (strong) emphasis start/end. 2882 * '~': Maybe strikethrough start/end (needs MD_FLAG_STRIKETHROUGH). 2883 * '`': Maybe code span start/end. 2884 * '&': Maybe start of entity. 2885 * ';': Maybe end of entity. 2886 * '<': Maybe start of raw HTML or autolink. 2887 * '>': Maybe end of raw HTML or autolink. 2888 * '[': Maybe start of link label or link text. 2889 * '!': Equivalent of '[' for image. 2890 * ']': Maybe end of link label or link text. 2891 * '@': Maybe permissive e-mail auto-link (needs MD_FLAG_PERMISSIVEEMAILAUTOLINKS). 2892 * ':': Maybe permissive URL auto-link (needs MD_FLAG_PERMISSIVEURLAUTOLINKS). 2893 * '.': Maybe permissive WWW auto-link (needs MD_FLAG_PERMISSIVEWWWAUTOLINKS). 2894 * 'D': Dummy mark, it reserves a space for splitting a previous mark 2895 * (e.g. 
emphasis) or to make more space for storing some special data
 *       related to the preceding mark (e.g. link).
 *
 * Note that not every occurrence of these characters in the text creates
 * the structure; only those which have (or may have, once more context is
 * seen) the special meaning do.
 *
 * (Keep this struct as small as possible so that as many of them as possible
 * fit into a CPU cache line.)
 */

struct MD_MARK {
    OFF beg;        /* Offset where the mark starts. */
    OFF end;        /* Offset just past the mark. */

    /* For unresolved openers, 'prev' and 'next' link the chain of still-open
     * openers of the given type 'ch'.
     *
     * When a mark gets resolved it is unlinked from that chain and the
     * fields point to the counterpart instead: the opener's 'next' is its
     * closer and the closer's 'prev' is its opener.
     */
    int prev;
    int next;
    CHAR ch;        /* Mark character (see the list above). */
    ubyte flags;    /* Combination of the MD_MARK_xxx bits below. */
};

/* Mark flags (these apply to ALL mark types). */
enum MD_MARK_POTENTIAL_OPENER           = 0x01;  /* Maybe opener. */
enum MD_MARK_POTENTIAL_CLOSER           = 0x02;  /* Maybe closer. */
enum MD_MARK_OPENER                     = 0x04;  /* Definitely opener. */
enum MD_MARK_CLOSER                     = 0x08;  /* Definitely closer. */
enum MD_MARK_RESOLVED                   = 0x10;  /* Resolved in any definite way. */

/* Mark flags specific for various mark types (so they can share bits). */
enum MD_MARK_EMPH_INTRAWORD             = 0x20;  /* Helper for the "rule of 3". */
enum MD_MARK_EMPH_MOD3_0                = 0x40;
enum MD_MARK_EMPH_MOD3_1                = 0x80;
enum MD_MARK_EMPH_MOD3_2                = (0x40 | 0x80);
enum MD_MARK_EMPH_MOD3_MASK             = (0x40 | 0x80);
enum MD_MARK_AUTOLINK                   = 0x20;  /* Distinguisher for '<', '>'. */
enum MD_MARK_VALIDPERMISSIVEAUTOLINK    = 0x20;  /* For permissive autolinks.
*/

/* Select the chain of unresolved '*' openers that matches the given mark
 * flags: intra-word vs. extra-word, and the original mark length modulo 3. */
MD_MARKCHAIN* md_asterisk_chain(MD_CTX* ctx, uint flags)
{
    switch(flags & (MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_MASK))
    {
        case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_0:  return ctx.ASTERISK_OPENERS_intraword_mod3_0;
        case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_1:  return ctx.ASTERISK_OPENERS_intraword_mod3_1;
        case MD_MARK_EMPH_INTRAWORD | MD_MARK_EMPH_MOD3_2:  return ctx.ASTERISK_OPENERS_intraword_mod3_2;
        case MD_MARK_EMPH_MOD3_0:                           return ctx.ASTERISK_OPENERS_extraword_mod3_0;
        case MD_MARK_EMPH_MOD3_1:                           return ctx.ASTERISK_OPENERS_extraword_mod3_1;
        case MD_MARK_EMPH_MOD3_2:                           return ctx.ASTERISK_OPENERS_extraword_mod3_2;
        default:                                            assert(false);
    }
}

/* Return the opener chain the mark at mark_index belongs to, or null when
 * its character has no chain. */
MD_MARKCHAIN* md_mark_chain(MD_CTX* ctx, int mark_index)
{
    MD_MARK* mark = &ctx.marks[mark_index];

    switch(mark.ch) {
        case '*':   return md_asterisk_chain(ctx, mark.flags);
        case '_':   return ctx.UNDERSCORE_OPENERS;
        case '~':   return ctx.TILDE_OPENERS;
        case '[':   return ctx.BRACKET_OPENERS;
        case '|':   return ctx.TABLECELLBOUNDARIES;
        default:    return null;
    }
}

/* Reserve one more slot in ctx.marks[] and return a pointer to it.
 *
 * The buffer grows geometrically (64 entries at first, then doubling).
 * Returns null if the reallocation fails; in that case ctx.marks and
 * ctx.alloc_marks are left untouched, so the recorded capacity never
 * exceeds the size of the real buffer.
 */
MD_MARK* md_push_mark(MD_CTX* ctx)
{
    if(ctx.n_marks >= ctx.alloc_marks) {
        auto new_alloc = (ctx.alloc_marks > 0 ? ctx.alloc_marks * 2 : 64);
        MD_MARK* new_marks = cast(MD_MARK*) realloc_safe(ctx.marks, new_alloc * MD_MARK.sizeof);

        if(new_marks == null) {
            ctx.MD_LOG("realloc() failed.");
            return null;
        }

        /* Commit the new capacity only once the buffer really exists. */
        ctx.marks = new_marks;
        ctx.alloc_marks = new_alloc;
    }

    return &ctx.marks[ctx.n_marks++];
}

/* Push an uninitialized mark; stores it to *mark.
 * Returns 0 on success, -1 when the mark buffer could not grow. */
int PUSH_MARK_(MD_CTX* ctx, MD_MARK** mark)
{
    *mark = md_push_mark(ctx);
    if(*mark == null)
    {
        return -1;
    }
    return 0;
}

/* Push a mark and initialize it: the given character, range and flags, with
 * no chain links (prev = next = -1).
 * Returns 0 on success, or the non-zero PUSH_MARK_() result on failure. */
int PUSH_MARK(MD_CTX* ctx, MD_MARK** mark, CHAR ch_, OFF beg_, OFF end_, int flags_)
{
    int ret = PUSH_MARK_(ctx, mark);
    if (ret != 0)
        return ret;

    (*mark).beg = (beg_);
    (*mark).end = (end_);
    (*mark).prev = -1;
    (*mark).next = -1;
    (*mark).ch = cast(char)(ch_);
    (*mark).flags = cast(ubyte)flags_;
    return 0;
}

/* Append the mark at mark_index to the tail of the given chain. */
static void
md_mark_chain_append(MD_CTX* ctx, MD_MARKCHAIN* chain, int mark_index)
{
    if(chain.tail >= 0)
        ctx.marks[chain.tail].next = mark_index;
    else
        chain.head = mark_index;

    ctx.marks[mark_index].prev = chain.tail;
    chain.tail = mark_index;
}

/* Sometimes, we need to store a pointer into the mark. It is quite rare
 * so we do not bother to make MD_MARK use union, and it can only happen
 * for dummy marks. */
void md_mark_store_ptr(MD_CTX* ctx, int mark_index, const(void)* ptr)
{
    MD_MARK* mark = &ctx.marks[mark_index];
    assert(mark.ch == 'D');

    /* Check only members beg and end are misused for this.
*/
    assert((void*).sizeof <= 2 * OFF.sizeof);
    memcpy(mark, &ptr, (void*).sizeof);
}

/* Read back a pointer previously stored into a dummy ('D') mark by
 * md_mark_store_ptr(). */
static void*
md_mark_get_ptr(MD_CTX* ctx, int mark_index)
{
    MD_MARK* mark = &ctx.marks[mark_index];
    void* stored;

    assert(mark.ch == 'D');
    memcpy(&stored, mark, (void*).sizeof);
    return stored;
}

/* Pair the opener with the closer and flag both as resolved.
 *
 * When 'chain' is not null, the opener is first unlinked from that chain
 * of unresolved openers.
 */
static void
md_resolve_range(MD_CTX* ctx, MD_MARKCHAIN* chain, int opener_index, int closer_index)
{
    MD_MARK* opener = &ctx.marks[opener_index];
    MD_MARK* closer = &ctx.marks[closer_index];

    if(chain != null) {
        /* Unlink the opener from its neighbours in the chain. */
        const int before = opener.prev;
        const int after = opener.next;

        if(before >= 0)
            ctx.marks[before].next = after;
        else
            chain.head = after;

        if(after >= 0)
            ctx.marks[after].prev = before;
        else
            chain.tail = before;
    }

    /* Point the two marks at each other and record their final roles. */
    opener.next = closer_index;
    closer.prev = opener_index;
    opener.flags |= MD_MARK_OPENER | MD_MARK_RESOLVED;
    closer.flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED;
}


enum MD_ROLLBACK_ALL         = 0;
enum MD_ROLLBACK_CROSSING    = 1;

/* In the range ctx.marks[opener_index] ... [closer_index], undo some or all
 * resolvings according to these rules:
 *
 * (1) All openers BEFORE the range corresponding to any closer inside the
 *     range are un-resolved and they are re-added to their respective chains
 *     of unresolved openers. This ensures we can reuse the opener for closers
 *     AFTER the range.
 *
 * (2) If 'how' is MD_ROLLBACK_ALL, then ALL resolved marks inside the range
 *     are discarded.
 *
 * (3) If 'how' is MD_ROLLBACK_CROSSING, only closers with openers handled
 *     in (1) are discarded. I.e. pairs of openers and closers which are both
 *     inside the range are retained as well as any unpaired marks.
3088 */ 3089 static void 3090 md_rollback(MD_CTX* ctx, int opener_index, int closer_index, int how) 3091 { 3092 int i; 3093 int mark_index; 3094 3095 /* Cut all unresolved openers at the mark index. */ 3096 for(i = OPENERS_CHAIN_FIRST; i < OPENERS_CHAIN_LAST+1; i++) { 3097 MD_MARKCHAIN* chain = &ctx.mark_chains[i]; 3098 3099 while(chain.tail >= opener_index) 3100 chain.tail = ctx.marks[chain.tail].prev; 3101 3102 if(chain.tail >= 0) 3103 ctx.marks[chain.tail].next = -1; 3104 else 3105 chain.head = -1; 3106 } 3107 3108 /* Go backwards so that un-resolved openers are re-added into their 3109 * respective chains, in the right order. */ 3110 mark_index = closer_index - 1; 3111 while(mark_index > opener_index) { 3112 MD_MARK* mark = &ctx.marks[mark_index]; 3113 int mark_flags = mark.flags; 3114 int discard_flag = (how == MD_ROLLBACK_ALL); 3115 3116 if(mark.flags & MD_MARK_CLOSER) { 3117 int mark_opener_index = mark.prev; 3118 3119 /* Undo opener BEFORE the range. */ 3120 if(mark_opener_index < opener_index) { 3121 MD_MARK* mark_opener = &ctx.marks[mark_opener_index]; 3122 MD_MARKCHAIN* chain; 3123 3124 mark_opener.flags &= ~(MD_MARK_OPENER | MD_MARK_CLOSER | MD_MARK_RESOLVED); 3125 chain = md_mark_chain(ctx, opener_index); 3126 if(chain != null) { 3127 md_mark_chain_append(ctx, chain, mark_opener_index); 3128 discard_flag = 1; 3129 } 3130 } 3131 } 3132 3133 /* And reset our flags. */ 3134 if(discard_flag) 3135 mark.flags &= ~(MD_MARK_OPENER | MD_MARK_CLOSER | MD_MARK_RESOLVED); 3136 3137 /* Jump as far as we can over unresolved or non-interesting marks. */ 3138 switch(how) { 3139 case MD_ROLLBACK_CROSSING: 3140 if((mark_flags & MD_MARK_CLOSER) && mark.prev > opener_index) { 3141 /* If we are closer with opener INSIDE the range, there may 3142 * not be any other crosser inside the subrange. */ 3143 mark_index = mark.prev; 3144 break; 3145 } 3146 goto default; 3147 /* Pass through. 
*/ 3148 default: 3149 mark_index--; 3150 break; 3151 } 3152 } 3153 } 3154 3155 void md_build_mark_char_map(MD_CTX* ctx) 3156 { 3157 memset(ctx.mark_char_map.ptr, 0, ctx.mark_char_map.length); 3158 3159 ctx.mark_char_map['\\'] = 1; 3160 ctx.mark_char_map['*'] = 1; 3161 ctx.mark_char_map['_'] = 1; 3162 ctx.mark_char_map['`'] = 1; 3163 ctx.mark_char_map['&'] = 1; 3164 ctx.mark_char_map[';'] = 1; 3165 ctx.mark_char_map['<'] = 1; 3166 ctx.mark_char_map['>'] = 1; 3167 ctx.mark_char_map['['] = 1; 3168 ctx.mark_char_map['!'] = 1; 3169 ctx.mark_char_map[']'] = 1; 3170 ctx.mark_char_map['\0'] = 1; 3171 3172 if(ctx.parser.flags & MD_FLAG_STRIKETHROUGH) 3173 ctx.mark_char_map['~'] = 1; 3174 3175 if(ctx.parser.flags & MD_FLAG_LATEXMATHSPANS) 3176 ctx.mark_char_map['$'] = 1; 3177 3178 if(ctx.parser.flags & MD_FLAG_PERMISSIVEEMAILAUTOLINKS) 3179 ctx.mark_char_map['@'] = 1; 3180 3181 if(ctx.parser.flags & MD_FLAG_PERMISSIVEURLAUTOLINKS) 3182 ctx.mark_char_map[':'] = 1; 3183 3184 if(ctx.parser.flags & MD_FLAG_PERMISSIVEWWWAUTOLINKS) 3185 ctx.mark_char_map['.'] = 1; 3186 3187 if(ctx.parser.flags & MD_FLAG_TABLES) 3188 ctx.mark_char_map['|'] = 1; 3189 3190 if(ctx.parser.flags & MD_FLAG_COLLAPSEWHITESPACE) { 3191 int i; 3192 3193 for(i = 0; i < cast(int) (ctx.mark_char_map).sizeof; i++) { 3194 if(ISWHITESPACE_(cast(CHAR)i)) 3195 ctx.mark_char_map[i] = 1; 3196 } 3197 } 3198 } 3199 3200 /* We limit code span marks to lower then 32 backticks. This solves the 3201 * pathologic case of too many openers, each of different length: Their 3202 * resolving would be then O(n^2). 
*/
enum CODESPAN_MARK_MAXLEN = 32;

/* Check whether a code span starts at offset 'beg' (which holds a back-tick).
 *
 * lines/n_lines:           The lines of the block, starting with the line
 *                          which contains 'beg'.
 * p_opener_beg/_end:       Receive the range of the opening mark.
 *                          *p_opener_end is set even on failure.
 * p_closer_beg/_end:       Receive the range of the closing mark (success only).
 * last_potential_closers:  Array of CODESPAN_MARK_MAXLEN offsets; entry [n-1]
 *                          is the last seen back-tick run of length n. It is
 *                          shared between calls to avoid needless re-scans.
 * p_reached_paragraph_end: Set to TRUE once a scan has hit the end of the
 *                          last line without finding a closer.
 *
 * Returns TRUE if a code span was found, FALSE otherwise.
 */
int md_is_code_span(MD_CTX* ctx, const(MD_LINE)* lines, int n_lines, OFF beg,
                    OFF* p_opener_beg, OFF* p_opener_end,
                    OFF* p_closer_beg, OFF* p_closer_end,
                    OFF* last_potential_closers,
                    int* p_reached_paragraph_end)
{
    OFF opener_beg = beg;
    OFF opener_end;
    OFF closer_beg;
    OFF closer_end;
    SZ mark_len;
    OFF line_end;
    int has_space_after_opener = FALSE;
    int has_eol_after_opener = FALSE;
    int has_space_before_closer = FALSE;
    int has_eol_before_closer = FALSE;
    int has_only_space = TRUE;
    int line_index = 0;

    line_end = lines[0].end;
    opener_end = opener_beg;
    while(opener_end < line_end && ctx.CH(opener_end) == '`')
        opener_end++;
    has_space_after_opener = (opener_end < line_end && ctx.CH(opener_end) == ' ');
    has_eol_after_opener = (opener_end == line_end);

    /* The caller needs to know end of the opening mark even if we fail. */
    *p_opener_end = opener_end;

    mark_len = opener_end - opener_beg;
    if(mark_len > CODESPAN_MARK_MAXLEN)
        return FALSE;

    /* Check whether we already know there is no closer of this length.
     * If so, a re-scan makes no sense. This fixes issue #59. */
    if(last_potential_closers[mark_len-1] >= lines[n_lines-1].end ||
       (*p_reached_paragraph_end && last_potential_closers[mark_len-1] < opener_end))
        return FALSE;

    closer_beg = opener_end;
    closer_end = opener_end;

    /* Find closer mark. */
    while(TRUE) {
        while(closer_beg < line_end && ctx.CH(closer_beg) != '`') {
            if(ctx.CH(closer_beg) != ' ')
                has_only_space = FALSE;
            closer_beg++;
        }
        closer_end = closer_beg;
        while(closer_end < line_end && ctx.CH(closer_end) == '`')
            closer_end++;

        if(closer_end - closer_beg == mark_len) {
            /* Success. */
            has_space_before_closer = (closer_beg > lines[line_index].beg && ctx.CH(closer_beg-1) == ' ');
            has_eol_before_closer = (closer_beg == lines[line_index].beg);
            break;
        }

        if(closer_end - closer_beg > 0) {
            /* We have found a back-tick run which is not the closer. */
            has_only_space = FALSE;

            /* But if we eventually fail, remember it as a potential closer
             * of its own length for future attempts. This mitigates the need
             * for rescans.
             *
             * Runs of exactly CODESPAN_MARK_MAXLEN back-ticks must be
             * recorded too (hence "<="): an opener of that length is still
             * accepted above, and the early-out consults slot
             * [CODESPAN_MARK_MAXLEN-1] for it. */
            if(closer_end - closer_beg <= CODESPAN_MARK_MAXLEN) {
                if(closer_beg > last_potential_closers[closer_end - closer_beg - 1])
                    last_potential_closers[closer_end - closer_beg - 1] = closer_beg;
            }
        }

        if(closer_end >= line_end) {
            line_index++;
            if(line_index >= n_lines) {
                /* Reached end of the paragraph and still nothing. */
                *p_reached_paragraph_end = TRUE;
                return FALSE;
            }
            /* Try on the next line. */
            line_end = lines[line_index].end;
            closer_beg = lines[line_index].beg;
        } else {
            closer_beg = closer_end;
        }
    }

    /* If there is a space or a new line both after the opener and before
     * the closer (and if the code span is not made of spaces only), consume
     * one initial and one trailing space as part of the marks. */
    if(!has_only_space &&
       (has_space_after_opener || has_eol_after_opener) &&
       (has_space_before_closer || has_eol_before_closer))
    {
        if(has_space_after_opener)
            opener_end++;
        else
            opener_end = lines[1].beg;

        if(has_space_before_closer)
            closer_beg--;
        else {
            closer_beg = lines[line_index-1].end;
            /* We need to eat the preceding "\r\n" but not any line trailing
             * spaces. */
            while(closer_beg < ctx.size && ctx.ISBLANK(closer_beg))
                closer_beg++;
        }
    }

    *p_opener_beg = opener_beg;
    *p_opener_end = opener_end;
    *p_closer_beg = closer_beg;
    *p_closer_end = closer_end;
    return TRUE;
}

/* Check whether "<scheme:path>" autolink starts at 'beg' (which holds '<')
 * and ends before 'max_end'. On success returns TRUE and stores the offset
 * just past the closing '>' into *p_end. */
static int
md_is_autolink_uri(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
{
    OFF off = beg+1;

    assert(ctx.CH(beg) == '<');

    /* Check for scheme. */
    if(off >= max_end || !ctx.ISASCII(off))
        return FALSE;
    off++;
    while(1) {
        if(off >= max_end)
            return FALSE;
        if(off - beg > 32)
            return FALSE;
        if(ctx.CH(off) == ':' && off - beg >= 3)
            break;
        if(!ctx.ISALNUM(off) && ctx.CH(off) != '+' && ctx.CH(off) != '-' && ctx.CH(off) != '.')
            return FALSE;
        off++;
    }

    /* Check the path after the scheme. */
    while(off < max_end && ctx.CH(off) != '>') {
        if(ctx.ISWHITESPACE(off) || ctx.ISCNTRL(off) || ctx.CH(off) == '<')
            return FALSE;
        off++;
    }

    if(off >= max_end)
        return FALSE;

    assert(ctx.CH(off) == '>');
    *p_end = off+1;
    return TRUE;
}

static int
md_is_autolink_email(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
{
    OFF off = beg + 1;
    int label_len;

    assert(ctx.CH(beg) == '<');

    /* The code should correspond to this regexp:
            /^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+
            @[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
            (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/
     */

    /* Username (before '@'). */
    while(off < max_end && (ctx.ISALNUM(off) || ctx.ISANYOF(off, ".!#$%&'*+/=?^_`{|}~-")))
        off++;
    if(off <= beg+1)
        return FALSE;

    /* '@' */
    if(off >= max_end || ctx.CH(off) != '@')
        return FALSE;
    off++;

    /* Labels delimited with '.'; each label is sequence of 1 - 62 alnum
     * characters or '-', but '-' is not allowed as first or last char.
*/
    label_len = 0;
    while(off < max_end) {
        if(ctx.ISALNUM(off))
            label_len++;
        else if(ctx.CH(off) == '-' && label_len > 0)
            label_len++;
        else if(ctx.CH(off) == '.' && label_len > 0 && ctx.CH(off-1) != '-')
            label_len = 0;
        else
            break;

        if(label_len > 62)
            return FALSE;

        off++;
    }

    if(label_len <= 0 || off >= max_end || ctx.CH(off) != '>' || ctx.CH(off-1) == '-')
        return FALSE;

    *p_end = off+1;
    return TRUE;
}

/* Check for an autolink ("<...>") at 'beg': the URI form is tried first,
 * then the e-mail form. *p_missing_mailto tells which of the two matched
 * (TRUE for the e-mail form). */
static int
md_is_autolink(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, int* p_missing_mailto)
{
    if(md_is_autolink_uri(ctx, beg, max_end, p_end)) {
        *p_missing_mailto = FALSE;
        return TRUE;
    }

    if(md_is_autolink_email(ctx, beg, max_end, p_end)) {
        *p_missing_mailto = TRUE;
        return TRUE;
    }

    return FALSE;
}

/* For 8-bit encodings, mark_char_map[] covers all 256 elements. */
bool IS_MARK_CHAR(MD_CTX* ctx, OFF off)
{
    return (ctx.mark_char_map[cast(ubyte) ctx.CH(off)]) != 0;
}

/* Scan the given lines and push a mark into ctx.marks[] for every character
 * (or run of characters) which may be significant for inline processing.
 *
 * Code spans, raw HTML and "<...>" autolinks are recognized right here and
 * pushed as resolved opener/closer pairs; other marks are only classified
 * as potential openers and/or closers.
 *
 * table_mode: If non-zero, '|' is collected as a table cell boundary.
 *
 * Returns 0 on success, non-zero if pushing a mark failed.
 */
int md_collect_marks(MD_CTX* ctx, const(MD_LINE)* lines, int n_lines, int table_mode)
{
    int i;
    int ret = 0;
    MD_MARK* mark;
    OFF[CODESPAN_MARK_MAXLEN] codespan_last_potential_closers = 0;
    int codespan_scanned_till_paragraph_end = FALSE;

    for(i = 0; i < n_lines; i++)
    {
        const(MD_LINE)* line = &lines[i];
        OFF off = line.beg;
        OFF line_end = line.end;

        while(true)
        {
            CHAR ch;

            /* Skip characters which cannot form any mark; four at a time
             * while possible, then one by one. */
            while(off + 3 < line_end && !IS_MARK_CHAR(ctx, off+0) && !IS_MARK_CHAR(ctx, off+1)
                                     && !IS_MARK_CHAR(ctx, off+2) && !IS_MARK_CHAR(ctx, off+3))
                off += 4;
            while(off < line_end && !IS_MARK_CHAR(ctx, off+0))
                off++;

            if(off >= line_end)
                break;

            ch = ctx.CH(off);

            /* A backslash escape.
             * It can go beyond line.end as it may involve escaped new
             * line to form a hard break. */
            if(ch == '\\' && off+1 < ctx.size && (ctx.ISPUNCT(off+1) || ctx.ISNEWLINE(off+1))) {
                /* Hard-break cannot be on the last line of the block. */
                if(!ctx.ISNEWLINE(off+1) || i+1 < n_lines)
                {
                    ret = PUSH_MARK(ctx, &mark, ch, off, off+2, MD_MARK_RESOLVED);
                    if (ret != 0) goto abort;
                }
                off += 2;
                continue;
            }

            /* A potential (strong) emphasis start/end. */
            if(ch == '*' || ch == '_') {
                OFF tmp = off+1;
                int left_level;     /* What precedes: 0 = whitespace; 1 = punctuation; 2 = other char. */
                int right_level;    /* What follows: 0 = whitespace; 1 = punctuation; 2 = other char. */

                while(tmp < line_end && ctx.CH(tmp) == ch)
                    tmp++;

                if(off == line.beg || ctx.ISUNICODEWHITESPACEBEFORE(off))
                    left_level = 0;
                else if(ctx.ISUNICODEPUNCTBEFORE(off))
                    left_level = 1;
                else
                    left_level = 2;

                if(tmp == line_end || ctx.ISUNICODEWHITESPACE(tmp))
                    right_level = 0;
                else if(ctx.ISUNICODEPUNCT(tmp))
                    right_level = 1;
                else
                    right_level = 2;

                /* Intra-word underscore doesn't have special meaning. */
                if(ch == '_' && left_level == 2 && right_level == 2) {
                    left_level = 0;
                    right_level = 0;
                }

                /* Whitespace on both sides: not an emphasis mark at all. */
                if(left_level == 0 && right_level == 0) {
                    off = tmp;
                    continue;
                }

                uint flags = 0;

                if(left_level > 0 && left_level >= right_level)
                    flags |= MD_MARK_POTENTIAL_CLOSER;
                if(right_level > 0 && right_level >= left_level)
                    flags |= MD_MARK_POTENTIAL_OPENER;
                if(left_level == 2 && right_level == 2)
                    flags |= MD_MARK_EMPH_INTRAWORD;

                /* For "the rule of three" we need to remember the original
                 * size of the mark (modulo three), before we potentially
                 * split the mark when being later resolved partially by some
                 * shorter closer. */
                switch((tmp - off) % 3)
                {
                    case 0: flags |= MD_MARK_EMPH_MOD3_0; break;
                    case 1: flags |= MD_MARK_EMPH_MOD3_1; break;
                    case 2: flags |= MD_MARK_EMPH_MOD3_2; break;
                    default: break;
                }

                ret = PUSH_MARK(ctx, &mark, ch, off, tmp, flags);
                if (ret != 0) goto abort;

                /* During resolving, multiple asterisks may have to be
                 * split into independent span start/ends. Consider e.g.
                 * "**foo* bar*". Therefore we push also some empty dummy
                 * marks to have enough space for that. */
                off++;
                while(off < tmp) {
                    ret = PUSH_MARK(ctx, &mark, 'D', off, off, 0);
                    if (ret != 0) goto abort;
                    off++;
                }
                continue;
            }

            /* A potential code span start/end. */
            if(ch == '`') {
                OFF opener_beg, opener_end;
                OFF closer_beg, closer_end;
                int is_code_span;

                is_code_span = md_is_code_span(ctx, lines + i, n_lines - i, off,
                                    &opener_beg, &opener_end, &closer_beg, &closer_end,
                                    codespan_last_potential_closers.ptr,
                                    &codespan_scanned_till_paragraph_end);
                if(!is_code_span) {
                    off = opener_end;
                    continue;
                }

                ret = PUSH_MARK(ctx, &mark, '`', opener_beg, opener_end, MD_MARK_OPENER | MD_MARK_RESOLVED);
                if (ret != 0) goto abort;
                ret = PUSH_MARK(ctx, &mark, '`', closer_beg, closer_end, MD_MARK_CLOSER | MD_MARK_RESOLVED);
                if (ret != 0) goto abort;
                ctx.marks[ctx.n_marks-2].next = ctx.n_marks-1;
                ctx.marks[ctx.n_marks-1].prev = ctx.n_marks-2;

                off = closer_end;

                /* Advance the current line accordingly. */
                while(off > line_end) {
                    i++;
                    line++;
                    line_end = line.end;
                }
                continue;
            }

            /* A potential entity start. */
            if(ch == '&') {
                ret = PUSH_MARK(ctx, &mark, ch, off, off+1, MD_MARK_POTENTIAL_OPENER);
                if (ret != 0) goto abort;
                off++;
                continue;
            }

            /* A potential entity end. */
            if(ch == ';') {
                /* We surely cannot be entity unless the previous mark is '&'. */
                if(ctx.n_marks > 0 && ctx.marks[ctx.n_marks-1].ch == '&')
                {
                    ret = PUSH_MARK(ctx, &mark, ch, off, off+1, MD_MARK_POTENTIAL_CLOSER);
                    if (ret != 0) goto abort;
                }

                off++;
                continue;
            }

            /* A potential autolink or raw HTML start/end. */
            if(ch == '<') {
                int is_autolink;
                OFF autolink_end;
                int missing_mailto;

                if(!(ctx.parser.flags & MD_FLAG_NOHTMLSPANS)) {
                    int is_html;
                    OFF html_end;

                    /* Given the nature of the raw HTML, we have to recognize
                     * it here. Doing so later in md_analyze_lt_gt() could
                     * open can of worms of quadratic complexity. */
                    is_html = md_is_html_any(ctx, lines + i, n_lines - i, off,
                                    lines[n_lines-1].end, &html_end);
                    if(is_html) {
                        ret = PUSH_MARK(ctx, &mark, '<', off, off, MD_MARK_OPENER | MD_MARK_RESOLVED);
                        if (ret != 0) goto abort;
                        ret = PUSH_MARK(ctx, &mark, '>', html_end, html_end, MD_MARK_CLOSER | MD_MARK_RESOLVED);
                        if (ret != 0) goto abort;
                        ctx.marks[ctx.n_marks-2].next = ctx.n_marks-1;
                        ctx.marks[ctx.n_marks-1].prev = ctx.n_marks-2;
                        off = html_end;

                        /* Advance the current line accordingly. */
                        while(off > line_end) {
                            i++;
                            line++;
                            line_end = line.end;
                        }
                        continue;
                    }
                }

                is_autolink = md_is_autolink(ctx, off, lines[n_lines-1].end,
                                    &autolink_end, &missing_mailto);
                if(is_autolink) {
                    ret = PUSH_MARK(ctx, &mark, (missing_mailto ? '@' : '<'), off, off+1,
                                    MD_MARK_OPENER | MD_MARK_RESOLVED | MD_MARK_AUTOLINK);
                    if (ret != 0) goto abort;
                    ret = PUSH_MARK(ctx, &mark, '>', autolink_end-1, autolink_end,
                                    MD_MARK_CLOSER | MD_MARK_RESOLVED | MD_MARK_AUTOLINK);
                    if (ret != 0) goto abort;
                    ctx.marks[ctx.n_marks-2].next = ctx.n_marks-1;
                    ctx.marks[ctx.n_marks-1].prev = ctx.n_marks-2;
                    off = autolink_end;
                    continue;
                }

                off++;
                continue;
            }

            /* A potential link or its part. */
            if(ch == '[' || (ch == '!' && off+1 < line_end && ctx.CH(off+1) == '[')) {
                OFF tmp = (ch == '[' ? off+1 : off+2);
                ret = PUSH_MARK(ctx, &mark, ch, off, tmp, MD_MARK_POTENTIAL_OPENER);
                if (ret != 0) goto abort;
                off = tmp;
                /* Two dummies to make enough place for data we need if it is
                 * a link. */
                ret = PUSH_MARK(ctx, &mark, 'D', off, off, 0);
                if (ret != 0) goto abort;
                ret = PUSH_MARK(ctx, &mark, 'D', off, off, 0);
                if (ret != 0) goto abort;
                continue;
            }
            if(ch == ']') {
                ret = PUSH_MARK(ctx, &mark, ch, off, off+1, MD_MARK_POTENTIAL_CLOSER);
                if (ret != 0) goto abort;
                off++;
                continue;
            }

            /* A potential permissive e-mail autolink. */
            if(ch == '@') {
                if(line.beg + 1 <= off && ctx.ISALNUM(off-1) &&
                   off + 3 < line.end && ctx.ISALNUM(off+1))
                {
                    ret = PUSH_MARK(ctx, &mark, ch, off, off+1, MD_MARK_POTENTIAL_OPENER);
                    if (ret != 0) goto abort;
                    /* Push a dummy as a reserve for a closer. */
                    ret = PUSH_MARK(ctx, &mark, 'D', off, off, 0);
                    if (ret != 0) goto abort;
                }

                off++;
                continue;
            }

            /* A potential permissive URL autolink. */
            if(ch == ':')
            {
                static struct Scheme
                {
                    const(CHAR)* scheme;
                    SZ scheme_size;
                    const(CHAR)* suffix;
                    SZ suffix_size;
                }

                static immutable Scheme[] scheme_map =
                [
                    Scheme("http", 4,    "//", 2),
                    Scheme("https", 5,    "//", 2),
                    Scheme("ftp", 3,    "//", 2)
                ];

                int scheme_index;

                for(scheme_index = 0; scheme_index < cast(int) (scheme_map.length); scheme_index++) {
                    const(CHAR)* scheme = scheme_map[scheme_index].scheme;
                    const SZ scheme_size = scheme_map[scheme_index].scheme_size;
                    const(CHAR)* suffix = scheme_map[scheme_index].suffix;
                    const SZ suffix_size = scheme_map[scheme_index].suffix_size;

                    if(line.beg + scheme_size <= off && md_ascii_eq(ctx.STR(off-scheme_size), scheme, scheme_size) &&
                        (line.beg + scheme_size == off || ctx.ISWHITESPACE(off-scheme_size-1) || ctx.ISANYOF(off-scheme_size-1, "*_~([")) &&
                        off + 1 + suffix_size < line.end && md_ascii_eq(ctx.STR(off+1), suffix, suffix_size))
                    {
                        ret = PUSH_MARK(ctx, &mark, ch, off-scheme_size, off+1+suffix_size, MD_MARK_POTENTIAL_OPENER);
                        if (ret != 0) goto abort;
                        /* Push a dummy as a reserve for a closer. */
                        ret = PUSH_MARK(ctx, &mark, 'D', off, off, 0);
                        if (ret != 0) goto abort;
                        off += 1 + suffix_size;
                        /* Note: this continues the scheme loop, not the
                         * outer scanning loop. */
                        continue;
                    }
                }

                off++;
                continue;
            }

            /* A potential permissive WWW autolink. */
            if(ch == '.') {
                if(line.beg + 3 <= off && md_ascii_eq(ctx.STR(off-3), "www", 3) &&
                   (line.beg + 3 == off || ctx.ISWHITESPACE(off-4) || ctx.ISANYOF(off-4, "*_~([")) &&
                   off + 1 < line_end)
                {
                    ret = PUSH_MARK(ctx, &mark, ch, off-3, off+1, MD_MARK_POTENTIAL_OPENER);
                    if (ret != 0) goto abort;
                    /* Push a dummy as a reserve for a closer. */
                    ret = PUSH_MARK(ctx, &mark, 'D', off, off, 0);
                    if (ret != 0) goto abort;
                }

                off++;
                continue;
            }

            /* A potential table cell boundary. */
            if(table_mode && ch == '|') {
                ret = PUSH_MARK(ctx, &mark, ch, off, off+1, 0);
                if (ret != 0) goto abort;
                off++;
                continue;
            }

            /* A potential strikethrough start/end. */
            if(ch == '~') {
                OFF tmp = off+1;

                while(tmp < line_end && ctx.CH(tmp) == '~')
                    tmp++;

                ret = PUSH_MARK(ctx, &mark, ch, off, tmp, MD_MARK_POTENTIAL_OPENER | MD_MARK_POTENTIAL_CLOSER);
                if (ret != 0) goto abort;
                off = tmp;
                continue;
            }

            /* A potential equation start/end */
            if(ch == '$') {
                /* We can have at most two consecutive $ signs,
                 * where two dollar signs signify a display equation. */
                OFF tmp = off+1;

                while(tmp < line_end && ctx.CH(tmp) == '$')
                    tmp++;

                if (tmp - off <= 2)
                {
                    ret = PUSH_MARK(ctx, &mark, ch, off, tmp, MD_MARK_POTENTIAL_OPENER | MD_MARK_POTENTIAL_CLOSER);
                    if (ret != 0) goto abort;
                }
                off = tmp;
                continue;
            }

            /* Turn non-trivial whitespace into single space. */
            if(ISWHITESPACE_(ch)) {
                OFF tmp = off+1;

                while(tmp < line_end && ctx.ISWHITESPACE(tmp))
                    tmp++;

                if(tmp - off > 1 || ch != ' ')
                {
                    ret = PUSH_MARK(ctx, &mark, ch, off, tmp, MD_MARK_RESOLVED);
                    if (ret != 0) goto abort;
                }

                off = tmp;
                continue;
            }

            /* null character. */
            if(ch == '\0') {
                ret = PUSH_MARK(ctx, &mark, ch, off, off+1, MD_MARK_RESOLVED);
                if (ret != 0) goto abort;
                off++;
                continue;
            }

            off++;
        }
    }

    /* Add a dummy mark at the end of the mark vector to simplify
     * process_inlines(). */
    ret = PUSH_MARK(ctx, &mark, 127, ctx.size, ctx.size, MD_MARK_RESOLVED);
    if (ret != 0) goto abort;

abort:
    return ret;
}

static void
md_analyze_bracket(MD_CTX* ctx, int mark_index)
{
    /* We cannot really resolve links here as for that we would need
     * more context. E.g. a following pair of brackets (reference link),
     * or enclosing pair of brackets (if the inner is the link, the outer
     * one cannot be.)
     *
     * Therefore we here only construct a list of resolved '[' ']' pairs
     * ordered by position of the closer. This allows us to analyze what is
     * or is not link in the right order, from inside to outside in case
     * of nested brackets.
     *
     * The resolving itself is deferred into md_resolve_links().
     */

    MD_MARK* mark = &ctx.marks[mark_index];

    if(mark.flags & MD_MARK_POTENTIAL_OPENER) {
        md_mark_chain_append(ctx, ctx.BRACKET_OPENERS, mark_index);
        return;
    }

    if(ctx.BRACKET_OPENERS.tail >= 0) {
        /* Pop the opener from the chain.
*/
        int opener_index = ctx.BRACKET_OPENERS.tail;
        MD_MARK* opener = &ctx.marks[opener_index];
        if(opener.prev >= 0)
            ctx.marks[opener.prev].next = -1;
        else
            ctx.BRACKET_OPENERS.head = -1;
        ctx.BRACKET_OPENERS.tail = opener.prev;

        /* Interconnect the opener and closer. */
        opener.next = mark_index;
        mark.prev = opener_index;

        /* Add the pair into chain of potential links for md_resolve_links().
         * Note we misuse opener.prev for this as opener.next points to its
         * closer. */
        if(ctx.unresolved_link_tail >= 0)
            ctx.marks[ctx.unresolved_link_tail].prev = opener_index;
        else
            ctx.unresolved_link_head = opener_index;
        ctx.unresolved_link_tail = opener_index;
        opener.prev = -1;
    }
}

/* Forward declaration. */
static void md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
                                     int mark_beg, int mark_end);

/* Walk the list of bracket pairs built by md_analyze_bracket() and decide
 * for each pair whether it forms a link or an image (reference link in any
 * of its forms, or an inline link).
 *
 * For each link found, the brackets are flagged as resolved, the link
 * destination and title are stored into the two dummy marks following the
 * opener, and the link contents are analyzed.
 *
 * Returns 0 on success, -1 if any of the link checks reported an error.
 */
static int
md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
{
    int opener_index = ctx.unresolved_link_head;
    OFF last_link_beg = 0;
    OFF last_link_end = 0;
    OFF last_img_beg = 0;
    OFF last_img_end = 0;

    while(opener_index >= 0) {
        MD_MARK* opener = &ctx.marks[opener_index];
        int closer_index = opener.next;
        MD_MARK* closer = &ctx.marks[closer_index];
        /* The list is threaded through opener.prev. */
        int next_index = opener.prev;
        MD_MARK* next_opener = null;
        MD_MARK* next_closer = null;
        MD_LINK_ATTR attr;
        int is_link = FALSE;

        if(next_index >= 0) {
            next_opener = &ctx.marks[next_index];
            next_closer = &ctx.marks[next_opener.next];
        }

        /* If nested ("[ [ ] ]"), we need to make sure that:
         *   - The outer does not end inside of (...) belonging to the inner.
         *   - The outer cannot be link if the inner is link (i.e. not image).
         *
         * (Note we here analyze from inner to outer as the marks are ordered
         * by closer.beg.)
         */
        if((opener.beg < last_link_beg && closer.end < last_link_end) ||
           (opener.beg < last_img_beg && closer.end < last_img_end) ||
           (opener.beg < last_link_end && opener.ch == '['))
        {
            opener_index = next_index;
            continue;
        }

        if(next_opener != null && next_opener.beg == closer.end) {
            /* Another bracket pair follows immediately: "[...][...]". */
            if(next_closer.beg > closer.end + 1) {
                /* Might be full reference link. */
                is_link = md_is_link_reference(ctx, lines, n_lines, next_opener.beg, next_closer.end, &attr);
            } else {
                /* Might be shortcut reference link. */
                is_link = md_is_link_reference(ctx, lines, n_lines, opener.beg, closer.end, &attr);
            }

            if(is_link < 0)
                return -1;

            if(is_link) {
                /* Eat the 2nd "[...]". */
                closer.end = next_closer.end;
            }
        } else {
            if(closer.end < ctx.size && ctx.CH(closer.end) == '(') {
                /* Might be inline link. */
                OFF inline_link_end = uint.max;

                is_link = md_is_inline_link_spec(ctx, lines, n_lines, closer.end, &inline_link_end, &attr);
                if(is_link < 0)
                    return -1;

                /* Check the closing ')' is not inside an already resolved range
                 * (i.e. a range with a higher priority), e.g. a code span. */
                if(is_link) {
                    int probe = closer_index + 1;

                    while(probe < ctx.n_marks) {
                        MD_MARK* mark = &ctx.marks[probe];

                        if(mark.beg >= inline_link_end)
                            break;

                        if((mark.flags & (MD_MARK_OPENER | MD_MARK_RESOLVED)) != (MD_MARK_OPENER | MD_MARK_RESOLVED)) {
                            probe++;
                            continue;
                        }

                        if(ctx.marks[mark.next].beg >= inline_link_end) {
                            /* Cancel the link status. */
                            if(attr.title_needs_free)
                                free(cast(void*)(attr.title));
                            is_link = FALSE;
                            break;
                        }

                        /* Jump behind the resolved range. */
                        probe = mark.next + 1;
                    }
                }

                if(is_link) {
                    /* Eat the "(...)" */
                    closer.end = inline_link_end;
                }
            }

            if(!is_link) {
                /* Might be collapsed reference link. */
                is_link = md_is_link_reference(ctx, lines, n_lines, opener.beg, closer.end, &attr);
                if(is_link < 0)
                    return -1;
            }
        }

        if(is_link) {
            /* Resolve the brackets as a link. */
            opener.flags |= MD_MARK_OPENER | MD_MARK_RESOLVED;
            closer.flags |= MD_MARK_CLOSER | MD_MARK_RESOLVED;

            /* If it is a link, we store the destination and title in the two
             * dummy marks after the opener. */
            assert(ctx.marks[opener_index+1].ch == 'D');
            ctx.marks[opener_index+1].beg = attr.dest_beg;
            ctx.marks[opener_index+1].end = attr.dest_end;

            assert(ctx.marks[opener_index+2].ch == 'D');
            md_mark_store_ptr(ctx, opener_index+2, attr.title);
            if(attr.title_needs_free)
                md_mark_chain_append(ctx, ctx.PTR_CHAIN, opener_index+2);
            ctx.marks[opener_index+2].prev = attr.title_size;

            if(opener.ch == '[') {
                last_link_beg = opener.beg;
                last_link_end = closer.end;
            } else {
                last_img_beg = opener.beg;
                last_img_end = closer.end;
            }

            md_analyze_link_contents(ctx, lines, n_lines, opener_index+1, closer_index);
        }

        opener_index = next_index;
    }

    return 0;
}

/* Analyze whether the mark '&' starts a HTML entity.
 * If so, update its flags as well as flags of corresponding closer ';'. */
static void
md_analyze_entity(MD_CTX* ctx, int mark_index)
{
    MD_MARK* opener = &ctx.marks[mark_index];
    MD_MARK* closer;
    OFF off;

    /* Cannot be entity if there is no closer as the next mark.
* (Any other mark between would mean strange character which cannot be
     * part of the entity.)
     *
     * So we can do all the work on '&' and do not call this later for the
     * closing mark ';'.
     */
    if(mark_index + 1 >= ctx.n_marks)
        return;
    closer = &ctx.marks[mark_index+1];
    if(closer.ch != ';')
        return;

    if(md_is_entity(ctx, opener.beg, closer.end, &off)) {
        assert(off == closer.end);

        md_resolve_range(ctx, null, mark_index, mark_index+1);
        opener.end = closer.end;
    }
}

/* Flag the '|' mark as resolved, append it to the ctx.TABLECELLBOUNDARIES
 * chain and bump the boundary counter. */
static void
md_analyze_table_cell_boundary(MD_CTX* ctx, int mark_index)
{
    MD_MARK* mark = &ctx.marks[mark_index];
    mark.flags |= MD_MARK_RESOLVED;

    md_mark_chain_append(ctx, ctx.TABLECELLBOUNDARIES, mark_index);
    ctx.n_table_cell_boundaries++;
}

/* Split a longer mark into two. The new mark takes the given count of
 * characters. May only be called if an adequate number of dummy 'D' marks
 * follows.
 *
 * Returns the index of the new mark, which covers the last n characters
 * of the original one.
 */
static int
md_split_emph_mark(MD_CTX* ctx, int mark_index, SZ n)
{
    MD_MARK* mark = &ctx.marks[mark_index];
    int new_mark_index = mark_index + (mark.end - mark.beg - n);
    MD_MARK* dummy = &ctx.marks[new_mark_index];

    assert(mark.end - mark.beg > n);
    assert(dummy.ch == 'D');

    /* The dummy slot becomes a copy of the mark, then the two are trimmed
     * so that they cover adjacent, non-overlapping ranges. */
    memcpy(dummy, mark, MD_MARK.sizeof);
    mark.end -= n;
    dummy.beg = mark.end;

    return new_mark_index;
}

/* Analyze an emphasis mark ('*' or '_'). If the mark can close, pair it
 * with the most recent suitable opener and resolve the range; otherwise,
 * if it can open, append it to its opener chain. */
static void
md_analyze_emph(MD_CTX* ctx, int mark_index)
{
    MD_MARK* mark = &ctx.marks[mark_index];
    MD_MARKCHAIN* chain = md_mark_chain(ctx, mark_index);

    /* If we can be a closer, try to resolve with the preceding opener. */
    if(mark.flags & MD_MARK_POTENTIAL_CLOSER) {
        MD_MARK* opener = null;
        int opener_index;

        if(mark.ch == '*') {
            MD_MARKCHAIN*[6] opener_chains;
            int i, n_opener_chains;
            uint flags = mark.flags;
            const uint mod3 = flags & MD_MARK_EMPH_MOD3_MASK;
            const bool intraword = (flags & MD_MARK_EMPH_INTRAWORD) != 0;

            /* Apply "rule of three". (This is why we break asterisk opener
             * marks into multiple chains.) */
            n_opener_chains = 0;
            opener_chains[n_opener_chains++] = ctx.ASTERISK_OPENERS_intraword_mod3_0;
            if(mod3 != MD_MARK_EMPH_MOD3_2)
                opener_chains[n_opener_chains++] = ctx.ASTERISK_OPENERS_intraword_mod3_1;
            if(mod3 != MD_MARK_EMPH_MOD3_1)
                opener_chains[n_opener_chains++] = ctx.ASTERISK_OPENERS_intraword_mod3_2;
            opener_chains[n_opener_chains++] = ctx.ASTERISK_OPENERS_extraword_mod3_0;
            if(!intraword || mod3 != MD_MARK_EMPH_MOD3_2)
                opener_chains[n_opener_chains++] = ctx.ASTERISK_OPENERS_extraword_mod3_1;
            if(!intraword || mod3 != MD_MARK_EMPH_MOD3_1)
                opener_chains[n_opener_chains++] = ctx.ASTERISK_OPENERS_extraword_mod3_2;

            /* Opener is the most recent mark from the allowed chains. */
            for(i = 0; i < n_opener_chains; i++) {
                if(opener_chains[i].tail >= 0) {
                    int cand_index = opener_chains[i].tail;
                    MD_MARK* cand = &ctx.marks[cand_index];
                    if(opener == null || cand.end > opener.end) {
                        opener_index = cand_index;
                        opener = cand;
                    }
                }
            }
        } else {
            /* Simple emph. mark */
            if(chain.tail >= 0) {
                opener_index = chain.tail;
                opener = &ctx.marks[opener_index];
            }
        }

        /* Resolve, if we have found matching opener. */
        if(opener != null) {
            SZ opener_size = opener.end - opener.beg;
            SZ closer_size = mark.end - mark.beg;

            /* Make both marks the same length by splitting the longer one. */
            if(opener_size > closer_size) {
                opener_index = md_split_emph_mark(ctx, opener_index, closer_size);
                md_mark_chain_append(ctx, md_mark_chain(ctx, opener_index), opener_index);
            } else if(opener_size < closer_size) {
                md_split_emph_mark(ctx, mark_index, closer_size - opener_size);
            }

            md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING);
            md_resolve_range(ctx, chain, opener_index, mark_index);
            return;
        }
    }

    /* If we could not resolve as closer, we may yet be an opener. */
    if(mark.flags & MD_MARK_POTENTIAL_OPENER)
        md_mark_chain_append(ctx, chain, mark_index);
}

/* Analyze a '~' mark: close the pending strikethrough opener if there is
 * one, otherwise remember this mark as the opener. */
static void
md_analyze_tilde(MD_CTX* ctx, int mark_index)
{
    /* We attempt to be Github Flavored Markdown compatible here. GFM says
     * that length of the tilde sequence is not important at all. Note that
     * implies the ctx.TILDE_OPENERS chain can have at most one item. */

    if(ctx.TILDE_OPENERS.head >= 0) {
        /* The chain already contains an opener, so we may resolve the span. */
        int opener_index = ctx.TILDE_OPENERS.head;

        md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING);
        md_resolve_range(ctx, ctx.TILDE_OPENERS, opener_index, mark_index);
    } else {
        /* We can only be opener. */
        md_mark_chain_append(ctx, ctx.TILDE_OPENERS, mark_index);
    }
}

static void
md_analyze_dollar(MD_CTX* ctx, int mark_index)
{
    /* This should mimic the way inline equations work in LaTeX, so there
     * can only ever be one item in the chain (i.e. the dollars can't be
     * nested). This is basically the same as the md_analyze_tilde function,
     * except that we require matching openers and closers to be of the same
     * length.
*
     * E.g.: $abc$$def$$ => abc (display equation) def (end equation) */
    if(ctx.DOLLAR_OPENERS.head >= 0) {
        /* If the potential closer has a non-matching number of $, discard */
        MD_MARK* open = &ctx.marks[ctx.DOLLAR_OPENERS.head];
        MD_MARK* close = &ctx.marks[mark_index];

        int opener_index = ctx.DOLLAR_OPENERS.head;
        md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_ALL);
        if (open.end - open.beg == close.end - close.beg) {
            /* We are the matching closer */
            md_resolve_range(ctx, ctx.DOLLAR_OPENERS, opener_index, mark_index);
        } else {
            /* We don't match the opener, so discard old opener and insert as opener */
            md_mark_chain_append(ctx, ctx.DOLLAR_OPENERS, mark_index);
        }
    } else {
        /* No unmatched openers, so we are opener */
        md_mark_chain_append(ctx, ctx.DOLLAR_OPENERS, mark_index);
    }
}

/* Try to recognize a permissive URL/WWW autolink which starts at the given
 * mark. The text after the mark has to look like a domain (at least one
 * '.', no '_' in the last two segments); an optional path follows up to the
 * next resolved mark, '<' or whitespace. On success the opener is shrunk to
 * zero length and the following dummy 'D' mark becomes the zero-length
 * closer, so the whole text between them is the link text. */
static void
md_analyze_permissive_url_autolink(MD_CTX* ctx, int mark_index)
{
    MD_MARK* opener = &ctx.marks[mark_index];
    int closer_index = mark_index + 1;
    MD_MARK* closer = &ctx.marks[closer_index];
    MD_MARK* next_resolved_mark;
    OFF off = opener.end;
    int n_dots = 0;
    int has_underscore_in_last_seg = FALSE;
    int has_underscore_in_next_to_last_seg = FALSE;
    int n_opened_parenthesis = 0;

    /* Check for domain. */
    while(off < ctx.size) {
        if(ctx.ISALNUM(off) || ctx.CH(off) == '-') {
            off++;
        } else if(ctx.CH(off) == '.') {
            /* We must see at least one period. */
            n_dots++;
            has_underscore_in_next_to_last_seg = has_underscore_in_last_seg;
            has_underscore_in_last_seg = FALSE;
            off++;
        } else if(ctx.CH(off) == '_') {
            /* No underscore may be present in the last two domain segments. */
            has_underscore_in_last_seg = TRUE;
            off++;
        } else {
            break;
        }
    }
    /* A trailing period does not belong to the domain. */
    if(off > opener.end && ctx.CH(off-1) == '.') {
        off--;
        n_dots--;
    }
    if(off <= opener.end || n_dots == 0 || has_underscore_in_next_to_last_seg || has_underscore_in_last_seg)
        return;

    /* Check for path. */
    next_resolved_mark = closer + 1;
    while(next_resolved_mark.ch == 'D' || !(next_resolved_mark.flags & MD_MARK_RESOLVED))
        next_resolved_mark++;
    while(off < next_resolved_mark.beg && ctx.CH(off) != '<' && !ctx.ISWHITESPACE(off) && !ctx.ISNEWLINE(off)) {
        /* Parenthesis must be balanced. */
        if(ctx.CH(off) == '(') {
            n_opened_parenthesis++;
        } else if(ctx.CH(off) == ')') {
            if(n_opened_parenthesis > 0)
                n_opened_parenthesis--;
            else
                break;
        }

        off++;
    }
    /* These cannot be the last char. In such case they are more likely
     * normal punctuation. */
    if(ctx.ISANYOF(off-1, "?!.,:*_~"))
        off--;

    /* Ok. Lets call it auto-link. Adapt opener and create closer to zero
     * length so all the contents becomes the link text. */
    assert(closer.ch == 'D');
    opener.end = opener.beg;
    closer.ch = opener.ch;
    closer.beg = off;
    closer.end = off;
    md_resolve_range(ctx, null, mark_index, closer_index);
}

/* The permissive autolinks do not have to be enclosed in '<' '>' but we
 * instead impose stricter rules what is understood as an e-mail address
 * here. Actually any non-alphanumeric characters with exception of '.'
 * are prohibited both in username and after '@'. */
static void
md_analyze_permissive_email_autolink(MD_CTX* ctx, int mark_index)
{
    MD_MARK* opener = &ctx.marks[mark_index];
    int closer_index;
    MD_MARK* closer;
    OFF beg = opener.beg;
    OFF end = opener.end;
    int dot_count = 0;

    assert(ctx.CH(beg) == '@');

    /* Scan for name before '@'. */
    while(beg > 0 && (ctx.ISALNUM(beg-1) || ctx.ISANYOF(beg-1, ".-_+")))
        beg--;

    /* Scan for domain after '@'. */
    while(end < ctx.size && (ctx.ISALNUM(end) || ctx.ISANYOF(end, ".-_"))) {
        if(ctx.CH(end) == '.')
            dot_count++;
        end++;
    }
    if(ctx.CH(end-1) == '.') {  /* Final '.' not part of it. */
        dot_count--;
        end--;
    }
    else if(ctx.ISANYOF2(end-1, '-', '_')) /* These are forbidden at the end. */
        return;
    if(ctx.CH(end-1) == '@' || dot_count == 0)
        return;

    /* Ok. Lets call it auto-link. Adapt opener and create closer to zero
     * length so all the contents becomes the link text. */
    closer_index = mark_index + 1;
    closer = &ctx.marks[closer_index];
    assert(closer.ch == 'D');

    opener.beg = beg;
    opener.end = beg;
    closer.ch = opener.ch;
    closer.beg = end;
    closer.end = end;
    md_resolve_range(ctx, null, mark_index, closer_index);
}

/* Run the per-character analyzers over the marks in [mark_beg, mark_end).
 * Only marks whose character is listed in mark_chars are analyzed; spans
 * which are already resolved are skipped as a whole. */
static void
md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
                 int mark_beg, int mark_end, const(CHAR)* mark_chars)
{
    int i = mark_beg;

    while(i < mark_end) {
        MD_MARK* mark = &ctx.marks[i];

        /* Skip resolved spans. */
        if(mark.flags & MD_MARK_RESOLVED) {
            if(mark.flags & MD_MARK_OPENER) {
                assert(i < mark.next);
                i = mark.next + 1;
            } else {
                i++;
            }
            continue;
        }

        /* Skip marks we do not want to deal with. */
        if(!ISANYOF_(mark.ch, mark_chars)) {
            i++;
            continue;
        }

        /* Analyze the mark. */
        switch(mark.ch) {
            case '[':   /* Pass through. */
            case '!':   /* Pass through. */
            case ']':   md_analyze_bracket(ctx, i); break;
            case '&':   md_analyze_entity(ctx, i); break;
            case '|':   md_analyze_table_cell_boundary(ctx, i); break;
            case '_':   /* Pass through.
*/
            case '*':   md_analyze_emph(ctx, i); break;
            case '~':   md_analyze_tilde(ctx, i); break;
            case '$':   md_analyze_dollar(ctx, i); break;
            case '.':   /* Pass through. */
            case ':':   md_analyze_permissive_url_autolink(ctx, i); break;
            case '@':   md_analyze_permissive_email_autolink(ctx, i); break;
            default:    break;
        }

        i++;
    }
}

/* Analyze marks (build ctx.marks).
 *
 * Collects the marks for the given lines and resolves them group by group
 * in order of precedence. With table_mode set, only entities and table
 * cell boundaries are analyzed and ctx.TABLECELLBOUNDARIES is left filled
 * for the caller.
 *
 * Returns a negative value if collecting marks or resolving links failed.
 */
static int
md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
{
    int ret;

    /* Reset the previously collected stack of marks. */
    ctx.n_marks = 0;

    /* Collect all marks. */
    ret = (md_collect_marks(ctx, lines, n_lines, table_mode));
    if (ret < 0) goto abort;

    /* We analyze marks in few groups to handle their precedence. */
    /* (1) Entities; code spans; autolinks; raw HTML. */
    md_analyze_marks(ctx, lines, n_lines, 0, ctx.n_marks, "&");

    if(table_mode) {
        /* (2) Analyze table cell boundaries.
         * Note we reset ctx.TABLECELLBOUNDARIES chain prior to the call md_analyze_marks(),
         * not after, because caller may need it. */
        assert(n_lines == 1);
        ctx.TABLECELLBOUNDARIES.head = -1;
        ctx.TABLECELLBOUNDARIES.tail = -1;
        ctx.n_table_cell_boundaries = 0;
        md_analyze_marks(ctx, lines, n_lines, 0, ctx.n_marks, "|");
        return ret;
    }

    /* (3) Links. */
    md_analyze_marks(ctx, lines, n_lines, 0, ctx.n_marks, "[]!");
    ret = (md_resolve_links(ctx, lines, n_lines));
    if (ret < 0) goto abort;
    ctx.BRACKET_OPENERS.head = -1;
    ctx.BRACKET_OPENERS.tail = -1;
    ctx.unresolved_link_head = -1;
    ctx.unresolved_link_tail = -1;

    /* (4) Emphasis and strong emphasis; permissive autolinks. */
    md_analyze_link_contents(ctx, lines, n_lines, 0, ctx.n_marks);

abort:
    return ret;
}

/* Analyze emphasis, strikethrough, LaTeX math and permissive autolink marks
 * in [mark_beg, mark_end), then empty all the opener chains those analyzers
 * use so that no opener leaks out of the analyzed range. */
static void
md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
                         int mark_beg, int mark_end)
{
    md_analyze_marks(ctx, lines, n_lines, mark_beg, mark_end, "*_~$@:.");

    MD_MARKCHAIN*[9] used_chains = [
        ctx.ASTERISK_OPENERS_extraword_mod3_0,
        ctx.ASTERISK_OPENERS_extraword_mod3_1,
        ctx.ASTERISK_OPENERS_extraword_mod3_2,
        ctx.ASTERISK_OPENERS_intraword_mod3_0,
        ctx.ASTERISK_OPENERS_intraword_mod3_1,
        ctx.ASTERISK_OPENERS_intraword_mod3_2,
        ctx.UNDERSCORE_OPENERS,
        ctx.TILDE_OPENERS,
        ctx.DOLLAR_OPENERS
    ];

    foreach(used_chain; used_chains) {
        used_chain.head = -1;
        used_chain.tail = -1;
    }
}

/* Build the href and title attributes and fire the enter-span (enter != 0)
 * or leave-span (enter == 0) callback for a link or image span.
 * The attribute builds are released on every path. */
static int
md_enter_leave_span_a(MD_CTX* ctx, int enter, MD_SPANTYPE type,
                      const(CHAR)* dest, SZ dest_size, int prohibit_escapes_in_dest,
                      const(CHAR)* title, SZ title_size)
{
    MD_ATTRIBUTE_BUILD href_build = MD_ATTRIBUTE_BUILD.init;
    MD_ATTRIBUTE_BUILD title_build = MD_ATTRIBUTE_BUILD.init;
    MD_SPAN_A_DETAIL det;
    int ret = 0;

    /* Note we here rely on fact that MD_SPAN_A_DETAIL and
     * MD_SPAN_IMG_DETAIL are binary-compatible. */
    memset(&det, 0, MD_SPAN_A_DETAIL.sizeof);
    ret = (md_build_attribute(ctx, dest, dest_size,
                    (prohibit_escapes_in_dest ?
MD_BUILD_ATTR_NO_ESCAPES : 0),
                    &det.href, &href_build));
    if (ret < 0) goto abort;
    ret = (md_build_attribute(ctx, title, title_size, 0, &det.title, &title_build));
    if (ret < 0) goto abort;

    if(enter)
    {
        ret = MD_ENTER_SPAN(ctx, type, &det);
        if (ret != 0) goto abort;
    }
    else
    {
        ret = MD_LEAVE_SPAN(ctx, type, &det);
        if (ret != 0) goto abort;
    }

abort:
    md_free_attribute(ctx, &href_build);
    md_free_attribute(ctx, &title_build);
    return ret;
}

/* Render the output, accordingly to the analyzed ctx.marks.
 *
 * Walks the text of the given lines, emitting plain text between resolved
 * marks and the span/text callbacks for each resolved mark. Returns 0 on
 * success; the value of a failing callback or helper otherwise. */
static int
md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
{
    MD_TEXTTYPE text_type;
    const(MD_LINE)* line = lines;
    MD_MARK* prev_mark = null;
    MD_MARK* mark;
    OFF off = lines[0].beg;
    OFF end = lines[n_lines-1].end;
    int enforce_hardbreak = 0;
    int ret = 0;

    /* Find first resolved mark. Note there is always at least one resolved
     * mark, the dummy last one after the end of the latest line we actually
     * never really reach. This saves us of a lot of special checks and cases
     * in this function. */
    mark = ctx.marks;
    while(!(mark.flags & MD_MARK_RESOLVED))
        mark++;

    text_type = MD_TEXT_NORMAL;

    while(1) {
        /* Process the text up to the next mark or end-of-line. */
        OFF tmp = (line.end < mark.beg ? line.end : mark.beg);
        if(tmp > off) {
            ret = MD_TEXT(ctx, text_type, ctx.STR(off), tmp - off);
            if (ret != 0) goto abort;
            off = tmp;
        }

        /* If reached the mark, process it and move to next one. */
        if(off >= mark.beg) {
            switch(mark.ch) {
                case '\\':      /* Backslash escape. */
                    if(ctx.ISNEWLINE(mark.beg+1))
                        enforce_hardbreak = 1;
                    else
                    {
                        ret = MD_TEXT(ctx, text_type, ctx.STR(mark.beg+1), 1);
                        if (ret != 0) goto abort;
                    }
                    break;

                case ' ':       /* Non-trivial space. */
                    ret = MD_TEXT(ctx, text_type, " ", 1);
                    if (ret != 0) goto abort;
                    break;

                case '`':       /* Code span. */
                    if(mark.flags & MD_MARK_OPENER) {
                        ret = MD_ENTER_SPAN(ctx, MD_SPAN_CODE, null);
                        if (ret != 0) goto abort;
                        text_type = MD_TEXT_CODE;
                    } else {
                        ret = MD_LEAVE_SPAN(ctx, MD_SPAN_CODE, null);
                        if (ret != 0) goto abort;
                        text_type = MD_TEXT_NORMAL;
                    }
                    break;

                case '_':
                case '*':       /* Emphasis, strong emphasis. */
                    /* An odd mark length yields one <em>; every remaining
                     * pair of characters yields one <strong>. */
                    if(mark.flags & MD_MARK_OPENER) {
                        if((mark.end - off) % 2) {
                            ret = MD_ENTER_SPAN(ctx, MD_SPAN_EM, null);
                            if (ret != 0) goto abort;
                            off++;
                        }
                        while(off + 1 < mark.end) {
                            ret = MD_ENTER_SPAN(ctx, MD_SPAN_STRONG, null);
                            if (ret != 0) goto abort;
                            off += 2;
                        }
                    } else {
                        while(off + 1 < mark.end) {
                            ret = MD_LEAVE_SPAN(ctx, MD_SPAN_STRONG, null);
                            if (ret != 0) goto abort;
                            off += 2;
                        }
                        if((mark.end - off) % 2) {
                            ret = MD_LEAVE_SPAN(ctx, MD_SPAN_EM, null);
                            if (ret != 0) goto abort;
                            off++;
                        }
                    }
                    break;

                case '~':       /* Strikethrough. */
                    if(mark.flags & MD_MARK_OPENER)
                    {
                        ret = MD_ENTER_SPAN(ctx, MD_SPAN_DEL, null);
                        if (ret != 0) goto abort;
                    }
                    else
                    {
                        ret = MD_LEAVE_SPAN(ctx, MD_SPAN_DEL, null);
                        if (ret != 0) goto abort;
                    }
                    break;

                case '$':       /* LaTeX math; odd mark length: inline, even: display. */
                    if(mark.flags & MD_MARK_OPENER) {
                        ret = MD_ENTER_SPAN(ctx, (mark.end - off) % 2 ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, null);
                        if (ret != 0) goto abort;
                        text_type = MD_TEXT_LATEXMATH;
                    } else {
                        ret = MD_LEAVE_SPAN(ctx, (mark.end - off) % 2 ? MD_SPAN_LATEXMATH : MD_SPAN_LATEXMATH_DISPLAY, null);
                        if (ret != 0) goto abort;
                        text_type = MD_TEXT_NORMAL;
                    }
                    break;

                case '[':       /* Link, image. */
                case '!':
                case ']':
                {
                    /* Destination and title live in the two dummy marks
                     * which follow the opener. */
                    const MD_MARK* opener = (mark.ch != ']' ? mark : &ctx.marks[mark.prev]);
                    const MD_MARK* dest_mark = opener+1;
                    const MD_MARK* title_mark = opener+2;

                    assert(dest_mark.ch == 'D');
                    assert(title_mark.ch == 'D');

                    ret = (md_enter_leave_span_a(ctx, (mark.ch != ']') ? 1 : 0,
                                (opener.ch == '!' ? MD_SPAN_IMG : MD_SPAN_A),
                                ctx.STR(dest_mark.beg), dest_mark.end - dest_mark.beg, FALSE,
                                cast(char*) md_mark_get_ptr(ctx, cast(int)(title_mark - ctx.marks)), title_mark.prev));
                    if (ret < 0) goto abort;

                    /* link/image closer may span multiple lines. */
                    if(mark.ch == ']') {
                        while(mark.end > line.end)
                            line++;
                    }

                    break;
                }

                case '<':
                case '>':       /* Autolink or raw HTML. */
                    if(!(mark.flags & MD_MARK_AUTOLINK)) {
                        /* Raw HTML. */
                        if(mark.flags & MD_MARK_OPENER)
                            text_type = MD_TEXT_HTML;
                        else
                            text_type = MD_TEXT_NORMAL;
                        break;
                    }
                    /* Pass through, if auto-link. */
                    goto case '.';

                case '@':       /* Permissive e-mail autolink. */
                case ':':       /* Permissive URL autolink. */
                case '.':       /* Permissive WWW autolink. */
                {
                    MD_MARK* opener = ((mark.flags & MD_MARK_OPENER) ? mark : &ctx.marks[mark.prev]);
                    MD_MARK* closer = &ctx.marks[opener.next];
                    const(CHAR)* dest = ctx.STR(opener.end);
                    SZ dest_size = closer.beg - opener.end;

                    /* For permissive auto-links we do not know closer mark
                     * position at the time of md_collect_marks(), therefore
                     * it can be out-of-order in ctx.marks[].
                     *
                     * With this flag, we make sure that we output the closer
                     * only if we processed the opener. */
                    if(mark.flags & MD_MARK_OPENER)
                        closer.flags |= MD_MARK_VALIDPERMISSIVEAUTOLINK;

                    /* E-mail and WWW autolinks get a 7-character scheme
                     * prefix prepended to the destination. */
                    if(opener.ch == '@' || opener.ch == '.') {
                        dest_size += 7;
                        ret = MD_TEMP_BUFFER(ctx, dest_size * CHAR.sizeof);
                        if (ret < 0) goto abort;
                        memcpy(ctx.buffer,
                                (opener.ch == '@' ? "mailto:" : "http://").ptr,
                                7 * CHAR.sizeof);
                        memcpy(ctx.buffer + 7, dest, (dest_size-7) * CHAR.sizeof);
                        dest = ctx.buffer;
                    }

                    if(closer.flags & MD_MARK_VALIDPERMISSIVEAUTOLINK)
                    {
                        ret = (md_enter_leave_span_a(ctx, (mark.flags & MD_MARK_OPENER),
                                    MD_SPAN_A, dest, dest_size, TRUE, null, 0));
                        if (ret < 0) goto abort;
                    }
                    break;
                }

                case '&':       /* Entity. */
                    ret = MD_TEXT(ctx, MD_TEXT_ENTITY, ctx.STR(mark.beg), mark.end - mark.beg);
                    if (ret != 0) goto abort;
                    break;

                case '\0':
                    ret = MD_TEXT(ctx, MD_TEXT_NULLCHAR, "", 1);
                    if (ret != 0) goto abort;
                    break;

                case 127:
                    goto abort;

                default:
                    break;
            }

            off = mark.end;

            /* Move to next resolved mark. */
            prev_mark = mark;
            mark++;
            while(!(mark.flags & MD_MARK_RESOLVED) || mark.beg < off)
                mark++;
        }

        /* If reached end of line, move to next one. */
        if(off >= line.end) {
            /* If it is the last line, we are done. */
            if(off >= end)
                break;

            if(text_type == MD_TEXT_CODE || text_type == MD_TEXT_LATEXMATH) {
                OFF tmp2;

                assert(prev_mark != null);
                assert(ISANYOF2_(prev_mark.ch, '`', '$') && (prev_mark.flags & MD_MARK_OPENER));
                assert(ISANYOF2_(mark.ch, '`', '$') && (mark.flags & MD_MARK_CLOSER));

                /* Inside a code span, trailing line whitespace has to be
                 * outputted. */
                tmp2 = off;
                while(off < ctx.size && ctx.ISBLANK(off))
                    off++;
                if(off > tmp2)
                {
                    ret = MD_TEXT(ctx, text_type, ctx.STR(tmp2), off-tmp2);
                    if (ret != 0) goto abort;
                }

                /* and new lines are transformed into single spaces. */
                if(prev_mark.end < off && off < mark.beg)
                {
                    ret = MD_TEXT(ctx, text_type, " ", 1);
                    if (ret != 0) goto abort;
                }
            } else if(text_type == MD_TEXT_HTML) {
                /* Inside raw HTML, we output the new line verbatim, including
                 * any trailing spaces. */
                OFF tmp2 = off;

                while(tmp2 < end && ctx.ISBLANK(tmp2))
                    tmp2++;
                if(tmp2 > off)
                {
                    ret = MD_TEXT(ctx, MD_TEXT_HTML, ctx.STR(off), tmp2 - off);
                    if (ret != 0) goto abort;
                }
                ret = MD_TEXT(ctx, MD_TEXT_HTML, "\n", 1);
                if (ret != 0) goto abort;
            } else {
                /* Output soft or hard line break. */
                MD_TEXTTYPE break_type = MD_TEXT_SOFTBR;

                if(text_type == MD_TEXT_NORMAL) {
                    if(enforce_hardbreak)
                        break_type = MD_TEXT_BR;
                    else if((ctx.CH(line.end) == ' ' && ctx.CH(line.end+1) == ' '))
                        break_type = MD_TEXT_BR;
                }

                ret = MD_TEXT(ctx, break_type, "\n", 1);
                if (ret != 0) goto abort;
            }

            /* Move to the next line.
*/
            line++;
            off = line.beg;

            enforce_hardbreak = 0;
        }
    }

abort:
    return ret;
}


/***************************
 ***  Processing Tables  ***
 ***************************/

/* Determine the alignment of each table column from the table underline
 * located in [beg, end).
 *
 * For each of the n_align columns, the next run of '-' is located; a ':'
 * right before the run requests left alignment, a ':' right after it
 * requests right alignment, both together request centering. The results
 * are written to align_[0 .. n_align].
 *
 * The scan never leaves [beg, end): if the underline holds fewer dash runs
 * than n_align, the remaining columns get MD_ALIGN_DEFAULT.
 */
void md_analyze_table_alignment(MD_CTX* ctx, OFF beg, OFF end, MD_ALIGN* align_, int n_align)
{
    static immutable MD_ALIGN[] align_map =
    [
        MD_ALIGN_DEFAULT,
        MD_ALIGN_LEFT,
        MD_ALIGN_RIGHT,
        MD_ALIGN_CENTER
    ];
    OFF off = beg;

    while(n_align > 0) {
        int index = 0;  /* index into align_map[] */

        /* Find the next dash run, but do not run past the underline. */
        while(off < end && ctx.CH(off) != '-')
            off++;
        if(off < end) {
            if(off > beg && ctx.CH(off-1) == ':')
                index |= 1;
            while(off < end && ctx.CH(off) == '-')
                off++;
            if(off < end && ctx.CH(off) == ':')
                index |= 2;
        }

        *align_ = align_map[index];
        align_++;
        n_align--;
    }

}

/* Process a single table cell whose contents are located in [beg, end).
 * Surrounding whitespace is trimmed, then the cell is emitted as a block of
 * the given type (with the given alignment) holding one line of normal
 * inline contents. Returns 0 on success. */
int md_process_table_cell(MD_CTX* ctx, MD_BLOCKTYPE cell_type, MD_ALIGN align_, OFF beg, OFF end)
{
    MD_LINE line;
    MD_BLOCK_TD_DETAIL det;
    int ret = 0;

    while(beg < end && ctx.ISWHITESPACE(beg))
        beg++;
    while(end > beg && ctx.ISWHITESPACE(end-1))
        end--;

    det.align_ = align_;
    line.beg = beg;
    line.end = end;

    ret = MD_ENTER_BLOCK(ctx, cell_type, &det);
    if (ret != 0) goto abort;
    ret = (md_process_normal_block_contents(ctx, &line, 1));
    if (ret < 0) goto abort;
    ret = MD_LEAVE_BLOCK(ctx, cell_type, &det);
    if (ret != 0) goto abort;

abort:
    return ret;
}

int md_process_table_row(MD_CTX* ctx, MD_BLOCKTYPE cell_type, OFF beg, OFF end,
                         const MD_ALIGN* align_, int col_count)
{
    MD_LINE line;
    OFF* pipe_offs = null;
    int i, j, n;
    int ret = 0;

    line.beg = beg;
    line.end = end;

    /* Break the line into table cells by identifying pipe characters who
     * form the cell boundary.
*/
    ret = (md_analyze_inlines(ctx, &line, 1, TRUE));
    if (ret < 0) goto abort;

    /* We have to remember the cell boundaries in local buffer because
     * ctx.marks[] shall be reused during cell contents processing.
     *
     * With no boundary at all there is nothing to remember: malloc(0) may
     * legitimately return null (which is not a failure) and pipe_offs[]
     * must not be indexed, so that case is handled separately below. */
    n = ctx.n_table_cell_boundaries;
    if(n > 0) {
        pipe_offs = cast(OFF*) malloc(n * OFF.sizeof);
        if(pipe_offs == null) {
            ctx.MD_LOG("malloc() failed.");
            ret = -1;
            goto abort;
        }
        for(i = ctx.TABLECELLBOUNDARIES.head, j = 0; i >= 0; i = ctx.marks[i].next) {
            MD_MARK* mark = &ctx.marks[i];
            pipe_offs[j++] = mark.beg;
        }
    }

    /* Process cells. */
    ret = MD_ENTER_BLOCK(ctx, MD_BLOCK_TR, null);
    if (ret != 0) goto abort;

    j = 0;
    if(n == 0) {
        /* No pipe on the line: the whole line forms a single cell. */
        if(beg < end && j < col_count)
        {
            ret = (md_process_table_cell(ctx, cell_type, align_[j++], beg, end));
            if (ret < 0) goto abort;
        }
    } else {
        /* Cell in front of the first pipe (if the line does not start with one). */
        if(beg < pipe_offs[0] && j < col_count)
        {
            ret = (md_process_table_cell(ctx, cell_type, align_[j++], beg, pipe_offs[0]));
            if (ret < 0) goto abort;
        }
        /* Cells between two consecutive pipes. */
        for(i = 0; i < n-1 && j < col_count; i++)
        {
            ret = (md_process_table_cell(ctx, cell_type, align_[j++], pipe_offs[i]+1, pipe_offs[i+1]));
            if (ret < 0) goto abort;
        }
        /* Cell after the last pipe (if the line does not end with one). */
        if(pipe_offs[n-1] < end-1 && j < col_count)
        {
            ret = (md_process_table_cell(ctx, cell_type, align_[j++], pipe_offs[n-1]+1, end));
            if (ret < 0) goto abort;
        }
    }
    /* Make sure we call enough table cells even if the current table contains
     * too few of them. */
    while(j < col_count)
    {
        ret = (md_process_table_cell(ctx, cell_type, align_[j++], 0, 0));
        if (ret < 0) goto abort;
    }

    ret = MD_LEAVE_BLOCK(ctx, MD_BLOCK_TR, null);
    if (ret != 0) goto abort;

abort:
    free(pipe_offs);

    /* Free any temporary memory blocks stored within some dummy marks. */
    for(i = ctx.PTR_CHAIN.head; i >= 0; i = ctx.marks[i].next)
        free(md_mark_get_ptr(ctx, i));
    ctx.PTR_CHAIN.head = -1;
    ctx.PTR_CHAIN.tail = -1;

    return ret;
}

/* Process a whole table block: lines[0] is the header row, lines[1] the
 * underline (used only to determine the column alignments) and every
 * further line is a body row. col_count is the number of columns each row
 * is emitted with. Returns 0 on success, -1 if allocation fails, or the
 * value of a failing callback/helper. */
int md_process_table_block_contents(MD_CTX* ctx, int col_count, const MD_LINE* lines, int n_lines)
{
    MD_ALIGN* align_;
    int i;
    int ret = 0;

    /* At least two lines have to be present: The column headers and the line
     * with the underlines. */
    assert(n_lines >= 2);

    align_ = cast(MD_ALIGN*) malloc(col_count * MD_ALIGN.sizeof);
    if(align_ == null) {
        ctx.MD_LOG("malloc() failed.");
        ret = -1;
        goto abort;
    }

    md_analyze_table_alignment(ctx, lines[1].beg, lines[1].end, align_, col_count);

    ret = MD_ENTER_BLOCK(ctx, MD_BLOCK_THEAD, null);
    if (ret != 0) goto abort;
    ret = (md_process_table_row(ctx, MD_BLOCK_TH,
                    lines[0].beg, lines[0].end, align_, col_count));
    if (ret < 0) goto abort;
    ret = MD_LEAVE_BLOCK(ctx, MD_BLOCK_THEAD, null);
    if (ret != 0) goto abort;

    ret = MD_ENTER_BLOCK(ctx, MD_BLOCK_TBODY, null);
    if (ret != 0) goto abort;
    for(i = 2; i < n_lines; i++) {
        ret = (md_process_table_row(ctx, MD_BLOCK_TD,
                        lines[i].beg, lines[i].end, align_, col_count));
        if (ret < 0) goto abort;
    }
    ret = MD_LEAVE_BLOCK(ctx, MD_BLOCK_TBODY, null);
    if (ret != 0) goto abort;

abort:
    free(align_);
    return ret;
}

int md_is_table_row(MD_CTX* ctx, OFF beg, OFF* p_end)
{
    MD_LINE line;
    int i;
    int ret = FALSE;

    line.beg = beg;
    line.end = beg;

    /* Find end of line.
*/
    while(line.end < ctx.size && !ctx.ISNEWLINE(line.end))
        line.end++;

    ret = (md_analyze_inlines(ctx, &line, 1, TRUE));
    if (ret < 0) goto abort;

    /* The line is a table row if it holds at least one cell boundary. */
    if(ctx.TABLECELLBOUNDARIES.head >= 0) {
        if(p_end != null)
            *p_end = line.end;
        ret = TRUE;
    }

abort:
    /* Free any temporary memory blocks stored within some dummy marks. */
    for(i = ctx.PTR_CHAIN.head; i >= 0; i = ctx.marks[i].next)
        free(md_mark_get_ptr(ctx, i));
    ctx.PTR_CHAIN.head = -1;
    ctx.PTR_CHAIN.tail = -1;

    return ret;
}


/**************************
 ***  Processing Block  ***
 **************************/

enum MD_BLOCK_CONTAINER_OPENER   = 0x01;
enum MD_BLOCK_CONTAINER_CLOSER   = 0x02;
enum MD_BLOCK_CONTAINER          = (MD_BLOCK_CONTAINER_OPENER | MD_BLOCK_CONTAINER_CLOSER);
enum MD_BLOCK_LOOSE_LIST         = 0x04;
enum MD_BLOCK_SETEXT_HEADER      = 0x08;

/* Compact (8 byte) block record. The type, flags and data fields are stored
 * in narrow integers and exposed through property accessors. */
struct MD_BLOCK
{
nothrow:
@nogc:
    ubyte type_;
    ubyte flags_;
    ushort data_;

    MD_BLOCKTYPE type() const { return type_; }
    void type(MD_BLOCKTYPE value) { type_ = cast(ubyte)value; }

    uint flags() const { return flags_; }
    void flags(uint value) { flags_ = cast(ubyte)value; }

    /* MD_BLOCK_H:      Header level (1 - 6)
     * MD_BLOCK_CODE:   Non-zero if fenced, zero if indented.
     * MD_BLOCK_LI:     Task mark character (0 if not task list item, 'x', 'X' or ' ').
     * MD_BLOCK_TABLE:  Column count (as determined by the table underline).
     */
    uint data() const { return data_; }
    /* Keep all 16 bits of the backing field; narrowing to ubyte here would
     * silently truncate values of 256 and above. */
    void data(uint value) { data_ = cast(ushort)value; }

    /* Leaf blocks:     Count of lines (MD_LINE or MD_VERBATIMLINE) on the block.
     * MD_BLOCK_LI:     Task mark offset in the input doc.
     * MD_BLOCK_OL:     Start item number.
     */
    uint n_lines;
}

static assert(MD_BLOCK.sizeof == 8);

/* Record describing one container; the two boolean-like flags are stored
 * in single bytes and exposed through property accessors. */
struct MD_CONTAINER
{
nothrow:
@nogc:

    CHAR ch;

    ubyte is_loose_;
    ubyte is_task_;

    uint is_loose() { return is_loose_; }
    void is_loose(uint value) { is_loose_ = cast(ubyte)value; }

    uint is_task() { return is_task_; }
    void is_task(uint value) { is_task_ = cast(ubyte)value; }

    uint start;
    uint mark_indent;
    uint contents_indent;
    OFF block_byte_off;
    OFF task_mark_off;
}


/* Analyze and render the inline contents of a normal (non-verbatim) block
 * made of the given lines. Temporary memory referenced from ctx.PTR_CHAIN
 * is released on every path. Returns a negative value on failure. */
int md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
{
    int i;
    int ret;

    ret = (md_analyze_inlines(ctx, lines, n_lines, FALSE));
    if (ret < 0) goto abort;
    ret = (md_process_inlines(ctx, lines, n_lines));
    if (ret < 0) goto abort;

abort:
    /* Free any temporary memory blocks stored within some dummy marks. */
    for(i = ctx.PTR_CHAIN.head; i >= 0; i = ctx.marks[i].next)
        free(md_mark_get_ptr(ctx, i));
    ctx.PTR_CHAIN.head = -1;
    ctx.PTR_CHAIN.tail = -1;

    return ret;
}

int md_process_verbatim_block_contents(MD_CTX* ctx, MD_TEXTTYPE text_type, const MD_VERBATIMLINE* lines, int n_lines)
{
    static immutable string indent_chunk_str = " ";

    int i;
    int ret = 0;

    for(i = 0; i < n_lines; i++) {
        const MD_VERBATIMLINE* line = &lines[i];
        int indent = line.indent;

        assert(indent >= 0);

        /* Output code indentation. */
        while(indent > cast(int)(indent_chunk_str.length)) {
            ret = MD_TEXT(ctx, text_type, indent_chunk_str.ptr, cast(SZ)(indent_chunk_str.length));
            if (ret != 0) goto abort;
            indent -= indent_chunk_str.length;
        }
        if(indent > 0)
        {
            ret = MD_TEXT(ctx, text_type, indent_chunk_str.ptr, indent);
            if (ret != 0) goto abort;
        }

        /* Output the code line itself.
/* Fills the code block detail `det` for a fenced code block.
 *
 * The fence line is read from the first MD_VERBATIMLINE stored right behind
 * the block header (block + 1). After skipping the fence characters and the
 * surrounding spaces:
 *   - det.info is built from the whole remaining text of the line,
 *   - det.lang is built from its first run of non-whitespace characters,
 *   - det.fence_char is set to the fence character.
 *
 * info_build and lang_build are handed to md_build_attribute() for the
 * respective attribute.
 *
 * Returns a negative value as soon as one of the md_build_attribute() calls
 * fails (det.fence_char is not set in that case); otherwise returns the
 * result of the last md_build_attribute() call.
 */
int md_setup_fenced_code_detail(MD_CTX* ctx, const(MD_BLOCK)* block, MD_BLOCK_CODE_DETAIL* det,
                MD_ATTRIBUTE_BUILD* info_build, MD_ATTRIBUTE_BUILD* lang_build)
{
    const(MD_VERBATIMLINE)* fence_line = cast(const(MD_VERBATIMLINE)*)(block + 1);
    OFF beg = fence_line.beg;
    OFF end = fence_line.end;
    OFF lang_end;
    CHAR fence_ch = ctx.CH(fence_line.beg);
    int ret = 0;

    /* Skip the fence itself. */
    while(beg < ctx.size && ctx.CH(beg) == fence_ch)
        beg++;
    /* Trim initial spaces. */
    while(beg < ctx.size && ctx.CH(beg) == ' ')
        beg++;

    /* Trim trailing spaces. */
    while(end > beg && ctx.CH(end-1) == ' ')
        end--;

    /* Build info string attribute: everything between beg and end. */
    ret = (md_build_attribute(ctx, ctx.STR(beg), end - beg, 0, &det.info, info_build));
    if (ret < 0) goto abort;

    /* Build lang string attribute: the info string up to its first
     * whitespace character. */
    lang_end = beg;
    while(lang_end < end && !ctx.ISWHITESPACE(lang_end))
        lang_end++;
    ret = (md_build_attribute(ctx, ctx.STR(beg), lang_end - beg, 0, &det.lang, lang_build));
    if (ret < 0) goto abort;

    det.fence_char = fence_ch;

abort:
    return ret;
}
*/ 5218 switch(block.type) { 5219 case MD_BLOCK_HR: 5220 /* noop */ 5221 break; 5222 5223 case MD_BLOCK_CODE: 5224 ret = (md_process_code_block_contents(ctx, (block.data != 0), 5225 cast(const(MD_VERBATIMLINE)*)(block + 1), block.n_lines)); 5226 if (ret < 0) goto abort; 5227 break; 5228 5229 case MD_BLOCK_HTML: 5230 ret = (md_process_verbatim_block_contents(ctx, MD_TEXT_HTML, 5231 cast(const(MD_VERBATIMLINE)*)(block + 1), block.n_lines)); 5232 if (ret < 0) goto abort; 5233 break; 5234 5235 case MD_BLOCK_TABLE: 5236 ret = (md_process_table_block_contents(ctx, block.data, 5237 cast(const(MD_LINE)*)(block + 1), block.n_lines)); 5238 if (ret < 0) goto abort; 5239 break; 5240 5241 default: 5242 ret = (md_process_normal_block_contents(ctx, 5243 cast(const(MD_LINE)*)(block + 1), block.n_lines)); 5244 if (ret < 0) goto abort; 5245 break; 5246 } 5247 5248 if(!is_in_tight_list || block.type != MD_BLOCK_P) 5249 { 5250 ret = MD_LEAVE_BLOCK(ctx, block.type, cast(void*) &det); 5251 if (ret != 0) goto abort; 5252 } 5253 5254 abort: 5255 if(clean_fence_code_detail) { 5256 md_free_attribute(ctx, &info_build); 5257 md_free_attribute(ctx, &lang_build); 5258 } 5259 return ret; 5260 } 5261 5262 int md_process_all_blocks(MD_CTX* ctx) 5263 { 5264 int byte_off = 0; 5265 int ret = 0; 5266 5267 /* ctx.containers now is not needed for detection of lists and list items 5268 * so we reuse it for tracking what lists are loose or tight. We rely 5269 * on the fact the vector is large enough to hold the deepest nesting 5270 * level of lists. */ 5271 ctx.n_containers = 0; 5272 5273 while(byte_off < ctx.n_block_bytes) { 5274 MD_BLOCK* block = cast(MD_BLOCK*)(cast(char*)ctx.block_bytes + byte_off); 5275 static union Det 5276 { 5277 MD_BLOCK_UL_DETAIL ul; 5278 MD_BLOCK_OL_DETAIL ol; 5279 MD_BLOCK_LI_DETAIL li; 5280 } 5281 5282 Det det; 5283 5284 switch(block.type) { 5285 case MD_BLOCK_UL: 5286 det.ul.is_tight = (block.flags & MD_BLOCK_LOOSE_LIST) ? 
FALSE : TRUE; 5287 det.ul.mark = cast(CHAR) block.data; 5288 break; 5289 5290 case MD_BLOCK_OL: 5291 det.ol.start = block.n_lines; 5292 det.ol.is_tight = (block.flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE; 5293 det.ol.mark_delimiter = cast(CHAR) block.data; 5294 break; 5295 5296 case MD_BLOCK_LI: 5297 det.li.is_task = (block.data != 0); 5298 det.li.task_mark = cast(CHAR) block.data; 5299 det.li.task_mark_offset = cast(OFF) block.n_lines; 5300 break; 5301 5302 default: 5303 /* noop */ 5304 break; 5305 } 5306 5307 if(block.flags & MD_BLOCK_CONTAINER) { 5308 if(block.flags & MD_BLOCK_CONTAINER_CLOSER) { 5309 ret = MD_LEAVE_BLOCK(ctx, block.type, &det); 5310 if (ret != 0) goto abort; 5311 5312 if(block.type == MD_BLOCK_UL || block.type == MD_BLOCK_OL || block.type == MD_BLOCK_QUOTE) 5313 ctx.n_containers--; 5314 } 5315 5316 if(block.flags & MD_BLOCK_CONTAINER_OPENER) { 5317 ret = MD_ENTER_BLOCK(ctx, block.type, &det); 5318 if (ret != 0) goto abort; 5319 5320 if(block.type == MD_BLOCK_UL || block.type == MD_BLOCK_OL) { 5321 ctx.containers[ctx.n_containers].is_loose = (block.flags & MD_BLOCK_LOOSE_LIST); 5322 ctx.n_containers++; 5323 } else if(block.type == MD_BLOCK_QUOTE) { 5324 /* This causes that any text in a block quote, even if 5325 * nested inside a tight list item, is wrapped with 5326 * <p>...</p>. 
/* Reserves n_bytes at the end of ctx.block_bytes and returns a pointer to
 * the reserved region, growing the buffer when it is full.
 *
 * The buffer starts at 512 bytes and doubles on each growth.
 * ctx.current_block, which points into the buffer, is re-based whenever the
 * buffer is reallocated.
 *
 * Returns null (after logging) when the reallocation fails; in that case
 * the context is left exactly as it was before the call.
 */
static void*
md_push_block_bytes(MD_CTX* ctx, int n_bytes)
{
    if(ctx.n_block_bytes + n_bytes > ctx.alloc_block_bytes) {
        const new_alloc = (ctx.alloc_block_bytes > 0 ? ctx.alloc_block_bytes * 2 : 512);

        /* Remember where .current_block sits relative to the buffer start
         * while the old buffer pointer is still valid. */
        const bool has_current_block = (ctx.current_block != null);
        size_t off_current_block = 0;
        if(has_current_block)
            off_current_block = cast(char*) ctx.current_block - cast(char*) ctx.block_bytes;

        void* new_block_bytes = realloc_safe(ctx.block_bytes, new_alloc);
        if(new_block_bytes == null) {
            /* The recorded capacity is deliberately left untouched so it
             * keeps describing the buffer we still own. */
            ctx.MD_LOG("realloc() failed.");
            return null;
        }

        /* Commit the new buffer and fix the .current_block after the
         * reallocation. */
        ctx.alloc_block_bytes = new_alloc;
        ctx.block_bytes = new_block_bytes;
        if(has_current_block)
            ctx.current_block = cast(MD_BLOCK*) (cast(char*) new_block_bytes + off_current_block);
    }

    void* ptr = cast(char*)ctx.block_bytes + ctx.n_block_bytes;
    ctx.n_block_bytes += n_bytes;
    return ptr;
}
5438 */ 5439 int md_consume_link_reference_definitions(MD_CTX* ctx) 5440 { 5441 MD_LINE* lines = cast(MD_LINE*) (ctx.current_block + 1); 5442 int n_lines = ctx.current_block.n_lines; 5443 int n = 0; 5444 5445 /* Compute how many lines at the start of the block form one or more 5446 * reference definitions. */ 5447 while(n < n_lines) { 5448 int n_link_ref_lines; 5449 5450 n_link_ref_lines = md_is_link_reference_definition(ctx, 5451 lines + n, n_lines - n); 5452 /* Not a reference definition? */ 5453 if(n_link_ref_lines == 0) 5454 break; 5455 5456 /* We fail if it is the ref. def. but it could not be stored due 5457 * a memory allocation error. */ 5458 if(n_link_ref_lines < 0) 5459 return -1; 5460 5461 n += n_link_ref_lines; 5462 } 5463 5464 /* If there was at least one reference definition, we need to remove 5465 * its lines from the block, or perhaps even the whole block. */ 5466 if(n > 0) { 5467 if(n == n_lines) { 5468 /* Remove complete block. */ 5469 ctx.n_block_bytes -= n * MD_LINE.sizeof; 5470 ctx.n_block_bytes -= MD_BLOCK.sizeof; 5471 ctx.current_block = null; 5472 } else { 5473 /* Remove just some initial lines from the block. */ 5474 memmove(lines, lines + n, (n_lines - n) * MD_LINE.sizeof); 5475 ctx.current_block.n_lines -= n; 5476 ctx.n_block_bytes -= n * MD_LINE.sizeof; 5477 } 5478 } 5479 5480 return 0; 5481 } 5482 5483 static int 5484 md_end_current_block(MD_CTX* ctx) 5485 { 5486 int ret = 0; 5487 5488 if(ctx.current_block == null) 5489 return ret; 5490 5491 /* Check whether there is a reference definition. (We do this here instead 5492 * of in md_analyze_line() because reference definition can take multiple 5493 * lines.) 
/* Appends the analyzed line to the block which is currently being built.
 *
 * Code and raw HTML blocks store MD_VERBATIMLINE records (which also carry
 * the line indentation); every other block stores plain MD_LINE records.
 *
 * Returns 0 on success, -1 when the record could not be pushed.
 */
static int
md_add_line_into_current_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* analysis)
{
    assert(ctx.current_block != null);

    const block_type = ctx.current_block.type;
    const bool is_verbatim = (block_type == MD_BLOCK_CODE || block_type == MD_BLOCK_HTML);

    if(!is_verbatim) {
        MD_LINE* text_line = cast(MD_LINE*) md_push_block_bytes(ctx, MD_LINE.sizeof);
        if(text_line == null)
            return -1;

        text_line.beg = analysis.beg;
        text_line.end = analysis.end;
    } else {
        MD_VERBATIMLINE* raw_line = cast(MD_VERBATIMLINE*) md_push_block_bytes(ctx, MD_VERBATIMLINE.sizeof);
        if(raw_line == null)
            return -1;

        raw_line.indent = analysis.indent;
        raw_line.beg = analysis.beg;
        raw_line.end = analysis.end;
    }

    /* Read .current_block again: pushing bytes may have moved the buffer. */
    ctx.current_block.n_lines++;
    return 0;
}
/* Checks whether the line starting at beg is a horizontal rule: at least
 * three occurrences of the character found at beg, optionally mixed with
 * spaces and tabs, and nothing else up to the end of the line.
 *
 * On success stores the end offset into *p_end and returns TRUE.
 * On failure stores the offset where scanning stopped into *p_killer and
 * returns FALSE.
 */
static int
md_is_hr_line(MD_CTX* ctx, OFF beg, OFF* p_end, OFF* p_killer)
{
    int n_marks = 1;    /* The character at beg itself. */
    OFF pos;

    for(pos = beg + 1; pos < ctx.size; pos++) {
        const mark = ctx.CH(beg);
        const cur = ctx.CH(pos);

        if(cur == mark)
            n_marks++;
        else if(cur != ' ' && cur != '\t')
            break;
    }

    /* Too few marks, or something else is present on the line. */
    if(n_marks < 3 || (pos < ctx.size && !ctx.ISNEWLINE(pos))) {
        *p_killer = pos;
        return FALSE;
    }

    *p_end = pos;
    return TRUE;
}
/* Checks whether the line starting at beg is a table underline, i.e. a
 * sequence of cells like "-----", ":----", "----:" or ":----:" separated by
 * pipes. Each cell has to be at least three characters long and at least
 * one pipe has to be present on the line.
 *
 * On success stores the end offset into *p_end and the number of cells into
 * *p_col_count and returns TRUE; otherwise returns FALSE and leaves both
 * outputs untouched.
 */
int md_is_table_underline(MD_CTX* ctx, OFF beg, OFF* p_end, uint* p_col_count)
{
    OFF pos = beg;
    uint n_cols = 0;
    bool saw_pipe = false;

    /* Optional leading pipe. */
    if(pos < ctx.size && ctx.CH(pos) == '|') {
        saw_pipe = true;
        pos++;
        while(pos < ctx.size && ctx.ISWHITESPACE(pos))
            pos++;
    }

    for(;;) {
        const OFF cell_start = pos;

        /* Cell underline: optional ':', dashes, optional ':'. */
        if(pos < ctx.size && ctx.CH(pos) == ':')
            pos++;
        while(pos < ctx.size && ctx.CH(pos) == '-')
            pos++;
        if(pos < ctx.size && ctx.CH(pos) == ':')
            pos++;
        if(pos - cell_start < 3)
            return FALSE;

        n_cols++;

        /* Pipe delimiter (optional at the end of line). */
        while(pos < ctx.size && ctx.ISWHITESPACE(pos))
            pos++;
        const bool cell_closed_by_pipe = (pos < ctx.size && ctx.CH(pos) == '|');
        if(cell_closed_by_pipe) {
            saw_pipe = true;
            pos++;
            while(pos < ctx.size && ctx.ISWHITESPACE(pos))
                pos++;
        }

        /* Success, if we reach end of line. */
        if(pos >= ctx.size || ctx.ISNEWLINE(pos))
            break;

        /* Another cell may only follow a pipe. */
        if(!cell_closed_by_pipe)
            return FALSE;
    }

    if(saw_pipe) {
        *p_end = pos;
        *p_col_count = n_cols;
        return TRUE;
    }
    return FALSE;
}
/* Checks whether the line starting at beg closes the current fenced code
 * block: a run of `ch` at least ctx.code_fence_length long, optionally
 * followed by spaces, and nothing else on the line.
 *
 * Note *p_end is set even on failure: If the line is not a closing fence,
 * the caller eats the line anyway without any parsing.
 */
static int
md_is_closing_code_fence(MD_CTX* ctx, CHAR ch, OFF beg, OFF* p_end)
{
    OFF pos = beg;
    int is_closer = FALSE;

    /* Closing fence must use the same char as the opening one... */
    while(pos < ctx.size && ctx.CH(pos) == ch)
        pos++;

    /* ...and must have at least the same length. */
    if(pos - beg >= ctx.code_fence_length) {
        /* Optionally, space(s) can follow */
        while(pos < ctx.size && ctx.CH(pos) == ' ')
            pos++;

        /* But nothing more is allowed on the line. */
        if(pos >= ctx.size || ctx.ISNEWLINE(pos))
            is_closer = TRUE;
    }

    *p_end = pos;
    return is_closer;
}
*/ 5784 5785 static immutable string Xend = null; 5786 static immutable string[] t1 = [ "script", "pre", "style", Xend ]; 5787 static immutable string[] a6 = [ "address", "article", "aside", Xend ]; 5788 static immutable string[] b6 = [ "base", "basefont", "blockquote", "body", Xend ]; 5789 static immutable string[] c6 = [ "caption", "center", "col", "colgroup", Xend ]; 5790 static immutable string[] d6 = [ "dd", "details", "dialog", "dir", 5791 "div", "dl", "dt", Xend ]; 5792 static immutable string[] f6 = [ "fieldset", "figcaption", "figure", "footer", 5793 "form", "frame", "frameset", Xend ]; 5794 static immutable string[] h6 = [ "h1", "head", "header", "hr", "html", Xend ]; 5795 static immutable string[] i6 = [ "iframe", Xend ]; 5796 static immutable string[] l6 = [ "legend", "li", "link", Xend ]; 5797 static immutable string[] m6 = [ "main", "menu", "menuitem", Xend ]; 5798 static immutable string[] n6 = [ "nav", "noframes", Xend ]; 5799 static immutable string[] o6 = [ "ol", "optgroup", "option", Xend ]; 5800 static immutable string[] p6 = [ "p", "param", Xend ]; 5801 static immutable string[] s6 = [ "section", "source", "summary", Xend ]; 5802 static immutable string[] t6 = [ "table", "tbody", "td", "tfoot", "th", 5803 "thead", "title", "tr", "track", Xend ]; 5804 static immutable string[] u6 = [ "ul", Xend ]; 5805 static immutable string[] xx = [ Xend ]; 5806 5807 immutable(string)*[26] map6; 5808 map6[0] = a6.ptr; 5809 map6[1] = b6.ptr; 5810 map6[2] = c6.ptr; 5811 map6[3] = d6.ptr; 5812 map6[4] = xx.ptr; 5813 map6[5] = f6.ptr; 5814 map6[6] = xx.ptr; 5815 map6[7] = h6.ptr; 5816 map6[8] = i6.ptr; 5817 map6[9] = xx.ptr; 5818 map6[10] = xx.ptr; 5819 map6[11] = l6.ptr; 5820 map6[12] = m6.ptr; 5821 map6[13] = n6.ptr; 5822 map6[14] = o6.ptr; 5823 map6[15] = p6.ptr; 5824 map6[16] = xx.ptr; 5825 map6[17] = xx.ptr; 5826 map6[18] = s6.ptr; 5827 map6[19] = t6.ptr; 5828 map6[20] = u6.ptr; 5829 map6[21] = xx.ptr; 5830 map6[22] = xx.ptr; 5831 map6[23] = xx.ptr; 5832 
map6[24] = xx.ptr; 5833 map6[25] = xx.ptr; 5834 5835 OFF off = beg + 1; 5836 int i; 5837 5838 /* Check for type 1: <script, <pre, or <style */ 5839 for(i = 0; t1[i].ptr != null; i++) 5840 { 5841 if(off + t1[i].length <= ctx.size) 5842 { 5843 if(md_ascii_case_eq(ctx.STR(off), t1[i].ptr, cast(uint)(t1[i].length))) 5844 return 1; 5845 } 5846 } 5847 5848 /* Check for type 2: <!-- */ 5849 if(off + 3 < ctx.size && ctx.CH(off) == '!' && ctx.CH(off+1) == '-' && ctx.CH(off+2) == '-') 5850 return 2; 5851 5852 /* Check for type 3: <? */ 5853 if(off < ctx.size && ctx.CH(off) == '?') 5854 return 3; 5855 5856 /* Check for type 4 or 5: <! */ 5857 if(off < ctx.size && ctx.CH(off) == '!') { 5858 /* Check for type 4: <! followed by uppercase letter. */ 5859 if(off + 1 < ctx.size && ctx.ISUPPER(off+1)) 5860 return 4; 5861 5862 /* Check for type 5: <![CDATA[ */ 5863 if(off + 8 < ctx.size) { 5864 if(md_ascii_eq(ctx.STR(off), "![CDATA[", 8 * CHAR.sizeof)) 5865 return 5; 5866 } 5867 } 5868 5869 /* Check for type 6: Many possible starting tags listed above. */ 5870 if(off + 1 < ctx.size && (ctx.ISALPHA(off) || (ctx.CH(off) == '/' && ctx.ISALPHA(off+1)))) { 5871 int slot; 5872 const(string)* tags; 5873 5874 if(ctx.CH(off) == '/') 5875 off++; 5876 5877 slot = (ctx.ISUPPER(off) ? ctx.CH(off) - 'A' : ctx.CH(off) - 'a'); 5878 tags = map6[slot]; 5879 5880 for(i = 0; tags[i].ptr != null; i++) { 5881 if(off + tags[i].length <= ctx.size) { 5882 if(md_ascii_case_eq(ctx.STR(off), tags[i].ptr, cast(uint) tags[i].length)) { 5883 OFF tmp = cast(uint)(off + tags[i].length); 5884 if(tmp >= ctx.size) 5885 return 6; 5886 if(ctx.ISBLANK(tmp) || ctx.ISNEWLINE(tmp) || ctx.CH(tmp) == '>') 5887 return 6; 5888 if(tmp+1 < ctx.size && ctx.CH(tmp) == '/' && ctx.CH(tmp+1) == '>') 5889 return 6; 5890 break; 5891 } 5892 } 5893 } 5894 } 5895 5896 /* Check for type 7: any COMPLETE other opening or closing tag. 
/* Returns type of HTML block end condition or FALSE if not an end condition.
 *
 * Which condition is tested depends on ctx.html_block_type:
 *   1:    the line contains "</script>", "</style>" or "</pre>"
 *         (ASCII case-insensitive); TRUE is returned in that case,
 *   2-5:  the line contains "-->", "?>", ">" or "]]>" respectively,
 *   6, 7: the line is blank, i.e. beg is at a new line or at the end of input.
 *
 * Note it fills p_end even when it is not end condition as the caller
 * does not need to analyze contents of a raw HTML block.
 */
int md_is_html_block_end_condition(MD_CTX* ctx, OFF beg, OFF* p_end)
{
    switch(ctx.html_block_type) {
        case 1:
        {
            OFF off = beg;

            while(off < ctx.size && !ctx.ISNEWLINE(off)) {
                if(ctx.CH(off) == '<') {
                    /* Each comparison is guarded so that it never reads
                     * behind the end of the input. */
                    if(off + 9 <= ctx.size && md_ascii_case_eq(ctx.STR(off), "</script>", 9)) {
                        *p_end = off + 9;
                        return TRUE;
                    }

                    if(off + 8 <= ctx.size && md_ascii_case_eq(ctx.STR(off), "</style>", 8)) {
                        *p_end = off + 8;
                        return TRUE;
                    }

                    if(off + 6 <= ctx.size && md_ascii_case_eq(ctx.STR(off), "</pre>", 6)) {
                        *p_end = off + 6;
                        return TRUE;
                    }
                }

                off++;
            }
            *p_end = off;
            return FALSE;
        }

        case 2:
            return (md_line_contains(ctx, beg, "-->", 3, p_end) ? 2 : FALSE);

        case 3:
            return (md_line_contains(ctx, beg, "?>", 2, p_end) ? 3 : FALSE);

        case 4:
            return (md_line_contains(ctx, beg, ">", 1, p_end) ? 4 : FALSE);

        case 5:
            return (md_line_contains(ctx, beg, "]]>", 3, p_end) ? 5 : FALSE);

        case 6:     /* Pass through */
        case 7:
            *p_end = beg;
            /* End of input counts as a blank line; it also keeps ISNEWLINE()
             * from being asked about an offset behind the input. */
            return ((beg >= ctx.size || ctx.ISNEWLINE(beg)) ? ctx.html_block_type : FALSE);

        default:
            assert(false);
    }
}
*/ 6047 md_end_current_block(ctx); 6048 c.block_byte_off = ctx.n_block_bytes; 6049 6050 ret = (md_push_container_bytes(ctx, 6051 (is_ordered_list ? MD_BLOCK_OL : MD_BLOCK_UL), 6052 c.start, data, MD_BLOCK_CONTAINER_OPENER)); 6053 if (ret < 0) goto abort; 6054 ret = (md_push_container_bytes(ctx, MD_BLOCK_LI, 6055 c.task_mark_off, 6056 (c.is_task ? ctx.CH(c.task_mark_off) : 0), 6057 MD_BLOCK_CONTAINER_OPENER)); 6058 if (ret < 0) goto abort; 6059 break; 6060 6061 case '>': 6062 ret = (md_push_container_bytes(ctx, MD_BLOCK_QUOTE, 0, 0, MD_BLOCK_CONTAINER_OPENER)); 6063 if (ret < 0) goto abort; 6064 break; 6065 6066 default: 6067 assert(false); 6068 } 6069 } 6070 6071 abort: 6072 return ret; 6073 } 6074 6075 static int 6076 md_leave_child_containers(MD_CTX* ctx, int n_keep) 6077 { 6078 int ret = 0; 6079 6080 while(ctx.n_containers > n_keep) { 6081 MD_CONTAINER* c = &ctx.containers[ctx.n_containers-1]; 6082 int is_ordered_list = FALSE; 6083 6084 switch(c.ch) { 6085 case ')': 6086 case '.': 6087 is_ordered_list = TRUE; 6088 /* Pass through */ 6089 goto case '-'; 6090 6091 case '-': 6092 case '+': 6093 case '*': 6094 ret = (md_push_container_bytes(ctx, MD_BLOCK_LI, 6095 c.task_mark_off, (c.is_task ? ctx.CH(c.task_mark_off) : 0), 6096 MD_BLOCK_CONTAINER_CLOSER)); 6097 if (ret < 0) goto abort; 6098 ret = (md_push_container_bytes(ctx, 6099 (is_ordered_list ? 
/* Checks whether a container mark (block quote '>', bullet list item or
 * ordered list item) starts at offset beg.
 *
 * `indent` is the indentation of the line; a line indented by
 * ctx.code_indent_offset or more never starts a container.
 *
 * On success fills *p_container (mark character, indents, and for ordered
 * lists the start number), stores the offset just behind the mark into
 * *p_end and returns TRUE. Otherwise returns FALSE; note p_container.start
 * may have been overwritten even then.
 */
static int
md_is_container_mark(MD_CTX* ctx, uint indent, OFF beg, OFF* p_end, MD_CONTAINER* p_container)
{
    OFF off = beg;
    OFF max_end;

    if(indent >= ctx.code_indent_offset)
        return FALSE;

    /* Check for block quote mark. */
    if(off < ctx.size && ctx.CH(off) == '>') {
        off++;
        p_container.ch = '>';
        p_container.is_loose = FALSE;
        p_container.is_task = FALSE;
        p_container.mark_indent = indent;
        p_container.contents_indent = indent + 1;
        *p_end = off;
        return TRUE;
    }

    /* Check for list item bullet mark. */
    if(off+1 < ctx.size && ctx.ISANYOF(off, "-+*") && (ctx.ISBLANK(off+1) || ctx.ISNEWLINE(off+1))) {
        p_container.ch = ctx.CH(off);
        p_container.is_loose = FALSE;
        p_container.is_task = FALSE;
        p_container.mark_indent = indent;
        p_container.contents_indent = indent + 1;
        *p_end = off + 1;
        return TRUE;
    }

    /* Check for ordered list item marks: up to nine digits... */
    max_end = off + 9;
    if(max_end > ctx.size)
        max_end = ctx.size;
    p_container.start = 0;
    while(off < max_end && ctx.ISDIGIT(off)) {
        p_container.start = p_container.start * 10 + ctx.CH(off) - '0';
        off++;
    }
    /* ...followed by '.' or ')' and a blank or new line. At least one digit
     * is required (off > beg): a bare ". text" is not a list item. */
    if(off > beg && off+1 < ctx.size &&
       (ctx.CH(off) == '.' || ctx.CH(off) == ')') &&
       (ctx.ISBLANK(off+1) || ctx.ISNEWLINE(off+1)))
    {
        p_container.ch = ctx.CH(off);
        p_container.is_loose = FALSE;
        p_container.is_task = FALSE;
        p_container.mark_indent = indent;
        /* Contents start behind the digits and the delimiter. */
        p_container.contents_indent = indent + off - beg + 1;
        *p_end = off + 1;
        return TRUE;
    }

    return FALSE;
}
*/ 6222 off++; 6223 total_indent++; 6224 line.indent = md_line_indentation(ctx, total_indent, off, &off); 6225 total_indent += line.indent; 6226 6227 /* The optional 1st space after '>' is part of the block quote mark. */ 6228 if(line.indent > 0) 6229 line.indent--; 6230 6231 line.beg = off; 6232 } else if(c.ch != '>' && line.indent >= c.contents_indent) { 6233 /* List. */ 6234 line.indent -= c.contents_indent; 6235 } else { 6236 break; 6237 } 6238 6239 n_parents++; 6240 } 6241 6242 if(off >= ctx.size || ctx.ISNEWLINE(off)) { 6243 /* Blank line does not need any real indentation to be nested inside 6244 * a list. */ 6245 if(n_brothers + n_children == 0) { 6246 while(n_parents < ctx.n_containers && ctx.containers[n_parents].ch != '>') 6247 n_parents++; 6248 } 6249 } 6250 6251 while(TRUE) { 6252 /* Check whether we are fenced code continuation. */ 6253 if(pivot_line.type == MD_LINE_FENCEDCODE) { 6254 line.beg = off; 6255 6256 /* We are another MD_LINE_FENCEDCODE unless we are closing fence 6257 * which we transform into MD_LINE_BLANK. */ 6258 if(line.indent < ctx.code_indent_offset) { 6259 if(md_is_closing_code_fence(ctx, ctx.CH(pivot_line.beg), off, &off)) { 6260 line.type = MD_LINE_BLANK; 6261 ctx.last_line_has_list_loosening_effect = FALSE; 6262 break; 6263 } 6264 } 6265 6266 /* Change indentation accordingly to the initial code fence. */ 6267 if(n_parents == ctx.n_containers) { 6268 if(line.indent > pivot_line.indent) 6269 line.indent -= pivot_line.indent; 6270 else 6271 line.indent = 0; 6272 6273 line.type = MD_LINE_FENCEDCODE; 6274 break; 6275 } 6276 } 6277 6278 /* Check whether we are HTML block continuation. */ 6279 if(pivot_line.type == MD_LINE_HTML && ctx.html_block_type > 0) { 6280 int html_block_type; 6281 6282 html_block_type = md_is_html_block_end_condition(ctx, off, &off); 6283 if(html_block_type > 0) { 6284 assert(html_block_type == ctx.html_block_type); 6285 6286 /* Make sure this is the last line of the block. 
*/ 6287 ctx.html_block_type = 0; 6288 6289 /* Some end conditions serve as blank lines at the same time. */ 6290 if(html_block_type == 6 || html_block_type == 7) { 6291 line.type = MD_LINE_BLANK; 6292 line.indent = 0; 6293 break; 6294 } 6295 } 6296 6297 if(n_parents == ctx.n_containers) { 6298 line.type = MD_LINE_HTML; 6299 break; 6300 } 6301 } 6302 6303 /* Check for blank line. */ 6304 if(off >= ctx.size || ctx.ISNEWLINE(off)) { 6305 if(pivot_line.type == MD_LINE_INDENTEDCODE && n_parents == ctx.n_containers) { 6306 line.type = MD_LINE_INDENTEDCODE; 6307 if(line.indent > ctx.code_indent_offset) 6308 line.indent -= ctx.code_indent_offset; 6309 else 6310 line.indent = 0; 6311 ctx.last_line_has_list_loosening_effect = FALSE; 6312 } else { 6313 line.type = MD_LINE_BLANK; 6314 ctx.last_line_has_list_loosening_effect = (n_parents > 0 && 6315 n_brothers + n_children == 0 && 6316 ctx.containers[n_parents-1].ch != '>'); 6317 6318 /* See https://github.com/mity/md4c/issues/6 6319 * 6320 * This ugly checking tests we are in (yet empty) list item but not 6321 * its very first line (with the list item mark). 6322 * 6323 * If we are such blank line, then any following non-blank line 6324 * which would be part of this list item actually ends the list 6325 * because "a list item can begin with at most one blank line." 6326 */ 6327 if(n_parents > 0 && ctx.containers[n_parents-1].ch != '>' && 6328 n_brothers + n_children == 0 && ctx.current_block == null && 6329 ctx.n_block_bytes > cast(int) MD_BLOCK.sizeof) 6330 { 6331 MD_BLOCK* top_block = cast(MD_BLOCK*) (cast(char*)ctx.block_bytes + ctx.n_block_bytes - MD_BLOCK.sizeof); 6332 if(top_block.type == MD_BLOCK_LI) 6333 ctx.last_list_item_starts_with_two_blank_lines = TRUE; 6334 } 6335 } 6336 break; 6337 } else { 6338 /* This is 2nd half of the hack. If the flag is set (that is there 6339 * were 2nd blank line at the start of the list item) and we would also 6340 * belonging to such list item, then interrupt the list. 
*/ 6341 ctx.last_line_has_list_loosening_effect = FALSE; 6342 if(ctx.last_list_item_starts_with_two_blank_lines) { 6343 if(n_parents > 0 && ctx.containers[n_parents-1].ch != '>' && 6344 n_brothers + n_children == 0 && ctx.current_block == null && 6345 ctx.n_block_bytes > cast(int) MD_BLOCK.sizeof) 6346 { 6347 MD_BLOCK* top_block = cast(MD_BLOCK*) (cast(char*)ctx.block_bytes + ctx.n_block_bytes - MD_BLOCK.sizeof); 6348 if(top_block.type == MD_BLOCK_LI) 6349 n_parents--; 6350 } 6351 6352 ctx.last_list_item_starts_with_two_blank_lines = FALSE; 6353 } 6354 } 6355 6356 /* Check whether we are Setext underline. */ 6357 if(line.indent < ctx.code_indent_offset && pivot_line.type == MD_LINE_TEXT 6358 && (ctx.CH(off) == '=' || ctx.CH(off) == '-') 6359 && (n_parents == ctx.n_containers)) 6360 { 6361 uint level; 6362 6363 if(md_is_setext_underline(ctx, off, &off, &level)) { 6364 line.type = MD_LINE_SETEXTUNDERLINE; 6365 line.data = level; 6366 break; 6367 } 6368 } 6369 6370 /* Check for thematic break line. */ 6371 if(line.indent < ctx.code_indent_offset && ctx.ISANYOF(off, "-_*") && off >= hr_killer) { 6372 if(md_is_hr_line(ctx, off, &off, &hr_killer)) { 6373 line.type = MD_LINE_HR; 6374 break; 6375 } 6376 } 6377 6378 /* Check for "brother" container. I.e. whether we are another list item 6379 * in already started list. */ 6380 if(n_parents < ctx.n_containers && n_brothers + n_children == 0) { 6381 OFF tmp; 6382 6383 if(md_is_container_mark(ctx, line.indent, off, &tmp, &container) && 6384 md_is_container_compatible(&ctx.containers[n_parents], &container)) 6385 { 6386 pivot_line = &md_dummy_blank_line; 6387 6388 off = tmp; 6389 6390 total_indent += container.contents_indent - container.mark_indent; 6391 line.indent = md_line_indentation(ctx, total_indent, off, &off); 6392 total_indent += line.indent; 6393 line.beg = off; 6394 6395 /* Some of the following whitespace actually still belongs to the mark. 
*/ 6396 if(off >= ctx.size || ctx.ISNEWLINE(off)) { 6397 container.contents_indent++; 6398 } else if(line.indent <= ctx.code_indent_offset) { 6399 container.contents_indent += line.indent; 6400 line.indent = 0; 6401 } else { 6402 container.contents_indent += 1; 6403 line.indent--; 6404 } 6405 6406 ctx.containers[n_parents].mark_indent = container.mark_indent; 6407 ctx.containers[n_parents].contents_indent = container.contents_indent; 6408 6409 n_brothers++; 6410 continue; 6411 } 6412 } 6413 6414 /* Check for indented code. 6415 * Note indented code block cannot interrupt a paragraph. */ 6416 if(line.indent >= ctx.code_indent_offset && 6417 (pivot_line.type == MD_LINE_BLANK || pivot_line.type == MD_LINE_INDENTEDCODE)) 6418 { 6419 line.type = MD_LINE_INDENTEDCODE; 6420 assert(line.indent >= ctx.code_indent_offset); 6421 line.indent -= ctx.code_indent_offset; 6422 line.data = 0; 6423 break; 6424 } 6425 6426 /* Check for start of a new container block. */ 6427 if(line.indent < ctx.code_indent_offset && 6428 md_is_container_mark(ctx, line.indent, off, &off, &container)) 6429 { 6430 if(pivot_line.type == MD_LINE_TEXT && n_parents == ctx.n_containers && 6431 (off >= ctx.size || ctx.ISNEWLINE(off)) && container.ch != '>') 6432 { 6433 /* Noop. List mark followed by a blank line cannot interrupt a paragraph. */ 6434 } else if(pivot_line.type == MD_LINE_TEXT && n_parents == ctx.n_containers && 6435 (container.ch == '.' || container.ch == ')') && container.start != 1) 6436 { 6437 /* Noop. Ordered list cannot interrupt a paragraph unless the start index is 1. */ 6438 } else { 6439 total_indent += container.contents_indent - container.mark_indent; 6440 line.indent = md_line_indentation(ctx, total_indent, off, &off); 6441 total_indent += line.indent; 6442 6443 line.beg = off; 6444 line.data = container.ch; 6445 6446 /* Some of the following whitespace actually still belongs to the mark. 
*/ 6447 if(off >= ctx.size || ctx.ISNEWLINE(off)) { 6448 container.contents_indent++; 6449 } else if(line.indent <= ctx.code_indent_offset) { 6450 container.contents_indent += line.indent; 6451 line.indent = 0; 6452 } else { 6453 container.contents_indent += 1; 6454 line.indent--; 6455 } 6456 6457 if(n_brothers + n_children == 0) 6458 pivot_line = &md_dummy_blank_line; 6459 6460 if(n_children == 0) 6461 { 6462 ret = (md_leave_child_containers(ctx, n_parents + n_brothers)); 6463 if (ret < 0) goto abort; 6464 } 6465 6466 n_children++; 6467 ret = (md_push_container(ctx, &container)); 6468 if (ret < 0) goto abort; 6469 continue; 6470 } 6471 } 6472 6473 /* Check whether we are table continuation. */ 6474 if(pivot_line.type == MD_LINE_TABLE && md_is_table_row(ctx, off, &off) && 6475 n_parents == ctx.n_containers) 6476 { 6477 line.type = MD_LINE_TABLE; 6478 break; 6479 } 6480 6481 /* Check for ATX header. */ 6482 if(line.indent < ctx.code_indent_offset && ctx.CH(off) == '#') { 6483 uint level; 6484 6485 if(md_is_atxheader_line(ctx, off, &line.beg, &off, &level)) { 6486 line.type = MD_LINE_ATXHEADER; 6487 line.data = level; 6488 break; 6489 } 6490 } 6491 6492 /* Check whether we are starting code fence. */ 6493 if(ctx.CH(off) == '`' || ctx.CH(off) == '~') { 6494 if(md_is_opening_code_fence(ctx, off, &off)) { 6495 line.type = MD_LINE_FENCEDCODE; 6496 line.data = 1; 6497 break; 6498 } 6499 } 6500 6501 /* Check for start of raw HTML block. */ 6502 if(ctx.CH(off) == '<' && !(ctx.parser.flags & MD_FLAG_NOHTMLBLOCKS)) 6503 { 6504 ctx.html_block_type = md_is_html_block_start_condition(ctx, off); 6505 6506 /* HTML block type 7 cannot interrupt paragraph. */ 6507 if(ctx.html_block_type == 7 && pivot_line.type == MD_LINE_TEXT) 6508 ctx.html_block_type = 0; 6509 6510 if(ctx.html_block_type > 0) { 6511 /* The line itself also may immediately close the block. 
*/ 6512 if(md_is_html_block_end_condition(ctx, off, &off) == ctx.html_block_type) { 6513 /* Make sure this is the last line of the block. */ 6514 ctx.html_block_type = 0; 6515 } 6516 6517 line.type = MD_LINE_HTML; 6518 break; 6519 } 6520 } 6521 6522 /* Check for table underline. */ 6523 if((ctx.parser.flags & MD_FLAG_TABLES) && pivot_line.type == MD_LINE_TEXT && 6524 (ctx.CH(off) == '|' || ctx.CH(off) == '-' || ctx.CH(off) == ':') && 6525 n_parents == ctx.n_containers) 6526 { 6527 uint col_count; 6528 6529 if(ctx.current_block != null && ctx.current_block.n_lines == 1 && 6530 md_is_table_underline(ctx, off, &off, &col_count) && 6531 md_is_table_row(ctx, pivot_line.beg, null)) 6532 { 6533 line.data = col_count; 6534 line.type = MD_LINE_TABLEUNDERLINE; 6535 break; 6536 } 6537 } 6538 6539 /* By default, we are normal text line. */ 6540 line.type = MD_LINE_TEXT; 6541 if(pivot_line.type == MD_LINE_TEXT && n_brothers + n_children == 0) { 6542 /* Lazy continuation. */ 6543 n_parents = ctx.n_containers; 6544 } 6545 6546 /* Check for task mark. */ 6547 if((ctx.parser.flags & MD_FLAG_TASKLISTS) && n_brothers + n_children > 0 && 6548 ISANYOF_(ctx.containers[ctx.n_containers-1].ch, "-+*.)")) 6549 { 6550 OFF tmp = off; 6551 6552 while(tmp < ctx.size && tmp < off + 3 && ctx.ISBLANK(tmp)) 6553 tmp++; 6554 if(tmp + 2 < ctx.size && ctx.CH(tmp) == '[' && 6555 ctx.ISANYOF(tmp+1, "xX ") && ctx.CH(tmp+2) == ']' && 6556 (tmp + 3 == ctx.size || ctx.ISBLANK(tmp+3) || ctx.ISNEWLINE(tmp+3))) 6557 { 6558 MD_CONTAINER* task_container = (n_children > 0 ? &ctx.containers[ctx.n_containers-1] : &container); 6559 task_container.is_task = TRUE; 6560 task_container.task_mark_off = tmp + 1; 6561 off = tmp + 3; 6562 while(ctx.ISWHITESPACE(off)) 6563 off++; 6564 line.beg = off; 6565 } 6566 } 6567 6568 break; 6569 } 6570 6571 /* Scan for end of the line. 6572 * 6573 * Note this is quite a bottleneck of the parsing as we here iterate almost 6574 * over compete document. 
6575 */ 6576 { 6577 /* Optimization: Use some loop unrolling. */ 6578 while(off + 3 < ctx.size && !ctx.ISNEWLINE(off+0) && !ctx.ISNEWLINE(off+1) 6579 && !ctx.ISNEWLINE(off+2) && !ctx.ISNEWLINE(off+3)) 6580 off += 4; 6581 while(off < ctx.size && !ctx.ISNEWLINE(off)) 6582 off++; 6583 } 6584 6585 /* Set end of the line. */ 6586 line.end = off; 6587 6588 /* But for ATX header, we should exclude the optional trailing mark. */ 6589 if(line.type == MD_LINE_ATXHEADER) { 6590 OFF tmp = line.end; 6591 while(tmp > line.beg && ctx.CH(tmp-1) == ' ') 6592 tmp--; 6593 while(tmp > line.beg && ctx.CH(tmp-1) == '#') 6594 tmp--; 6595 if(tmp == line.beg || ctx.CH(tmp-1) == ' ' || (ctx.parser.flags & MD_FLAG_PERMISSIVEATXHEADERS)) 6596 line.end = tmp; 6597 } 6598 6599 /* Trim trailing spaces. */ 6600 if(line.type != MD_LINE_INDENTEDCODE && line.type != MD_LINE_FENCEDCODE) { 6601 while(line.end > line.beg && ctx.CH(line.end-1) == ' ') 6602 line.end--; 6603 } 6604 6605 /* Eat also the new line. */ 6606 if(off < ctx.size && ctx.CH(off) == '\r') 6607 off++; 6608 if(off < ctx.size && ctx.CH(off) == '\n') 6609 off++; 6610 6611 *p_end = off; 6612 6613 /* If we belong to a list after seeing a blank line, the list is loose. */ 6614 if(prev_line_has_list_loosening_effect && line.type != MD_LINE_BLANK && n_parents + n_brothers > 0) { 6615 MD_CONTAINER* c = &ctx.containers[n_parents + n_brothers - 1]; 6616 if(c.ch != '>') { 6617 MD_BLOCK* block = cast(MD_BLOCK*) ((cast(char*)ctx.block_bytes) + c.block_byte_off); 6618 block.flags = block.flags | MD_BLOCK_LOOSE_LIST; 6619 } 6620 } 6621 6622 /* Leave any containers we are not part of anymore. */ 6623 if(n_children == 0 && n_parents + n_brothers < ctx.n_containers) 6624 { 6625 ret = (md_leave_child_containers(ctx, n_parents + n_brothers)); 6626 if (ret < 0) goto abort; 6627 } 6628 6629 /* Enter any container we found a mark for. 
*/ 6630 if(n_brothers > 0) { 6631 assert(n_brothers == 1); 6632 ret = (md_push_container_bytes(ctx, MD_BLOCK_LI, 6633 ctx.containers[n_parents].task_mark_off, 6634 (ctx.containers[n_parents].is_task ? ctx.CH(ctx.containers[n_parents].task_mark_off) : 0), 6635 MD_BLOCK_CONTAINER_CLOSER)); 6636 if (ret < 0) goto abort; 6637 ret = (md_push_container_bytes(ctx, MD_BLOCK_LI, 6638 container.task_mark_off, 6639 (container.is_task ? ctx.CH(container.task_mark_off) : 0), 6640 MD_BLOCK_CONTAINER_OPENER)); 6641 if (ret < 0) goto abort; 6642 ctx.containers[n_parents].is_task = container.is_task; 6643 ctx.containers[n_parents].task_mark_off = container.task_mark_off; 6644 } 6645 6646 if(n_children > 0) 6647 { 6648 ret = (md_enter_child_containers(ctx, n_children, line.data)); 6649 if (ret < 0) goto abort; 6650 } 6651 6652 abort: 6653 return ret; 6654 } 6655 6656 int md_process_line(MD_CTX* ctx, const(MD_LINE_ANALYSIS)** p_pivot_line, MD_LINE_ANALYSIS* line) 6657 { 6658 const(MD_LINE_ANALYSIS)* pivot_line = *p_pivot_line; 6659 int ret = 0; 6660 6661 /* Blank line ends current leaf block. */ 6662 if(line.type == MD_LINE_BLANK) { 6663 ret = (md_end_current_block(ctx)); 6664 if (ret < 0) goto abort; 6665 *p_pivot_line = &md_dummy_blank_line; 6666 return 0; 6667 } 6668 6669 /* Some line types form block on their own. */ 6670 if(line.type == MD_LINE_HR || line.type == MD_LINE_ATXHEADER) { 6671 ret = (md_end_current_block(ctx)); 6672 if (ret < 0) goto abort; 6673 6674 /* Add our single-line block. */ 6675 ret = (md_start_new_block(ctx, line)); 6676 if (ret < 0) goto abort; 6677 ret = (md_add_line_into_current_block(ctx, line)); 6678 if (ret < 0) goto abort; 6679 ret = (md_end_current_block(ctx)); 6680 if (ret < 0) goto abort; 6681 *p_pivot_line = &md_dummy_blank_line; 6682 return 0; 6683 } 6684 6685 /* MD_LINE_SETEXTUNDERLINE changes meaning of the current block and ends it. 
*/ 6686 if(line.type == MD_LINE_SETEXTUNDERLINE) { 6687 assert(ctx.current_block != null); 6688 ctx.current_block.type = MD_BLOCK_H; 6689 ctx.current_block.data = line.data; 6690 ctx.current_block.flags = ctx.current_block.flags | MD_BLOCK_SETEXT_HEADER; 6691 ret = (md_add_line_into_current_block(ctx, line)); 6692 if (ret < 0) goto abort; 6693 ret = (md_end_current_block(ctx)); 6694 if (ret < 0) goto abort; 6695 if(ctx.current_block == null) { 6696 *p_pivot_line = &md_dummy_blank_line; 6697 } else { 6698 /* This happens if we have consumed all the body as link ref. defs. 6699 * and downgraded the underline into start of a new paragraph block. */ 6700 line.type = MD_LINE_TEXT; 6701 *p_pivot_line = line; 6702 } 6703 return 0; 6704 } 6705 6706 /* MD_LINE_TABLEUNDERLINE changes meaning of the current block. */ 6707 if(line.type == MD_LINE_TABLEUNDERLINE) { 6708 assert(ctx.current_block != null); 6709 assert(ctx.current_block.n_lines == 1); 6710 ctx.current_block.type = MD_BLOCK_TABLE; 6711 ctx.current_block.data = line.data; 6712 assert(pivot_line != &md_dummy_blank_line); 6713 (cast(MD_LINE_ANALYSIS*)pivot_line).type = MD_LINE_TABLE; 6714 ret = (md_add_line_into_current_block(ctx, line)); 6715 if (ret < 0) goto abort; 6716 return 0; 6717 } 6718 6719 /* The current block also ends if the line has different type. */ 6720 if(line.type != pivot_line.type) 6721 { 6722 ret = (md_end_current_block(ctx)); 6723 if (ret < 0) goto abort; 6724 } 6725 6726 /* The current line may start a new block. */ 6727 if(ctx.current_block == null) { 6728 ret = (md_start_new_block(ctx, line)); 6729 if (ret < 0) goto abort; 6730 *p_pivot_line = line; 6731 } 6732 6733 /* In all other cases the line is just a continuation of the current block. 
*/ 6734 ret = (md_add_line_into_current_block(ctx, line)); 6735 if (ret < 0) goto abort; 6736 6737 abort: 6738 return ret; 6739 } 6740 6741 int md_process_doc(MD_CTX *ctx) 6742 { 6743 const(MD_LINE_ANALYSIS)* pivot_line = &md_dummy_blank_line; 6744 MD_LINE_ANALYSIS[2] line_buf; 6745 MD_LINE_ANALYSIS* line = &line_buf[0]; 6746 OFF off = 0; 6747 int ret = 0; 6748 6749 ret = MD_ENTER_BLOCK(ctx, MD_BLOCK_DOC, null); 6750 if (ret != 0) goto abort; 6751 6752 while(off < ctx.size) { 6753 if(line == pivot_line) 6754 line = (line == &line_buf[0] ? &line_buf[1] : &line_buf[0]); 6755 6756 ret = (md_analyze_line(ctx, off, &off, pivot_line, line)); 6757 if (ret < 0) goto abort; 6758 ret = (md_process_line(ctx, &pivot_line, line)); 6759 if (ret < 0) goto abort; 6760 } 6761 6762 md_end_current_block(ctx); 6763 6764 ret = (md_build_ref_def_hashtable(ctx)); 6765 if (ret < 0) goto abort; 6766 6767 /* Process all blocks. */ 6768 ret = (md_leave_child_containers(ctx, 0)); 6769 if (ret < 0) goto abort; 6770 ret = (md_process_all_blocks(ctx)); 6771 if (ret < 0) goto abort; 6772 6773 ret = MD_LEAVE_BLOCK(ctx, MD_BLOCK_DOC, null); 6774 if (ret != 0) goto abort; 6775 6776 abort: 6777 6778 debug(bench) 6779 /* Output some memory consumption statistics. */ 6780 { 6781 char[256] buffer; 6782 sprintf(buffer, "Alloced %u bytes for block buffer.", 6783 cast(uint)(ctx.alloc_block_bytes)); 6784 ctx.MD_LOG(buffer); 6785 6786 sprintf(buffer, "Alloced %u bytes for containers buffer.", 6787 cast(uint)(ctx.alloc_containers * MD_CONTAINER.sizeof)); 6788 ctx.MD_LOG(buffer); 6789 6790 sprintf(buffer, "Alloced %u bytes for marks buffer.", 6791 cast(uint)(ctx.alloc_marks * MD_MARK.sizeof)); 6792 ctx.MD_LOG(buffer); 6793 6794 sprintf(buffer, "Alloced %u bytes for aux. 
buffer.", 6795 cast(uint)(ctx.alloc_buffer * MD_CHAR.sizeof)); 6796 ctx.MD_LOG(buffer); 6797 } 6798 6799 return ret; 6800 } 6801 6802 6803 /******************** 6804 *** Public API *** 6805 ********************/ 6806 6807 /** 6808 * Parse the Markdown document stored in the string 'text' of size 'size'. 6809 * The renderer provides callbacks to be called during the parsing so the 6810 * caller can render the document on the screen or convert the Markdown 6811 * to another format. 6812 * 6813 * Zero is returned on success. If a runtime error occurs (e.g. a memory 6814 * fails), -1 is returned. If the processing is aborted due any callback 6815 * returning non-zero, md_parse() the return value of the callback is returned. 6816 */ 6817 int md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userdata) 6818 { 6819 MD_CTX ctx; 6820 int i; 6821 int ret; 6822 6823 if(parser.abi_version != 0) { 6824 if(parser.debug_log != null) 6825 parser.debug_log("Unsupported abi_version.", userdata); 6826 return -1; 6827 } 6828 6829 /* Setup context structure. */ 6830 memset(&ctx, 0, MD_CTX.sizeof); 6831 ctx.text = text; 6832 ctx.size = size; 6833 memcpy(&ctx.parser, parser, MD_PARSER.sizeof); 6834 ctx.userdata = userdata; 6835 ctx.code_indent_offset = (ctx.parser.flags & MD_FLAG_NOINDENTEDCODEBLOCKS) ? cast(OFF)(-1) : 4; 6836 md_build_mark_char_map(&ctx); 6837 ctx.doc_ends_with_newline = (size > 0 && ISNEWLINE_(text[size-1])); 6838 6839 /* Reset all unresolved opener mark chains. */ 6840 for(i = 0; i < cast(int) (ctx.mark_chains.length); i++) { 6841 ctx.mark_chains[i].head = -1; 6842 ctx.mark_chains[i].tail = -1; 6843 } 6844 ctx.unresolved_link_head = -1; 6845 ctx.unresolved_link_tail = -1; 6846 6847 /* All the work. */ 6848 ret = md_process_doc(&ctx); 6849 6850 /* Clean-up. 
*/ 6851 md_free_ref_defs(&ctx); 6852 md_free_ref_def_hashtable(&ctx); 6853 free(ctx.buffer); 6854 free(ctx.marks); 6855 free(ctx.block_bytes); 6856 free(ctx.containers); 6857 6858 return ret; 6859 } 6860 6861 // 6862 // HTML ENTITIES 6863 // 6864 6865 /* Most entities are formed by single Unicode codepoint, few by two codepoints. 6866 * Single-codepoint entities have codepoints[1] set to zero. */ 6867 struct entity 6868 { 6869 const(char)* name; 6870 uint[2] codepoints; 6871 } 6872 6873 /* The table is generated from https://html.spec.whatwg.org/entities.json */ 6874 static immutable entity[] entity_table = 6875 [ 6876 entity( "Æ", [ 198, 0 ] ), 6877 entity( "&", [ 38, 0 ] ), 6878 entity( "Á", [ 193, 0 ] ), 6879 entity( "Ă", [ 258, 0 ] ), 6880 entity( "Â", [ 194, 0 ] ), 6881 entity( "А", [ 1040, 0 ] ), 6882 entity( "𝔄", [ 120068, 0 ] ), 6883 entity( "À", [ 192, 0 ] ), 6884 entity( "Α", [ 913, 0 ] ), 6885 entity( "Ā", [ 256, 0 ] ), 6886 entity( "⩓", [ 10835, 0 ] ), 6887 entity( "Ą", [ 260, 0 ] ), 6888 entity( "𝔸", [ 120120, 0 ] ), 6889 entity( "⁡", [ 8289, 0 ] ), 6890 entity( "Å", [ 197, 0 ] ), 6891 entity( "𝒜", [ 119964, 0 ] ), 6892 entity( "≔", [ 8788, 0 ] ), 6893 entity( "Ã", [ 195, 0 ] ), 6894 entity( "Ä", [ 196, 0 ] ), 6895 entity( "∖", [ 8726, 0 ] ), 6896 entity( "⫧", [ 10983, 0 ] ), 6897 entity( "⌆", [ 8966, 0 ] ), 6898 entity( "Б", [ 1041, 0 ] ), 6899 entity( "∵", [ 8757, 0 ] ), 6900 entity( "ℬ", [ 8492, 0 ] ), 6901 entity( "Β", [ 914, 0 ] ), 6902 entity( "𝔅", [ 120069, 0 ] ), 6903 entity( "𝔹", [ 120121, 0 ] ), 6904 entity( "˘", [ 728, 0 ] ), 6905 entity( "ℬ", [ 8492, 0 ] ), 6906 entity( "≎", [ 8782, 0 ] ), 6907 entity( "Ч", [ 1063, 0 ] ), 6908 entity( "©", [ 169, 0 ] ), 6909 entity( "Ć", [ 262, 0 ] ), 6910 entity( "⋒", [ 8914, 0 ] ), 6911 entity( "ⅅ", [ 8517, 0 ] ), 6912 entity( "ℭ", [ 8493, 0 ] ), 6913 entity( "Č", [ 268, 0 ] ), 6914 entity( "Ç", [ 199, 0 ] ), 6915 entity( "Ĉ", [ 264, 0 ] ), 6916 entity( "∰", [ 8752, 0 ] ), 6917 entity( "Ċ", [ 266, 0 ] ), 
6918 entity( "¸", [ 184, 0 ] ), 6919 entity( "·", [ 183, 0 ] ), 6920 entity( "ℭ", [ 8493, 0 ] ), 6921 entity( "Χ", [ 935, 0 ] ), 6922 entity( "⊙", [ 8857, 0 ] ), 6923 entity( "⊖", [ 8854, 0 ] ), 6924 entity( "⊕", [ 8853, 0 ] ), 6925 entity( "⊗", [ 8855, 0 ] ), 6926 entity( "∲", [ 8754, 0 ] ), 6927 entity( "”", [ 8221, 0 ] ), 6928 entity( "’", [ 8217, 0 ] ), 6929 entity( "∷", [ 8759, 0 ] ), 6930 entity( "⩴", [ 10868, 0 ] ), 6931 entity( "≡", [ 8801, 0 ] ), 6932 entity( "∯", [ 8751, 0 ] ), 6933 entity( "∮", [ 8750, 0 ] ), 6934 entity( "ℂ", [ 8450, 0 ] ), 6935 entity( "∐", [ 8720, 0 ] ), 6936 entity( "∳", [ 8755, 0 ] ), 6937 entity( "⨯", [ 10799, 0 ] ), 6938 entity( "𝒞", [ 119966, 0 ] ), 6939 entity( "⋓", [ 8915, 0 ] ), 6940 entity( "≍", [ 8781, 0 ] ), 6941 entity( "ⅅ", [ 8517, 0 ] ), 6942 entity( "⤑", [ 10513, 0 ] ), 6943 entity( "Ђ", [ 1026, 0 ] ), 6944 entity( "Ѕ", [ 1029, 0 ] ), 6945 entity( "Џ", [ 1039, 0 ] ), 6946 entity( "‡", [ 8225, 0 ] ), 6947 entity( "↡", [ 8609, 0 ] ), 6948 entity( "⫤", [ 10980, 0 ] ), 6949 entity( "Ď", [ 270, 0 ] ), 6950 entity( "Д", [ 1044, 0 ] ), 6951 entity( "∇", [ 8711, 0 ] ), 6952 entity( "Δ", [ 916, 0 ] ), 6953 entity( "𝔇", [ 120071, 0 ] ), 6954 entity( "´", [ 180, 0 ] ), 6955 entity( "˙", [ 729, 0 ] ), 6956 entity( "˝", [ 733, 0 ] ), 6957 entity( "`", [ 96, 0 ] ), 6958 entity( "˜", [ 732, 0 ] ), 6959 entity( "⋄", [ 8900, 0 ] ), 6960 entity( "ⅆ", [ 8518, 0 ] ), 6961 entity( "𝔻", [ 120123, 0 ] ), 6962 entity( "¨", [ 168, 0 ] ), 6963 entity( "⃜", [ 8412, 0 ] ), 6964 entity( "≐", [ 8784, 0 ] ), 6965 entity( "∯", [ 8751, 0 ] ), 6966 entity( "¨", [ 168, 0 ] ), 6967 entity( "⇓", [ 8659, 0 ] ), 6968 entity( "⇐", [ 8656, 0 ] ), 6969 entity( "⇔", [ 8660, 0 ] ), 6970 entity( "⫤", [ 10980, 0 ] ), 6971 entity( "⟸", [ 10232, 0 ] ), 6972 entity( "⟺", [ 10234, 0 ] ), 6973 entity( "⟹", [ 10233, 0 ] ), 6974 entity( "⇒", [ 8658, 0 ] ), 6975 entity( "⊨", [ 8872, 0 ] ), 6976 entity( "⇑", [ 8657, 0 ] ), 6977 entity( "⇕", [ 8661, 0 ] ), 6978 entity( "∥", 
[ 8741, 0 ] ), 6979 entity( "↓", [ 8595, 0 ] ), 6980 entity( "⤓", [ 10515, 0 ] ), 6981 entity( "⇵", [ 8693, 0 ] ), 6982 entity( "̑", [ 785, 0 ] ), 6983 entity( "⥐", [ 10576, 0 ] ), 6984 entity( "⥞", [ 10590, 0 ] ), 6985 entity( "↽", [ 8637, 0 ] ), 6986 entity( "⥖", [ 10582, 0 ] ), 6987 entity( "⥟", [ 10591, 0 ] ), 6988 entity( "⇁", [ 8641, 0 ] ), 6989 entity( "⥗", [ 10583, 0 ] ), 6990 entity( "⊤", [ 8868, 0 ] ), 6991 entity( "↧", [ 8615, 0 ] ), 6992 entity( "⇓", [ 8659, 0 ] ), 6993 entity( "𝒟", [ 119967, 0 ] ), 6994 entity( "Đ", [ 272, 0 ] ), 6995 entity( "Ŋ", [ 330, 0 ] ), 6996 entity( "Ð", [ 208, 0 ] ), 6997 entity( "É", [ 201, 0 ] ), 6998 entity( "Ě", [ 282, 0 ] ), 6999 entity( "Ê", [ 202, 0 ] ), 7000 entity( "Э", [ 1069, 0 ] ), 7001 entity( "Ė", [ 278, 0 ] ), 7002 entity( "𝔈", [ 120072, 0 ] ), 7003 entity( "È", [ 200, 0 ] ), 7004 entity( "∈", [ 8712, 0 ] ), 7005 entity( "Ē", [ 274, 0 ] ), 7006 entity( "◻", [ 9723, 0 ] ), 7007 entity( "▫", [ 9643, 0 ] ), 7008 entity( "Ę", [ 280, 0 ] ), 7009 entity( "𝔼", [ 120124, 0 ] ), 7010 entity( "Ε", [ 917, 0 ] ), 7011 entity( "⩵", [ 10869, 0 ] ), 7012 entity( "≂", [ 8770, 0 ] ), 7013 entity( "⇌", [ 8652, 0 ] ), 7014 entity( "ℰ", [ 8496, 0 ] ), 7015 entity( "⩳", [ 10867, 0 ] ), 7016 entity( "Η", [ 919, 0 ] ), 7017 entity( "Ë", [ 203, 0 ] ), 7018 entity( "∃", [ 8707, 0 ] ), 7019 entity( "ⅇ", [ 8519, 0 ] ), 7020 entity( "Ф", [ 1060, 0 ] ), 7021 entity( "𝔉", [ 120073, 0 ] ), 7022 entity( "◼", [ 9724, 0 ] ), 7023 entity( "▪", [ 9642, 0 ] ), 7024 entity( "𝔽", [ 120125, 0 ] ), 7025 entity( "∀", [ 8704, 0 ] ), 7026 entity( "ℱ", [ 8497, 0 ] ), 7027 entity( "ℱ", [ 8497, 0 ] ), 7028 entity( "Ѓ", [ 1027, 0 ] ), 7029 entity( ">", [ 62, 0 ] ), 7030 entity( "Γ", [ 915, 0 ] ), 7031 entity( "Ϝ", [ 988, 0 ] ), 7032 entity( "Ğ", [ 286, 0 ] ), 7033 entity( "Ģ", [ 290, 0 ] ), 7034 entity( "Ĝ", [ 284, 0 ] ), 7035 entity( "Г", [ 1043, 0 ] ), 7036 entity( "Ġ", [ 288, 0 ] ), 7037 entity( "𝔊", [ 120074, 0 ] ), 7038 entity( "⋙", [ 8921, 0 ] ), 7039 
entity( "𝔾", [ 120126, 0 ] ), 7040 entity( "≥", [ 8805, 0 ] ), 7041 entity( "⋛", [ 8923, 0 ] ), 7042 entity( "≧", [ 8807, 0 ] ), 7043 entity( "⪢", [ 10914, 0 ] ), 7044 entity( "≷", [ 8823, 0 ] ), 7045 entity( "⩾", [ 10878, 0 ] ), 7046 entity( "≳", [ 8819, 0 ] ), 7047 entity( "𝒢", [ 119970, 0 ] ), 7048 entity( "≫", [ 8811, 0 ] ), 7049 entity( "Ъ", [ 1066, 0 ] ), 7050 entity( "ˇ", [ 711, 0 ] ), 7051 entity( "^", [ 94, 0 ] ), 7052 entity( "Ĥ", [ 292, 0 ] ), 7053 entity( "ℌ", [ 8460, 0 ] ), 7054 entity( "ℋ", [ 8459, 0 ] ), 7055 entity( "ℍ", [ 8461, 0 ] ), 7056 entity( "─", [ 9472, 0 ] ), 7057 entity( "ℋ", [ 8459, 0 ] ), 7058 entity( "Ħ", [ 294, 0 ] ), 7059 entity( "≎", [ 8782, 0 ] ), 7060 entity( "≏", [ 8783, 0 ] ), 7061 entity( "Е", [ 1045, 0 ] ), 7062 entity( "IJ", [ 306, 0 ] ), 7063 entity( "Ё", [ 1025, 0 ] ), 7064 entity( "Í", [ 205, 0 ] ), 7065 entity( "Î", [ 206, 0 ] ), 7066 entity( "И", [ 1048, 0 ] ), 7067 entity( "İ", [ 304, 0 ] ), 7068 entity( "ℑ", [ 8465, 0 ] ), 7069 entity( "Ì", [ 204, 0 ] ), 7070 entity( "ℑ", [ 8465, 0 ] ), 7071 entity( "Ī", [ 298, 0 ] ), 7072 entity( "ⅈ", [ 8520, 0 ] ), 7073 entity( "⇒", [ 8658, 0 ] ), 7074 entity( "∬", [ 8748, 0 ] ), 7075 entity( "∫", [ 8747, 0 ] ), 7076 entity( "⋂", [ 8898, 0 ] ), 7077 entity( "⁣", [ 8291, 0 ] ), 7078 entity( "⁢", [ 8290, 0 ] ), 7079 entity( "Į", [ 302, 0 ] ), 7080 entity( "𝕀", [ 120128, 0 ] ), 7081 entity( "Ι", [ 921, 0 ] ), 7082 entity( "ℐ", [ 8464, 0 ] ), 7083 entity( "Ĩ", [ 296, 0 ] ), 7084 entity( "І", [ 1030, 0 ] ), 7085 entity( "Ï", [ 207, 0 ] ), 7086 entity( "Ĵ", [ 308, 0 ] ), 7087 entity( "Й", [ 1049, 0 ] ), 7088 entity( "𝔍", [ 120077, 0 ] ), 7089 entity( "𝕁", [ 120129, 0 ] ), 7090 entity( "𝒥", [ 119973, 0 ] ), 7091 entity( "Ј", [ 1032, 0 ] ), 7092 entity( "Є", [ 1028, 0 ] ), 7093 entity( "Х", [ 1061, 0 ] ), 7094 entity( "Ќ", [ 1036, 0 ] ), 7095 entity( "Κ", [ 922, 0 ] ), 7096 entity( "Ķ", [ 310, 0 ] ), 7097 entity( "К", [ 1050, 0 ] ), 7098 entity( "𝔎", [ 120078, 0 ] ), 7099 entity( "𝕂", [ 
120130, 0 ] ), 7100 entity( "𝒦", [ 119974, 0 ] ), 7101 entity( "Љ", [ 1033, 0 ] ), 7102 entity( "<", [ 60, 0 ] ), 7103 entity( "Ĺ", [ 313, 0 ] ), 7104 entity( "Λ", [ 923, 0 ] ), 7105 entity( "⟪", [ 10218, 0 ] ), 7106 entity( "ℒ", [ 8466, 0 ] ), 7107 entity( "↞", [ 8606, 0 ] ), 7108 entity( "Ľ", [ 317, 0 ] ), 7109 entity( "Ļ", [ 315, 0 ] ), 7110 entity( "Л", [ 1051, 0 ] ), 7111 entity( "⟨", [ 10216, 0 ] ), 7112 entity( "←", [ 8592, 0 ] ), 7113 entity( "⇤", [ 8676, 0 ] ), 7114 entity( "⇆", [ 8646, 0 ] ), 7115 entity( "⌈", [ 8968, 0 ] ), 7116 entity( "⟦", [ 10214, 0 ] ), 7117 entity( "⥡", [ 10593, 0 ] ), 7118 entity( "⇃", [ 8643, 0 ] ), 7119 entity( "⥙", [ 10585, 0 ] ), 7120 entity( "⌊", [ 8970, 0 ] ), 7121 entity( "↔", [ 8596, 0 ] ), 7122 entity( "⥎", [ 10574, 0 ] ), 7123 entity( "⊣", [ 8867, 0 ] ), 7124 entity( "↤", [ 8612, 0 ] ), 7125 entity( "⥚", [ 10586, 0 ] ), 7126 entity( "⊲", [ 8882, 0 ] ), 7127 entity( "⧏", [ 10703, 0 ] ), 7128 entity( "⊴", [ 8884, 0 ] ), 7129 entity( "⥑", [ 10577, 0 ] ), 7130 entity( "⥠", [ 10592, 0 ] ), 7131 entity( "↿", [ 8639, 0 ] ), 7132 entity( "⥘", [ 10584, 0 ] ), 7133 entity( "↼", [ 8636, 0 ] ), 7134 entity( "⥒", [ 10578, 0 ] ), 7135 entity( "⇐", [ 8656, 0 ] ), 7136 entity( "⇔", [ 8660, 0 ] ), 7137 entity( "⋚", [ 8922, 0 ] ), 7138 entity( "≦", [ 8806, 0 ] ), 7139 entity( "≶", [ 8822, 0 ] ), 7140 entity( "⪡", [ 10913, 0 ] ), 7141 entity( "⩽", [ 10877, 0 ] ), 7142 entity( "≲", [ 8818, 0 ] ), 7143 entity( "𝔏", [ 120079, 0 ] ), 7144 entity( "⋘", [ 8920, 0 ] ), 7145 entity( "⇚", [ 8666, 0 ] ), 7146 entity( "Ŀ", [ 319, 0 ] ), 7147 entity( "⟵", [ 10229, 0 ] ), 7148 entity( "⟷", [ 10231, 0 ] ), 7149 entity( "⟶", [ 10230, 0 ] ), 7150 entity( "⟸", [ 10232, 0 ] ), 7151 entity( "⟺", [ 10234, 0 ] ), 7152 entity( "⟹", [ 10233, 0 ] ), 7153 entity( "𝕃", [ 120131, 0 ] ), 7154 entity( "↙", [ 8601, 0 ] ), 7155 entity( "↘", [ 8600, 0 ] ), 7156 entity( "ℒ", [ 8466, 0 ] ), 7157 entity( "↰", [ 8624, 0 ] ), 7158 entity( "Ł", [ 321, 0 ] ), 7159 entity( "≪", [ 
8810, 0 ] ), 7160 entity( "⤅", [ 10501, 0 ] ), 7161 entity( "М", [ 1052, 0 ] ), 7162 entity( " ", [ 8287, 0 ] ), 7163 entity( "ℳ", [ 8499, 0 ] ), 7164 entity( "𝔐", [ 120080, 0 ] ), 7165 entity( "∓", [ 8723, 0 ] ), 7166 entity( "𝕄", [ 120132, 0 ] ), 7167 entity( "ℳ", [ 8499, 0 ] ), 7168 entity( "Μ", [ 924, 0 ] ), 7169 entity( "Њ", [ 1034, 0 ] ), 7170 entity( "Ń", [ 323, 0 ] ), 7171 entity( "Ň", [ 327, 0 ] ), 7172 entity( "Ņ", [ 325, 0 ] ), 7173 entity( "Н", [ 1053, 0 ] ), 7174 entity( "​", [ 8203, 0 ] ), 7175 entity( "​", [ 8203, 0 ] ), 7176 entity( "​", [ 8203, 0 ] ), 7177 entity( "​", [ 8203, 0 ] ), 7178 entity( "≫", [ 8811, 0 ] ), 7179 entity( "≪", [ 8810, 0 ] ), 7180 entity( "
", [ 10, 0 ] ), 7181 entity( "𝔑", [ 120081, 0 ] ), 7182 entity( "⁠", [ 8288, 0 ] ), 7183 entity( " ", [ 160, 0 ] ), 7184 entity( "ℕ", [ 8469, 0 ] ), 7185 entity( "⫬", [ 10988, 0 ] ), 7186 entity( "≢", [ 8802, 0 ] ), 7187 entity( "≭", [ 8813, 0 ] ), 7188 entity( "∦", [ 8742, 0 ] ), 7189 entity( "∉", [ 8713, 0 ] ), 7190 entity( "≠", [ 8800, 0 ] ), 7191 entity( "≂̸", [ 8770, 824 ] ), 7192 entity( "∄", [ 8708, 0 ] ), 7193 entity( "≯", [ 8815, 0 ] ), 7194 entity( "≱", [ 8817, 0 ] ), 7195 entity( "≧̸", [ 8807, 824 ] ), 7196 entity( "≫̸", [ 8811, 824 ] ), 7197 entity( "≹", [ 8825, 0 ] ), 7198 entity( "⩾̸", [ 10878, 824 ] ), 7199 entity( "≵", [ 8821, 0 ] ), 7200 entity( "≎̸", [ 8782, 824 ] ), 7201 entity( "≏̸", [ 8783, 824 ] ), 7202 entity( "⋪", [ 8938, 0 ] ), 7203 entity( "⧏̸", [ 10703, 824 ] ), 7204 entity( "⋬", [ 8940, 0 ] ), 7205 entity( "≮", [ 8814, 0 ] ), 7206 entity( "≰", [ 8816, 0 ] ), 7207 entity( "≸", [ 8824, 0 ] ), 7208 entity( "≪̸", [ 8810, 824 ] ), 7209 entity( "⩽̸", [ 10877, 824 ] ), 7210 entity( "≴", [ 8820, 0 ] ), 7211 entity( "⪢̸", [ 10914, 824 ] ), 7212 entity( "⪡̸", [ 10913, 824 ] ), 7213 entity( "⊀", [ 8832, 0 ] ), 7214 entity( "⪯̸", [ 10927, 824 ] ), 7215 entity( "⋠", [ 8928, 0 ] ), 7216 entity( "∌", [ 8716, 0 ] ), 7217 entity( "⋫", [ 8939, 0 ] ), 7218 entity( "⧐̸", [ 10704, 824 ] ), 7219 entity( "⋭", [ 8941, 0 ] ), 7220 entity( "⊏̸", [ 8847, 824 ] ), 7221 entity( "⋢", [ 8930, 0 ] ), 7222 entity( "⊐̸", [ 8848, 824 ] ), 7223 entity( "⋣", [ 8931, 0 ] ), 7224 entity( "⊂⃒", [ 8834, 8402 ] ), 7225 entity( "⊈", [ 8840, 0 ] ), 7226 entity( "⊁", [ 8833, 0 ] ), 7227 entity( "⪰̸", [ 10928, 824 ] ), 7228 entity( "⋡", [ 8929, 0 ] ), 7229 entity( "≿̸", [ 8831, 824 ] ), 7230 entity( "⊃⃒", [ 8835, 8402 ] ), 7231 entity( "⊉", [ 8841, 0 ] ), 7232 entity( "≁", [ 8769, 0 ] ), 7233 entity( "≄", [ 8772, 0 ] ), 7234 entity( "≇", [ 8775, 0 ] ), 7235 entity( "≉", [ 8777, 0 ] ), 7236 entity( "∤", [ 8740, 0 ] ), 7237 entity( "𝒩", [ 119977, 0 ] ), 7238 entity( "Ñ", [ 209, 0 ] ), 
7239 entity( "Ν", [ 925, 0 ] ), 7240 entity( "Œ", [ 338, 0 ] ), 7241 entity( "Ó", [ 211, 0 ] ), 7242 entity( "Ô", [ 212, 0 ] ), 7243 entity( "О", [ 1054, 0 ] ), 7244 entity( "Ő", [ 336, 0 ] ), 7245 entity( "𝔒", [ 120082, 0 ] ), 7246 entity( "Ò", [ 210, 0 ] ), 7247 entity( "Ō", [ 332, 0 ] ), 7248 entity( "Ω", [ 937, 0 ] ), 7249 entity( "Ο", [ 927, 0 ] ), 7250 entity( "𝕆", [ 120134, 0 ] ), 7251 entity( "“", [ 8220, 0 ] ), 7252 entity( "‘", [ 8216, 0 ] ), 7253 entity( "⩔", [ 10836, 0 ] ), 7254 entity( "𝒪", [ 119978, 0 ] ), 7255 entity( "Ø", [ 216, 0 ] ), 7256 entity( "Õ", [ 213, 0 ] ), 7257 entity( "⨷", [ 10807, 0 ] ), 7258 entity( "Ö", [ 214, 0 ] ), 7259 entity( "‾", [ 8254, 0 ] ), 7260 entity( "⏞", [ 9182, 0 ] ), 7261 entity( "⎴", [ 9140, 0 ] ), 7262 entity( "⏜", [ 9180, 0 ] ), 7263 entity( "∂", [ 8706, 0 ] ), 7264 entity( "П", [ 1055, 0 ] ), 7265 entity( "𝔓", [ 120083, 0 ] ), 7266 entity( "Φ", [ 934, 0 ] ), 7267 entity( "Π", [ 928, 0 ] ), 7268 entity( "±", [ 177, 0 ] ), 7269 entity( "ℌ", [ 8460, 0 ] ), 7270 entity( "ℙ", [ 8473, 0 ] ), 7271 entity( "⪻", [ 10939, 0 ] ), 7272 entity( "≺", [ 8826, 0 ] ), 7273 entity( "⪯", [ 10927, 0 ] ), 7274 entity( "≼", [ 8828, 0 ] ), 7275 entity( "≾", [ 8830, 0 ] ), 7276 entity( "″", [ 8243, 0 ] ), 7277 entity( "∏", [ 8719, 0 ] ), 7278 entity( "∷", [ 8759, 0 ] ), 7279 entity( "∝", [ 8733, 0 ] ), 7280 entity( "𝒫", [ 119979, 0 ] ), 7281 entity( "Ψ", [ 936, 0 ] ), 7282 entity( """, [ 34, 0 ] ), 7283 entity( "𝔔", [ 120084, 0 ] ), 7284 entity( "ℚ", [ 8474, 0 ] ), 7285 entity( "𝒬", [ 119980, 0 ] ), 7286 entity( "⤐", [ 10512, 0 ] ), 7287 entity( "®", [ 174, 0 ] ), 7288 entity( "Ŕ", [ 340, 0 ] ), 7289 entity( "⟫", [ 10219, 0 ] ), 7290 entity( "↠", [ 8608, 0 ] ), 7291 entity( "⤖", [ 10518, 0 ] ), 7292 entity( "Ř", [ 344, 0 ] ), 7293 entity( "Ŗ", [ 342, 0 ] ), 7294 entity( "Р", [ 1056, 0 ] ), 7295 entity( "ℜ", [ 8476, 0 ] ), 7296 entity( "∋", [ 8715, 0 ] ), 7297 entity( "⇋", [ 8651, 0 ] ), 7298 entity( "⥯", [ 10607, 0 ] ), 7299 entity( "ℜ", [ 
8476, 0 ] ), 7300 entity( "Ρ", [ 929, 0 ] ), 7301 entity( "⟩", [ 10217, 0 ] ), 7302 entity( "→", [ 8594, 0 ] ), 7303 entity( "⇥", [ 8677, 0 ] ), 7304 entity( "⇄", [ 8644, 0 ] ), 7305 entity( "⌉", [ 8969, 0 ] ), 7306 entity( "⟧", [ 10215, 0 ] ), 7307 entity( "⥝", [ 10589, 0 ] ), 7308 entity( "⇂", [ 8642, 0 ] ), 7309 entity( "⥕", [ 10581, 0 ] ), 7310 entity( "⌋", [ 8971, 0 ] ), 7311 entity( "⊢", [ 8866, 0 ] ), 7312 entity( "↦", [ 8614, 0 ] ), 7313 entity( "⥛", [ 10587, 0 ] ), 7314 entity( "⊳", [ 8883, 0 ] ), 7315 entity( "⧐", [ 10704, 0 ] ), 7316 entity( "⊵", [ 8885, 0 ] ), 7317 entity( "⥏", [ 10575, 0 ] ), 7318 entity( "⥜", [ 10588, 0 ] ), 7319 entity( "↾", [ 8638, 0 ] ), 7320 entity( "⥔", [ 10580, 0 ] ), 7321 entity( "⇀", [ 8640, 0 ] ), 7322 entity( "⥓", [ 10579, 0 ] ), 7323 entity( "⇒", [ 8658, 0 ] ), 7324 entity( "ℝ", [ 8477, 0 ] ), 7325 entity( "⥰", [ 10608, 0 ] ), 7326 entity( "⇛", [ 8667, 0 ] ), 7327 entity( "ℛ", [ 8475, 0 ] ), 7328 entity( "↱", [ 8625, 0 ] ), 7329 entity( "⧴", [ 10740, 0 ] ), 7330 entity( "Щ", [ 1065, 0 ] ), 7331 entity( "Ш", [ 1064, 0 ] ), 7332 entity( "Ь", [ 1068, 0 ] ), 7333 entity( "Ś", [ 346, 0 ] ), 7334 entity( "⪼", [ 10940, 0 ] ), 7335 entity( "Š", [ 352, 0 ] ), 7336 entity( "Ş", [ 350, 0 ] ), 7337 entity( "Ŝ", [ 348, 0 ] ), 7338 entity( "С", [ 1057, 0 ] ), 7339 entity( "𝔖", [ 120086, 0 ] ), 7340 entity( "↓", [ 8595, 0 ] ), 7341 entity( "←", [ 8592, 0 ] ), 7342 entity( "→", [ 8594, 0 ] ), 7343 entity( "↑", [ 8593, 0 ] ), 7344 entity( "Σ", [ 931, 0 ] ), 7345 entity( "∘", [ 8728, 0 ] ), 7346 entity( "𝕊", [ 120138, 0 ] ), 7347 entity( "√", [ 8730, 0 ] ), 7348 entity( "□", [ 9633, 0 ] ), 7349 entity( "⊓", [ 8851, 0 ] ), 7350 entity( "⊏", [ 8847, 0 ] ), 7351 entity( "⊑", [ 8849, 0 ] ), 7352 entity( "⊐", [ 8848, 0 ] ), 7353 entity( "⊒", [ 8850, 0 ] ), 7354 entity( "⊔", [ 8852, 0 ] ), 7355 entity( "𝒮", [ 119982, 0 ] ), 7356 entity( "⋆", [ 8902, 0 ] ), 7357 entity( "⋐", [ 8912, 0 ] ), 7358 entity( "⋐", [ 8912, 0 ] ), 7359 entity( "⊆", [ 8838, 
0 ] ), 7360 entity( "≻", [ 8827, 0 ] ), 7361 entity( "⪰", [ 10928, 0 ] ), 7362 entity( "≽", [ 8829, 0 ] ), 7363 entity( "≿", [ 8831, 0 ] ), 7364 entity( "∋", [ 8715, 0 ] ), 7365 entity( "∑", [ 8721, 0 ] ), 7366 entity( "⋑", [ 8913, 0 ] ), 7367 entity( "⊃", [ 8835, 0 ] ), 7368 entity( "⊇", [ 8839, 0 ] ), 7369 entity( "⋑", [ 8913, 0 ] ), 7370 entity( "Þ", [ 222, 0 ] ), 7371 entity( "™", [ 8482, 0 ] ), 7372 entity( "Ћ", [ 1035, 0 ] ), 7373 entity( "Ц", [ 1062, 0 ] ), 7374 entity( "	", [ 9, 0 ] ), 7375 entity( "Τ", [ 932, 0 ] ), 7376 entity( "Ť", [ 356, 0 ] ), 7377 entity( "Ţ", [ 354, 0 ] ), 7378 entity( "Т", [ 1058, 0 ] ), 7379 entity( "𝔗", [ 120087, 0 ] ), 7380 entity( "∴", [ 8756, 0 ] ), 7381 entity( "Θ", [ 920, 0 ] ), 7382 entity( "  ", [ 8287, 8202 ] ), 7383 entity( " ", [ 8201, 0 ] ), 7384 entity( "∼", [ 8764, 0 ] ), 7385 entity( "≃", [ 8771, 0 ] ), 7386 entity( "≅", [ 8773, 0 ] ), 7387 entity( "≈", [ 8776, 0 ] ), 7388 entity( "𝕋", [ 120139, 0 ] ), 7389 entity( "⃛", [ 8411, 0 ] ), 7390 entity( "𝒯", [ 119983, 0 ] ), 7391 entity( "Ŧ", [ 358, 0 ] ), 7392 entity( "Ú", [ 218, 0 ] ), 7393 entity( "↟", [ 8607, 0 ] ), 7394 entity( "⥉", [ 10569, 0 ] ), 7395 entity( "Ў", [ 1038, 0 ] ), 7396 entity( "Ŭ", [ 364, 0 ] ), 7397 entity( "Û", [ 219, 0 ] ), 7398 entity( "У", [ 1059, 0 ] ), 7399 entity( "Ű", [ 368, 0 ] ), 7400 entity( "𝔘", [ 120088, 0 ] ), 7401 entity( "Ù", [ 217, 0 ] ), 7402 entity( "Ū", [ 362, 0 ] ), 7403 entity( "_", [ 95, 0 ] ), 7404 entity( "⏟", [ 9183, 0 ] ), 7405 entity( "⎵", [ 9141, 0 ] ), 7406 entity( "⏝", [ 9181, 0 ] ), 7407 entity( "⋃", [ 8899, 0 ] ), 7408 entity( "⊎", [ 8846, 0 ] ), 7409 entity( "Ų", [ 370, 0 ] ), 7410 entity( "𝕌", [ 120140, 0 ] ), 7411 entity( "↑", [ 8593, 0 ] ), 7412 entity( "⤒", [ 10514, 0 ] ), 7413 entity( "⇅", [ 8645, 0 ] ), 7414 entity( "↕", [ 8597, 0 ] ), 7415 entity( "⥮", [ 10606, 0 ] ), 7416 entity( "⊥", [ 8869, 0 ] ), 7417 entity( "↥", [ 8613, 0 ] ), 7418 entity( "⇑", [ 8657, 0 ] ), 7419 entity( "⇕", [ 8661, 0 ] ), 7420 entity( 
"↖", [ 8598, 0 ] ), 7421 entity( "↗", [ 8599, 0 ] ), 7422 entity( "ϒ", [ 978, 0 ] ), 7423 entity( "Υ", [ 933, 0 ] ), 7424 entity( "Ů", [ 366, 0 ] ), 7425 entity( "𝒰", [ 119984, 0 ] ), 7426 entity( "Ũ", [ 360, 0 ] ), 7427 entity( "Ü", [ 220, 0 ] ), 7428 entity( "⊫", [ 8875, 0 ] ), 7429 entity( "⫫", [ 10987, 0 ] ), 7430 entity( "В", [ 1042, 0 ] ), 7431 entity( "⊩", [ 8873, 0 ] ), 7432 entity( "⫦", [ 10982, 0 ] ), 7433 entity( "⋁", [ 8897, 0 ] ), 7434 entity( "‖", [ 8214, 0 ] ), 7435 entity( "‖", [ 8214, 0 ] ), 7436 entity( "∣", [ 8739, 0 ] ), 7437 entity( "|", [ 124, 0 ] ), 7438 entity( "❘", [ 10072, 0 ] ), 7439 entity( "≀", [ 8768, 0 ] ), 7440 entity( " ", [ 8202, 0 ] ), 7441 entity( "𝔙", [ 120089, 0 ] ), 7442 entity( "𝕍", [ 120141, 0 ] ), 7443 entity( "𝒱", [ 119985, 0 ] ), 7444 entity( "⊪", [ 8874, 0 ] ), 7445 entity( "Ŵ", [ 372, 0 ] ), 7446 entity( "⋀", [ 8896, 0 ] ), 7447 entity( "𝔚", [ 120090, 0 ] ), 7448 entity( "𝕎", [ 120142, 0 ] ), 7449 entity( "𝒲", [ 119986, 0 ] ), 7450 entity( "𝔛", [ 120091, 0 ] ), 7451 entity( "Ξ", [ 926, 0 ] ), 7452 entity( "𝕏", [ 120143, 0 ] ), 7453 entity( "𝒳", [ 119987, 0 ] ), 7454 entity( "Я", [ 1071, 0 ] ), 7455 entity( "Ї", [ 1031, 0 ] ), 7456 entity( "Ю", [ 1070, 0 ] ), 7457 entity( "Ý", [ 221, 0 ] ), 7458 entity( "Ŷ", [ 374, 0 ] ), 7459 entity( "Ы", [ 1067, 0 ] ), 7460 entity( "𝔜", [ 120092, 0 ] ), 7461 entity( "𝕐", [ 120144, 0 ] ), 7462 entity( "𝒴", [ 119988, 0 ] ), 7463 entity( "Ÿ", [ 376, 0 ] ), 7464 entity( "Ж", [ 1046, 0 ] ), 7465 entity( "Ź", [ 377, 0 ] ), 7466 entity( "Ž", [ 381, 0 ] ), 7467 entity( "З", [ 1047, 0 ] ), 7468 entity( "Ż", [ 379, 0 ] ), 7469 entity( "​", [ 8203, 0 ] ), 7470 entity( "Ζ", [ 918, 0 ] ), 7471 entity( "ℨ", [ 8488, 0 ] ), 7472 entity( "ℤ", [ 8484, 0 ] ), 7473 entity( "𝒵", [ 119989, 0 ] ), 7474 entity( "á", [ 225, 0 ] ), 7475 entity( "ă", [ 259, 0 ] ), 7476 entity( "∾", [ 8766, 0 ] ), 7477 entity( "∾̳", [ 8766, 819 ] ), 7478 entity( "∿", [ 8767, 0 ] ), 7479 entity( "â", [ 226, 0 ] ), 7480 entity( 
"´", [ 180, 0 ] ), 7481 entity( "а", [ 1072, 0 ] ), 7482 entity( "æ", [ 230, 0 ] ), 7483 entity( "⁡", [ 8289, 0 ] ), 7484 entity( "𝔞", [ 120094, 0 ] ), 7485 entity( "à", [ 224, 0 ] ), 7486 entity( "ℵ", [ 8501, 0 ] ), 7487 entity( "ℵ", [ 8501, 0 ] ), 7488 entity( "α", [ 945, 0 ] ), 7489 entity( "ā", [ 257, 0 ] ), 7490 entity( "⨿", [ 10815, 0 ] ), 7491 entity( "&", [ 38, 0 ] ), 7492 entity( "∧", [ 8743, 0 ] ), 7493 entity( "⩕", [ 10837, 0 ] ), 7494 entity( "⩜", [ 10844, 0 ] ), 7495 entity( "⩘", [ 10840, 0 ] ), 7496 entity( "⩚", [ 10842, 0 ] ), 7497 entity( "∠", [ 8736, 0 ] ), 7498 entity( "⦤", [ 10660, 0 ] ), 7499 entity( "∠", [ 8736, 0 ] ), 7500 entity( "∡", [ 8737, 0 ] ), 7501 entity( "⦨", [ 10664, 0 ] ), 7502 entity( "⦩", [ 10665, 0 ] ), 7503 entity( "⦪", [ 10666, 0 ] ), 7504 entity( "⦫", [ 10667, 0 ] ), 7505 entity( "⦬", [ 10668, 0 ] ), 7506 entity( "⦭", [ 10669, 0 ] ), 7507 entity( "⦮", [ 10670, 0 ] ), 7508 entity( "⦯", [ 10671, 0 ] ), 7509 entity( "∟", [ 8735, 0 ] ), 7510 entity( "⊾", [ 8894, 0 ] ), 7511 entity( "⦝", [ 10653, 0 ] ), 7512 entity( "∢", [ 8738, 0 ] ), 7513 entity( "Å", [ 197, 0 ] ), 7514 entity( "⍼", [ 9084, 0 ] ), 7515 entity( "ą", [ 261, 0 ] ), 7516 entity( "𝕒", [ 120146, 0 ] ), 7517 entity( "≈", [ 8776, 0 ] ), 7518 entity( "⩰", [ 10864, 0 ] ), 7519 entity( "⩯", [ 10863, 0 ] ), 7520 entity( "≊", [ 8778, 0 ] ), 7521 entity( "≋", [ 8779, 0 ] ), 7522 entity( "'", [ 39, 0 ] ), 7523 entity( "≈", [ 8776, 0 ] ), 7524 entity( "≊", [ 8778, 0 ] ), 7525 entity( "å", [ 229, 0 ] ), 7526 entity( "𝒶", [ 119990, 0 ] ), 7527 entity( "*", [ 42, 0 ] ), 7528 entity( "≈", [ 8776, 0 ] ), 7529 entity( "≍", [ 8781, 0 ] ), 7530 entity( "ã", [ 227, 0 ] ), 7531 entity( "ä", [ 228, 0 ] ), 7532 entity( "∳", [ 8755, 0 ] ), 7533 entity( "⨑", [ 10769, 0 ] ), 7534 entity( "⫭", [ 10989, 0 ] ), 7535 entity( "≌", [ 8780, 0 ] ), 7536 entity( "϶", [ 1014, 0 ] ), 7537 entity( "‵", [ 8245, 0 ] ), 7538 entity( "∽", [ 8765, 0 ] ), 7539 entity( "⋍", [ 8909, 0 ] ), 7540 entity( "⊽", [ 
8893, 0 ] ), 7541 entity( "⌅", [ 8965, 0 ] ), 7542 entity( "⌅", [ 8965, 0 ] ), 7543 entity( "⎵", [ 9141, 0 ] ), 7544 entity( "⎶", [ 9142, 0 ] ), 7545 entity( "≌", [ 8780, 0 ] ), 7546 entity( "б", [ 1073, 0 ] ), 7547 entity( "„", [ 8222, 0 ] ), 7548 entity( "∵", [ 8757, 0 ] ), 7549 entity( "∵", [ 8757, 0 ] ), 7550 entity( "⦰", [ 10672, 0 ] ), 7551 entity( "϶", [ 1014, 0 ] ), 7552 entity( "ℬ", [ 8492, 0 ] ), 7553 entity( "β", [ 946, 0 ] ), 7554 entity( "ℶ", [ 8502, 0 ] ), 7555 entity( "≬", [ 8812, 0 ] ), 7556 entity( "𝔟", [ 120095, 0 ] ), 7557 entity( "⋂", [ 8898, 0 ] ), 7558 entity( "◯", [ 9711, 0 ] ), 7559 entity( "⋃", [ 8899, 0 ] ), 7560 entity( "⨀", [ 10752, 0 ] ), 7561 entity( "⨁", [ 10753, 0 ] ), 7562 entity( "⨂", [ 10754, 0 ] ), 7563 entity( "⨆", [ 10758, 0 ] ), 7564 entity( "★", [ 9733, 0 ] ), 7565 entity( "▽", [ 9661, 0 ] ), 7566 entity( "△", [ 9651, 0 ] ), 7567 entity( "⨄", [ 10756, 0 ] ), 7568 entity( "⋁", [ 8897, 0 ] ), 7569 entity( "⋀", [ 8896, 0 ] ), 7570 entity( "⤍", [ 10509, 0 ] ), 7571 entity( "⧫", [ 10731, 0 ] ), 7572 entity( "▪", [ 9642, 0 ] ), 7573 entity( "▴", [ 9652, 0 ] ), 7574 entity( "▾", [ 9662, 0 ] ), 7575 entity( "◂", [ 9666, 0 ] ), 7576 entity( "▸", [ 9656, 0 ] ), 7577 entity( "␣", [ 9251, 0 ] ), 7578 entity( "▒", [ 9618, 0 ] ), 7579 entity( "░", [ 9617, 0 ] ), 7580 entity( "▓", [ 9619, 0 ] ), 7581 entity( "█", [ 9608, 0 ] ), 7582 entity( "=⃥", [ 61, 8421 ] ), 7583 entity( "≡⃥", [ 8801, 8421 ] ), 7584 entity( "⌐", [ 8976, 0 ] ), 7585 entity( "𝕓", [ 120147, 0 ] ), 7586 entity( "⊥", [ 8869, 0 ] ), 7587 entity( "⊥", [ 8869, 0 ] ), 7588 entity( "⋈", [ 8904, 0 ] ), 7589 entity( "╗", [ 9559, 0 ] ), 7590 entity( "╔", [ 9556, 0 ] ), 7591 entity( "╖", [ 9558, 0 ] ), 7592 entity( "╓", [ 9555, 0 ] ), 7593 entity( "═", [ 9552, 0 ] ), 7594 entity( "╦", [ 9574, 0 ] ), 7595 entity( "╩", [ 9577, 0 ] ), 7596 entity( "╤", [ 9572, 0 ] ), 7597 entity( "╧", [ 9575, 0 ] ), 7598 entity( "╝", [ 9565, 0 ] ), 7599 entity( "╚", [ 9562, 0 ] ), 7600 entity( "╜", [ 
9564, 0 ] ), 7601 entity( "╙", [ 9561, 0 ] ), 7602 entity( "║", [ 9553, 0 ] ), 7603 entity( "╬", [ 9580, 0 ] ), 7604 entity( "╣", [ 9571, 0 ] ), 7605 entity( "╠", [ 9568, 0 ] ), 7606 entity( "╫", [ 9579, 0 ] ), 7607 entity( "╢", [ 9570, 0 ] ), 7608 entity( "╟", [ 9567, 0 ] ), 7609 entity( "⧉", [ 10697, 0 ] ), 7610 entity( "╕", [ 9557, 0 ] ), 7611 entity( "╒", [ 9554, 0 ] ), 7612 entity( "┐", [ 9488, 0 ] ), 7613 entity( "┌", [ 9484, 0 ] ), 7614 entity( "─", [ 9472, 0 ] ), 7615 entity( "╥", [ 9573, 0 ] ), 7616 entity( "╨", [ 9576, 0 ] ), 7617 entity( "┬", [ 9516, 0 ] ), 7618 entity( "┴", [ 9524, 0 ] ), 7619 entity( "⊟", [ 8863, 0 ] ), 7620 entity( "⊞", [ 8862, 0 ] ), 7621 entity( "⊠", [ 8864, 0 ] ), 7622 entity( "╛", [ 9563, 0 ] ), 7623 entity( "╘", [ 9560, 0 ] ), 7624 entity( "┘", [ 9496, 0 ] ), 7625 entity( "└", [ 9492, 0 ] ), 7626 entity( "│", [ 9474, 0 ] ), 7627 entity( "╪", [ 9578, 0 ] ), 7628 entity( "╡", [ 9569, 0 ] ), 7629 entity( "╞", [ 9566, 0 ] ), 7630 entity( "┼", [ 9532, 0 ] ), 7631 entity( "┤", [ 9508, 0 ] ), 7632 entity( "├", [ 9500, 0 ] ), 7633 entity( "‵", [ 8245, 0 ] ), 7634 entity( "˘", [ 728, 0 ] ), 7635 entity( "¦", [ 166, 0 ] ), 7636 entity( "𝒷", [ 119991, 0 ] ), 7637 entity( "⁏", [ 8271, 0 ] ), 7638 entity( "∽", [ 8765, 0 ] ), 7639 entity( "⋍", [ 8909, 0 ] ), 7640 entity( "\", [ 92, 0 ] ), 7641 entity( "⧅", [ 10693, 0 ] ), 7642 entity( "⟈", [ 10184, 0 ] ), 7643 entity( "•", [ 8226, 0 ] ), 7644 entity( "•", [ 8226, 0 ] ), 7645 entity( "≎", [ 8782, 0 ] ), 7646 entity( "⪮", [ 10926, 0 ] ), 7647 entity( "≏", [ 8783, 0 ] ), 7648 entity( "≏", [ 8783, 0 ] ), 7649 entity( "ć", [ 263, 0 ] ), 7650 entity( "∩", [ 8745, 0 ] ), 7651 entity( "⩄", [ 10820, 0 ] ), 7652 entity( "⩉", [ 10825, 0 ] ), 7653 entity( "⩋", [ 10827, 0 ] ), 7654 entity( "⩇", [ 10823, 0 ] ), 7655 entity( "⩀", [ 10816, 0 ] ), 7656 entity( "∩︀", [ 8745, 65024 ] ), 7657 entity( "⁁", [ 8257, 0 ] ), 7658 entity( "ˇ", [ 711, 0 ] ), 7659 entity( "⩍", [ 10829, 0 ] ), 7660 entity( "č", [ 269, 0 ] 
), 7661 entity( "ç", [ 231, 0 ] ), 7662 entity( "ĉ", [ 265, 0 ] ), 7663 entity( "⩌", [ 10828, 0 ] ), 7664 entity( "⩐", [ 10832, 0 ] ), 7665 entity( "ċ", [ 267, 0 ] ), 7666 entity( "¸", [ 184, 0 ] ), 7667 entity( "⦲", [ 10674, 0 ] ), 7668 entity( "¢", [ 162, 0 ] ), 7669 entity( "·", [ 183, 0 ] ), 7670 entity( "𝔠", [ 120096, 0 ] ), 7671 entity( "ч", [ 1095, 0 ] ), 7672 entity( "✓", [ 10003, 0 ] ), 7673 entity( "✓", [ 10003, 0 ] ), 7674 entity( "χ", [ 967, 0 ] ), 7675 entity( "○", [ 9675, 0 ] ), 7676 entity( "⧃", [ 10691, 0 ] ), 7677 entity( "ˆ", [ 710, 0 ] ), 7678 entity( "≗", [ 8791, 0 ] ), 7679 entity( "↺", [ 8634, 0 ] ), 7680 entity( "↻", [ 8635, 0 ] ), 7681 entity( "®", [ 174, 0 ] ), 7682 entity( "Ⓢ", [ 9416, 0 ] ), 7683 entity( "⊛", [ 8859, 0 ] ), 7684 entity( "⊚", [ 8858, 0 ] ), 7685 entity( "⊝", [ 8861, 0 ] ), 7686 entity( "≗", [ 8791, 0 ] ), 7687 entity( "⨐", [ 10768, 0 ] ), 7688 entity( "⫯", [ 10991, 0 ] ), 7689 entity( "⧂", [ 10690, 0 ] ), 7690 entity( "♣", [ 9827, 0 ] ), 7691 entity( "♣", [ 9827, 0 ] ), 7692 entity( ":", [ 58, 0 ] ), 7693 entity( "≔", [ 8788, 0 ] ), 7694 entity( "≔", [ 8788, 0 ] ), 7695 entity( ",", [ 44, 0 ] ), 7696 entity( "@", [ 64, 0 ] ), 7697 entity( "∁", [ 8705, 0 ] ), 7698 entity( "∘", [ 8728, 0 ] ), 7699 entity( "∁", [ 8705, 0 ] ), 7700 entity( "ℂ", [ 8450, 0 ] ), 7701 entity( "≅", [ 8773, 0 ] ), 7702 entity( "⩭", [ 10861, 0 ] ), 7703 entity( "∮", [ 8750, 0 ] ), 7704 entity( "𝕔", [ 120148, 0 ] ), 7705 entity( "∐", [ 8720, 0 ] ), 7706 entity( "©", [ 169, 0 ] ), 7707 entity( "℗", [ 8471, 0 ] ), 7708 entity( "↵", [ 8629, 0 ] ), 7709 entity( "✗", [ 10007, 0 ] ), 7710 entity( "𝒸", [ 119992, 0 ] ), 7711 entity( "⫏", [ 10959, 0 ] ), 7712 entity( "⫑", [ 10961, 0 ] ), 7713 entity( "⫐", [ 10960, 0 ] ), 7714 entity( "⫒", [ 10962, 0 ] ), 7715 entity( "⋯", [ 8943, 0 ] ), 7716 entity( "⤸", [ 10552, 0 ] ), 7717 entity( "⤵", [ 10549, 0 ] ), 7718 entity( "⋞", [ 8926, 0 ] ), 7719 entity( "⋟", [ 8927, 0 ] ), 7720 entity( "↶", [ 8630, 0 ] ), 7721 
entity( "⤽", [ 10557, 0 ] ), 7722 entity( "∪", [ 8746, 0 ] ), 7723 entity( "⩈", [ 10824, 0 ] ), 7724 entity( "⩆", [ 10822, 0 ] ), 7725 entity( "⩊", [ 10826, 0 ] ), 7726 entity( "⊍", [ 8845, 0 ] ), 7727 entity( "⩅", [ 10821, 0 ] ), 7728 entity( "∪︀", [ 8746, 65024 ] ), 7729 entity( "↷", [ 8631, 0 ] ), 7730 entity( "⤼", [ 10556, 0 ] ), 7731 entity( "⋞", [ 8926, 0 ] ), 7732 entity( "⋟", [ 8927, 0 ] ), 7733 entity( "⋎", [ 8910, 0 ] ), 7734 entity( "⋏", [ 8911, 0 ] ), 7735 entity( "¤", [ 164, 0 ] ), 7736 entity( "↶", [ 8630, 0 ] ), 7737 entity( "↷", [ 8631, 0 ] ), 7738 entity( "⋎", [ 8910, 0 ] ), 7739 entity( "⋏", [ 8911, 0 ] ), 7740 entity( "∲", [ 8754, 0 ] ), 7741 entity( "∱", [ 8753, 0 ] ), 7742 entity( "⌭", [ 9005, 0 ] ), 7743 entity( "⇓", [ 8659, 0 ] ), 7744 entity( "⥥", [ 10597, 0 ] ), 7745 entity( "†", [ 8224, 0 ] ), 7746 entity( "ℸ", [ 8504, 0 ] ), 7747 entity( "↓", [ 8595, 0 ] ), 7748 entity( "‐", [ 8208, 0 ] ), 7749 entity( "⊣", [ 8867, 0 ] ), 7750 entity( "⤏", [ 10511, 0 ] ), 7751 entity( "˝", [ 733, 0 ] ), 7752 entity( "ď", [ 271, 0 ] ), 7753 entity( "д", [ 1076, 0 ] ), 7754 entity( "ⅆ", [ 8518, 0 ] ), 7755 entity( "‡", [ 8225, 0 ] ), 7756 entity( "⇊", [ 8650, 0 ] ), 7757 entity( "⩷", [ 10871, 0 ] ), 7758 entity( "°", [ 176, 0 ] ), 7759 entity( "δ", [ 948, 0 ] ), 7760 entity( "⦱", [ 10673, 0 ] ), 7761 entity( "⥿", [ 10623, 0 ] ), 7762 entity( "𝔡", [ 120097, 0 ] ), 7763 entity( "⇃", [ 8643, 0 ] ), 7764 entity( "⇂", [ 8642, 0 ] ), 7765 entity( "⋄", [ 8900, 0 ] ), 7766 entity( "⋄", [ 8900, 0 ] ), 7767 entity( "♦", [ 9830, 0 ] ), 7768 entity( "♦", [ 9830, 0 ] ), 7769 entity( "¨", [ 168, 0 ] ), 7770 entity( "ϝ", [ 989, 0 ] ), 7771 entity( "⋲", [ 8946, 0 ] ), 7772 entity( "÷", [ 247, 0 ] ), 7773 entity( "÷", [ 247, 0 ] ), 7774 entity( "⋇", [ 8903, 0 ] ), 7775 entity( "⋇", [ 8903, 0 ] ), 7776 entity( "ђ", [ 1106, 0 ] ), 7777 entity( "⌞", [ 8990, 0 ] ), 7778 entity( "⌍", [ 8973, 0 ] ), 7779 entity( "$", [ 36, 0 ] ), 7780 entity( "𝕕", [ 120149, 0 ] ), 7781 entity( 
"˙", [ 729, 0 ] ), 7782 entity( "≐", [ 8784, 0 ] ), 7783 entity( "≑", [ 8785, 0 ] ), 7784 entity( "∸", [ 8760, 0 ] ), 7785 entity( "∔", [ 8724, 0 ] ), 7786 entity( "⊡", [ 8865, 0 ] ), 7787 entity( "⌆", [ 8966, 0 ] ), 7788 entity( "↓", [ 8595, 0 ] ), 7789 entity( "⇊", [ 8650, 0 ] ), 7790 entity( "⇃", [ 8643, 0 ] ), 7791 entity( "⇂", [ 8642, 0 ] ), 7792 entity( "⤐", [ 10512, 0 ] ), 7793 entity( "⌟", [ 8991, 0 ] ), 7794 entity( "⌌", [ 8972, 0 ] ), 7795 entity( "𝒹", [ 119993, 0 ] ), 7796 entity( "ѕ", [ 1109, 0 ] ), 7797 entity( "⧶", [ 10742, 0 ] ), 7798 entity( "đ", [ 273, 0 ] ), 7799 entity( "⋱", [ 8945, 0 ] ), 7800 entity( "▿", [ 9663, 0 ] ), 7801 entity( "▾", [ 9662, 0 ] ), 7802 entity( "⇵", [ 8693, 0 ] ), 7803 entity( "⥯", [ 10607, 0 ] ), 7804 entity( "⦦", [ 10662, 0 ] ), 7805 entity( "џ", [ 1119, 0 ] ), 7806 entity( "⟿", [ 10239, 0 ] ), 7807 entity( "⩷", [ 10871, 0 ] ), 7808 entity( "≑", [ 8785, 0 ] ), 7809 entity( "é", [ 233, 0 ] ), 7810 entity( "⩮", [ 10862, 0 ] ), 7811 entity( "ě", [ 283, 0 ] ), 7812 entity( "≖", [ 8790, 0 ] ), 7813 entity( "ê", [ 234, 0 ] ), 7814 entity( "≕", [ 8789, 0 ] ), 7815 entity( "э", [ 1101, 0 ] ), 7816 entity( "ė", [ 279, 0 ] ), 7817 entity( "ⅇ", [ 8519, 0 ] ), 7818 entity( "≒", [ 8786, 0 ] ), 7819 entity( "𝔢", [ 120098, 0 ] ), 7820 entity( "⪚", [ 10906, 0 ] ), 7821 entity( "è", [ 232, 0 ] ), 7822 entity( "⪖", [ 10902, 0 ] ), 7823 entity( "⪘", [ 10904, 0 ] ), 7824 entity( "⪙", [ 10905, 0 ] ), 7825 entity( "⏧", [ 9191, 0 ] ), 7826 entity( "ℓ", [ 8467, 0 ] ), 7827 entity( "⪕", [ 10901, 0 ] ), 7828 entity( "⪗", [ 10903, 0 ] ), 7829 entity( "ē", [ 275, 0 ] ), 7830 entity( "∅", [ 8709, 0 ] ), 7831 entity( "∅", [ 8709, 0 ] ), 7832 entity( "∅", [ 8709, 0 ] ), 7833 entity( " ", [ 8196, 0 ] ), 7834 entity( " ", [ 8197, 0 ] ), 7835 entity( " ", [ 8195, 0 ] ), 7836 entity( "ŋ", [ 331, 0 ] ), 7837 entity( " ", [ 8194, 0 ] ), 7838 entity( "ę", [ 281, 0 ] ), 7839 entity( "𝕖", [ 120150, 0 ] ), 7840 entity( "⋕", [ 8917, 0 ] ), 7841 entity( "⧣", [ 
10723, 0 ] ), 7842 entity( "⩱", [ 10865, 0 ] ), 7843 entity( "ε", [ 949, 0 ] ), 7844 entity( "ε", [ 949, 0 ] ), 7845 entity( "ϵ", [ 1013, 0 ] ), 7846 entity( "≖", [ 8790, 0 ] ), 7847 entity( "≕", [ 8789, 0 ] ), 7848 entity( "≂", [ 8770, 0 ] ), 7849 entity( "⪖", [ 10902, 0 ] ), 7850 entity( "⪕", [ 10901, 0 ] ), 7851 entity( "=", [ 61, 0 ] ), 7852 entity( "≟", [ 8799, 0 ] ), 7853 entity( "≡", [ 8801, 0 ] ), 7854 entity( "⩸", [ 10872, 0 ] ), 7855 entity( "⧥", [ 10725, 0 ] ), 7856 entity( "≓", [ 8787, 0 ] ), 7857 entity( "⥱", [ 10609, 0 ] ), 7858 entity( "ℯ", [ 8495, 0 ] ), 7859 entity( "≐", [ 8784, 0 ] ), 7860 entity( "≂", [ 8770, 0 ] ), 7861 entity( "η", [ 951, 0 ] ), 7862 entity( "ð", [ 240, 0 ] ), 7863 entity( "ë", [ 235, 0 ] ), 7864 entity( "€", [ 8364, 0 ] ), 7865 entity( "!", [ 33, 0 ] ), 7866 entity( "∃", [ 8707, 0 ] ), 7867 entity( "ℰ", [ 8496, 0 ] ), 7868 entity( "ⅇ", [ 8519, 0 ] ), 7869 entity( "≒", [ 8786, 0 ] ), 7870 entity( "ф", [ 1092, 0 ] ), 7871 entity( "♀", [ 9792, 0 ] ), 7872 entity( "ffi", [ 64259, 0 ] ), 7873 entity( "ff", [ 64256, 0 ] ), 7874 entity( "ffl", [ 64260, 0 ] ), 7875 entity( "𝔣", [ 120099, 0 ] ), 7876 entity( "fi", [ 64257, 0 ] ), 7877 entity( "fj", [ 102, 106 ] ), 7878 entity( "♭", [ 9837, 0 ] ), 7879 entity( "fl", [ 64258, 0 ] ), 7880 entity( "▱", [ 9649, 0 ] ), 7881 entity( "ƒ", [ 402, 0 ] ), 7882 entity( "𝕗", [ 120151, 0 ] ), 7883 entity( "∀", [ 8704, 0 ] ), 7884 entity( "⋔", [ 8916, 0 ] ), 7885 entity( "⫙", [ 10969, 0 ] ), 7886 entity( "⨍", [ 10765, 0 ] ), 7887 entity( "½", [ 189, 0 ] ), 7888 entity( "½", [ 189, 0 ] ), 7889 entity( "⅓", [ 8531, 0 ] ), 7890 entity( "¼", [ 188, 0 ] ), 7891 entity( "¼", [ 188, 0 ] ), 7892 entity( "⅕", [ 8533, 0 ] ), 7893 entity( "⅙", [ 8537, 0 ] ), 7894 entity( "⅛", [ 8539, 0 ] ), 7895 entity( "⅔", [ 8532, 0 ] ), 7896 entity( "⅖", [ 8534, 0 ] ), 7897 entity( "¾", [ 190, 0 ] ), 7898 entity( "¾", [ 190, 0 ] ), 7899 entity( "⅗", [ 8535, 0 ] ), 7900 entity( "⅜", [ 8540, 0 ] ), 7901 entity( "⅘", [ 8536, 0 
] ), 7902 entity( "⅚", [ 8538, 0 ] ), 7903 entity( "⅝", [ 8541, 0 ] ), 7904 entity( "⅞", [ 8542, 0 ] ), 7905 entity( "⁄", [ 8260, 0 ] ), 7906 entity( "⌢", [ 8994, 0 ] ), 7907 entity( "𝒻", [ 119995, 0 ] ), 7908 entity( "≧", [ 8807, 0 ] ), 7909 entity( "⪌", [ 10892, 0 ] ), 7910 entity( "ǵ", [ 501, 0 ] ), 7911 entity( "γ", [ 947, 0 ] ), 7912 entity( "ϝ", [ 989, 0 ] ), 7913 entity( "⪆", [ 10886, 0 ] ), 7914 entity( "ğ", [ 287, 0 ] ), 7915 entity( "ĝ", [ 285, 0 ] ), 7916 entity( "г", [ 1075, 0 ] ), 7917 entity( "ġ", [ 289, 0 ] ), 7918 entity( "≥", [ 8805, 0 ] ), 7919 entity( "⋛", [ 8923, 0 ] ), 7920 entity( "≥", [ 8805, 0 ] ), 7921 entity( "≧", [ 8807, 0 ] ), 7922 entity( "⩾", [ 10878, 0 ] ), 7923 entity( "⩾", [ 10878, 0 ] ), 7924 entity( "⪩", [ 10921, 0 ] ), 7925 entity( "⪀", [ 10880, 0 ] ), 7926 entity( "⪂", [ 10882, 0 ] ), 7927 entity( "⪄", [ 10884, 0 ] ), 7928 entity( "⋛︀", [ 8923, 65024 ] ), 7929 entity( "⪔", [ 10900, 0 ] ), 7930 entity( "𝔤", [ 120100, 0 ] ), 7931 entity( "≫", [ 8811, 0 ] ), 7932 entity( "⋙", [ 8921, 0 ] ), 7933 entity( "ℷ", [ 8503, 0 ] ), 7934 entity( "ѓ", [ 1107, 0 ] ), 7935 entity( "≷", [ 8823, 0 ] ), 7936 entity( "⪒", [ 10898, 0 ] ), 7937 entity( "⪥", [ 10917, 0 ] ), 7938 entity( "⪤", [ 10916, 0 ] ), 7939 entity( "≩", [ 8809, 0 ] ), 7940 entity( "⪊", [ 10890, 0 ] ), 7941 entity( "⪊", [ 10890, 0 ] ), 7942 entity( "⪈", [ 10888, 0 ] ), 7943 entity( "⪈", [ 10888, 0 ] ), 7944 entity( "≩", [ 8809, 0 ] ), 7945 entity( "⋧", [ 8935, 0 ] ), 7946 entity( "𝕘", [ 120152, 0 ] ), 7947 entity( "`", [ 96, 0 ] ), 7948 entity( "ℊ", [ 8458, 0 ] ), 7949 entity( "≳", [ 8819, 0 ] ), 7950 entity( "⪎", [ 10894, 0 ] ), 7951 entity( "⪐", [ 10896, 0 ] ), 7952 entity( ">", [ 62, 0 ] ), 7953 entity( "⪧", [ 10919, 0 ] ), 7954 entity( "⩺", [ 10874, 0 ] ), 7955 entity( "⋗", [ 8919, 0 ] ), 7956 entity( "⦕", [ 10645, 0 ] ), 7957 entity( "⩼", [ 10876, 0 ] ), 7958 entity( "⪆", [ 10886, 0 ] ), 7959 entity( "⥸", [ 10616, 0 ] ), 7960 entity( "⋗", [ 8919, 0 ] ), 7961 entity( "⋛", [ 
8923, 0 ] ), 7962 entity( "⪌", [ 10892, 0 ] ), 7963 entity( "≷", [ 8823, 0 ] ), 7964 entity( "≳", [ 8819, 0 ] ), 7965 entity( "≩︀", [ 8809, 65024 ] ), 7966 entity( "≩︀", [ 8809, 65024 ] ), 7967 entity( "⇔", [ 8660, 0 ] ), 7968 entity( " ", [ 8202, 0 ] ), 7969 entity( "½", [ 189, 0 ] ), 7970 entity( "ℋ", [ 8459, 0 ] ), 7971 entity( "ъ", [ 1098, 0 ] ), 7972 entity( "↔", [ 8596, 0 ] ), 7973 entity( "⥈", [ 10568, 0 ] ), 7974 entity( "↭", [ 8621, 0 ] ), 7975 entity( "ℏ", [ 8463, 0 ] ), 7976 entity( "ĥ", [ 293, 0 ] ), 7977 entity( "♥", [ 9829, 0 ] ), 7978 entity( "♥", [ 9829, 0 ] ), 7979 entity( "…", [ 8230, 0 ] ), 7980 entity( "⊹", [ 8889, 0 ] ), 7981 entity( "𝔥", [ 120101, 0 ] ), 7982 entity( "⤥", [ 10533, 0 ] ), 7983 entity( "⤦", [ 10534, 0 ] ), 7984 entity( "⇿", [ 8703, 0 ] ), 7985 entity( "∻", [ 8763, 0 ] ), 7986 entity( "↩", [ 8617, 0 ] ), 7987 entity( "↪", [ 8618, 0 ] ), 7988 entity( "𝕙", [ 120153, 0 ] ), 7989 entity( "―", [ 8213, 0 ] ), 7990 entity( "𝒽", [ 119997, 0 ] ), 7991 entity( "ℏ", [ 8463, 0 ] ), 7992 entity( "ħ", [ 295, 0 ] ), 7993 entity( "⁃", [ 8259, 0 ] ), 7994 entity( "‐", [ 8208, 0 ] ), 7995 entity( "í", [ 237, 0 ] ), 7996 entity( "⁣", [ 8291, 0 ] ), 7997 entity( "î", [ 238, 0 ] ), 7998 entity( "и", [ 1080, 0 ] ), 7999 entity( "е", [ 1077, 0 ] ), 8000 entity( "¡", [ 161, 0 ] ), 8001 entity( "⇔", [ 8660, 0 ] ), 8002 entity( "𝔦", [ 120102, 0 ] ), 8003 entity( "ì", [ 236, 0 ] ), 8004 entity( "ⅈ", [ 8520, 0 ] ), 8005 entity( "⨌", [ 10764, 0 ] ), 8006 entity( "∭", [ 8749, 0 ] ), 8007 entity( "⧜", [ 10716, 0 ] ), 8008 entity( "℩", [ 8489, 0 ] ), 8009 entity( "ij", [ 307, 0 ] ), 8010 entity( "ī", [ 299, 0 ] ), 8011 entity( "ℑ", [ 8465, 0 ] ), 8012 entity( "ℐ", [ 8464, 0 ] ), 8013 entity( "ℑ", [ 8465, 0 ] ), 8014 entity( "ı", [ 305, 0 ] ), 8015 entity( "⊷", [ 8887, 0 ] ), 8016 entity( "Ƶ", [ 437, 0 ] ), 8017 entity( "∈", [ 8712, 0 ] ), 8018 entity( "℅", [ 8453, 0 ] ), 8019 entity( "∞", [ 8734, 0 ] ), 8020 entity( "⧝", [ 10717, 0 ] ), 8021 entity( "ı", [ 305, 
0 ] ), 8022 entity( "∫", [ 8747, 0 ] ), 8023 entity( "⊺", [ 8890, 0 ] ), 8024 entity( "ℤ", [ 8484, 0 ] ), 8025 entity( "⊺", [ 8890, 0 ] ), 8026 entity( "⨗", [ 10775, 0 ] ), 8027 entity( "⨼", [ 10812, 0 ] ), 8028 entity( "ё", [ 1105, 0 ] ), 8029 entity( "į", [ 303, 0 ] ), 8030 entity( "𝕚", [ 120154, 0 ] ), 8031 entity( "ι", [ 953, 0 ] ), 8032 entity( "⨼", [ 10812, 0 ] ), 8033 entity( "¿", [ 191, 0 ] ), 8034 entity( "𝒾", [ 119998, 0 ] ), 8035 entity( "∈", [ 8712, 0 ] ), 8036 entity( "⋹", [ 8953, 0 ] ), 8037 entity( "⋵", [ 8949, 0 ] ), 8038 entity( "⋴", [ 8948, 0 ] ), 8039 entity( "⋳", [ 8947, 0 ] ), 8040 entity( "∈", [ 8712, 0 ] ), 8041 entity( "⁢", [ 8290, 0 ] ), 8042 entity( "ĩ", [ 297, 0 ] ), 8043 entity( "і", [ 1110, 0 ] ), 8044 entity( "ï", [ 239, 0 ] ), 8045 entity( "ĵ", [ 309, 0 ] ), 8046 entity( "й", [ 1081, 0 ] ), 8047 entity( "𝔧", [ 120103, 0 ] ), 8048 entity( "ȷ", [ 567, 0 ] ), 8049 entity( "𝕛", [ 120155, 0 ] ), 8050 entity( "𝒿", [ 119999, 0 ] ), 8051 entity( "ј", [ 1112, 0 ] ), 8052 entity( "є", [ 1108, 0 ] ), 8053 entity( "κ", [ 954, 0 ] ), 8054 entity( "ϰ", [ 1008, 0 ] ), 8055 entity( "ķ", [ 311, 0 ] ), 8056 entity( "к", [ 1082, 0 ] ), 8057 entity( "𝔨", [ 120104, 0 ] ), 8058 entity( "ĸ", [ 312, 0 ] ), 8059 entity( "х", [ 1093, 0 ] ), 8060 entity( "ќ", [ 1116, 0 ] ), 8061 entity( "𝕜", [ 120156, 0 ] ), 8062 entity( "𝓀", [ 120000, 0 ] ), 8063 entity( "⇚", [ 8666, 0 ] ), 8064 entity( "⇐", [ 8656, 0 ] ), 8065 entity( "⤛", [ 10523, 0 ] ), 8066 entity( "⤎", [ 10510, 0 ] ), 8067 entity( "≦", [ 8806, 0 ] ), 8068 entity( "⪋", [ 10891, 0 ] ), 8069 entity( "⥢", [ 10594, 0 ] ), 8070 entity( "ĺ", [ 314, 0 ] ), 8071 entity( "⦴", [ 10676, 0 ] ), 8072 entity( "ℒ", [ 8466, 0 ] ), 8073 entity( "λ", [ 955, 0 ] ), 8074 entity( "⟨", [ 10216, 0 ] ), 8075 entity( "⦑", [ 10641, 0 ] ), 8076 entity( "⟨", [ 10216, 0 ] ), 8077 entity( "⪅", [ 10885, 0 ] ), 8078 entity( "«", [ 171, 0 ] ), 8079 entity( "←", [ 8592, 0 ] ), 8080 entity( "⇤", [ 8676, 0 ] ), 8081 entity( "⤟", [ 10527, 0 ] 
), 8082 entity( "⤝", [ 10525, 0 ] ), 8083 entity( "↩", [ 8617, 0 ] ), 8084 entity( "↫", [ 8619, 0 ] ), 8085 entity( "⤹", [ 10553, 0 ] ), 8086 entity( "⥳", [ 10611, 0 ] ), 8087 entity( "↢", [ 8610, 0 ] ), 8088 entity( "⪫", [ 10923, 0 ] ), 8089 entity( "⤙", [ 10521, 0 ] ), 8090 entity( "⪭", [ 10925, 0 ] ), 8091 entity( "⪭︀", [ 10925, 65024 ] ), 8092 entity( "⤌", [ 10508, 0 ] ), 8093 entity( "❲", [ 10098, 0 ] ), 8094 entity( "{", [ 123, 0 ] ), 8095 entity( "[", [ 91, 0 ] ), 8096 entity( "⦋", [ 10635, 0 ] ), 8097 entity( "⦏", [ 10639, 0 ] ), 8098 entity( "⦍", [ 10637, 0 ] ), 8099 entity( "ľ", [ 318, 0 ] ), 8100 entity( "ļ", [ 316, 0 ] ), 8101 entity( "⌈", [ 8968, 0 ] ), 8102 entity( "{", [ 123, 0 ] ), 8103 entity( "л", [ 1083, 0 ] ), 8104 entity( "⤶", [ 10550, 0 ] ), 8105 entity( "“", [ 8220, 0 ] ), 8106 entity( "„", [ 8222, 0 ] ), 8107 entity( "⥧", [ 10599, 0 ] ), 8108 entity( "⥋", [ 10571, 0 ] ), 8109 entity( "↲", [ 8626, 0 ] ), 8110 entity( "≤", [ 8804, 0 ] ), 8111 entity( "←", [ 8592, 0 ] ), 8112 entity( "↢", [ 8610, 0 ] ), 8113 entity( "↽", [ 8637, 0 ] ), 8114 entity( "↼", [ 8636, 0 ] ), 8115 entity( "⇇", [ 8647, 0 ] ), 8116 entity( "↔", [ 8596, 0 ] ), 8117 entity( "⇆", [ 8646, 0 ] ), 8118 entity( "⇋", [ 8651, 0 ] ), 8119 entity( "↭", [ 8621, 0 ] ), 8120 entity( "⋋", [ 8907, 0 ] ), 8121 entity( "⋚", [ 8922, 0 ] ), 8122 entity( "≤", [ 8804, 0 ] ), 8123 entity( "≦", [ 8806, 0 ] ), 8124 entity( "⩽", [ 10877, 0 ] ), 8125 entity( "⩽", [ 10877, 0 ] ), 8126 entity( "⪨", [ 10920, 0 ] ), 8127 entity( "⩿", [ 10879, 0 ] ), 8128 entity( "⪁", [ 10881, 0 ] ), 8129 entity( "⪃", [ 10883, 0 ] ), 8130 entity( "⋚︀", [ 8922, 65024 ] ), 8131 entity( "⪓", [ 10899, 0 ] ), 8132 entity( "⪅", [ 10885, 0 ] ), 8133 entity( "⋖", [ 8918, 0 ] ), 8134 entity( "⋚", [ 8922, 0 ] ), 8135 entity( "⪋", [ 10891, 0 ] ), 8136 entity( "≶", [ 8822, 0 ] ), 8137 entity( "≲", [ 8818, 0 ] ), 8138 entity( "⥼", [ 10620, 0 ] ), 8139 entity( "⌊", [ 8970, 0 ] ), 8140 entity( "𝔩", [ 120105, 0 ] ), 8141 entity( "≶", 
[ 8822, 0 ] ), 8142 entity( "⪑", [ 10897, 0 ] ), 8143 entity( "↽", [ 8637, 0 ] ), 8144 entity( "↼", [ 8636, 0 ] ), 8145 entity( "⥪", [ 10602, 0 ] ), 8146 entity( "▄", [ 9604, 0 ] ), 8147 entity( "љ", [ 1113, 0 ] ), 8148 entity( "≪", [ 8810, 0 ] ), 8149 entity( "⇇", [ 8647, 0 ] ), 8150 entity( "⌞", [ 8990, 0 ] ), 8151 entity( "⥫", [ 10603, 0 ] ), 8152 entity( "◺", [ 9722, 0 ] ), 8153 entity( "ŀ", [ 320, 0 ] ), 8154 entity( "⎰", [ 9136, 0 ] ), 8155 entity( "⎰", [ 9136, 0 ] ), 8156 entity( "≨", [ 8808, 0 ] ), 8157 entity( "⪉", [ 10889, 0 ] ), 8158 entity( "⪉", [ 10889, 0 ] ), 8159 entity( "⪇", [ 10887, 0 ] ), 8160 entity( "⪇", [ 10887, 0 ] ), 8161 entity( "≨", [ 8808, 0 ] ), 8162 entity( "⋦", [ 8934, 0 ] ), 8163 entity( "⟬", [ 10220, 0 ] ), 8164 entity( "⇽", [ 8701, 0 ] ), 8165 entity( "⟦", [ 10214, 0 ] ), 8166 entity( "⟵", [ 10229, 0 ] ), 8167 entity( "⟷", [ 10231, 0 ] ), 8168 entity( "⟼", [ 10236, 0 ] ), 8169 entity( "⟶", [ 10230, 0 ] ), 8170 entity( "↫", [ 8619, 0 ] ), 8171 entity( "↬", [ 8620, 0 ] ), 8172 entity( "⦅", [ 10629, 0 ] ), 8173 entity( "𝕝", [ 120157, 0 ] ), 8174 entity( "⨭", [ 10797, 0 ] ), 8175 entity( "⨴", [ 10804, 0 ] ), 8176 entity( "∗", [ 8727, 0 ] ), 8177 entity( "_", [ 95, 0 ] ), 8178 entity( "◊", [ 9674, 0 ] ), 8179 entity( "◊", [ 9674, 0 ] ), 8180 entity( "⧫", [ 10731, 0 ] ), 8181 entity( "(", [ 40, 0 ] ), 8182 entity( "⦓", [ 10643, 0 ] ), 8183 entity( "⇆", [ 8646, 0 ] ), 8184 entity( "⌟", [ 8991, 0 ] ), 8185 entity( "⇋", [ 8651, 0 ] ), 8186 entity( "⥭", [ 10605, 0 ] ), 8187 entity( "‎", [ 8206, 0 ] ), 8188 entity( "⊿", [ 8895, 0 ] ), 8189 entity( "‹", [ 8249, 0 ] ), 8190 entity( "𝓁", [ 120001, 0 ] ), 8191 entity( "↰", [ 8624, 0 ] ), 8192 entity( "≲", [ 8818, 0 ] ), 8193 entity( "⪍", [ 10893, 0 ] ), 8194 entity( "⪏", [ 10895, 0 ] ), 8195 entity( "[", [ 91, 0 ] ), 8196 entity( "‘", [ 8216, 0 ] ), 8197 entity( "‚", [ 8218, 0 ] ), 8198 entity( "ł", [ 322, 0 ] ), 8199 entity( "<", [ 60, 0 ] ), 8200 entity( "⪦", [ 10918, 0 ] ), 8201 entity( "⩹", [ 
10873, 0 ] ), 8202 entity( "⋖", [ 8918, 0 ] ), 8203 entity( "⋋", [ 8907, 0 ] ), 8204 entity( "⋉", [ 8905, 0 ] ), 8205 entity( "⥶", [ 10614, 0 ] ), 8206 entity( "⩻", [ 10875, 0 ] ), 8207 entity( "⦖", [ 10646, 0 ] ), 8208 entity( "◃", [ 9667, 0 ] ), 8209 entity( "⊴", [ 8884, 0 ] ), 8210 entity( "◂", [ 9666, 0 ] ), 8211 entity( "⥊", [ 10570, 0 ] ), 8212 entity( "⥦", [ 10598, 0 ] ), 8213 entity( "≨︀", [ 8808, 65024 ] ), 8214 entity( "≨︀", [ 8808, 65024 ] ), 8215 entity( "∺", [ 8762, 0 ] ), 8216 entity( "¯", [ 175, 0 ] ), 8217 entity( "♂", [ 9794, 0 ] ), 8218 entity( "✠", [ 10016, 0 ] ), 8219 entity( "✠", [ 10016, 0 ] ), 8220 entity( "↦", [ 8614, 0 ] ), 8221 entity( "↦", [ 8614, 0 ] ), 8222 entity( "↧", [ 8615, 0 ] ), 8223 entity( "↤", [ 8612, 0 ] ), 8224 entity( "↥", [ 8613, 0 ] ), 8225 entity( "▮", [ 9646, 0 ] ), 8226 entity( "⨩", [ 10793, 0 ] ), 8227 entity( "м", [ 1084, 0 ] ), 8228 entity( "—", [ 8212, 0 ] ), 8229 entity( "∡", [ 8737, 0 ] ), 8230 entity( "𝔪", [ 120106, 0 ] ), 8231 entity( "℧", [ 8487, 0 ] ), 8232 entity( "µ", [ 181, 0 ] ), 8233 entity( "∣", [ 8739, 0 ] ), 8234 entity( "*", [ 42, 0 ] ), 8235 entity( "⫰", [ 10992, 0 ] ), 8236 entity( "·", [ 183, 0 ] ), 8237 entity( "−", [ 8722, 0 ] ), 8238 entity( "⊟", [ 8863, 0 ] ), 8239 entity( "∸", [ 8760, 0 ] ), 8240 entity( "⨪", [ 10794, 0 ] ), 8241 entity( "⫛", [ 10971, 0 ] ), 8242 entity( "…", [ 8230, 0 ] ), 8243 entity( "∓", [ 8723, 0 ] ), 8244 entity( "⊧", [ 8871, 0 ] ), 8245 entity( "𝕞", [ 120158, 0 ] ), 8246 entity( "∓", [ 8723, 0 ] ), 8247 entity( "𝓂", [ 120002, 0 ] ), 8248 entity( "∾", [ 8766, 0 ] ), 8249 entity( "μ", [ 956, 0 ] ), 8250 entity( "⊸", [ 8888, 0 ] ), 8251 entity( "⊸", [ 8888, 0 ] ), 8252 entity( "⋙̸", [ 8921, 824 ] ), 8253 entity( "≫⃒", [ 8811, 8402 ] ), 8254 entity( "≫̸", [ 8811, 824 ] ), 8255 entity( "⇍", [ 8653, 0 ] ), 8256 entity( "⇎", [ 8654, 0 ] ), 8257 entity( "⋘̸", [ 8920, 824 ] ), 8258 entity( "≪⃒", [ 8810, 8402 ] ), 8259 entity( "≪̸", [ 8810, 824 ] ), 8260 entity( "⇏", [ 8655, 0 ] 
), 8261 entity( "⊯", [ 8879, 0 ] ), 8262 entity( "⊮", [ 8878, 0 ] ), 8263 entity( "∇", [ 8711, 0 ] ), 8264 entity( "ń", [ 324, 0 ] ), 8265 entity( "∠⃒", [ 8736, 8402 ] ), 8266 entity( "≉", [ 8777, 0 ] ), 8267 entity( "⩰̸", [ 10864, 824 ] ), 8268 entity( "≋̸", [ 8779, 824 ] ), 8269 entity( "ʼn", [ 329, 0 ] ), 8270 entity( "≉", [ 8777, 0 ] ), 8271 entity( "♮", [ 9838, 0 ] ), 8272 entity( "♮", [ 9838, 0 ] ), 8273 entity( "ℕ", [ 8469, 0 ] ), 8274 entity( " ", [ 160, 0 ] ), 8275 entity( "≎̸", [ 8782, 824 ] ), 8276 entity( "≏̸", [ 8783, 824 ] ), 8277 entity( "⩃", [ 10819, 0 ] ), 8278 entity( "ň", [ 328, 0 ] ), 8279 entity( "ņ", [ 326, 0 ] ), 8280 entity( "≇", [ 8775, 0 ] ), 8281 entity( "⩭̸", [ 10861, 824 ] ), 8282 entity( "⩂", [ 10818, 0 ] ), 8283 entity( "н", [ 1085, 0 ] ), 8284 entity( "–", [ 8211, 0 ] ), 8285 entity( "≠", [ 8800, 0 ] ), 8286 entity( "⇗", [ 8663, 0 ] ), 8287 entity( "⤤", [ 10532, 0 ] ), 8288 entity( "↗", [ 8599, 0 ] ), 8289 entity( "↗", [ 8599, 0 ] ), 8290 entity( "≐̸", [ 8784, 824 ] ), 8291 entity( "≢", [ 8802, 0 ] ), 8292 entity( "⤨", [ 10536, 0 ] ), 8293 entity( "≂̸", [ 8770, 824 ] ), 8294 entity( "∄", [ 8708, 0 ] ), 8295 entity( "∄", [ 8708, 0 ] ), 8296 entity( "𝔫", [ 120107, 0 ] ), 8297 entity( "≧̸", [ 8807, 824 ] ), 8298 entity( "≱", [ 8817, 0 ] ), 8299 entity( "≱", [ 8817, 0 ] ), 8300 entity( "≧̸", [ 8807, 824 ] ), 8301 entity( "⩾̸", [ 10878, 824 ] ), 8302 entity( "⩾̸", [ 10878, 824 ] ), 8303 entity( "≵", [ 8821, 0 ] ), 8304 entity( "≯", [ 8815, 0 ] ), 8305 entity( "≯", [ 8815, 0 ] ), 8306 entity( "⇎", [ 8654, 0 ] ), 8307 entity( "↮", [ 8622, 0 ] ), 8308 entity( "⫲", [ 10994, 0 ] ), 8309 entity( "∋", [ 8715, 0 ] ), 8310 entity( "⋼", [ 8956, 0 ] ), 8311 entity( "⋺", [ 8954, 0 ] ), 8312 entity( "∋", [ 8715, 0 ] ), 8313 entity( "њ", [ 1114, 0 ] ), 8314 entity( "⇍", [ 8653, 0 ] ), 8315 entity( "≦̸", [ 8806, 824 ] ), 8316 entity( "↚", [ 8602, 0 ] ), 8317 entity( "‥", [ 8229, 0 ] ), 8318 entity( "≰", [ 8816, 0 ] ), 8319 entity( "↚", [ 8602, 0 ] ), 
8320 entity( "↮", [ 8622, 0 ] ), 8321 entity( "≰", [ 8816, 0 ] ), 8322 entity( "≦̸", [ 8806, 824 ] ), 8323 entity( "⩽̸", [ 10877, 824 ] ), 8324 entity( "⩽̸", [ 10877, 824 ] ), 8325 entity( "≮", [ 8814, 0 ] ), 8326 entity( "≴", [ 8820, 0 ] ), 8327 entity( "≮", [ 8814, 0 ] ), 8328 entity( "⋪", [ 8938, 0 ] ), 8329 entity( "⋬", [ 8940, 0 ] ), 8330 entity( "∤", [ 8740, 0 ] ), 8331 entity( "𝕟", [ 120159, 0 ] ), 8332 entity( "¬", [ 172, 0 ] ), 8333 entity( "∉", [ 8713, 0 ] ), 8334 entity( "⋹̸", [ 8953, 824 ] ), 8335 entity( "⋵̸", [ 8949, 824 ] ), 8336 entity( "∉", [ 8713, 0 ] ), 8337 entity( "⋷", [ 8951, 0 ] ), 8338 entity( "⋶", [ 8950, 0 ] ), 8339 entity( "∌", [ 8716, 0 ] ), 8340 entity( "∌", [ 8716, 0 ] ), 8341 entity( "⋾", [ 8958, 0 ] ), 8342 entity( "⋽", [ 8957, 0 ] ), 8343 entity( "∦", [ 8742, 0 ] ), 8344 entity( "∦", [ 8742, 0 ] ), 8345 entity( "⫽⃥", [ 11005, 8421 ] ), 8346 entity( "∂̸", [ 8706, 824 ] ), 8347 entity( "⨔", [ 10772, 0 ] ), 8348 entity( "⊀", [ 8832, 0 ] ), 8349 entity( "⋠", [ 8928, 0 ] ), 8350 entity( "⪯̸", [ 10927, 824 ] ), 8351 entity( "⊀", [ 8832, 0 ] ), 8352 entity( "⪯̸", [ 10927, 824 ] ), 8353 entity( "⇏", [ 8655, 0 ] ), 8354 entity( "↛", [ 8603, 0 ] ), 8355 entity( "⤳̸", [ 10547, 824 ] ), 8356 entity( "↝̸", [ 8605, 824 ] ), 8357 entity( "↛", [ 8603, 0 ] ), 8358 entity( "⋫", [ 8939, 0 ] ), 8359 entity( "⋭", [ 8941, 0 ] ), 8360 entity( "⊁", [ 8833, 0 ] ), 8361 entity( "⋡", [ 8929, 0 ] ), 8362 entity( "⪰̸", [ 10928, 824 ] ), 8363 entity( "𝓃", [ 120003, 0 ] ), 8364 entity( "∤", [ 8740, 0 ] ), 8365 entity( "∦", [ 8742, 0 ] ), 8366 entity( "≁", [ 8769, 0 ] ), 8367 entity( "≄", [ 8772, 0 ] ), 8368 entity( "≄", [ 8772, 0 ] ), 8369 entity( "∤", [ 8740, 0 ] ), 8370 entity( "∦", [ 8742, 0 ] ), 8371 entity( "⋢", [ 8930, 0 ] ), 8372 entity( "⋣", [ 8931, 0 ] ), 8373 entity( "⊄", [ 8836, 0 ] ), 8374 entity( "⫅̸", [ 10949, 824 ] ), 8375 entity( "⊈", [ 8840, 0 ] ), 8376 entity( "⊂⃒", [ 8834, 8402 ] ), 8377 entity( "⊈", [ 8840, 0 ] ), 8378 entity( "⫅̸", [ 10949, 
824 ] ), 8379 entity( "⊁", [ 8833, 0 ] ), 8380 entity( "⪰̸", [ 10928, 824 ] ), 8381 entity( "⊅", [ 8837, 0 ] ), 8382 entity( "⫆̸", [ 10950, 824 ] ), 8383 entity( "⊉", [ 8841, 0 ] ), 8384 entity( "⊃⃒", [ 8835, 8402 ] ), 8385 entity( "⊉", [ 8841, 0 ] ), 8386 entity( "⫆̸", [ 10950, 824 ] ), 8387 entity( "≹", [ 8825, 0 ] ), 8388 entity( "ñ", [ 241, 0 ] ), 8389 entity( "≸", [ 8824, 0 ] ), 8390 entity( "⋪", [ 8938, 0 ] ), 8391 entity( "⋬", [ 8940, 0 ] ), 8392 entity( "⋫", [ 8939, 0 ] ), 8393 entity( "⋭", [ 8941, 0 ] ), 8394 entity( "ν", [ 957, 0 ] ), 8395 entity( "#", [ 35, 0 ] ), 8396 entity( "№", [ 8470, 0 ] ), 8397 entity( " ", [ 8199, 0 ] ), 8398 entity( "⊭", [ 8877, 0 ] ), 8399 entity( "⤄", [ 10500, 0 ] ), 8400 entity( "≍⃒", [ 8781, 8402 ] ), 8401 entity( "⊬", [ 8876, 0 ] ), 8402 entity( "≥⃒", [ 8805, 8402 ] ), 8403 entity( ">⃒", [ 62, 8402 ] ), 8404 entity( "⧞", [ 10718, 0 ] ), 8405 entity( "⤂", [ 10498, 0 ] ), 8406 entity( "≤⃒", [ 8804, 8402 ] ), 8407 entity( "<⃒", [ 60, 8402 ] ), 8408 entity( "⊴⃒", [ 8884, 8402 ] ), 8409 entity( "⤃", [ 10499, 0 ] ), 8410 entity( "⊵⃒", [ 8885, 8402 ] ), 8411 entity( "∼⃒", [ 8764, 8402 ] ), 8412 entity( "⇖", [ 8662, 0 ] ), 8413 entity( "⤣", [ 10531, 0 ] ), 8414 entity( "↖", [ 8598, 0 ] ), 8415 entity( "↖", [ 8598, 0 ] ), 8416 entity( "⤧", [ 10535, 0 ] ), 8417 entity( "Ⓢ", [ 9416, 0 ] ), 8418 entity( "ó", [ 243, 0 ] ), 8419 entity( "⊛", [ 8859, 0 ] ), 8420 entity( "⊚", [ 8858, 0 ] ), 8421 entity( "ô", [ 244, 0 ] ), 8422 entity( "о", [ 1086, 0 ] ), 8423 entity( "⊝", [ 8861, 0 ] ), 8424 entity( "ő", [ 337, 0 ] ), 8425 entity( "⨸", [ 10808, 0 ] ), 8426 entity( "⊙", [ 8857, 0 ] ), 8427 entity( "⦼", [ 10684, 0 ] ), 8428 entity( "œ", [ 339, 0 ] ), 8429 entity( "⦿", [ 10687, 0 ] ), 8430 entity( "𝔬", [ 120108, 0 ] ), 8431 entity( "˛", [ 731, 0 ] ), 8432 entity( "ò", [ 242, 0 ] ), 8433 entity( "⧁", [ 10689, 0 ] ), 8434 entity( "⦵", [ 10677, 0 ] ), 8435 entity( "Ω", [ 937, 0 ] ), 8436 entity( "∮", [ 8750, 0 ] ), 8437 entity( "↺", [ 8634, 0 ] 
), 8438 entity( "⦾", [ 10686, 0 ] ), 8439 entity( "⦻", [ 10683, 0 ] ), 8440 entity( "‾", [ 8254, 0 ] ), 8441 entity( "⧀", [ 10688, 0 ] ), 8442 entity( "ō", [ 333, 0 ] ), 8443 entity( "ω", [ 969, 0 ] ), 8444 entity( "ο", [ 959, 0 ] ), 8445 entity( "⦶", [ 10678, 0 ] ), 8446 entity( "⊖", [ 8854, 0 ] ), 8447 entity( "𝕠", [ 120160, 0 ] ), 8448 entity( "⦷", [ 10679, 0 ] ), 8449 entity( "⦹", [ 10681, 0 ] ), 8450 entity( "⊕", [ 8853, 0 ] ), 8451 entity( "∨", [ 8744, 0 ] ), 8452 entity( "↻", [ 8635, 0 ] ), 8453 entity( "⩝", [ 10845, 0 ] ), 8454 entity( "ℴ", [ 8500, 0 ] ), 8455 entity( "ℴ", [ 8500, 0 ] ), 8456 entity( "ª", [ 170, 0 ] ), 8457 entity( "º", [ 186, 0 ] ), 8458 entity( "⊶", [ 8886, 0 ] ), 8459 entity( "⩖", [ 10838, 0 ] ), 8460 entity( "⩗", [ 10839, 0 ] ), 8461 entity( "⩛", [ 10843, 0 ] ), 8462 entity( "ℴ", [ 8500, 0 ] ), 8463 entity( "ø", [ 248, 0 ] ), 8464 entity( "⊘", [ 8856, 0 ] ), 8465 entity( "õ", [ 245, 0 ] ), 8466 entity( "⊗", [ 8855, 0 ] ), 8467 entity( "⨶", [ 10806, 0 ] ), 8468 entity( "ö", [ 246, 0 ] ), 8469 entity( "⌽", [ 9021, 0 ] ), 8470 entity( "∥", [ 8741, 0 ] ), 8471 entity( "¶", [ 182, 0 ] ), 8472 entity( "∥", [ 8741, 0 ] ), 8473 entity( "⫳", [ 10995, 0 ] ), 8474 entity( "⫽", [ 11005, 0 ] ), 8475 entity( "∂", [ 8706, 0 ] ), 8476 entity( "п", [ 1087, 0 ] ), 8477 entity( "%", [ 37, 0 ] ), 8478 entity( ".", [ 46, 0 ] ), 8479 entity( "‰", [ 8240, 0 ] ), 8480 entity( "⊥", [ 8869, 0 ] ), 8481 entity( "‱", [ 8241, 0 ] ), 8482 entity( "𝔭", [ 120109, 0 ] ), 8483 entity( "φ", [ 966, 0 ] ), 8484 entity( "ϕ", [ 981, 0 ] ), 8485 entity( "ℳ", [ 8499, 0 ] ), 8486 entity( "☎", [ 9742, 0 ] ), 8487 entity( "π", [ 960, 0 ] ), 8488 entity( "⋔", [ 8916, 0 ] ), 8489 entity( "ϖ", [ 982, 0 ] ), 8490 entity( "ℏ", [ 8463, 0 ] ), 8491 entity( "ℎ", [ 8462, 0 ] ), 8492 entity( "ℏ", [ 8463, 0 ] ), 8493 entity( "+", [ 43, 0 ] ), 8494 entity( "⨣", [ 10787, 0 ] ), 8495 entity( "⊞", [ 8862, 0 ] ), 8496 entity( "⨢", [ 10786, 0 ] ), 8497 entity( "∔", [ 8724, 0 ] ), 8498 entity( 
"⨥", [ 10789, 0 ] ), 8499 entity( "⩲", [ 10866, 0 ] ), 8500 entity( "±", [ 177, 0 ] ), 8501 entity( "⨦", [ 10790, 0 ] ), 8502 entity( "⨧", [ 10791, 0 ] ), 8503 entity( "±", [ 177, 0 ] ), 8504 entity( "⨕", [ 10773, 0 ] ), 8505 entity( "𝕡", [ 120161, 0 ] ), 8506 entity( "£", [ 163, 0 ] ), 8507 entity( "≺", [ 8826, 0 ] ), 8508 entity( "⪳", [ 10931, 0 ] ), 8509 entity( "⪷", [ 10935, 0 ] ), 8510 entity( "≼", [ 8828, 0 ] ), 8511 entity( "⪯", [ 10927, 0 ] ), 8512 entity( "≺", [ 8826, 0 ] ), 8513 entity( "⪷", [ 10935, 0 ] ), 8514 entity( "≼", [ 8828, 0 ] ), 8515 entity( "⪯", [ 10927, 0 ] ), 8516 entity( "⪹", [ 10937, 0 ] ), 8517 entity( "⪵", [ 10933, 0 ] ), 8518 entity( "⋨", [ 8936, 0 ] ), 8519 entity( "≾", [ 8830, 0 ] ), 8520 entity( "′", [ 8242, 0 ] ), 8521 entity( "ℙ", [ 8473, 0 ] ), 8522 entity( "⪵", [ 10933, 0 ] ), 8523 entity( "⪹", [ 10937, 0 ] ), 8524 entity( "⋨", [ 8936, 0 ] ), 8525 entity( "∏", [ 8719, 0 ] ), 8526 entity( "⌮", [ 9006, 0 ] ), 8527 entity( "⌒", [ 8978, 0 ] ), 8528 entity( "⌓", [ 8979, 0 ] ), 8529 entity( "∝", [ 8733, 0 ] ), 8530 entity( "∝", [ 8733, 0 ] ), 8531 entity( "≾", [ 8830, 0 ] ), 8532 entity( "⊰", [ 8880, 0 ] ), 8533 entity( "𝓅", [ 120005, 0 ] ), 8534 entity( "ψ", [ 968, 0 ] ), 8535 entity( " ", [ 8200, 0 ] ), 8536 entity( "𝔮", [ 120110, 0 ] ), 8537 entity( "⨌", [ 10764, 0 ] ), 8538 entity( "𝕢", [ 120162, 0 ] ), 8539 entity( "⁗", [ 8279, 0 ] ), 8540 entity( "𝓆", [ 120006, 0 ] ), 8541 entity( "ℍ", [ 8461, 0 ] ), 8542 entity( "⨖", [ 10774, 0 ] ), 8543 entity( "?", [ 63, 0 ] ), 8544 entity( "≟", [ 8799, 0 ] ), 8545 entity( """, [ 34, 0 ] ), 8546 entity( "⇛", [ 8667, 0 ] ), 8547 entity( "⇒", [ 8658, 0 ] ), 8548 entity( "⤜", [ 10524, 0 ] ), 8549 entity( "⤏", [ 10511, 0 ] ), 8550 entity( "⥤", [ 10596, 0 ] ), 8551 entity( "∽̱", [ 8765, 817 ] ), 8552 entity( "ŕ", [ 341, 0 ] ), 8553 entity( "√", [ 8730, 0 ] ), 8554 entity( "⦳", [ 10675, 0 ] ), 8555 entity( "⟩", [ 10217, 0 ] ), 8556 entity( "⦒", [ 10642, 0 ] ), 8557 entity( "⦥", [ 10661, 0 ] ), 8558 
entity( "⟩", [ 10217, 0 ] ), 8559 entity( "»", [ 187, 0 ] ), 8560 entity( "→", [ 8594, 0 ] ), 8561 entity( "⥵", [ 10613, 0 ] ), 8562 entity( "⇥", [ 8677, 0 ] ), 8563 entity( "⤠", [ 10528, 0 ] ), 8564 entity( "⤳", [ 10547, 0 ] ), 8565 entity( "⤞", [ 10526, 0 ] ), 8566 entity( "↪", [ 8618, 0 ] ), 8567 entity( "↬", [ 8620, 0 ] ), 8568 entity( "⥅", [ 10565, 0 ] ), 8569 entity( "⥴", [ 10612, 0 ] ), 8570 entity( "↣", [ 8611, 0 ] ), 8571 entity( "↝", [ 8605, 0 ] ), 8572 entity( "⤚", [ 10522, 0 ] ), 8573 entity( "∶", [ 8758, 0 ] ), 8574 entity( "ℚ", [ 8474, 0 ] ), 8575 entity( "⤍", [ 10509, 0 ] ), 8576 entity( "❳", [ 10099, 0 ] ), 8577 entity( "}", [ 125, 0 ] ), 8578 entity( "]", [ 93, 0 ] ), 8579 entity( "⦌", [ 10636, 0 ] ), 8580 entity( "⦎", [ 10638, 0 ] ), 8581 entity( "⦐", [ 10640, 0 ] ), 8582 entity( "ř", [ 345, 0 ] ), 8583 entity( "ŗ", [ 343, 0 ] ), 8584 entity( "⌉", [ 8969, 0 ] ), 8585 entity( "}", [ 125, 0 ] ), 8586 entity( "р", [ 1088, 0 ] ), 8587 entity( "⤷", [ 10551, 0 ] ), 8588 entity( "⥩", [ 10601, 0 ] ), 8589 entity( "”", [ 8221, 0 ] ), 8590 entity( "”", [ 8221, 0 ] ), 8591 entity( "↳", [ 8627, 0 ] ), 8592 entity( "ℜ", [ 8476, 0 ] ), 8593 entity( "ℛ", [ 8475, 0 ] ), 8594 entity( "ℜ", [ 8476, 0 ] ), 8595 entity( "ℝ", [ 8477, 0 ] ), 8596 entity( "▭", [ 9645, 0 ] ), 8597 entity( "®", [ 174, 0 ] ), 8598 entity( "⥽", [ 10621, 0 ] ), 8599 entity( "⌋", [ 8971, 0 ] ), 8600 entity( "𝔯", [ 120111, 0 ] ), 8601 entity( "⇁", [ 8641, 0 ] ), 8602 entity( "⇀", [ 8640, 0 ] ), 8603 entity( "⥬", [ 10604, 0 ] ), 8604 entity( "ρ", [ 961, 0 ] ), 8605 entity( "ϱ", [ 1009, 0 ] ), 8606 entity( "→", [ 8594, 0 ] ), 8607 entity( "↣", [ 8611, 0 ] ), 8608 entity( "⇁", [ 8641, 0 ] ), 8609 entity( "⇀", [ 8640, 0 ] ), 8610 entity( "⇄", [ 8644, 0 ] ), 8611 entity( "⇌", [ 8652, 0 ] ), 8612 entity( "⇉", [ 8649, 0 ] ), 8613 entity( "↝", [ 8605, 0 ] ), 8614 entity( "⋌", [ 8908, 0 ] ), 8615 entity( "˚", [ 730, 0 ] ), 8616 entity( "≓", [ 8787, 0 ] ), 8617 entity( "⇄", [ 8644, 0 ] ), 8618 entity( 
"⇌", [ 8652, 0 ] ), 8619 entity( "‏", [ 8207, 0 ] ), 8620 entity( "⎱", [ 9137, 0 ] ), 8621 entity( "⎱", [ 9137, 0 ] ), 8622 entity( "⫮", [ 10990, 0 ] ), 8623 entity( "⟭", [ 10221, 0 ] ), 8624 entity( "⇾", [ 8702, 0 ] ), 8625 entity( "⟧", [ 10215, 0 ] ), 8626 entity( "⦆", [ 10630, 0 ] ), 8627 entity( "𝕣", [ 120163, 0 ] ), 8628 entity( "⨮", [ 10798, 0 ] ), 8629 entity( "⨵", [ 10805, 0 ] ), 8630 entity( ")", [ 41, 0 ] ), 8631 entity( "⦔", [ 10644, 0 ] ), 8632 entity( "⨒", [ 10770, 0 ] ), 8633 entity( "⇉", [ 8649, 0 ] ), 8634 entity( "›", [ 8250, 0 ] ), 8635 entity( "𝓇", [ 120007, 0 ] ), 8636 entity( "↱", [ 8625, 0 ] ), 8637 entity( "]", [ 93, 0 ] ), 8638 entity( "’", [ 8217, 0 ] ), 8639 entity( "’", [ 8217, 0 ] ), 8640 entity( "⋌", [ 8908, 0 ] ), 8641 entity( "⋊", [ 8906, 0 ] ), 8642 entity( "▹", [ 9657, 0 ] ), 8643 entity( "⊵", [ 8885, 0 ] ), 8644 entity( "▸", [ 9656, 0 ] ), 8645 entity( "⧎", [ 10702, 0 ] ), 8646 entity( "⥨", [ 10600, 0 ] ), 8647 entity( "℞", [ 8478, 0 ] ), 8648 entity( "ś", [ 347, 0 ] ), 8649 entity( "‚", [ 8218, 0 ] ), 8650 entity( "≻", [ 8827, 0 ] ), 8651 entity( "⪴", [ 10932, 0 ] ), 8652 entity( "⪸", [ 10936, 0 ] ), 8653 entity( "š", [ 353, 0 ] ), 8654 entity( "≽", [ 8829, 0 ] ), 8655 entity( "⪰", [ 10928, 0 ] ), 8656 entity( "ş", [ 351, 0 ] ), 8657 entity( "ŝ", [ 349, 0 ] ), 8658 entity( "⪶", [ 10934, 0 ] ), 8659 entity( "⪺", [ 10938, 0 ] ), 8660 entity( "⋩", [ 8937, 0 ] ), 8661 entity( "⨓", [ 10771, 0 ] ), 8662 entity( "≿", [ 8831, 0 ] ), 8663 entity( "с", [ 1089, 0 ] ), 8664 entity( "⋅", [ 8901, 0 ] ), 8665 entity( "⊡", [ 8865, 0 ] ), 8666 entity( "⩦", [ 10854, 0 ] ), 8667 entity( "⇘", [ 8664, 0 ] ), 8668 entity( "⤥", [ 10533, 0 ] ), 8669 entity( "↘", [ 8600, 0 ] ), 8670 entity( "↘", [ 8600, 0 ] ), 8671 entity( "§", [ 167, 0 ] ), 8672 entity( ";", [ 59, 0 ] ), 8673 entity( "⤩", [ 10537, 0 ] ), 8674 entity( "∖", [ 8726, 0 ] ), 8675 entity( "∖", [ 8726, 0 ] ), 8676 entity( "✶", [ 10038, 0 ] ), 8677 entity( "𝔰", [ 120112, 0 ] ), 8678 entity( "⌢", 
[ 8994, 0 ] ), 8679 entity( "♯", [ 9839, 0 ] ), 8680 entity( "щ", [ 1097, 0 ] ), 8681 entity( "ш", [ 1096, 0 ] ), 8682 entity( "∣", [ 8739, 0 ] ), 8683 entity( "∥", [ 8741, 0 ] ), 8684 entity( "­", [ 173, 0 ] ), 8685 entity( "σ", [ 963, 0 ] ), 8686 entity( "ς", [ 962, 0 ] ), 8687 entity( "ς", [ 962, 0 ] ), 8688 entity( "∼", [ 8764, 0 ] ), 8689 entity( "⩪", [ 10858, 0 ] ), 8690 entity( "≃", [ 8771, 0 ] ), 8691 entity( "≃", [ 8771, 0 ] ), 8692 entity( "⪞", [ 10910, 0 ] ), 8693 entity( "⪠", [ 10912, 0 ] ), 8694 entity( "⪝", [ 10909, 0 ] ), 8695 entity( "⪟", [ 10911, 0 ] ), 8696 entity( "≆", [ 8774, 0 ] ), 8697 entity( "⨤", [ 10788, 0 ] ), 8698 entity( "⥲", [ 10610, 0 ] ), 8699 entity( "←", [ 8592, 0 ] ), 8700 entity( "∖", [ 8726, 0 ] ), 8701 entity( "⨳", [ 10803, 0 ] ), 8702 entity( "⧤", [ 10724, 0 ] ), 8703 entity( "∣", [ 8739, 0 ] ), 8704 entity( "⌣", [ 8995, 0 ] ), 8705 entity( "⪪", [ 10922, 0 ] ), 8706 entity( "⪬", [ 10924, 0 ] ), 8707 entity( "⪬︀", [ 10924, 65024 ] ), 8708 entity( "ь", [ 1100, 0 ] ), 8709 entity( "/", [ 47, 0 ] ), 8710 entity( "⧄", [ 10692, 0 ] ), 8711 entity( "⌿", [ 9023, 0 ] ), 8712 entity( "𝕤", [ 120164, 0 ] ), 8713 entity( "♠", [ 9824, 0 ] ), 8714 entity( "♠", [ 9824, 0 ] ), 8715 entity( "∥", [ 8741, 0 ] ), 8716 entity( "⊓", [ 8851, 0 ] ), 8717 entity( "⊓︀", [ 8851, 65024 ] ), 8718 entity( "⊔", [ 8852, 0 ] ), 8719 entity( "⊔︀", [ 8852, 65024 ] ), 8720 entity( "⊏", [ 8847, 0 ] ), 8721 entity( "⊑", [ 8849, 0 ] ), 8722 entity( "⊏", [ 8847, 0 ] ), 8723 entity( "⊑", [ 8849, 0 ] ), 8724 entity( "⊐", [ 8848, 0 ] ), 8725 entity( "⊒", [ 8850, 0 ] ), 8726 entity( "⊐", [ 8848, 0 ] ), 8727 entity( "⊒", [ 8850, 0 ] ), 8728 entity( "□", [ 9633, 0 ] ), 8729 entity( "□", [ 9633, 0 ] ), 8730 entity( "▪", [ 9642, 0 ] ), 8731 entity( "▪", [ 9642, 0 ] ), 8732 entity( "→", [ 8594, 0 ] ), 8733 entity( "𝓈", [ 120008, 0 ] ), 8734 entity( "∖", [ 8726, 0 ] ), 8735 entity( "⌣", [ 8995, 0 ] ), 8736 entity( "⋆", [ 8902, 0 ] ), 8737 entity( "☆", [ 9734, 0 ] ), 8738 
entity( "★", [ 9733, 0 ] ), 8739 entity( "ϵ", [ 1013, 0 ] ), 8740 entity( "ϕ", [ 981, 0 ] ), 8741 entity( "¯", [ 175, 0 ] ), 8742 entity( "⊂", [ 8834, 0 ] ), 8743 entity( "⫅", [ 10949, 0 ] ), 8744 entity( "⪽", [ 10941, 0 ] ), 8745 entity( "⊆", [ 8838, 0 ] ), 8746 entity( "⫃", [ 10947, 0 ] ), 8747 entity( "⫁", [ 10945, 0 ] ), 8748 entity( "⫋", [ 10955, 0 ] ), 8749 entity( "⊊", [ 8842, 0 ] ), 8750 entity( "⪿", [ 10943, 0 ] ), 8751 entity( "⥹", [ 10617, 0 ] ), 8752 entity( "⊂", [ 8834, 0 ] ), 8753 entity( "⊆", [ 8838, 0 ] ), 8754 entity( "⫅", [ 10949, 0 ] ), 8755 entity( "⊊", [ 8842, 0 ] ), 8756 entity( "⫋", [ 10955, 0 ] ), 8757 entity( "⫇", [ 10951, 0 ] ), 8758 entity( "⫕", [ 10965, 0 ] ), 8759 entity( "⫓", [ 10963, 0 ] ), 8760 entity( "≻", [ 8827, 0 ] ), 8761 entity( "⪸", [ 10936, 0 ] ), 8762 entity( "≽", [ 8829, 0 ] ), 8763 entity( "⪰", [ 10928, 0 ] ), 8764 entity( "⪺", [ 10938, 0 ] ), 8765 entity( "⪶", [ 10934, 0 ] ), 8766 entity( "⋩", [ 8937, 0 ] ), 8767 entity( "≿", [ 8831, 0 ] ), 8768 entity( "∑", [ 8721, 0 ] ), 8769 entity( "♪", [ 9834, 0 ] ), 8770 entity( "¹", [185, 0 ] ), 8771 entity( "¹", [ 185, 0 ] ), 8772 entity( "²", [178, 0 ] ), 8773 entity( "²", [ 178, 0 ] ), 8774 entity( "³", [179, 0 ] ), 8775 entity( "³", [ 179, 0 ] ), 8776 entity( "⊃", [ 8835, 0 ] ), 8777 entity( "⫆", [ 10950, 0 ] ), 8778 entity( "⪾", [ 10942, 0 ] ), 8779 entity( "⫘", [ 10968, 0 ] ), 8780 entity( "⊇", [ 8839, 0 ] ), 8781 entity( "⫄", [ 10948, 0 ] ), 8782 entity( "⟉", [ 10185, 0 ] ), 8783 entity( "⫗", [ 10967, 0 ] ), 8784 entity( "⥻", [ 10619, 0 ] ), 8785 entity( "⫂", [ 10946, 0 ] ), 8786 entity( "⫌", [ 10956, 0 ] ), 8787 entity( "⊋", [ 8843, 0 ] ), 8788 entity( "⫀", [ 10944, 0 ] ), 8789 entity( "⊃", [ 8835, 0 ] ), 8790 entity( "⊇", [ 8839, 0 ] ), 8791 entity( "⫆", [ 10950, 0 ] ), 8792 entity( "⊋", [ 8843, 0 ] ), 8793 entity( "⫌", [ 10956, 0 ] ), 8794 entity( "⫈", [ 10952, 0 ] ), 8795 entity( "⫔", [ 10964, 0 ] ), 8796 entity( "⫖", [ 10966, 0 ] ), 8797 entity( "⇙", [ 8665, 0 ] ), 8798 
entity( "⤦", [ 10534, 0 ] ), 8799 entity( "↙", [ 8601, 0 ] ), 8800 entity( "↙", [ 8601, 0 ] ), 8801 entity( "⤪", [ 10538, 0 ] ), 8802 entity( "ß", [ 223, 0 ] ), 8803 entity( "⌖", [ 8982, 0 ] ), 8804 entity( "τ", [ 964, 0 ] ), 8805 entity( "⎴", [ 9140, 0 ] ), 8806 entity( "ť", [ 357, 0 ] ), 8807 entity( "ţ", [ 355, 0 ] ), 8808 entity( "т", [ 1090, 0 ] ), 8809 entity( "⃛", [ 8411, 0 ] ), 8810 entity( "⌕", [ 8981, 0 ] ), 8811 entity( "𝔱", [ 120113, 0 ] ), 8812 entity( "∴", [ 8756, 0 ] ), 8813 entity( "∴", [ 8756, 0 ] ), 8814 entity( "θ", [ 952, 0 ] ), 8815 entity( "ϑ", [ 977, 0 ] ), 8816 entity( "ϑ", [ 977, 0 ] ), 8817 entity( "≈", [ 8776, 0 ] ), 8818 entity( "∼", [ 8764, 0 ] ), 8819 entity( " ", [ 8201, 0 ] ), 8820 entity( "≈", [ 8776, 0 ] ), 8821 entity( "∼", [ 8764, 0 ] ), 8822 entity( "þ", [ 254, 0 ] ), 8823 entity( "˜", [ 732, 0 ] ), 8824 entity( "×", [ 215, 0 ] ), 8825 entity( "⊠", [ 8864, 0 ] ), 8826 entity( "⨱", [ 10801, 0 ] ), 8827 entity( "⨰", [ 10800, 0 ] ), 8828 entity( "∭", [ 8749, 0 ] ), 8829 entity( "⤨", [ 10536, 0 ] ), 8830 entity( "⊤", [ 8868, 0 ] ), 8831 entity( "⌶", [ 9014, 0 ] ), 8832 entity( "⫱", [ 10993, 0 ] ), 8833 entity( "𝕥", [ 120165, 0 ] ), 8834 entity( "⫚", [ 10970, 0 ] ), 8835 entity( "⤩", [ 10537, 0 ] ), 8836 entity( "‴", [ 8244, 0 ] ), 8837 entity( "™", [ 8482, 0 ] ), 8838 entity( "▵", [ 9653, 0 ] ), 8839 entity( "▿", [ 9663, 0 ] ), 8840 entity( "◃", [ 9667, 0 ] ), 8841 entity( "⊴", [ 8884, 0 ] ), 8842 entity( "≜", [ 8796, 0 ] ), 8843 entity( "▹", [ 9657, 0 ] ), 8844 entity( "⊵", [ 8885, 0 ] ), 8845 entity( "◬", [ 9708, 0 ] ), 8846 entity( "≜", [ 8796, 0 ] ), 8847 entity( "⨺", [ 10810, 0 ] ), 8848 entity( "⨹", [ 10809, 0 ] ), 8849 entity( "⧍", [ 10701, 0 ] ), 8850 entity( "⨻", [ 10811, 0 ] ), 8851 entity( "⏢", [ 9186, 0 ] ), 8852 entity( "𝓉", [ 120009, 0 ] ), 8853 entity( "ц", [ 1094, 0 ] ), 8854 entity( "ћ", [ 1115, 0 ] ), 8855 entity( "ŧ", [ 359, 0 ] ), 8856 entity( "≬", [ 8812, 0 ] ), 8857 entity( "↞", [ 8606, 0 ] ), 8858 entity( "↠", 
[ 8608, 0 ] ), 8859 entity( "⇑", [ 8657, 0 ] ), 8860 entity( "⥣", [ 10595, 0 ] ), 8861 entity( "ú", [ 250, 0 ] ), 8862 entity( "↑", [ 8593, 0 ] ), 8863 entity( "ў", [ 1118, 0 ] ), 8864 entity( "ŭ", [ 365, 0 ] ), 8865 entity( "û", [ 251, 0 ] ), 8866 entity( "у", [ 1091, 0 ] ), 8867 entity( "⇅", [ 8645, 0 ] ), 8868 entity( "ű", [ 369, 0 ] ), 8869 entity( "⥮", [ 10606, 0 ] ), 8870 entity( "⥾", [ 10622, 0 ] ), 8871 entity( "𝔲", [ 120114, 0 ] ), 8872 entity( "ù", [ 249, 0 ] ), 8873 entity( "↿", [ 8639, 0 ] ), 8874 entity( "↾", [ 8638, 0 ] ), 8875 entity( "▀", [ 9600, 0 ] ), 8876 entity( "⌜", [ 8988, 0 ] ), 8877 entity( "⌜", [ 8988, 0 ] ), 8878 entity( "⌏", [ 8975, 0 ] ), 8879 entity( "◸", [ 9720, 0 ] ), 8880 entity( "ū", [ 363, 0 ] ), 8881 entity( "¨", [ 168, 0 ] ), 8882 entity( "ų", [ 371, 0 ] ), 8883 entity( "𝕦", [ 120166, 0 ] ), 8884 entity( "↑", [ 8593, 0 ] ), 8885 entity( "↕", [ 8597, 0 ] ), 8886 entity( "↿", [ 8639, 0 ] ), 8887 entity( "↾", [ 8638, 0 ] ), 8888 entity( "⊎", [ 8846, 0 ] ), 8889 entity( "υ", [ 965, 0 ] ), 8890 entity( "ϒ", [ 978, 0 ] ), 8891 entity( "υ", [ 965, 0 ] ), 8892 entity( "⇈", [ 8648, 0 ] ), 8893 entity( "⌝", [ 8989, 0 ] ), 8894 entity( "⌝", [ 8989, 0 ] ), 8895 entity( "⌎", [ 8974, 0 ] ), 8896 entity( "ů", [ 367, 0 ] ), 8897 entity( "◹", [ 9721, 0 ] ), 8898 entity( "𝓊", [ 120010, 0 ] ), 8899 entity( "⋰", [ 8944, 0 ] ), 8900 entity( "ũ", [ 361, 0 ] ), 8901 entity( "▵", [ 9653, 0 ] ), 8902 entity( "▴", [ 9652, 0 ] ), 8903 entity( "⇈", [ 8648, 0 ] ), 8904 entity( "ü", [ 252, 0 ] ), 8905 entity( "⦧", [ 10663, 0 ] ), 8906 entity( "⇕", [ 8661, 0 ] ), 8907 entity( "⫨", [ 10984, 0 ] ), 8908 entity( "⫩", [ 10985, 0 ] ), 8909 entity( "⊨", [ 8872, 0 ] ), 8910 entity( "⦜", [ 10652, 0 ] ), 8911 entity( "ϵ", [ 1013, 0 ] ), 8912 entity( "ϰ", [ 1008, 0 ] ), 8913 entity( "∅", [ 8709, 0 ] ), 8914 entity( "ϕ", [ 981, 0 ] ), 8915 entity( "ϖ", [ 982, 0 ] ), 8916 entity( "∝", [ 8733, 0 ] ), 8917 entity( "↕", [ 8597, 0 ] ), 8918 entity( "ϱ", [ 1009, 0 ] ), 8919 
entity( "ς", [ 962, 0 ] ), 8920 entity( "⊊︀", [ 8842, 65024 ] ), 8921 entity( "⫋︀", [ 10955, 65024 ] ), 8922 entity( "⊋︀", [ 8843, 65024 ] ), 8923 entity( "⫌︀", [ 10956, 65024 ] ), 8924 entity( "ϑ", [ 977, 0 ] ), 8925 entity( "⊲", [ 8882, 0 ] ), 8926 entity( "⊳", [ 8883, 0 ] ), 8927 entity( "в", [ 1074, 0 ] ), 8928 entity( "⊢", [ 8866, 0 ] ), 8929 entity( "∨", [ 8744, 0 ] ), 8930 entity( "⊻", [ 8891, 0 ] ), 8931 entity( "≚", [ 8794, 0 ] ), 8932 entity( "⋮", [ 8942, 0 ] ), 8933 entity( "|", [ 124, 0 ] ), 8934 entity( "|", [ 124, 0 ] ), 8935 entity( "𝔳", [ 120115, 0 ] ), 8936 entity( "⊲", [ 8882, 0 ] ), 8937 entity( "⊂⃒", [ 8834, 8402 ] ), 8938 entity( "⊃⃒", [ 8835, 8402 ] ), 8939 entity( "𝕧", [ 120167, 0 ] ), 8940 entity( "∝", [ 8733, 0 ] ), 8941 entity( "⊳", [ 8883, 0 ] ), 8942 entity( "𝓋", [ 120011, 0 ] ), 8943 entity( "⫋︀", [ 10955, 65024 ] ), 8944 entity( "⊊︀", [ 8842, 65024 ] ), 8945 entity( "⫌︀", [ 10956, 65024 ] ), 8946 entity( "⊋︀", [ 8843, 65024 ] ), 8947 entity( "⦚", [ 10650, 0 ] ), 8948 entity( "ŵ", [ 373, 0 ] ), 8949 entity( "⩟", [ 10847, 0 ] ), 8950 entity( "∧", [ 8743, 0 ] ), 8951 entity( "≙", [ 8793, 0 ] ), 8952 entity( "℘", [ 8472, 0 ] ), 8953 entity( "𝔴", [ 120116, 0 ] ), 8954 entity( "𝕨", [ 120168, 0 ] ), 8955 entity( "℘", [ 8472, 0 ] ), 8956 entity( "≀", [ 8768, 0 ] ), 8957 entity( "≀", [ 8768, 0 ] ), 8958 entity( "𝓌", [ 120012, 0 ] ), 8959 entity( "⋂", [ 8898, 0 ] ), 8960 entity( "◯", [ 9711, 0 ] ), 8961 entity( "⋃", [ 8899, 0 ] ), 8962 entity( "▽", [ 9661, 0 ] ), 8963 entity( "𝔵", [ 120117, 0 ] ), 8964 entity( "⟺", [ 10234, 0 ] ), 8965 entity( "⟷", [ 10231, 0 ] ), 8966 entity( "ξ", [ 958, 0 ] ), 8967 entity( "⟸", [ 10232, 0 ] ), 8968 entity( "⟵", [ 10229, 0 ] ), 8969 entity( "⟼", [ 10236, 0 ] ), 8970 entity( "⋻", [ 8955, 0 ] ), 8971 entity( "⨀", [ 10752, 0 ] ), 8972 entity( "𝕩", [ 120169, 0 ] ), 8973 entity( "⨁", [ 10753, 0 ] ), 8974 entity( "⨂", [ 10754, 0 ] ), 8975 entity( "⟹", [ 10233, 0 ] ), 8976 entity( "⟶", [ 10230, 0 ] ), 8977 entity( 
"𝓍", [ 120013, 0 ] ),
    entity( "⨆", [ 10758, 0 ] ),
    entity( "⨄", [ 10756, 0 ] ),
    entity( "△", [ 9651, 0 ] ),
    entity( "⋁", [ 8897, 0 ] ),
    entity( "⋀", [ 8896, 0 ] ),
    entity( "ý", [ 253, 0 ] ),
    entity( "я", [ 1103, 0 ] ),
    entity( "ŷ", [ 375, 0 ] ),
    entity( "ы", [ 1099, 0 ] ),
    entity( "¥", [ 165, 0 ] ),
    entity( "𝔶", [ 120118, 0 ] ),
    entity( "ї", [ 1111, 0 ] ),
    entity( "𝕪", [ 120170, 0 ] ),
    entity( "𝓎", [ 120014, 0 ] ),
    entity( "ю", [ 1102, 0 ] ),
    entity( "ÿ", [ 255, 0 ] ),
    entity( "ź", [ 378, 0 ] ),
    entity( "ž", [ 382, 0 ] ),
    entity( "з", [ 1079, 0 ] ),
    entity( "ż", [ 380, 0 ] ),
    entity( "ℨ", [ 8488, 0 ] ),
    entity( "ζ", [ 950, 0 ] ),
    entity( "𝔷", [ 120119, 0 ] ),
    entity( "ж", [ 1078, 0 ] ),
    entity( "⇝", [ 8669, 0 ] ),
    entity( "𝕫", [ 120171, 0 ] ),
    entity( "𝓏", [ 120015, 0 ] ),
    entity( "‍", [ 8205, 0 ] ),
    entity( "‌", [ 8204, 0 ] ),
];


/// Probe handed to bsearch() by entity_lookup(): a name pointer plus its length.
struct entity_key
{
    const(char)* name;
    size_t name_size;
}

/// bsearch() comparator: orders an entity_key against one entity_table element.
/// Only the first `key.name_size` bytes of the two names are compared.
extern(C) int entity_cmp(scope const(void)* p_key, scope const(void)* p_entity)
{
    auto probe = cast(const(entity_key)*) p_key;
    auto candidate = cast(const(entity)*) p_entity;

    return strncmp(probe.name, candidate.name, probe.name_size);
}

/// Binary-searches entity_table for `name` (`name_size` bytes long).
/// Returns: pointer to the matching table element, or null when bsearch()
/// finds none.
const(entity)* entity_lookup(const(char)* name, size_t name_size)
{
    entity_key probe;
    probe.name = name;
    probe.name_size = name_size;

    return cast(const(entity)*) bsearch(&probe,
                                        cast(const(void)*) entity_table.ptr,
                                        entity_table.length,
                                        entity.sizeof,
                                        &entity_cmp);
}

//
// HTML RENDERING
//

/* If set, debug output from md_parse() is sent to stderr.
*/
enum MD_RENDER_FLAG_DEBUG = 0x0001;

/* If set in MD_RENDER_HTML.flags, render_entity() passes entities through
 * untranslated. */
enum MD_RENDER_FLAG_VERBATIM_ENTITIES = 0x0002;


/// State shared by all HTML rendering helpers and callbacks.
struct MD_RENDER_HTML
{
    /// Output sink; render_text() calls it with (text, size, userdata).
    void function(const(MD_CHAR)*, MD_SIZE, void*) nothrow @nogc process_output;
    /// Opaque pointer handed back to process_output on every call.
    void* userdata;
    /// Bit set of MD_RENDER_FLAG_* values.
    uint flags;
    /// Incremented by render_open_img_span(), decremented by render_close_img_span().
    int image_nesting_level;
    /// Indexed by byte value; a non-zero entry marks a byte HTML_NEED_ESCAPE() reports.
    char[256] escape_map;
}


/*****************************************
 ***  HTML rendering helper functions  ***
 *****************************************/

/*
#define ISDIGIT(ch)
#define ISLOWER(ch)
#define ISUPPER(ch)
*/

/// True for ASCII letters and ASCII digits only.
bool ISALNUM_HTML(CHAR ch)
{
    const bool isLower = 'a' <= ch && ch <= 'z';
    const bool isUpper = 'A' <= ch && ch <= 'Z';
    const bool isDigit = '0' <= ch && ch <= '9';

    return isLower || isUpper || isDigit;
}

/// Forwards `size` bytes starting at `text` to the renderer's output callback.
void render_text(MD_RENDER_HTML* r, const(MD_CHAR)* text, MD_SIZE size)
{
    r.process_output(text, size, r.userdata);
}

/// Emits a NUL-terminated string verbatim; its length is measured with strlen().
void RENDER_LITERAL(MD_RENDER_HTML* r, const(MD_CHAR)* literal)
{
    const uint len = cast(uint) strlen(literal);
    render_text(r, literal, len);
}

/* Some characters need to be escaped in normal HTML text. */
bool HTML_NEED_ESCAPE(MD_RENDER_HTML* r, CHAR ch)
{
    const ubyte slot = cast(ubyte)(ch);
    return r.escape_map[slot] != 0;
}

/* Writes `size` bytes of `data` to the output. Bytes reported by
 * HTML_NEED_ESCAPE() are not copied; the switch in the loop emits a
 * replacement literal for '&', '<', '>' and '"' instead. */
void render_html_escaped(MD_RENDER_HTML* r, const MD_CHAR* data, MD_SIZE size)
{
    MD_OFFSET beg = 0;
    MD_OFFSET off = 0;

    while(1) {
        /* Optimization: Use some loop unrolling.
*/
        while(off + 3 < size  &&  !HTML_NEED_ESCAPE(r, data[off+0])  &&  !HTML_NEED_ESCAPE(r, data[off+1])
                              &&  !HTML_NEED_ESCAPE(r, data[off+2])  &&  !HTML_NEED_ESCAPE(r, data[off+3]))
            off += 4;
        while(off < size  &&  !HTML_NEED_ESCAPE(r, data[off]))
            off++;

        /* Flush the run of bytes that needs no escaping. */
        if(off > beg)
            render_text(r, data + beg, off - beg);

        if(off < size) {
            /* The byte at `off` is flagged in escape_map: emit its character
             * reference. The literals must be the escaped forms; writing the
             * raw character here would defeat the whole function. */
            switch(data[off]) {
                case '&':   RENDER_LITERAL(r, "&amp;"); break;
                case '<':   RENDER_LITERAL(r, "&lt;"); break;
                case '>':   RENDER_LITERAL(r, "&gt;"); break;
                case '"':   RENDER_LITERAL(r, "&quot;"); break;
                default:    break;  /* Any other flagged byte is dropped. */
            }
            off++;
        } else {
            break;
        }
        beg = off;
    }
}


/* True when `ch` may not appear as-is in a URL attribute value: anything
 * other than an ASCII letter or digit or one of the characters listed below.
 * NOTE: strchr() also matches the string terminator, so a NUL byte is
 * reported as not needing an escape. */
bool URL_NEED_ESCAPE(CHAR ch)
{
    if(ISALNUM_HTML(ch))
        return false;
    return strchr("-_.+!*'(),%#@?=;:/,+$", ch) == null;
}

/* Writes `size` bytes of `data` as a URL attribute value. Bytes accepted by
 * URL_NEED_ESCAPE() are copied unchanged, '&' becomes "&amp;", and every
 * other byte is percent-encoded as "%XX" with upper-case hex digits. */
void render_url_escaped(MD_RENDER_HTML* r, const MD_CHAR* data, MD_SIZE size)
{
    static immutable(MD_CHAR)[] hex_chars = "0123456789ABCDEF";
    MD_OFFSET beg = 0;
    MD_OFFSET off = 0;

    while(1) {
        /* Find and flush the longest run that can be copied unchanged. */
        while(off < size  &&  !URL_NEED_ESCAPE(data[off]))
            off++;
        if(off > beg)
            render_text(r, data + beg, off - beg);

        if(off < size) {
            char[3] hex;

            switch(data[off]) {
                case '&':   RENDER_LITERAL(r, "&amp;"); break;
                /* '\'' is in URL_NEED_ESCAPE()'s safe list, so this case is
                 * not reached at present; it is kept as a safeguard should
                 * that list ever change. */
                case '\'':  RENDER_LITERAL(r, "&#x27;"); break;
                default:
                    hex[0] = '%';
                    hex[1] = hex_chars[(cast(uint)data[off] >> 4) & 0xf];
                    hex[2] = hex_chars[(cast(uint)data[off] >> 0) & 0xf];
                    render_text(r, hex.ptr, 3);
                    break;
            }
            off++;
        } else {
            break;
        }

        beg = off;
    }
}

/* Numeric value of one hexadecimal digit. '0'..'9' map to 0..9, upper-case
 * letters to `ch - 'A' + 10`, and everything else is treated as a lower-case
 * letter. No validation is performed here; presumably the caller has already
 * checked that `ch` is a hex digit (TODO confirm against the parser). */
uint hex_val(char ch)
{
    if('0' <= ch && ch <= '9')
        return ch - '0';
    if('A' <= ch && ch <= 'Z')
        return ch - 'A' + 10;
    else
        return ch - 'a' + 10;
}

/// Signature shared by the output helpers (render_text, render_html_escaped,
/// render_url_escaped) so that they can be passed around as callbacks.
alias appendFunc = nothrow @nogc void function(MD_RENDER_HTML*, const(MD_CHAR)*, MD_SIZE);

/* Encodes `codepoint` as UTF-8 and hands the bytes to `fn_append`.
 * Code point 0 and values above 0x10ffff are emitted as U+FFFD. */
void render_utf8_codepoint(MD_RENDER_HTML* r, uint codepoint,
appendFunc fn_append)
{
    static immutable(MD_CHAR)[] utf8_replacement_char = [ 0xef, 0xbf, 0xbd ];

    /* U+0000, values beyond U+10FFFF and the surrogate range U+D800..U+DFFF
     * cannot be written as well-formed UTF-8 text; emit U+FFFD instead. */
    const bool is_surrogate = (0xd800 <= codepoint  &&  codepoint <= 0xdfff);
    if(codepoint == 0  ||  codepoint > 0x10ffff  ||  is_surrogate) {
        fn_append(r, utf8_replacement_char.ptr, 3);
        return;
    }

    char[4] utf8;
    size_t n;

    if(codepoint <= 0x7f) {
        n = 1;
        utf8[0] = cast(ubyte) codepoint;
    } else if(codepoint <= 0x7ff) {
        n = 2;
        utf8[0] = 0xc0 | ((codepoint >>  6) & 0x1f);
        utf8[1] = 0x80 + ((codepoint >>  0) & 0x3f);
    } else if(codepoint <= 0xffff) {
        n = 3;
        utf8[0] = 0xe0 | ((codepoint >> 12) & 0xf);
        utf8[1] = 0x80 + ((codepoint >>  6) & 0x3f);
        utf8[2] = 0x80 + ((codepoint >>  0) & 0x3f);
    } else {
        n = 4;
        utf8[0] = 0xf0 | ((codepoint >> 18) & 0x7);
        utf8[1] = 0x80 + ((codepoint >> 12) & 0x3f);
        utf8[2] = 0x80 + ((codepoint >>  6) & 0x3f);
        utf8[3] = 0x80 + ((codepoint >>  0) & 0x3f);
    }

    fn_append(r, utf8.ptr, cast(uint)n);
}

/* Translate entity to its UTF-8 equivalent, or output the verbatim one
 * if such entity is unknown (or if the translation is disabled).
 *
 * `text` points at the leading '&' and `size` covers the whole entity;
 * the digit loops below stop one byte before the end, so the last byte is
 * never read as a digit. */
void render_entity(MD_RENDER_HTML* r, const(MD_CHAR)* text, MD_SIZE size,
                   appendFunc fn_append)
{
    if(r.flags & MD_RENDER_FLAG_VERBATIM_ENTITIES) {
        fn_append(r, text, size);
        return;
    }

    /* We assume UTF-8 output is what is desired. */
    if(size > 3 && text[1] == '#') {
        uint codepoint = 0;

        if(text[2] == 'x' || text[2] == 'X') {
            /* Hexadecimal entity (e.g. "&#x1234abcd;"). */
            MD_SIZE i;
            for(i = 3; i < size-1; i++)
                codepoint = 16 * codepoint + hex_val(text[i]);
        } else {
            /* Decimal entity (e.g. "&#1234;") */
            MD_SIZE i;
            for(i = 2; i < size-1; i++)
                codepoint = 10 * codepoint + (text[i] - '0');
        }

        render_utf8_codepoint(r, codepoint, fn_append);
        return;
    } else {
        /* Named entity (e.g. "&nbsp;").
*/
        const(entity)* ent = entity_lookup(text, size);

        if(ent != null) {
            /* A table entry carries up to two code points; the second one
             * is zero when unused. */
            render_utf8_codepoint(r, ent.codepoints[0], fn_append);
            if(ent.codepoints[1])
                render_utf8_codepoint(r, ent.codepoints[1], fn_append);
            return;
        }
    }

    /* Unknown entity: output it as written. */
    fn_append(r, text, size);
}

/* Renders an attribute value piece by piece. Piece `i` spans
 * substr_offsets[i] .. substr_offsets[i+1] and has type substr_types[i];
 * iteration stops at the first offset that is not below attr.size.
 * Entities go through render_entity(), a NULLCHAR piece is rendered as
 * code point 0 via render_text, everything else goes straight to `fn_append`. */
void render_attribute(MD_RENDER_HTML* r, const MD_ATTRIBUTE* attr,
                      appendFunc fn_append)
{
    for(int idx = 0; attr.substr_offsets[idx] < attr.size; idx++) {
        const MD_TEXTTYPE kind  = attr.substr_types[idx];
        const MD_OFFSET   start = attr.substr_offsets[idx];
        const MD_SIZE     len   = attr.substr_offsets[idx+1] - start;
        const MD_CHAR*    piece = attr.text + start;

        if(kind == MD_TEXT_NULLCHAR)
            render_utf8_codepoint(r, 0x0000, &render_text);
        else if(kind == MD_TEXT_ENTITY)
            render_entity(r, piece, len, fn_append);
        else
            fn_append(r, piece, len);
    }
}


/* Opens an ordered list; a `start` attribute is written only when the list
 * does not begin at 1. */
void render_open_ol_block(MD_RENDER_HTML* r, const(MD_BLOCK_OL_DETAIL)* det)
{
    if(det.start != 1) {
        char[64] buf;

        snprintf(buf.ptr, buf.length, "<ol start=\"%u\">\n", det.start);
        RENDER_LITERAL(r, buf.ptr);
        return;
    }

    RENDER_LITERAL(r, "<ol>\n");
}

/* Opens a list item. Task-list items additionally get a disabled checkbox,
 * which is checked when the task mark is 'x' or 'X'. */
void render_open_li_block(MD_RENDER_HTML* r, const(MD_BLOCK_LI_DETAIL)* det)
{
    if(!det.is_task) {
        RENDER_LITERAL(r, "<li>");
        return;
    }

    RENDER_LITERAL(r, "<li class=\"task-list-item\">" ~
                      "<input type=\"checkbox\" class=\"task-list-item-checkbox\" disabled");

    const bool ticked = (det.task_mark == 'x' || det.task_mark == 'X');
    if(ticked)
        RENDER_LITERAL(r, " checked");

    RENDER_LITERAL(r, ">");
}

void render_open_code_block(MD_RENDER_HTML* r, const(MD_BLOCK_CODE_DETAIL)* det)
{
    RENDER_LITERAL(r, "<pre><code");

    /* If known, output the HTML 5 attribute class="language-LANGNAME".
*/ 9293 if(det.lang.text != null) { 9294 RENDER_LITERAL(r, " class=\"language-"); 9295 render_attribute(r, &det.lang, &render_html_escaped); 9296 RENDER_LITERAL(r, "\""); 9297 } 9298 9299 RENDER_LITERAL(r, ">"); 9300 } 9301 9302 void render_open_td_block(MD_RENDER_HTML* r, const(MD_CHAR)* cell_type, const(MD_BLOCK_TD_DETAIL)* det) 9303 { 9304 RENDER_LITERAL(r, "<"); 9305 RENDER_LITERAL(r, cell_type); 9306 9307 switch(det.align_) 9308 { 9309 case MD_ALIGN_LEFT: RENDER_LITERAL(r, " align=\"left\">"); break; 9310 case MD_ALIGN_CENTER: RENDER_LITERAL(r, " align=\"center\">"); break; 9311 case MD_ALIGN_RIGHT: RENDER_LITERAL(r, " align=\"right\">"); break; 9312 default: RENDER_LITERAL(r, ">"); break; 9313 } 9314 } 9315 9316 void render_open_a_span(MD_RENDER_HTML* r, const(MD_SPAN_A_DETAIL)* det) 9317 { 9318 RENDER_LITERAL(r, "<a href=\""); 9319 render_attribute(r, &det.href, &render_url_escaped); 9320 9321 if(det.title.text != null) { 9322 RENDER_LITERAL(r, "\" title=\""); 9323 render_attribute(r, &det.title, &render_html_escaped); 9324 } 9325 9326 RENDER_LITERAL(r, "\">"); 9327 } 9328 9329 void render_open_img_span(MD_RENDER_HTML* r, const(MD_SPAN_IMG_DETAIL)* det) 9330 { 9331 RENDER_LITERAL(r, "<img src=\""); 9332 render_attribute(r, &det.src, &render_url_escaped); 9333 9334 RENDER_LITERAL(r, "\" alt=\""); 9335 9336 r.image_nesting_level++; 9337 } 9338 9339 void render_close_img_span(MD_RENDER_HTML* r, const(MD_SPAN_IMG_DETAIL)* det) 9340 { 9341 if(det.title.text != null) { 9342 RENDER_LITERAL(r, "\" title=\""); 9343 render_attribute(r, &det.title, &render_html_escaped); 9344 } 9345 RENDER_LITERAL(r, "\" />"); 9346 r.image_nesting_level--; 9347 } 9348 9349 9350 /************************************** 9351 *** HTML renderer implementation *** 9352 **************************************/ 9353 9354 int enter_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) 9355 { 9356 static immutable(MD_CHAR)*[6] head = [ "<h1>", "<h2>", "<h3>", "<h4>", "<h5>", "<h6>" ]; 
9357 MD_RENDER_HTML* r = cast(MD_RENDER_HTML*) userdata; 9358 9359 switch(type) 9360 { 9361 case MD_BLOCK_DOC: /* noop */ break; 9362 case MD_BLOCK_QUOTE: RENDER_LITERAL(r, "<blockquote>\n"); break; 9363 case MD_BLOCK_UL: RENDER_LITERAL(r, "<ul>\n"); break; 9364 case MD_BLOCK_OL: render_open_ol_block(r, cast(const(MD_BLOCK_OL_DETAIL)*)detail); break; 9365 case MD_BLOCK_LI: render_open_li_block(r, cast(const(MD_BLOCK_LI_DETAIL)*)detail); break; 9366 case MD_BLOCK_HR: RENDER_LITERAL(r, "<hr />\n"); break; 9367 case MD_BLOCK_H: RENDER_LITERAL(r, head[(cast(MD_BLOCK_H_DETAIL*)detail).level - 1]); break; 9368 case MD_BLOCK_CODE: render_open_code_block(r, cast(const(MD_BLOCK_CODE_DETAIL)*) detail); break; 9369 case MD_BLOCK_HTML: /* noop */ break; 9370 case MD_BLOCK_P: RENDER_LITERAL(r, "<p>"); break; 9371 case MD_BLOCK_TABLE: RENDER_LITERAL(r, "<table>\n"); break; 9372 case MD_BLOCK_THEAD: RENDER_LITERAL(r, "<thead>\n"); break; 9373 case MD_BLOCK_TBODY: RENDER_LITERAL(r, "<tbody>\n"); break; 9374 case MD_BLOCK_TR: RENDER_LITERAL(r, "<tr>\n"); break; 9375 case MD_BLOCK_TH: render_open_td_block(r, "th", cast(MD_BLOCK_TD_DETAIL*)detail); break; 9376 case MD_BLOCK_TD: render_open_td_block(r, "td", cast(MD_BLOCK_TD_DETAIL*)detail); break; 9377 default: assert(false); 9378 } 9379 9380 return 0; 9381 } 9382 9383 int leave_block_callback(MD_BLOCKTYPE type, void* detail, void* userdata) 9384 { 9385 static immutable(MD_CHAR)*[6] head = [ "</h1>\n", "</h2>\n", "</h3>\n", "</h4>\n", "</h5>\n", "</h6>\n" ]; 9386 MD_RENDER_HTML* r = cast(MD_RENDER_HTML*) userdata; 9387 9388 switch(type) { 9389 case MD_BLOCK_DOC: /*noop*/ break; 9390 case MD_BLOCK_QUOTE: RENDER_LITERAL(r, "</blockquote>\n"); break; 9391 case MD_BLOCK_UL: RENDER_LITERAL(r, "</ul>\n"); break; 9392 case MD_BLOCK_OL: RENDER_LITERAL(r, "</ol>\n"); break; 9393 case MD_BLOCK_LI: RENDER_LITERAL(r, "</li>\n"); break; 9394 case MD_BLOCK_HR: /*noop*/ break; 9395 case MD_BLOCK_H: RENDER_LITERAL(r, 
head[(cast(MD_BLOCK_H_DETAIL*)detail).level - 1]); break; 9396 case MD_BLOCK_CODE: RENDER_LITERAL(r, "</code></pre>\n"); break; 9397 case MD_BLOCK_HTML: /* noop */ break; 9398 case MD_BLOCK_P: RENDER_LITERAL(r, "</p>\n"); break; 9399 case MD_BLOCK_TABLE: RENDER_LITERAL(r, "</table>\n"); break; 9400 case MD_BLOCK_THEAD: RENDER_LITERAL(r, "</thead>\n"); break; 9401 case MD_BLOCK_TBODY: RENDER_LITERAL(r, "</tbody>\n"); break; 9402 case MD_BLOCK_TR: RENDER_LITERAL(r, "</tr>\n"); break; 9403 case MD_BLOCK_TH: RENDER_LITERAL(r, "</th>\n"); break; 9404 case MD_BLOCK_TD: RENDER_LITERAL(r, "</td>\n"); break; 9405 default: assert(false); 9406 } 9407 9408 return 0; 9409 } 9410 9411 int enter_span_callback(MD_SPANTYPE type, void* detail, void* userdata) 9412 { 9413 MD_RENDER_HTML* r = cast(MD_RENDER_HTML*) userdata; 9414 9415 if(r.image_nesting_level > 0) { 9416 /* We are inside an image, i.e. rendering the ALT attribute of 9417 * <IMG> tag. */ 9418 return 0; 9419 } 9420 9421 switch(type) { 9422 case MD_SPAN_EM: RENDER_LITERAL(r, "<em>"); break; 9423 case MD_SPAN_STRONG: RENDER_LITERAL(r, "<strong>"); break; 9424 case MD_SPAN_A: render_open_a_span(r, cast(MD_SPAN_A_DETAIL*) detail); break; 9425 case MD_SPAN_IMG: render_open_img_span(r, cast(MD_SPAN_IMG_DETAIL*) detail); break; 9426 case MD_SPAN_CODE: RENDER_LITERAL(r, "<code>"); break; 9427 case MD_SPAN_DEL: RENDER_LITERAL(r, "<del>"); break; 9428 case MD_SPAN_LATEXMATH: RENDER_LITERAL(r, "<equation>"); break; 9429 case MD_SPAN_LATEXMATH_DISPLAY: RENDER_LITERAL(r, "<equation type=\"display\">"); break; 9430 default: assert(false); 9431 } 9432 9433 return 0; 9434 } 9435 9436 int leave_span_callback(MD_SPANTYPE type, void* detail, void* userdata) 9437 { 9438 MD_RENDER_HTML* r = cast(MD_RENDER_HTML*) userdata; 9439 9440 if(r.image_nesting_level > 0) { 9441 /* We are inside an image, i.e. rendering the ALT attribute of 9442 * <IMG> tag. 
*/ 9443 if(r.image_nesting_level == 1 && type == MD_SPAN_IMG) 9444 render_close_img_span(r, cast(MD_SPAN_IMG_DETAIL*) detail); 9445 return 0; 9446 } 9447 9448 switch(type) { 9449 case MD_SPAN_EM: RENDER_LITERAL(r, "</em>"); break; 9450 case MD_SPAN_STRONG: RENDER_LITERAL(r, "</strong>"); break; 9451 case MD_SPAN_A: RENDER_LITERAL(r, "</a>"); break; 9452 case MD_SPAN_IMG: /*noop, handled above*/ break; 9453 case MD_SPAN_CODE: RENDER_LITERAL(r, "</code>"); break; 9454 case MD_SPAN_DEL: RENDER_LITERAL(r, "</del>"); break; 9455 case MD_SPAN_LATEXMATH: /*fall through*/ 9456 case MD_SPAN_LATEXMATH_DISPLAY: RENDER_LITERAL(r, "</equation>"); break; 9457 default: assert(false); 9458 } 9459 9460 return 0; 9461 } 9462 9463 int text_callback(MD_TEXTTYPE type, const(MD_CHAR)* text, MD_SIZE size, void* userdata) 9464 { 9465 MD_RENDER_HTML* r = cast(MD_RENDER_HTML*) userdata; 9466 9467 switch(type) { 9468 case MD_TEXT_NULLCHAR: render_utf8_codepoint(r, 0x0000, &render_text); break; 9469 case MD_TEXT_BR: RENDER_LITERAL(r, (r.image_nesting_level == 0 ? "<br />\n" : " ")); break; 9470 case MD_TEXT_SOFTBR: RENDER_LITERAL(r, (r.image_nesting_level == 0 ? "\n" : " ")); break; 9471 case MD_TEXT_HTML: render_text(r, text, size); break; 9472 case MD_TEXT_ENTITY: render_entity(r, text, size, &render_html_escaped); break; 9473 default: render_html_escaped(r, text, size); break; 9474 } 9475 9476 return 0; 9477 } 9478 9479 void debug_log_callback(const(char)* msg, void* userdata) 9480 { 9481 MD_RENDER_HTML* r = cast(MD_RENDER_HTML*) userdata; 9482 if(r.flags & MD_RENDER_FLAG_DEBUG) 9483 fprintf(stderr, "MD4C: %s\n", msg); 9484 } 9485 9486 9487 /* Render Markdown into HTML. 9488 * 9489 * Note only contents of <body> tag is generated. Caller must generate 9490 * HTML header/footer manually before/after calling md_render_html(). 9491 * 9492 * Params input and input_size specify the Markdown input. 9493 * Callback process_output() gets called with chunks of HTML output. 
9494 * (Typical implementation may just output the bytes to file or append to 9495 * some buffer). 9496 * Param userdata is just propagated back to process_output() callback. 9497 * Param parser_flags are flags from md4c.h propagated to md_parse(). 9498 * Param render_flags is bitmask of MD_RENDER_FLAG_xxxx. 9499 * 9500 * Returns -1 on error (if md_parse() fails.) 9501 * Returns 0 on success. 9502 */ 9503 int md_render_html(const(MD_CHAR)* input, MD_SIZE input_size, 9504 void function(const(MD_CHAR)*, MD_SIZE, void*) nothrow @nogc process_output, 9505 void* userdata, uint parser_flags, uint renderer_flags) 9506 { 9507 MD_RENDER_HTML render = MD_RENDER_HTML(process_output, userdata, renderer_flags, 0); 9508 render.escape_map[] = '\x00'; 9509 9510 MD_PARSER parser = MD_PARSER( 9511 0, 9512 parser_flags, 9513 &enter_block_callback, 9514 &leave_block_callback, 9515 &enter_span_callback, 9516 &leave_span_callback, 9517 &text_callback, 9518 &debug_log_callback, 9519 null 9520 ); 9521 9522 render.escape_map['"'] = 1; 9523 render.escape_map['&'] = 1; 9524 render.escape_map['<'] = 1; 9525 render.escape_map['>'] = 1; 9526 9527 return md_parse(input, input_size, &parser, cast(void*) &render); 9528 } 9529