1 /++ 2 Some support for the Microsoft Excel Spreadsheet file format. 3 4 Don't expect much from it, not even API stability. 5 6 Some code is borrowed from the xlsxreader package. 7 8 History: 9 Added February 13, 2025 10 11 See_Also: 12 https://github.com/symmetryinvestments/xlsxd which supports writing xlsx files. I might add write support here too someday but I kinda doubt it. 13 +/ 14 module arsd.xlsx; 15 16 /+ 17 ./csv-viewer ~/Downloads/UI_comparison.xlsx 18 arsd.dom.ElementNotFoundException@/home/me/program/lib/arsd/xlsx.d(823): Element of type 'Element' matching {worksheet > dimension} not found. 19 +/ 20 21 /+ 22 sheet at double[]: 23 24 nan payloads for blank, errors, then strings as indexes into a table. 25 +/ 26 27 // FIXME: does excel save errors like DIV0 to content in the file? 28 29 // See also Robert's impl: https://github.com/symmetryinvestments/xlsxreader/blob/master/source/xlsxreader.d 30 31 import arsd.core; 32 import arsd.zip; 33 import arsd.dom; 34 import arsd.color; 35 36 import std.conv; 37 38 private struct ExcelFormatStringLexeme { 39 string lexeme; 40 bool isLiteral; 41 } 42 43 class ExcelFormatStringException : Exception { 44 this(string msg, string file = __FILE__, size_t line = __LINE__) { 45 super(msg, file, line); 46 } 47 } 48 49 // FIXME: out contract that asserts s_io.length has indeed been reduced 50 private ExcelFormatStringLexeme extractExcelFormatStringLexeme(ref string s_io) { 51 assert(s_io.length); 52 string s = s_io; 53 54 switch(s[0]) { 55 case '[': 56 // condition or color 57 // or elapsed time thing. 58 // or a locale setting thing for dates (and more?) 59 int count = 0; 60 int size = 0; 61 while(s[0]) { 62 if(s[0] == '[') 63 count++; 64 if(s[0] == ']') 65 count--; 66 s = s[1 .. $]; 67 size++; 68 if(count == 0) 69 break; 70 if(s.length == 0) 71 throw new ExcelFormatStringException("unclosed ["); 72 } 73 74 string ret = s_io[0 .. size]; 75 s_io = s_io[size .. $]; 76 77 return ExcelFormatStringLexeme(ret, false); 78 case '"': 79 // quoted thing watching for backslash 80 bool escaped; 81 int size; 82 83 size++; 84 s = s[1 .. $]; // skip the first " 85 86 string ret; 87 88 while(escaped || s[0] != '"') { 89 if(!escaped) { 90 if(s[0] == '"') { 91 break; 92 } 93 if(s[0] == '\\') 94 escaped = true; 95 else 96 ret ~= s[0]; 97 } else { 98 ret ~= s[0]; 99 escaped = false; 100 } 101 102 s = s[1 .. $]; 103 size++; 104 } 105 if(s.length == 0) 106 throw new ExcelFormatStringException("unclosed \""); 107 size++; 108 109 s_io = s_io[size .. $]; 110 return ExcelFormatStringLexeme(ret, true); 111 112 case '\\': 113 // escaped character 114 s = s[1 .. $]; // skip the \ 115 s_io = s_io[1 .. $]; 116 117 // FIXME: need real stride 118 auto stride = 1; 119 s_io = s_io[stride .. $]; 120 return ExcelFormatStringLexeme(s[0 .. stride], true); 121 case '$', '+', '(', ':', '^', '\'', '{', '<', '=', '-', ')', '!', '&', '~', '}', '>', ' ': // they say slash but that seems to be fraction instead 122 // character literals w/o needing to be quoted 123 s_io = s_io[1 .. $]; 124 return ExcelFormatStringLexeme(s[0 .. 1], true); 125 case 'A', 'a', 'P', 'p': 126 // am/pm 127 128 int size = 0; 129 while( 130 s[0] == 'a' || s[0] == 'A' || 131 s[0] == 'p' || s[0] == 'P' || 132 s[0] == 'm' || s[0] == 'M' || 133 s[0] == '/' 134 ) { 135 size++; 136 s = s[1 .. $]; 137 if(s.length == 0) 138 break; 139 } 140 // also switches hour to 12 hour format when it happens 141 string ret = s_io[0 .. size]; 142 s_io = s_io[size .. $]; 143 144 return ExcelFormatStringLexeme(ret, false); 145 146 // the single char directives 147 case '@': // text placeholder 148 case ';': // clause separator 149 s_io = s_io[1 .. $]; 150 return ExcelFormatStringLexeme(s[0 .. 1], false); 151 case '_': // padding char - this adds a space with the same width as the char that follows it, for column alignment. 152 case '*': // fill char 153 // the padding or fill is the next lexeme, not the next char! 154 s_io = s_io[1 .. $]; 155 return ExcelFormatStringLexeme(s[0 .. 1], false); 156 case 'e', 'E': // scientific notation request 157 case '%': // percent indicator 158 case ',': // thousands separator 159 case '.': // decimal separator 160 case '/': // fraction or date separator 161 s_io = s_io[1 .. $]; 162 return ExcelFormatStringLexeme(s[0 .. 1], false); 163 case /*'m',*/ 'd', 'y': // date parts 164 case 'h', 'm', 's': // time parts 165 166 /+ 167 Note: The m or mm code must appear immediately after the h or hh code or immediately before the ss code; otherwise, Excel displays the month instead of minutes. 168 169 it can be either a date/time OR a number/fraction, not both. 170 +/ 171 172 auto thing = s[0]; 173 int size; 174 while(s.length && s[0] == thing) { 175 s = s[1 .. $]; 176 size++; 177 } 178 auto keep = s_io[0 .. size]; 179 s_io = s_io[size .. $]; 180 return ExcelFormatStringLexeme(keep, false); 181 case '1': .. case '9': // fraction denominators or just literal numbers 182 int size; 183 while(s.length && s[0] >= '1' && s[0] <= '9') { 184 s = s[1 .. $]; 185 size++; 186 } 187 auto keep = s_io[0 .. size]; 188 s_io = s_io[size .. $]; 189 return ExcelFormatStringLexeme(keep, false); 190 case '0', '#', '?': // digit placeholder 191 int size; 192 193 while(s[0] == '0' || s[0] == '#' || s[0] == '?') { 194 s = s[1 .. $]; 195 size++; 196 if(s.length == 0) 197 break; 198 } 199 200 auto keep = s_io[0 .. size]; 201 s_io = s_io[size .. $]; 202 return ExcelFormatStringLexeme(keep, false); 203 204 default: 205 // idk 206 throw new ExcelFormatStringException("unknown char " ~ s); 207 } 208 209 assert(0); 210 } 211 212 unittest { 213 string thing = `[>50][Red]"foo"`; 214 ExcelFormatStringLexeme lexeme; 215 216 lexeme = extractExcelFormatStringLexeme(thing); 217 assert(thing == `[Red]"foo"`); 218 lexeme = extractExcelFormatStringLexeme(thing); 219 assert(thing == `"foo"`); 220 lexeme = extractExcelFormatStringLexeme(thing); 221 assert(thing == ""); 222 assert(lexeme.lexeme == "foo"); 223 224 thing = `"\""`; 225 lexeme = extractExcelFormatStringLexeme(thing); 226 assert(thing == ""); 227 assert(lexeme.lexeme == `"`); 228 229 thing = `\,`; 230 lexeme = extractExcelFormatStringLexeme(thing); 231 assert(thing == ""); 232 assert(lexeme.lexeme == `,`); 233 234 /* 235 thing = `"A\""`; 236 lexeme = extractExcelFormatStringLexeme(thing); 237 assert(thing == ""); 238 assert(lexeme.lexeme == `"`); 239 */ 240 241 /+ 242 thing = "mm-yyyy"; 243 lexeme = extractExcelFormatStringLexeme(thing); 244 import std.stdio; writeln(thing); writeln(lexeme); 245 +/ 246 } 247 248 struct XlsxFormat { 249 string originalFormatString; 250 251 Color foregroundColor; 252 Color backgroundColor; 253 254 int alignment; // 0 = left, 1 = right, 2 = center 255 256 enum Type { 257 /++ 258 +/ 259 String, 260 /++ 261 262 +/ 263 Number, 264 /++ 265 A Date is a special kind of number in Excel. 266 +/ 267 Date, 268 /++ 269 things like # ?/4 270 271 +/ 272 Fraction, 273 Percent 274 } 275 Type type; 276 277 /++ 278 +/ 279 static struct Result { 280 string content; 281 string color; 282 int alignment; 283 } 284 285 /++ 286 +/ 287 Result applyTo(string s) const { 288 if(this.type == Type.String || originalFormatString == "@" || originalFormatString.length == 0) 289 return Result(s, null, alignment); 290 291 int alignment = this.alignment; 292 293 // need to check for a text thing and if conversion fails, we use that 294 double value; 295 try { 296 value = to!double(s); 297 } catch(Exception e) { 298 value = double.nan; 299 } 300 301 DateTime date_; 302 bool dateCalculated; 303 304 DateTime getDate() { 305 // make sure value is not nan before here or it will throw "negative overflow"! 306 if(!dateCalculated) { 307 date_ = doubleToDateTime(value); 308 dateCalculated = true; 309 } 310 311 return date_; 312 } 313 314 // parse out the original format string 315 // the ordering by default is positive;negative;zero;text 316 // 317 // these can also be like [Color][Condition]fmt;generic 318 // color is allowed anywhere 319 // but condition can only have two things following: `[Color][Condition];` repeated any number of times then `;generic-number;text`. no more negative/zero stuff. 320 // once we see a condition, it switches modes - following things MUST have condition or else are treated as just generic catch all for number and then text. 321 // 322 // it matches linearly. 323 /+ 324 so it goes: 325 implicit match >0 326 implicit match <0 327 implicit match =0 328 text 329 330 but if at any point one of them has a condition, the following ones must be either more conditions (immediately!) or unconditional: 331 fallthrough for number 332 text 333 334 335 and if i dont support a format thing i can always fall back to the original text. 336 +/ 337 338 try { 339 string fmt = originalFormatString; 340 341 int state = 0; // 0 == positive, 1 == negative or custom, 2 == other, 3 == text 342 bool matchesCurrentCondition = value > 0; 343 344 bool hasMultipleClauses = false; 345 { 346 string fmt2 = fmt; 347 while(fmt2.length) { 348 auto next = extractExcelFormatStringLexeme(fmt2); 349 if(!next.isLiteral && next.lexeme == ";") 350 hasMultipleClauses = true; 351 break; 352 } 353 } 354 if(hasMultipleClauses == false) 355 matchesCurrentCondition = true; // only one thing means we must always match it 356 357 int numericState; 358 bool inDenominator; 359 bool inAmPm; 360 bool inDecimal; 361 bool justSawHours; 362 363 // these are populated below once we match a clause 364 bool hasAmPm; 365 bool hasFraction; 366 bool hasScientificNotation; 367 bool hasPercent; 368 bool first = true; 369 370 string color; 371 string ret; 372 373 while(fmt.length) { 374 auto lexeme = extractExcelFormatStringLexeme(fmt); 375 376 ExcelFormatStringLexeme peekLexeme(bool returnLiteral = false) { 377 string fmt2 = fmt; 378 skip: 379 if(fmt2.length == 0) 380 return ExcelFormatStringLexeme.init; 381 auto next = extractExcelFormatStringLexeme(fmt2); 382 if(next.isLiteral && !returnLiteral) 383 goto skip; 384 return next; 385 } 386 387 if(!lexeme.isLiteral && lexeme.lexeme[0] == ';') { 388 // we finished the format of the match, so no need to continue 389 if(matchesCurrentCondition) 390 break; 391 // otherwise, we go to the next thing 392 state++; 393 if(state == 1) { 394 matchesCurrentCondition = value < 0; 395 } else if(state == 2) { 396 // this is going to be either the catch-all fallback or another custom one 397 // for now, assume it is a catch-all 398 import std.math; 399 matchesCurrentCondition = !isNaN(value) ? true : false; // only numbers, so not text, matches the catch-all 400 } else if(state == 3) { 401 matchesCurrentCondition = true; // this needs to match, we're at the end, so this is the text display 402 } else { 403 throw new ExcelFormatStringException("too many ; pieces"); 404 } 405 406 continue; 407 } 408 409 if(!matchesCurrentCondition) 410 continue; 411 412 // scan ahead to see if we're doing some special cases: fractions, 12 hour clock, percentages, and sci notation 413 if(first) { 414 string fmt2 = fmt; 415 while(fmt2.length) { 416 auto next = extractExcelFormatStringLexeme(fmt2); 417 if(!next.isLiteral) { 418 // don't proceed into the next clause 419 if(next.lexeme == ";") 420 break; 421 422 char c = next.lexeme[0] | 0x20; 423 if(next.lexeme == "/") 424 hasFraction = true; 425 else if(next.lexeme == "%") { 426 hasPercent = true; 427 value *= 100.0; 428 } else if(c == 'e') 429 hasScientificNotation = true; 430 else if(c == 'a' || c == 'p') 431 hasAmPm = true; 432 } 433 } 434 first = false; 435 } 436 437 if(hasScientificNotation) 438 return Result(s, "unsupported feature: scientific notation"); // FIXME 439 if(hasFraction) 440 return Result(s, "unsupported feature: fractions"); // FIXME 441 442 if(!lexeme.isLiteral && lexeme.lexeme[0] == '[') { 443 // look for color, condition, or locale 444 char nc = lexeme.lexeme[1]; 445 if(nc == '$') 446 continue; // locale i think, skip it 447 if(nc == '<' || nc == '>' || nc == '=') { 448 // condition 449 450 if(state == 1 || state == 2) { 451 state = 1; 452 // read the condition, see if we match it 453 auto condition = lexeme.lexeme[1 .. $-1]; 454 455 string operator; 456 string num; 457 if(condition[1] == '=') { 458 operator = condition[0 .. 2]; 459 num = condition[2 .. $]; 460 } else { 461 operator = condition[0 .. 1]; 462 num = condition[1 .. $]; 463 } 464 465 double compareTo; 466 try { 467 compareTo = to!double(num); 468 } catch(Exception e) { 469 throw new ExcelFormatStringException("not a number: " ~ num); 470 } 471 switch(operator) { 472 case "<": 473 matchesCurrentCondition = value < compareTo; 474 break; 475 case "<=": 476 matchesCurrentCondition = value <= compareTo; 477 break; 478 case ">": 479 matchesCurrentCondition = value > compareTo; 480 break; 481 case ">=": 482 matchesCurrentCondition = value >= compareTo; 483 break; 484 case "=": 485 // FIXME: approxEqual? 486 matchesCurrentCondition = value == compareTo; 487 break; 488 489 default: 490 throw new ExcelFormatStringException("not a supported comparison operator " ~ operator); 491 } 492 493 continue; 494 } else { 495 throw new ExcelFormatStringException("inappropriately placed custom condition"); 496 } 497 } else { 498 // color, we hope. FIXME can also be [s], [m], or [h] or maybe [ss], [mm], [hh] 499 // colors are capitalized... 500 color = lexeme.lexeme[1 .. $-1]; 501 continue; 502 } 503 } 504 505 // if we're here, it should actually match and need some processing. 506 507 if(lexeme.isLiteral) { 508 // literals are easy... 509 ret ~= lexeme.lexeme; 510 } else { 511 // but the rest of these are formatting commands 512 switch(lexeme.lexeme[0]) { 513 case ',': 514 // thousands separator requested, 515 // handled below in the decimal placeholder thing 516 break; 517 case '_', '*': 518 auto lexemeToPadWith = extractExcelFormatStringLexeme(fmt); 519 if(lexeme.lexeme[0] == '_') 520 ret ~= " "; // FIXME supposed to match width of the char 521 else if(lexeme.lexeme[0] == '*') 522 ret ~= lexemeToPadWith.lexeme; // FIXME: supposed to repeat to fill the column width 523 break; 524 case '@': // the original text 525 ret ~= s; 526 break; 527 case '%': 528 ret ~= lexeme.lexeme; 529 break; 530 case '.': 531 inDecimal = true; 532 ret ~= lexeme.lexeme; 533 break; 534 case '/': 535 if(!inAmPm) { 536 inDenominator = true; 537 ret ~= lexeme.lexeme; 538 } 539 break; 540 case '#', '0', '?': 541 // decimal group 542 // # = digit 543 // 0 = digit, pad with 0 if not significant 544 // ? = digit, pad with space (same sized as digit) if not significant 545 546 if(value is double.nan) 547 return Result(s, "NaN"); 548 549 alignment = 1; // if we are printing numbers let's assume right align FIXME 550 /+ 551 if(s.length == 0 && value is double.nan) // and if we printing numbers, treat empty cell as 0 552 value = 0.0; 553 +/ 554 555 bool appendNumber(double v, bool includeThousandsSeparator) { 556 if(v < 0) 557 v = -v; 558 string f = to!string(cast(int) v); 559 if(f.length < lexeme.lexeme.length) 560 foreach(l; lexeme.lexeme[0 .. $ - f.length]) { 561 if(l == '0') 562 ret ~= '0'; 563 else if(l == '?') 564 ret ~= ' '; 565 } 566 if(f.length) { 567 if(includeThousandsSeparator) { 568 // 14532 569 // 1234 570 // 123 571 auto offset = cast(int) f.length % 3; 572 while(f.length > 3) { 573 ret ~= f[offset .. offset + 3]; 574 ret ~= ","; 575 f = f[3 .. $]; 576 } 577 ret ~= f; 578 } else { 579 ret ~= f; 580 } 581 return true; 582 } 583 return false; 584 } 585 586 if(peekLexeme().lexeme == ",") { 587 // thousands separator requested... 588 auto v = cast(int) value / 1000; 589 590 if(v == 0) 591 continue; // FIXME? maybe we want some leading 0 padding? 592 593 auto hadOutput = appendNumber(v, true); 594 595 value = value - v * 1000; // take the remainder for the next iteration of the loop 596 597 if(hadOutput) 598 ret ~= ","; // append the comma before the final thousands digits in the next iteration 599 600 continue; 601 } 602 603 604 if(inDecimal) { 605 // FIXME: no more std.format 606 import std.format; 607 string f = format("%."~to!string(lexeme.lexeme.length)~"f", value - cast(int) value)[2..$]; // slice off the "0." 608 ret ~= f; 609 } else { 610 appendNumber(value, false); 611 } 612 613 inDenominator = false; 614 break; 615 case '1': .. case '9': 616 // number, if in denominator position 617 // otherwise treat as string 618 if(inDenominator) 619 inDenominator = false; // the rest is handled elsewhere 620 else 621 ret ~= lexeme.lexeme; 622 break; 623 case 'y': 624 if(value is double.nan) 625 return Result(s, "NaN date"); 626 627 justSawHours = false; 628 auto y = getDate().year; 629 630 char[16] buffer; 631 632 switch(lexeme.lexeme.length) { 633 case 2: 634 ret ~= intToString(y % 100, buffer[], IntToStringArgs().withPadding(2)); 635 break; 636 case 4: 637 ret ~= intToString(y, buffer[], IntToStringArgs().withPadding(4)); 638 break; 639 default: 640 throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme); 641 } 642 break; 643 case 'm': 644 if(value is double.nan) 645 return Result(s, "NaN date"); 646 auto peek = peekLexeme(false); 647 bool precedesSeconds = 648 (peek.lexeme.length && peek.lexeme[0] == 's') 649 || 650 (peek.lexeme.length > 1 && peek.lexeme[1] == 's') 651 ; 652 653 if(justSawHours || precedesSeconds) { 654 // minutes 655 auto m = getDate().timeOfDay.minute; 656 657 char[16] buffer; 658 659 switch(lexeme.lexeme.length) { 660 case 1: 661 ret ~= intToString(m, buffer[]); 662 break; 663 case 2: 664 ret ~= intToString(m, buffer[], IntToStringArgs().withPadding(2)); 665 break; 666 default: 667 throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme); 668 } 669 } else { 670 // month 671 auto m = cast(int) getDate().month; 672 673 char[16] buffer; 674 675 import arsd.calendar; 676 677 switch(lexeme.lexeme.length) { 678 case 1: 679 ret ~= intToString(m, buffer[]); 680 break; 681 case 2: 682 ret ~= intToString(m, buffer[], IntToStringArgs().withPadding(2)); 683 break; 684 case 3: // abbreviation 685 ret ~= monthNames[m][0 .. 3]; 686 break; 687 case 4: // full name 688 ret ~= monthNames[m]; 689 break; 690 case 5: // single letter 691 ret ~= monthNames[m][0 .. 1]; // FIXME? 692 break; 693 default: 694 throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme); 695 } 696 } 697 698 justSawHours = false; 699 break; 700 case 'd': 701 if(value is double.nan) 702 return Result(s, "NaN date"); 703 justSawHours = false; 704 705 char[16] buffer; 706 707 import arsd.calendar; 708 709 auto d = getDate().day; 710 auto dow = cast(int) getDate().dayOfWeek; 711 712 switch(lexeme.lexeme.length) { 713 case 1: 714 ret ~= intToString(d, buffer[]); 715 break; 716 case 2: 717 ret ~= intToString(d, buffer[], IntToStringArgs().withPadding(2)); 718 break; 719 case 3: 720 // abbreviation 721 ret ~= daysOfWeekNames[dow][0 .. 3]; 722 break; 723 case 4: 724 // full name 725 ret ~= daysOfWeekNames[dow]; 726 break; 727 default: 728 throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme); 729 } 730 break; 731 case 'h': 732 if(value is double.nan) 733 return Result(s, "NaN date"); 734 justSawHours = true; 735 736 auto m = getDate().timeOfDay.hour; 737 char[16] buffer; 738 739 if(hasAmPm && m > 12) 740 m -= 12; 741 if(hasAmPm && m == 0) 742 m = 12; 743 744 switch(lexeme.lexeme.length) { 745 case 1: 746 ret ~= intToString(m, buffer[]); 747 break; 748 case 2: 749 ret ~= intToString(m, buffer[], IntToStringArgs().withPadding(2)); 750 break; 751 default: 752 throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme); 753 } 754 break; 755 case 'a', 'A': 756 if(value is double.nan) 757 return Result(s, "NaN date"); 758 inAmPm = true; 759 auto m = getDate().timeOfDay.hour; 760 if(m >= 12) 761 ret ~= lexeme.lexeme[0] == 'a' ? "pm" : "PM"; 762 else 763 ret ~= lexeme.lexeme[0] == 'a' ? "am" : "AM"; 764 break; 765 case 'p', 'P': 766 inAmPm = false; 767 break; 768 case 's': 769 if(value is double.nan) 770 return Result(s, "NaN date"); 771 auto m = getDate().timeOfDay.second; 772 char[16] buffer; 773 switch(lexeme.lexeme.length) { 774 case 1: 775 ret ~= intToString(m, buffer[]); 776 break; 777 case 2: 778 ret ~= intToString(m, buffer[], IntToStringArgs().withPadding(2)); 779 break; 780 default: 781 throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme); 782 } 783 break; 784 case 'e', 'E': 785 // FIXME: scientific notation 786 break; 787 default: 788 assert(0, "unsupported formatting command: " ~ lexeme.lexeme); 789 } 790 } 791 } 792 793 return Result(ret, color, alignment); 794 } catch(ExcelFormatStringException e) { 795 // we'll fall back to just displaying the original input text 796 return Result(s, e.msg /* FIXME should be null */, alignment); 797 } 798 } 799 800 /+ 801 positive;negative;zero;text 802 can include formats and dates and tons of stuff. 803 https://support.microsoft.com/en-us/office/review-guidelines-for-customizing-a-number-format-c0a1d1fa-d3f4-4018-96b7-9c9354dd99f5 804 +/ 805 private this(XlsxFile file, XlsxFile.StyleInternal.xf formatting) { 806 if(formatting.applyNumberFormat) { 807 // dates too depending on format 808 //import std.stdio; writeln(formatting.numFmtId); writeln(file.styleInternal.numFmts); 809 this.originalFormatString = file.styleInternal.numFmts[formatting.numFmtId]; 810 811 this.type = Type.Number; 812 } else { 813 this.type = Type.String; 814 } 815 816 /+ 817 xf also has: 818 819 int xfId; 820 int numFmtId; 821 int fontId; 822 int fillId; 823 int borderId; 824 +/ 825 } 826 827 private this(string f) { 828 this.originalFormatString = f; 829 this.type = Type.Number; 830 } 831 } 832 833 unittest { 834 assert(XlsxFormat(`;;;"foo"`).applyTo("anything") == XlsxFormat.Result("foo", null)); 835 assert(XlsxFormat(`#.#;;;"foo"`).applyTo("2.0") == XlsxFormat.Result("2.0", null, 1)); 836 assert(XlsxFormat(`0#.##;;;"foo"`).applyTo("24.25") == XlsxFormat.Result("24.25", null, 1)); 837 assert(XlsxFormat(`0#.##;;;"foo"`).applyTo("2.25") == XlsxFormat.Result("02.25", null, 1)); 838 assert(XlsxFormat(`#,#.##`).applyTo("2.25") == XlsxFormat.Result("2.25", null, 1)); 839 assert(XlsxFormat(`#,#.##`).applyTo("123.25") == XlsxFormat.Result("123.25", null, 1)); 840 assert(XlsxFormat(`#,#.##`).applyTo("1234.25") == XlsxFormat.Result("1,234.25", null, 1)); 841 assert(XlsxFormat(`#,#.##`).applyTo("123456.25") == XlsxFormat.Result("123,456.25", null, 1)); 842 } 843 844 struct XlsxCell { 845 string formula; 846 string content; 847 XlsxFormat formatting; 848 849 XlsxFormat.Result displayableResult() { 850 return formatting.applyTo(content); 851 } 852 853 string toString() { 854 return displayableResult().content; 855 } 856 } 857 858 struct CellReference { 859 string name; 860 861 static CellReference fromInts(int column, int row) { 862 string ret; 863 864 string piece; 865 int adjustment = 0; 866 do { 867 piece ~= cast(char)(column % 26 + 'A' - adjustment); 868 if(adjustment == 0) 869 adjustment = 1; 870 column /= 26; 871 } while(column); 872 873 foreach_reverse(ch; piece) 874 ret ~= ch; 875 piece = null; 876 877 do { 878 piece ~= cast(char)(row % 10 + '0'); 879 row /= 10; 880 } while(row); 881 882 foreach_reverse(ch; piece) 883 ret ~= ch; 884 piece = null; 885 886 return CellReference(ret); 887 } 888 889 string toString() { 890 return name; 891 } 892 893 int toColumnIndex() { 894 size_t endSlice = name.length; 895 foreach(idx, ch; name) { 896 if(ch < 'A' || ch > 'Z') { 897 endSlice = idx; 898 break; 899 } 900 } 901 902 int accumulator; 903 foreach(idx, ch; name[0 .. endSlice]) { 904 int value; 905 if(idx + 1 == endSlice) { 906 // an A in the last "digit" is a 0, elsewhere it is a 1 907 value = ch - 'A'; 908 } else { 909 value = ch - 'A' + 1; 910 } 911 912 accumulator *= 26; 913 accumulator += value; 914 } 915 return accumulator; 916 } 917 918 int toRowIndex() { 919 int accumulator; 920 foreach(ch; name) { 921 if(ch >= 'A' && ch <= 'Z') 922 continue; 923 accumulator *= 10; 924 accumulator += ch - '0'; 925 } 926 return accumulator; 927 } 928 } 929 930 unittest { 931 auto cr = CellReference("AE434"); 932 assert(cr.toColumnIndex == 30); 933 cr = CellReference("E434"); 934 assert(cr.toColumnIndex == 4); // zero-based 935 936 // zero-based column, 1-based row. wtf? 937 assert(CellReference("AE434") == CellReference.fromInts(30, 434)); 938 939 assert(CellReference("Z1") == CellReference.fromInts(25, 1)); 940 } 941 942 /++ 943 944 +/ 945 class XlsxSheet { 946 private string name_; 947 private XlsxFile file; 948 private XmlDocument document; 949 private this(XlsxFile file, string name, XmlDocument document) { 950 this.file = file; 951 this.name_ = name; 952 this.document = document; 953 954 this.dimension = document.requireSelector("worksheet > dimension").getAttribute("ref"); 955 // there's also sheetView with selection, activeCell, etc 956 // and cols with widths and such 957 958 auto ul = this.upperLeft; 959 this.minRow = ul.toRowIndex; 960 this.minColumn = ul.toColumnIndex; 961 962 auto lr = this.lowerRight; 963 this.maxRow = lr.toRowIndex + 1; 964 this.maxColumn = lr.toColumnIndex + 1; 965 } 966 967 private string dimension; 968 969 private int minRow; 970 private int minColumn; 971 private int maxRow; 972 private int maxColumn; 973 974 /++ 975 +/ 976 Size size() { 977 return Size(maxColumn - minColumn, maxRow - minRow); 978 } 979 980 private CellReference upperLeft() { 981 foreach(idx, ch; dimension) 982 if(ch == ':') 983 return CellReference(dimension[0 .. idx]); 984 //assert(0); // it has no lower right... 985 return CellReference(dimension); 986 } 987 988 private CellReference lowerRight() { 989 foreach(idx, ch; dimension) 990 if(ch == ':') 991 return CellReference(dimension[idx + 1 .. $]); 992 assert(0); 993 } 994 995 // opIndex could be like sheet["A1:B4"] and sheet["A1", "B4"] and stuff maybe. 996 997 /++ 998 +/ 999 string name() { 1000 return name_; 1001 } 1002 1003 /++ 1004 Suitable for passing to [arsd.csv.toCsv] 1005 +/ 1006 string[][] toStringGrid() { 1007 auto grid = this.toGrid(); 1008 1009 string[][] ret; 1010 ret.length = size.height; 1011 foreach(i, ref row; ret) { 1012 row.length = size.width; 1013 foreach(k, ref cell; row) 1014 cell = grid[i][k].toString(); 1015 } 1016 1017 return ret; 1018 } 1019 1020 /++ 1021 1022 +/ 1023 XlsxCell[][] toGrid() { 1024 // FIXME: this crashes on opend dmd! 1025 // string[][] ret = new string[][](size.height, size.width); 1026 1027 /+ 1028 // almost everything we allocate in here is to keep, so 1029 // turning off the GC while working prevents unnecessary 1030 // collection attempts that won't find any garbage anyway. 1031 1032 // but meh no significant difference in perf anyway. 1033 import core.memory; 1034 GC.disable(); 1035 scope(exit) 1036 GC.enable(); 1037 +/ 1038 1039 XlsxCell[][] ret; 1040 ret.length = size.height; 1041 foreach(ref row; ret) 1042 row.length = size.width; 1043 1044 //alloc done 1045 1046 auto sheetData = document.requireSelector("sheetData"); 1047 Element[] rowElements = sheetData.childNodes; 1048 1049 Element[] nextRow(int expected) { 1050 if(rowElements.length == 0) 1051 throw new Exception("ran out of row elements..."); 1052 1053 Element rowElement; 1054 Element[] before = rowElements; 1055 1056 do { 1057 rowElement = rowElements[0]; 1058 rowElements = rowElements[1 .. $]; 1059 } while(rowElement.tagName != "row"); 1060 1061 if(rowElement.attrs.r.to!int != expected) { 1062 // a row was skipped in the file, so we'll 1063 // return an empty placeholder too 1064 rowElements = before; 1065 return null; 1066 } 1067 1068 return rowElement.childNodes; 1069 } 1070 1071 foreach(int rowIdx, row; ret) { 1072 auto cellElements = nextRow(rowIdx + 1); 1073 1074 foreach(int cellIdx, ref cell; row) { 1075 string cellReference = CellReference.fromInts(cellIdx + minColumn, rowIdx + minRow).name; 1076 1077 Element element = null; 1078 foreach(idx, thing; cellElements) { 1079 if(thing.attrs.r == cellReference) { 1080 element = thing; 1081 cellElements = cellElements[idx + 1 .. $]; 1082 break; 1083 } 1084 } 1085 1086 if(element is null) 1087 continue; 1088 string v = element.optionSelector("v").textContent; 1089 if(element.attrs.t == "s") 1090 v = file.sharedStrings[v.to!int()]; 1091 1092 auto sString = element.attrs.s; 1093 auto sId = sString.length ? to!int(sString) : 0; 1094 1095 string f = element.optionSelector("f").textContent; 1096 1097 cell = XlsxCell(f, v, XlsxFormat(file, file.styleInternal.xfs[sId])); 1098 } 1099 } 1100 return ret; 1101 } 1102 } 1103 1104 /++ 1105 1106 +/ 1107 class XlsxFile { 1108 private ZipFile zipFile; 1109 1110 /++ 1111 1112 +/ 1113 this(FilePath file) { 1114 this.zipFile = new ZipFile(file); 1115 1116 load(); 1117 } 1118 1119 /// ditto 1120 this(immutable(ubyte)[] rawData) { 1121 this.zipFile = new ZipFile(rawData); 1122 1123 load(); 1124 } 1125 1126 /++ 1127 +/ 1128 int sheetCount() { 1129 return cast(int) sheetsInternal.length; 1130 } 1131 1132 /++ 1133 +/ 1134 string[] sheetNames() { 1135 string[] ret; 1136 foreach(sheet; sheetsInternal) 1137 ret ~= sheet.name; 1138 return ret; 1139 } 1140 1141 /++ 1142 +/ 1143 XlsxSheet getSheet(string name) { 1144 foreach(ref sheet; sheetsInternal) 1145 if(sheet.name == name) 1146 return getSheetParsed(sheet); 1147 return null; 1148 1149 } 1150 1151 /// ditto 1152 XlsxSheet getSheet(int indexZeroBased) { 1153 // FIXME: if it is out of range do what? 1154 return getSheetParsed(sheetsInternal[indexZeroBased]); 1155 } 1156 1157 // docProps/core.xml has creator, last modified, etc. 1158 1159 private string[string] contentTypes; 1160 private struct Relationship { 1161 string id; 1162 string type; 1163 string target; 1164 } 1165 private Relationship[string] relationships; 1166 private string[] sharedStrings; 1167 1168 private struct SheetInternal { 1169 string name; 1170 string id; 1171 string rel; 1172 1173 XmlDocument cached; 1174 XlsxSheet parsed; 1175 } 1176 private SheetInternal[] sheetsInternal; 1177 1178 // https://stackoverflow.com/questions/3154646/what-does-the-s-attribute-signify-in-a-cell-tag-in-xlsx 1179 private struct StyleInternal { 1180 string[int] numFmts; 1181 // fonts 1182 // font references color theme from xl/themes 1183 // fills 1184 // borders 1185 // cellStyleXfs 1186 // cellXfs 1187 struct xf { 1188 int xfId; 1189 int numFmtId; 1190 int fontId; 1191 int fillId; 1192 int borderId; 1193 1194 bool applyNumberFormat; // if yes, you get default right alignment 1195 } 1196 xf[] xfs; 1197 1198 // cellStyles 1199 // dxfs 1200 // tableStyles 1201 1202 } 1203 private StyleInternal styleInternal; 1204 1205 private XmlDocument getSheetXml(ref SheetInternal sheet) { 1206 if(sheet.cached is null) 1207 loadXml("xl/" ~ relationships[sheet.rel].target, (document) { sheet.cached = document; }); 1208 1209 return sheet.cached; 1210 } 1211 1212 private XlsxSheet getSheetParsed(ref SheetInternal sheet) { 1213 if(sheet.parsed is null) 1214 sheet.parsed = new XlsxSheet(this, sheet.name, getSheetXml(sheet)); 1215 1216 return sheet.parsed; 1217 } 1218 1219 1220 private void load() { 1221 loadXml("[Content_Types].xml", (document) { 1222 foreach(element; document.querySelectorAll("Override")) 1223 contentTypes[element.attrs.PartName] = element.attrs.ContentType; 1224 }); 1225 1226 loadXml("xl/_rels/workbook.xml.rels", (document) { 1227 foreach(element; document.querySelectorAll("Relationship")) 1228 relationships[element.attrs.Id] = Relationship(element.attrs.Id, element.attrs.Type, element.attrs.Target); 1229 }); 1230 1231 try 1232 loadXml("xl/sharedStrings.xml", (document) { 1233 foreach(element; document.querySelectorAll("si t")) 1234 sharedStrings ~= element.textContent; 1235 }); 1236 catch(Exception e) {} // this xml might not exist and that's ok 1237 1238 loadXml("xl/styles.xml", (document) { 1239 // need to keep the generic hardcoded formats first 1240 styleInternal.numFmts = [ 1241 0: "@", 1242 1: "0", 1243 2: "0.00", 1244 3: "#,##0", 1245 4: "#,##0.00", 1246 5: "$#,##0_);($#,##0)", 1247 6: "$#,##0_);[Red]($#,##0)", 1248 7: "$#,##0.00_);($#,##0.00)", 1249 8: "$#,##0.00_);[Red]($#,##0.00)", 1250 9: "0%", 1251 10: "0.00%", 1252 11: "0.00E+00", 1253 12: "# ?/?", 1254 13: "# ??/??", 1255 14: "m/d/yyyy", // ive heard this one does different things in different locales 1256 15: "d-mmm-yy", 1257 16: "d-mmm", 1258 17: "mmm-yy", 1259 18: "h:mm AM/PM", 1260 19: "h:mm:ss AM/PM", 1261 20: "h:mm", 1262 21: "h:mm:ss", 1263 22: "m/d/yyyy h:mm", 1264 37: "#,##0_);(#,##0)", 1265 38: "#,##0_);[Red](#,##0)", 1266 39: "#,##0.00_);(#,##0.00)", 1267 40: "#,##0.00_);[Red](#,##0.00)", 1268 45: "mm:ss", 1269 46: "[h]:mm:ss", 1270 47: "mm:ss.0", 1271 48: "##0.0E+0", 1272 49: "@", 1273 ]; 1274 1275 1276 foreach(element; document.querySelectorAll("numFmts > numFmt")) { 1277 styleInternal.numFmts[to!int(element.attrs.numFmtId)] = element.attrs.formatCode; 1278 } 1279 1280 foreach(element; document.querySelectorAll("cellXfs > xf")) { 1281 StyleInternal.xf xf; 1282 1283 xf.xfId = element.attrs.xfId.to!int; 1284 xf.fontId = element.attrs.fontId.to!int; 1285 xf.fillId = element.attrs.fillId.to!int; 1286 xf.borderId = element.attrs.borderId.to!int; 1287 xf.numFmtId = element.attrs.numFmtId.to!int; 1288 1289 if(element.attrs.applyNumberFormat == "1") 1290 xf.applyNumberFormat = true; 1291 1292 styleInternal.xfs ~= xf; 1293 } 1294 }); 1295 1296 loadXml("xl/workbook.xml", (document) { 1297 foreach(element; document.querySelectorAll("sheets > sheet")) { 1298 sheetsInternal ~= SheetInternal(element.attrs.name, element.attrs.sheetId, element.getAttribute("r:id")); 1299 } 1300 }); 1301 } 1302 1303 private void loadXml(string filename, scope void delegate(XmlDocument document) handler) { 1304 auto document = new XmlDocument(cast(string) zipFile.getContent(filename)); 1305 handler(document); 1306 } 1307 } 1308 1309 1310 // from Robert Schadek's code { 1311 1312 import std.datetime; 1313 version(unittest) import std.format; 1314 1315 Date longToDate(long d) @safe { 1316 // modifed from https://www.codeproject.com/Articles/2750/ 1317 // Excel-Serial-Date-to-Day-Month-Year-and-Vice-Versa 1318 1319 // Excel/Lotus 123 have a bug with 29-02-1900. 1900 is not a 1320 // leap year, but Excel/Lotus 123 think it is... 1321 if(d == 60) { 1322 return Date(1900, 2, 29); 1323 } else if(d < 60) { 1324 // Because of the 29-02-1900 bug, any serial date 1325 // under 60 is one off... Compensate. 1326 ++d; 1327 } 1328 1329 // Modified Julian to DMY calculation with an addition of 2415019 1330 int l = cast(int)d + 68569 + 2415019; 1331 int n = int(( 4 * l ) / 146097); 1332 l = l - int(( 146097 * n + 3 ) / 4); 1333 int i = int(( 4000 * ( l + 1 ) ) / 1461001); 1334 l = l - int(( 1461 * i ) / 4) + 31; 1335 int j = int(( 80 * l ) / 2447); 1336 int nDay = l - int(( 2447 * j ) / 80); 1337 l = int(j / 11); 1338 int nMonth = j + 2 - ( 12 * l ); 1339 int nYear = 100 * ( n - 49 ) + i + l; 1340 return Date(nYear, nMonth, nDay); 1341 } 1342 1343 long dateToLong(Date d) @safe { 1344 // modifed from https://www.codeproject.com/Articles/2750/ 1345 // Excel-Serial-Date-to-Day-Month-Year-and-Vice-Versa 1346 1347 // Excel/Lotus 123 have a bug with 29-02-1900. 1900 is not a 1348 // leap year, but Excel/Lotus 123 think it is... 1349 if(d.day == 29 && d.month == 2 && d.year == 1900) { 1350 return 60; 1351 } 1352 1353 // DMY to Modified Julian calculated with an extra subtraction of 2415019. 1354 long nSerialDate = 1355 int(( 1461 * ( d.year + 4800 + int(( d.month - 14 ) / 12) ) ) / 4) + 1356 int(( 367 * ( d.month - 2 - 12 * 1357 ( ( d.month - 14 ) / 12 ) ) ) / 12) - 1358 int(( 3 * ( int(( d.year + 4900 1359 + int(( d.month - 14 ) / 12) ) / 100) ) ) / 4) + 1360 d.day - 2415019 - 32075; 1361 1362 if(nSerialDate < 60) { 1363 // Because of the 29-02-1900 bug, any serial date 1364 // under 60 is one off... Compensate. 1365 nSerialDate--; 1366 } 1367 1368 return nSerialDate; 1369 } 1370 1371 @safe unittest { 1372 auto ds = [ Date(1900,2,1), Date(1901, 2, 28), Date(2019, 06, 05) ]; 1373 foreach(const d; ds) { 1374 long l = dateToLong(d); 1375 Date r = longToDate(l); 1376 assert(r == d, format("%s %s", r, d)); 1377 } 1378 } 1379 1380 TimeOfDay doubleToTimeOfDay(double s) @safe { 1381 import core.stdc.math : lround; 1382 double secs = (24.0 * 60.0 * 60.0) * s; 1383 1384 // TODO not one-hundred my lround is needed 1385 int secI = to!int(lround(secs)); 1386 1387 return TimeOfDay(secI / 3600, (secI / 60) % 60, secI % 60); 1388 } 1389 1390 double timeOfDayToDouble(TimeOfDay tod) @safe { 1391 long h = tod.hour * 60 * 60; 1392 long m = tod.minute * 60; 1393 long s = tod.second; 1394 return (h + m + s) / (24.0 * 60.0 * 60.0); 1395 } 1396 1397 @safe unittest { 1398 auto tods = [ TimeOfDay(23, 12, 11), TimeOfDay(11, 0, 11), 1399 TimeOfDay(0, 0, 0), TimeOfDay(0, 1, 0), 1400 TimeOfDay(23, 59, 59), TimeOfDay(0, 0, 0)]; 1401 foreach(const tod; tods) { 1402 double d = timeOfDayToDouble(tod); 1403 assert(d <= 1.0, format("%s", d)); 1404 TimeOfDay r = doubleToTimeOfDay(d); 1405 assert(r == tod, format("%s %s", r, tod)); 1406 } 1407 } 1408 1409 double datetimeToDouble(DateTime dt) @safe { 1410 double d = dateToLong(dt.date); 1411 double t = timeOfDayToDouble(dt.timeOfDay); 1412 return d + t; 1413 } 1414 1415 DateTime doubleToDateTime(double d) @safe { 1416 long l = cast(long)d; 1417 Date dt = longToDate(l); 1418 TimeOfDay t = doubleToTimeOfDay(d - l); 1419 return DateTime(dt, t); 1420 } 1421 1422 @safe unittest { 1423 auto ds = [ Date(1900,2,1), Date(1901, 2, 28), Date(2019, 06, 05) ]; 1424 auto tods = [ TimeOfDay(23, 12, 11), TimeOfDay(11, 0, 11), 1425 TimeOfDay(0, 0, 0), TimeOfDay(0, 1, 0), 1426 TimeOfDay(23, 59, 59), TimeOfDay(0, 0, 0)]; 1427 foreach(const d; ds) { 1428 foreach(const tod; tods) { 1429 DateTime dt = DateTime(d, tod); 1430 double dou = datetimeToDouble(dt); 1431 1432 Date rd = longToDate(cast(long)dou); 1433 assert(rd == d, format("%s %s", rd, d)); 1434 1435 double rest = dou - cast(long)dou; 1436 TimeOfDay rt = doubleToTimeOfDay(dou - cast(long)dou); 1437 assert(rt == tod, format("%s %s", rt, tod)); 1438 1439 DateTime r = doubleToDateTime(dou); 1440 assert(r == dt, format("%s %s", r, dt)); 1441 } 1442 } 1443 } 1444 // end from burner's code }