1 /++ 2 Some support for the Microsoft Excel Spreadsheet file format. 3 4 Don't expect much from it, not even API stability. 5 6 Some code is borrowed from the xlsxreader package. 7 8 History: 9 Added February 13, 2025 10 11 See_Also: 12 https://github.com/symmetryinvestments/xlsxd which supports writing xlsx files. I might add write support here too someday but I kinda doubt it. 13 +/ 14 module arsd.xlsx; 15 16 /+ 17 ./csv-viewer ~/Downloads/UI_comparison.xlsx 18 arsd.dom.ElementNotFoundException@/home/me/program/lib/arsd/xlsx.d(823): Element of type 'Element' matching {worksheet > dimension} not found. 19 +/ 20 21 /+ 22 sheet at double[]: 23 24 nan payloads for blank, errors, then strings as indexes into a table. 25 +/ 26 27 // FIXME: does excel save errors like DIV0 to content in the file? 28 29 // See also Robert's impl: https://github.com/symmetryinvestments/xlsxreader/blob/master/source/xlsxreader.d 30 31 import arsd.core; 32 import arsd.zip; 33 import arsd.dom; 34 import arsd.color; 35 36 import std.conv; 37 38 private struct ExcelFormatStringLexeme { 39 string lexeme; 40 bool isLiteral; 41 } 42 43 class ExcelFormatStringException : Exception { 44 this(string msg, string file = __FILE__, size_t line = __LINE__) { 45 super(msg, file, line); 46 } 47 } 48 49 // FIXME: out contract that asserts s_io.length has indeed been reduced 50 private ExcelFormatStringLexeme extractExcelFormatStringLexeme(ref string s_io) { 51 assert(s_io.length); 52 string s = s_io; 53 54 switch(s[0]) { 55 case '[': 56 // condition or color 57 // or elapsed time thing. 58 // or a locale setting thing for dates (and more?) 59 int count = 0; 60 int size = 0; 61 while(s[0]) { 62 if(s[0] == '[') 63 count++; 64 if(s[0] == ']') 65 count--; 66 s = s[1 .. $]; 67 size++; 68 if(count == 0) 69 break; 70 if(s.length == 0) 71 throw new ExcelFormatStringException("unclosed ["); 72 } 73 74 string ret = s_io[0 .. size]; 75 s_io = s_io[size .. $]; 76 77 return ExcelFormatStringLexeme(ret, false); 78 case '"': 79 // quoted thing watching for backslash 80 bool escaped; 81 int size; 82 83 size++; 84 s = s[1 .. $]; // skip the first " 85 86 string ret; 87 88 while(escaped || s[0] != '"') { 89 if(!escaped) { 90 if(s[0] == '"') { 91 break; 92 } 93 if(s[0] == '\\') 94 escaped = true; 95 else 96 ret ~= s[0]; 97 } else { 98 ret ~= s[0]; 99 escaped = false; 100 } 101 102 s = s[1 .. $]; 103 size++; 104 } 105 if(s.length == 0) 106 throw new ExcelFormatStringException("unclosed \""); 107 size++; 108 109 s_io = s_io[size .. $]; 110 return ExcelFormatStringLexeme(ret, true); 111 112 case '\\': 113 // escaped character 114 s = s[1 .. $]; // skip the \ 115 s_io = s_io[1 .. $]; 116 117 // FIXME: need real stride 118 auto stride = 1; 119 s_io = s_io[stride .. $]; 120 return ExcelFormatStringLexeme(s[0 .. stride], true); 121 case '$', '+', '(', ':', '^', '\'', '{', '<', '=', '-', ')', '!', '&', '~', '}', '>', ' ': // they say slash but that seems to be fraction instead 122 // character literals w/o needing to be quoted 123 s_io = s_io[1 .. $]; 124 return ExcelFormatStringLexeme(s[0 .. 1], true); 125 case 'A', 'a', 'P', 'p': 126 // am/pm 127 128 int size = 0; 129 while( 130 s[0] == 'a' || s[0] == 'A' || 131 s[0] == 'p' || s[0] == 'P' || 132 s[0] == 'm' || s[0] == 'M' || 133 s[0] == '/' 134 ) { 135 size++; 136 s = s[1 .. $]; 137 if(s.length == 0) 138 break; 139 } 140 // also switches hour to 12 hour format when it happens 141 string ret = s_io[0 .. size]; 142 s_io = s_io[size .. $]; 143 144 return ExcelFormatStringLexeme(ret, false); 145 146 // the single char directives 147 case '@': // text placeholder 148 case ';': // clause separator 149 s_io = s_io[1 .. $]; 150 return ExcelFormatStringLexeme(s[0 .. 1], false); 151 case '_': // padding char - this adds a space with the same width as the char that follows it, for column alignment. 152 case '*': // fill char 153 // the padding or fill is the next lexeme, not the next char! 154 s_io = s_io[1 .. $]; 155 return ExcelFormatStringLexeme(s[0 .. 1], false); 156 case 'e', 'E': // scientific notation request 157 case '%': // percent indicator 158 case ',': // thousands separator 159 case '.': // decimal separator 160 case '/': // fraction or date separator 161 s_io = s_io[1 .. $]; 162 return ExcelFormatStringLexeme(s[0 .. 1], false); 163 case /*'m',*/ 'd', 'y': // date parts 164 case 'h', 'm', 's': // time parts 165 166 /+ 167 Note: The m or mm code must appear immediately after the h or hh code or immediately before the ss code; otherwise, Excel displays the month instead of minutes. 168 169 it can be either a date/time OR a number/fraction, not both. 170 +/ 171 172 auto thing = s[0]; 173 int size; 174 while(s.length && s[0] == thing) { 175 s = s[1 .. $]; 176 size++; 177 } 178 auto keep = s_io[0 .. size]; 179 s_io = s_io[size .. $]; 180 return ExcelFormatStringLexeme(keep, false); 181 case '1': .. case '9': // fraction denominators or just literal numbers 182 int size; 183 while(s.length && s[0] >= '1' && s[0] <= '9') { 184 s = s[1 .. $]; 185 size++; 186 } 187 auto keep = s_io[0 .. size]; 188 s_io = s_io[size .. $]; 189 return ExcelFormatStringLexeme(keep, false); 190 case '0', '#', '?': // digit placeholder 191 int size; 192 193 while(s[0] == '0' || s[0] == '#' || s[0] == '?') { 194 s = s[1 .. $]; 195 size++; 196 if(s.length == 0) 197 break; 198 } 199 200 auto keep = s_io[0 .. size]; 201 s_io = s_io[size .. $]; 202 return ExcelFormatStringLexeme(keep, false); 203 204 default: 205 // idk 206 throw new ExcelFormatStringException("unknown char " ~ s); 207 } 208 209 assert(0); 210 } 211 212 unittest { 213 string thing = `[>50][Red]"foo"`; 214 ExcelFormatStringLexeme lexeme; 215 216 lexeme = extractExcelFormatStringLexeme(thing); 217 assert(thing == `[Red]"foo"`); 218 lexeme = extractExcelFormatStringLexeme(thing); 219 assert(thing == `"foo"`); 220 lexeme = extractExcelFormatStringLexeme(thing); 221 assert(thing == ""); 222 assert(lexeme.lexeme == "foo"); 223 224 thing = `"\""`; 225 lexeme = extractExcelFormatStringLexeme(thing); 226 assert(thing == ""); 227 assert(lexeme.lexeme == `"`); 228 229 thing = `\,`; 230 lexeme = extractExcelFormatStringLexeme(thing); 231 assert(thing == ""); 232 assert(lexeme.lexeme == `,`); 233 234 /* 235 thing = `"A\""`; 236 lexeme = extractExcelFormatStringLexeme(thing); 237 assert(thing == ""); 238 assert(lexeme.lexeme == `"`); 239 */ 240 241 /+ 242 thing = "mm-yyyy"; 243 lexeme = extractExcelFormatStringLexeme(thing); 244 import std.stdio; writeln(thing); writeln(lexeme); 245 +/ 246 } 247 248 struct XlsxFormat { 249 string originalFormatString; 250 251 Color foregroundColor; 252 Color backgroundColor; 253 254 int alignment; // 0 = left, 1 = right, 2 = center 255 256 enum Type { 257 /++ 258 +/ 259 String, 260 /++ 261 262 +/ 263 Number, 264 /++ 265 A Date is a special kind of number in Excel. 266 +/ 267 Date, 268 /++ 269 things like # ?/4 270 271 +/ 272 Fraction, 273 Percent 274 } 275 Type type; 276 277 /++ 278 +/ 279 static struct Result { 280 string content; 281 string color; 282 int alignment; 283 } 284 285 /++ 286 +/ 287 Result applyTo(string s) const { 288 if(this.type == Type.String || originalFormatString == "@" || originalFormatString.length == 0) 289 return Result(s, null, alignment); 290 291 int alignment = this.alignment; 292 293 // need to check for a text thing and if conversion fails, we use that 294 double value; 295 try { 296 value = to!double(s); 297 } catch(Exception e) { 298 value = double.nan; 299 } 300 301 DateTime date_; 302 bool dateCalculated; 303 304 DateTime getDate() { 305 // make sure value is not nan before here or it will throw "negative overflow"! 306 if(!dateCalculated) { 307 date_ = doubleToDateTime(value); 308 dateCalculated = true; 309 } 310 311 return date_; 312 } 313 314 // parse out the original format string 315 // the ordering by default is positive;negative;zero;text 316 // 317 // these can also be like [Color][Condition]fmt;generic 318 // color is allowed anywhere 319 // but condition can only have two things following: `[Color][Condition];` repeated any number of times then `;generic-number;text`. no more negative/zero stuff. 320 // once we see a condition, it switches modes - following things MUST have condition or else are treated as just generic catch all for number and then text. 321 // 322 // it matches linearly. 323 /+ 324 so it goes: 325 implicit match >0 326 implicit match <0 327 implicit match =0 328 text 329 330 but if at any point one of them has a condition, the following ones must be either more conditions (immediately!) or unconditional: 331 fallthrough for number 332 text 333 334 335 and if i dont support a format thing i can always fall back to the original text. 336 +/ 337 338 try { 339 string fmt = originalFormatString; 340 341 int state = 0; // 0 == positive, 1 == negative or custom, 2 == other, 3 == text 342 bool matchesCurrentCondition = value > 0; 343 344 bool hasMultipleClauses = false; 345 { 346 string fmt2 = fmt; 347 while(fmt2.length) { 348 auto next = extractExcelFormatStringLexeme(fmt2); 349 if(!next.isLiteral && next.lexeme == ";") 350 hasMultipleClauses = true; 351 break; 352 } 353 } 354 if(hasMultipleClauses == false) 355 matchesCurrentCondition = true; // only one thing means we must always match it 356 357 int numericState; 358 bool inDenominator; 359 bool inAmPm; 360 bool inDecimal; 361 bool justSawHours; 362 363 // these are populated below once we match a clause 364 bool hasAmPm; 365 bool hasFraction; 366 bool hasScientificNotation; 367 bool hasPercent; 368 bool first = true; 369 370 string color; 371 string ret; 372 373 while(fmt.length) { 374 auto lexeme = extractExcelFormatStringLexeme(fmt); 375 376 ExcelFormatStringLexeme peekLexeme(bool returnLiteral = false) { 377 string fmt2 = fmt; 378 skip: 379 if(fmt2.length == 0) 380 return ExcelFormatStringLexeme.init; 381 auto next = extractExcelFormatStringLexeme(fmt2); 382 if(next.isLiteral && !returnLiteral) 383 goto skip; 384 return next; 385 } 386 387 if(!lexeme.isLiteral && lexeme.lexeme[0] == ';') { 388 // we finished the format of the match, so no need to continue 389 if(matchesCurrentCondition) 390 break; 391 // otherwise, we go to the next thing 392 state++; 393 if(state == 1) { 394 matchesCurrentCondition = value < 0; 395 } else if(state == 2) { 396 // this is going to be either the catch-all fallback or another custom one 397 // for now, assume it is a catch-all 398 import std.math; 399 matchesCurrentCondition = !isNaN(value) ? true : false; // only numbers, so not text, matches the catch-all 400 } else if(state == 3) { 401 matchesCurrentCondition = true; // this needs to match, we're at the end, so this is the text display 402 } else { 403 throw new ExcelFormatStringException("too many ; pieces"); 404 } 405 406 continue; 407 } 408 409 if(!matchesCurrentCondition) 410 continue; 411 412 // scan ahead to see if we're doing some special cases: fractions, 12 hour clock, percentages, and sci notation 413 if(first) { 414 string fmt2 = fmt; 415 while(fmt2.length) { 416 auto next = extractExcelFormatStringLexeme(fmt2); 417 if(!next.isLiteral) { 418 // don't proceed into the next clause 419 if(next.lexeme == ";") 420 break; 421 422 char c = next.lexeme[0] | 0x20; 423 if(next.lexeme == "/") 424 hasFraction = true; 425 else if(next.lexeme == "%") { 426 hasPercent = true; 427 value *= 100.0; 428 } else if(c == 'e') 429 hasScientificNotation = true; 430 else if(c == 'a' || c == 'p') 431 hasAmPm = true; 432 } 433 } 434 first = false; 435 } 436 437 if(hasScientificNotation) 438 return Result(s, "unsupported feature: scientific notation"); // FIXME 439 if(hasFraction) 440 return Result(s, "unsupported feature: fractions"); // FIXME 441 442 if(!lexeme.isLiteral && lexeme.lexeme[0] == '[') { 443 // look for color, condition, or locale 444 char nc = lexeme.lexeme[1]; 445 if(nc == '$') 446 continue; // locale i think, skip it 447 if(nc == '<' || nc == '>' || nc == '=') { 448 // condition 449 450 if(state == 1 || state == 2) { 451 state = 1; 452 // read the condition, see if we match it 453 auto condition = lexeme.lexeme[1 .. $-1]; 454 455 string operator; 456 string num; 457 if(condition[1] == '=') { 458 operator = condition[0 .. 2]; 459 num = condition[2 .. $]; 460 } else { 461 operator = condition[0 .. 1]; 462 num = condition[1 .. $]; 463 } 464 465 double compareTo; 466 try { 467 compareTo = to!double(num); 468 } catch(Exception e) { 469 throw new ExcelFormatStringException("not a number: " ~ num); 470 } 471 switch(operator) { 472 case "<": 473 matchesCurrentCondition = value < compareTo; 474 break; 475 case "<=": 476 matchesCurrentCondition = value <= compareTo; 477 break; 478 case ">": 479 matchesCurrentCondition = value > compareTo; 480 break; 481 case ">=": 482 matchesCurrentCondition = value >= compareTo; 483 break; 484 case "=": 485 // FIXME: approxEqual? 486 matchesCurrentCondition = value == compareTo; 487 break; 488 489 default: 490 throw new ExcelFormatStringException("not a supported comparison operator " ~ operator); 491 } 492 493 continue; 494 } else { 495 throw new ExcelFormatStringException("inappropriately placed custom condition"); 496 } 497 } else { 498 // color, we hope. FIXME can also be [s], [m], or [h] or maybe [ss], [mm], [hh] 499 // colors are capitalized... 500 color = lexeme.lexeme[1 .. $-1]; 501 continue; 502 } 503 } 504 505 // if we're here, it should actually match and need some processing. 506 507 if(lexeme.isLiteral) { 508 // literals are easy... 509 ret ~= lexeme.lexeme; 510 } else { 511 // but the rest of these are formatting commands 512 switch(lexeme.lexeme[0]) { 513 case ',': 514 // thousands separator requested, 515 // handled below in the decimal placeholder thing 516 break; 517 case '_', '*': 518 auto lexemeToPadWith = extractExcelFormatStringLexeme(fmt); 519 if(lexeme.lexeme[0] == '_') 520 ret ~= " "; // FIXME supposed to match width of the char 521 else if(lexeme.lexeme[0] == '*') 522 ret ~= lexemeToPadWith.lexeme; // FIXME: supposed to repeat to fill the column width 523 break; 524 case '@': // the original text 525 ret ~= s; 526 break; 527 case '%': 528 ret ~= lexeme.lexeme; 529 break; 530 case '.': 531 inDecimal = true; 532 ret ~= lexeme.lexeme; 533 break; 534 case '/': 535 if(!inAmPm) { 536 inDenominator = true; 537 ret ~= lexeme.lexeme; 538 } 539 break; 540 case '#', '0', '?': 541 // decimal group 542 // # = digit 543 // 0 = digit, pad with 0 if not significant 544 // ? = digit, pad with space (same sized as digit) if not significant 545 546 if(value is double.nan) 547 return Result(s, "NaN"); 548 549 alignment = 1; // if we are printing numbers let's assume right align FIXME 550 /+ 551 if(s.length == 0 && value is double.nan) // and if we printing numbers, treat empty cell as 0 552 value = 0.0; 553 +/ 554 555 bool appendNumber(double v, bool includeThousandsSeparator) { 556 if(v < 0) 557 v = -v; 558 string f = to!string(cast(int) v); 559 if(f.length < lexeme.lexeme.length) 560 foreach(l; lexeme.lexeme[0 .. $ - f.length]) { 561 if(l == '0') 562 ret ~= '0'; 563 else if(l == '?') 564 ret ~= ' '; 565 } 566 if(f.length) { 567 if(includeThousandsSeparator) { 568 // 14532 569 // 1234 570 // 123 571 auto offset = cast(int) f.length % 3; 572 while(f.length > 3) { 573 ret ~= f[offset .. offset + 3]; 574 ret ~= ","; 575 f = f[3 .. $]; 576 } 577 ret ~= f; 578 } else { 579 ret ~= f; 580 } 581 return true; 582 } 583 return false; 584 } 585 586 if(peekLexeme().lexeme == ",") { 587 // thousands separator requested... 588 auto v = cast(int) value / 1000; 589 590 if(v == 0) 591 continue; // FIXME? maybe we want some leading 0 padding? 592 593 auto hadOutput = appendNumber(v, true); 594 595 value = value - v * 1000; // take the remainder for the next iteration of the loop 596 597 if(hadOutput) 598 ret ~= ","; // append the comma before the final thousands digits in the next iteration 599 600 continue; 601 } 602 603 604 if(inDecimal) { 605 // FIXME: no more std.format 606 import std.format; 607 string f = format("%."~to!string(lexeme.lexeme.length)~"f", value - cast(int) value)[2..$]; // slice off the "0." 608 ret ~= f; 609 } else { 610 appendNumber(value, false); 611 } 612 613 inDenominator = false; 614 break; 615 case '1': .. case '9': 616 // number, if in denominator position 617 // otherwise treat as string 618 if(inDenominator) 619 inDenominator = false; // the rest is handled elsewhere 620 else 621 ret ~= lexeme.lexeme; 622 break; 623 case 'y': 624 if(value is double.nan) 625 return Result(s, "NaN date"); 626 627 justSawHours = false; 628 auto y = getDate().year; 629 630 char[16] buffer; 631 632 switch(lexeme.lexeme.length) { 633 case 2: 634 ret ~= intToString(y % 100, buffer[], IntToStringArgs().withPadding(2)); 635 break; 636 case 4: 637 ret ~= intToString(y, buffer[], IntToStringArgs().withPadding(4)); 638 break; 639 default: 640 throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme); 641 } 642 break; 643 case 'm': 644 if(value is double.nan) 645 return Result(s, "NaN date"); 646 auto peek = peekLexeme(false); 647 bool precedesSeconds = 648 (peek.lexeme.length && peek.lexeme[0] == 's') 649 || 650 (peek.lexeme.length > 1 && peek.lexeme[1] == 's') 651 ; 652 653 if(justSawHours || precedesSeconds) { 654 // minutes 655 auto m = getDate().timeOfDay.minute; 656 657 char[16] buffer; 658 659 switch(lexeme.lexeme.length) { 660 case 1: 661 ret ~= intToString(m, buffer[]); 662 break; 663 case 2: 664 ret ~= intToString(m, buffer[], IntToStringArgs().withPadding(2)); 665 break; 666 default: 667 throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme); 668 } 669 } else { 670 // month 671 auto m = cast(int) getDate().month; 672 673 char[16] buffer; 674 675 import arsd.calendar; 676 677 switch(lexeme.lexeme.length) { 678 case 1: 679 ret ~= intToString(m, buffer[]); 680 break; 681 case 2: 682 ret ~= intToString(m, buffer[], IntToStringArgs().withPadding(2)); 683 break; 684 case 3: // abbreviation 685 ret ~= monthNames[m][0 .. 3]; 686 break; 687 case 4: // full name 688 ret ~= monthNames[m]; 689 break; 690 case 5: // single letter 691 ret ~= monthNames[m][0 .. 1]; // FIXME? 692 break; 693 default: 694 throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme); 695 } 696 } 697 698 justSawHours = false; 699 break; 700 case 'd': 701 if(value is double.nan) 702 return Result(s, "NaN date"); 703 justSawHours = false; 704 705 char[16] buffer; 706 707 import arsd.calendar; 708 709 auto d = getDate().day; 710 auto dow = cast(int) getDate().dayOfWeek; 711 712 switch(lexeme.lexeme.length) { 713 case 1: 714 ret ~= intToString(d, buffer[]); 715 break; 716 case 2: 717 ret ~= intToString(d, buffer[], IntToStringArgs().withPadding(2)); 718 break; 719 case 3: 720 // abbreviation 721 ret ~= daysOfWeekNames[dow][0 .. 3]; 722 break; 723 case 4: 724 // full name 725 ret ~= daysOfWeekNames[dow]; 726 break; 727 default: 728 throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme); 729 } 730 break; 731 case 'h': 732 if(value is double.nan) 733 return Result(s, "NaN date"); 734 justSawHours = true; 735 736 auto m = getDate().timeOfDay.hour; 737 char[16] buffer; 738 739 if(hasAmPm && m > 12) 740 m -= 12; 741 if(hasAmPm && m == 0) 742 m = 12; 743 744 switch(lexeme.lexeme.length) { 745 case 1: 746 ret ~= intToString(m, buffer[]); 747 break; 748 case 2: 749 ret ~= intToString(m, buffer[], IntToStringArgs().withPadding(2)); 750 break; 751 default: 752 throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme); 753 } 754 break; 755 case 'a', 'A': 756 if(value is double.nan) 757 return Result(s, "NaN date"); 758 inAmPm = true; 759 auto m = getDate().timeOfDay.hour; 760 if(m >= 12) 761 ret ~= lexeme.lexeme[0] == 'a' ? "pm" : "PM"; 762 else 763 ret ~= lexeme.lexeme[0] == 'a' ? "am" : "AM"; 764 break; 765 case 'p', 'P': 766 inAmPm = false; 767 break; 768 case 's': 769 if(value is double.nan) 770 return Result(s, "NaN date"); 771 auto m = getDate().timeOfDay.second; 772 char[16] buffer; 773 switch(lexeme.lexeme.length) { 774 case 1: 775 ret ~= intToString(m, buffer[]); 776 break; 777 case 2: 778 ret ~= intToString(m, buffer[], IntToStringArgs().withPadding(2)); 779 break; 780 default: 781 throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme); 782 } 783 break; 784 case 'e', 'E': 785 // FIXME: scientific notation 786 break; 787 default: 788 assert(0, "unsupported formatting command: " ~ lexeme.lexeme); 789 } 790 } 791 } 792 793 return Result(ret, color, alignment); 794 } catch(ExcelFormatStringException e) { 795 // we'll fall back to just displaying the original input text 796 return Result(s, e.msg /* FIXME should be null */, alignment); 797 } 798 } 799 800 /+ 801 positive;negative;zero;text 802 can include formats and dates and tons of stuff. 803 https://support.microsoft.com/en-us/office/review-guidelines-for-customizing-a-number-format-c0a1d1fa-d3f4-4018-96b7-9c9354dd99f5 804 +/ 805 private this(XlsxFile file, XlsxFile.StyleInternal.xf formatting) { 806 if(formatting.applyNumberFormat) { 807 // dates too depending on format 808 //import std.stdio; writeln(formatting.numFmtId); writeln(file.styleInternal.numFmts); 809 this.originalFormatString = file.styleInternal.numFmts[formatting.numFmtId]; 810 811 this.type = Type.Number; 812 } else { 813 this.type = Type.String; 814 } 815 816 /+ 817 xf also has: 818 819 int xfId; 820 int numFmtId; 821 int fontId; 822 int fillId; 823 int borderId; 824 +/ 825 } 826 827 private this(string f) { 828 this.originalFormatString = f; 829 this.type = Type.Number; 830 } 831 } 832 833 unittest { 834 assert(XlsxFormat(`;;;"foo"`).applyTo("anything") == XlsxFormat.Result("foo", null)); 835 assert(XlsxFormat(`#.#;;;"foo"`).applyTo("2.0") == XlsxFormat.Result("2.0", null, 1)); 836 assert(XlsxFormat(`0#.##;;;"foo"`).applyTo("24.25") == XlsxFormat.Result("24.25", null, 1)); 837 assert(XlsxFormat(`0#.##;;;"foo"`).applyTo("2.25") == XlsxFormat.Result("02.25", null, 1)); 838 assert(XlsxFormat(`#,#.##`).applyTo("2.25") == XlsxFormat.Result("2.25", null, 1)); 839 assert(XlsxFormat(`#,#.##`).applyTo("123.25") == XlsxFormat.Result("123.25", null, 1)); 840 assert(XlsxFormat(`#,#.##`).applyTo("1234.25") == XlsxFormat.Result("1,234.25", null, 1)); 841 assert(XlsxFormat(`#,#.##`).applyTo("123456.25") == XlsxFormat.Result("123,456.25", null, 1)); 842 } 843 844 struct XlsxCell { 845 string formula; 846 string content; 847 XlsxFormat formatting; 848 849 XlsxFormat.Result displayableResult() { 850 return formatting.applyTo(content); 851 } 852 853 string toString() { 854 return displayableResult().content; 855 } 856 } 857 858 struct CellReference { 859 string name; 860 861 static CellReference fromInts(int column, int row) { 862 string ret; 863 864 string piece; 865 int adjustment = 0; 866 do { 867 piece ~= cast(char)(column % 26 + 'A' - adjustment); 868 if(adjustment == 0) 869 adjustment = 1; 870 column /= 26; 871 } while(column); 872 873 foreach_reverse(ch; piece) 874 ret ~= ch; 875 piece = null; 876 877 do { 878 piece ~= cast(char)(row % 10 + '0'); 879 row /= 10; 880 } while(row); 881 882 foreach_reverse(ch; piece) 883 ret ~= ch; 884 piece = null; 885 886 return CellReference(ret); 887 } 888 889 int toColumnIndex() { 890 size_t endSlice = name.length; 891 foreach(idx, ch; name) { 892 if(ch < 'A' || ch > 'Z') { 893 endSlice = idx; 894 break; 895 } 896 } 897 898 int accumulator; 899 foreach(idx, ch; name[0 .. endSlice]) { 900 int value; 901 if(idx + 1 == endSlice) { 902 // an A in the last "digit" is a 0, elsewhere it is a 1 903 value = ch - 'A'; 904 } else { 905 value = ch - 'A' + 1; 906 } 907 908 accumulator *= 26; 909 accumulator += value; 910 } 911 return accumulator; 912 } 913 914 int toRowIndex() { 915 int accumulator; 916 foreach(ch; name) { 917 if(ch >= 'A' && ch <= 'Z') 918 continue; 919 accumulator *= 10; 920 accumulator += ch - '0'; 921 } 922 return accumulator; 923 } 924 } 925 926 unittest { 927 auto cr = CellReference("AE434"); 928 assert(cr.toColumnIndex == 30); 929 cr = CellReference("E434"); 930 assert(cr.toColumnIndex == 4); // zero-based 931 932 // zero-based column, 1-based row. wtf? 933 assert(CellReference("AE434") == CellReference.fromInts(30, 434)); 934 935 assert(CellReference("Z1") == CellReference.fromInts(25, 1)); 936 } 937 938 /++ 939 940 +/ 941 class XlsxSheet { 942 private string name_; 943 private XlsxFile file; 944 private XmlDocument document; 945 private this(XlsxFile file, string name, XmlDocument document) { 946 this.file = file; 947 this.name_ = name; 948 this.document = document; 949 950 this.dimension = document.requireSelector("worksheet > dimension").getAttribute("ref"); 951 // there's also sheetView with selection, activeCell, etc 952 // and cols with widths and such 953 954 auto ul = this.upperLeft; 955 this.minRow = ul.toRowIndex; 956 this.minColumn = ul.toColumnIndex; 957 958 auto lr = this.lowerRight; 959 this.maxRow = lr.toRowIndex + 1; 960 this.maxColumn = lr.toColumnIndex + 1; 961 } 962 963 private string dimension; 964 965 private int minRow; 966 private int minColumn; 967 private int maxRow; 968 private int maxColumn; 969 970 /++ 971 +/ 972 Size size() { 973 return Size(maxColumn - minColumn, maxRow - minRow); 974 } 975 976 private CellReference upperLeft() { 977 foreach(idx, ch; dimension) 978 if(ch == ':') 979 return CellReference(dimension[0 .. idx]); 980 //assert(0); // it has no lower right... 981 return CellReference(dimension); 982 } 983 984 private CellReference lowerRight() { 985 foreach(idx, ch; dimension) 986 if(ch == ':') 987 return CellReference(dimension[idx + 1 .. $]); 988 assert(0); 989 } 990 991 // opIndex could be like sheet["A1:B4"] and sheet["A1", "B4"] and stuff maybe. 992 993 /++ 994 +/ 995 string name() { 996 return name_; 997 } 998 999 /++ 1000 Suitable for passing to [arsd.csv.toCsv] 1001 +/ 1002 string[][] toStringGrid() { 1003 auto grid = this.toGrid(); 1004 1005 string[][] ret; 1006 ret.length = size.height; 1007 foreach(i, ref row; ret) { 1008 row.length = size.width; 1009 foreach(k, ref cell; row) 1010 cell = grid[i][k].toString(); 1011 } 1012 1013 return ret; 1014 } 1015 1016 /++ 1017 1018 +/ 1019 XlsxCell[][] toGrid() { 1020 // FIXME: this crashes on opend dmd! 1021 // string[][] ret = new string[][](size.height, size.width); 1022 1023 /+ 1024 // almost everything we allocate in here is to keep, so 1025 // turning off the GC while working prevents unnecessary 1026 // collection attempts that won't find any garbage anyway. 1027 1028 // but meh no significant difference in perf anyway. 1029 import core.memory; 1030 GC.disable(); 1031 scope(exit) 1032 GC.enable(); 1033 +/ 1034 1035 XlsxCell[][] ret; 1036 ret.length = size.height; 1037 foreach(ref row; ret) 1038 row.length = size.width; 1039 1040 //alloc done 1041 1042 auto sheetData = document.requireSelector("sheetData"); 1043 Element[] rowElements = sheetData.childNodes; 1044 1045 Element[] nextRow(int expected) { 1046 if(rowElements.length == 0) 1047 throw new Exception("ran out of row elements..."); 1048 1049 Element rowElement; 1050 Element[] before = rowElements; 1051 1052 do { 1053 rowElement = rowElements[0]; 1054 rowElements = rowElements[1 .. $]; 1055 } while(rowElement.tagName != "row"); 1056 1057 if(rowElement.attrs.r.to!int != expected) { 1058 // a row was skipped in the file, so we'll 1059 // return an empty placeholder too 1060 rowElements = before; 1061 return null; 1062 } 1063 1064 return rowElement.childNodes; 1065 } 1066 1067 foreach(int rowIdx, row; ret) { 1068 auto cellElements = nextRow(rowIdx + 1); 1069 1070 foreach(int cellIdx, ref cell; row) { 1071 string cellReference = CellReference.fromInts(cellIdx + minColumn, rowIdx + minRow).name; 1072 1073 Element element = null; 1074 foreach(idx, thing; cellElements) { 1075 if(thing.attrs.r == cellReference) { 1076 element = thing; 1077 cellElements = cellElements[idx + 1 .. $]; 1078 break; 1079 } 1080 } 1081 1082 if(element is null) 1083 continue; 1084 string v = element.optionSelector("v").textContent; 1085 if(element.attrs.t == "s") 1086 v = file.sharedStrings[v.to!int()]; 1087 1088 auto sString = element.attrs.s; 1089 auto sId = sString.length ? to!int(sString) : 0; 1090 1091 string f = element.optionSelector("f").textContent; 1092 1093 cell = XlsxCell(f, v, XlsxFormat(file, file.styleInternal.xfs[sId])); 1094 } 1095 } 1096 return ret; 1097 } 1098 } 1099 1100 /++ 1101 1102 +/ 1103 class XlsxFile { 1104 private ZipFile zipFile; 1105 1106 /++ 1107 1108 +/ 1109 this(FilePath file) { 1110 this.zipFile = new ZipFile(file); 1111 1112 load(); 1113 } 1114 1115 /// ditto 1116 this(immutable(ubyte)[] rawData) { 1117 this.zipFile = new ZipFile(rawData); 1118 1119 load(); 1120 } 1121 1122 /++ 1123 +/ 1124 int sheetCount() { 1125 return cast(int) sheetsInternal.length; 1126 } 1127 1128 /++ 1129 +/ 1130 string[] sheetNames() { 1131 string[] ret; 1132 foreach(sheet; sheetsInternal) 1133 ret ~= sheet.name; 1134 return ret; 1135 } 1136 1137 /++ 1138 +/ 1139 XlsxSheet getSheet(string name) { 1140 foreach(ref sheet; sheetsInternal) 1141 if(sheet.name == name) 1142 return getSheetParsed(sheet); 1143 return null; 1144 1145 } 1146 1147 /// ditto 1148 XlsxSheet getSheet(int indexZeroBased) { 1149 // FIXME: if it is out of range do what? 1150 return getSheetParsed(sheetsInternal[indexZeroBased]); 1151 } 1152 1153 // docProps/core.xml has creator, last modified, etc. 1154 1155 private string[string] contentTypes; 1156 private struct Relationship { 1157 string id; 1158 string type; 1159 string target; 1160 } 1161 private Relationship[string] relationships; 1162 private string[] sharedStrings; 1163 1164 private struct SheetInternal { 1165 string name; 1166 string id; 1167 string rel; 1168 1169 XmlDocument cached; 1170 XlsxSheet parsed; 1171 } 1172 private SheetInternal[] sheetsInternal; 1173 1174 // https://stackoverflow.com/questions/3154646/what-does-the-s-attribute-signify-in-a-cell-tag-in-xlsx 1175 private struct StyleInternal { 1176 string[int] numFmts; 1177 // fonts 1178 // font references color theme from xl/themes 1179 // fills 1180 // borders 1181 // cellStyleXfs 1182 // cellXfs 1183 struct xf { 1184 int xfId; 1185 int numFmtId; 1186 int fontId; 1187 int fillId; 1188 int borderId; 1189 1190 bool applyNumberFormat; // if yes, you get default right alignment 1191 } 1192 xf[] xfs; 1193 1194 // cellStyles 1195 // dxfs 1196 // tableStyles 1197 1198 } 1199 private StyleInternal styleInternal; 1200 1201 private XmlDocument getSheetXml(ref SheetInternal sheet) { 1202 if(sheet.cached is null) 1203 loadXml("xl/" ~ relationships[sheet.rel].target, (document) { sheet.cached = document; }); 1204 1205 return sheet.cached; 1206 } 1207 1208 private XlsxSheet getSheetParsed(ref SheetInternal sheet) { 1209 if(sheet.parsed is null) 1210 sheet.parsed = new XlsxSheet(this, sheet.name, getSheetXml(sheet)); 1211 1212 return sheet.parsed; 1213 } 1214 1215 1216 private void load() { 1217 loadXml("[Content_Types].xml", (document) { 1218 foreach(element; document.querySelectorAll("Override")) 1219 contentTypes[element.attrs.PartName] = element.attrs.ContentType; 1220 }); 1221 1222 loadXml("xl/_rels/workbook.xml.rels", (document) { 1223 foreach(element; document.querySelectorAll("Relationship")) 1224 relationships[element.attrs.Id] = Relationship(element.attrs.Id, element.attrs.Type, element.attrs.Target); 1225 }); 1226 1227 loadXml("xl/sharedStrings.xml", (document) { 1228 foreach(element; document.querySelectorAll("si t")) 1229 sharedStrings ~= element.textContent; 1230 }); 1231 1232 loadXml("xl/styles.xml", (document) { 1233 // need to keep the generic hardcoded formats first 1234 styleInternal.numFmts = [ 1235 0: "@", 1236 1: "0", 1237 2: "0.00", 1238 3: "#,##0", 1239 4: "#,##0.00", 1240 5: "$#,##0_);($#,##0)", 1241 6: "$#,##0_);[Red]($#,##0)", 1242 7: "$#,##0.00_);($#,##0.00)", 1243 8: "$#,##0.00_);[Red]($#,##0.00)", 1244 9: "0%", 1245 10: "0.00%", 1246 11: "0.00E+00", 1247 12: "# ?/?", 1248 13: "# ??/??", 1249 14: "m/d/yyyy", // ive heard this one does different things in different locales 1250 15: "d-mmm-yy", 1251 16: "d-mmm", 1252 17: "mmm-yy", 1253 18: "h:mm AM/PM", 1254 19: "h:mm:ss AM/PM", 1255 20: "h:mm", 1256 21: "h:mm:ss", 1257 22: "m/d/yyyy h:mm", 1258 37: "#,##0_);(#,##0)", 1259 38: "#,##0_);[Red](#,##0)", 1260 39: "#,##0.00_);(#,##0.00)", 1261 40: "#,##0.00_);[Red](#,##0.00)", 1262 45: "mm:ss", 1263 46: "[h]:mm:ss", 1264 47: "mm:ss.0", 1265 48: "##0.0E+0", 1266 49: "@", 1267 ]; 1268 1269 1270 foreach(element; document.querySelectorAll("numFmts > numFmt")) { 1271 styleInternal.numFmts[to!int(element.attrs.numFmtId)] = element.attrs.formatCode; 1272 } 1273 1274 foreach(element; document.querySelectorAll("cellXfs > xf")) { 1275 StyleInternal.xf xf; 1276 1277 xf.xfId = element.attrs.xfId.to!int; 1278 xf.fontId = element.attrs.fontId.to!int; 1279 xf.fillId = element.attrs.fillId.to!int; 1280 xf.borderId = element.attrs.borderId.to!int; 1281 xf.numFmtId = element.attrs.numFmtId.to!int; 1282 1283 if(element.attrs.applyNumberFormat == "1") 1284 xf.applyNumberFormat = true; 1285 1286 styleInternal.xfs ~= xf; 1287 } 1288 }); 1289 1290 loadXml("xl/workbook.xml", (document) { 1291 foreach(element; document.querySelectorAll("sheets > sheet")) { 1292 sheetsInternal ~= SheetInternal(element.attrs.name, element.attrs.sheetId, element.getAttribute("r:id")); 1293 } 1294 }); 1295 } 1296 1297 private void loadXml(string filename, scope void delegate(XmlDocument document) handler) { 1298 auto document = new XmlDocument(cast(string) zipFile.getContent(filename)); 1299 handler(document); 1300 } 1301 } 1302 1303 1304 // from Robert Schadek's code { 1305 1306 import std.datetime; 1307 version(unittest) import std.format; 1308 1309 Date longToDate(long d) @safe { 1310 // modifed from https://www.codeproject.com/Articles/2750/ 1311 // Excel-Serial-Date-to-Day-Month-Year-and-Vice-Versa 1312 1313 // Excel/Lotus 123 have a bug with 29-02-1900. 1900 is not a 1314 // leap year, but Excel/Lotus 123 think it is... 1315 if(d == 60) { 1316 return Date(1900, 2, 29); 1317 } else if(d < 60) { 1318 // Because of the 29-02-1900 bug, any serial date 1319 // under 60 is one off... Compensate. 1320 ++d; 1321 } 1322 1323 // Modified Julian to DMY calculation with an addition of 2415019 1324 int l = cast(int)d + 68569 + 2415019; 1325 int n = int(( 4 * l ) / 146097); 1326 l = l - int(( 146097 * n + 3 ) / 4); 1327 int i = int(( 4000 * ( l + 1 ) ) / 1461001); 1328 l = l - int(( 1461 * i ) / 4) + 31; 1329 int j = int(( 80 * l ) / 2447); 1330 int nDay = l - int(( 2447 * j ) / 80); 1331 l = int(j / 11); 1332 int nMonth = j + 2 - ( 12 * l ); 1333 int nYear = 100 * ( n - 49 ) + i + l; 1334 return Date(nYear, nMonth, nDay); 1335 } 1336 1337 long dateToLong(Date d) @safe { 1338 // modifed from https://www.codeproject.com/Articles/2750/ 1339 // Excel-Serial-Date-to-Day-Month-Year-and-Vice-Versa 1340 1341 // Excel/Lotus 123 have a bug with 29-02-1900. 1900 is not a 1342 // leap year, but Excel/Lotus 123 think it is... 1343 if(d.day == 29 && d.month == 2 && d.year == 1900) { 1344 return 60; 1345 } 1346 1347 // DMY to Modified Julian calculated with an extra subtraction of 2415019. 1348 long nSerialDate = 1349 int(( 1461 * ( d.year + 4800 + int(( d.month - 14 ) / 12) ) ) / 4) + 1350 int(( 367 * ( d.month - 2 - 12 * 1351 ( ( d.month - 14 ) / 12 ) ) ) / 12) - 1352 int(( 3 * ( int(( d.year + 4900 1353 + int(( d.month - 14 ) / 12) ) / 100) ) ) / 4) + 1354 d.day - 2415019 - 32075; 1355 1356 if(nSerialDate < 60) { 1357 // Because of the 29-02-1900 bug, any serial date 1358 // under 60 is one off... Compensate. 1359 nSerialDate--; 1360 } 1361 1362 return nSerialDate; 1363 } 1364 1365 @safe unittest { 1366 auto ds = [ Date(1900,2,1), Date(1901, 2, 28), Date(2019, 06, 05) ]; 1367 foreach(const d; ds) { 1368 long l = dateToLong(d); 1369 Date r = longToDate(l); 1370 assert(r == d, format("%s %s", r, d)); 1371 } 1372 } 1373 1374 TimeOfDay doubleToTimeOfDay(double s) @safe { 1375 import core.stdc.math : lround; 1376 double secs = (24.0 * 60.0 * 60.0) * s; 1377 1378 // TODO not one-hundred my lround is needed 1379 int secI = to!int(lround(secs)); 1380 1381 return TimeOfDay(secI / 3600, (secI / 60) % 60, secI % 60); 1382 } 1383 1384 double timeOfDayToDouble(TimeOfDay tod) @safe { 1385 long h = tod.hour * 60 * 60; 1386 long m = tod.minute * 60; 1387 long s = tod.second; 1388 return (h + m + s) / (24.0 * 60.0 * 60.0); 1389 } 1390 1391 @safe unittest { 1392 auto tods = [ TimeOfDay(23, 12, 11), TimeOfDay(11, 0, 11), 1393 TimeOfDay(0, 0, 0), TimeOfDay(0, 1, 0), 1394 TimeOfDay(23, 59, 59), TimeOfDay(0, 0, 0)]; 1395 foreach(const tod; tods) { 1396 double d = timeOfDayToDouble(tod); 1397 assert(d <= 1.0, format("%s", d)); 1398 TimeOfDay r = doubleToTimeOfDay(d); 1399 assert(r == tod, format("%s %s", r, tod)); 1400 } 1401 } 1402 1403 double datetimeToDouble(DateTime dt) @safe { 1404 double d = dateToLong(dt.date); 1405 double t = timeOfDayToDouble(dt.timeOfDay); 1406 return d + t; 1407 } 1408 1409 DateTime doubleToDateTime(double d) @safe { 1410 long l = cast(long)d; 1411 Date dt = longToDate(l); 1412 TimeOfDay t = doubleToTimeOfDay(d - l); 1413 return DateTime(dt, t); 1414 } 1415 1416 @safe unittest { 1417 auto ds = [ Date(1900,2,1), Date(1901, 2, 28), Date(2019, 06, 05) ]; 1418 auto tods = [ TimeOfDay(23, 12, 11), TimeOfDay(11, 0, 11), 1419 TimeOfDay(0, 0, 0), TimeOfDay(0, 1, 0), 1420 TimeOfDay(23, 59, 59), TimeOfDay(0, 0, 0)]; 1421 foreach(const d; ds) { 1422 foreach(const tod; tods) { 1423 DateTime dt = DateTime(d, tod); 1424 double dou = datetimeToDouble(dt); 1425 1426 Date rd = longToDate(cast(long)dou); 1427 assert(rd == d, format("%s %s", rd, d)); 1428 1429 double rest = dou - cast(long)dou; 1430 TimeOfDay rt = doubleToTimeOfDay(dou - cast(long)dou); 1431 assert(rt == tod, format("%s %s", rt, tod)); 1432 1433 DateTime r = doubleToDateTime(dou); 1434 assert(r == dt, format("%s %s", r, dt)); 1435 } 1436 } 1437 } 1438 // end from burner's code }