arsd.xlsx source code

1 /++
2 	Some support for the Microsoft Excel Spreadsheet file format.
3 
4 	Don't expect much from it, not even API stability.
5 
6 	Some code is borrowed from the xlsxreader package.
7 
8 	History:
9 		Added February 13, 2025
10 
11 	See_Also:
12 		https://github.com/symmetryinvestments/xlsxd which supports writing xlsx files. I might add write support here too someday but I kinda doubt it.
13 +/
14 module arsd.xlsx;
15 
16 /+
17 ./csv-viewer ~/Downloads/UI_comparison.xlsx
18 arsd.dom.ElementNotFoundException@/home/me/program/lib/arsd/xlsx.d(823): Element of type 'Element' matching {worksheet > dimension} not found.
19 +/
20 
21 /+
22 	sheet at double[]:
23 
24 	nan payloads for blank, errors, then strings as indexes into a table.
25 +/
26 
27 // FIXME: does excel save errors like DIV0 to content in the file?
28 
29 // See also Robert's impl: https://github.com/symmetryinvestments/xlsxreader/blob/master/source/xlsxreader.d
30 
31 import arsd.core;
32 import arsd.zip;
33 import arsd.dom;
34 import arsd.color;
35 
36 import std.conv;
37 
38 private struct ExcelFormatStringLexeme {
39 	string lexeme;
40 	bool isLiteral;
41 }
42 
43 class ExcelFormatStringException : Exception {
44 	this(string msg, string file = __FILE__, size_t line = __LINE__) {
45 		super(msg, file, line);
46 	}
47 }
48 
49 // FIXME: out contract that asserts s_io.length has indeed been reduced
50 private ExcelFormatStringLexeme extractExcelFormatStringLexeme(ref string s_io) {
51 	assert(s_io.length);
52 	string s = s_io;
53 
54 	switch(s[0]) {
55 		case '[':
56 			// condition or color
57 			// or elapsed time thing.
58 			// or a locale setting thing for dates (and more?)
59 			int count = 0;
60 			int size = 0;
61 			while(s[0]) {
62 				if(s[0] == '[')
63 					count++;
64 				if(s[0] == ']')
65 					count--;
66 				s = s[1 .. $];
67 				size++;
68 				if(count == 0)
69 					break;
70 				if(s.length == 0)
71 					throw new ExcelFormatStringException("unclosed [");
72 			}
73 
74 			string ret = s_io[0 .. size];
75 			s_io = s_io[size .. $];
76 
77 			return ExcelFormatStringLexeme(ret, false);
78 		case '"':
79 			// quoted thing watching for backslash
80 			bool escaped;
81 			int size;
82 
83 			size++;
84 			s = s[1 .. $]; // skip the first "
85 
86 			string ret;
87 
88 			while(escaped || s[0] != '"') {
89 				if(!escaped) {
90 					if(s[0] == '"') {
91 						break;
92 					}
93 					if(s[0] == '\\')
94 						escaped = true;
95 					else
96 						ret ~= s[0];
97 				} else {
98 					ret ~= s[0];
99 					escaped = false;
100 				}
101 
102 				s = s[1 .. $];
103 				size++;
104 			}
105 			if(s.length == 0)
106 				throw new ExcelFormatStringException("unclosed \"");
107 			size++;
108 
109 			s_io = s_io[size .. $];
110 			return ExcelFormatStringLexeme(ret, true);
111 
112 		case '\\':
113 			// escaped character
114 			s = s[1 .. $]; // skip the \
115 			s_io = s_io[1 .. $];
116 
117 			// FIXME: need real stride
118 			auto stride = 1;
119 			s_io = s_io[stride .. $];
120 			return ExcelFormatStringLexeme(s[0 .. stride], true);
121 		case '$', '+', '(', ':', '^', '\'', '{', '<', '=', '-', ')', '!', '&', '~', '}', '>', ' ': // they say slash but that seems to be fraction instead
122 			// character literals w/o needing to be quoted
123 			s_io = s_io[1 .. $];
124 			return ExcelFormatStringLexeme(s[0 .. 1], true);
125 		case 'A', 'a', 'P', 'p':
126 			// am/pm
127 
128 			int size = 0;
129 			while(
130 				s[0] == 'a' || s[0] == 'A' ||
131 				s[0] == 'p' || s[0] == 'P' ||
132 				s[0] == 'm' || s[0] == 'M' ||
133 				s[0] == '/'
134 			) {
135 				size++;
136 				s = s[1 .. $];
137 				if(s.length == 0)
138 					break;
139 			}
140 			// also switches hour to 12 hour format when it happens
141 			string ret = s_io[0 .. size];
142 			s_io = s_io[size .. $];
143 
144 			return ExcelFormatStringLexeme(ret, false);
145 
146 		// the single char directives
147 		case '@': // text placeholder
148 		case ';': // clause separator
149 			s_io = s_io[1 .. $];
150 			return ExcelFormatStringLexeme(s[0 .. 1], false);
151 		case '_': // padding char - this adds a space with the same width as the char that follows it, for column alignment.
152 		case '*': // fill char
153 			// the padding or fill is the next lexeme, not the next char!
154 			s_io = s_io[1 .. $];
155 			return ExcelFormatStringLexeme(s[0 .. 1], false);
156 		case 'e', 'E': // scientific notation request
157 		case '%': // percent indicator
158 		case ',': // thousands separator
159 		case '.': // decimal separator
160 		case '/': // fraction or date separator
161 			s_io = s_io[1 .. $];
162 			return ExcelFormatStringLexeme(s[0 .. 1], false);
163 		case /*'m',*/ 'd', 'y': // date parts
164 		case 'h', 'm', 's': // time parts
165 
166 			/+
167 			Note: The m or mm code must appear immediately after the h or hh code or immediately before the ss code; otherwise, Excel displays the month instead of minutes.
168 
169 			it can be either a date/time OR a number/fraction, not both.
170 			+/
171 
172 			auto thing = s[0];
173 			int size;
174 			while(s.length && s[0] == thing) {
175 				s = s[1 .. $];
176 				size++;
177 			}
178 			auto keep = s_io[0 .. size];
179 			s_io = s_io[size .. $];
180 			return ExcelFormatStringLexeme(keep, false);
181 		case '1': .. case '9': // fraction denominators or just literal numbers
182 			int size;
183 			while(s.length && s[0] >= '1' && s[0] <= '9') {
184 				s = s[1 .. $];
185 				size++;
186 			}
187 			auto keep = s_io[0 .. size];
188 			s_io = s_io[size .. $];
189 			return ExcelFormatStringLexeme(keep, false);
190 		case '0', '#', '?': // digit placeholder
191 			int size;
192 
193 			while(s[0] == '0' || s[0] == '#' || s[0] == '?') {
194 				s = s[1 .. $];
195 				size++;
196 				if(s.length == 0)
197 					break;
198 			}
199 
200 			auto keep = s_io[0 .. size];
201 			s_io = s_io[size .. $];
202 			return ExcelFormatStringLexeme(keep, false);
203 
204 		default:
205 			// idk
206 			throw new ExcelFormatStringException("unknown char " ~ s);
207 	}
208 
209 	assert(0);
210 }
211 
212 unittest {
213 	string thing = `[>50][Red]"foo"`;
214 	ExcelFormatStringLexeme lexeme;
215 
216 	lexeme = extractExcelFormatStringLexeme(thing);
217 	assert(thing == `[Red]"foo"`);
218 	lexeme = extractExcelFormatStringLexeme(thing);
219 	assert(thing == `"foo"`);
220 	lexeme = extractExcelFormatStringLexeme(thing);
221 	assert(thing == "");
222 	assert(lexeme.lexeme == "foo");
223 
224 	thing = `"\""`;
225 	lexeme = extractExcelFormatStringLexeme(thing);
226 	assert(thing == "");
227 	assert(lexeme.lexeme == `"`);
228 
229 	thing = `\,`;
230 	lexeme = extractExcelFormatStringLexeme(thing);
231 	assert(thing == "");
232 	assert(lexeme.lexeme == `,`);
233 
234 	/*
235 	thing = `"A\""`;
236 	lexeme = extractExcelFormatStringLexeme(thing);
237 	assert(thing == "");
238 	assert(lexeme.lexeme == `"`);
239 	*/
240 
241 	/+
242 	thing = "mm-yyyy";
243 	lexeme = extractExcelFormatStringLexeme(thing);
244 	import std.stdio; writeln(thing); writeln(lexeme);
245 	+/
246 }
247 
248 struct XlsxFormat {
249 	string originalFormatString;
250 
251 	Color foregroundColor;
252 	Color backgroundColor;
253 
254 	int alignment; // 0 = left, 1 = right, 2 = center
255 
256 	enum Type {
257 		/++
258 		+/
259 		String,
260 		/++
261 
262 		+/
263 		Number,
264 		/++
265 			A Date is a special kind of number in Excel.
266 		+/
267 		Date,
268 		/++
269 			things like # ?/4
270 
271 		+/
272 		Fraction,
273 		Percent
274 	}
275 	Type type;
276 
277 	/++
278 	+/
279 	static struct Result {
280 		string content;
281 		string color;
282 		int alignment;
283 	}
284 
285 	/++
286 	+/
287 	Result applyTo(string s) const {
288 		if(this.type == Type.String || originalFormatString == "@" || originalFormatString.length == 0)
289 			return Result(s, null, alignment);
290 
291 		int alignment = this.alignment;
292 
293 		// need to check for a text thing and if conversion fails, we use that
294 		double value;
295 		try {
296 			value = to!double(s);
297 		} catch(Exception e) {
298 			value = double.nan;
299 		}
300 
301 		DateTime date_;
302 		bool dateCalculated;
303 
304 		DateTime getDate() {
305 			// make sure value is not nan before here or it will throw "negative overflow"!
306 			if(!dateCalculated) {
307 				date_ = doubleToDateTime(value);
308 				dateCalculated = true;
309 			}
310 
311 			return date_;
312 		}
313 
314 		// parse out the original format string
315 		// the ordering by default is positive;negative;zero;text
316 		//
317 		// these can also be like [Color][Condition]fmt;generic
318 		// color is allowed anywhere
319 		// but condition can only have two things following: `[Color][Condition];` repeated any number of times then `;generic-number;text`. no more negative/zero stuff.
320 		// once we see a condition, it switches modes - following things MUST have condition or else are treated as just generic catch all for number and then text.
321 		//
322 		// it matches linearly.
323 		/+
324 			so it goes:
325 				implicit match >0
326 				implicit match <0
327 				implicit match =0
328 				text
329 
330 			but if at any point one of them has a condition, the following ones must be either more conditions (immediately!) or unconditional:
331 				fallthrough for number
332 				text
333 
334 
335 			and if i dont support a format thing i can always fall back to the original text.
336 		+/
337 
338 		try {
339 			string fmt = originalFormatString;
340 
341 			int state = 0; // 0 == positive, 1 == negative or custom, 2 == other, 3 == text
342 			bool matchesCurrentCondition = value > 0;
343 
344 			bool hasMultipleClauses = false;
345 			{
346 				string fmt2 = fmt;
347 				while(fmt2.length) {
348 					auto next = extractExcelFormatStringLexeme(fmt2);
349 					if(!next.isLiteral && next.lexeme == ";")
350 						hasMultipleClauses = true;
351 						break;
352 				}
353 			}
354 			if(hasMultipleClauses == false)
355 				matchesCurrentCondition = true; // only one thing means we must always match it
356 
357 			int numericState;
358 			bool inDenominator;
359 			bool inAmPm;
360 			bool inDecimal;
361 			bool justSawHours;
362 
363 			// these are populated below once we match a clause
364 			bool hasAmPm;
365 			bool hasFraction;
366 			bool hasScientificNotation;
367 			bool hasPercent;
368 			bool first = true;
369 
370 			string color;
371 			string ret;
372 
373 			while(fmt.length) {
374 				auto lexeme = extractExcelFormatStringLexeme(fmt);
375 
376 				ExcelFormatStringLexeme peekLexeme(bool returnLiteral = false) {
377 					string fmt2 = fmt;
378 					skip:
379 					if(fmt2.length == 0)
380 						return ExcelFormatStringLexeme.init;
381 					auto next = extractExcelFormatStringLexeme(fmt2);
382 					if(next.isLiteral && !returnLiteral)
383 						goto skip;
384 					return next;
385 				}
386 
387 				if(!lexeme.isLiteral && lexeme.lexeme[0] == ';') {
388 					// we finished the format of the match, so no need to continue
389 					if(matchesCurrentCondition)
390 						break;
391 					// otherwise, we go to the next thing
392 					state++;
393 					if(state == 1) {
394 						matchesCurrentCondition = value < 0;
395 					} else if(state == 2) {
396 						// this is going to be either the catch-all fallback or another custom one
397 						// for now, assume it is a catch-all
398 						import std.math;
399 						matchesCurrentCondition = !isNaN(value) ? true : false; // only numbers, so not text, matches the catch-all
400 					} else if(state == 3) {
401 						matchesCurrentCondition = true; // this needs to match, we're at the end, so this is the text display
402 					} else {
403 						throw new ExcelFormatStringException("too many ; pieces");
404 					}
405 
406 					continue;
407 				}
408 
409 				if(!matchesCurrentCondition)
410 					continue;
411 
412 				// scan ahead to see if we're doing some special cases: fractions, 12 hour clock, percentages, and sci notation
413 				if(first) {
414 					string fmt2 = fmt;
415 					while(fmt2.length) {
416 						auto next = extractExcelFormatStringLexeme(fmt2);
417 						if(!next.isLiteral) {
418 							// don't proceed into the next clause
419 							if(next.lexeme == ";")
420 								break;
421 
422 							char c = next.lexeme[0] | 0x20;
423 							if(next.lexeme == "/")
424 								hasFraction = true;
425 							else if(next.lexeme == "%") {
426 								hasPercent = true;
427 								value *= 100.0;
428 							} else if(c == 'e')
429 								hasScientificNotation = true;
430 							else if(c == 'a' || c == 'p')
431 								hasAmPm = true;
432 						}
433 					}
434 					first = false;
435 				}
436 
437 				if(hasScientificNotation)
438 					return Result(s, "unsupported feature: scientific notation"); // FIXME
439 				if(hasFraction)
440 					return Result(s, "unsupported feature: fractions"); // FIXME
441 
442 				if(!lexeme.isLiteral && lexeme.lexeme[0] == '[') {
443 					// look for color, condition, or locale
444 					char nc = lexeme.lexeme[1];
445 					if(nc == '$')
446 						continue; // locale i think, skip it
447 					if(nc == '<' || nc == '>' || nc == '=') {
448 						// condition
449 
450 						if(state == 1 || state == 2) {
451 							state = 1;
452 							// read the condition, see if we match it
453 							auto condition = lexeme.lexeme[1 .. $-1];
454 
455 							string operator;
456 							string num;
457 							if(condition[1] == '=') {
458 								operator = condition[0 .. 2];
459 								num = condition[2 .. $];
460 							} else {
461 								operator = condition[0 .. 1];
462 								num = condition[1 .. $];
463 							}
464 
465 							double compareTo;
466 							try {
467 								compareTo = to!double(num);
468 							} catch(Exception e) {
469 								throw new ExcelFormatStringException("not a number: " ~ num);
470 							}
471 							switch(operator) {
472 								case "<":
473 									matchesCurrentCondition = value < compareTo;
474 								break;
475 								case "<=":
476 									matchesCurrentCondition = value <= compareTo;
477 								break;
478 								case ">":
479 									matchesCurrentCondition = value > compareTo;
480 								break;
481 								case ">=":
482 									matchesCurrentCondition = value >= compareTo;
483 								break;
484 								case "=":
485 									// FIXME: approxEqual?
486 									matchesCurrentCondition = value == compareTo;
487 								break;
488 
489 								default:
490 									throw new ExcelFormatStringException("not a supported comparison operator " ~ operator);
491 							}
492 
493 							continue;
494 						} else {
495 							throw new ExcelFormatStringException("inappropriately placed custom condition");
496 						}
497 					} else {
498 						// color, we hope. FIXME can also be [s], [m], or [h] or maybe [ss], [mm], [hh]
499 						// colors are capitalized...
500 						color = lexeme.lexeme[1 .. $-1];
501 						continue;
502 					}
503 				}
504 
505 				// if we're here, it should actually match and need some processing.
506 
507 				if(lexeme.isLiteral) {
508 					// literals are easy...
509 					ret ~= lexeme.lexeme;
510 				} else {
511 					// but the rest of these are formatting commands
512 					switch(lexeme.lexeme[0]) {
513 						case ',':
514 							// thousands separator requested,
515 							// handled below in the decimal placeholder thing
516 						break;
517 						case '_', '*':
518 							auto lexemeToPadWith = extractExcelFormatStringLexeme(fmt);
519 							if(lexeme.lexeme[0] == '_')
520 								ret ~= " "; // FIXME supposed to match width of the char
521 							else if(lexeme.lexeme[0] == '*')
522 								ret ~= lexemeToPadWith.lexeme; // FIXME: supposed to repeat to fill the column width
523 						break;
524 						case '@': // the original text
525 							ret ~= s;
526 						break;
527 						case '%':
528 							ret ~= lexeme.lexeme;
529 						break;
530 						case '.':
531 							inDecimal = true;
532 							ret ~= lexeme.lexeme;
533 						break;
534 						case '/':
535 							if(!inAmPm) {
536 								inDenominator = true;
537 								ret ~= lexeme.lexeme;
538 							}
539 						break;
540 						case '#', '0', '?':
541 							// decimal group
542 							// # = digit
543 							// 0 = digit, pad with 0 if not significant
544 							// ? = digit, pad with space (same sized as digit) if not significant
545 
546 							if(value is double.nan)
547 								return Result(s, "NaN");
548 
549 							alignment = 1; // if we are printing numbers let's assume right align FIXME
550 							/+
551 							if(s.length == 0 && value is double.nan) // and if we printing numbers, treat empty cell as 0
552 								value = 0.0;
553 							+/
554 
555 							bool appendNumber(double v, bool includeThousandsSeparator) {
556 								if(v < 0)
557 									v = -v;
558 								string f = to!string(cast(int) v);
559 								if(f.length < lexeme.lexeme.length)
560 								foreach(l; lexeme.lexeme[0 .. $ - f.length]) {
561 									if(l == '0')
562 										ret ~= '0';
563 									else if(l == '?')
564 										ret ~= ' ';
565 								}
566 								if(f.length) {
567 									if(includeThousandsSeparator) {
568 										// 14532
569 										// 1234
570 										// 123
571 										auto offset = cast(int) f.length % 3;
572 										while(f.length > 3) {
573 											ret ~= f[offset .. offset + 3];
574 											ret ~= ",";
575 											f = f[3 .. $];
576 										}
577 										ret ~= f;
578 									} else {
579 										ret ~= f;
580 									}
581 									return true;
582 								}
583 								return false;
584 							}
585 
586 							if(peekLexeme().lexeme == ",") {
587 								// thousands separator requested...
588 								auto v = cast(int) value / 1000;
589 
590 								if(v == 0)
591 									continue; // FIXME? maybe we want some leading 0 padding?
592 
593 								auto hadOutput = appendNumber(v, true);
594 
595 								value = value - v * 1000; // take the remainder for the next iteration of the loop
596 
597 								if(hadOutput)
598 									ret ~= ","; // append the comma before the final thousands digits in the next iteration
599 
600 								continue;
601 							}
602 
603 
604 							if(inDecimal) {
605 								// FIXME: no more std.format
606 								import std.format;
607 								string f = format("%."~to!string(lexeme.lexeme.length)~"f", value - cast(int) value)[2..$]; // slice off the "0."
608 								ret ~= f;
609 							} else {
610 								appendNumber(value, false);
611 							}
612 
613 							inDenominator = false;
614 						break;
615 						case '1': .. case '9':
616 							// number,  if in denominator position
617 							// otherwise treat as string
618 							if(inDenominator)
619 								inDenominator = false; // the rest is handled elsewhere
620 							else
621 								ret ~= lexeme.lexeme;
622 						break;
623 						case 'y':
624 							if(value is double.nan)
625 								return Result(s, "NaN date");
626 
627 							justSawHours = false;
628 							auto y = getDate().year;
629 
630 							char[16] buffer;
631 
632 							switch(lexeme.lexeme.length) {
633 								case 2:
634 									ret ~= intToString(y % 100, buffer[], IntToStringArgs().withPadding(2));
635 								break;
636 								case 4:
637 									ret ~= intToString(y, buffer[], IntToStringArgs().withPadding(4));
638 								break;
639 								default:
640 									throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme);
641 							}
642 						break;
643 						case 'm':
644 							if(value is double.nan)
645 								return Result(s, "NaN date");
646 							auto peek = peekLexeme(false);
647 							bool precedesSeconds =
648 								(peek.lexeme.length && peek.lexeme[0] == 's')
649 								||
650 								(peek.lexeme.length > 1 && peek.lexeme[1] == 's')
651 							;
652 
653 							if(justSawHours || precedesSeconds) {
654 								// minutes
655 								auto m = getDate().timeOfDay.minute;
656 
657 								char[16] buffer;
658 
659 								switch(lexeme.lexeme.length) {
660 									case 1:
661 										ret ~= intToString(m, buffer[]);
662 									break;
663 									case 2:
664 										ret ~= intToString(m, buffer[], IntToStringArgs().withPadding(2));
665 									break;
666 									default:
667 										throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme);
668 								}
669 							} else {
670 								// month
671 								auto m = cast(int) getDate().month;
672 
673 								char[16] buffer;
674 
675 								import arsd.calendar;
676 
677 								switch(lexeme.lexeme.length) {
678 									case 1:
679 										ret ~= intToString(m, buffer[]);
680 									break;
681 									case 2:
682 										ret ~= intToString(m, buffer[], IntToStringArgs().withPadding(2));
683 									break;
684 									case 3: // abbreviation
685 										ret ~= monthNames[m][0 .. 3];
686 									break;
687 									case 4: // full name
688 										ret ~= monthNames[m];
689 									break;
690 									case 5: // single letter
691 										ret ~= monthNames[m][0 .. 1]; // FIXME?
692 									break;
693 									default:
694 										throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme);
695 								}
696 							}
697 
698 							justSawHours = false;
699 						break;
700 						case 'd':
701 							if(value is double.nan)
702 								return Result(s, "NaN date");
703 							justSawHours = false;
704 
705 							char[16] buffer;
706 
707 							import arsd.calendar;
708 
709 							auto d = getDate().day;
710 							auto dow = cast(int) getDate().dayOfWeek;
711 
712 							switch(lexeme.lexeme.length) {
713 								case 1:
714 									ret ~= intToString(d, buffer[]);
715 								break;
716 								case 2:
717 									ret ~= intToString(d, buffer[], IntToStringArgs().withPadding(2));
718 								break;
719 								case 3:
720 									// abbreviation
721 									ret ~= daysOfWeekNames[dow][0 .. 3];
722 								break;
723 								case 4:
724 									// full name
725 									ret ~= daysOfWeekNames[dow];
726 								break;
727 								default:
728 									throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme);
729 							}
730 						break;
731 						case 'h':
732 							if(value is double.nan)
733 								return Result(s, "NaN date");
734 							justSawHours = true;
735 
736 							auto m = getDate().timeOfDay.hour;
737 							char[16] buffer;
738 
739 							if(hasAmPm && m > 12)
740 								m -= 12;
741 							if(hasAmPm && m == 0)
742 								m = 12;
743 
744 							switch(lexeme.lexeme.length) {
745 								case 1:
746 									ret ~= intToString(m, buffer[]);
747 								break;
748 								case 2:
749 									ret ~= intToString(m, buffer[], IntToStringArgs().withPadding(2));
750 								break;
751 								default:
752 									throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme);
753 							}
754 						break;
755 						case 'a', 'A':
756 							if(value is double.nan)
757 								return Result(s, "NaN date");
758 							inAmPm = true;
759 							auto m = getDate().timeOfDay.hour;
760 							if(m >= 12)
761 								ret ~= lexeme.lexeme[0] == 'a' ? "pm" : "PM";
762 							else
763 								ret ~= lexeme.lexeme[0] == 'a' ? "am" : "AM";
764 						break;
765 						case 'p', 'P':
766 							inAmPm = false;
767 						break;
768 						case 's':
769 							if(value is double.nan)
770 								return Result(s, "NaN date");
771 							auto m = getDate().timeOfDay.second;
772 							char[16] buffer;
773 							switch(lexeme.lexeme.length) {
774 								case 1:
775 									ret ~= intToString(m, buffer[]);
776 								break;
777 								case 2:
778 									ret ~= intToString(m, buffer[], IntToStringArgs().withPadding(2));
779 								break;
780 								default:
781 									throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme);
782 							}
783 						break;
784 						case 'e', 'E':
785 							// FIXME: scientific notation
786 						break;
787 						default:
788 							assert(0, "unsupported formatting command: " ~ lexeme.lexeme);
789 					}
790 				}
791 			}
792 
793 			return Result(ret, color, alignment);
794 		} catch(ExcelFormatStringException e) {
795 			// we'll fall back to just displaying the original input text
796 			return Result(s, e.msg /* FIXME should be null */, alignment);
797 		}
798 	}
799 
800 	/+
801 		positive;negative;zero;text
802 		can include formats and dates and tons of stuff.
803 		https://support.microsoft.com/en-us/office/review-guidelines-for-customizing-a-number-format-c0a1d1fa-d3f4-4018-96b7-9c9354dd99f5
804 	+/
805 	private this(XlsxFile file, XlsxFile.StyleInternal.xf formatting) {
806 		if(formatting.applyNumberFormat) {
807 			// dates too depending on format
808 			 //import std.stdio; writeln(formatting.numFmtId); writeln(file.styleInternal.numFmts);
809 			this.originalFormatString = file.styleInternal.numFmts[formatting.numFmtId];
810 
811 			this.type = Type.Number;
812 		} else {
813 			this.type = Type.String;
814 		}
815 
816 		/+
817 			xf also has:
818 
819 			int xfId;
820 			int numFmtId;
821 			int fontId;
822 			int fillId;
823 			int borderId;
824 		+/
825 	}
826 
827 	private this(string f) {
828 		this.originalFormatString = f;
829 		this.type = Type.Number;
830 	}
831 }
832 
833 unittest {
834 	assert(XlsxFormat(`;;;"foo"`).applyTo("anything") == XlsxFormat.Result("foo", null));
835 	assert(XlsxFormat(`#.#;;;"foo"`).applyTo("2.0") == XlsxFormat.Result("2.0", null, 1));
836 	assert(XlsxFormat(`0#.##;;;"foo"`).applyTo("24.25") == XlsxFormat.Result("24.25", null, 1));
837 	assert(XlsxFormat(`0#.##;;;"foo"`).applyTo("2.25") == XlsxFormat.Result("02.25", null, 1));
838 	assert(XlsxFormat(`#,#.##`).applyTo("2.25") == XlsxFormat.Result("2.25", null, 1));
839 	assert(XlsxFormat(`#,#.##`).applyTo("123.25") == XlsxFormat.Result("123.25", null, 1));
840 	assert(XlsxFormat(`#,#.##`).applyTo("1234.25") == XlsxFormat.Result("1,234.25", null, 1));
841 	assert(XlsxFormat(`#,#.##`).applyTo("123456.25") == XlsxFormat.Result("123,456.25", null, 1));
842 }
843 
844 struct XlsxCell {
845 	string formula;
846 	string content;
847 	XlsxFormat formatting;
848 
849 	XlsxFormat.Result displayableResult() {
850 		return formatting.applyTo(content);
851 	}
852 
853 	string toString() {
854 		return displayableResult().content;
855 	}
856 }
857 
858 struct CellReference {
859 	string name;
860 
861 	static CellReference fromInts(int column, int row) {
862 		string ret;
863 
864 		string piece;
865 		int adjustment = 0;
866 		do {
867 			piece ~= cast(char)(column % 26 + 'A' - adjustment);
868 			if(adjustment == 0)
869 				adjustment = 1;
870 			column /= 26;
871 		} while(column);
872 
873 		foreach_reverse(ch; piece)
874 			ret ~= ch;
875 		piece = null;
876 
877 		do {
878 			piece ~= cast(char)(row % 10 + '0');
879 			row /= 10;
880 		} while(row);
881 
882 		foreach_reverse(ch; piece)
883 			ret ~= ch;
884 		piece = null;
885 
886 		return CellReference(ret);
887 	}
888 
889 	string toString() {
890 		return name;
891 	}
892 
893 	int toColumnIndex() {
894 		size_t endSlice = name.length;
895 		foreach(idx, ch; name) {
896 			if(ch < 'A' || ch > 'Z') {
897 				endSlice = idx;
898 				break;
899 			}
900 		}
901 
902 		int accumulator;
903 		foreach(idx, ch; name[0 .. endSlice]) {
904 			int value;
905 			if(idx + 1 == endSlice) {
906 				// an A in the last "digit" is a 0, elsewhere it is a 1
907 				value = ch - 'A';
908 			} else {
909 				value = ch - 'A' + 1;
910 			}
911 
912 			accumulator *= 26;
913 			accumulator += value;
914 		}
915 		return accumulator;
916 	}
917 
918 	int toRowIndex() {
919 		int accumulator;
920 		foreach(ch; name) {
921 			if(ch >= 'A' && ch <= 'Z')
922 				continue;
923 			accumulator *= 10;
924 			accumulator += ch - '0';
925 		}
926 		return accumulator;
927 	}
928 }
929 
930 unittest {
931 	auto cr = CellReference("AE434");
932 	assert(cr.toColumnIndex == 30);
933 	cr = CellReference("E434");
934 	assert(cr.toColumnIndex == 4); // zero-based
935 
936 	// zero-based column, 1-based row. wtf?
937 	assert(CellReference("AE434") == CellReference.fromInts(30, 434));
938 
939 	assert(CellReference("Z1") == CellReference.fromInts(25, 1));
940 }
941 
942 /++
943 
944 +/
945 class XlsxSheet {
946 	private string name_;
947 	private XlsxFile file;
948 	private XmlDocument document;
949 	private this(XlsxFile file, string name, XmlDocument document) {
950 		this.file = file;
951 		this.name_ = name;
952 		this.document = document;
953 
954 		this.dimension = document.requireSelector("worksheet > dimension").getAttribute("ref");
955 		// there's also sheetView with selection, activeCell, etc
956 		// and cols with widths and such
957 
958 		auto ul = this.upperLeft;
959 		this.minRow = ul.toRowIndex;
960 		this.minColumn = ul.toColumnIndex;
961 
962 		auto lr = this.lowerRight;
963 		this.maxRow = lr.toRowIndex + 1;
964 		this.maxColumn = lr.toColumnIndex + 1;
965 	}
966 
967 	private string dimension;
968 
969 	private int minRow;
970 	private int minColumn;
971 	private int maxRow;
972 	private int maxColumn;
973 
974 	/++
975 	+/
976 	Size size() {
977 		return Size(maxColumn - minColumn, maxRow - minRow);
978 	}
979 
980 	private CellReference upperLeft() {
981 		foreach(idx, ch; dimension)
982 			if(ch == ':')
983 				return CellReference(dimension[0 .. idx]);
984 		//assert(0); // it has no lower right...
985 		return CellReference(dimension);
986 	}
987 
988 	private CellReference lowerRight() {
989 		foreach(idx, ch; dimension)
990 			if(ch == ':')
991 				return CellReference(dimension[idx + 1 .. $]);
992 		assert(0);
993 	}
994 
995 	// opIndex could be like sheet["A1:B4"] and sheet["A1", "B4"] and stuff maybe.
996 
997 	/++
998 	+/
999 	string name() {
1000 		return name_;
1001 	}
1002 
1003 	/++
1004 		Suitable for passing to [arsd.csv.toCsv]
1005 	+/
1006 	string[][] toStringGrid() {
1007 		auto grid = this.toGrid();
1008 
1009 		string[][] ret;
1010 		ret.length = size.height;
1011 		foreach(i, ref row; ret) {
1012 			row.length = size.width;
1013 			foreach(k, ref cell; row)
1014 				cell = grid[i][k].toString();
1015 		}
1016 
1017 		return ret;
1018 	}
1019 
1020 	/++
1021 
1022 	+/
1023 	XlsxCell[][] toGrid() {
1024 		// FIXME: this crashes on opend dmd!
1025 		// string[][] ret = new string[][](size.height, size.width);
1026 
1027 		/+
1028 		// almost everything we allocate in here is to keep, so
1029 		// turning off the GC while working prevents unnecessary
1030 		// collection attempts that won't find any garbage anyway.
1031 
1032 		// but meh no significant difference in perf anyway.
1033 		import core.memory;
1034 		GC.disable();
1035 		scope(exit)
1036 			GC.enable();
1037 		+/
1038 
1039 		XlsxCell[][] ret;
1040 		ret.length = size.height;
1041 		foreach(ref row; ret)
1042 			row.length = size.width;
1043 
1044 		//alloc done
1045 
1046 		auto sheetData = document.requireSelector("sheetData");
1047 		Element[] rowElements = sheetData.childNodes;
1048 
1049 		Element[] nextRow(int expected) {
1050 			if(rowElements.length == 0)
1051 				throw new Exception("ran out of row elements...");
1052 
1053 			Element rowElement;
1054 			Element[] before = rowElements;
1055 
1056 			do {
1057 				rowElement = rowElements[0];
1058 				rowElements = rowElements[1 .. $];
1059 			} while(rowElement.tagName != "row");
1060 
1061 			if(rowElement.attrs.r.to!int != expected) {
1062 				// a row was skipped in the file, so we'll
1063 				// return an empty placeholder too
1064 				rowElements = before;
1065 				return null;
1066 			}
1067 
1068 			return rowElement.childNodes;
1069 		}
1070 
1071 		foreach(int rowIdx, row; ret) {
1072 			auto cellElements = nextRow(rowIdx + 1);
1073 
1074 			foreach(int cellIdx, ref cell; row) {
1075 				string cellReference = CellReference.fromInts(cellIdx + minColumn, rowIdx + minRow).name;
1076 
1077 				Element element = null;
1078 				foreach(idx, thing; cellElements) {
1079 					if(thing.attrs.r == cellReference) {
1080 						element = thing;
1081 						cellElements = cellElements[idx + 1 .. $];
1082 						break;
1083 					}
1084 				}
1085 
1086 				if(element is null)
1087 					continue;
1088 				string v = element.optionSelector("v").textContent;
1089 				if(element.attrs.t == "s")
1090 					v = file.sharedStrings[v.to!int()];
1091 
1092 				auto sString = element.attrs.s;
1093 				auto sId = sString.length ? to!int(sString) : 0;
1094 
1095 				string f = element.optionSelector("f").textContent;
1096 
1097 				cell = XlsxCell(f, v, XlsxFormat(file, file.styleInternal.xfs[sId]));
1098 			}
1099 		}
1100 		return ret;
1101 	}
1102 }
1103 
1104 /++
1105 
1106 +/
1107 class XlsxFile {
1108 	private ZipFile zipFile;
1109 
1110 	/++
1111 
1112 	+/
1113 	this(FilePath file) {
1114 		this.zipFile = new ZipFile(file);
1115 
1116 		load();
1117 	}
1118 
1119 	/// ditto
1120 	this(immutable(ubyte)[] rawData) {
1121 		this.zipFile = new ZipFile(rawData);
1122 
1123 		load();
1124 	}
1125 
1126 	/++
1127 	+/
1128 	int sheetCount() {
1129 		return cast(int) sheetsInternal.length;
1130 	}
1131 
1132 	/++
1133 	+/
1134 	string[] sheetNames() {
1135 		string[] ret;
1136 		foreach(sheet; sheetsInternal)
1137 			ret ~= sheet.name;
1138 		return ret;
1139 	}
1140 
1141 	/++
1142 	+/
1143 	XlsxSheet getSheet(string name) {
1144 		foreach(ref sheet; sheetsInternal)
1145 			if(sheet.name == name)
1146 				return getSheetParsed(sheet);
1147 		return null;
1148 
1149 	}
1150 
1151 	/// ditto
1152 	XlsxSheet getSheet(int indexZeroBased) {
1153 		// FIXME: if it is out of range do what?
1154 		return getSheetParsed(sheetsInternal[indexZeroBased]);
1155 	}
1156 
1157 	// docProps/core.xml has creator, last modified, etc.
1158 
1159 	private string[string] contentTypes;
1160 	private struct Relationship {
1161 		string id;
1162 		string type;
1163 		string target;
1164 	}
1165 	private Relationship[string] relationships;
1166 	private string[] sharedStrings;
1167 
1168 	private struct SheetInternal {
1169 		string name;
1170 		string id;
1171 		string rel;
1172 
1173 		XmlDocument cached;
1174 		XlsxSheet parsed;
1175 	}
1176 	private SheetInternal[] sheetsInternal;
1177 
1178 	// https://stackoverflow.com/questions/3154646/what-does-the-s-attribute-signify-in-a-cell-tag-in-xlsx
1179 	private struct StyleInternal {
1180 		string[int] numFmts;
1181 		// fonts
1182 			// font references color theme from xl/themes
1183 		// fills
1184 		// borders
1185 		// cellStyleXfs
1186 		// cellXfs
1187 		struct xf {
1188 			int xfId;
1189 			int numFmtId;
1190 			int fontId;
1191 			int fillId;
1192 			int borderId;
1193 
1194 			bool applyNumberFormat; // if yes, you get default right alignment
1195 		}
1196 		xf[] xfs;
1197 
1198 		// cellStyles
1199 		// dxfs
1200 		// tableStyles
1201 
1202 	}
1203 	private StyleInternal styleInternal;
1204 
1205 	private XmlDocument getSheetXml(ref SheetInternal sheet) {
1206 		if(sheet.cached is null)
1207 			loadXml("xl/" ~ relationships[sheet.rel].target, (document) { sheet.cached = document; });
1208 
1209 		return sheet.cached;
1210 	}
1211 
1212 	private XlsxSheet getSheetParsed(ref SheetInternal sheet) {
1213 		if(sheet.parsed is null)
1214 			sheet.parsed = new XlsxSheet(this, sheet.name, getSheetXml(sheet));
1215 
1216 		return sheet.parsed;
1217 	}
1218 
1219 
1220 	private void load() {
1221 		loadXml("[Content_Types].xml", (document) {
1222 			foreach(element; document.querySelectorAll("Override"))
1223 				contentTypes[element.attrs.PartName] = element.attrs.ContentType;
1224 		});
1225 
1226 		loadXml("xl/_rels/workbook.xml.rels", (document) {
1227 			foreach(element; document.querySelectorAll("Relationship"))
1228 				relationships[element.attrs.Id] = Relationship(element.attrs.Id, element.attrs.Type, element.attrs.Target);
1229 		});
1230 
1231 		try
1232 		loadXml("xl/sharedStrings.xml", (document) {
1233 			foreach(element; document.querySelectorAll("si t"))
1234 				sharedStrings ~= element.textContent;
1235 		});
1236 		catch(Exception e) {} // this xml might not exist and that's ok
1237 
1238 		loadXml("xl/styles.xml", (document) {
1239 			// need to keep the generic hardcoded formats first
1240 			styleInternal.numFmts = [
1241 				 0: "@",
1242 				 1: "0",
1243 				 2: "0.00",
1244 				 3: "#,##0",
1245 				 4: "#,##0.00",
1246 				 5: "$#,##0_);($#,##0)",
1247 				 6: "$#,##0_);[Red]($#,##0)",
1248 				 7: "$#,##0.00_);($#,##0.00)",
1249 				 8: "$#,##0.00_);[Red]($#,##0.00)",
1250 				 9: "0%",
1251 				10: "0.00%",
1252 				11: "0.00E+00",
1253 				12: "# ?/?",
1254 				13: "# ??/??",
1255 				14: "m/d/yyyy", // ive heard this one does different things in different locales
1256 				15: "d-mmm-yy",
1257 				16: "d-mmm",
1258 				17: "mmm-yy",
1259 				18: "h:mm AM/PM",
1260 				19: "h:mm:ss AM/PM",
1261 				20: "h:mm",
1262 				21: "h:mm:ss",
1263 				22: "m/d/yyyy h:mm",
1264 				37: "#,##0_);(#,##0)",
1265 				38: "#,##0_);[Red](#,##0)",
1266 				39: "#,##0.00_);(#,##0.00)",
1267 				40: "#,##0.00_);[Red](#,##0.00)",
1268 				45: "mm:ss",
1269 				46: "[h]:mm:ss",
1270 				47: "mm:ss.0",
1271 				48: "##0.0E+0",
1272 				49: "@",
1273 			];
1274 
1275 
1276 			foreach(element; document.querySelectorAll("numFmts > numFmt")) {
1277 				styleInternal.numFmts[to!int(element.attrs.numFmtId)] = element.attrs.formatCode;
1278 			}
1279 
1280 			foreach(element; document.querySelectorAll("cellXfs > xf")) {
1281 				StyleInternal.xf xf;
1282 
1283 				xf.xfId = element.attrs.xfId.to!int;
1284 				xf.fontId = element.attrs.fontId.to!int;
1285 				xf.fillId = element.attrs.fillId.to!int;
1286 				xf.borderId = element.attrs.borderId.to!int;
1287 				xf.numFmtId = element.attrs.numFmtId.to!int;
1288 
1289 				if(element.attrs.applyNumberFormat == "1")
1290 					xf.applyNumberFormat = true;
1291 
1292 				styleInternal.xfs ~= xf;
1293 			}
1294 		});
1295 
1296 		loadXml("xl/workbook.xml", (document) {
1297 			foreach(element; document.querySelectorAll("sheets > sheet")) {
1298 				sheetsInternal ~= SheetInternal(element.attrs.name, element.attrs.sheetId, element.getAttribute("r:id"));
1299 			}
1300 		});
1301 	}
1302 
1303 	private void loadXml(string filename, scope void delegate(XmlDocument document) handler) {
1304 		auto document = new XmlDocument(cast(string) zipFile.getContent(filename));
1305 		handler(document);
1306 	}
1307 }
1308 
1309 
1310 // from Robert Schadek's code {
1311 
1312 import std.datetime;
1313 version(unittest) import std.format;
1314 
1315 Date longToDate(long d) @safe {
1316 	// modifed from https://www.codeproject.com/Articles/2750/
1317 	// Excel-Serial-Date-to-Day-Month-Year-and-Vice-Versa
1318 
1319 	// Excel/Lotus 123 have a bug with 29-02-1900. 1900 is not a
1320 	// leap year, but Excel/Lotus 123 think it is...
1321 	if(d == 60) {
1322 		return Date(1900, 2,  29);
1323 	} else if(d < 60) {
1324 		// Because of the 29-02-1900 bug, any serial date
1325 		// under 60 is one off... Compensate.
1326 		++d;
1327 	}
1328 
1329 	// Modified Julian to DMY calculation with an addition of 2415019
1330 	int l = cast(int)d + 68569 + 2415019;
1331 	int n = int(( 4 * l ) / 146097);
1332 	l = l - int(( 146097 * n + 3 ) / 4);
1333 	int i = int(( 4000 * ( l + 1 ) ) / 1461001);
1334 	l = l - int(( 1461 * i ) / 4) + 31;
1335 	int j = int(( 80 * l ) / 2447);
1336 	int nDay = l - int(( 2447 * j ) / 80);
1337 	l = int(j / 11);
1338 	int nMonth = j + 2 - ( 12 * l );
1339 	int nYear = 100 * ( n - 49 ) + i + l;
1340 	return Date(nYear, nMonth, nDay);
1341 }
1342 
1343 long dateToLong(Date d) @safe {
1344 	// modifed from https://www.codeproject.com/Articles/2750/
1345 	// Excel-Serial-Date-to-Day-Month-Year-and-Vice-Versa
1346 
1347 	// Excel/Lotus 123 have a bug with 29-02-1900. 1900 is not a
1348 	// leap year, but Excel/Lotus 123 think it is...
1349 	if(d.day == 29 && d.month == 2 && d.year == 1900) {
1350 		return 60;
1351 	}
1352 
1353 	// DMY to Modified Julian calculated with an extra subtraction of 2415019.
1354 	long nSerialDate =
1355 			int(( 1461 * ( d.year + 4800 + int(( d.month - 14 ) / 12) ) ) / 4) +
1356 			int(( 367 * ( d.month - 2 - 12 *
1357 				( ( d.month - 14 ) / 12 ) ) ) / 12) -
1358 				int(( 3 * ( int(( d.year + 4900
1359 				+ int(( d.month - 14 ) / 12) ) / 100) ) ) / 4) +
1360 				d.day - 2415019 - 32075;
1361 
1362 	if(nSerialDate < 60) {
1363 		// Because of the 29-02-1900 bug, any serial date
1364 		// under 60 is one off... Compensate.
1365 		nSerialDate--;
1366 	}
1367 
1368 	return nSerialDate;
1369 }
1370 
1371 @safe unittest {
1372 	auto ds = [ Date(1900,2,1), Date(1901, 2, 28), Date(2019, 06, 05) ];
1373 	foreach(const d; ds) {
1374 		long l = dateToLong(d);
1375 		Date r = longToDate(l);
1376 		assert(r == d, format("%s %s", r, d));
1377 	}
1378 }
1379 
1380 TimeOfDay doubleToTimeOfDay(double s) @safe {
1381 	import core.stdc.math : lround;
1382 	double secs = (24.0 * 60.0 * 60.0) * s;
1383 
1384 	// TODO not one-hundred my lround is needed
1385 	int secI = to!int(lround(secs));
1386 
1387 	return TimeOfDay(secI / 3600, (secI / 60) % 60, secI % 60);
1388 }
1389 
1390 double timeOfDayToDouble(TimeOfDay tod) @safe {
1391 	long h = tod.hour * 60 * 60;
1392 	long m = tod.minute * 60;
1393 	long s = tod.second;
1394 	return (h + m + s) / (24.0 * 60.0 * 60.0);
1395 }
1396 
1397 @safe unittest {
1398 	auto tods = [ TimeOfDay(23, 12, 11), TimeOfDay(11, 0, 11),
1399 		 TimeOfDay(0, 0, 0), TimeOfDay(0, 1, 0),
1400 		 TimeOfDay(23, 59, 59), TimeOfDay(0, 0, 0)];
1401 	foreach(const tod; tods) {
1402 		double d = timeOfDayToDouble(tod);
1403 		assert(d <= 1.0, format("%s", d));
1404 		TimeOfDay r = doubleToTimeOfDay(d);
1405 		assert(r == tod, format("%s %s", r, tod));
1406 	}
1407 }
1408 
1409 double datetimeToDouble(DateTime dt) @safe {
1410 	double d = dateToLong(dt.date);
1411 	double t = timeOfDayToDouble(dt.timeOfDay);
1412 	return d + t;
1413 }
1414 
1415 DateTime doubleToDateTime(double d) @safe {
1416 	long l = cast(long)d;
1417 	Date dt = longToDate(l);
1418 	TimeOfDay t = doubleToTimeOfDay(d - l);
1419 	return DateTime(dt, t);
1420 }
1421 
1422 @safe unittest {
1423 	auto ds = [ Date(1900,2,1), Date(1901, 2, 28), Date(2019, 06, 05) ];
1424 	auto tods = [ TimeOfDay(23, 12, 11), TimeOfDay(11, 0, 11),
1425 		 TimeOfDay(0, 0, 0), TimeOfDay(0, 1, 0),
1426 		 TimeOfDay(23, 59, 59), TimeOfDay(0, 0, 0)];
1427 	foreach(const d; ds) {
1428 		foreach(const tod; tods) {
1429 			DateTime dt = DateTime(d, tod);
1430 			double dou = datetimeToDouble(dt);
1431 
1432 			Date rd = longToDate(cast(long)dou);
1433 			assert(rd == d, format("%s %s", rd, d));
1434 
1435 			double rest = dou - cast(long)dou;
1436 			TimeOfDay rt = doubleToTimeOfDay(dou - cast(long)dou);
1437 			assert(rt == tod, format("%s %s", rt, tod));
1438 
1439 			DateTime r = doubleToDateTime(dou);
1440 			assert(r == dt, format("%s %s", r, dt));
1441 		}
1442 	}
1443 }
1444 // end from burner's code }