arsd.xlsx source code

1 /++
2 	Some support for the Microsoft Excel Spreadsheet file format.
3 
4 	Don't expect much from it, not even API stability.
5 
6 	Some code is borrowed from the xlsxreader package.
7 
8 	History:
9 		Added February 13, 2025
10 
11 	See_Also:
12 		https://github.com/symmetryinvestments/xlsxd which supports writing xlsx files. I might add write support here too someday but I kinda doubt it.
13 +/
14 module arsd.xlsx;
15 
16 /+
17 ./csv-viewer ~/Downloads/UI_comparison.xlsx
18 arsd.dom.ElementNotFoundException@/home/me/program/lib/arsd/xlsx.d(823): Element of type 'Element' matching {worksheet > dimension} not found.
19 +/
20 
21 /+
22 	sheet at double[]:
23 
24 	nan payloads for blank, errors, then strings as indexes into a table.
25 +/
26 
27 // FIXME: does excel save errors like DIV0 to content in the file?
28 
29 // See also Robert's impl: https://github.com/symmetryinvestments/xlsxreader/blob/master/source/xlsxreader.d
30 
31 import arsd.core;
32 import arsd.zip;
33 import arsd.dom;
34 import arsd.color;
35 
36 import std.conv;
37 
38 private struct ExcelFormatStringLexeme {
39 	string lexeme;
40 	bool isLiteral;
41 }
42 
43 class ExcelFormatStringException : Exception {
44 	this(string msg, string file = __FILE__, size_t line = __LINE__) {
45 		super(msg, file, line);
46 	}
47 }
48 
49 // FIXME: out contract that asserts s_io.length has indeed been reduced
50 private ExcelFormatStringLexeme extractExcelFormatStringLexeme(ref string s_io) {
51 	assert(s_io.length);
52 	string s = s_io;
53 
54 	switch(s[0]) {
55 		case '[':
56 			// condition or color
57 			// or elapsed time thing.
58 			// or a locale setting thing for dates (and more?)
59 			int count = 0;
60 			int size = 0;
61 			while(s[0]) {
62 				if(s[0] == '[')
63 					count++;
64 				if(s[0] == ']')
65 					count--;
66 				s = s[1 .. $];
67 				size++;
68 				if(count == 0)
69 					break;
70 				if(s.length == 0)
71 					throw new ExcelFormatStringException("unclosed [");
72 			}
73 
74 			string ret = s_io[0 .. size];
75 			s_io = s_io[size .. $];
76 
77 			return ExcelFormatStringLexeme(ret, false);
78 		case '"':
79 			// quoted thing watching for backslash
80 			bool escaped;
81 			int size;
82 
83 			size++;
84 			s = s[1 .. $]; // skip the first "
85 
86 			string ret;
87 
88 			while(escaped || s[0] != '"') {
89 				if(!escaped) {
90 					if(s[0] == '"') {
91 						break;
92 					}
93 					if(s[0] == '\\')
94 						escaped = true;
95 					else
96 						ret ~= s[0];
97 				} else {
98 					ret ~= s[0];
99 					escaped = false;
100 				}
101 
102 				s = s[1 .. $];
103 				size++;
104 			}
105 			if(s.length == 0)
106 				throw new ExcelFormatStringException("unclosed \"");
107 			size++;
108 
109 			s_io = s_io[size .. $];
110 			return ExcelFormatStringLexeme(ret, true);
111 
112 		case '\\':
113 			// escaped character
114 			s = s[1 .. $]; // skip the \
115 			s_io = s_io[1 .. $];
116 
117 			// FIXME: need real stride
118 			auto stride = 1;
119 			s_io = s_io[stride .. $];
120 			return ExcelFormatStringLexeme(s[0 .. stride], true);
121 		case '$', '+', '(', ':', '^', '\'', '{', '<', '=', '-', ')', '!', '&', '~', '}', '>', ' ': // they say slash but that seems to be fraction instead
122 			// character literals w/o needing to be quoted
123 			s_io = s_io[1 .. $];
124 			return ExcelFormatStringLexeme(s[0 .. 1], true);
125 		case 'A', 'a', 'P', 'p':
126 			// am/pm
127 
128 			int size = 0;
129 			while(
130 				s[0] == 'a' || s[0] == 'A' ||
131 				s[0] == 'p' || s[0] == 'P' ||
132 				s[0] == 'm' || s[0] == 'M' ||
133 				s[0] == '/'
134 			) {
135 				size++;
136 				s = s[1 .. $];
137 				if(s.length == 0)
138 					break;
139 			}
140 			// also switches hour to 12 hour format when it happens
141 			string ret = s_io[0 .. size];
142 			s_io = s_io[size .. $];
143 
144 			return ExcelFormatStringLexeme(ret, false);
145 
146 		// the single char directives
147 		case '@': // text placeholder
148 		case ';': // clause separator
149 			s_io = s_io[1 .. $];
150 			return ExcelFormatStringLexeme(s[0 .. 1], false);
151 		case '_': // padding char - this adds a space with the same width as the char that follows it, for column alignment.
152 		case '*': // fill char
153 			// the padding or fill is the next lexeme, not the next char!
154 			s_io = s_io[1 .. $];
155 			return ExcelFormatStringLexeme(s[0 .. 1], false);
156 		case 'e', 'E': // scientific notation request
157 		case '%': // percent indicator
158 		case ',': // thousands separator
159 		case '.': // decimal separator
160 		case '/': // fraction or date separator
161 			s_io = s_io[1 .. $];
162 			return ExcelFormatStringLexeme(s[0 .. 1], false);
163 		case /*'m',*/ 'd', 'y': // date parts
164 		case 'h', 'm', 's': // time parts
165 
166 			/+
167 			Note: The m or mm code must appear immediately after the h or hh code or immediately before the ss code; otherwise, Excel displays the month instead of minutes.
168 
169 			it can be either a date/time OR a number/fraction, not both.
170 			+/
171 
172 			auto thing = s[0];
173 			int size;
174 			while(s.length && s[0] == thing) {
175 				s = s[1 .. $];
176 				size++;
177 			}
178 			auto keep = s_io[0 .. size];
179 			s_io = s_io[size .. $];
180 			return ExcelFormatStringLexeme(keep, false);
181 		case '1': .. case '9': // fraction denominators or just literal numbers
182 			int size;
183 			while(s.length && s[0] >= '1' && s[0] <= '9') {
184 				s = s[1 .. $];
185 				size++;
186 			}
187 			auto keep = s_io[0 .. size];
188 			s_io = s_io[size .. $];
189 			return ExcelFormatStringLexeme(keep, false);
190 		case '0', '#', '?': // digit placeholder
191 			int size;
192 
193 			while(s[0] == '0' || s[0] == '#' || s[0] == '?') {
194 				s = s[1 .. $];
195 				size++;
196 				if(s.length == 0)
197 					break;
198 			}
199 
200 			auto keep = s_io[0 .. size];
201 			s_io = s_io[size .. $];
202 			return ExcelFormatStringLexeme(keep, false);
203 
204 		default:
205 			// idk
206 			throw new ExcelFormatStringException("unknown char " ~ s);
207 	}
208 
209 	assert(0);
210 }
211 
212 unittest {
213 	string thing = `[>50][Red]"foo"`;
214 	ExcelFormatStringLexeme lexeme;
215 
216 	lexeme = extractExcelFormatStringLexeme(thing);
217 	assert(thing == `[Red]"foo"`);
218 	lexeme = extractExcelFormatStringLexeme(thing);
219 	assert(thing == `"foo"`);
220 	lexeme = extractExcelFormatStringLexeme(thing);
221 	assert(thing == "");
222 	assert(lexeme.lexeme == "foo");
223 
224 	thing = `"\""`;
225 	lexeme = extractExcelFormatStringLexeme(thing);
226 	assert(thing == "");
227 	assert(lexeme.lexeme == `"`);
228 
229 	thing = `\,`;
230 	lexeme = extractExcelFormatStringLexeme(thing);
231 	assert(thing == "");
232 	assert(lexeme.lexeme == `,`);
233 
234 	/*
235 	thing = `"A\""`;
236 	lexeme = extractExcelFormatStringLexeme(thing);
237 	assert(thing == "");
238 	assert(lexeme.lexeme == `"`);
239 	*/
240 
241 	/+
242 	thing = "mm-yyyy";
243 	lexeme = extractExcelFormatStringLexeme(thing);
244 	import std.stdio; writeln(thing); writeln(lexeme);
245 	+/
246 }
247 
248 struct XlsxFormat {
249 	string originalFormatString;
250 
251 	Color foregroundColor;
252 	Color backgroundColor;
253 
254 	int alignment; // 0 = left, 1 = right, 2 = center
255 
256 	enum Type {
257 		/++
258 		+/
259 		String,
260 		/++
261 
262 		+/
263 		Number,
264 		/++
265 			A Date is a special kind of number in Excel.
266 		+/
267 		Date,
268 		/++
269 			things like # ?/4
270 
271 		+/
272 		Fraction,
273 		Percent
274 	}
275 	Type type;
276 
277 	/++
278 	+/
279 	static struct Result {
280 		string content;
281 		string color;
282 		int alignment;
283 	}
284 
285 	/++
286 	+/
287 	Result applyTo(string s) const {
288 		if(this.type == Type.String || originalFormatString == "@" || originalFormatString.length == 0)
289 			return Result(s, null, alignment);
290 
291 		int alignment = this.alignment;
292 
293 		// need to check for a text thing and if conversion fails, we use that
294 		double value;
295 		try {
296 			value = to!double(s);
297 		} catch(Exception e) {
298 			value = double.nan;
299 		}
300 
301 		DateTime date_;
302 		bool dateCalculated;
303 
304 		DateTime getDate() {
305 			// make sure value is not nan before here or it will throw "negative overflow"!
306 			if(!dateCalculated) {
307 				date_ = doubleToDateTime(value);
308 				dateCalculated = true;
309 			}
310 
311 			return date_;
312 		}
313 
314 		// parse out the original format string
315 		// the ordering by default is positive;negative;zero;text
316 		//
317 		// these can also be like [Color][Condition]fmt;generic
318 		// color is allowed anywhere
319 		// but condition can only have two things following: `[Color][Condition];` repeated any number of times then `;generic-number;text`. no more negative/zero stuff.
320 		// once we see a condition, it switches modes - following things MUST have condition or else are treated as just generic catch all for number and then text.
321 		//
322 		// it matches linearly.
323 		/+
324 			so it goes:
325 				implicit match >0
326 				implicit match <0
327 				implicit match =0
328 				text
329 
330 			but if at any point one of them has a condition, the following ones must be either more conditions (immediately!) or unconditional:
331 				fallthrough for number
332 				text
333 
334 
335 			and if i dont support a format thing i can always fall back to the original text.
336 		+/
337 
338 		try {
339 			string fmt = originalFormatString;
340 
341 			int state = 0; // 0 == positive, 1 == negative or custom, 2 == other, 3 == text
342 			bool matchesCurrentCondition = value > 0;
343 
344 			bool hasMultipleClauses = false;
345 			{
346 				string fmt2 = fmt;
347 				while(fmt2.length) {
348 					auto next = extractExcelFormatStringLexeme(fmt2);
349 					if(!next.isLiteral && next.lexeme == ";")
350 						hasMultipleClauses = true;
351 						break;
352 				}
353 			}
354 			if(hasMultipleClauses == false)
355 				matchesCurrentCondition = true; // only one thing means we must always match it
356 
357 			int numericState;
358 			bool inDenominator;
359 			bool inAmPm;
360 			bool inDecimal;
361 			bool justSawHours;
362 
363 			// these are populated below once we match a clause
364 			bool hasAmPm;
365 			bool hasFraction;
366 			bool hasScientificNotation;
367 			bool hasPercent;
368 			bool first = true;
369 
370 			string color;
371 			string ret;
372 
373 			while(fmt.length) {
374 				auto lexeme = extractExcelFormatStringLexeme(fmt);
375 
376 				ExcelFormatStringLexeme peekLexeme(bool returnLiteral = false) {
377 					string fmt2 = fmt;
378 					skip:
379 					if(fmt2.length == 0)
380 						return ExcelFormatStringLexeme.init;
381 					auto next = extractExcelFormatStringLexeme(fmt2);
382 					if(next.isLiteral && !returnLiteral)
383 						goto skip;
384 					return next;
385 				}
386 
387 				if(!lexeme.isLiteral && lexeme.lexeme[0] == ';') {
388 					// we finished the format of the match, so no need to continue
389 					if(matchesCurrentCondition)
390 						break;
391 					// otherwise, we go to the next thing
392 					state++;
393 					if(state == 1) {
394 						matchesCurrentCondition = value < 0;
395 					} else if(state == 2) {
396 						// this is going to be either the catch-all fallback or another custom one
397 						// for now, assume it is a catch-all
398 						import std.math;
399 						matchesCurrentCondition = !isNaN(value) ? true : false; // only numbers, so not text, matches the catch-all
400 					} else if(state == 3) {
401 						matchesCurrentCondition = true; // this needs to match, we're at the end, so this is the text display
402 					} else {
403 						throw new ExcelFormatStringException("too many ; pieces");
404 					}
405 
406 					continue;
407 				}
408 
409 				if(!matchesCurrentCondition)
410 					continue;
411 
412 				// scan ahead to see if we're doing some special cases: fractions, 12 hour clock, percentages, and sci notation
413 				if(first) {
414 					string fmt2 = fmt;
415 					while(fmt2.length) {
416 						auto next = extractExcelFormatStringLexeme(fmt2);
417 						if(!next.isLiteral) {
418 							// don't proceed into the next clause
419 							if(next.lexeme == ";")
420 								break;
421 
422 							char c = next.lexeme[0] | 0x20;
423 							if(next.lexeme == "/")
424 								hasFraction = true;
425 							else if(next.lexeme == "%") {
426 								hasPercent = true;
427 								value *= 100.0;
428 							} else if(c == 'e')
429 								hasScientificNotation = true;
430 							else if(c == 'a' || c == 'p')
431 								hasAmPm = true;
432 						}
433 					}
434 					first = false;
435 				}
436 
437 				if(hasScientificNotation)
438 					return Result(s, "unsupported feature: scientific notation"); // FIXME
439 				if(hasFraction)
440 					return Result(s, "unsupported feature: fractions"); // FIXME
441 
442 				if(!lexeme.isLiteral && lexeme.lexeme[0] == '[') {
443 					// look for color, condition, or locale
444 					char nc = lexeme.lexeme[1];
445 					if(nc == '$')
446 						continue; // locale i think, skip it
447 					if(nc == '<' || nc == '>' || nc == '=') {
448 						// condition
449 
450 						if(state == 1 || state == 2) {
451 							state = 1;
452 							// read the condition, see if we match it
453 							auto condition = lexeme.lexeme[1 .. $-1];
454 
455 							string operator;
456 							string num;
457 							if(condition[1] == '=') {
458 								operator = condition[0 .. 2];
459 								num = condition[2 .. $];
460 							} else {
461 								operator = condition[0 .. 1];
462 								num = condition[1 .. $];
463 							}
464 
465 							double compareTo;
466 							try {
467 								compareTo = to!double(num);
468 							} catch(Exception e) {
469 								throw new ExcelFormatStringException("not a number: " ~ num);
470 							}
471 							switch(operator) {
472 								case "<":
473 									matchesCurrentCondition = value < compareTo;
474 								break;
475 								case "<=":
476 									matchesCurrentCondition = value <= compareTo;
477 								break;
478 								case ">":
479 									matchesCurrentCondition = value > compareTo;
480 								break;
481 								case ">=":
482 									matchesCurrentCondition = value >= compareTo;
483 								break;
484 								case "=":
485 									// FIXME: approxEqual?
486 									matchesCurrentCondition = value == compareTo;
487 								break;
488 
489 								default:
490 									throw new ExcelFormatStringException("not a supported comparison operator " ~ operator);
491 							}
492 
493 							continue;
494 						} else {
495 							throw new ExcelFormatStringException("inappropriately placed custom condition");
496 						}
497 					} else {
498 						// color, we hope. FIXME can also be [s], [m], or [h] or maybe [ss], [mm], [hh]
499 						// colors are capitalized...
500 						color = lexeme.lexeme[1 .. $-1];
501 						continue;
502 					}
503 				}
504 
505 				// if we're here, it should actually match and need some processing.
506 
507 				if(lexeme.isLiteral) {
508 					// literals are easy...
509 					ret ~= lexeme.lexeme;
510 				} else {
511 					// but the rest of these are formatting commands
512 					switch(lexeme.lexeme[0]) {
513 						case ',':
514 							// thousands separator requested,
515 							// handled below in the decimal placeholder thing
516 						break;
517 						case '_', '*':
518 							auto lexemeToPadWith = extractExcelFormatStringLexeme(fmt);
519 							if(lexeme.lexeme[0] == '_')
520 								ret ~= " "; // FIXME supposed to match width of the char
521 							else if(lexeme.lexeme[0] == '*')
522 								ret ~= lexemeToPadWith.lexeme; // FIXME: supposed to repeat to fill the column width
523 						break;
524 						case '@': // the original text
525 							ret ~= s;
526 						break;
527 						case '%':
528 							ret ~= lexeme.lexeme;
529 						break;
530 						case '.':
531 							inDecimal = true;
532 							ret ~= lexeme.lexeme;
533 						break;
534 						case '/':
535 							if(!inAmPm) {
536 								inDenominator = true;
537 								ret ~= lexeme.lexeme;
538 							}
539 						break;
540 						case '#', '0', '?':
541 							// decimal group
542 							// # = digit
543 							// 0 = digit, pad with 0 if not significant
544 							// ? = digit, pad with space (same sized as digit) if not significant
545 
546 							if(value is double.nan)
547 								return Result(s, "NaN");
548 
549 							alignment = 1; // if we are printing numbers let's assume right align FIXME
550 							/+
551 							if(s.length == 0 && value is double.nan) // and if we printing numbers, treat empty cell as 0
552 								value = 0.0;
553 							+/
554 
555 							bool appendNumber(double v, bool includeThousandsSeparator) {
556 								if(v < 0)
557 									v = -v;
558 								string f = to!string(cast(int) v);
559 								if(f.length < lexeme.lexeme.length)
560 								foreach(l; lexeme.lexeme[0 .. $ - f.length]) {
561 									if(l == '0')
562 										ret ~= '0';
563 									else if(l == '?')
564 										ret ~= ' ';
565 								}
566 								if(f.length) {
567 									if(includeThousandsSeparator) {
568 										// 14532
569 										// 1234
570 										// 123
571 										auto offset = cast(int) f.length % 3;
572 										while(f.length > 3) {
573 											ret ~= f[offset .. offset + 3];
574 											ret ~= ",";
575 											f = f[3 .. $];
576 										}
577 										ret ~= f;
578 									} else {
579 										ret ~= f;
580 									}
581 									return true;
582 								}
583 								return false;
584 							}
585 
586 							if(peekLexeme().lexeme == ",") {
587 								// thousands separator requested...
588 								auto v = cast(int) value / 1000;
589 
590 								if(v == 0)
591 									continue; // FIXME? maybe we want some leading 0 padding?
592 
593 								auto hadOutput = appendNumber(v, true);
594 
595 								value = value - v * 1000; // take the remainder for the next iteration of the loop
596 
597 								if(hadOutput)
598 									ret ~= ","; // append the comma before the final thousands digits in the next iteration
599 
600 								continue;
601 							}
602 
603 
604 							if(inDecimal) {
605 								// FIXME: no more std.format
606 								import std.format;
607 								string f = format("%."~to!string(lexeme.lexeme.length)~"f", value - cast(int) value)[2..$]; // slice off the "0."
608 								ret ~= f;
609 							} else {
610 								appendNumber(value, false);
611 							}
612 
613 							inDenominator = false;
614 						break;
615 						case '1': .. case '9':
616 							// number,  if in denominator position
617 							// otherwise treat as string
618 							if(inDenominator)
619 								inDenominator = false; // the rest is handled elsewhere
620 							else
621 								ret ~= lexeme.lexeme;
622 						break;
623 						case 'y':
624 							if(value is double.nan)
625 								return Result(s, "NaN date");
626 
627 							justSawHours = false;
628 							auto y = getDate().year;
629 
630 							char[16] buffer;
631 
632 							switch(lexeme.lexeme.length) {
633 								case 2:
634 									ret ~= intToString(y % 100, buffer[], IntToStringArgs().withPadding(2));
635 								break;
636 								case 4:
637 									ret ~= intToString(y, buffer[], IntToStringArgs().withPadding(4));
638 								break;
639 								default:
640 									throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme);
641 							}
642 						break;
643 						case 'm':
644 							if(value is double.nan)
645 								return Result(s, "NaN date");
646 							auto peek = peekLexeme(false);
647 							bool precedesSeconds =
648 								(peek.lexeme.length && peek.lexeme[0] == 's')
649 								||
650 								(peek.lexeme.length > 1 && peek.lexeme[1] == 's')
651 							;
652 
653 							if(justSawHours || precedesSeconds) {
654 								// minutes
655 								auto m = getDate().timeOfDay.minute;
656 
657 								char[16] buffer;
658 
659 								switch(lexeme.lexeme.length) {
660 									case 1:
661 										ret ~= intToString(m, buffer[]);
662 									break;
663 									case 2:
664 										ret ~= intToString(m, buffer[], IntToStringArgs().withPadding(2));
665 									break;
666 									default:
667 										throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme);
668 								}
669 							} else {
670 								// month
671 								auto m = cast(int) getDate().month;
672 
673 								char[16] buffer;
674 
675 								import arsd.calendar;
676 
677 								switch(lexeme.lexeme.length) {
678 									case 1:
679 										ret ~= intToString(m, buffer[]);
680 									break;
681 									case 2:
682 										ret ~= intToString(m, buffer[], IntToStringArgs().withPadding(2));
683 									break;
684 									case 3: // abbreviation
685 										ret ~= monthNames[m][0 .. 3];
686 									break;
687 									case 4: // full name
688 										ret ~= monthNames[m];
689 									break;
690 									case 5: // single letter
691 										ret ~= monthNames[m][0 .. 1]; // FIXME?
692 									break;
693 									default:
694 										throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme);
695 								}
696 							}
697 
698 							justSawHours = false;
699 						break;
700 						case 'd':
701 							if(value is double.nan)
702 								return Result(s, "NaN date");
703 							justSawHours = false;
704 
705 							char[16] buffer;
706 
707 							import arsd.calendar;
708 
709 							auto d = getDate().day;
710 							auto dow = cast(int) getDate().dayOfWeek;
711 
712 							switch(lexeme.lexeme.length) {
713 								case 1:
714 									ret ~= intToString(d, buffer[]);
715 								break;
716 								case 2:
717 									ret ~= intToString(d, buffer[], IntToStringArgs().withPadding(2));
718 								break;
719 								case 3:
720 									// abbreviation
721 									ret ~= daysOfWeekNames[dow][0 .. 3];
722 								break;
723 								case 4:
724 									// full name
725 									ret ~= daysOfWeekNames[dow];
726 								break;
727 								default:
728 									throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme);
729 							}
730 						break;
731 						case 'h':
732 							if(value is double.nan)
733 								return Result(s, "NaN date");
734 							justSawHours = true;
735 
736 							auto m = getDate().timeOfDay.hour;
737 							char[16] buffer;
738 
739 							if(hasAmPm && m > 12)
740 								m -= 12;
741 							if(hasAmPm && m == 0)
742 								m = 12;
743 
744 							switch(lexeme.lexeme.length) {
745 								case 1:
746 									ret ~= intToString(m, buffer[]);
747 								break;
748 								case 2:
749 									ret ~= intToString(m, buffer[], IntToStringArgs().withPadding(2));
750 								break;
751 								default:
752 									throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme);
753 							}
754 						break;
755 						case 'a', 'A':
756 							if(value is double.nan)
757 								return Result(s, "NaN date");
758 							inAmPm = true;
759 							auto m = getDate().timeOfDay.hour;
760 							if(m >= 12)
761 								ret ~= lexeme.lexeme[0] == 'a' ? "pm" : "PM";
762 							else
763 								ret ~= lexeme.lexeme[0] == 'a' ? "am" : "AM";
764 						break;
765 						case 'p', 'P':
766 							inAmPm = false;
767 						break;
768 						case 's':
769 							if(value is double.nan)
770 								return Result(s, "NaN date");
771 							auto m = getDate().timeOfDay.second;
772 							char[16] buffer;
773 							switch(lexeme.lexeme.length) {
774 								case 1:
775 									ret ~= intToString(m, buffer[]);
776 								break;
777 								case 2:
778 									ret ~= intToString(m, buffer[], IntToStringArgs().withPadding(2));
779 								break;
780 								default:
781 									throw new ExcelFormatStringException("unknown thing " ~ lexeme.lexeme);
782 							}
783 						break;
784 						case 'e', 'E':
785 							// FIXME: scientific notation
786 						break;
787 						default:
788 							assert(0, "unsupported formatting command: " ~ lexeme.lexeme);
789 					}
790 				}
791 			}
792 
793 			return Result(ret, color, alignment);
794 		} catch(ExcelFormatStringException e) {
795 			// we'll fall back to just displaying the original input text
796 			return Result(s, e.msg /* FIXME should be null */, alignment);
797 		}
798 	}
799 
800 	/+
801 		positive;negative;zero;text
802 		can include formats and dates and tons of stuff.
803 		https://support.microsoft.com/en-us/office/review-guidelines-for-customizing-a-number-format-c0a1d1fa-d3f4-4018-96b7-9c9354dd99f5
804 	+/
805 	private this(XlsxFile file, XlsxFile.StyleInternal.xf formatting) {
806 		if(formatting.applyNumberFormat) {
807 			// dates too depending on format
808 			 //import std.stdio; writeln(formatting.numFmtId); writeln(file.styleInternal.numFmts);
809 			this.originalFormatString = file.styleInternal.numFmts[formatting.numFmtId];
810 
811 			this.type = Type.Number;
812 		} else {
813 			this.type = Type.String;
814 		}
815 
816 		/+
817 			xf also has:
818 
819 			int xfId;
820 			int numFmtId;
821 			int fontId;
822 			int fillId;
823 			int borderId;
824 		+/
825 	}
826 
827 	private this(string f) {
828 		this.originalFormatString = f;
829 		this.type = Type.Number;
830 	}
831 }
832 
833 unittest {
834 	assert(XlsxFormat(`;;;"foo"`).applyTo("anything") == XlsxFormat.Result("foo", null));
835 	assert(XlsxFormat(`#.#;;;"foo"`).applyTo("2.0") == XlsxFormat.Result("2.0", null, 1));
836 	assert(XlsxFormat(`0#.##;;;"foo"`).applyTo("24.25") == XlsxFormat.Result("24.25", null, 1));
837 	assert(XlsxFormat(`0#.##;;;"foo"`).applyTo("2.25") == XlsxFormat.Result("02.25", null, 1));
838 	assert(XlsxFormat(`#,#.##`).applyTo("2.25") == XlsxFormat.Result("2.25", null, 1));
839 	assert(XlsxFormat(`#,#.##`).applyTo("123.25") == XlsxFormat.Result("123.25", null, 1));
840 	assert(XlsxFormat(`#,#.##`).applyTo("1234.25") == XlsxFormat.Result("1,234.25", null, 1));
841 	assert(XlsxFormat(`#,#.##`).applyTo("123456.25") == XlsxFormat.Result("123,456.25", null, 1));
842 }
843 
844 struct XlsxCell {
845 	string formula;
846 	string content;
847 	XlsxFormat formatting;
848 
849 	XlsxFormat.Result displayableResult() {
850 		return formatting.applyTo(content);
851 	}
852 
853 	string toString() {
854 		return displayableResult().content;
855 	}
856 }
857 
858 struct CellReference {
859 	string name;
860 
861 	static CellReference fromInts(int column, int row) {
862 		string ret;
863 
864 		string piece;
865 		int adjustment = 0;
866 		do {
867 			piece ~= cast(char)(column % 26 + 'A' - adjustment);
868 			if(adjustment == 0)
869 				adjustment = 1;
870 			column /= 26;
871 		} while(column);
872 
873 		foreach_reverse(ch; piece)
874 			ret ~= ch;
875 		piece = null;
876 
877 		do {
878 			piece ~= cast(char)(row % 10 + '0');
879 			row /= 10;
880 		} while(row);
881 
882 		foreach_reverse(ch; piece)
883 			ret ~= ch;
884 		piece = null;
885 
886 		return CellReference(ret);
887 	}
888 
889 	int toColumnIndex() {
890 		size_t endSlice = name.length;
891 		foreach(idx, ch; name) {
892 			if(ch < 'A' || ch > 'Z') {
893 				endSlice = idx;
894 				break;
895 			}
896 		}
897 
898 		int accumulator;
899 		foreach(idx, ch; name[0 .. endSlice]) {
900 			int value;
901 			if(idx + 1 == endSlice) {
902 				// an A in the last "digit" is a 0, elsewhere it is a 1
903 				value = ch - 'A';
904 			} else {
905 				value = ch - 'A' + 1;
906 			}
907 
908 			accumulator *= 26;
909 			accumulator += value;
910 		}
911 		return accumulator;
912 	}
913 
914 	int toRowIndex() {
915 		int accumulator;
916 		foreach(ch; name) {
917 			if(ch >= 'A' && ch <= 'Z')
918 				continue;
919 			accumulator *= 10;
920 			accumulator += ch - '0';
921 		}
922 		return accumulator;
923 	}
924 }
925 
926 unittest {
927 	auto cr = CellReference("AE434");
928 	assert(cr.toColumnIndex == 30);
929 	cr = CellReference("E434");
930 	assert(cr.toColumnIndex == 4); // zero-based
931 
932 	// zero-based column, 1-based row. wtf?
933 	assert(CellReference("AE434") == CellReference.fromInts(30, 434));
934 
935 	assert(CellReference("Z1") == CellReference.fromInts(25, 1));
936 }
937 
938 /++
939 
940 +/
941 class XlsxSheet {
942 	private string name_;
943 	private XlsxFile file;
944 	private XmlDocument document;
945 	private this(XlsxFile file, string name, XmlDocument document) {
946 		this.file = file;
947 		this.name_ = name;
948 		this.document = document;
949 
950 		this.dimension = document.requireSelector("worksheet > dimension").getAttribute("ref");
951 		// there's also sheetView with selection, activeCell, etc
952 		// and cols with widths and such
953 
954 		auto ul = this.upperLeft;
955 		this.minRow = ul.toRowIndex;
956 		this.minColumn = ul.toColumnIndex;
957 
958 		auto lr = this.lowerRight;
959 		this.maxRow = lr.toRowIndex + 1;
960 		this.maxColumn = lr.toColumnIndex + 1;
961 	}
962 
963 	private string dimension;
964 
965 	private int minRow;
966 	private int minColumn;
967 	private int maxRow;
968 	private int maxColumn;
969 
970 	/++
971 	+/
972 	Size size() {
973 		return Size(maxColumn - minColumn, maxRow - minRow);
974 	}
975 
976 	private CellReference upperLeft() {
977 		foreach(idx, ch; dimension)
978 			if(ch == ':')
979 				return CellReference(dimension[0 .. idx]);
980 		//assert(0); // it has no lower right...
981 		return CellReference(dimension);
982 	}
983 
984 	private CellReference lowerRight() {
985 		foreach(idx, ch; dimension)
986 			if(ch == ':')
987 				return CellReference(dimension[idx + 1 .. $]);
988 		assert(0);
989 	}
990 
991 	// opIndex could be like sheet["A1:B4"] and sheet["A1", "B4"] and stuff maybe.
992 
993 	/++
994 	+/
995 	string name() {
996 		return name_;
997 	}
998 
999 	/++
1000 		Suitable for passing to [arsd.csv.toCsv]
1001 	+/
1002 	string[][] toStringGrid() {
1003 		auto grid = this.toGrid();
1004 
1005 		string[][] ret;
1006 		ret.length = size.height;
1007 		foreach(i, ref row; ret) {
1008 			row.length = size.width;
1009 			foreach(k, ref cell; row)
1010 				cell = grid[i][k].toString();
1011 		}
1012 
1013 		return ret;
1014 	}
1015 
1016 	/++
1017 
1018 	+/
1019 	XlsxCell[][] toGrid() {
1020 		// FIXME: this crashes on opend dmd!
1021 		// string[][] ret = new string[][](size.height, size.width);
1022 
1023 		/+
1024 		// almost everything we allocate in here is to keep, so
1025 		// turning off the GC while working prevents unnecessary
1026 		// collection attempts that won't find any garbage anyway.
1027 
1028 		// but meh no significant difference in perf anyway.
1029 		import core.memory;
1030 		GC.disable();
1031 		scope(exit)
1032 			GC.enable();
1033 		+/
1034 
1035 		XlsxCell[][] ret;
1036 		ret.length = size.height;
1037 		foreach(ref row; ret)
1038 			row.length = size.width;
1039 
1040 		//alloc done
1041 
1042 		auto sheetData = document.requireSelector("sheetData");
1043 		Element[] rowElements = sheetData.childNodes;
1044 
1045 		Element[] nextRow(int expected) {
1046 			if(rowElements.length == 0)
1047 				throw new Exception("ran out of row elements...");
1048 
1049 			Element rowElement;
1050 			Element[] before = rowElements;
1051 
1052 			do {
1053 				rowElement = rowElements[0];
1054 				rowElements = rowElements[1 .. $];
1055 			} while(rowElement.tagName != "row");
1056 
1057 			if(rowElement.attrs.r.to!int != expected) {
1058 				// a row was skipped in the file, so we'll
1059 				// return an empty placeholder too
1060 				rowElements = before;
1061 				return null;
1062 			}
1063 
1064 			return rowElement.childNodes;
1065 		}
1066 
1067 		foreach(int rowIdx, row; ret) {
1068 			auto cellElements = nextRow(rowIdx + 1);
1069 
1070 			foreach(int cellIdx, ref cell; row) {
1071 				string cellReference = CellReference.fromInts(cellIdx + minColumn, rowIdx + minRow).name;
1072 
1073 				Element element = null;
1074 				foreach(idx, thing; cellElements) {
1075 					if(thing.attrs.r == cellReference) {
1076 						element = thing;
1077 						cellElements = cellElements[idx + 1 .. $];
1078 						break;
1079 					}
1080 				}
1081 
1082 				if(element is null)
1083 					continue;
1084 				string v = element.optionSelector("v").textContent;
1085 				if(element.attrs.t == "s")
1086 					v = file.sharedStrings[v.to!int()];
1087 
1088 				auto sString = element.attrs.s;
1089 				auto sId = sString.length ? to!int(sString) : 0;
1090 
1091 				string f = element.optionSelector("f").textContent;
1092 
1093 				cell = XlsxCell(f, v, XlsxFormat(file, file.styleInternal.xfs[sId]));
1094 			}
1095 		}
1096 		return ret;
1097 	}
1098 }
1099 
1100 /++
1101 
1102 +/
1103 class XlsxFile {
1104 	private ZipFile zipFile;
1105 
1106 	/++
1107 
1108 	+/
1109 	this(FilePath file) {
1110 		this.zipFile = new ZipFile(file);
1111 
1112 		load();
1113 	}
1114 
1115 	/// ditto
1116 	this(immutable(ubyte)[] rawData) {
1117 		this.zipFile = new ZipFile(rawData);
1118 
1119 		load();
1120 	}
1121 
1122 	/++
1123 	+/
1124 	int sheetCount() {
1125 		return cast(int) sheetsInternal.length;
1126 	}
1127 
1128 	/++
1129 	+/
1130 	string[] sheetNames() {
1131 		string[] ret;
1132 		foreach(sheet; sheetsInternal)
1133 			ret ~= sheet.name;
1134 		return ret;
1135 	}
1136 
1137 	/++
1138 	+/
1139 	XlsxSheet getSheet(string name) {
1140 		foreach(ref sheet; sheetsInternal)
1141 			if(sheet.name == name)
1142 				return getSheetParsed(sheet);
1143 		return null;
1144 
1145 	}
1146 
1147 	/// ditto
1148 	XlsxSheet getSheet(int indexZeroBased) {
1149 		// FIXME: if it is out of range do what?
1150 		return getSheetParsed(sheetsInternal[indexZeroBased]);
1151 	}
1152 
1153 	// docProps/core.xml has creator, last modified, etc.
1154 
1155 	private string[string] contentTypes;
1156 	private struct Relationship {
1157 		string id;
1158 		string type;
1159 		string target;
1160 	}
1161 	private Relationship[string] relationships;
1162 	private string[] sharedStrings;
1163 
1164 	private struct SheetInternal {
1165 		string name;
1166 		string id;
1167 		string rel;
1168 
1169 		XmlDocument cached;
1170 		XlsxSheet parsed;
1171 	}
1172 	private SheetInternal[] sheetsInternal;
1173 
1174 	// https://stackoverflow.com/questions/3154646/what-does-the-s-attribute-signify-in-a-cell-tag-in-xlsx
1175 	private struct StyleInternal {
1176 		string[int] numFmts;
1177 		// fonts
1178 			// font references color theme from xl/themes
1179 		// fills
1180 		// borders
1181 		// cellStyleXfs
1182 		// cellXfs
1183 		struct xf {
1184 			int xfId;
1185 			int numFmtId;
1186 			int fontId;
1187 			int fillId;
1188 			int borderId;
1189 
1190 			bool applyNumberFormat; // if yes, you get default right alignment
1191 		}
1192 		xf[] xfs;
1193 
1194 		// cellStyles
1195 		// dxfs
1196 		// tableStyles
1197 
1198 	}
1199 	private StyleInternal styleInternal;
1200 
1201 	private XmlDocument getSheetXml(ref SheetInternal sheet) {
1202 		if(sheet.cached is null)
1203 			loadXml("xl/" ~ relationships[sheet.rel].target, (document) { sheet.cached = document; });
1204 
1205 		return sheet.cached;
1206 	}
1207 
1208 	private XlsxSheet getSheetParsed(ref SheetInternal sheet) {
1209 		if(sheet.parsed is null)
1210 			sheet.parsed = new XlsxSheet(this, sheet.name, getSheetXml(sheet));
1211 
1212 		return sheet.parsed;
1213 	}
1214 
1215 
1216 	private void load() {
1217 		loadXml("[Content_Types].xml", (document) {
1218 			foreach(element; document.querySelectorAll("Override"))
1219 				contentTypes[element.attrs.PartName] = element.attrs.ContentType;
1220 		});
1221 
1222 		loadXml("xl/_rels/workbook.xml.rels", (document) {
1223 			foreach(element; document.querySelectorAll("Relationship"))
1224 				relationships[element.attrs.Id] = Relationship(element.attrs.Id, element.attrs.Type, element.attrs.Target);
1225 		});
1226 
1227 		loadXml("xl/sharedStrings.xml", (document) {
1228 			foreach(element; document.querySelectorAll("si t"))
1229 				sharedStrings ~= element.textContent;
1230 		});
1231 
1232 		loadXml("xl/styles.xml", (document) {
1233 			// need to keep the generic hardcoded formats first
1234 			styleInternal.numFmts = [
1235 				 0: "@",
1236 				 1: "0",
1237 				 2: "0.00",
1238 				 3: "#,##0",
1239 				 4: "#,##0.00",
1240 				 5: "$#,##0_);($#,##0)",
1241 				 6: "$#,##0_);[Red]($#,##0)",
1242 				 7: "$#,##0.00_);($#,##0.00)",
1243 				 8: "$#,##0.00_);[Red]($#,##0.00)",
1244 				 9: "0%",
1245 				10: "0.00%",
1246 				11: "0.00E+00",
1247 				12: "# ?/?",
1248 				13: "# ??/??",
1249 				14: "m/d/yyyy", // ive heard this one does different things in different locales
1250 				15: "d-mmm-yy",
1251 				16: "d-mmm",
1252 				17: "mmm-yy",
1253 				18: "h:mm AM/PM",
1254 				19: "h:mm:ss AM/PM",
1255 				20: "h:mm",
1256 				21: "h:mm:ss",
1257 				22: "m/d/yyyy h:mm",
1258 				37: "#,##0_);(#,##0)",
1259 				38: "#,##0_);[Red](#,##0)",
1260 				39: "#,##0.00_);(#,##0.00)",
1261 				40: "#,##0.00_);[Red](#,##0.00)",
1262 				45: "mm:ss",
1263 				46: "[h]:mm:ss",
1264 				47: "mm:ss.0",
1265 				48: "##0.0E+0",
1266 				49: "@",
1267 			];
1268 
1269 
1270 			foreach(element; document.querySelectorAll("numFmts > numFmt")) {
1271 				styleInternal.numFmts[to!int(element.attrs.numFmtId)] = element.attrs.formatCode;
1272 			}
1273 
1274 			foreach(element; document.querySelectorAll("cellXfs > xf")) {
1275 				StyleInternal.xf xf;
1276 
1277 				xf.xfId = element.attrs.xfId.to!int;
1278 				xf.fontId = element.attrs.fontId.to!int;
1279 				xf.fillId = element.attrs.fillId.to!int;
1280 				xf.borderId = element.attrs.borderId.to!int;
1281 				xf.numFmtId = element.attrs.numFmtId.to!int;
1282 
1283 				if(element.attrs.applyNumberFormat == "1")
1284 					xf.applyNumberFormat = true;
1285 
1286 				styleInternal.xfs ~= xf;
1287 			}
1288 		});
1289 
1290 		loadXml("xl/workbook.xml", (document) {
1291 			foreach(element; document.querySelectorAll("sheets > sheet")) {
1292 				sheetsInternal ~= SheetInternal(element.attrs.name, element.attrs.sheetId, element.getAttribute("r:id"));
1293 			}
1294 		});
1295 	}
1296 
1297 	private void loadXml(string filename, scope void delegate(XmlDocument document) handler) {
1298 		auto document = new XmlDocument(cast(string) zipFile.getContent(filename));
1299 		handler(document);
1300 	}
1301 }
1302 
1303 
1304 // from Robert Schadek's code {
1305 
1306 import std.datetime;
1307 version(unittest) import std.format;
1308 
1309 Date longToDate(long d) @safe {
1310 	// modifed from https://www.codeproject.com/Articles/2750/
1311 	// Excel-Serial-Date-to-Day-Month-Year-and-Vice-Versa
1312 
1313 	// Excel/Lotus 123 have a bug with 29-02-1900. 1900 is not a
1314 	// leap year, but Excel/Lotus 123 think it is...
1315 	if(d == 60) {
1316 		return Date(1900, 2,  29);
1317 	} else if(d < 60) {
1318 		// Because of the 29-02-1900 bug, any serial date
1319 		// under 60 is one off... Compensate.
1320 		++d;
1321 	}
1322 
1323 	// Modified Julian to DMY calculation with an addition of 2415019
1324 	int l = cast(int)d + 68569 + 2415019;
1325 	int n = int(( 4 * l ) / 146097);
1326 	l = l - int(( 146097 * n + 3 ) / 4);
1327 	int i = int(( 4000 * ( l + 1 ) ) / 1461001);
1328 	l = l - int(( 1461 * i ) / 4) + 31;
1329 	int j = int(( 80 * l ) / 2447);
1330 	int nDay = l - int(( 2447 * j ) / 80);
1331 	l = int(j / 11);
1332 	int nMonth = j + 2 - ( 12 * l );
1333 	int nYear = 100 * ( n - 49 ) + i + l;
1334 	return Date(nYear, nMonth, nDay);
1335 }
1336 
1337 long dateToLong(Date d) @safe {
1338 	// modifed from https://www.codeproject.com/Articles/2750/
1339 	// Excel-Serial-Date-to-Day-Month-Year-and-Vice-Versa
1340 
1341 	// Excel/Lotus 123 have a bug with 29-02-1900. 1900 is not a
1342 	// leap year, but Excel/Lotus 123 think it is...
1343 	if(d.day == 29 && d.month == 2 && d.year == 1900) {
1344 		return 60;
1345 	}
1346 
1347 	// DMY to Modified Julian calculated with an extra subtraction of 2415019.
1348 	long nSerialDate =
1349 			int(( 1461 * ( d.year + 4800 + int(( d.month - 14 ) / 12) ) ) / 4) +
1350 			int(( 367 * ( d.month - 2 - 12 *
1351 				( ( d.month - 14 ) / 12 ) ) ) / 12) -
1352 				int(( 3 * ( int(( d.year + 4900
1353 				+ int(( d.month - 14 ) / 12) ) / 100) ) ) / 4) +
1354 				d.day - 2415019 - 32075;
1355 
1356 	if(nSerialDate < 60) {
1357 		// Because of the 29-02-1900 bug, any serial date
1358 		// under 60 is one off... Compensate.
1359 		nSerialDate--;
1360 	}
1361 
1362 	return nSerialDate;
1363 }
1364 
1365 @safe unittest {
1366 	auto ds = [ Date(1900,2,1), Date(1901, 2, 28), Date(2019, 06, 05) ];
1367 	foreach(const d; ds) {
1368 		long l = dateToLong(d);
1369 		Date r = longToDate(l);
1370 		assert(r == d, format("%s %s", r, d));
1371 	}
1372 }
1373 
1374 TimeOfDay doubleToTimeOfDay(double s) @safe {
1375 	import core.stdc.math : lround;
1376 	double secs = (24.0 * 60.0 * 60.0) * s;
1377 
1378 	// TODO not one-hundred my lround is needed
1379 	int secI = to!int(lround(secs));
1380 
1381 	return TimeOfDay(secI / 3600, (secI / 60) % 60, secI % 60);
1382 }
1383 
1384 double timeOfDayToDouble(TimeOfDay tod) @safe {
1385 	long h = tod.hour * 60 * 60;
1386 	long m = tod.minute * 60;
1387 	long s = tod.second;
1388 	return (h + m + s) / (24.0 * 60.0 * 60.0);
1389 }
1390 
1391 @safe unittest {
1392 	auto tods = [ TimeOfDay(23, 12, 11), TimeOfDay(11, 0, 11),
1393 		 TimeOfDay(0, 0, 0), TimeOfDay(0, 1, 0),
1394 		 TimeOfDay(23, 59, 59), TimeOfDay(0, 0, 0)];
1395 	foreach(const tod; tods) {
1396 		double d = timeOfDayToDouble(tod);
1397 		assert(d <= 1.0, format("%s", d));
1398 		TimeOfDay r = doubleToTimeOfDay(d);
1399 		assert(r == tod, format("%s %s", r, tod));
1400 	}
1401 }
1402 
1403 double datetimeToDouble(DateTime dt) @safe {
1404 	double d = dateToLong(dt.date);
1405 	double t = timeOfDayToDouble(dt.timeOfDay);
1406 	return d + t;
1407 }
1408 
1409 DateTime doubleToDateTime(double d) @safe {
1410 	long l = cast(long)d;
1411 	Date dt = longToDate(l);
1412 	TimeOfDay t = doubleToTimeOfDay(d - l);
1413 	return DateTime(dt, t);
1414 }
1415 
1416 @safe unittest {
1417 	auto ds = [ Date(1900,2,1), Date(1901, 2, 28), Date(2019, 06, 05) ];
1418 	auto tods = [ TimeOfDay(23, 12, 11), TimeOfDay(11, 0, 11),
1419 		 TimeOfDay(0, 0, 0), TimeOfDay(0, 1, 0),
1420 		 TimeOfDay(23, 59, 59), TimeOfDay(0, 0, 0)];
1421 	foreach(const d; ds) {
1422 		foreach(const tod; tods) {
1423 			DateTime dt = DateTime(d, tod);
1424 			double dou = datetimeToDouble(dt);
1425 
1426 			Date rd = longToDate(cast(long)dou);
1427 			assert(rd == d, format("%s %s", rd, d));
1428 
1429 			double rest = dou - cast(long)dou;
1430 			TimeOfDay rt = doubleToTimeOfDay(dou - cast(long)dou);
1431 			assert(rt == tod, format("%s %s", rt, tod));
1432 
1433 			DateTime r = doubleToDateTime(dou);
1434 			assert(r == dt, format("%s %s", r, dt));
1435 		}
1436 	}
1437 }
1438 // end from burner's code }