1 /++
2 	Some support for the Microsoft Excel Spreadsheet file format.
3 
4 	Don't expect much from it.
5 
6 	Some code is borrowed from the xlsxreader package.
7 
8 	History:
9 		Added February 13, 2025
10 
11 	See_Also:
12 		https://github.com/symmetryinvestments/xlsxd which supports writing xlsx files. I might add write support here too someday but I kinda doubt it.
13 +/
14 module arsd.xlsx;
15 
16 // See also Robert's impl: https://github.com/symmetryinvestments/xlsxreader/blob/master/source/xlsxreader.d
17 
18 import arsd.core;
19 import arsd.zip;
20 import arsd.dom;
21 import arsd.color;
22 
23 import std.conv;
24 
25 /+
26 struct XlsxCell {
27 	string type;
28 	string formula;
29 	string value;
30 }
31 +/
32 
33 struct CellReference {
34 	string name;
35 
36 	static CellReference fromInts(int column, int row) {
37 		string ret;
38 
39 		string piece;
40 		do {
41 			piece ~= cast(char)(column % 26 + 'A');
42 			column /= 26;
43 		} while(column);
44 
45 		foreach_reverse(ch; piece)
46 			ret ~= ch;
47 		piece = null;
48 
49 		do {
50 			piece ~= cast(char)(row % 10 + '0');
51 			row /= 10;
52 		} while(row);
53 
54 		foreach_reverse(ch; piece)
55 			ret ~= ch;
56 		piece = null;
57 
58 		return CellReference(ret);
59 	}
60 
61 	int toColumnIndex() {
62 		int accumulator;
63 		foreach(ch; name) {
64 			if(ch < 'A' || ch > 'Z')
65 				break;
66 			accumulator *= 26;
67 			accumulator += ch - 'A';
68 		}
69 		return accumulator;
70 	}
71 
72 	int toRowIndex() {
73 		int accumulator;
74 		foreach(ch; name) {
75 			if(ch >= 'A' && ch <= 'Z')
76 				continue;
77 			accumulator *= 10;
78 			accumulator += ch - '0';
79 		}
80 		return accumulator;
81 	}
82 }
83 
84 /++
85 
86 +/
87 class XlsxSheet {
88 	private string name_;
89 	private XlsxFile file;
90 	private XmlDocument document;
91 	private this(XlsxFile file, string name, XmlDocument document) {
92 		this.file = file;
93 		this.name_ = name;
94 		this.document = document;
95 
96 		this.dimension = document.requireSelector("worksheet > dimension").getAttribute("ref");
97 		// there's also sheetView with selection, activeCell, etc
98 		// and cols with widths and such
99 
100 		auto ul = this.upperLeft;
101 		this.minRow = ul.toRowIndex;
102 		this.minColumn = ul.toColumnIndex;
103 
104 		auto lr = this.lowerRight;
105 		this.maxRow = lr.toRowIndex + 1;
106 		this.maxColumn = lr.toColumnIndex + 1;
107 	}
108 
109 	private string dimension;
110 
111 	private int minRow;
112 	private int minColumn;
113 	private int maxRow;
114 	private int maxColumn;
115 
116 	/++
117 	+/
118 	Size size() {
119 		return Size(maxColumn - minColumn, maxRow - minRow);
120 	}
121 
122 	private CellReference upperLeft() {
123 		foreach(idx, ch; dimension)
124 			if(ch == ':')
125 				return CellReference(dimension[0 .. idx]);
126 		assert(0);
127 	}
128 
129 	private CellReference lowerRight() {
130 		foreach(idx, ch; dimension)
131 			if(ch == ':')
132 				return CellReference(dimension[idx + 1 .. $]);
133 		assert(0);
134 	}
135 
136 	// opIndex could be like sheet["A1:B4"] and sheet["A1", "B4"] and stuff maybe.
137 
138 	/++
139 	+/
140 	string name() {
141 		return name_;
142 	}
143 
144 	/++
145 		Suitable for passing to [arsd.csv.toCsv]
146 	+/
147 	string[][] toStringGrid() {
148 		// FIXME: this crashes on opend dmd!
149 		// string[][] ret = new string[][](size.height, size.width);
150 
151 		string[][] ret;
152 		ret.length = size.height;
153 		foreach(ref row; ret)
154 			row.length = size.width;
155 
156 		//alloc done
157 
158 		foreach(int rowIdx, row; ret)
159 		foreach(int cellIdx, ref cell; row) {
160 			string cellReference = CellReference.fromInts(cellIdx + minColumn, rowIdx + minRow).name;
161 			// FIXME: i should prolly read left to right here at least and not iterate the whole document over and over
162 			auto element = document.querySelector("c[r=\""~cellReference~"\"]");
163 			if(element is null)
164 				continue;
165 			string v = element.requireSelector("v").textContent;
166 			if(element.attrs.t == "s")
167 				v = file.sharedStrings[v.to!int()];
168 			cell = v;
169 		}
170 		return ret;
171 	}
172 }
173 
174 /++
175 
176 +/
177 class XlsxFile {
178 	private ZipFile zipFile;
179 
180 	/++
181 
182 	+/
183 	this(FilePath file) {
184 		this.zipFile = new ZipFile(file);
185 
186 		load();
187 	}
188 
189 	/// ditto
190 	this(immutable(ubyte)[] rawData) {
191 		this.zipFile = new ZipFile(rawData);
192 
193 		load();
194 	}
195 
196 	/++
197 	+/
198 	int sheetCount() {
199 		return cast(int) sheetsInternal.length;
200 	}
201 
202 	/++
203 	+/
204 	string[] sheetNames() {
205 		string[] ret;
206 		foreach(sheet; sheetsInternal)
207 			ret ~= sheet.name;
208 		return ret;
209 	}
210 
211 	/++
212 	+/
213 	XlsxSheet getSheet(string name) {
214 		foreach(ref sheet; sheetsInternal)
215 			if(sheet.name == name)
216 				return getSheetParsed(sheet);
217 		return null;
218 
219 	}
220 
221 	/// ditto
222 	XlsxSheet getSheet(int indexZeroBased) {
223 		// FIXME: if it is out of range do what?
224 		return getSheetParsed(sheetsInternal[indexZeroBased]);
225 	}
226 
227 	// docProps/core.xml has creator, last modified, etc.
228 
229 	private string[string] contentTypes;
230 	private struct Relationship {
231 		string id;
232 		string type;
233 		string target;
234 	}
235 	private Relationship[string] relationships;
236 	private string[] sharedStrings;
237 
238 	private struct SheetInternal {
239 		string name;
240 		string id;
241 		string rel;
242 
243 		XmlDocument cached;
244 		XlsxSheet parsed;
245 	}
246 	private SheetInternal[] sheetsInternal;
247 
248 	private XmlDocument getSheetXml(ref SheetInternal sheet) {
249 		if(sheet.cached is null)
250 			loadXml("xl/" ~ relationships[sheet.rel].target, (document) { sheet.cached = document; });
251 
252 		return sheet.cached;
253 	}
254 
255 	private XlsxSheet getSheetParsed(ref SheetInternal sheet) {
256 		if(sheet.parsed is null)
257 			sheet.parsed = new XlsxSheet(this, sheet.name, getSheetXml(sheet));
258 
259 		return sheet.parsed;
260 	}
261 
262 
263 	private void load() {
264 		loadXml("[Content_Types].xml", (document) {
265 			foreach(element; document.querySelectorAll("Override"))
266 				contentTypes[element.attrs.PartName] = element.attrs.ContentType;
267 		});
268 
269 		loadXml("xl/_rels/workbook.xml.rels", (document) {
270 			foreach(element; document.querySelectorAll("Relationship"))
271 				relationships[element.attrs.Id] = Relationship(element.attrs.Id, element.attrs.Type, element.attrs.Target);
272 		});
273 
274 		loadXml("xl/sharedStrings.xml", (document) {
275 			foreach(element; document.querySelectorAll("si t"))
276 				sharedStrings ~= element.textContent;
277 		});
278 
279 		loadXml("xl/workbook.xml", (document) {
280 			foreach(element; document.querySelectorAll("sheets > sheet")) {
281 				sheetsInternal ~= SheetInternal(element.attrs.name, element.attrs.sheetId, element.getAttribute("r:id"));
282 			}
283 		});
284 	}
285 
286 	private void loadXml(string filename, scope void delegate(XmlDocument document) handler) {
287 		auto document = new XmlDocument(cast(string) zipFile.getContent(filename));
288 		handler(document);
289 	}
290 }