1 /++
2 	Bare minimum support for reading Microsoft Word files.
3 
4 	History:
5 		Added February 19, 2025
6 +/
7 module arsd.docx;
8 
9 import arsd.core;
10 import arsd.zip;
11 import arsd.dom;
12 import arsd.color;
13 
14 /++
15 
16 +/
17 class DocxFile {
18 	private ZipFile zipFile;
19 	private XmlDocument document;
20 
21 	/++
22 
23 	+/
24 	this(FilePath file) {
25 		this.zipFile = new ZipFile(file);
26 
27 		load();
28 	}
29 
30 	/// ditto
31 	this(immutable(ubyte)[] rawData) {
32 		this.zipFile = new ZipFile(rawData);
33 
34 		load();
35 	}
36 
37 	/++
38 		Converts the document to a plain text string that gives you
39 		the jist of the document that you can view in a plain editor.
40 
41 		Most formatting is stripped out.
42 	+/
43 	string toPlainText() {
44 		string ret;
45 		foreach(paragraph; document.querySelectorAll("w\\:p")) {
46 			if(ret.length)
47 				ret ~= "\n\n";
48 			ret ~= paragraph.innerText;
49 		}
50 		return ret;
51 	}
52 
53 	// FIXME: to RTF, markdown, html, and terminal sequences might also be useful.
54 
55 	private void load() {
56 		loadXml("word/document.xml", (document) {
57 			this.document = document;
58 		});
59 	}
60 
61 	private void loadXml(string filename, scope void delegate(XmlDocument document) handler) {
62 		auto document = new XmlDocument(cast(string) zipFile.getContent(filename));
63 		handler(document);
64 	}
65 
66 }