/++
	Bare minimum support for reading Microsoft Word files.

	History:
		Added February 19, 2025
+/
module arsd.docx;

import arsd.core;
import arsd.zip;
import arsd.dom;
import arsd.color;

/++
	A Word document opened for reading.

	The file is treated as a zip archive; on construction the
	`word/document.xml` member is pulled out of it and parsed as XML.
+/
class DocxFile {
	// The archive the document parts are read from.
	private ZipFile zipFile;
	// Parsed `word/document.xml`; assigned by [load].
	private XmlDocument document;

	/++
		Opens the archive, either from a file path or from bytes
		already in memory, and immediately loads the main document part.
	+/
	this(FilePath file) {
		zipFile = new ZipFile(file);

		load();
	}

	/// ditto
	this(immutable(ubyte)[] rawData) {
		zipFile = new ZipFile(rawData);

		load();
	}

	/++
		Converts the document to a plain text string that gives you
		the gist of the document that you can view in a plain editor.

		Each `w:p` element contributes its inner text, and a blank
		line is placed between paragraphs. Most formatting is stripped out.
	+/
	string toPlainText() {
		string text;
		foreach(para; document.querySelectorAll("w\\:p")) {
			// the separator is only emitted once some text has been accumulated
			if(text.length != 0)
				text ~= "\n\n";
			text ~= para.innerText;
		}
		return text;
	}

	// FIXME: to RTF, markdown, html, and terminal sequences might also be useful.

	// Reads the main document part and stores it in `document`.
	private void load() {
		loadXml("word/document.xml", (XmlDocument parsed) {
			this.document = parsed;
		});
	}

	// Fetches `filename` from the archive, parses its content as XML,
	// and passes the resulting document to `handler`.
	private void loadXml(string filename, scope void delegate(XmlDocument document) handler) {
		auto raw = zipFile.getContent(filename);
		auto parsed = new XmlDocument(cast(string) raw);
		handler(parsed);
	}

}