-
Notifications
You must be signed in to change notification settings - Fork 128
/
Copy pathdocx.d
66 lines (51 loc) · 1.17 KB
/
docx.d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
/++
Bare minimum support for reading Microsoft Word files.
History:
Added February 19, 2025
+/
module arsd.docx;
import arsd.core;
import arsd.zip;
import arsd.dom;
import arsd.color;
/++
	Reader for a Microsoft Word document stored in a zip archive.

	Constructing an instance opens the archive and parses its
	`word/document.xml` member right away; [toPlainText] then works
	from that parsed document.
+/
class DocxFile {
	private ZipFile zipFile;
	private XmlDocument document;

	/++
		Opens the archive and immediately parses the main document part.

		Params:
			file = location of the file, handed to [ZipFile] unchanged
			rawData = the file's bytes already in memory, handed to [ZipFile] unchanged
	+/
	this(FilePath file) {
		zipFile = new ZipFile(file);
		load();
	}

	/// ditto
	this(immutable(ubyte)[] rawData) {
		zipFile = new ZipFile(rawData);
		load();
	}

	/++
		Renders the document as a plain text string that conveys the
		gist of its content and can be viewed in a plain editor.

		The inner text of every `w:p` element is appended in the order
		the query returns them, with one blank line between entries.
		Most formatting is stripped out.

		Returns:
			The accumulated text. It is empty when no paragraph
			contributes any text.
	+/
	string toPlainText() {
		string text;
		auto paragraphs = document.querySelectorAll("w\\:p");

		foreach(para; paragraphs) {
			// The separator is keyed on the output so far, not on the
			// paragraph index: nothing is emitted while the result is
			// still empty, so leading empty paragraphs add no blank lines.
			if(text.length > 0)
				text ~= "\n\n";
			text ~= para.innerText;
		}

		return text;
	}

	// FIXME: to RTF, markdown, html, and terminal sequences might also be useful.

	/// Parses the `word/document.xml` archive member and keeps the result in `document`.
	private void load() {
		loadXml("word/document.xml", (XmlDocument parsed) {
			this.document = parsed;
		});
	}

	/++
		Parses one archive member as XML and passes the result to a callback.

		Params:
			filename = name of the member inside the archive
			handler = called exactly once with the freshly parsed document
	+/
	private void loadXml(string filename, scope void delegate(XmlDocument document) handler) {
		// The member's content is cast to `string` for the XML parser.
		auto rawXml = cast(string) zipFile.getContent(filename);
		handler(new XmlDocument(rawXml));
	}
}