-
Notifications
You must be signed in to change notification settings - Fork 0
/
srt_document.py
43 lines (32 loc) · 1.36 KB
/
srt_document.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import zipfile
import xml.etree.ElementTree
class SRT_Document:
    """Flat list of the text found in the table cells of a .docx file.

    The document is read once, at construction time.  Afterwards
    ``items`` holds one string per table cell, in the order the cells are
    encountered while walking the document's XML.
    """

    # Qualified tag names of the WordprocessingML elements that are read.
    _WORD_NAMESPACE = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'
    _TEXT = _WORD_NAMESPACE + 't'
    _TABLE = _WORD_NAMESPACE + 'tbl'
    _ROW = _WORD_NAMESPACE + 'tr'
    _CELL = _WORD_NAMESPACE + 'tc'

    def __init__(self, path):
        """Load the table cells of the document stored at *path*.

        Raises:
            ValueError: if the extension of *path* is not ``docx``.
        """
        self.items = []
        self._load(path)

    def print(self):
        """Write every item to stdout as ``<1-based index>: <text>``."""
        for number, item in enumerate(self.items, start=1):
            print("{}: {}".format(number, item))

    def __str__(self):
        """Return the string form of the underlying list of items."""
        return str(self.items)

    def __len__(self):
        """Return the number of collected items."""
        return len(self.items)

    def _load(self, path):
        """Append the text of every table cell in *path* to ``self.items``.

        The file type is taken from the text after the last ``.`` in
        *path*, compared case-insensitively.  Only ``docx`` is handled.

        Raises:
            ValueError: for any other file type.
        """
        # str() keeps plain strings unchanged and lets pathlib.Path objects
        # through the extension check as well.
        file_type = str(path).split(".")[-1].lower()
        if file_type != "docx":
            raise ValueError("File type {} not yet supported\r\nUse\r\n\r\n\tlowriter --convert-to docx documentname.doc\r\n\r\nto convert.".format(file_type))

        with zipfile.ZipFile(path) as docx:
            tree = xml.etree.ElementTree.XML(docx.read('word/document.xml'))

        # NOTE: Element.iter() visits all descendants, so cells of a table
        # nested inside another table are reached through each enclosing
        # table/row/cell as well as through the nested table itself.
        for table in tree.iter(self._TABLE):
            for row in table.iter(self._ROW):
                for cell in row.iter(self._CELL):
                    # An empty <w:t/> element has text None; treat it as ''
                    # so that join() does not raise TypeError.
                    self.items.append(
                        ''.join(node.text or '' for node in cell.iter(self._TEXT))
                    )
if "__main__" == __name__:
    # Running the module directly only reports that it was executed.
    print('ran srt_document.py')