-
Notifications
You must be signed in to change notification settings - Fork 0
/
tsv2json.py
105 lines (94 loc) · 2.77 KB
/
tsv2json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# {
# "national": [
# {
# "titleId": 0,
# "title": "Aoyama Women's Cup (2021)",
# "rounds":
# [
# {
# "id": 0,
# "roundId": 0,
# "title": "Aoyama Women's Cup (2021)",
# "round": "R1",
# "motion": "THW force all news organizations to be non profit(i.e. NHK, BBC, PBS)",
# "slide": ""
# },
# ]
# },
# ],
# "international": [
# {
# }
# ],
# }
import copy
import json
import re
import numpy
# Output document: one list of tournament records per category.
json_dict = {"national": [], "international": []}

# Counters and scratch state that tsv2json reads and updates through `global`.
titleId = 0
id = 0
roundId = 0
len_f = 0
data = {}
def tsv2json(input_file, type="national"):
    """Parse a tab-structured motion list and append its tournaments to ``json_dict[type]``.

    The number of tab characters on a line decides how the line is read:

    * 1 tab  -- tournament title; starts a new tournament record.
    * 2 tabs -- round name, applied to the motion lines that follow.
    * 3 tabs -- motion text; adds a round record to the current tournament.
    * 4 tabs -- slide text; only honoured in the unbroken run of 4-tab lines
      directly after a motion. The texts are concatenated, and lines
      containing ``$stats`` are skipped.
    * any other count -- ignored.

    All stored text has surrounding whitespace stripped.

    Args:
        input_file: Path of the TSV file to read (decoded as UTF-8).
        type: Key of the module-level ``json_dict`` that receives the
            tournaments parsed from this file.

    Raises:
        ValueError: If a motion line appears before any title line or before
            any round line in this file.

    Side effects:
        Updates the module-level ``json_dict``, ``titleId``, ``id``,
        ``roundId``, ``len_f`` and ``data``. ``titleId`` and ``id`` keep
        counting across calls; ``roundId`` restarts at 0 for each tournament.
    """
    global json_dict
    global titleId, id, roundId, len_f, data

    with open(input_file, "r", encoding="utf-8") as f:
        lst = f.readlines()
    len_f = len(lst)

    title = None
    round_name = None
    # Round record that is still accepting slide lines, if any.
    current_motion = None

    for line_no, line in enumerate(lst, start=1):
        t_count = line.count("\t")
        text = line.strip()

        if t_count == 4:
            # Slide lines only count while they directly follow a motion.
            if current_motion is not None and "$stats" not in line:
                current_motion["slide"] += text
            continue

        # Any other line ends the slide run of the previous motion.
        current_motion = None

        if t_count == 1:
            # A new title closes the tournament collected so far.
            if data:
                json_dict[type].append(data)
            roundId = 0
            title = text
            data = {"titleId": titleId, "title": title, "rounds": []}
            titleId += 1
        elif t_count == 2:
            round_name = text
        elif t_count == 3:
            if title is None or round_name is None:
                raise ValueError(
                    f"{input_file}:{line_no}: motion line found before a "
                    "title and round line"
                )
            current_motion = {
                "id": id,
                "roundId": roundId,
                "round": round_name,
                "title": title,
                "motion": text,
                "slide": "",
            }
            id += 1
            roundId += 1
            data["rounds"].append(current_motion)

    # Flush the final tournament here; otherwise it would stay in `data` and
    # be appended to whichever category the next call uses.
    if data:
        json_dict[type].append(data)
        data = {}
class MyEncoder(json.JSONEncoder):
    """JSON encoder that also accepts NumPy scalars and arrays."""

    def default(self, obj):
        """Return a built-in equivalent for NumPy values.

        ``numpy.ndarray`` becomes a list via ``tolist()``, ``numpy.integer``
        becomes ``int`` and ``numpy.floating`` becomes ``float``. Any other
        object is handed to ``json.JSONEncoder.default``.
        """
        if isinstance(obj, numpy.ndarray):
            return obj.tolist()
        if isinstance(obj, numpy.integer):
            return int(obj)
        if isinstance(obj, numpy.floating):
            return float(obj)
        # Not a NumPy value: let the base encoder decide.
        return super().default(obj)
# Convert both source files; tsv2json accumulates the results in json_dict.
for tsv_path, category in (
    ("./data/tsv/nationals.tsv", "national"),
    ("./data/tsv/internationals.tsv", "international"),
):
    tsv2json(tsv_path, category)

# Write the combined document as indented JSON.
output_file_name = "./data/json/data.json"
with open(output_file_name, mode="w", encoding="utf-8") as fw:
    fw.write(json.dumps(json_dict, cls=MyEncoder, indent=4))