-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_prep.py
77 lines (68 loc) · 2.88 KB
/
data_prep.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import json
import pandas as pd
import cv2
import argparse
# Hard-coded absolute paths of the list files this script converts.
# train_file and val_file are passed to get_data(), which reads one
# "<image path> <segmentation path>" pair per line.
train_file = '/data/sara/TCT/CCT/body_part_sequencing/data/sup_train.txt'
val_file = '/data/sara/TCT/CCT/body_part_sequencing/data/val.txt'
# seq_file is passed to get_seq_data(), which parses it as two
# space-separated columns named "img" and "label".
seq_file = '/data/sara/semantic-segmentation-pytorch/data/seq_data.txt' # each 3 lines belong to the same sequence
def get_data(filename, mode):
    """Convert a list file of image/mask pairs into an ``.odgt`` file.

    Every non-blank line of *filename* must contain two whitespace-separated
    fields: the image path followed by the segmentation-mask path. For each
    pair the mask is loaded to obtain its size, and one JSON object with the
    keys ``fpath_img``, ``fpath_segm``, ``width`` and ``height`` is written
    as a single line to ``decom_<mode>.odgt`` in the current directory.

    Args:
        filename: Path of the text file listing the pairs.
        mode: Suffix used to build the output file name (e.g. ``'train'``).

    Raises:
        ValueError: If a non-blank line does not hold exactly two fields.
        OSError: If an input file cannot be opened or a mask cannot be read.
    """
    # NOTE: the output is opened in append mode, so running this twice with
    # the same mode adds every record a second time.
    with open(filename, 'r') as fr, open("decom_" + mode + '.odgt', 'a') as fw:
        for line_no, line in enumerate(fr, start=1):
            fields = line.split()
            if not fields:
                # Blank lines (typically a trailing newline) carry no pair.
                continue
            if len(fields) != 2:
                raise ValueError(
                    "{}:{}: expected '<image> <segmentation>', got {!r}".format(
                        filename, line_no, line))
            img_name, seg_name = fields

            # Flag 0 loads the mask as a single-channel image, so its shape
            # unpacks to (height, width).
            seg = cv2.imread(seg_name, 0)
            if seg is None:
                # imread signals a missing or undecodable file by returning
                # None instead of raising.
                raise OSError(
                    "cannot read segmentation mask: {!r}".format(seg_name))
            h, w = seg.shape

            record = {
                "fpath_img": img_name,
                "fpath_segm": seg_name,
                "width": w,
                "height": h,
            }
            json.dump(record, fw)
            fw.write('\n')
def _write_seq_record(fw, img_names, seg_names, mask_path):
    """Append one sequence record to *fw* as a single JSON line.

    The record's width and height are taken from the mask at *mask_path*.
    """
    # Flag 0 loads the mask as a single-channel image, so its shape unpacks
    # to (height, width).
    seg = cv2.imread(mask_path.strip(), 0)
    if seg is None:
        # imread signals a missing or undecodable file by returning None
        # instead of raising.
        raise OSError("cannot read segmentation mask: {!r}".format(mask_path))
    h, w = seg.shape
    record = {
        "fpath_img": img_names,
        "fpath_segm": seg_names,
        "width": w,
        "height": h,
    }
    json.dump(record, fw)
    fw.write('\n')


def get_seq_data(filename, seq_len):
    """Convert a sequence list file into an ``.odgt`` file of sequences.

    *filename* is parsed as two space-separated columns, ``img`` and
    ``label``. Rows sharing the same label form one group; the group's first
    image is its "main" image and opens every sequence emitted for that
    group. Each time ``seq_len`` image names have been collected, one JSON
    line is written whose ``fpath_img`` and ``fpath_segm`` are lists, and
    whose ``width``/``height`` come from the mask stored at the label path.
    Rows left over that do not complete a sequence are dropped.

    The output goes to ``decom_<stem>.odgt`` in the current directory, where
    ``<stem>`` is the base name of *filename* up to its first dot.

    Args:
        filename: Path of the ``img label`` list file ('/'-separated).
        seq_len: Number of images per emitted sequence; must be >= 1.

    Raises:
        ValueError: If ``seq_len`` is smaller than 1.
        OSError: If a mask file cannot be read.
    """
    if seq_len < 1:
        raise ValueError("seq_len must be >= 1, got {!r}".format(seq_len))

    seq_df = pd.read_csv(filename, sep=" ", names=['img', 'label'])
    out_name = "decom_" + filename.split('/')[-1].split('.')[0] + '.odgt'

    # NOTE: append mode -- re-running on the same input duplicates records.
    with open(out_name, 'a') as fw:
        # sort=False keeps groups in order of first appearance and rows in
        # file order, matching a scan over the unique labels.
        for label, group in seq_df.groupby('label', sort=False):
            main_img = ''
            img_names = []
            seg_names = []
            for idx, img_name in enumerate(group['img']):
                if idx == 0:
                    # The first row of a group supplies the image that
                    # starts every sequence of that group.
                    main_img = img_name
                    img_names = [main_img]
                    seg_names = [label]
                else:
                    img_names.append(img_name)
                    seg_names.append(label)

                if len(img_names) % seq_len == 0:
                    _write_seq_record(fw, img_names, seg_names, label)
                    # Start the next sequence from the main image again.
                    img_names = [main_img]
                    seg_names = [label]

            # NOTE(review): for seq_len >= 2 this cannot fire, because the
            # in-loop check already flushed and reset the lists. It only
            # triggers for seq_len == 1; kept to preserve that behavior.
            if len(img_names) == seq_len:
                _write_seq_record(fw, img_names, seg_names, label)
def str_to_bool(text):
    """Interpret a command-line string as a boolean.

    ``argparse`` with ``type=bool`` turns every non-empty string, including
    ``"False"``, into ``True``. This converter maps the usual spellings
    explicitly. The empty string maps to ``False``, which is what
    ``bool("")`` gave before.

    Raises:
        argparse.ArgumentTypeError: If *text* is not a recognised spelling.
    """
    normalized = text.strip().lower()
    if normalized in ("true", "t", "yes", "y", "1"):
        return True
    if normalized in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value, got {!r}".format(text))


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--seq", default=True, type=str_to_bool,
        help="true: build the sequence .odgt; false: build train/val .odgt")
    args = parser.parse_args()
    if args.seq:
        get_seq_data(seq_file, 3)
    else:
        get_data(train_file, 'train')
        get_data(val_file, 'val')