-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathwrite_csv_files.py
66 lines (58 loc) · 2.09 KB
/
write_csv_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
"""Script for writing cvs files
"""
import os
import csv
import pandas as pd
import random
import numpy as np
from random import shuffle
def create_csv_file(image_dir, output_csv):
    """
    Create a csv file that lists every PNG image of a folder with its label.

    The file starts with an `image,label` header, followed by one row per
    image, sorted by file name. The label is 0 when the file name ends with
    "0.png" and 1 otherwise.

    :param image_dir: folder whose direct entries are scanned for PNG files.
    :param output_csv: path of the csv file to write (overwritten if it
        already exists).
    """
    # Test the suffix rather than a substring, so that entries such as
    # "x.png.bak" are not mistaken for images.
    img_names = sorted(
        name for name in os.listdir(image_dir) if name.endswith(".png"))
    name_lab_list = [
        [name, 0 if name.endswith("0.png") else 1] for name in img_names]
    # The csv module expects its file object to be opened with newline='';
    # otherwise each row ends in "\r\r\n" on Windows, which reads back as a
    # blank line between records.
    with open(output_csv, mode='w', newline='') as csv_file:
        csv_writer = csv.writer(csv_file, delimiter=',',
                                quotechar='"', quoting=csv.QUOTE_MINIMAL)
        csv_writer.writerow(['image', 'label'])
        csv_writer.writerows(name_lab_list)
def random_split_dataset(input_file='config/cxr_all.csv',
                         train_names_file='config/cxr_train.csv',
                         valid_names_file='config/cxr_valid.csv',
                         test_names_file='config/cxr_test.csv',
                         *, train_end=0.7, valid_end=0.8, seed=2021):
    """
    Randomly split the rows of a csv file into training, validation and
    testing csv files.

    The first line of `input_file` is treated as the header and copied to
    the top of every output file. The remaining non-blank rows are shuffled
    with a fixed seed and cut at two positions; inside each subset the rows
    keep their original relative order. Called without arguments, the
    function reads and writes the `config/cxr_*.csv` files with a
    70% / 10% / 20% split.

    :param input_file: csv file holding the header and all the rows.
    :param train_names_file: output csv file for the training rows.
    :param valid_names_file: output csv file for the validation rows.
    :param test_names_file: output csv file for the testing rows.
    :param train_end: cumulative fraction of rows at which the training
        subset ends.
    :param valid_end: cumulative fraction of rows at which the validation
        subset ends; the rest of the rows form the testing subset. The cut
        points are cumulative because adding two float fractions (e.g.
        0.7 + 0.1) can round below the intended boundary.
    :param seed: seed given to the global `random` generator, so that the
        split is reproducible.
    :raises ValueError: if the cut points do not satisfy
        0 <= train_end <= valid_end <= 1.
    """
    if not 0.0 <= train_end <= valid_end <= 1.0:
        raise ValueError(
            "expected 0 <= train_end <= valid_end <= 1, got "
            "train_end={0}, valid_end={1}".format(train_end, valid_end))

    random.seed(seed)
    with open(input_file, 'r') as f:
        lines = f.readlines()
    header = lines[:1]
    # Blank lines (e.g. from an index written with doubled line endings)
    # are not samples and must not take part in the split.
    data_lines = [line for line in lines[1:] if line.strip()]

    img_num = len(data_lines)
    idx = list(range(img_num))
    shuffle(idx)
    num1 = int(img_num * train_end)
    num2 = int(img_num * valid_end)

    subsets = (
        (train_names_file, idx[:num1]),
        (valid_names_file, idx[num1:num2]),
        (test_names_file, idx[num2:]),
    )
    for names_file, subset_idx in subsets:
        # Sorting restores the original row order inside the subset.
        subset_lines = [data_lines[i] for i in sorted(subset_idx)]
        with open(names_file, 'w') as f:
            f.writelines(header + subset_lines)
if __name__ == "__main__":
    # Step 1: index the PNG images of the CHNCXR folder in one csv file.
    create_csv_file(
        image_dir='../../PyMIC_data/CHNCXR/CXR_png',
        output_csv='config/cxr_all.csv',
    )
    # Step 2: divide that index into training, validation and testing lists.
    random_split_dataset()