-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdarknet_data.py
153 lines (141 loc) · 8.53 KB
/
darknet_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
"""
Tool for generating train and/or test folders with labeled object samples for darknet-based detectors.
"""
import os
import sys
import argparse
import shutil
import json
import re
from tqdm import tqdm
# File extensions (as returned by os.path.splitext) that mark a file as an image sample.
_IMAGE_EXTENSIONS = [".jpg", ".png"]
# Extension of the label file that accompanies each image sample.
_ANNOTATION_EXTENSION = ".txt"
# Name of the subfolder that must exist inside each input folder for it to be used.
_VALIDATION_FOLDER_NAME = "image_samples"
# Name of the folder created inside the output folder; also the prefix of the
# relative paths written to the train/valid lists and to the obj.data file.
_DATA_FOLDER_NAME = "data"
# Name of the folder (inside the data folder) that receives the copied images and labels.
_OBJECTS_FOLDER_NAME = "obj"
# File listing one class name per line.
_NAMES_FILE_NAME = "obj.names"
# File holding the class count and the paths of the train/valid/names files.
_DATA_FILE_NAME = "obj.data"
# File listing the relative paths of the training images.
_TRAIN_FILE_NAME = "train.txt"
# File listing the relative paths of the validation images.
_VALID_FILE_NAME = "valid.txt"
# Fraction of the samples written to the validation list (0.10 -> one sample in ten).
# NOTE: the spelling "porcentage" is kept because the class refers to this exact name.
_valid_data_porcentage = 0.10
class DetectorCustomDataset(object):
    """Build a darknet-style detection dataset from folders of labeled samples.

    Every direct subfolder of ``parent_folder`` that contains an
    ``image_samples`` directory contributes its image/label pairs. The pairs
    are copied into ``<output_folder>/data/obj`` under sequential numeric
    names, and their relative paths are appended to ``train.txt`` or
    ``valid.txt``. ``obj.names`` and ``obj.data`` are generated from the JSON
    configuration, whose ``"classes"`` entry must map the string indices
    ``"0"``, ``"1"``, ... to class names.
    """

    def __init__(self, parent_folder: str, output_folder: str, configuration_file: str) -> None:
        """Load the configuration and list the candidate input subfolders.

        Args:
            parent_folder: Folder whose direct subfolders hold the samples.
            output_folder: Folder in which the ``data`` folder is created.
            configuration_file: Path to the JSON configuration file.

        Raises:
            OSError: If the configuration file or parent folder cannot be read.
            json.JSONDecodeError: If the configuration is not valid JSON.
        """
        self.parent_folder = parent_folder
        self.output_folder = output_folder
        # JSON text is UTF-8 by specification; do not depend on the locale.
        with open(configuration_file, encoding="utf-8") as config_file:
            self.config = json.load(config_file)
        with os.scandir(parent_folder) as entries:
            self.subfolders_path_list = [entry.path for entry in entries if entry.is_dir()]
        self.file_path_list = []
        self.samples_folder_path_list = []
        self.raw_image_id = ""
        self.sample_image_id = ""
        self.sample_annotation_id = ""
        self.images_filepath_list = []
        self.annotations_filepath_list = []
        self.batch_input_filepaths = []
        self.samples_counter = 0
        self.train_txt_path = ""
        self.valid_txt_path = ""
        self.output_data_folder = ""
        self.objects_data_folder = ""
        self.train_txt_file = None
        self.valid_txt_file = None

    @staticmethod
    def _numeric_sort_key(path: str) -> tuple:
        """Return a sort key that orders paths by the digits they contain.

        All digits of ``path`` are concatenated and read as one integer.
        Paths without any digit get ``-1`` (so they sort first) instead of
        raising ``ValueError``. The extension-less path breaks ties, which
        keeps an image list and its label list in the same relative order.
        """
        digits = re.sub(r'\D', '', path)
        number = int(digits) if digits else -1
        return number, os.path.splitext(path)[0]

    def run(self) -> None:
        """Generate the whole dataset: metadata files first, then the samples."""
        print('Darknet Detection Dataset generator tool LAUNCHED successfully!')
        self.create_training_file()
        self.get_valid_samples_folders()
        self.samples_folder_path_list.sort(key=self._numeric_sort_key)
        self.process_sample_subfolders()
        print('Darknet Detection Dataset generator tool finished successfully!')

    def process_sample_subfolders(self) -> None:
        """Copy every image/label pair and register it in train.txt or valid.txt.

        Samples are numbered with ``self.samples_counter``, which keeps
        growing across folders. Every sample whose number is a multiple of
        ``round(1 / _valid_data_porcentage)`` goes to the validation list and
        all others go to the training list.

        Raises:
            SystemExit: With status 1 when a folder holds a different number
                of images and labels; the unmatched files are printed first.
        """
        # Distance between two validation samples. It depends only on the
        # configured fraction, so an empty folder can no longer cause a
        # division by zero.
        valid_sample_save_idx = max(1, round(1 / _valid_data_porcentage))

        for samples_folder_path in self.samples_folder_path_list:
            print(f'Current sampling folder : {samples_folder_path}')
            with os.scandir(samples_folder_path) as entries:
                self.file_path_list = [entry.path for entry in entries if entry.is_file()]
            self.images_filepath_list = sorted(
                (path for path in self.file_path_list
                 if os.path.splitext(path)[1] in _IMAGE_EXTENSIONS),
                key=self._numeric_sort_key,
            )
            self.annotations_filepath_list = sorted(
                (path for path in self.file_path_list
                 if os.path.splitext(path)[1] == _ANNOTATION_EXTENSION),
                key=self._numeric_sort_key,
            )

            image_count = len(self.images_filepath_list)
            label_count = len(self.annotations_filepath_list)
            if image_count != label_count:
                print(f'Number of image files : {image_count}')
                print(f'Number of label files : {label_count}')
                if image_count > label_count:
                    # Some images have no label file.
                    self.find_missing_files(self.images_filepath_list)
                else:
                    # Some labels have no image: look for the image counterpart.
                    self.find_missing_files(self.annotations_filepath_list, _IMAGE_EXTENSIONS)
                # Non-zero status so that calling scripts can detect the failure.
                sys.exit(1)

            train_sample_counter = 0
            valid_sample_counter = 0
            with tqdm(total=image_count) as progress_bar, \
                    open(self.train_txt_path, "a+") as self.train_txt_file, \
                    open(self.valid_txt_path, "a+") as self.valid_txt_file:
                for image_path, label_path in zip(self.images_filepath_list,
                                                  self.annotations_filepath_list):
                    image_extension = os.path.splitext(image_path)[1]
                    self.sample_image_id = f'{self.samples_counter}{image_extension}'
                    self.sample_annotation_id = f'{self.samples_counter}{_ANNOTATION_EXTENSION}'
                    image_relative_path = os.path.join(
                        _DATA_FOLDER_NAME, _OBJECTS_FOLDER_NAME, self.sample_image_id)
                    shutil.copy2(image_path,
                                 os.path.join(self.objects_data_folder, self.sample_image_id))
                    shutil.copy2(label_path,
                                 os.path.join(self.objects_data_folder, self.sample_annotation_id))
                    if self.samples_counter % valid_sample_save_idx == 0:
                        self.valid_txt_file.write(f'{image_relative_path}\n')
                        valid_sample_counter += 1
                    else:
                        self.train_txt_file.write(f'{image_relative_path}\n')
                        train_sample_counter += 1
                    self.samples_counter += 1
                    progress_bar.update(1)

            # The first figure is cumulative; the train/validate figures are
            # the counts for the folder that was just processed.
            print(f'Total number of samples : {self.samples_counter}')
            print(f'Total number of samples to train : {train_sample_counter}')
            print(f'Total number of samples to validate : {valid_sample_counter}')
            print(f'Train - Validation split %: Training set {(1 - _valid_data_porcentage) * 100} - Validation set {_valid_data_porcentage * 100}')

    def create_training_file(self) -> None:
        """Create a fresh ``data`` folder with its metadata files.

        An existing ``data`` folder inside the output folder is deleted.
        Afterwards the folder holds ``obj.names``, empty ``train.txt`` and
        ``valid.txt`` files, an empty ``obj`` folder and ``obj.data``. The
        sample counter is reset to zero.

        Raises:
            KeyError: If the configuration has no ``"classes"`` entry or the
                entry lacks one of the string indices ``"0"`` .. ``"n-1"``.
        """
        self.output_data_folder = os.path.join(self.output_folder, _DATA_FOLDER_NAME)
        if os.path.exists(self.output_data_folder):
            print("--> The folder to process already have a data folder. Creating new data folder!")
            shutil.rmtree(self.output_data_folder)
        os.makedirs(self.output_data_folder)

        classes = self.config["classes"]
        object_names_path = os.path.join(self.output_data_folder, _NAMES_FILE_NAME)
        # Written as UTF-8 to match the encoding used to read the configuration.
        with open(object_names_path, "w", encoding="utf-8") as object_names_file:
            # The keys are string indices; walking the range writes the names
            # in class-id order whatever the key order in the JSON file is.
            for class_index in range(len(classes)):
                object_names_file.write(f"{classes[str(class_index)]}\n")
        print("obj.names file CREATED!")

        self.samples_counter = 0
        self.train_txt_path = os.path.join(self.output_data_folder, _TRAIN_FILE_NAME)
        with open(self.train_txt_path, "w") as self.train_txt_file:
            pass  # only create/truncate; entries are appended later
        print("train.txt file CREATED!")

        self.valid_txt_path = os.path.join(self.output_data_folder, _VALID_FILE_NAME)
        with open(self.valid_txt_path, "w") as self.valid_txt_file:
            pass  # only create/truncate; entries are appended later
        print("valid.txt file CREATED!")

        self.objects_data_folder = os.path.join(self.output_data_folder, _OBJECTS_FOLDER_NAME)
        os.makedirs(self.objects_data_folder)

        object_data_path = os.path.join(self.output_data_folder, _DATA_FILE_NAME)
        with open(object_data_path, "w") as object_data_file:
            object_data_file.write(f'classes = {len(classes)}\n')
            object_data_file.write(f'train = {_DATA_FOLDER_NAME}/{_TRAIN_FILE_NAME}\n')
            object_data_file.write(f'valid = {_DATA_FOLDER_NAME}/{_VALID_FILE_NAME}\n')
            object_data_file.write(f'names = {_DATA_FOLDER_NAME}/{_NAMES_FILE_NAME}\n')
            object_data_file.write('backup = backup/')

    def get_valid_samples_folders(self) -> None:
        """Collect the ``image_samples`` directory of every input subfolder.

        Subfolders without such a directory are reported and skipped.
        """
        self.samples_folder_path_list = []
        for samples_candidate_folder in self.subfolders_path_list:
            samples_folder = os.path.join(samples_candidate_folder, _VALIDATION_FOLDER_NAME)
            # isdir rather than exists: a plain file with that name cannot be scanned.
            if os.path.isdir(samples_folder):
                self.samples_folder_path_list.append(samples_folder)
            else:
                print(f"A IMAGE SAMPLES FOLDER DOES NOT EXISTS!!! Skipping folder {samples_candidate_folder} from dataset creation!")

    @staticmethod
    def find_missing_files(filepath_list, target_extensions=None):
        """Print and return the counterpart files that do not exist on disk.

        For every path in ``filepath_list`` the extension is replaced by each
        of ``target_extensions``; the entry counts as missing when none of
        the resulting files exists.

        Args:
            filepath_list: Paths whose counterparts are looked up.
            target_extensions: Extensions (with leading dot) of the expected
                counterpart. Defaults to the annotation extension only.

        Returns:
            The list of missing counterpart paths. When several extensions
            are given, the path built with the first one is reported.
        """
        extensions = list(target_extensions) if target_extensions else [_ANNOTATION_EXTENSION]
        missing_filepaths = []
        for file_path in filepath_list:
            stem = os.path.splitext(file_path)[0]
            candidates = [stem + extension for extension in extensions]
            if not any(os.path.isfile(candidate) for candidate in candidates):
                missing_filepaths.append(candidates[0])
        print(missing_filepaths)
        return missing_filepaths
if __name__ == "__main__":
    # Command-line entry point: three positional paths, then run the generator.
    cli = argparse.ArgumentParser(
        description="CLI for generating custom dataset for detection using YOLO Darknet format"
    )
    positional_arguments = (
        ("parent_folder",
         "Full path to parent folder containing subfolders with human validated samples"),
        ("output_folder",
         "Full path to output folder where all validated and post-processed samples will be stored"),
        ("configuration",
         "Full path to configuration.json file"),
    )
    for argument_name, argument_help in positional_arguments:
        cli.add_argument(argument_name, help=argument_help)
    options = cli.parse_args()

    dataset_generator = DetectorCustomDataset(
        options.parent_folder,
        options.output_folder,
        options.configuration,
    )
    dataset_generator.run()