-
Notifications
You must be signed in to change notification settings - Fork 49
/
annotations_preprocessing_multi.py
68 lines (56 loc) · 2.6 KB
/
annotations_preprocessing_multi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
'''
Script to prepare annotations for the litter detection task.
'''
import argparse
import os
import numpy as np
# update all annotations in one run
from utils.dataset_converter import convert_dataset, \
taco_categories_to_detectwaste, \
convert_to_binary, \
concatenate_datasets
from utils.split_coco_dataset import split_coco_dataset
def get_args_parser():
    """Build the argument parser for the annotation-preparation script.

    Returns:
        argparse.ArgumentParser: parser with three options:
            --dataset_dest: one or more paths to COCO-style annotation files.
            --split_dest:   directory where the split/converted files go.
            --test_split:   fraction of each dataset reserved for testing.
    """
    parser = argparse.ArgumentParser(
        'Prepare images of trash for detection task')
    parser.add_argument('--dataset_dest',
                        help='paths to annotations',
                        nargs='+',
                        default=['annotations/annotations-epi.json'])
    parser.add_argument('--split_dest',
                        help='path to destination directory',
                        default='annotations/',
                        type=str)
    parser.add_argument('--test_split',
                        help='fraction of dataset for test',
                        default=0.2,
                        # was type=str, which conflicted with the float
                        # default and made CLI-supplied values strings
                        type=float)
    return parser
if __name__ == '__main__':
    parser = get_args_parser()
    args = parser.parse_args()
    # Fixed seed so the train/test splits are reproducible across runs.
    np.random.seed(2020)

    # For each input annotation file: split into train/test, convert the
    # multi-class labels to binary (litter / no-litter), and collect the
    # per-dataset outputs so they can be merged at the end.
    # To concatenate more datasets, simply pass more paths via
    # --dataset_dest.
    train_to_concat = []
    test_to_concat = []
    for i, data_file in enumerate(args.dataset_dest):
        print('Parsing', data_file, 'file', i + 1, 'of', len(args.dataset_dest))
        # Prefix with the dataset index so identically-named files from
        # different source directories cannot collide in split_dest.
        # os.path.basename/splitext replace the previous '/'-splitting,
        # which broke on Windows-style paths.
        filename = str(i) + '_' + os.path.splitext(os.path.basename(data_file))[0]
        print(filename)
        # split_coco_dataset writes '<prefix>_train.json' / '<prefix>_test.json'.
        # os.path.join is safe whether or not split_dest ends with a slash
        # (plain '+' concatenation silently produced broken paths).
        split_prefix = os.path.join(args.split_dest, filename)
        # float() guards against a string value of --test_split.
        train, test = split_coco_dataset([data_file],
                                         split_prefix,
                                         float(args.test_split))

        train_source = split_prefix + '_train.json'
        test_source = split_prefix + '_test.json'
        train_dest = os.path.join(args.split_dest,
                                  'binary_' + filename + '_train.json')
        test_dest = os.path.join(args.split_dest,
                                 'binary_' + filename + '_test.json')

        convert_to_binary(source=train_source, dest=train_dest)
        convert_to_binary(source=test_source, dest=test_dest)

        train_to_concat.append(train_dest)
        test_to_concat.append(test_dest)

    # Merge all per-dataset binary splits into one mixed train and test set.
    concatenate_datasets(train_to_concat,
                         dest=os.path.join(args.split_dest,
                                           'binary_mixed_train.json'))
    concatenate_datasets(test_to_concat,
                         dest=os.path.join(args.split_dest,
                                           'binary_mixed_test.json'))