-
Notifications
You must be signed in to change notification settings - Fork 49
/
annotations_preprocessing.py
77 lines (67 loc) · 3.05 KB
/
annotations_preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
'''
Script to prepare annotation for litter detection task.
'''
import argparse
import os
import numpy as np
# update all annotations in one run
from utils.dataset_converter import convert_dataset, \
taco_categories_to_detectwaste, \
convert_to_binary
from utils.split_coco_dataset import split_coco_dataset
def get_args_parser():
parser = argparse.ArgumentParser(
'Prepare images of trash for classification task')
parser.add_argument('--epi_source', help='path to epinote annotations',
default='/dih4/dih4_2/wimlds/data'
'/annotations_epi.json',
type=str)
parser.add_argument('--taco_source',
help='path to taco annotations',
default='/dih4/dih4_2/wimlds/TACO-master'
'/data/annotations.json',
type=str)
parser.add_argument('--detectwaste_dest',
help='path to detectwaste annotations',
default='annotations/annotations_detectwaste.json',
type=str)
parser.add_argument('--epi_dest',
help='path to source epi annotations',
default='annotations/annotations-epi.json',
type=str)
parser.add_argument('--split_dest',
help='path to destination directory',
default='annotations/annotations',
type=str)
parser.add_argument('--test_split',
help='fraction of dataset for test',
default=0.2,
type=str)
return parser
if __name__ == '__main__':
parser = get_args_parser()
args = parser.parse_args()
np.random.seed(2020)
# create directory to store all annotations
if not os.path.exists(os.path.dirname(args.detectwaste_dest)):
os.mkdir(os.path.dirname(args.detectwaste_dest))
# first, move all category ids from taco annotation style (60 categories)
# to detectwaste (7 categories)
taco_categories_to_detectwaste(source=args.taco_source,
dest=args.detectwaste_dest)
# convert from epi to detectwaste
convert_dataset(args.detectwaste_dest, args.epi_source, args.epi_dest)
# split files into train and test files
# if you want to concat more datasets simply
# add path to datasets to the list below
list_of_datasets = [args.detectwaste_dest,
args.epi_dest,
]
split_coco_dataset(list_of_datasets,
args.split_dest,
args.test_split)
# convert all annotations to binary to preserve original split
convert_to_binary(source=args.split_dest+'_train.json',
dest=args.split_dest+'_binary_train.json')
convert_to_binary(source=args.split_dest+'_test.json',
dest=args.split_dest+'_binary_test.json')