-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_split.py
53 lines (30 loc) · 1.14 KB
/
data_split.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# script to split data into train, validation, and test sets
import os
import shutil
import random
from glob import glob
# --- Configuration: source directories and split proportions ---
images_folder = "Images folder path"
labels_folder = "Labels folder path"
# test_ratio is recorded for reference but never read below: the test set
# simply receives whatever is left after the train and validation cuts.
train_ratio, valid_ratio, test_ratio = 0.8, 0.10, 0.10

# Collect every .jpg directly inside the images directory, then randomise
# the order in place so the slices below form a random partition.
image_files = glob(os.path.join(images_folder, '*.jpg'))
random.shuffle(image_files)

# Cut points into the shuffled list. int() truncates, so any rounding
# remainder ends up in the test slice.
train_split = int(len(image_files) * train_ratio)
valid_split = train_split + int(len(image_files) * valid_ratio)

train_files, valid_files, test_files = (
    image_files[:train_split],
    image_files[train_split:valid_split],
    image_files[valid_split:],
)
def move_files(files, set_name, *, labels_dir=None, output_dir=None):
    """Copy images and their matching label files into a split directory.

    Despite the name, files are copied with ``shutil.copy`` rather than
    moved, so the source images and labels are left in place.

    The destination layout is ``<set_name>/images/`` and
    ``<set_name>/labels/``; both directories are created if missing.

    Args:
        files: Iterable of image file paths to copy.
        set_name: Name of the split (e.g. ``'train'``); used as the
            destination directory name.
        labels_dir: Directory searched for ``<image stem>.txt`` label
            files. Defaults to the module-level ``labels_folder``.
        output_dir: Directory under which ``<set_name>/`` is created.
            Defaults to a path relative to the current working directory.

    Returns:
        None.
    """
    if labels_dir is None:
        # Looked up at call time so the script-level setting is honoured.
        labels_dir = labels_folder

    split_root = set_name if output_dir is None else os.path.join(output_dir, set_name)
    images_dest = os.path.join(split_root, 'images')
    labels_dest = os.path.join(split_root, 'labels')
    os.makedirs(images_dest, exist_ok=True)
    os.makedirs(labels_dest, exist_ok=True)

    for file in files:
        shutil.copy(file, images_dest)
        base_name = os.path.splitext(os.path.basename(file))[0]
        label_file = os.path.join(labels_dir, base_name + '.txt')
        # An image with no label file is still copied; only the label copy
        # is skipped.
        if os.path.exists(label_file):
            shutil.copy(label_file, labels_dest)
# Populate each split directory from its slice of the shuffled image list.
for split_name, split_files in (
    ('train', train_files),
    ('val', valid_files),
    ('test', test_files),
):
    move_files(split_files, split_name)

print("Data split completed!")