Merge pull request #29 from saaresearch/develop
Fix some bugs and add a new feature.
a-smetanin authored Nov 29, 2023
2 parents 7280176 + ed1e0ec commit b8dbc45
Showing 24 changed files with 582 additions and 403 deletions.
5 changes: 2 additions & 3 deletions ODRS/api/ODRS.py
@@ -6,7 +6,7 @@ class ODRS:
     def __init__(self, job, data_path=None, classes="classes.txt",
                  img_size="256", batch_size="18", epochs="3",
                  model='yolov5l', gpu_count=1, select_gpu="0", config_path="dataset.yaml",
-                 split_train_value=0.6, split_test_value=0.35, split_val_value=0.05,
+                 split_train_value=0.6, split_val_value=0.05,
                  gpu=True, speed=2, accuracy=10):
         self.job = job.lower()
         self.data_path = data_path
@@ -19,7 +19,6 @@ def __init__(self, job, data_path=None, classes="classes.txt",
         self.select_gpu = select_gpu
         self.config_path = config_path
         self.split_train_value = split_train_value
-        self.split_test_value = split_test_value
         self.split_val_value = split_val_value
         self.gpu = gpu
         self.speed = speed
@@ -31,4 +30,4 @@ def fit(self):
         elif self.job == "object_detection":
             fit_model(self.data_path, self.classes, self.img_size, self.batch_size, self.epochs,
                       self.model, self.config_path, self.split_train_value, self.split_val_value,
-                      self.split_test_value, self.gpu_count, self.select_gpu)
+                      self.gpu_count, self.select_gpu)
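
For context: after this change the API no longer accepts split_test_value, so the test share is implicitly whatever remains after the train and validation fractions. A minimal usage sketch, assuming the class is importable as the file path suggests (the dataset path here is hypothetical):

from ODRS.api.ODRS import ODRS

# Hypothetical dataset; the test split is now the remainder (1 - 0.6 - 0.05).
odrs = ODRS(job="object_detection", data_path="user_datasets/my_dataset",
            classes="classes.txt", img_size="256", batch_size="18", epochs="3",
            model="yolov5l", split_train_value=0.6, split_val_value=0.05)
odrs.fit()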
16 changes: 8 additions & 8 deletions ODRS/data_utils/convert_yolo_to_voc.py
@@ -2,19 +2,20 @@
 import re
 from pathlib import Path
 import shutil
+from loguru import logger
 from PIL import Image
 from tqdm import tqdm
 from ODRS.data_utils.prepare_ssd import create_ssd_json
 
 
 def convert_voc(data_path, txt_path):
-    print("Creating VOC format for dataset")
-    for i in ['train', 'test', 'valid']:
-        convert_yolo_to_voc(f'{data_path}/{i}', txt_path, 'annotations')
-        shutil.rmtree(f'{data_path}/{i}/labels')
-        create_ssd_json(f'{data_path}/{i}', txt_path)
-        # except:
-        #     continue
+    logger.info("Creating VOC format for dataset")
+    path = Path(data_path)
+    folder_names = [folder.name for folder in path.iterdir() if folder.is_dir()]
+    for name in folder_names:
+        convert_yolo_to_voc(Path(data_path) / name, txt_path, 'annotations')
+        shutil.rmtree(Path(data_path) / name / 'labels')
+        create_ssd_json(Path(data_path) / name, txt_path)
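
The rewritten convert_voc no longer hard-codes the train/test/valid folder names; it converts every subdirectory it finds under data_path. A usage sketch (both paths are hypothetical examples):

# Converts each split subfolder from YOLO labels to VOC XML plus an SSD JSON,
# then removes the now-redundant 'labels' directories.
convert_voc('user_datasets/roadsigns', 'classes.txt')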


@@ -26,7 +27,6 @@ def copy_files_to_jpeg_images_folder(data_path):
             file_path = os.path.join(subfolder_path, file_name)
             if os.path.isfile(file_path):
                 shutil.copy(file_path, jpeg_images_folder)
-
     return jpeg_images_folder


43 changes: 23 additions & 20 deletions ODRS/data_utils/create_config.py
@@ -20,54 +20,57 @@ def delete_cache(data_path):
             if file.endswith(tuple(extensions_to_delete)):
                 os.remove(os.path.join(root, file))
 
 
-def create_config_data(train_path, val_path, classname_file, config_path, arch, batch_size, epochs, model):
-    # Get current file path
+def createRunDirectory(model):
     current_file_path = Path(__file__).resolve()
 
     # Create runs directory if it does not exist
-    runs_directory = f"{current_file_path.parents[2]}/runs"
+    runs_directory = Path(current_file_path.parents[2]) / 'runs'
     if not os.path.exists(runs_directory):
         os.makedirs(runs_directory, exist_ok=True)
 
     # Create runs path
-    runs_path = f"{runs_directory}/{str(datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))}_{model}"
-    os.makedirs(f"{runs_path}", exist_ok=True)
-    class_file_path = f"{current_file_path.parents[2]}/{classname_file}"
+    runs_path = runs_directory / f"{str(datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))}_{model}"
+    os.makedirs(runs_path, exist_ok=True)
+    return runs_path
+
+
+def create_config_data(train_path, val_path, classname_file, config_path, arch, batch_size, epochs, model):
+    current_file_path = Path(__file__).resolve()
+
+    runs_path = createRunDirectory(model)
+
+    class_file_path = Path(current_file_path.parents[2]) / classname_file
 
     # Create config path
-    config_path = f"{runs_path}/{config_path}"
+    config_path = runs_path / config_path
     if arch == 'ssd':
         class_names = read_names_from_txt(class_file_path)
         dataset_yaml = '''\
 # Data
 train_json: {}
 val_json: {}
 class_names: {}
-recall_steps: 101
+recall_steps: 11
 image_mean: [123., 117., 104.]
 image_stddev: [1., 1, 1.]
 # Model
 model: SSD
 backbone:
   name: VGG16
-  num_stages: 7
-input_size: 512
-anchor_scales: [0.04, 0.1, 0.26, 0.42, 0.58, 0.74, 0.9]
-anchor_aspect_ratios: [[1, 2], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
+  num_stages: 6
+input_size: 300
+anchor_scales: [0.1, 0.2, 0.375, 0.55, 0.725, 0.9]
+anchor_aspect_ratios: [[1, 2], [1, 2, 3], [1, 2, 3], [1, 2, 3], [1, 2], [1, 2]]
 # Training
 batch_size: {}
 epochs: {}
 optim:
   name: SGD
-  lr: 0.001
+  lr: 0.0001
   momentum: 0.9
   weight_decay: 0.0005
 scheduler:
   name: MultiStepLR
-  milestones: [90, 110]
+  milestones: [155, 195]
   gamma: 0.1
 '''.format(train_path, val_path, class_names, batch_size, epochs)
     logger.info("Create config file")
@@ -98,8 +101,8 @@ def create_config_data(train_path, val_path, classname_file, config_path, arch,
 # Whether to save the predictions of the validation set while training.
 SAVE_VALID_PREDICTION_IMAGES: True
-'''.format(f'{train_path}/images', f'{train_path}/annotations', f'{val_path}/images',
-           f'{val_path}/annotations', class_names, len(class_names))
+'''.format(train_path / 'images', train_path / 'annotations', val_path / 'images',
+           val_path / 'annotations', class_names, len(class_names))
     logger.info("Create config file")
     with open(config_path, 'w') as file:
         file.write(dataset_yaml)
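
Since the YAML is written verbatim, one way to sanity-check a generated file is to load it back. A sketch, assuming PyYAML is available and using a hypothetical run path:

import yaml

# Example path follows the runs/<timestamp>_<model> pattern created above.
with open('runs/2023-11-29_14-05-33_ssd/dataset.yaml') as f:
    cfg = yaml.safe_load(f)
print(cfg['model'], cfg['input_size'], cfg['epochs'])  # e.g. SSD 300 3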
139 changes: 0 additions & 139 deletions ODRS/data_utils/dataset_info.py

This file was deleted.

84 changes: 70 additions & 14 deletions ODRS/data_utils/prepare_ssd.py
@@ -1,5 +1,7 @@
 import os
 import json
+import glob
+from PIL import Image
 import xml.etree.ElementTree as ET
 from tqdm import tqdm
 from pathlib import Path
@@ -62,28 +64,82 @@ def get_image_names(folder_path):

 def create_ssd_json(path_folder, txt_path):
     current_file_path = Path(__file__).resolve()
-    txt_path = f"{current_file_path.parents[2]}/{txt_path}"
+    txt_path = Path(current_file_path.parents[2]) / txt_path
     class_names = read_names_from_txt(txt_path)
 
     paths = {
-        2007: os.path.join(os.path.dirname(path_folder), path_folder.split('/')[-1])
+        2007: os.path.join(os.path.dirname(path_folder), path_folder)
     }
 
     dataset = []
     for year, path in paths.items():
-        ids = get_image_names(f'{path_folder}/images')
+        ids = get_image_names(Path(path_folder) / 'images')
         for id in tqdm(ids):
             image_path = os.path.join(path, 'images', id + '.jpg')
             annotation_path = os.path.join(path, 'annotations', id + '.xml')
             if check_filename(annotation_path):
-                boxes, classes, difficulties = parse_annotation(annotation_path)
-                classes = [class_names.index(c) for c in classes]
-                dataset.append(
-                    {
-                        'image': os.path.abspath(image_path),
-                        'boxes': boxes,
-                        'classes': classes,
-                        'difficulties': difficulties
-                    }
-                )
-    save_as_json(f'{os.path.dirname(path_folder)}/{path_folder.split("/")[-1]}.json', dataset)
+                try:
+                    boxes, classes, difficulties = parse_annotation(annotation_path)
+                    classes = [class_names.index(c) for c in classes]
+                    dataset.append(
+                        {
+                            'image': os.path.abspath(image_path),
+                            'boxes': boxes,
+                            'classes': classes,
+                            'difficulties': difficulties
+                        }
+                    )
+                except Exception as e:
+                    print(e)
+
+    save_as_json(Path(os.path.dirname(path_folder)) / f'{path_folder.name}.json', dataset)
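
Each record written by create_ssd_json is a plain dict, so the resulting <folder>.json can be inspected directly. A reading sketch (the path is a hypothetical example):

import json

# Read back a JSON produced by create_ssd_json for a 'train' folder.
with open('user_datasets/roadsigns/train.json') as f:
    dataset = json.load(f)
sample = dataset[0]
print(sample['image'], sample['classes'])  # absolute image path and class indices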



+def resize_images_and_annotations(data_path, img_size):
+    size = img_size if img_size <= 300 else 300
+    path = Path(data_path)
+    folder_names = [folder.name for folder in path.iterdir() if folder.is_dir()]
+    for name in folder_names:
+        folder_path = path / name
+        images_path = os.path.join(folder_path, 'images')
+        labels_path = os.path.join(folder_path, 'labels')
+
+        for image_name in tqdm(os.listdir(images_path), desc=f'Resize {name} images'):
+            image_path = os.path.join(images_path, image_name)
+            label_path = os.path.join(labels_path, image_name.replace('.jpg', '.txt'))
+
+            with Image.open(image_path) as img:
+                original_width, original_height = img.size
+
+                if original_width > size or original_height > size:
+                    img = img.resize((size, size))
+
+                    if os.path.exists(label_path):
+                        with open(label_path, 'r') as file:
+                            lines = file.readlines()
+
+                        with open(label_path, 'w') as file:
+                            for line in lines:
+                                parts = line.split()
+                                if len(parts) == 5:
+                                    x_center = float(parts[1]) * original_width
+                                    y_center = float(parts[2]) * original_height
+                                    width = float(parts[3]) * original_width
+                                    height = float(parts[4]) * original_height
+
+                                    x_center *= size / original_width
+                                    y_center *= size / original_height
+                                    width *= size / original_width
+                                    height *= size / original_height
+
+                                    x_center /= size
+                                    y_center /= size
+                                    width /= size
+                                    height /= size
+
+                                    file.write(f"{parts[0]} {x_center} {y_center} {width} {height}\n")
+
+                    img.save(image_path)
+
+# resize_images_and_annotations('/media/space/ssd_1_tb_evo_sumsung/ITMO/ODRS/user_datasets/Warp-D_voc/test')
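
Side note on the arithmetic in this new function: YOLO box coordinates are normalized to [0, 1], so scaling up by the original dimensions, rescaling by size/original, and dividing by size again returns the input values (up to floating-point rounding); the rewrite preserves the labels as-is. A standalone check, not part of the commit:

# Round-trip of one normalized coordinate through the transform above.
original_width, size = 640, 300
x_norm = 0.73
x_center = x_norm * original_width   # to original-image pixels
x_center *= size / original_width    # rescale to the resized image
x_center /= size                     # back to a normalized coordinate
assert abs(x_center - x_norm) < 1e-12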
[Diffs for the remaining 19 changed files were not loaded in this view.]
