Nested datasets and shift to new engine
iwatkot authored Mar 12, 2024
2 parents 4b1aaa8 + 36e2ff5 commit bdde769
Showing 7 changed files with 114 additions and 126 deletions.
3 changes: 3 additions & 0 deletions .flake8
@@ -0,0 +1,3 @@
[flake8]
max-line-length = 100
ignore = E203, E501, W503, E722, W605
2 changes: 2 additions & 0 deletions .gitignore
@@ -3,3 +3,5 @@ venv/*
.idea/*
.venv
debug
data/
.DS_Store
3 changes: 2 additions & 1 deletion config.json
@@ -1,12 +1,13 @@
{
"name": "Convert Supervisely to YOLO v5 format",
"type": "app",
"version": "2.0.0",
"categories": [
"images",
"export"
],
"description": "Transform project to YOLO v5 format and prepares tar archive for download",
"docker_image": "supervisely/import-export:6.72.233",
"docker_image": "supervisely/import-export:6.73.48",
"instance_version": "6.8.54",
"main_script": "src/convert_sly_to_yolov5.py",
"modal_template": "src/modal.html",
22 changes: 22 additions & 0 deletions create_venv.sh
@@ -0,0 +1,22 @@
#!/bin/bash

# learn more in documentation
# Official python docs: https://docs.python.org/3/library/venv.html
# Supervisely developer portal: https://developer.supervise.ly/getting-started/installation#venv

if [ -d ".venv" ]; then
echo "VENV already exists, will be removed"
rm -rf .venv
fi

echo "VENV will be created" && \
python3 -m venv .venv && \
source .venv/bin/activate && \

echo "Install requirements..." && \
pip3 install -r dev_requirements.txt && \
echo "Requirements have been successfully installed" && \
echo "Testing imports, please wait a minute ..." && \
python3 -c "import supervisely as sly" && \
echo "Success!" && \
deactivate
2 changes: 1 addition & 1 deletion dev_requirements.txt
@@ -1 +1 @@
supervisely==6.72.233
supervisely==6.73.48
13 changes: 3 additions & 10 deletions local.env
@@ -1,10 +1,3 @@
PYTHONUNBUFFERED=1

TASK_ID=21262

context.teamId=506
context.workspaceId=942
modal.state.slyProjectId=27374

DEBUG_APP_DIR="debug/app_data"
DEBUG_CACHE_DIR="debug/app_cache"
TEAM_ID = 448
WORKSPACE_ID = 690
PROJECT_ID = 35637
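
The new local.env is picked up by the rewritten entry point when the app runs in development mode. Below is a minimal sketch of that bootstrap, assuming the pinned supervisely==6.73.48 SDK and python-dotenv are installed, that local.env contains the TEAM_ID / WORKSPACE_ID / PROJECT_ID values shown above, and that ~/supervisely.env provides the server address and API token:

import os

from dotenv import load_dotenv
import supervisely as sly

if sly.is_development():
    # Development only: read project/workspace IDs and API credentials
    # from the two env files instead of the task environment.
    load_dotenv("local.env")
    load_dotenv(os.path.expanduser("~/supervisely.env"))

api = sly.Api.from_env()
print(sly.env.team_id(), sly.env.workspace_id(), sly.env.project_id())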
195 changes: 81 additions & 114 deletions src/convert_sly_to_yolov5.py
@@ -1,29 +1,43 @@
import os
import yaml

from typing import List, Tuple
from dotenv import load_dotenv

import supervisely as sly
from supervisely.app.v1.app_service import AppService

# region constants
TRAIN_TAG_NAME = "train"
VAL_TAG_NAME = "val"
DATA_DIR = os.path.join(os.getcwd(), "data")
# endregion
sly.fs.mkdir(DATA_DIR, remove_content_if_exists=True)

if sly.is_development():
load_dotenv("local.env")
load_dotenv(os.path.expanduser("~/supervisely.env"))

my_app = AppService()

TEAM_ID = sly.env.team_id()
WORKSPACE_ID = sly.env.workspace_id()
PROJECT_ID = sly.env.project_id()
PROCCESS_SHAPES = os.environ.get("modal.state.processShapes", "transform")
PROCCESS_SHAPES_MSG = "skipped" if PROCCESS_SHAPES == "skip" else "transformed to rectangles"

TRAIN_TAG_NAME = "train"
VAL_TAG_NAME = "val"


def transform_label(class_names, img_size, label: sly.Label):
# region envvars
team_id = sly.env.team_id()
workspace_id = sly.env.workspace_id()
project_id = sly.env.project_id()
process_shapes = os.environ.get("modal.state.processShapes", "transform")
process_shapes_message = "skipped" if process_shapes == "skip" else "transformed to rectangles"
# endregion
sly.logger.info(f"Team: {team_id}, Workspace: {workspace_id}, Project: {project_id}")
sly.logger.info(f"Process shapes: {process_shapes}")


def transform_label(class_names: List[str], img_size: Tuple[int, int], label: sly.Label) -> str:
"""Transforms label to YOLOv5 format.
:param class_names: list of class names
:type class_names: List[str]
:param img_size: image size
:type img_size: Tuple[int, int]
:param label: label to transform
:type label: sly.Label
:return: transformed label
:rtype: str
"""
class_number = class_names.index(label.obj_class.name)
rect_geometry = label.geometry.to_bbox()
center = rect_geometry.center
@@ -34,32 +48,35 @@ def transform_label(class_names, img_size, label: sly.Label):
return f"{class_number} {x_center} {y_center} {width} {height}"


@my_app.callback("transform")
@sly.timeit
def transform(api: sly.Api, task_id, context, state, app_logger):
project = api.project.get_info_by_id(PROJECT_ID)
result_dir_name = "{}_{}".format(project.id, project.name)

RESULT_DIR = os.path.join(my_app.data_dir, result_dir_name)
sly.fs.mkdir(RESULT_DIR)
ARCHIVE_NAME = f"{result_dir_name}.tar"
RESULT_ARCHIVE = os.path.join(my_app.data_dir, ARCHIVE_NAME)
CONFIG_PATH = os.path.join(RESULT_DIR, "data_config.yaml")

TRAIN_IMAGES_DIR = os.path.join(RESULT_DIR, "images/train")
TRAIN_LABELS_DIR = os.path.join(RESULT_DIR, "labels/train")
sly.fs.mkdir(TRAIN_IMAGES_DIR)
sly.fs.mkdir(TRAIN_LABELS_DIR)

VAL_IMAGES_DIR = os.path.join(RESULT_DIR, "images/val")
VAL_LABELS_DIR = os.path.join(RESULT_DIR, "labels/val")
sly.fs.mkdir(VAL_IMAGES_DIR)
sly.fs.mkdir(VAL_LABELS_DIR)
def transform(api: sly.Api) -> None:
"""Transforms Supervisely project to YOLOv5 format."""
project = api.project.get_info_by_id(project_id)

meta_json = api.project.get_meta(PROJECT_ID)
# Preparing result directory.
result_dir_name = "{}_{}".format(project.id, project.name)
result_dir = os.path.join(DATA_DIR, result_dir_name)
sly.fs.mkdir(result_dir)
config_path = os.path.join(result_dir, "data_config.yaml")
sly.logger.debug(f"Data will be saved to {result_dir}, path to the config file: {config_path}")

# Preparing directories for images and labels.
train_images_dir = os.path.join(result_dir, "images/train")
train_labels_dir = os.path.join(result_dir, "labels/train")
sly.fs.mkdir(train_images_dir)
sly.fs.mkdir(train_labels_dir)
val_images_dir = os.path.join(result_dir, "images/val")
val_labels_dir = os.path.join(result_dir, "labels/val")
sly.fs.mkdir(val_images_dir)
sly.fs.mkdir(val_labels_dir)

# Retrieving project meta and creating class names and colors lists.
meta_json = api.project.get_meta(project_id)
meta = sly.ProjectMeta.from_json(meta_json)
class_names = [obj_class.name for obj_class in meta.obj_classes]
class_colors = [obj_class.color for obj_class in meta.obj_classes]
sly.logger.debug(
f"Project meta retrieved. Class names: {class_names}, class colors: {class_colors}"
)

missing_tags = []
if meta.get_tag_meta(TRAIN_TAG_NAME) is None:
@@ -68,7 +85,7 @@ def transform(api: sly.Api, task_id, context, state, app_logger):
missing_tags.append(VAL_TAG_NAME)
if len(missing_tags) > 0:
missing_tags_str = ", ".join([f'"{tag}"' for tag in missing_tags])
app_logger.warn(
sly.logger.warn(
f"Tag(s): {missing_tags_str} not found in project meta. Images without special tags will be marked as train"
)

@@ -79,7 +96,7 @@ def transform(api: sly.Api, task_id, context, state, app_logger):
if len(error_classes) > 0:
sly.logger.warn(
f"Project has unsupported classes. "
f"Objects with unsupported geometry types will be {PROCCESS_SHAPES_MSG}: "
f"Objects with unsupported geometry types will be {process_shapes_message}: "
f"{[obj_class.name for obj_class in error_classes]}"
)

@@ -97,9 +114,11 @@ def _add_to_split(image_id, img_name, split_ids, split_image_paths, labels_dir,
train_count = 0
val_count = 0

progress = sly.Progress("Transformation ...", api.project.get_images_count(PROJECT_ID))
for dataset in api.dataset.get_list(PROJECT_ID):
progress = sly.Progress("Transformation ...", api.project.get_images_count(project_id))
for dataset in api.dataset.get_list(project_id, recursive=True):
sly.logger.info(f"Working with dataset: {dataset.name}...")
images = api.image.get_list(dataset.id)
sly.logger.debug(f"Dataset contains {len(images)} images.")

unsupported_shapes = 0
train_ids = []
@@ -109,7 +128,7 @@ def _add_to_split(image_id, img_name, split_ids, split_image_paths, labels_dir,

for batch in sly.batched(images):
image_ids = [image_info.id for image_info in batch]
image_names = [f"{dataset.name}_{image_info.name}" for image_info in batch]
image_names = [f"{dataset.id}_{dataset.name}_{image_info.name}" for image_info in batch]
ann_infos = api.annotation.download_batch(dataset.id, image_ids)

for image_id, img_name, ann_info in zip(image_ids, image_names, ann_infos):
@@ -120,7 +139,7 @@ def _add_to_split(image_id, img_name, split_ids, split_image_paths, labels_dir,
for label in ann.labels:
if label.obj_class.geometry_type != sly.Rectangle:
unsupported_shapes += 1
if PROCCESS_SHAPES == "skip":
if process_shapes == "skip":
continue
yolov5_ann.append(transform_label(class_names, ann.img_size, label))

@@ -131,32 +150,30 @@ def _add_to_split(image_id, img_name, split_ids, split_image_paths, labels_dir,
img_name,
train_ids,
train_image_paths,
TRAIN_LABELS_DIR,
TRAIN_IMAGES_DIR,
train_labels_dir,
train_images_dir,
)
image_processed = True
train_count += 1

if ann.img_tags.get(VAL_TAG_NAME) is not None:
val_ids.append(image_id)
ann_path = os.path.join(VAL_LABELS_DIR, f"{sly.fs.get_file_name(img_name)}.txt")
ann_path = os.path.join(val_labels_dir, f"{sly.fs.get_file_name(img_name)}.txt")

_write_new_ann(ann_path, yolov5_ann)
img_path = os.path.join(VAL_IMAGES_DIR, img_name)
img_path = os.path.join(val_images_dir, img_name)
val_image_paths.append(img_path)
image_processed = True
val_count += 1

if not image_processed:
# app_logger.warn("Image does not have train or val tags. It will be placed to training set.",
# extra={"image_id": image_id, "image_name": img_name, "dataset": dataset.name})
_add_to_split(
image_id,
img_name,
train_ids,
train_image_paths,
TRAIN_LABELS_DIR,
TRAIN_IMAGES_DIR,
train_labels_dir,
train_images_dir,
)
train_count += 1

@@ -165,9 +182,9 @@ def _add_to_split(image_id, img_name, split_ids, split_image_paths, labels_dir,

progress.iters_done_report(len(batch))
if unsupported_shapes > 0:
app_logger.warn(
f"DATASET '{dataset.name}': "
f"{unsupported_shapes} objects with unsupported geometry types have been {PROCCESS_SHAPES_MSG}"
sly.logger.warn(
f"Dataset {dataset.name}: "
f"{unsupported_shapes} objects with unsupported geometry types have been {process_shapes_message}"
)

data_yaml = {
@@ -177,67 +194,17 @@ def _add_to_split(image_id, img_name, split_ids, split_image_paths, labels_dir,
"names": class_names,
"colors": class_colors,
}
with open(CONFIG_PATH, "w") as f:
data = yaml.dump(data_yaml, f, default_flow_style=None)
with open(config_path, "w") as f:
yaml.dump(data_yaml, f, default_flow_style=None)

app_logger.info("Number of images in train: {}".format(train_count))
app_logger.info("Number of images in val: {}".format(val_count))
sly.logger.info("Number of images in train: {}".format(train_count))
sly.logger.info("Number of images in val: {}".format(val_count))

sly.fs.archive_directory(RESULT_DIR, RESULT_ARCHIVE)
app_logger.info("Result directory is archived")
# Archiving and uploading the directory to the TeamFiles.
sly.output.set_download(result_dir)
sly.logger.info("File uploaded, app stopped.")

remote_archive_path = os.path.join(
sly.team_files.RECOMMENDED_EXPORT_PATH, "yolov5_format/{}/{}".format(task_id, ARCHIVE_NAME)
)

# @TODO: uncomment only for debug
# api.file.remove(TEAM_ID, remote_archive_path)

upload_progress = []

def _print_progress(monitor, upload_progress):
if len(upload_progress) == 0:
upload_progress.append(
sly.Progress(
message="Upload {!r}".format(ARCHIVE_NAME),
total_cnt=monitor.len,
ext_logger=app_logger,
is_size=True,
)
)
upload_progress[0].set_current_value(monitor.bytes_read)

file_info = api.file.upload(
TEAM_ID, RESULT_ARCHIVE, remote_archive_path, lambda m: _print_progress(m, upload_progress)
)
app_logger.info("Uploaded to Team-Files: {!r}".format(file_info.storage_path))
api.task.set_output_archive(
task_id, file_info.id, ARCHIVE_NAME, file_url=file_info.storage_path
)

my_app.stop()


def main():
sly.logger.info(
"Script arguments",
extra={
"context.teamId": TEAM_ID,
"context.workspaceId": WORKSPACE_ID,
"modal.state.slyProjectId": PROJECT_ID,
"CONFIG_DIR": os.environ.get("CONFIG_DIR", "ENV not found"),
},
)

api = sly.Api.from_env()

# Run application service
my_app.run(initial_events=[{"command": "transform"}])


# @TODO: add information to modal window
if __name__ == "__main__":
# @TODO: uncomment only for debug
# sly.fs.clean_dir(my_app.data_dir)

sly.main_wrapper("main", main, log_for_agent=False)
api = sly.Api.from_env()
transform(api)
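
For reference, transform_label emits the standard YOLOv5 label line: class index followed by the bounding-box center, width, and height, each normalized to the image dimensions. The arithmetic sits in the collapsed part of the hunk above, so the snippet below is an illustrative reconstruction under that assumption, not the exact code from this commit:

# Illustrative sketch (not the exact commit code): convert a Supervisely
# rectangle into a normalized "class x_center y_center width height" line.
def to_yolo_line(class_number: int, bbox: sly.Rectangle, img_size) -> str:
    img_height, img_width = img_size  # Supervisely annotations store (height, width)
    x_center = round(bbox.center.col / img_width, 6)
    y_center = round(bbox.center.row / img_height, 6)
    width = round(bbox.width / img_width, 6)
    height = round(bbox.height / img_height, 6)
    return f"{class_number} {x_center} {y_center} {width} {height}"

For example, a 100x50 box centered at column 320, row 240 of a 640x480 image becomes "0 0.5 0.5 0.15625 0.104167" for class index 0.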
