Nested datasets and shift to new engine #14

Merged · 1 commit · Mar 12, 2024
3 changes: 3 additions & 0 deletions .flake8
@@ -0,0 +1,3 @@
[flake8]
max-line-length = 100
ignore = E203, E501, W503, E722, W605
2 changes: 2 additions & 0 deletions .gitignore
@@ -3,3 +3,5 @@ venv/*
.idea/*
.venv
debug
data/
.DS_Store
3 changes: 2 additions & 1 deletion config.json
@@ -1,12 +1,13 @@
{
"name": "Convert Supervisely to YOLO v5 format",
"type": "app",
"version": "2.0.0",
"categories": [
"images",
"export"
],
"description": "Transform project to YOLO v5 format and prepares tar archive for download",
"docker_image": "supervisely/import-export:6.72.233",
"docker_image": "supervisely/import-export:6.73.48",
"instance_version": "6.8.54",
"main_script": "src/convert_sly_to_yolov5.py",
"modal_template": "src/modal.html",
22 changes: 22 additions & 0 deletions create_venv.sh
@@ -0,0 +1,22 @@
#!/bin/bash

# learn more in documentation
# Official python docs: https://docs.python.org/3/library/venv.html
# Supervisely developer portal: https://developer.supervise.ly/getting-started/installation#venv

if [ -d ".venv" ]; then
echo "VENV already exists, will be removed"
rm -rf .venv
fi

echo "VENV will be created" && \
python3 -m venv .venv && \
source .venv/bin/activate && \

echo "Install requirements..." && \
pip3 install -r dev_requirements.txt && \
echo "Requirements have been successfully installed" && \
echo "Testing imports, please wait a minute ..." && \
python3 -c "import supervisely as sly" && \
echo "Success!" && \
deactivate
2 changes: 1 addition & 1 deletion dev_requirements.txt
@@ -1 +1 @@
supervisely==6.72.233
supervisely==6.73.48
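
Both the docker_image tag in config.json and the pin in dev_requirements.txt move to the same 6.73.48 SDK release. A minimal sketch of a consistency check one could run from the repository root (the check itself is illustrative and not part of this PR):

import json
from pathlib import Path

# Illustrative check: the SDK version pinned for local development should
# match the tag of the docker image used on the platform.
config = json.loads(Path("config.json").read_text())
docker_tag = config["docker_image"].split(":")[-1]  # "6.73.48"
pinned_version = Path("dev_requirements.txt").read_text().strip().split("==")[-1]
assert docker_tag == pinned_version, f"{docker_tag} != {pinned_version}"
print(f"SDK {pinned_version} is pinned consistently in config.json and dev_requirements.txt")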
13 changes: 3 additions & 10 deletions local.env
@@ -1,10 +1,3 @@
PYTHONUNBUFFERED=1

TASK_ID=21262

context.teamId=506
context.workspaceId=942
modal.state.slyProjectId=27374

DEBUG_APP_DIR="debug/app_data"
DEBUG_CACHE_DIR="debug/app_cache"
TEAM_ID = 448
WORKSPACE_ID = 690
PROJECT_ID = 35637
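
The old context.* / modal.state.* keys are replaced with plain TEAM_ID, WORKSPACE_ID and PROJECT_ID entries, which the new engine resolves through the sly.env helpers once local.env has been loaded. A minimal sketch of how a development run picks these values up (it mirrors the load_dotenv calls in the diff below; nothing here is new API):

import os
from dotenv import load_dotenv
import supervisely as sly

# In development the script loads local.env plus the user-level credentials
# file, then resolves the IDs through the sly.env helpers.
if sly.is_development():
    load_dotenv("local.env")
    load_dotenv(os.path.expanduser("~/supervisely.env"))

print(sly.env.team_id(), sly.env.workspace_id(), sly.env.project_id())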
195 changes: 81 additions & 114 deletions src/convert_sly_to_yolov5.py
@@ -1,29 +1,43 @@
import os
import yaml

from typing import List, Tuple
from dotenv import load_dotenv

import supervisely as sly
from supervisely.app.v1.app_service import AppService

# region constants
TRAIN_TAG_NAME = "train"
VAL_TAG_NAME = "val"
DATA_DIR = os.path.join(os.getcwd(), "data")
# endregion
sly.fs.mkdir(DATA_DIR, remove_content_if_exists=True)

if sly.is_development():
load_dotenv("local.env")
load_dotenv(os.path.expanduser("~/supervisely.env"))

my_app = AppService()

TEAM_ID = sly.env.team_id()
WORKSPACE_ID = sly.env.workspace_id()
PROJECT_ID = sly.env.project_id()
PROCCESS_SHAPES = os.environ.get("modal.state.processShapes", "transform")
PROCCESS_SHAPES_MSG = "skipped" if PROCCESS_SHAPES == "skip" else "transformed to rectangles"

TRAIN_TAG_NAME = "train"
VAL_TAG_NAME = "val"


def transform_label(class_names, img_size, label: sly.Label):
# region envvars
team_id = sly.env.team_id()
workspace_id = sly.env.workspace_id()
project_id = sly.env.project_id()
process_shapes = os.environ.get("modal.state.processShapes", "transform")
process_shapes_message = "skipped" if process_shapes == "skip" else "transformed to rectangles"
# endregion
sly.logger.info(f"Team: {team_id}, Workspace: {workspace_id}, Project: {project_id}")
sly.logger.info(f"Process shapes: {process_shapes}")


def transform_label(class_names: List[str], img_size: Tuple[int, int], label: sly.Label) -> str:
"""Transforms label to YOLOv5 format.

:param class_names: list of class names
:type class_names: List[str]
:param img_size: image size
:type img_size: Tuple[int, int]
:param label: label to transform
:type label: sly.Label
:return: transformed label
:rtype: str
"""
class_number = class_names.index(label.obj_class.name)
rect_geometry = label.geometry.to_bbox()
center = rect_geometry.center
@@ -34,32 +48,35 @@ def transform_label(class_names, img_size, label: sly.Label):
return f"{class_number} {x_center} {y_center} {width} {height}"


@my_app.callback("transform")
@sly.timeit
def transform(api: sly.Api, task_id, context, state, app_logger):
project = api.project.get_info_by_id(PROJECT_ID)
result_dir_name = "{}_{}".format(project.id, project.name)

RESULT_DIR = os.path.join(my_app.data_dir, result_dir_name)
sly.fs.mkdir(RESULT_DIR)
ARCHIVE_NAME = f"{result_dir_name}.tar"
RESULT_ARCHIVE = os.path.join(my_app.data_dir, ARCHIVE_NAME)
CONFIG_PATH = os.path.join(RESULT_DIR, "data_config.yaml")

TRAIN_IMAGES_DIR = os.path.join(RESULT_DIR, "images/train")
TRAIN_LABELS_DIR = os.path.join(RESULT_DIR, "labels/train")
sly.fs.mkdir(TRAIN_IMAGES_DIR)
sly.fs.mkdir(TRAIN_LABELS_DIR)

VAL_IMAGES_DIR = os.path.join(RESULT_DIR, "images/val")
VAL_LABELS_DIR = os.path.join(RESULT_DIR, "labels/val")
sly.fs.mkdir(VAL_IMAGES_DIR)
sly.fs.mkdir(VAL_LABELS_DIR)
def transform(api: sly.Api) -> None:
"""Transforms Supervisely project to YOLOv5 format."""
project = api.project.get_info_by_id(project_id)

meta_json = api.project.get_meta(PROJECT_ID)
# Preparing result directory.
result_dir_name = "{}_{}".format(project.id, project.name)
result_dir = os.path.join(DATA_DIR, result_dir_name)
sly.fs.mkdir(result_dir)
config_path = os.path.join(result_dir, "data_config.yaml")
sly.logger.debug(f"Data will be saved to {result_dir}, path to the config file: {config_path}")

# Preparing directories for images and labels.
train_images_dir = os.path.join(result_dir, "images/train")
train_labels_dir = os.path.join(result_dir, "labels/train")
sly.fs.mkdir(train_images_dir)
sly.fs.mkdir(train_labels_dir)
val_images_dir = os.path.join(result_dir, "images/val")
val_labels_dir = os.path.join(result_dir, "labels/val")
sly.fs.mkdir(val_images_dir)
sly.fs.mkdir(val_labels_dir)

# Retrieving project meta and creating class names and colors lists.
meta_json = api.project.get_meta(project_id)
meta = sly.ProjectMeta.from_json(meta_json)
class_names = [obj_class.name for obj_class in meta.obj_classes]
class_colors = [obj_class.color for obj_class in meta.obj_classes]
sly.logger.debug(
f"Project meta retrieved. Class names: {class_names}, class colors: {class_colors}"
)

missing_tags = []
if meta.get_tag_meta(TRAIN_TAG_NAME) is None:
@@ -68,7 +85,7 @@ def transform(api: sly.Api, task_id, context, state, app_logger):
missing_tags.append(VAL_TAG_NAME)
if len(missing_tags) > 0:
missing_tags_str = ", ".join([f'"{tag}"' for tag in missing_tags])
app_logger.warn(
sly.logger.warn(
f"Tag(s): {missing_tags_str} not found in project meta. Images without special tags will be marked as train"
)

@@ -79,7 +96,7 @@ def transform(api: sly.Api, task_id, context, state, app_logger):
if len(error_classes) > 0:
sly.logger.warn(
f"Project has unsupported classes. "
f"Objects with unsupported geometry types will be {PROCCESS_SHAPES_MSG}: "
f"Objects with unsupported geometry types will be {process_shapes_message}: "
f"{[obj_class.name for obj_class in error_classes]}"
)

@@ -97,9 +114,11 @@ def _add_to_split(image_id, img_name, split_ids, split_image_paths, labels_dir,
train_count = 0
val_count = 0

progress = sly.Progress("Transformation ...", api.project.get_images_count(PROJECT_ID))
for dataset in api.dataset.get_list(PROJECT_ID):
progress = sly.Progress("Transformation ...", api.project.get_images_count(project_id))
for dataset in api.dataset.get_list(project_id, recursive=True):
sly.logger.info(f"Working with dataset: {dataset.name}...")
images = api.image.get_list(dataset.id)
sly.logger.debug(f"Dataset contains {len(images)} images.")

unsupported_shapes = 0
train_ids = []
Expand All @@ -109,7 +128,7 @@ def _add_to_split(image_id, img_name, split_ids, split_image_paths, labels_dir,

for batch in sly.batched(images):
image_ids = [image_info.id for image_info in batch]
image_names = [f"{dataset.name}_{image_info.name}" for image_info in batch]
image_names = [f"{dataset.id}_{dataset.name}_{image_info.name}" for image_info in batch]
ann_infos = api.annotation.download_batch(dataset.id, image_ids)

for image_id, img_name, ann_info in zip(image_ids, image_names, ann_infos):
@@ -120,7 +139,7 @@ def _add_to_split(image_id, img_name, split_ids, split_image_paths, labels_dir,
for label in ann.labels:
if label.obj_class.geometry_type != sly.Rectangle:
unsupported_shapes += 1
if PROCCESS_SHAPES == "skip":
if process_shapes == "skip":
continue
yolov5_ann.append(transform_label(class_names, ann.img_size, label))

@@ -131,32 +150,30 @@ def _add_to_split(image_id, img_name, split_ids, split_image_paths, labels_dir,
img_name,
train_ids,
train_image_paths,
TRAIN_LABELS_DIR,
TRAIN_IMAGES_DIR,
train_labels_dir,
train_images_dir,
)
image_processed = True
train_count += 1

if ann.img_tags.get(VAL_TAG_NAME) is not None:
val_ids.append(image_id)
ann_path = os.path.join(VAL_LABELS_DIR, f"{sly.fs.get_file_name(img_name)}.txt")
ann_path = os.path.join(val_labels_dir, f"{sly.fs.get_file_name(img_name)}.txt")

_write_new_ann(ann_path, yolov5_ann)
img_path = os.path.join(VAL_IMAGES_DIR, img_name)
img_path = os.path.join(val_images_dir, img_name)
val_image_paths.append(img_path)
image_processed = True
val_count += 1

if not image_processed:
# app_logger.warn("Image does not have train or val tags. It will be placed to training set.",
# extra={"image_id": image_id, "image_name": img_name, "dataset": dataset.name})
_add_to_split(
image_id,
img_name,
train_ids,
train_image_paths,
TRAIN_LABELS_DIR,
TRAIN_IMAGES_DIR,
train_labels_dir,
train_images_dir,
)
train_count += 1

@@ -165,9 +182,9 @@ def _add_to_split(image_id, img_name, split_ids, split_image_paths, labels_dir,

progress.iters_done_report(len(batch))
if unsupported_shapes > 0:
app_logger.warn(
f"DATASET '{dataset.name}': "
f"{unsupported_shapes} objects with unsupported geometry types have been {PROCCESS_SHAPES_MSG}"
sly.logger.warn(
f"Dataset {dataset.name}: "
f"{unsupported_shapes} objects with unsupported geometry types have been {process_shapes_message}"
)

data_yaml = {
@@ -177,67 +194,17 @@ def _add_to_split(image_id, img_name, split_ids, split_image_paths, labels_dir,
"names": class_names,
"colors": class_colors,
}
with open(CONFIG_PATH, "w") as f:
data = yaml.dump(data_yaml, f, default_flow_style=None)
with open(config_path, "w") as f:
yaml.dump(data_yaml, f, default_flow_style=None)

app_logger.info("Number of images in train: {}".format(train_count))
app_logger.info("Number of images in val: {}".format(val_count))
sly.logger.info("Number of images in train: {}".format(train_count))
sly.logger.info("Number of images in val: {}".format(val_count))

sly.fs.archive_directory(RESULT_DIR, RESULT_ARCHIVE)
app_logger.info("Result directory is archived")
# Archiving and uploading the directory to the TeamFiles.
sly.output.set_download(result_dir)
sly.logger.info("File uploaded, app stopped.")

remote_archive_path = os.path.join(
sly.team_files.RECOMMENDED_EXPORT_PATH, "yolov5_format/{}/{}".format(task_id, ARCHIVE_NAME)
)

# @TODO: uncomment only for debug
# api.file.remove(TEAM_ID, remote_archive_path)

upload_progress = []

def _print_progress(monitor, upload_progress):
if len(upload_progress) == 0:
upload_progress.append(
sly.Progress(
message="Upload {!r}".format(ARCHIVE_NAME),
total_cnt=monitor.len,
ext_logger=app_logger,
is_size=True,
)
)
upload_progress[0].set_current_value(monitor.bytes_read)

file_info = api.file.upload(
TEAM_ID, RESULT_ARCHIVE, remote_archive_path, lambda m: _print_progress(m, upload_progress)
)
app_logger.info("Uploaded to Team-Files: {!r}".format(file_info.storage_path))
api.task.set_output_archive(
task_id, file_info.id, ARCHIVE_NAME, file_url=file_info.storage_path
)

my_app.stop()


def main():
sly.logger.info(
"Script arguments",
extra={
"context.teamId": TEAM_ID,
"context.workspaceId": WORKSPACE_ID,
"modal.state.slyProjectId": PROJECT_ID,
"CONFIG_DIR": os.environ.get("CONFIG_DIR", "ENV not found"),
},
)

api = sly.Api.from_env()

# Run application service
my_app.run(initial_events=[{"command": "transform"}])


# @TODO: add information to modal window
if __name__ == "__main__":
# @TODO: uncomment only for debug
# sly.fs.clean_dir(my_app.data_dir)

sly.main_wrapper("main", main, log_for_agent=False)
api = sly.Api.from_env()
transform(api)
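
With recursive dataset listing, image and label file names are prefixed with {dataset.id}_{dataset.name}_, so files from nested datasets cannot collide inside the flat images/ and labels/ folders. A short, hypothetical sketch of reading one exported label file back into pixel coordinates, the inverse of transform_label (every path and size below is an illustrative placeholder):

import os
import yaml

result_dir = "data/12345_my_project"  # hypothetical "<project.id>_<project.name>" folder
with open(os.path.join(result_dir, "data_config.yaml")) as f:
    cfg = yaml.safe_load(f)

img_width, img_height = 1920, 1080  # would come from the matching image file
label_path = os.path.join(result_dir, "labels/train", "55_ds1_img_0001.txt")
with open(label_path) as f:
    for line in f:
        cls_idx, xc, yc, w, h = line.split()
        class_name = cfg["names"][int(cls_idx)]
        # De-normalize back to pixel units.
        box_w, box_h = float(w) * img_width, float(h) * img_height
        left = float(xc) * img_width - box_w / 2
        top = float(yc) * img_height - box_h / 2
        print(class_name, round(left), round(top), round(box_w), round(box_h))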