From 6ae19df0cf62c8c35cf12906f8498af68dbe2eff Mon Sep 17 00:00:00 2001 From: cxnt Date: Wed, 4 Oct 2023 16:55:19 +0400 Subject: [PATCH] rework repo, add support for Rectangle shape --- .gitignore | 3 +- .vscode/launch.json | 16 ++ .vscode/settings.json | 35 ++++ README.md | 5 +- config.json | 49 +++--- debug.env | 19 --- dev_requirements.txt | 2 +- local.env | 15 ++ src/export-pascal-voc.py | 340 --------------------------------------- src/globals.py | 51 ++++++ src/main.py | 184 +++++++++++++++++++++ src/utils.py | 166 +++++++++++++++++++ 12 files changed, 495 insertions(+), 390 deletions(-) create mode 100644 .vscode/launch.json create mode 100644 .vscode/settings.json delete mode 100644 debug.env create mode 100644 local.env delete mode 100644 src/export-pascal-voc.py create mode 100644 src/globals.py create mode 100644 src/main.py create mode 100644 src/utils.py diff --git a/.gitignore b/.gitignore index e0386ed..6c2c93e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .idea -/debug +.venv +src/debug secret_debug.env \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..40e7d4b --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,16 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Python: Current File", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal", + "justMyCode": true, + "env": { + "PYTHONPATH": "${workspaceFolder}:${PYTHONPATH}" + } + } + ] +} diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..f1691c4 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,35 @@ +{ + "files.exclude": { + "**/__pycache__": true, + "build": true, + "supervisely.egg-info": true, + ".venv": true + }, + "python.defaultInterpreterPath": ".venv/bin/python", + "editor.formatOnSave": true, + "editor.formatOnPaste": true, + "editor.formatOnType": true, + "black-formatter.args": [ + "--line-length", + "100" + ], + "[html]": { + "editor.defaultFormatter": "esbenp.prettier-vscode" + }, + "[json]": { + "editor.defaultFormatter": "esbenp.prettier-vscode" + }, + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter", + }, + "debug.inlineValues": "off", + "python.analysis.typeCheckingMode": "off", + "python.analysis.autoImportCompletions": false, + "autoDocstring.docstringFormat": "sphinx", + "autoDocstring.customTemplatePath": "docs/.mustache", + "python.testing.pytestArgs": [ + "tests/inference_cache" + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true +} \ No newline at end of file diff --git a/README.md b/README.md index 78c2d45..9a64629 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,8 @@ Converts [Supervisely](https://docs.supervise.ly/data-organization/00_ann_format ## Preparation -There are special requirements for Supervisely project, classes must have `Polygon` or `Bitmap` shape, all other shapes will be skipped. It means that only labeled objects with these shapes will be rendered as masks. + +There are special requirements for input Supervisely project, classes must have `Polygon`, `Bitmap` or `Rectangle` shape, all other shapes will be skipped. Only objects with `Polygon` and `Bitmap` shapes will be rendered as masks. Pascal VOC format stores all data in separate folders. Image classes bounding boxes and additional information are stored in `.xml` files. 
Segmentantion class and object masks are placed into `SegmentationClass` and `SegmentationObject` folders respectively. **All image Tags, except `train` and `val` will be skipped**. @@ -135,6 +136,6 @@ App creates task in `workspace tasks` list. Once app is finished, you will see d -Resulting archive is saved in : +Resulting archive is saved in: `Current Team` -> `Files` -> `/pascal_voc_format//__pascal_format.tar`. diff --git a/config.json b/config.json index ad03749..3ca208a 100644 --- a/config.json +++ b/config.json @@ -1,28 +1,23 @@ { - "name": "Export to Pascal VOC", - "type": "app", - "categories": [ - "images", - "export" - ], - "description": "Converts Supervisely Project to Pascal VOC format", - "docker_image": "supervisely/import-export:0.0.5", - "instance_version": "6.5.1", - "main_script": "src/export-pascal-voc.py", - "modal_template": "src/modal.html", - "modal_template_state": { - "pascalContourThickness": 3, - "trainSplitCoef": 0.8 - }, - "task_location": "workspace_tasks", - "icon": "https://i.imgur.com/zTseThz.png", - "icon_background": "#FFFFFF", - "headless": true, - "context_menu": { - "target": [ - "images_project" - ], - "context_root": "Download as" - }, - "poster": "https://user-images.githubusercontent.com/48245050/182382862-d74f1b2c-b19e-47c2-84db-45cd934ec34e.png" -} \ No newline at end of file + "name": "Export to Pascal VOC", + "type": "app", + "categories": ["images", "export"], + "description": "Converts Supervisely Project to Pascal VOC format", + "docker_image": "supervisely/import-export:6.72.147", + "instance_version": "6.5.1", + "main_script": "src/main.py", + "modal_template": "src/modal.html", + "modal_template_state": { + "pascalContourThickness": 3, + "trainSplitCoef": 0.8 + }, + "task_location": "workspace_tasks", + "icon": "https://i.imgur.com/zTseThz.png", + "icon_background": "#FFFFFF", + "headless": true, + "context_menu": { + "target": ["images_project"], + "context_root": "Download as" + }, + "poster": "https://user-images.githubusercontent.com/48245050/182382862-d74f1b2c-b19e-47c2-84db-45cd934ec34e.png" +} diff --git a/debug.env b/debug.env deleted file mode 100644 index 8b766f9..0000000 --- a/debug.env +++ /dev/null @@ -1,19 +0,0 @@ -PYTHONUNBUFFERED=1 - -DEBUG_APP_DIR="/home/paul/work/apps/export-to-pascal-voc/debug/app_debug_data" -DEBUG_CACHE_DIR="/home/paul/work/apps/export-to-pascal-voc/debug/app_debug_cache" -LOG_LEVEL="debug" - -TASK_ID=4562 - -context.teamId=8 -context.workspaceId=58 -modal.state.slyProjectId=1801 - -modal.state.pascalContourThickness=3 -modal.state.trainSplitCoef=0.8 - -SERVER_ADDRESS="put your value here in secret_debug.env" -API_TOKEN="put your value here in secret_debug.env" -AGENT_TOKEN="put your value here in secret_debug.env" - diff --git a/dev_requirements.txt b/dev_requirements.txt index d70b97b..b406843 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -1,4 +1,4 @@ -supervisely==6.68.109 +supervisely==6.72.147 lxml==4.6.3 numpy>=1.19.4 Pillow>=8.0.1 diff --git a/local.env b/local.env new file mode 100644 index 0000000..7ba0fec --- /dev/null +++ b/local.env @@ -0,0 +1,15 @@ +PYTHONUNBUFFERED=1 + +LOG_LEVEL="debug" + +TASK_ID=44728 + +context.teamId=8 +context.workspaceId=349 +modal.state.slyProjectId=22451 + +modal.state.pascalContourThickness=3 +modal.state.trainSplitCoef=0.8 + +DEBUG_APP_DIR="src/debug/app_debug_data" +DEBUG_CACHE_DIR="src/debug/app_debug_cache" diff --git a/src/export-pascal-voc.py b/src/export-pascal-voc.py deleted file mode 100644 index d834950..0000000 --- 
a/src/export-pascal-voc.py +++ /dev/null @@ -1,340 +0,0 @@ -import os -import numpy as np -import lxml.etree as ET -import supervisely as sly -from PIL import Image -from shutil import copyfile -from collections import OrderedDict -from supervisely.imaging.color import generate_rgb -from supervisely.app.v1.app_service import AppService - -my_app = AppService() - -TEAM_ID = int(os.environ['context.teamId']) -WORKSPACE_ID = int(os.environ['context.workspaceId']) -PROJECT_ID = int(os.environ['modal.state.slyProjectId']) - -PASCAL_CONTOUR_THICKNESS = int(os.environ['modal.state.pascalContourThickness']) -TRAIN_VAL_SPLIT_COEF = float(os.environ['modal.state.trainSplitCoef']) - -ARCHIVE_NAME_ENDING = '_pascal_voc.tar.gz' -RESULT_DIR_NAME_ENDING = '_pascal_voc' -RESULT_SUBDIR_NAME = 'VOCdevkit/VOC' - -images_dir_name = 'JPEGImages' -ann_dir_name = 'Annotations' -ann_class_dir_name = 'SegmentationClass' -ann_obj_dir_name = 'SegmentationObject' - -trainval_sets_dir_name = 'ImageSets' -trainval_sets_main_name = 'Main' -trainval_sets_segm_name = 'Segmentation' - -train_txt_name = 'train.txt' -val_txt_name = 'val.txt' - -is_trainval = None - -pascal_contour_color = [224, 224, 192] -pascal_ann_ext = '.png' - -TRAIN_TAG_NAME = 'train' -VAL_TAG_NAME = 'val' -SPLIT_TAGS = set([TRAIN_TAG_NAME, VAL_TAG_NAME]) - -VALID_IMG_EXT = set(['.jpe', '.jpeg', '.jpg']) -SUPPORTED_GEOMETRY_TYPES = set([sly.Bitmap, sly.Polygon]) - -if TRAIN_VAL_SPLIT_COEF > 1 or TRAIN_VAL_SPLIT_COEF < 0: - raise ValueError('train_val_split_coef should be between 0 and 1, your data is {}'.format(TRAIN_VAL_SPLIT_COEF)) - - -def from_ann_to_instance_mask(ann, mask_outpath, contour_thickness): - mask = np.zeros((ann.img_size[0], ann.img_size[1], 3), dtype=np.uint8) - for label in ann.labels: - if label.obj_class.name == "neutral": - label.geometry.draw(mask, pascal_contour_color) - continue - - label.geometry.draw_contour(mask, pascal_contour_color, contour_thickness) - label.geometry.draw(mask, label.obj_class.color) - - im = Image.fromarray(mask) - im = im.convert("P", palette=Image.ADAPTIVE) - im.save(mask_outpath) - - -def from_ann_to_class_mask(ann, mask_outpath, contour_thickness): - exist_colors = [[0, 0, 0], pascal_contour_color] - mask = np.zeros((ann.img_size[0], ann.img_size[1], 3), dtype=np.uint8) - for label in ann.labels: - if label.obj_class.name == "neutral": - label.geometry.draw(mask, pascal_contour_color) - continue - - new_color = generate_rgb(exist_colors) - exist_colors.append(new_color) - label.geometry.draw_contour(mask, pascal_contour_color, contour_thickness) - label.geometry.draw(mask, new_color) - - im = Image.fromarray(mask) - im = im.convert("P", palette=Image.ADAPTIVE) - im.save(mask_outpath) - - -def ann_to_xml(project_info, image_info, img_filename, result_ann_dir, ann): - xml_root = ET.Element("annotation") - - ET.SubElement(xml_root, "folder").text = "VOC_" + project_info.name - ET.SubElement(xml_root, "filename").text = img_filename - - xml_root_source = ET.SubElement(xml_root, "source") - ET.SubElement(xml_root_source, "database").text = "Supervisely Project ID:" + str(project_info.id) - ET.SubElement(xml_root_source, "annotation").text = "PASCAL VOC" - ET.SubElement(xml_root_source, "image").text = "Supervisely Image ID:" + str(image_info.id) - - xml_root_size = ET.SubElement(xml_root, "size") - ET.SubElement(xml_root_size, "width").text = str(image_info.width) - ET.SubElement(xml_root_size, "height").text = str(image_info.height) - ET.SubElement(xml_root_size, "depth").text = "3" - - 
ET.SubElement(xml_root, "segmented").text = "1" if len(ann.labels) > 0 else "0" - - for label in ann.labels: - if label.obj_class.name == "neutral": - continue - - bitmap_to_bbox = label.geometry.to_bbox() - - xml_ann_obj = ET.SubElement(xml_root, "object") - ET.SubElement(xml_ann_obj, "name").text = label.obj_class.name - ET.SubElement(xml_ann_obj, "pose").text = "Unspecified" - ET.SubElement(xml_ann_obj, "truncated").text = "0" - ET.SubElement(xml_ann_obj, "difficult").text = "0" - - xml_ann_obj_bndbox = ET.SubElement(xml_ann_obj, "bndbox") - ET.SubElement(xml_ann_obj_bndbox, "xmin").text = str(bitmap_to_bbox.left) - ET.SubElement(xml_ann_obj_bndbox, "ymin").text = str(bitmap_to_bbox.top) - ET.SubElement(xml_ann_obj_bndbox, "xmax").text = str(bitmap_to_bbox.right) - ET.SubElement(xml_ann_obj_bndbox, "ymax").text = str(bitmap_to_bbox.bottom) - - tree = ET.ElementTree(xml_root) - - img_name = os.path.join(result_ann_dir, os.path.splitext(img_filename)[0] + ".xml") - ann_path = (os.path.join(result_ann_dir, img_name)) - ET.indent(tree, space=" ") - tree.write(ann_path, pretty_print=True) - - -def find_first_tag(img_tags, split_tags): - for tag in split_tags: - if img_tags.has_key(tag): - return img_tags.get(tag) - return None - - -def write_main_set(is_trainval, images_stats, meta_json, result_imgsets_dir): - result_imgsets_main_subdir = os.path.join(result_imgsets_dir, trainval_sets_main_name) - result_imgsets_segm_subdir = os.path.join(result_imgsets_dir, trainval_sets_segm_name) - sly.fs.mkdir(result_imgsets_main_subdir) - - res_files = ["trainval.txt", "train.txt", "val.txt"] - for file in os.listdir(result_imgsets_segm_subdir): - if file in res_files: - copyfile(os.path.join(result_imgsets_segm_subdir, file), os.path.join(result_imgsets_main_subdir, file)) - - train_imgs = [i for i in images_stats if i['dataset'] == TRAIN_TAG_NAME] - val_imgs = [i for i in images_stats if i['dataset'] == VAL_TAG_NAME] - - write_objs = [ - {'suffix': 'trainval', 'imgs': images_stats}, - {'suffix': 'train', 'imgs': train_imgs}, - {'suffix': 'val', 'imgs': val_imgs}, - ] - - if is_trainval == 1: - trainval_imgs = [i for i in images_stats if i['dataset'] == TRAIN_TAG_NAME + VAL_TAG_NAME] - write_objs[0] = {'suffix': 'trainval', 'imgs': trainval_imgs} - - for obj_cls in meta_json.obj_classes: - if obj_cls.geometry_type not in SUPPORTED_GEOMETRY_TYPES: - continue - if obj_cls.name == 'neutral': - continue - for o in write_objs: - with open(os.path.join(result_imgsets_main_subdir, f'{obj_cls.name}_{o["suffix"]}.txt'), 'w') as f: - for img_stats in o['imgs']: - v = "1" if obj_cls.name in img_stats['classes'] else "-1" - f.write(f'{img_stats["name"]} {v}\n') - - -def write_segm_set(is_trainval, images_stats, result_imgsets_dir): - result_imgsets_segm_subdir = os.path.join(result_imgsets_dir, trainval_sets_segm_name) - sly.fs.mkdir(result_imgsets_segm_subdir) - - with open(os.path.join(result_imgsets_segm_subdir, 'trainval.txt'), 'w') as f: - if is_trainval ==1: - f.writelines(i['name'] + '\n' for i in images_stats if i['dataset'] == TRAIN_TAG_NAME+VAL_TAG_NAME) - else: - f.writelines(i['name'] + '\n' for i in images_stats) - with open(os.path.join(result_imgsets_segm_subdir, 'train.txt'), 'w') as f: - f.writelines(i['name'] + '\n' for i in images_stats if i['dataset'] == TRAIN_TAG_NAME) - with open(os.path.join(result_imgsets_segm_subdir, 'val.txt'), 'w') as f: - f.writelines(i['name'] + '\n' for i in images_stats if i['dataset'] == VAL_TAG_NAME) - - -@my_app.callback("from_sly_to_pascal") -@sly.timeit -def 
from_sly_to_pascal(api: sly.Api, task_id, context, state, app_logger): - global PASCAL_CONTOUR_THICKNESS, TRAIN_VAL_SPLIT_COEF - - project_info = api.project.get_info_by_id(PROJECT_ID) - meta_json = api.project.get_meta(PROJECT_ID) - meta = sly.ProjectMeta.from_json(meta_json) - app_logger.info("Palette has been created") - - full_archive_name = str(project_info.id) + '_' + project_info.name + ARCHIVE_NAME_ENDING - full_result_dir_name = str(project_info.id) + '_' + project_info.name + RESULT_DIR_NAME_ENDING - - result_archive = os.path.join(my_app.data_dir, full_archive_name) - result_dir = os.path.join(my_app.data_dir, full_result_dir_name) - result_subdir = os.path.join(result_dir, RESULT_SUBDIR_NAME) - - result_ann_dir = os.path.join(result_subdir, ann_dir_name) - result_images_dir = os.path.join(result_subdir, images_dir_name) - result_class_dir_name = os.path.join(result_subdir, ann_class_dir_name) - result_obj_dir = os.path.join(result_subdir, ann_obj_dir_name) - result_imgsets_dir = os.path.join(result_subdir, trainval_sets_dir_name) - - sly.fs.mkdir(result_ann_dir) - sly.fs.mkdir(result_imgsets_dir) - sly.fs.mkdir(result_images_dir) - sly.fs.mkdir(result_class_dir_name) - sly.fs.mkdir(result_obj_dir) - - app_logger.info("Pascal VOC directories have been created") - - images_stats = [] - classes_colors = {} - - datasets = api.dataset.get_list(PROJECT_ID) - dataset_names = ['trainval', 'val', 'train'] - progress = sly.Progress('Preparing images for export', api.project.get_images_count(PROJECT_ID), app_logger) - for dataset in datasets: - if dataset.name in dataset_names: - is_trainval = 1 - else: - is_trainval = 0 - - images = api.image.get_list(dataset.id) - for batch in sly.batched(images): - image_ids = [image_info.id for image_info in batch] - image_paths = [os.path.join(result_images_dir, image_info.name) for image_info in batch] - - api.image.download_paths(dataset.id, image_ids, image_paths) - ann_infos = api.annotation.download_batch(dataset.id, image_ids) - for image_info, ann_info in zip(batch, ann_infos): - img_title, img_ext = os.path.splitext(image_info.name) - cur_img_filename = image_info.name - - if is_trainval == 1: - cur_img_stats = {'classes': set(), 'dataset': dataset.name, 'name': img_title} - images_stats.append(cur_img_stats) - else: - cur_img_stats = {'classes': set(), 'dataset': None, 'name': img_title} - images_stats.append(cur_img_stats) - - if img_ext not in VALID_IMG_EXT: - orig_image_path = os.path.join(result_images_dir, cur_img_filename) - - jpg_image = img_title + ".jpg" - jpg_image_path = os.path.join(result_images_dir, jpg_image) - - im = sly.image.read(orig_image_path) - sly.image.write(jpg_image_path, im) - sly.fs.silent_remove(orig_image_path) - - ann = sly.Annotation.from_json(ann_info.annotation, meta) - tag = find_first_tag(ann.img_tags, SPLIT_TAGS) - if tag is not None: - cur_img_stats['dataset'] = tag.meta.name - - valid_labels = [] - for label in ann.labels: - if type(label.geometry) in SUPPORTED_GEOMETRY_TYPES: - valid_labels.append(label) - else: - app_logger.warn( - f"Label has unsupported geometry type ({type(label.geometry)}) and will be skipped.") - - ann = ann.clone(labels=valid_labels) - ann_to_xml(project_info, image_info, cur_img_filename, result_ann_dir, ann) - for label in ann.labels: - cur_img_stats['classes'].add(label.obj_class.name) - classes_colors[label.obj_class.name] = tuple(label.obj_class.color) - - fake_contour_th = 0 - if PASCAL_CONTOUR_THICKNESS != 0: - fake_contour_th = 2 * PASCAL_CONTOUR_THICKNESS + 1 - - 
from_ann_to_instance_mask(ann, os.path.join(result_class_dir_name, img_title + pascal_ann_ext), fake_contour_th) - from_ann_to_class_mask(ann, os.path.join(result_obj_dir, img_title + pascal_ann_ext), fake_contour_th) - - progress.iter_done_report() - - classes_colors = OrderedDict((sorted(classes_colors.items(), key=lambda t: t[0]))) - - with open(os.path.join(result_subdir, "colors.txt"), "w") as cc: - if PASCAL_CONTOUR_THICKNESS != 0: - cc.write(f"neutral {pascal_contour_color[0]} {pascal_contour_color[1]} {pascal_contour_color[2]}\n") - - for k in classes_colors.keys(): - if k == 'neutral': - continue - - cc.write(f"{k} {classes_colors[k][0]} {classes_colors[k][1]} {classes_colors[k][2]}\n") - - imgs_to_split = [i for i in images_stats if i['dataset'] is None] - train_len = int(len(imgs_to_split) * TRAIN_VAL_SPLIT_COEF) - - for img_stat in imgs_to_split[:train_len]: img_stat['dataset'] = TRAIN_TAG_NAME - for img_stat in imgs_to_split[train_len:]: img_stat['dataset'] = VAL_TAG_NAME - - write_segm_set(is_trainval, images_stats, result_imgsets_dir) - write_main_set(is_trainval, images_stats, meta, result_imgsets_dir) - - sly.fs.archive_directory(result_dir, result_archive) - app_logger.info("Result directory is archived") - - upload_progress = [] - remote_archive_path = os.path.join( - sly.team_files.RECOMMENDED_EXPORT_PATH, "export-to-Pascal-VOC/{}/{}".format(task_id, full_archive_name)) - - def _print_progress(monitor, upload_progress): - if len(upload_progress) == 0: - upload_progress.append(sly.Progress(message="Upload {!r}".format(full_archive_name), - total_cnt=monitor.len, - ext_logger=app_logger, - is_size=True)) - upload_progress[0].set_current_value(monitor.bytes_read) - - file_info = api.file.upload(TEAM_ID, result_archive, remote_archive_path, - lambda m: _print_progress(m, upload_progress)) - app_logger.info("Uploaded to Team-Files: {!r}".format(file_info.storage_path)) - api.task.set_output_archive(task_id, file_info.id, full_archive_name, file_url=file_info.storage_path) - - my_app.stop() - - -def main(): - sly.logger.info("Script arguments", extra={ - "TEAM_ID": TEAM_ID, - "WORKSPACE_ID": WORKSPACE_ID, - "PROJECT_ID": PROJECT_ID - }) - - my_app.run(initial_events=[{"command": "from_sly_to_pascal"}]) - - -if __name__ == '__main__': - sly.main_wrapper("main", main) diff --git a/src/globals.py b/src/globals.py new file mode 100644 index 0000000..26ee4e8 --- /dev/null +++ b/src/globals.py @@ -0,0 +1,51 @@ +import os +from dotenv import load_dotenv +import supervisely as sly +from supervisely.app.v1.app_service import AppService + +if sly.is_development(): + load_dotenv("local.env") + load_dotenv(os.path.expanduser("~/supervisely.env")) + + +my_app = AppService() + +TEAM_ID = int(os.environ["context.teamId"]) +WORKSPACE_ID = int(os.environ["context.workspaceId"]) +PROJECT_ID = int(os.environ["modal.state.slyProjectId"]) + +PASCAL_CONTOUR_THICKNESS = int(os.environ["modal.state.pascalContourThickness"]) +TRAIN_VAL_SPLIT_COEF = float(os.environ["modal.state.trainSplitCoef"]) + +ARCHIVE_NAME_ENDING = "_pascal_voc.tar.gz" +RESULT_DIR_NAME_ENDING = "_pascal_voc" +RESULT_SUBDIR_NAME = "VOCdevkit/VOC" + +images_dir_name = "JPEGImages" +ann_dir_name = "Annotations" +ann_class_dir_name = "SegmentationClass" +ann_obj_dir_name = "SegmentationObject" + +trainval_sets_dir_name = "ImageSets" +trainval_sets_main_name = "Main" +trainval_sets_segm_name = "Segmentation" + +train_txt_name = "train.txt" +val_txt_name = "val.txt" + +is_trainval = None + +pascal_contour_color = [224, 224, 192] 
+pascal_ann_ext = ".png" + +TRAIN_TAG_NAME = "train" +VAL_TAG_NAME = "val" +SPLIT_TAGS = {TRAIN_TAG_NAME, VAL_TAG_NAME} + +VALID_IMG_EXT = {".jpe", ".jpeg", ".jpg"} +SUPPORTED_GEOMETRY_TYPES = {sly.Bitmap, sly.Polygon, sly.Rectangle} + +if TRAIN_VAL_SPLIT_COEF > 1 or TRAIN_VAL_SPLIT_COEF < 0: + raise ValueError( + f"train_val_split_coef should be between 0 and 1, your data is {TRAIN_VAL_SPLIT_COEF}" + ) diff --git a/src/main.py b/src/main.py new file mode 100644 index 0000000..fc32531 --- /dev/null +++ b/src/main.py @@ -0,0 +1,184 @@ +import os +import supervisely as sly +from collections import OrderedDict +import globals as g +import utils + + +@g.my_app.callback("from_sly_to_pascal") +@sly.timeit +def from_sly_to_pascal(api: sly.Api, task_id, context, state, app_logger): + project_info = api.project.get_info_by_id(g.PROJECT_ID) + meta_json = api.project.get_meta(g.PROJECT_ID) + meta = sly.ProjectMeta.from_json(meta_json) + app_logger.info("Palette has been created") + + full_archive_name = f"{str(project_info.id)}_{project_info.name}{g.ARCHIVE_NAME_ENDING}" + full_result_dir_name = f"{str(project_info.id)}_{project_info.name}{g.RESULT_DIR_NAME_ENDING}" + + result_archive = os.path.join(g.my_app.data_dir, full_archive_name) + result_dir = os.path.join(g.my_app.data_dir, full_result_dir_name) + result_subdir = os.path.join(result_dir, g.RESULT_SUBDIR_NAME) + + result_ann_dir = os.path.join(result_subdir, g.ann_dir_name) + result_images_dir = os.path.join(result_subdir, g.images_dir_name) + result_class_dir_name = os.path.join(result_subdir, g.ann_class_dir_name) + result_obj_dir = os.path.join(result_subdir, g.ann_obj_dir_name) + result_imgsets_dir = os.path.join(result_subdir, g.trainval_sets_dir_name) + + sly.fs.mkdir(result_ann_dir) + sly.fs.mkdir(result_imgsets_dir) + sly.fs.mkdir(result_images_dir) + sly.fs.mkdir(result_class_dir_name) + sly.fs.mkdir(result_obj_dir) + + app_logger.info("Pascal VOC directories have been created") + + images_stats = [] + classes_colors = {} + + datasets = api.dataset.get_list(g.PROJECT_ID) + dataset_names = ["trainval", "val", "train"] + progress = sly.Progress( + "Preparing images for export", api.project.get_images_count(g.PROJECT_ID), app_logger + ) + for dataset in datasets: + if dataset.name in dataset_names: + is_trainval = 1 + else: + is_trainval = 0 + + images = api.image.get_list(dataset.id) + for batch in sly.batched(images): + image_ids = [image_info.id for image_info in batch] + image_paths = [os.path.join(result_images_dir, image_info.name) for image_info in batch] + + api.image.download_paths(dataset.id, image_ids, image_paths) + ann_infos = api.annotation.download_batch(dataset.id, image_ids) + for image_info, ann_info in zip(batch, ann_infos): + img_title, img_ext = os.path.splitext(image_info.name) + cur_img_filename = image_info.name + + if is_trainval == 1: + cur_img_stats = {"classes": set(), "dataset": dataset.name, "name": img_title} + images_stats.append(cur_img_stats) + else: + cur_img_stats = {"classes": set(), "dataset": None, "name": img_title} + images_stats.append(cur_img_stats) + + if img_ext not in g.VALID_IMG_EXT: + orig_image_path = os.path.join(result_images_dir, cur_img_filename) + + jpg_image = f"{img_title}.jpg" + jpg_image_path = os.path.join(result_images_dir, jpg_image) + + im = sly.image.read(orig_image_path) + sly.image.write(jpg_image_path, im) + sly.fs.silent_remove(orig_image_path) + + ann = sly.Annotation.from_json(ann_info.annotation, meta) + tag = utils.find_first_tag(ann.img_tags, g.SPLIT_TAGS) + if tag 
is not None: + cur_img_stats["dataset"] = tag.meta.name + + valid_labels = [] + for label in ann.labels: + if type(label.geometry) in g.SUPPORTED_GEOMETRY_TYPES: + valid_labels.append(label) + else: + app_logger.warn( + f"Label has unsupported geometry type ({type(label.geometry)}) and will be skipped." + ) + + ann = ann.clone(labels=valid_labels) + utils.ann_to_xml(project_info, image_info, cur_img_filename, result_ann_dir, ann) + for label in ann.labels: + cur_img_stats["classes"].add(label.obj_class.name) + classes_colors[label.obj_class.name] = tuple(label.obj_class.color) + + fake_contour_th = 0 + if g.PASCAL_CONTOUR_THICKNESS != 0: + fake_contour_th = 2 * g.PASCAL_CONTOUR_THICKNESS + 1 + + utils.from_ann_to_instance_mask( + ann, + os.path.join(result_class_dir_name, img_title + g.pascal_ann_ext), + fake_contour_th, + ) + utils.from_ann_to_class_mask( + ann, os.path.join(result_obj_dir, img_title + g.pascal_ann_ext), fake_contour_th + ) + + progress.iter_done_report() + + classes_colors = OrderedDict((sorted(classes_colors.items(), key=lambda t: t[0]))) + + with open(os.path.join(result_subdir, "colors.txt"), "w") as cc: + if g.PASCAL_CONTOUR_THICKNESS != 0: + cc.write( + f"neutral {g.pascal_contour_color[0]} {g.pascal_contour_color[1]} {g.pascal_contour_color[2]}\n" + ) + + for k in classes_colors.keys(): + if k == "neutral": + continue + + cc.write(f"{k} {classes_colors[k][0]} {classes_colors[k][1]} {classes_colors[k][2]}\n") + + imgs_to_split = [i for i in images_stats if i["dataset"] is None] + train_len = int(len(imgs_to_split) * g.TRAIN_VAL_SPLIT_COEF) + + for img_stat in imgs_to_split[:train_len]: + img_stat["dataset"] = g.TRAIN_TAG_NAME + for img_stat in imgs_to_split[train_len:]: + img_stat["dataset"] = g.VAL_TAG_NAME + + utils.write_segm_set(is_trainval, images_stats, result_imgsets_dir) + utils.write_main_set(is_trainval, images_stats, meta, result_imgsets_dir) + + sly.fs.archive_directory(result_dir, result_archive) + app_logger.info("Result directory is archived") + + upload_progress = [] + remote_archive_path = os.path.join( + sly.team_files.RECOMMENDED_EXPORT_PATH, + "export-to-Pascal-VOC/{}/{}".format(task_id, full_archive_name), + ) + + def _print_progress(monitor, upload_progress): + if len(upload_progress) == 0: + upload_progress.append( + sly.Progress( + message="Upload {!r}".format(full_archive_name), + total_cnt=monitor.len, + ext_logger=app_logger, + is_size=True, + ) + ) + upload_progress[0].set_current_value(monitor.bytes_read) + + file_info = api.file.upload( + g.TEAM_ID, + result_archive, + remote_archive_path, + lambda m: _print_progress(m, upload_progress), + ) + app_logger.info("Uploaded to Team-Files: {!r}".format(file_info.storage_path)) + api.task.set_output_archive( + task_id, file_info.id, full_archive_name, file_url=file_info.storage_path + ) + + g.my_app.stop() + + +def main(): + sly.logger.info( + "Script arguments", + extra={"TEAM_ID": g.TEAM_ID, "WORKSPACE_ID": g.WORKSPACE_ID, "PROJECT_ID": g.PROJECT_ID}, + ) + + g.my_app.run(initial_events=[{"command": "from_sly_to_pascal"}]) + + +if __name__ == "__main__": + sly.main_wrapper("main", main) diff --git a/src/utils.py b/src/utils.py new file mode 100644 index 0000000..8114518 --- /dev/null +++ b/src/utils.py @@ -0,0 +1,166 @@ +import os +from typing import List +import numpy as np +import lxml.etree as ET +import supervisely as sly +from PIL import Image +from shutil import copyfile +from supervisely.imaging.color import generate_rgb +from supervisely.io.fs import get_file_name + +import globals 
as g + + +def from_ann_to_instance_mask(ann: sly.Annotation, mask_outpath, contour_thickness): + mask = np.zeros((ann.img_size[0], ann.img_size[1], 3), dtype=np.uint8) + for label in ann.labels: + if label.obj_class.geometry_type == sly.Rectangle: + continue + + if label.obj_class.name == "neutral": + label.geometry.draw(mask, g.pascal_contour_color) + continue + + label.geometry.draw_contour(mask, g.pascal_contour_color, contour_thickness) + label.geometry.draw(mask, label.obj_class.color) + + im = Image.fromarray(mask) + im = im.convert("P", palette=Image.ADAPTIVE) + im.save(mask_outpath) + + +def from_ann_to_class_mask(ann: sly.Annotation, mask_outpath, contour_thickness): + exist_colors = [[0, 0, 0], g.pascal_contour_color] + mask = np.zeros((ann.img_size[0], ann.img_size[1], 3), dtype=np.uint8) + for label in ann.labels: + if label.obj_class.geometry_type == sly.Rectangle: + continue + + if label.obj_class.name == "neutral": + label.geometry.draw(mask, g.pascal_contour_color) + continue + + new_color = generate_rgb(exist_colors) + exist_colors.append(new_color) + label.geometry.draw_contour(mask, g.pascal_contour_color, contour_thickness) + label.geometry.draw(mask, new_color) + + im = Image.fromarray(mask) + im = im.convert("P", palette=Image.ADAPTIVE) + im.save(mask_outpath) + + +def ann_to_xml(project_info, image_info, img_filename, result_ann_dir, ann): + xml_root = ET.Element("annotation") + + ET.SubElement(xml_root, "folder").text = f"VOC_{project_info.name}" + ET.SubElement(xml_root, "filename").text = img_filename + + xml_root_source = ET.SubElement(xml_root, "source") + ET.SubElement( + xml_root_source, "database" + ).text = f"Supervisely Project ID:{str(project_info.id)}" + + ET.SubElement(xml_root_source, "annotation").text = "PASCAL VOC" + ET.SubElement(xml_root_source, "image").text = f"Supervisely Image ID:{str(image_info.id)}" + + xml_root_size = ET.SubElement(xml_root, "size") + ET.SubElement(xml_root_size, "width").text = str(image_info.width) + ET.SubElement(xml_root_size, "height").text = str(image_info.height) + ET.SubElement(xml_root_size, "depth").text = "3" + + ET.SubElement(xml_root, "segmented").text = "1" if len(ann.labels) > 0 else "0" + + for label in ann.labels: + if label.obj_class.name == "neutral": + continue + + bitmap_to_bbox = label.geometry.to_bbox() + + xml_ann_obj = ET.SubElement(xml_root, "object") + ET.SubElement(xml_ann_obj, "name").text = label.obj_class.name + ET.SubElement(xml_ann_obj, "pose").text = "Unspecified" + ET.SubElement(xml_ann_obj, "truncated").text = "0" + ET.SubElement(xml_ann_obj, "difficult").text = "0" + + xml_ann_obj_bndbox = ET.SubElement(xml_ann_obj, "bndbox") + ET.SubElement(xml_ann_obj_bndbox, "xmin").text = str(bitmap_to_bbox.left) + ET.SubElement(xml_ann_obj_bndbox, "ymin").text = str(bitmap_to_bbox.top) + ET.SubElement(xml_ann_obj_bndbox, "xmax").text = str(bitmap_to_bbox.right) + ET.SubElement(xml_ann_obj_bndbox, "ymax").text = str(bitmap_to_bbox.bottom) + + tree = ET.ElementTree(xml_root) + + # img_name = os.path.join(result_ann_dir, f"{os.path.splitext(img_filename)[0]}.xml") + img_name = f"{get_file_name(img_filename)}.xml" + ann_path = os.path.join(result_ann_dir, img_name) + ET.indent(tree, space=" ") + tree.write(ann_path, pretty_print=True) + + +def find_first_tag(img_tags: sly.TagCollection, split_tags: set) -> sly.Tag: + for tag in split_tags: + if img_tags.has_key(tag): + return img_tags.get(tag) + return None + + +def write_main_set(is_trainval, images_stats, meta_json, result_imgsets_dir): + 
result_imgsets_main_subdir = os.path.join(result_imgsets_dir, g.trainval_sets_main_name) + result_imgsets_segm_subdir = os.path.join(result_imgsets_dir, g.trainval_sets_segm_name) + sly.fs.mkdir(result_imgsets_main_subdir) + + res_files = ["trainval.txt", "train.txt", "val.txt"] + for file in os.listdir(result_imgsets_segm_subdir): + if file in res_files: + copyfile( + os.path.join(result_imgsets_segm_subdir, file), + os.path.join(result_imgsets_main_subdir, file), + ) + + train_imgs = [i for i in images_stats if i["dataset"] == g.TRAIN_TAG_NAME] + val_imgs = [i for i in images_stats if i["dataset"] == g.VAL_TAG_NAME] + + write_objs = [ + {"suffix": "trainval", "imgs": images_stats}, + {"suffix": "train", "imgs": train_imgs}, + {"suffix": "val", "imgs": val_imgs}, + ] + + if is_trainval == 1: + trainval_imgs = [ + i for i in images_stats if i["dataset"] == g.TRAIN_TAG_NAME + g.VAL_TAG_NAME + ] + write_objs[0] = {"suffix": "trainval", "imgs": trainval_imgs} + + for obj_cls in meta_json.obj_classes: + if obj_cls.geometry_type not in g.SUPPORTED_GEOMETRY_TYPES: + continue + if obj_cls.name == "neutral": + continue + for o in write_objs: + with open( + os.path.join(result_imgsets_main_subdir, f'{obj_cls.name}_{o["suffix"]}.txt'), "w" + ) as f: + for img_stats in o["imgs"]: + v = "1" if obj_cls.name in img_stats["classes"] else "-1" + f.write(f'{img_stats["name"]} {v}\n') + + +def write_segm_set(is_trainval, images_stats, result_imgsets_dir): + result_imgsets_segm_subdir = os.path.join(result_imgsets_dir, g.trainval_sets_segm_name) + sly.fs.mkdir(result_imgsets_segm_subdir) + + with open(os.path.join(result_imgsets_segm_subdir, "trainval.txt"), "w") as f: + if is_trainval == 1: + f.writelines( + i["name"] + "\n" + for i in images_stats + if i["dataset"] == g.TRAIN_TAG_NAME + g.VAL_TAG_NAME + ) + else: + f.writelines(i["name"] + "\n" for i in images_stats) + with open(os.path.join(result_imgsets_segm_subdir, "train.txt"), "w") as f: + f.writelines(i["name"] + "\n" for i in images_stats if i["dataset"] == g.TRAIN_TAG_NAME) + with open(os.path.join(result_imgsets_segm_subdir, "val.txt"), "w") as f: + f.writelines(i["name"] + "\n" for i in images_stats if i["dataset"] == g.VAL_TAG_NAME)
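
Reviewer notes (not part of the patch):

The behavioral core of this change is that `Rectangle` labels are now accepted by the exporter: they are written to the Pascal VOC `.xml` annotations as bounding boxes, but are skipped when the `SegmentationClass` and `SegmentationObject` masks are rendered. A minimal sketch of that filtering, with a hypothetical helper name and label list used purely for illustration:

import supervisely as sly

# Geometry types the exporter now accepts (mirrors src/globals.py).
SUPPORTED_GEOMETRY_TYPES = {sly.Bitmap, sly.Polygon, sly.Rectangle}

def split_labels_for_export(labels):
    # Labels with unsupported geometry are dropped entirely (src/main.py);
    # Rectangle labels are kept for the XML bounding boxes but excluded from
    # mask rendering (utils.from_ann_to_instance_mask / from_ann_to_class_mask).
    kept = [lb for lb in labels if type(lb.geometry) in SUPPORTED_GEOMETRY_TYPES]
    mask_labels = [lb for lb in kept if lb.obj_class.geometry_type != sly.Rectangle]
    return kept, mask_labels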
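
The `modal.state.trainSplitCoef` value only applies to images that end up without a split: images tagged `train` or `val` (or taken from a `train`/`val`/`trainval` dataset) keep that assignment, and only the untagged remainder is divided by the coefficient. A rough sketch of the assignment step from src/main.py, using made-up image stats for illustration:

TRAIN_VAL_SPLIT_COEF = 0.8  # modal.state.trainSplitCoef

# Per-image stats as collected in main.py; "dataset" stays None for untagged images.
images_stats = [
    {"name": "img_001", "dataset": "train"},  # explicitly tagged, left untouched
    {"name": "img_002", "dataset": None},
    {"name": "img_003", "dataset": None},
    {"name": "img_004", "dataset": None},
]

imgs_to_split = [i for i in images_stats if i["dataset"] is None]
train_len = int(len(imgs_to_split) * TRAIN_VAL_SPLIT_COEF)  # first part -> train, rest -> val
for img_stat in imgs_to_split[:train_len]:
    img_stat["dataset"] = "train"
for img_stat in imgs_to_split[train_len:]:
    img_stat["dataset"] = "val"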
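
For reference, the on-disk layout assembled before archiving, reconstructed from the directory and file names in src/globals.py and src/main.py (placeholders in angle brackets):

<project_id>_<project_name>_pascal_voc/
└── VOCdevkit/VOC/
    ├── Annotations/            <image>.xml, one per image
    ├── JPEGImages/             images, converted to .jpg when needed
    ├── SegmentationClass/      <image>.png class masks
    ├── SegmentationObject/     <image>.png object masks
    ├── ImageSets/
    │   ├── Main/               <class>_train.txt, <class>_val.txt, <class>_trainval.txt (plus copies of the Segmentation lists)
    │   └── Segmentation/       train.txt, val.txt, trainval.txt
    └── colors.txt

The whole tree is packed into <project_id>_<project_name>_pascal_voc.tar.gz and uploaded to Team Files.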