diff --git a/.gitignore b/.gitignore
index e0386ed..6c2c93e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
.idea
-/debug
+.venv
+src/debug
secret_debug.env
\ No newline at end of file
diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000..40e7d4b
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,16 @@
+{
+ "version": "0.2.0",
+ "configurations": [
+ {
+ "name": "Python: Current File",
+ "type": "python",
+ "request": "launch",
+ "program": "${file}",
+ "console": "integratedTerminal",
+ "justMyCode": true,
+ "env": {
+        "PYTHONPATH": "${workspaceFolder}:${env:PYTHONPATH}"
+ }
+ }
+ ]
+}
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..f1691c4
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,35 @@
+{
+ "files.exclude": {
+ "**/__pycache__": true,
+ "build": true,
+ "supervisely.egg-info": true,
+ ".venv": true
+ },
+ "python.defaultInterpreterPath": ".venv/bin/python",
+ "editor.formatOnSave": true,
+ "editor.formatOnPaste": true,
+ "editor.formatOnType": true,
+ "black-formatter.args": [
+ "--line-length",
+ "100"
+ ],
+ "[html]": {
+ "editor.defaultFormatter": "esbenp.prettier-vscode"
+ },
+ "[json]": {
+ "editor.defaultFormatter": "esbenp.prettier-vscode"
+ },
+ "[python]": {
+ "editor.defaultFormatter": "ms-python.black-formatter",
+ },
+ "debug.inlineValues": "off",
+ "python.analysis.typeCheckingMode": "off",
+ "python.analysis.autoImportCompletions": false,
+ "autoDocstring.docstringFormat": "sphinx",
+ "autoDocstring.customTemplatePath": "docs/.mustache",
+ "python.testing.pytestArgs": [
+ "tests/inference_cache"
+ ],
+ "python.testing.unittestEnabled": false,
+ "python.testing.pytestEnabled": true
+}
\ No newline at end of file
diff --git a/README.md b/README.md
index 78c2d45..9a64629 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,8 @@ Converts [Supervisely](https://docs.supervise.ly/data-organization/00_ann_format
## Preparation
-There are special requirements for Supervisely project, classes must have `Polygon` or `Bitmap` shape, all other shapes will be skipped. It means that only labeled objects with these shapes will be rendered as masks.
+
+There are special requirements for the input Supervisely project: classes must have `Polygon`, `Bitmap` or `Rectangle` shape; all other shapes will be skipped. Only objects with `Polygon` and `Bitmap` shapes will be rendered as masks.
Pascal VOC format stores all data in separate folders. Image classes bounding boxes and additional information are stored in `.xml` files. Segmentantion class and object masks are placed into `SegmentationClass` and `SegmentationObject` folders respectively. **All image Tags, except `train` and `val` will be skipped**.
@@ -135,6 +136,6 @@ App creates task in `workspace tasks` list. Once app is finished, you will see d
-Resulting archive is saved in :
+Resulting archive is saved in:
`Current Team` -> `Files` -> `/pascal_voc_format//__pascal_format.tar`.
diff --git a/config.json b/config.json
index ad03749..3ca208a 100644
--- a/config.json
+++ b/config.json
@@ -1,28 +1,23 @@
{
- "name": "Export to Pascal VOC",
- "type": "app",
- "categories": [
- "images",
- "export"
- ],
- "description": "Converts Supervisely Project to Pascal VOC format",
- "docker_image": "supervisely/import-export:0.0.5",
- "instance_version": "6.5.1",
- "main_script": "src/export-pascal-voc.py",
- "modal_template": "src/modal.html",
- "modal_template_state": {
- "pascalContourThickness": 3,
- "trainSplitCoef": 0.8
- },
- "task_location": "workspace_tasks",
- "icon": "https://i.imgur.com/zTseThz.png",
- "icon_background": "#FFFFFF",
- "headless": true,
- "context_menu": {
- "target": [
- "images_project"
- ],
- "context_root": "Download as"
- },
- "poster": "https://user-images.githubusercontent.com/48245050/182382862-d74f1b2c-b19e-47c2-84db-45cd934ec34e.png"
-}
\ No newline at end of file
+ "name": "Export to Pascal VOC",
+ "type": "app",
+ "categories": ["images", "export"],
+ "description": "Converts Supervisely Project to Pascal VOC format",
+ "docker_image": "supervisely/import-export:6.72.147",
+ "instance_version": "6.5.1",
+ "main_script": "src/main.py",
+ "modal_template": "src/modal.html",
+ "modal_template_state": {
+ "pascalContourThickness": 3,
+ "trainSplitCoef": 0.8
+ },
+ "task_location": "workspace_tasks",
+ "icon": "https://i.imgur.com/zTseThz.png",
+ "icon_background": "#FFFFFF",
+ "headless": true,
+ "context_menu": {
+ "target": ["images_project"],
+ "context_root": "Download as"
+ },
+ "poster": "https://user-images.githubusercontent.com/48245050/182382862-d74f1b2c-b19e-47c2-84db-45cd934ec34e.png"
+}
diff --git a/debug.env b/debug.env
deleted file mode 100644
index 8b766f9..0000000
--- a/debug.env
+++ /dev/null
@@ -1,19 +0,0 @@
-PYTHONUNBUFFERED=1
-
-DEBUG_APP_DIR="/home/paul/work/apps/export-to-pascal-voc/debug/app_debug_data"
-DEBUG_CACHE_DIR="/home/paul/work/apps/export-to-pascal-voc/debug/app_debug_cache"
-LOG_LEVEL="debug"
-
-TASK_ID=4562
-
-context.teamId=8
-context.workspaceId=58
-modal.state.slyProjectId=1801
-
-modal.state.pascalContourThickness=3
-modal.state.trainSplitCoef=0.8
-
-SERVER_ADDRESS="put your value here in secret_debug.env"
-API_TOKEN="put your value here in secret_debug.env"
-AGENT_TOKEN="put your value here in secret_debug.env"
-
diff --git a/dev_requirements.txt b/dev_requirements.txt
index d70b97b..b406843 100644
--- a/dev_requirements.txt
+++ b/dev_requirements.txt
@@ -1,4 +1,4 @@
-supervisely==6.68.109
+supervisely==6.72.147
lxml==4.6.3
numpy>=1.19.4
Pillow>=8.0.1
diff --git a/local.env b/local.env
new file mode 100644
index 0000000..7ba0fec
--- /dev/null
+++ b/local.env
@@ -0,0 +1,15 @@
+PYTHONUNBUFFERED=1
+
+LOG_LEVEL="debug"
+
+TASK_ID=44728
+
+context.teamId=8
+context.workspaceId=349
+modal.state.slyProjectId=22451
+
+modal.state.pascalContourThickness=3
+modal.state.trainSplitCoef=0.8
+
+DEBUG_APP_DIR="src/debug/app_debug_data"
+DEBUG_CACHE_DIR="src/debug/app_debug_cache"
diff --git a/src/export-pascal-voc.py b/src/export-pascal-voc.py
deleted file mode 100644
index d834950..0000000
--- a/src/export-pascal-voc.py
+++ /dev/null
@@ -1,340 +0,0 @@
-import os
-import numpy as np
-import lxml.etree as ET
-import supervisely as sly
-from PIL import Image
-from shutil import copyfile
-from collections import OrderedDict
-from supervisely.imaging.color import generate_rgb
-from supervisely.app.v1.app_service import AppService
-
-my_app = AppService()
-
-TEAM_ID = int(os.environ['context.teamId'])
-WORKSPACE_ID = int(os.environ['context.workspaceId'])
-PROJECT_ID = int(os.environ['modal.state.slyProjectId'])
-
-PASCAL_CONTOUR_THICKNESS = int(os.environ['modal.state.pascalContourThickness'])
-TRAIN_VAL_SPLIT_COEF = float(os.environ['modal.state.trainSplitCoef'])
-
-ARCHIVE_NAME_ENDING = '_pascal_voc.tar.gz'
-RESULT_DIR_NAME_ENDING = '_pascal_voc'
-RESULT_SUBDIR_NAME = 'VOCdevkit/VOC'
-
-images_dir_name = 'JPEGImages'
-ann_dir_name = 'Annotations'
-ann_class_dir_name = 'SegmentationClass'
-ann_obj_dir_name = 'SegmentationObject'
-
-trainval_sets_dir_name = 'ImageSets'
-trainval_sets_main_name = 'Main'
-trainval_sets_segm_name = 'Segmentation'
-
-train_txt_name = 'train.txt'
-val_txt_name = 'val.txt'
-
-is_trainval = None
-
-pascal_contour_color = [224, 224, 192]
-pascal_ann_ext = '.png'
-
-TRAIN_TAG_NAME = 'train'
-VAL_TAG_NAME = 'val'
-SPLIT_TAGS = set([TRAIN_TAG_NAME, VAL_TAG_NAME])
-
-VALID_IMG_EXT = set(['.jpe', '.jpeg', '.jpg'])
-SUPPORTED_GEOMETRY_TYPES = set([sly.Bitmap, sly.Polygon])
-
-if TRAIN_VAL_SPLIT_COEF > 1 or TRAIN_VAL_SPLIT_COEF < 0:
- raise ValueError('train_val_split_coef should be between 0 and 1, your data is {}'.format(TRAIN_VAL_SPLIT_COEF))
-
-
-def from_ann_to_instance_mask(ann, mask_outpath, contour_thickness):
- mask = np.zeros((ann.img_size[0], ann.img_size[1], 3), dtype=np.uint8)
- for label in ann.labels:
- if label.obj_class.name == "neutral":
- label.geometry.draw(mask, pascal_contour_color)
- continue
-
- label.geometry.draw_contour(mask, pascal_contour_color, contour_thickness)
- label.geometry.draw(mask, label.obj_class.color)
-
- im = Image.fromarray(mask)
- im = im.convert("P", palette=Image.ADAPTIVE)
- im.save(mask_outpath)
-
-
-def from_ann_to_class_mask(ann, mask_outpath, contour_thickness):
- exist_colors = [[0, 0, 0], pascal_contour_color]
- mask = np.zeros((ann.img_size[0], ann.img_size[1], 3), dtype=np.uint8)
- for label in ann.labels:
- if label.obj_class.name == "neutral":
- label.geometry.draw(mask, pascal_contour_color)
- continue
-
- new_color = generate_rgb(exist_colors)
- exist_colors.append(new_color)
- label.geometry.draw_contour(mask, pascal_contour_color, contour_thickness)
- label.geometry.draw(mask, new_color)
-
- im = Image.fromarray(mask)
- im = im.convert("P", palette=Image.ADAPTIVE)
- im.save(mask_outpath)
-
-
-def ann_to_xml(project_info, image_info, img_filename, result_ann_dir, ann):
- xml_root = ET.Element("annotation")
-
- ET.SubElement(xml_root, "folder").text = "VOC_" + project_info.name
- ET.SubElement(xml_root, "filename").text = img_filename
-
- xml_root_source = ET.SubElement(xml_root, "source")
- ET.SubElement(xml_root_source, "database").text = "Supervisely Project ID:" + str(project_info.id)
- ET.SubElement(xml_root_source, "annotation").text = "PASCAL VOC"
- ET.SubElement(xml_root_source, "image").text = "Supervisely Image ID:" + str(image_info.id)
-
- xml_root_size = ET.SubElement(xml_root, "size")
- ET.SubElement(xml_root_size, "width").text = str(image_info.width)
- ET.SubElement(xml_root_size, "height").text = str(image_info.height)
- ET.SubElement(xml_root_size, "depth").text = "3"
-
- ET.SubElement(xml_root, "segmented").text = "1" if len(ann.labels) > 0 else "0"
-
- for label in ann.labels:
- if label.obj_class.name == "neutral":
- continue
-
- bitmap_to_bbox = label.geometry.to_bbox()
-
- xml_ann_obj = ET.SubElement(xml_root, "object")
- ET.SubElement(xml_ann_obj, "name").text = label.obj_class.name
- ET.SubElement(xml_ann_obj, "pose").text = "Unspecified"
- ET.SubElement(xml_ann_obj, "truncated").text = "0"
- ET.SubElement(xml_ann_obj, "difficult").text = "0"
-
- xml_ann_obj_bndbox = ET.SubElement(xml_ann_obj, "bndbox")
- ET.SubElement(xml_ann_obj_bndbox, "xmin").text = str(bitmap_to_bbox.left)
- ET.SubElement(xml_ann_obj_bndbox, "ymin").text = str(bitmap_to_bbox.top)
- ET.SubElement(xml_ann_obj_bndbox, "xmax").text = str(bitmap_to_bbox.right)
- ET.SubElement(xml_ann_obj_bndbox, "ymax").text = str(bitmap_to_bbox.bottom)
-
- tree = ET.ElementTree(xml_root)
-
- img_name = os.path.join(result_ann_dir, os.path.splitext(img_filename)[0] + ".xml")
- ann_path = (os.path.join(result_ann_dir, img_name))
- ET.indent(tree, space=" ")
- tree.write(ann_path, pretty_print=True)
-
-
-def find_first_tag(img_tags, split_tags):
- for tag in split_tags:
- if img_tags.has_key(tag):
- return img_tags.get(tag)
- return None
-
-
-def write_main_set(is_trainval, images_stats, meta_json, result_imgsets_dir):
- result_imgsets_main_subdir = os.path.join(result_imgsets_dir, trainval_sets_main_name)
- result_imgsets_segm_subdir = os.path.join(result_imgsets_dir, trainval_sets_segm_name)
- sly.fs.mkdir(result_imgsets_main_subdir)
-
- res_files = ["trainval.txt", "train.txt", "val.txt"]
- for file in os.listdir(result_imgsets_segm_subdir):
- if file in res_files:
- copyfile(os.path.join(result_imgsets_segm_subdir, file), os.path.join(result_imgsets_main_subdir, file))
-
- train_imgs = [i for i in images_stats if i['dataset'] == TRAIN_TAG_NAME]
- val_imgs = [i for i in images_stats if i['dataset'] == VAL_TAG_NAME]
-
- write_objs = [
- {'suffix': 'trainval', 'imgs': images_stats},
- {'suffix': 'train', 'imgs': train_imgs},
- {'suffix': 'val', 'imgs': val_imgs},
- ]
-
- if is_trainval == 1:
- trainval_imgs = [i for i in images_stats if i['dataset'] == TRAIN_TAG_NAME + VAL_TAG_NAME]
- write_objs[0] = {'suffix': 'trainval', 'imgs': trainval_imgs}
-
- for obj_cls in meta_json.obj_classes:
- if obj_cls.geometry_type not in SUPPORTED_GEOMETRY_TYPES:
- continue
- if obj_cls.name == 'neutral':
- continue
- for o in write_objs:
- with open(os.path.join(result_imgsets_main_subdir, f'{obj_cls.name}_{o["suffix"]}.txt'), 'w') as f:
- for img_stats in o['imgs']:
- v = "1" if obj_cls.name in img_stats['classes'] else "-1"
- f.write(f'{img_stats["name"]} {v}\n')
-
-
-def write_segm_set(is_trainval, images_stats, result_imgsets_dir):
- result_imgsets_segm_subdir = os.path.join(result_imgsets_dir, trainval_sets_segm_name)
- sly.fs.mkdir(result_imgsets_segm_subdir)
-
- with open(os.path.join(result_imgsets_segm_subdir, 'trainval.txt'), 'w') as f:
- if is_trainval ==1:
- f.writelines(i['name'] + '\n' for i in images_stats if i['dataset'] == TRAIN_TAG_NAME+VAL_TAG_NAME)
- else:
- f.writelines(i['name'] + '\n' for i in images_stats)
- with open(os.path.join(result_imgsets_segm_subdir, 'train.txt'), 'w') as f:
- f.writelines(i['name'] + '\n' for i in images_stats if i['dataset'] == TRAIN_TAG_NAME)
- with open(os.path.join(result_imgsets_segm_subdir, 'val.txt'), 'w') as f:
- f.writelines(i['name'] + '\n' for i in images_stats if i['dataset'] == VAL_TAG_NAME)
-
-
-@my_app.callback("from_sly_to_pascal")
-@sly.timeit
-def from_sly_to_pascal(api: sly.Api, task_id, context, state, app_logger):
- global PASCAL_CONTOUR_THICKNESS, TRAIN_VAL_SPLIT_COEF
-
- project_info = api.project.get_info_by_id(PROJECT_ID)
- meta_json = api.project.get_meta(PROJECT_ID)
- meta = sly.ProjectMeta.from_json(meta_json)
- app_logger.info("Palette has been created")
-
- full_archive_name = str(project_info.id) + '_' + project_info.name + ARCHIVE_NAME_ENDING
- full_result_dir_name = str(project_info.id) + '_' + project_info.name + RESULT_DIR_NAME_ENDING
-
- result_archive = os.path.join(my_app.data_dir, full_archive_name)
- result_dir = os.path.join(my_app.data_dir, full_result_dir_name)
- result_subdir = os.path.join(result_dir, RESULT_SUBDIR_NAME)
-
- result_ann_dir = os.path.join(result_subdir, ann_dir_name)
- result_images_dir = os.path.join(result_subdir, images_dir_name)
- result_class_dir_name = os.path.join(result_subdir, ann_class_dir_name)
- result_obj_dir = os.path.join(result_subdir, ann_obj_dir_name)
- result_imgsets_dir = os.path.join(result_subdir, trainval_sets_dir_name)
-
- sly.fs.mkdir(result_ann_dir)
- sly.fs.mkdir(result_imgsets_dir)
- sly.fs.mkdir(result_images_dir)
- sly.fs.mkdir(result_class_dir_name)
- sly.fs.mkdir(result_obj_dir)
-
- app_logger.info("Pascal VOC directories have been created")
-
- images_stats = []
- classes_colors = {}
-
- datasets = api.dataset.get_list(PROJECT_ID)
- dataset_names = ['trainval', 'val', 'train']
- progress = sly.Progress('Preparing images for export', api.project.get_images_count(PROJECT_ID), app_logger)
- for dataset in datasets:
- if dataset.name in dataset_names:
- is_trainval = 1
- else:
- is_trainval = 0
-
- images = api.image.get_list(dataset.id)
- for batch in sly.batched(images):
- image_ids = [image_info.id for image_info in batch]
- image_paths = [os.path.join(result_images_dir, image_info.name) for image_info in batch]
-
- api.image.download_paths(dataset.id, image_ids, image_paths)
- ann_infos = api.annotation.download_batch(dataset.id, image_ids)
- for image_info, ann_info in zip(batch, ann_infos):
- img_title, img_ext = os.path.splitext(image_info.name)
- cur_img_filename = image_info.name
-
- if is_trainval == 1:
- cur_img_stats = {'classes': set(), 'dataset': dataset.name, 'name': img_title}
- images_stats.append(cur_img_stats)
- else:
- cur_img_stats = {'classes': set(), 'dataset': None, 'name': img_title}
- images_stats.append(cur_img_stats)
-
- if img_ext not in VALID_IMG_EXT:
- orig_image_path = os.path.join(result_images_dir, cur_img_filename)
-
- jpg_image = img_title + ".jpg"
- jpg_image_path = os.path.join(result_images_dir, jpg_image)
-
- im = sly.image.read(orig_image_path)
- sly.image.write(jpg_image_path, im)
- sly.fs.silent_remove(orig_image_path)
-
- ann = sly.Annotation.from_json(ann_info.annotation, meta)
- tag = find_first_tag(ann.img_tags, SPLIT_TAGS)
- if tag is not None:
- cur_img_stats['dataset'] = tag.meta.name
-
- valid_labels = []
- for label in ann.labels:
- if type(label.geometry) in SUPPORTED_GEOMETRY_TYPES:
- valid_labels.append(label)
- else:
- app_logger.warn(
- f"Label has unsupported geometry type ({type(label.geometry)}) and will be skipped.")
-
- ann = ann.clone(labels=valid_labels)
- ann_to_xml(project_info, image_info, cur_img_filename, result_ann_dir, ann)
- for label in ann.labels:
- cur_img_stats['classes'].add(label.obj_class.name)
- classes_colors[label.obj_class.name] = tuple(label.obj_class.color)
-
- fake_contour_th = 0
- if PASCAL_CONTOUR_THICKNESS != 0:
- fake_contour_th = 2 * PASCAL_CONTOUR_THICKNESS + 1
-
- from_ann_to_instance_mask(ann, os.path.join(result_class_dir_name, img_title + pascal_ann_ext), fake_contour_th)
- from_ann_to_class_mask(ann, os.path.join(result_obj_dir, img_title + pascal_ann_ext), fake_contour_th)
-
- progress.iter_done_report()
-
- classes_colors = OrderedDict((sorted(classes_colors.items(), key=lambda t: t[0])))
-
- with open(os.path.join(result_subdir, "colors.txt"), "w") as cc:
- if PASCAL_CONTOUR_THICKNESS != 0:
- cc.write(f"neutral {pascal_contour_color[0]} {pascal_contour_color[1]} {pascal_contour_color[2]}\n")
-
- for k in classes_colors.keys():
- if k == 'neutral':
- continue
-
- cc.write(f"{k} {classes_colors[k][0]} {classes_colors[k][1]} {classes_colors[k][2]}\n")
-
- imgs_to_split = [i for i in images_stats if i['dataset'] is None]
- train_len = int(len(imgs_to_split) * TRAIN_VAL_SPLIT_COEF)
-
- for img_stat in imgs_to_split[:train_len]: img_stat['dataset'] = TRAIN_TAG_NAME
- for img_stat in imgs_to_split[train_len:]: img_stat['dataset'] = VAL_TAG_NAME
-
- write_segm_set(is_trainval, images_stats, result_imgsets_dir)
- write_main_set(is_trainval, images_stats, meta, result_imgsets_dir)
-
- sly.fs.archive_directory(result_dir, result_archive)
- app_logger.info("Result directory is archived")
-
- upload_progress = []
- remote_archive_path = os.path.join(
- sly.team_files.RECOMMENDED_EXPORT_PATH, "export-to-Pascal-VOC/{}/{}".format(task_id, full_archive_name))
-
- def _print_progress(monitor, upload_progress):
- if len(upload_progress) == 0:
- upload_progress.append(sly.Progress(message="Upload {!r}".format(full_archive_name),
- total_cnt=monitor.len,
- ext_logger=app_logger,
- is_size=True))
- upload_progress[0].set_current_value(monitor.bytes_read)
-
- file_info = api.file.upload(TEAM_ID, result_archive, remote_archive_path,
- lambda m: _print_progress(m, upload_progress))
- app_logger.info("Uploaded to Team-Files: {!r}".format(file_info.storage_path))
- api.task.set_output_archive(task_id, file_info.id, full_archive_name, file_url=file_info.storage_path)
-
- my_app.stop()
-
-
-def main():
- sly.logger.info("Script arguments", extra={
- "TEAM_ID": TEAM_ID,
- "WORKSPACE_ID": WORKSPACE_ID,
- "PROJECT_ID": PROJECT_ID
- })
-
- my_app.run(initial_events=[{"command": "from_sly_to_pascal"}])
-
-
-if __name__ == '__main__':
- sly.main_wrapper("main", main)
diff --git a/src/globals.py b/src/globals.py
new file mode 100644
index 0000000..26ee4e8
--- /dev/null
+++ b/src/globals.py
@@ -0,0 +1,51 @@
+import os
+from dotenv import load_dotenv
+import supervisely as sly
+from supervisely.app.v1.app_service import AppService
+
+if sly.is_development():
+ load_dotenv("local.env")
+ load_dotenv(os.path.expanduser("~/supervisely.env"))
+
+
+my_app = AppService()
+
+TEAM_ID = int(os.environ["context.teamId"])
+WORKSPACE_ID = int(os.environ["context.workspaceId"])
+PROJECT_ID = int(os.environ["modal.state.slyProjectId"])
+
+PASCAL_CONTOUR_THICKNESS = int(os.environ["modal.state.pascalContourThickness"])
+TRAIN_VAL_SPLIT_COEF = float(os.environ["modal.state.trainSplitCoef"])
+
+ARCHIVE_NAME_ENDING = "_pascal_voc.tar.gz"
+RESULT_DIR_NAME_ENDING = "_pascal_voc"
+RESULT_SUBDIR_NAME = "VOCdevkit/VOC"
+
+images_dir_name = "JPEGImages"
+ann_dir_name = "Annotations"
+ann_class_dir_name = "SegmentationClass"
+ann_obj_dir_name = "SegmentationObject"
+
+trainval_sets_dir_name = "ImageSets"
+trainval_sets_main_name = "Main"
+trainval_sets_segm_name = "Segmentation"
+
+train_txt_name = "train.txt"
+val_txt_name = "val.txt"
+
+is_trainval = None
+
+pascal_contour_color = [224, 224, 192]
+pascal_ann_ext = ".png"
+
+TRAIN_TAG_NAME = "train"
+VAL_TAG_NAME = "val"
+SPLIT_TAGS = {TRAIN_TAG_NAME, VAL_TAG_NAME}
+
+VALID_IMG_EXT = {".jpe", ".jpeg", ".jpg"}
+SUPPORTED_GEOMETRY_TYPES = {sly.Bitmap, sly.Polygon, sly.Rectangle}
+
+if TRAIN_VAL_SPLIT_COEF > 1 or TRAIN_VAL_SPLIT_COEF < 0:
+ raise ValueError(
+ f"train_val_split_coef should be between 0 and 1, your data is {TRAIN_VAL_SPLIT_COEF}"
+ )
diff --git a/src/main.py b/src/main.py
new file mode 100644
index 0000000..fc32531
--- /dev/null
+++ b/src/main.py
@@ -0,0 +1,184 @@
+import os
+import supervisely as sly
+from collections import OrderedDict
+import globals as g
+import utils
+
+
+@g.my_app.callback("from_sly_to_pascal")
+@sly.timeit
+def from_sly_to_pascal(api: sly.Api, task_id, context, state, app_logger):
+ project_info = api.project.get_info_by_id(g.PROJECT_ID)
+ meta_json = api.project.get_meta(g.PROJECT_ID)
+ meta = sly.ProjectMeta.from_json(meta_json)
+ app_logger.info("Palette has been created")
+
+ full_archive_name = f"{str(project_info.id)}_{project_info.name}{g.ARCHIVE_NAME_ENDING}"
+ full_result_dir_name = f"{str(project_info.id)}_{project_info.name}{g.RESULT_DIR_NAME_ENDING}"
+
+ result_archive = os.path.join(g.my_app.data_dir, full_archive_name)
+ result_dir = os.path.join(g.my_app.data_dir, full_result_dir_name)
+ result_subdir = os.path.join(result_dir, g.RESULT_SUBDIR_NAME)
+
+ result_ann_dir = os.path.join(result_subdir, g.ann_dir_name)
+ result_images_dir = os.path.join(result_subdir, g.images_dir_name)
+ result_class_dir_name = os.path.join(result_subdir, g.ann_class_dir_name)
+ result_obj_dir = os.path.join(result_subdir, g.ann_obj_dir_name)
+ result_imgsets_dir = os.path.join(result_subdir, g.trainval_sets_dir_name)
+
+ sly.fs.mkdir(result_ann_dir)
+ sly.fs.mkdir(result_imgsets_dir)
+ sly.fs.mkdir(result_images_dir)
+ sly.fs.mkdir(result_class_dir_name)
+ sly.fs.mkdir(result_obj_dir)
+
+ app_logger.info("Pascal VOC directories have been created")
+
+ images_stats = []
+ classes_colors = {}
+
+ datasets = api.dataset.get_list(g.PROJECT_ID)
+ dataset_names = ["trainval", "val", "train"]
+ progress = sly.Progress(
+ "Preparing images for export", api.project.get_images_count(g.PROJECT_ID), app_logger
+ )
+ for dataset in datasets:
+ if dataset.name in dataset_names:
+ is_trainval = 1
+ else:
+ is_trainval = 0
+
+ images = api.image.get_list(dataset.id)
+ for batch in sly.batched(images):
+ image_ids = [image_info.id for image_info in batch]
+ image_paths = [os.path.join(result_images_dir, image_info.name) for image_info in batch]
+
+ api.image.download_paths(dataset.id, image_ids, image_paths)
+ ann_infos = api.annotation.download_batch(dataset.id, image_ids)
+ for image_info, ann_info in zip(batch, ann_infos):
+ img_title, img_ext = os.path.splitext(image_info.name)
+ cur_img_filename = image_info.name
+
+ if is_trainval == 1:
+ cur_img_stats = {"classes": set(), "dataset": dataset.name, "name": img_title}
+ images_stats.append(cur_img_stats)
+ else:
+ cur_img_stats = {"classes": set(), "dataset": None, "name": img_title}
+ images_stats.append(cur_img_stats)
+
+ if img_ext not in g.VALID_IMG_EXT:
+ orig_image_path = os.path.join(result_images_dir, cur_img_filename)
+
+ jpg_image = f"{img_title}.jpg"
+ jpg_image_path = os.path.join(result_images_dir, jpg_image)
+
+ im = sly.image.read(orig_image_path)
+ sly.image.write(jpg_image_path, im)
+ sly.fs.silent_remove(orig_image_path)
+
+ ann = sly.Annotation.from_json(ann_info.annotation, meta)
+ tag = utils.find_first_tag(ann.img_tags, g.SPLIT_TAGS)
+ if tag is not None:
+ cur_img_stats["dataset"] = tag.meta.name
+
+ valid_labels = []
+ for label in ann.labels:
+ if type(label.geometry) in g.SUPPORTED_GEOMETRY_TYPES:
+ valid_labels.append(label)
+ else:
+                        app_logger.warning(
+ f"Label has unsupported geometry type ({type(label.geometry)}) and will be skipped."
+ )
+
+ ann = ann.clone(labels=valid_labels)
+ utils.ann_to_xml(project_info, image_info, cur_img_filename, result_ann_dir, ann)
+ for label in ann.labels:
+ cur_img_stats["classes"].add(label.obj_class.name)
+ classes_colors[label.obj_class.name] = tuple(label.obj_class.color)
+
+ fake_contour_th = 0
+ if g.PASCAL_CONTOUR_THICKNESS != 0:
+ fake_contour_th = 2 * g.PASCAL_CONTOUR_THICKNESS + 1
+
+ utils.from_ann_to_instance_mask(
+ ann,
+ os.path.join(result_class_dir_name, img_title + g.pascal_ann_ext),
+ fake_contour_th,
+ )
+ utils.from_ann_to_class_mask(
+ ann, os.path.join(result_obj_dir, img_title + g.pascal_ann_ext), fake_contour_th
+ )
+
+ progress.iter_done_report()
+
+ classes_colors = OrderedDict((sorted(classes_colors.items(), key=lambda t: t[0])))
+
+ with open(os.path.join(result_subdir, "colors.txt"), "w") as cc:
+ if g.PASCAL_CONTOUR_THICKNESS != 0:
+ cc.write(
+ f"neutral {g.pascal_contour_color[0]} {g.pascal_contour_color[1]} {g.pascal_contour_color[2]}\n"
+ )
+
+ for k in classes_colors.keys():
+ if k == "neutral":
+ continue
+
+ cc.write(f"{k} {classes_colors[k][0]} {classes_colors[k][1]} {classes_colors[k][2]}\n")
+
+ imgs_to_split = [i for i in images_stats if i["dataset"] is None]
+ train_len = int(len(imgs_to_split) * g.TRAIN_VAL_SPLIT_COEF)
+
+ for img_stat in imgs_to_split[:train_len]:
+ img_stat["dataset"] = g.TRAIN_TAG_NAME
+ for img_stat in imgs_to_split[train_len:]:
+ img_stat["dataset"] = g.VAL_TAG_NAME
+
+ utils.write_segm_set(is_trainval, images_stats, result_imgsets_dir)
+ utils.write_main_set(is_trainval, images_stats, meta, result_imgsets_dir)
+
+ sly.fs.archive_directory(result_dir, result_archive)
+ app_logger.info("Result directory is archived")
+
+ upload_progress = []
+ remote_archive_path = os.path.join(
+ sly.team_files.RECOMMENDED_EXPORT_PATH,
+ "export-to-Pascal-VOC/{}/{}".format(task_id, full_archive_name),
+ )
+
+ def _print_progress(monitor, upload_progress):
+ if len(upload_progress) == 0:
+ upload_progress.append(
+ sly.Progress(
+ message="Upload {!r}".format(full_archive_name),
+ total_cnt=monitor.len,
+ ext_logger=app_logger,
+ is_size=True,
+ )
+ )
+ upload_progress[0].set_current_value(monitor.bytes_read)
+
+ file_info = api.file.upload(
+ g.TEAM_ID,
+ result_archive,
+ remote_archive_path,
+ lambda m: _print_progress(m, upload_progress),
+ )
+ app_logger.info("Uploaded to Team-Files: {!r}".format(file_info.storage_path))
+ api.task.set_output_archive(
+ task_id, file_info.id, full_archive_name, file_url=file_info.storage_path
+ )
+
+ g.my_app.stop()
+
+
+def main():
+ sly.logger.info(
+ "Script arguments",
+ extra={"TEAM_ID": g.TEAM_ID, "WORKSPACE_ID": g.WORKSPACE_ID, "PROJECT_ID": g.PROJECT_ID},
+ )
+
+ g.my_app.run(initial_events=[{"command": "from_sly_to_pascal"}])
+
+
+if __name__ == "__main__":
+ sly.main_wrapper("main", main)
diff --git a/src/utils.py b/src/utils.py
new file mode 100644
index 0000000..8114518
--- /dev/null
+++ b/src/utils.py
@@ -0,0 +1,166 @@
+import os
+from typing import List
+import numpy as np
+import lxml.etree as ET
+import supervisely as sly
+from PIL import Image
+from shutil import copyfile
+from supervisely.imaging.color import generate_rgb
+from supervisely.io.fs import get_file_name
+
+import globals as g
+
+
+def from_ann_to_instance_mask(ann: sly.Annotation, mask_outpath, contour_thickness):
+ mask = np.zeros((ann.img_size[0], ann.img_size[1], 3), dtype=np.uint8)
+ for label in ann.labels:
+ if label.obj_class.geometry_type == sly.Rectangle:
+ continue
+
+ if label.obj_class.name == "neutral":
+ label.geometry.draw(mask, g.pascal_contour_color)
+ continue
+
+ label.geometry.draw_contour(mask, g.pascal_contour_color, contour_thickness)
+ label.geometry.draw(mask, label.obj_class.color)
+
+ im = Image.fromarray(mask)
+ im = im.convert("P", palette=Image.ADAPTIVE)
+ im.save(mask_outpath)
+
+
+def from_ann_to_class_mask(ann: sly.Annotation, mask_outpath, contour_thickness):
+ exist_colors = [[0, 0, 0], g.pascal_contour_color]
+ mask = np.zeros((ann.img_size[0], ann.img_size[1], 3), dtype=np.uint8)
+ for label in ann.labels:
+ if label.obj_class.geometry_type == sly.Rectangle:
+ continue
+
+ if label.obj_class.name == "neutral":
+ label.geometry.draw(mask, g.pascal_contour_color)
+ continue
+
+ new_color = generate_rgb(exist_colors)
+ exist_colors.append(new_color)
+ label.geometry.draw_contour(mask, g.pascal_contour_color, contour_thickness)
+ label.geometry.draw(mask, new_color)
+
+ im = Image.fromarray(mask)
+ im = im.convert("P", palette=Image.ADAPTIVE)
+ im.save(mask_outpath)
+
+
+def ann_to_xml(project_info, image_info, img_filename, result_ann_dir, ann):
+ xml_root = ET.Element("annotation")
+
+ ET.SubElement(xml_root, "folder").text = f"VOC_{project_info.name}"
+ ET.SubElement(xml_root, "filename").text = img_filename
+
+ xml_root_source = ET.SubElement(xml_root, "source")
+ ET.SubElement(
+ xml_root_source, "database"
+ ).text = f"Supervisely Project ID:{str(project_info.id)}"
+
+ ET.SubElement(xml_root_source, "annotation").text = "PASCAL VOC"
+ ET.SubElement(xml_root_source, "image").text = f"Supervisely Image ID:{str(image_info.id)}"
+
+ xml_root_size = ET.SubElement(xml_root, "size")
+ ET.SubElement(xml_root_size, "width").text = str(image_info.width)
+ ET.SubElement(xml_root_size, "height").text = str(image_info.height)
+ ET.SubElement(xml_root_size, "depth").text = "3"
+
+ ET.SubElement(xml_root, "segmented").text = "1" if len(ann.labels) > 0 else "0"
+
+ for label in ann.labels:
+ if label.obj_class.name == "neutral":
+ continue
+
+ bitmap_to_bbox = label.geometry.to_bbox()
+
+ xml_ann_obj = ET.SubElement(xml_root, "object")
+ ET.SubElement(xml_ann_obj, "name").text = label.obj_class.name
+ ET.SubElement(xml_ann_obj, "pose").text = "Unspecified"
+ ET.SubElement(xml_ann_obj, "truncated").text = "0"
+ ET.SubElement(xml_ann_obj, "difficult").text = "0"
+
+ xml_ann_obj_bndbox = ET.SubElement(xml_ann_obj, "bndbox")
+ ET.SubElement(xml_ann_obj_bndbox, "xmin").text = str(bitmap_to_bbox.left)
+ ET.SubElement(xml_ann_obj_bndbox, "ymin").text = str(bitmap_to_bbox.top)
+ ET.SubElement(xml_ann_obj_bndbox, "xmax").text = str(bitmap_to_bbox.right)
+ ET.SubElement(xml_ann_obj_bndbox, "ymax").text = str(bitmap_to_bbox.bottom)
+
+ tree = ET.ElementTree(xml_root)
+
+ # img_name = os.path.join(result_ann_dir, f"{os.path.splitext(img_filename)[0]}.xml")
+ img_name = f"{get_file_name(img_filename)}.xml"
+ ann_path = os.path.join(result_ann_dir, img_name)
+ ET.indent(tree, space=" ")
+ tree.write(ann_path, pretty_print=True)
+
+
+def find_first_tag(img_tags: sly.TagCollection, split_tags: set) -> sly.Tag:
+ for tag in split_tags:
+ if img_tags.has_key(tag):
+ return img_tags.get(tag)
+ return None
+
+
+def write_main_set(is_trainval, images_stats, meta_json, result_imgsets_dir):
+ result_imgsets_main_subdir = os.path.join(result_imgsets_dir, g.trainval_sets_main_name)
+ result_imgsets_segm_subdir = os.path.join(result_imgsets_dir, g.trainval_sets_segm_name)
+ sly.fs.mkdir(result_imgsets_main_subdir)
+
+ res_files = ["trainval.txt", "train.txt", "val.txt"]
+ for file in os.listdir(result_imgsets_segm_subdir):
+ if file in res_files:
+ copyfile(
+ os.path.join(result_imgsets_segm_subdir, file),
+ os.path.join(result_imgsets_main_subdir, file),
+ )
+
+ train_imgs = [i for i in images_stats if i["dataset"] == g.TRAIN_TAG_NAME]
+ val_imgs = [i for i in images_stats if i["dataset"] == g.VAL_TAG_NAME]
+
+ write_objs = [
+ {"suffix": "trainval", "imgs": images_stats},
+ {"suffix": "train", "imgs": train_imgs},
+ {"suffix": "val", "imgs": val_imgs},
+ ]
+
+ if is_trainval == 1:
+ trainval_imgs = [
+ i for i in images_stats if i["dataset"] == g.TRAIN_TAG_NAME + g.VAL_TAG_NAME
+ ]
+ write_objs[0] = {"suffix": "trainval", "imgs": trainval_imgs}
+
+ for obj_cls in meta_json.obj_classes:
+ if obj_cls.geometry_type not in g.SUPPORTED_GEOMETRY_TYPES:
+ continue
+ if obj_cls.name == "neutral":
+ continue
+ for o in write_objs:
+ with open(
+ os.path.join(result_imgsets_main_subdir, f'{obj_cls.name}_{o["suffix"]}.txt'), "w"
+ ) as f:
+ for img_stats in o["imgs"]:
+ v = "1" if obj_cls.name in img_stats["classes"] else "-1"
+ f.write(f'{img_stats["name"]} {v}\n')
+
+
+def write_segm_set(is_trainval, images_stats, result_imgsets_dir):
+ result_imgsets_segm_subdir = os.path.join(result_imgsets_dir, g.trainval_sets_segm_name)
+ sly.fs.mkdir(result_imgsets_segm_subdir)
+
+ with open(os.path.join(result_imgsets_segm_subdir, "trainval.txt"), "w") as f:
+ if is_trainval == 1:
+ f.writelines(
+ i["name"] + "\n"
+ for i in images_stats
+ if i["dataset"] == g.TRAIN_TAG_NAME + g.VAL_TAG_NAME
+ )
+ else:
+ f.writelines(i["name"] + "\n" for i in images_stats)
+ with open(os.path.join(result_imgsets_segm_subdir, "train.txt"), "w") as f:
+ f.writelines(i["name"] + "\n" for i in images_stats if i["dataset"] == g.TRAIN_TAG_NAME)
+ with open(os.path.join(result_imgsets_segm_subdir, "val.txt"), "w") as f:
+ f.writelines(i["name"] + "\n" for i in images_stats if i["dataset"] == g.VAL_TAG_NAME)