diff --git a/.vscode/settings.json b/.vscode/settings.json index 7604595..9495388 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -16,7 +16,10 @@ "editor.defaultFormatter": "esbenp.prettier-vscode" }, "[json]": { - "editor.defaultFormatter": "esbenp.prettier-vscode" + "editor.defaultFormatter": "esbenp.prettier-vscode", + "editor.formatOnSave": true, + "editor.tabSize": 4, + "editor.insertSpaces": false }, "[python]": { "editor.defaultFormatter": "ms-python.black-formatter", diff --git a/config.json b/config.json index 2e1d4dc..3379d1e 100644 --- a/config.json +++ b/config.json @@ -1,30 +1,24 @@ { - "name": "Export to Pascal VOC", - "type": "app", - "categories": [ - "images", - "export" - ], - "description": "Converts Supervisely Project to Pascal VOC format", - "docker_image": "supervisely/import-export:6.73.242", - "instance_version": "6.12.5", - "main_script": "src/main.py", - "modal_template": "src/modal.html", - "modal_template_state": { - "pascalContourThickness": 3, - "trainSplitCoef": 0.8, - "addPrefixToImages": true - }, - "task_location": "workspace_tasks", - "icon": "https://i.imgur.com/zTseThz.png", - "icon_background": "#FFFFFF", - "headless": true, - "context_menu": { - "target": [ - "images_project", - "images_dataset" - ], - "context_root": "Download as" - }, - "poster": "https://user-images.githubusercontent.com/48245050/182382862-d74f1b2c-b19e-47c2-84db-45cd934ec34e.png" -} \ No newline at end of file + "name": "Export to Pascal VOC", + "type": "app", + "categories": ["images", "export"], + "description": "Converts Supervisely Project to Pascal VOC format", + "docker_image": "supervisely/import-export:6.73.259", + "instance_version": "6.12.12", + "main_script": "src/main.py", + "modal_template": "src/modal.html", + "modal_template_state": { + "pascalContourThickness": 3, + "trainSplitCoef": 0.8, + "addPrefixToImages": true + }, + "task_location": "workspace_tasks", + "icon": "https://i.imgur.com/zTseThz.png", + "icon_background": "#FFFFFF", + "headless": true, + "context_menu": { + "target": ["images_project", "images_dataset"], + "context_root": "Download as" + }, + "poster": "https://user-images.githubusercontent.com/48245050/182382862-d74f1b2c-b19e-47c2-84db-45cd934ec34e.png" +} diff --git a/dev_requirements.txt b/dev_requirements.txt index 2ec3e86..8783b9e 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -1,4 +1,4 @@ -supervisely==6.73.242 +supervisely==6.73.259 lxml numpy>=1.19.4 Pillow>=8.0.1 diff --git a/local.env b/local.env index 5f0d1d6..3a6f0cd 100644 --- a/local.env +++ b/local.env @@ -1,10 +1,6 @@ #TEAM_ID=448 #WORKSPACE_ID=690 -#PROJECT_ID=35637 - -TEAM_ID = 431 -WORKSPACE_ID = 1019 -PROJECT_ID = 40721 +PROJECT_ID=44124 modal.state.pascalContourThickness=3 modal.state.trainSplitCoef=0.8 \ No newline at end of file diff --git a/src/globals.py b/src/globals.py index ce7b98c..fa2f19b 100644 --- a/src/globals.py +++ b/src/globals.py @@ -1,9 +1,8 @@ import os +from distutils.util import strtobool import supervisely as sly from dotenv import load_dotenv -from distutils.util import strtobool -import time if sly.is_development(): load_dotenv("local.env") @@ -57,23 +56,3 @@ raise ValueError( f"train_val_split_coef should be between 0 and 1, your data is {TRAIN_VAL_SPLIT_COEF}" ) - -class Timer: - def __init__(self, message=None, items_cnt=None): - self.message = message - self.items_cnt = items_cnt - self.elapsed = 0 - - def __enter__(self): - self.start = time.perf_counter() - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self.end = time.perf_counter() - self.elapsed = self.end - self.start - msg = self.message or "Block execution" - if self.items_cnt is not None: - log_msg = f"{msg} time: {self.elapsed:.3f} seconds per {self.items_cnt} items ({self.elapsed/self.items_cnt:.3f} seconds per item)" - else: - log_msg = f"{msg} time: {self.elapsed:.3f} seconds" - sly.logger.info(log_msg) \ No newline at end of file diff --git a/src/main.py b/src/main.py index bbd3334..a983252 100644 --- a/src/main.py +++ b/src/main.py @@ -1,13 +1,13 @@ +import asyncio import os from collections import OrderedDict import supervisely as sly import globals as g -import workflow as w import utils +import workflow as w -import asyncio @sly.handle_exceptions(has_ui=False) def from_sly_to_pascal(api: sly.Api): @@ -49,116 +49,115 @@ def from_sly_to_pascal(api: sly.Api): total_images_cnt = ds_info.items_count dataset_names = ["trainval", "val", "train"] - progress = sly.Progress( - "Preparing images for export", total_images_cnt, sly.logger - ) - + progress = sly.tqdm_sly(desc="Preparing images for export", total=total_images_cnt) for dataset in datasets: + sly.logger.info(f"Processing dataset: {dataset.name}") if dataset.name in dataset_names: is_trainval = 1 else: is_trainval = 0 images = api.image.get_list(dataset.id) - for batch in sly.batched(images): - image_ids = [image_info.id for image_info in batch] - - if g.ADD_PREFIX_TO_IMAGES: - image_paths = [ - os.path.join(result_images_dir, f"{dataset.id}_{image_info.name}") - for image_info in images - ] - else: - image_paths = [ - os.path.join(result_images_dir, image_info.name) for image_info in images - ] - for idx, path in enumerate(image_paths): - if os.path.exists(path): - img_name = os.path.basename(path) - name, ext = os.path.splitext(img_name) - i = 1 + image_ids = [image_info.id for image_info in images] + + if g.ADD_PREFIX_TO_IMAGES: + image_paths = [ + os.path.join(result_images_dir, f"{dataset.id}_{image_info.name}") + for image_info in images + ] + else: + image_paths = [ + os.path.join(result_images_dir, image_info.name) for image_info in images + ] + for idx, path in enumerate(image_paths): + if os.path.exists(path): + img_name = os.path.basename(path) + name, ext = os.path.splitext(img_name) + i = 1 + new_name = f"{name}_{i}{ext}" + while os.path.exists(os.path.join(result_images_dir, new_name)): + i += 1 new_name = f"{name}_{i}{ext}" - while os.path.exists(os.path.join(result_images_dir, new_name)): - i += 1 - new_name = f"{name}_{i}{ext}" - sly.logger.warn( - f"Image {img_name} already exists in the directory. New name: {new_name}" - ) - image_paths[idx] = os.path.join(result_images_dir, new_name) - - with g.Timer("Image downloading", len(image_ids)): - coro = api.image.download_paths_async(image_ids, image_paths) - loop = sly.utils.get_or_create_event_loop() - if loop.is_running(): - future = asyncio.run_coroutine_threadsafe(coro, loop) - future.result() - else: - loop.run_until_complete(coro) - - ann_infos = [] - with g.Timer("Annotation downloading", len(image_ids)): - coro = api.annotation.download_batch_async(dataset.id, image_ids) - loop = sly.utils.get_or_create_event_loop() - if loop.is_running(): - future = asyncio.run_coroutine_threadsafe(coro, loop) - ann_infos.extend(future.result()) - else: - ann_infos.extend(loop.run_until_complete(coro)) - - for image_info, ann_info, img_path in zip(batch, ann_infos, image_paths): - cur_img_filename = os.path.basename(img_path) - img_title, img_ext = os.path.splitext(cur_img_filename) - - if is_trainval == 1: - cur_img_stats = {"classes": set(), "dataset": dataset.name, "name": img_title} - images_stats.append(cur_img_stats) + sly.logger.warning( + f"Image {img_name} already exists in the directory. New name: {new_name}" + ) + image_paths[idx] = os.path.join(result_images_dir, new_name) + + di_progress = sly.tqdm_sly( + desc=f"Downloading images from {dataset.name}", total=len(images) + ) + coro = api.image.download_paths_async(image_ids, image_paths, progress_cb=di_progress) + loop = sly.utils.get_or_create_event_loop() + if loop.is_running(): + future = asyncio.run_coroutine_threadsafe(coro, loop) + future.result() + else: + loop.run_until_complete(coro) + + da_progress = sly.tqdm_sly( + desc=f"Downloading annotations from {dataset.name}", total=len(images) + ) + coro = api.annotation.download_batch_async(dataset.id, image_ids, progress_cb=da_progress) + loop = sly.utils.get_or_create_event_loop() + if loop.is_running(): + future = asyncio.run_coroutine_threadsafe(coro, loop) + ann_infos = future.result() + else: + ann_infos = loop.run_until_complete(coro) + + for image_info, ann_info, img_path in zip(images, ann_infos, image_paths): + cur_img_filename = os.path.basename(img_path) + img_title, img_ext = os.path.splitext(cur_img_filename) + + if is_trainval == 1: + cur_img_stats = {"classes": set(), "dataset": dataset.name, "name": img_title} + images_stats.append(cur_img_stats) + else: + cur_img_stats = {"classes": set(), "dataset": None, "name": img_title} + images_stats.append(cur_img_stats) + + if img_ext not in g.VALID_IMG_EXT: + + jpg_image = f"{img_title}.jpg" + jpg_image_path = os.path.join(result_images_dir, jpg_image) + + im = sly.image.read(img_path) + sly.image.write(jpg_image_path, im) + sly.fs.silent_remove(img_path) + + ann = sly.Annotation.from_json(ann_info.annotation, meta) + tag = utils.find_first_tag(ann.img_tags, g.SPLIT_TAGS) + if tag is not None: + cur_img_stats["dataset"] = tag.meta.name + + valid_labels = [] + for label in ann.labels: + if type(label.geometry) in g.SUPPORTED_GEOMETRY_TYPES: + valid_labels.append(label) else: - cur_img_stats = {"classes": set(), "dataset": None, "name": img_title} - images_stats.append(cur_img_stats) - - if img_ext not in g.VALID_IMG_EXT: - - jpg_image = f"{img_title}.jpg" - jpg_image_path = os.path.join(result_images_dir, jpg_image) - - im = sly.image.read(img_path) - sly.image.write(jpg_image_path, im) - sly.fs.silent_remove(img_path) - - ann = sly.Annotation.from_json(ann_info.annotation, meta) - tag = utils.find_first_tag(ann.img_tags, g.SPLIT_TAGS) - if tag is not None: - cur_img_stats["dataset"] = tag.meta.name - - valid_labels = [] - for label in ann.labels: - if type(label.geometry) in g.SUPPORTED_GEOMETRY_TYPES: - valid_labels.append(label) - else: - sly.logger.warn( - f"Label has unsupported geometry type ({type(label.geometry)}) and will be skipped." - ) - - ann = ann.clone(labels=valid_labels) - utils.ann_to_xml(project_info, image_info, cur_img_filename, result_ann_dir, ann) - for label in ann.labels: - cur_img_stats["classes"].add(label.obj_class.name) - classes_colors[label.obj_class.name] = tuple(label.obj_class.color) - - fake_contour_th = 0 - if g.PASCAL_CONTOUR_THICKNESS != 0: - fake_contour_th = 2 * g.PASCAL_CONTOUR_THICKNESS + 1 - - utils.from_ann_to_instance_mask( - ann, - os.path.join(result_class_dir_name, img_title + g.pascal_ann_ext), - fake_contour_th, - ) - utils.from_ann_to_class_mask( - ann, os.path.join(result_obj_dir, img_title + g.pascal_ann_ext), fake_contour_th - ) - - progress.iter_done_report() + sly.logger.warning( + f"Label has unsupported geometry type ({type(label.geometry)}) and will be skipped." + ) + + ann = ann.clone(labels=valid_labels) + utils.ann_to_xml(project_info, image_info, cur_img_filename, result_ann_dir, ann) + for label in ann.labels: + cur_img_stats["classes"].add(label.obj_class.name) + classes_colors[label.obj_class.name] = tuple(label.obj_class.color) + + fake_contour_th = 0 + if g.PASCAL_CONTOUR_THICKNESS != 0: + fake_contour_th = 2 * g.PASCAL_CONTOUR_THICKNESS + 1 + + utils.from_ann_to_instance_mask( + ann, + os.path.join(result_class_dir_name, img_title + g.pascal_ann_ext), + fake_contour_th, + ) + utils.from_ann_to_class_mask( + ann, os.path.join(result_obj_dir, img_title + g.pascal_ann_ext), fake_contour_th + ) + progress(1) classes_colors = OrderedDict((sorted(classes_colors.items(), key=lambda t: t[0])))