From 3d197d3f83805e28b62d7f9176904ab8479c42e4 Mon Sep 17 00:00:00 2001 From: GoldenAnpu Date: Thu, 19 Dec 2024 21:06:44 +0100 Subject: [PATCH] Refactor --- .vscode/settings.json | 5 +- config.json | 52 +++++------ dev_requirements.txt | 2 +- local.env | 6 +- src/globals.py | 23 +---- src/main.py | 205 +++++++++++++++++++++--------------------- 6 files changed, 132 insertions(+), 161 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 7604595..9495388 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -16,7 +16,10 @@ "editor.defaultFormatter": "esbenp.prettier-vscode" }, "[json]": { - "editor.defaultFormatter": "esbenp.prettier-vscode" + "editor.defaultFormatter": "esbenp.prettier-vscode", + "editor.formatOnSave": true, + "editor.tabSize": 4, + "editor.insertSpaces": false }, "[python]": { "editor.defaultFormatter": "ms-python.black-formatter", diff --git a/config.json b/config.json index 2e1d4dc..3379d1e 100644 --- a/config.json +++ b/config.json @@ -1,30 +1,24 @@ { - "name": "Export to Pascal VOC", - "type": "app", - "categories": [ - "images", - "export" - ], - "description": "Converts Supervisely Project to Pascal VOC format", - "docker_image": "supervisely/import-export:6.73.242", - "instance_version": "6.12.5", - "main_script": "src/main.py", - "modal_template": "src/modal.html", - "modal_template_state": { - "pascalContourThickness": 3, - "trainSplitCoef": 0.8, - "addPrefixToImages": true - }, - "task_location": "workspace_tasks", - "icon": "https://i.imgur.com/zTseThz.png", - "icon_background": "#FFFFFF", - "headless": true, - "context_menu": { - "target": [ - "images_project", - "images_dataset" - ], - "context_root": "Download as" - }, - "poster": "https://user-images.githubusercontent.com/48245050/182382862-d74f1b2c-b19e-47c2-84db-45cd934ec34e.png" -} \ No newline at end of file + "name": "Export to Pascal VOC", + "type": "app", + "categories": ["images", "export"], + "description": "Converts Supervisely Project to Pascal VOC format", + "docker_image": "supervisely/import-export:6.73.259", + "instance_version": "6.12.12", + "main_script": "src/main.py", + "modal_template": "src/modal.html", + "modal_template_state": { + "pascalContourThickness": 3, + "trainSplitCoef": 0.8, + "addPrefixToImages": true + }, + "task_location": "workspace_tasks", + "icon": "https://i.imgur.com/zTseThz.png", + "icon_background": "#FFFFFF", + "headless": true, + "context_menu": { + "target": ["images_project", "images_dataset"], + "context_root": "Download as" + }, + "poster": "https://user-images.githubusercontent.com/48245050/182382862-d74f1b2c-b19e-47c2-84db-45cd934ec34e.png" +} diff --git a/dev_requirements.txt b/dev_requirements.txt index 2ec3e86..8783b9e 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -1,4 +1,4 @@ -supervisely==6.73.242 +supervisely==6.73.259 lxml numpy>=1.19.4 Pillow>=8.0.1 diff --git a/local.env b/local.env index 5f0d1d6..3a6f0cd 100644 --- a/local.env +++ b/local.env @@ -1,10 +1,6 @@ #TEAM_ID=448 #WORKSPACE_ID=690 -#PROJECT_ID=35637 - -TEAM_ID = 431 -WORKSPACE_ID = 1019 -PROJECT_ID = 40721 +PROJECT_ID=44124 modal.state.pascalContourThickness=3 modal.state.trainSplitCoef=0.8 \ No newline at end of file diff --git a/src/globals.py b/src/globals.py index ce7b98c..fa2f19b 100644 --- a/src/globals.py +++ b/src/globals.py @@ -1,9 +1,8 @@ import os +from distutils.util import strtobool import supervisely as sly from dotenv import load_dotenv -from distutils.util import strtobool -import time if sly.is_development(): 
load_dotenv("local.env") @@ -57,23 +56,3 @@ raise ValueError( f"train_val_split_coef should be between 0 and 1, your data is {TRAIN_VAL_SPLIT_COEF}" ) - -class Timer: - def __init__(self, message=None, items_cnt=None): - self.message = message - self.items_cnt = items_cnt - self.elapsed = 0 - - def __enter__(self): - self.start = time.perf_counter() - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self.end = time.perf_counter() - self.elapsed = self.end - self.start - msg = self.message or "Block execution" - if self.items_cnt is not None: - log_msg = f"{msg} time: {self.elapsed:.3f} seconds per {self.items_cnt} items ({self.elapsed/self.items_cnt:.3f} seconds per item)" - else: - log_msg = f"{msg} time: {self.elapsed:.3f} seconds" - sly.logger.info(log_msg) \ No newline at end of file diff --git a/src/main.py b/src/main.py index bbd3334..a983252 100644 --- a/src/main.py +++ b/src/main.py @@ -1,13 +1,13 @@ +import asyncio import os from collections import OrderedDict import supervisely as sly import globals as g -import workflow as w import utils +import workflow as w -import asyncio @sly.handle_exceptions(has_ui=False) def from_sly_to_pascal(api: sly.Api): @@ -49,116 +49,115 @@ def from_sly_to_pascal(api: sly.Api): total_images_cnt = ds_info.items_count dataset_names = ["trainval", "val", "train"] - progress = sly.Progress( - "Preparing images for export", total_images_cnt, sly.logger - ) - + progress = sly.tqdm_sly(desc="Preparing images for export", total=total_images_cnt) for dataset in datasets: + sly.logger.info(f"Processing dataset: {dataset.name}") if dataset.name in dataset_names: is_trainval = 1 else: is_trainval = 0 images = api.image.get_list(dataset.id) - for batch in sly.batched(images): - image_ids = [image_info.id for image_info in batch] - - if g.ADD_PREFIX_TO_IMAGES: - image_paths = [ - os.path.join(result_images_dir, f"{dataset.id}_{image_info.name}") - for image_info in images - ] - else: - image_paths = [ - os.path.join(result_images_dir, image_info.name) for image_info in images - ] - for idx, path in enumerate(image_paths): - if os.path.exists(path): - img_name = os.path.basename(path) - name, ext = os.path.splitext(img_name) - i = 1 + image_ids = [image_info.id for image_info in images] + + if g.ADD_PREFIX_TO_IMAGES: + image_paths = [ + os.path.join(result_images_dir, f"{dataset.id}_{image_info.name}") + for image_info in images + ] + else: + image_paths = [ + os.path.join(result_images_dir, image_info.name) for image_info in images + ] + for idx, path in enumerate(image_paths): + if os.path.exists(path): + img_name = os.path.basename(path) + name, ext = os.path.splitext(img_name) + i = 1 + new_name = f"{name}_{i}{ext}" + while os.path.exists(os.path.join(result_images_dir, new_name)): + i += 1 new_name = f"{name}_{i}{ext}" - while os.path.exists(os.path.join(result_images_dir, new_name)): - i += 1 - new_name = f"{name}_{i}{ext}" - sly.logger.warn( - f"Image {img_name} already exists in the directory. 
New name: {new_name}" - ) - image_paths[idx] = os.path.join(result_images_dir, new_name) - - with g.Timer("Image downloading", len(image_ids)): - coro = api.image.download_paths_async(image_ids, image_paths) - loop = sly.utils.get_or_create_event_loop() - if loop.is_running(): - future = asyncio.run_coroutine_threadsafe(coro, loop) - future.result() - else: - loop.run_until_complete(coro) - - ann_infos = [] - with g.Timer("Annotation downloading", len(image_ids)): - coro = api.annotation.download_batch_async(dataset.id, image_ids) - loop = sly.utils.get_or_create_event_loop() - if loop.is_running(): - future = asyncio.run_coroutine_threadsafe(coro, loop) - ann_infos.extend(future.result()) - else: - ann_infos.extend(loop.run_until_complete(coro)) - - for image_info, ann_info, img_path in zip(batch, ann_infos, image_paths): - cur_img_filename = os.path.basename(img_path) - img_title, img_ext = os.path.splitext(cur_img_filename) - - if is_trainval == 1: - cur_img_stats = {"classes": set(), "dataset": dataset.name, "name": img_title} - images_stats.append(cur_img_stats) + sly.logger.warning( + f"Image {img_name} already exists in the directory. New name: {new_name}" + ) + image_paths[idx] = os.path.join(result_images_dir, new_name) + + di_progress = sly.tqdm_sly( + desc=f"Downloading images from {dataset.name}", total=len(images) + ) + coro = api.image.download_paths_async(image_ids, image_paths, progress_cb=di_progress) + loop = sly.utils.get_or_create_event_loop() + if loop.is_running(): + future = asyncio.run_coroutine_threadsafe(coro, loop) + future.result() + else: + loop.run_until_complete(coro) + + da_progress = sly.tqdm_sly( + desc=f"Downloading annotations from {dataset.name}", total=len(images) + ) + coro = api.annotation.download_batch_async(dataset.id, image_ids, progress_cb=da_progress) + loop = sly.utils.get_or_create_event_loop() + if loop.is_running(): + future = asyncio.run_coroutine_threadsafe(coro, loop) + ann_infos = future.result() + else: + ann_infos = loop.run_until_complete(coro) + + for image_info, ann_info, img_path in zip(images, ann_infos, image_paths): + cur_img_filename = os.path.basename(img_path) + img_title, img_ext = os.path.splitext(cur_img_filename) + + if is_trainval == 1: + cur_img_stats = {"classes": set(), "dataset": dataset.name, "name": img_title} + images_stats.append(cur_img_stats) + else: + cur_img_stats = {"classes": set(), "dataset": None, "name": img_title} + images_stats.append(cur_img_stats) + + if img_ext not in g.VALID_IMG_EXT: + + jpg_image = f"{img_title}.jpg" + jpg_image_path = os.path.join(result_images_dir, jpg_image) + + im = sly.image.read(img_path) + sly.image.write(jpg_image_path, im) + sly.fs.silent_remove(img_path) + + ann = sly.Annotation.from_json(ann_info.annotation, meta) + tag = utils.find_first_tag(ann.img_tags, g.SPLIT_TAGS) + if tag is not None: + cur_img_stats["dataset"] = tag.meta.name + + valid_labels = [] + for label in ann.labels: + if type(label.geometry) in g.SUPPORTED_GEOMETRY_TYPES: + valid_labels.append(label) else: - cur_img_stats = {"classes": set(), "dataset": None, "name": img_title} - images_stats.append(cur_img_stats) - - if img_ext not in g.VALID_IMG_EXT: - - jpg_image = f"{img_title}.jpg" - jpg_image_path = os.path.join(result_images_dir, jpg_image) - - im = sly.image.read(img_path) - sly.image.write(jpg_image_path, im) - sly.fs.silent_remove(img_path) - - ann = sly.Annotation.from_json(ann_info.annotation, meta) - tag = utils.find_first_tag(ann.img_tags, g.SPLIT_TAGS) - if tag is not None: - 
cur_img_stats["dataset"] = tag.meta.name - - valid_labels = [] - for label in ann.labels: - if type(label.geometry) in g.SUPPORTED_GEOMETRY_TYPES: - valid_labels.append(label) - else: - sly.logger.warn( - f"Label has unsupported geometry type ({type(label.geometry)}) and will be skipped." - ) - - ann = ann.clone(labels=valid_labels) - utils.ann_to_xml(project_info, image_info, cur_img_filename, result_ann_dir, ann) - for label in ann.labels: - cur_img_stats["classes"].add(label.obj_class.name) - classes_colors[label.obj_class.name] = tuple(label.obj_class.color) - - fake_contour_th = 0 - if g.PASCAL_CONTOUR_THICKNESS != 0: - fake_contour_th = 2 * g.PASCAL_CONTOUR_THICKNESS + 1 - - utils.from_ann_to_instance_mask( - ann, - os.path.join(result_class_dir_name, img_title + g.pascal_ann_ext), - fake_contour_th, - ) - utils.from_ann_to_class_mask( - ann, os.path.join(result_obj_dir, img_title + g.pascal_ann_ext), fake_contour_th - ) - - progress.iter_done_report() + sly.logger.warning( + f"Label has unsupported geometry type ({type(label.geometry)}) and will be skipped." + ) + + ann = ann.clone(labels=valid_labels) + utils.ann_to_xml(project_info, image_info, cur_img_filename, result_ann_dir, ann) + for label in ann.labels: + cur_img_stats["classes"].add(label.obj_class.name) + classes_colors[label.obj_class.name] = tuple(label.obj_class.color) + + fake_contour_th = 0 + if g.PASCAL_CONTOUR_THICKNESS != 0: + fake_contour_th = 2 * g.PASCAL_CONTOUR_THICKNESS + 1 + + utils.from_ann_to_instance_mask( + ann, + os.path.join(result_class_dir_name, img_title + g.pascal_ann_ext), + fake_contour_th, + ) + utils.from_ann_to_class_mask( + ann, os.path.join(result_obj_dir, img_title + g.pascal_ann_ext), fake_contour_th + ) + progress(1) classes_colors = OrderedDict((sorted(classes_colors.items(), key=lambda t: t[0])))