Skip to content

Commit

Permalink
Merge branch 'master' into dninja
Browse files Browse the repository at this point in the history
  • Loading branch information
grokhi committed Jan 17, 2024
2 parents d7da973 + ace8bf2 commit 0d57194
Show file tree
Hide file tree
Showing 9 changed files with 184 additions and 45 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ venv
.venv/
dataset_tools
supervisely
debug
debug
35 changes: 35 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"files.exclude": {
"**/__pycache__": true,
"build": true,
"supervisely.egg-info": true,
// ".venv": true
},
"python.defaultInterpreterPath": ".venv/bin/python",
"editor.formatOnSave": true,
"editor.formatOnPaste": true,
"editor.formatOnType": true,
"black-formatter.args": ["--line-length", "100"],
"[html]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
"[json]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.organizeImports": "explicit"
}
},
"isort.args": ["--profile", "black"],
"debug.inlineValues": "off",
"python.analysis.typeCheckingMode": "off",
"python.analysis.autoImportCompletions": false,
"autoDocstring.docstringFormat": "sphinx",
"autoDocstring.customTemplatePath": "docs/.mustache",
"python.testing.pytestArgs": ["tests/inference_cache"],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
}
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
<a href="#How-To-Use">How To Use</a>
</p>


[![](https://img.shields.io/badge/supervisely-ecosystem-brightgreen)](https://ecosystem.supervise.ly/apps/supervisely-ecosystem/export-to-supervisely-format)
[![](https://img.shields.io/badge/slack-chat-green.svg?logo=slack)](https://supervise.ly/slack)
![GitHub release (latest SemVer)](https://img.shields.io/github/v/release/supervisely-ecosystem/export-to-supervisely-format)
Expand All @@ -19,13 +18,15 @@

# Overview

Download images project or dataset in [Supervisely JSON format](https://docs.supervise.ly/data-organization/00_ann_format_navi). It is possible to download both images and annotations or only annotations.
ℹ️ Starting from version 2.7.7 the application will save images metadata in JSON format to `meta` directory in each dataset.

Download images project or dataset in [Supervisely JSON format](https://docs.supervise.ly/data-organization/00_ann_format_navi). It is possible to download both images and annotations or only annotations.

# How To Use

**Step 1**: Add app to your team from [Ecosystem](https://ecosystem.supervise.ly/apps/export-to-supervisely-format) if it is not there

**Step 2**: Open context menu of images project (or images dataset) -> `Run App` -> `Download via app` -> `Export to Supervisely format`
**Step 2**: Open context menu of images project (or images dataset) -> `Run App` -> `Download via app` -> `Export to Supervisely format`

<img src="https://i.imgur.com/6JNfu3g.png" width="600px"/>

Expand Down
4 changes: 2 additions & 2 deletions config.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
"export"
],
"description": "images and JSON annotations",
"docker_image": "supervisely/import-export:6.72.114",
"instance_version": "6.5.1",
"docker_image": "supervisely/import-export:6.72.205",
"instance_version": "6.8.48",
"main_script": "src/main.py",
"modal_template": "src/modal.html",
"modal_template_state": {
Expand Down
22 changes: 22 additions & 0 deletions create_venv.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash

# Create a fresh Python virtual environment in .venv and install the
# development requirements into it.
#
# learn more in documentation
# Official python docs: https://docs.python.org/3/library/venv.html
# Supervisely developer portal: https://developer.supervise.ly/getting-started/installation#venv

# Start from a clean slate: remove any previously created environment.
if [ -d ".venv" ]; then
echo "VENV already exists, will be removed"
rm -rf .venv
fi

# Steps are chained with && so the script stops at the first failing command.
echo "VENV will be created" && \
python3 -m venv .venv && \
source .venv/bin/activate && \

echo "Install requirements..." && \
pip3 install -r dev_requirements.txt && \
echo "Requirements have been successfully installed" && \
echo "Testing imports, please wait a minute ..." && \
python3 -c "import supervisely as sly" && \
echo "Success!" && \
deactivate
15 changes: 0 additions & 15 deletions debug.env

This file was deleted.

13 changes: 13 additions & 0 deletions local.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
PYTHONUNBUFFERED=1
TASK_ID=51612 #50046

context.teamId=449 #537
context.workspaceId=691 #1029
modal.state.slyProjectId=32796 #32318
modal.state.download=all
modal.state.fixExtension=true

DEBUG_APP_DIR=debug/app_debug_data
DEBUG_CACHE_DIR=debug/app_debug_cache

LOG_LEVEL="debug"
44 changes: 20 additions & 24 deletions src/main.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@

import os, json
import json
import os
import tarfile
from distutils import util

import tqdm
from dotenv import load_dotenv
from PIL import Image

import supervisely as sly
from supervisely.api.module_api import ApiField
from supervisely.io.fs import get_file_ext, get_file_name_with_ext
from supervisely.app.v1.app_service import AppService
from distutils import util
from dotenv import load_dotenv

from PIL import Image
from supervisely.io.fs import get_file_ext, get_file_name_with_ext

Image.MAX_IMAGE_PIXELS = 1000000000

from typing import Optional

from dataset_tools import ProjectRepo

from typing import Optional
if sly.is_development():
load_dotenv("local.env")
load_dotenv(os.path.expanduser("~/ninja.env"))
Expand Down Expand Up @@ -123,9 +125,7 @@ def download_as_sly(api: sly.Api, task_id, context, state, app_logger):
try:
datasets = api.dataset.get_list(project.id)
except Exception as e:
raise Exception(
f"Failed to get list of datasets from project ID:{project.id}. {e}"
)
raise Exception(f"Failed to get list of datasets from project ID:{project.id}. {e}")
dataset_ids = [dataset.id for dataset in datasets]
if mode == "all":
download_json_plus_images(api, project, dataset_ids)
Expand Down Expand Up @@ -176,13 +176,13 @@ def download_json_plus_images(api, project, dataset_ids):
sly.fs.clean_dir(download_dir)

tf_urls_path = "/cache/released_datasets.json"
local_save_path = sly.app.get_data_dir() + '/tmp/released_datasets.json'
local_save_path = sly.app.get_data_dir() + "/tmp/released_datasets.json"
if api.file.exists(TEAM_ID, tf_urls_path):
api.file.download(TEAM_ID, tf_urls_path, local_save_path)
with open(local_save_path, "r") as f:
urls = json.load(f)
else:
raise FileNotFoundError(f"File not found: '{tf_urls_path}'")
raise FileNotFoundError(f"File not found: '{tf_urls_path}'")
sly.download_project(
api,
project.id,
Expand All @@ -191,25 +191,23 @@ def download_json_plus_images(api, project, dataset_ids):
log_progress=True,
batch_size=batch_size,
)
sly.logger.info(
"Project {!r} has been successfully downloaded.".format(project.name)
)
sly.logger.info("Project {!r} has been successfully downloaded.".format(project.name))

sly.logger.info("Start building files...")
# sly.logger.info(
# f"LICENSE: {urls[project.name].get('LICENSE', 'Please add license')}"
# )
# sly.logger.info(f"README: {urls[project.name].get('README', 'Please add readme')}")
build_license(urls[project.name]['markdown']['LICENSE'], download_dir)
build_readme(urls[project.name]['markdown']['README'], download_dir)
build_license(urls[project.name]["markdown"]["LICENSE"], download_dir)
build_readme(urls[project.name]["markdown"]["README"], download_dir)
sly.logger.info("'LICENSE.md' and 'README.md' were successfully built.")


def download_only_json(api, project, dataset_ids):
sly.logger.info("DOWNLOAD_PROJECT", extra={"title": project.name})
download_dir = os.path.join(my_app.data_dir, f"{project.id}_{project.name}")
sly.fs.mkdir(download_dir)
meta_json = api.project.get_meta(project.id)
meta_json = api.project.get_meta(project.id, with_settings=True)
sly.io.json.dump_json_file(meta_json, os.path.join(download_dir, "meta.json"))

total_images = 0
Expand Down Expand Up @@ -242,9 +240,7 @@ def download_only_json(api, project, dataset_ids):
ds_progress.iters_done_report(len(batch))
total_images += len(batch)

sly.logger.info(
"Project {!r} has been successfully downloaded".format(project.name)
)
sly.logger.info("Project {!r} has been successfully downloaded".format(project.name))
sly.logger.info("Total number of images: {!r}".format(total_images))


Expand All @@ -255,8 +251,8 @@ def build_license(license_content: str, download_dir: str):
license_file.write(license_content)


def build_readme(readme_content:str, download_dir:str):
readme_path = os.path.join(download_dir, "README.md")
def build_readme(readme_content: str, download_dir: str):
readme_path = os.path.join(download_dir, "README.md")
with open(readme_path, "w") as license_file:
license_file.write(readme_content)

Expand Down
87 changes: 87 additions & 0 deletions src/sly_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import os
import time

import supervisely as sly


def _download_batch_with_retry(api: sly.Api, dataset_id, image_ids):
    """Download raw bytes for a batch of images, retrying on failure.

    Makes one initial attempt plus up to ``retry_cnt`` retries with a linearly
    growing back-off (2s, 4s, ...). A response that contains fewer items than
    requested is treated as a failure.

    :param api: Supervisely API client.
    :param dataset_id: ID of the dataset the images belong to.
    :param image_ids: IDs of the images to download.
    :return: list of raw image bytes, one entry per requested ID.
    :raises RuntimeError: if every attempt fails or returns an incomplete batch.
    """
    retry_cnt = 5

    def _attempt():
        # Single download attempt; an incomplete batch counts as an error so
        # that callers never silently receive fewer images than they asked for.
        imgs_bytes = api.image.download_bytes(dataset_id, image_ids)
        if len(imgs_bytes) != len(image_ids):
            raise RuntimeError(
                f"Downloaded {len(imgs_bytes)} images, but {len(image_ids)} expected."
            )
        return imgs_bytes

    try:
        return _attempt()
    except Exception as e:
        sly.logger.warn(f"Failed to download images... Error: {e}")

    last_exc = None
    for curr_retry in range(1, retry_cnt + 1):
        try:
            sly.logger.warn(f"Retry {curr_retry}/{retry_cnt} to download images")
            time.sleep(2 * curr_retry)  # linear back-off between attempts
            return _attempt()
        except Exception as e:
            last_exc = e

    # Chain the last error so the root cause is preserved in the traceback
    # (the original raise dropped it entirely).
    raise RuntimeError(
        f"Failed to download images with ids {image_ids}. Check your data and try again later."
    ) from last_exc


def download_project(
    api: sly.Api,
    project_id,
    dest_dir,
    dataset_ids=None,
    log_progress=True,
    batch_size=10,
    save_image_meta=True,
):
    """Download a Supervisely project (images and annotations) to *dest_dir*.

    :param api: Supervisely API client.
    :param project_id: ID of the project to download.
    :param dest_dir: local directory where the project is created.
    :param dataset_ids: optional collection of dataset IDs to restrict the download.
    :param log_progress: if True, report per-dataset progress via ``sly.Progress``.
    :param batch_size: number of images fetched per API request.
    :param save_image_meta: if True, dump each image's metadata as JSON into a
        ``meta`` subdirectory of its dataset.
    """
    wanted = None if dataset_ids is None else set(dataset_ids)

    project_fs = sly.Project(dest_dir, sly.OpenMode.CREATE)
    project_meta = sly.ProjectMeta.from_json(api.project.get_meta(project_id, with_settings=True))
    project_fs.set_meta(project_meta)

    for ds_info in api.dataset.get_list(project_id):
        if wanted is not None and ds_info.id not in wanted:
            continue  # this dataset was not requested

        ds_fs = project_fs.create_dataset(ds_info.name)
        image_infos = api.image.get_list(ds_info.id)

        if save_image_meta:
            # One "<image name>.json" per image under <dataset>/meta.
            meta_dir = os.path.join(dest_dir, ds_info.name, "meta")
            sly.fs.mkdir(meta_dir)
            for info in image_infos:
                sly.json.dump_json_file(info.meta, os.path.join(meta_dir, info.name + ".json"))

        progress = (
            sly.Progress(
                "Downloading dataset: {!r}".format(ds_info.name),
                total_cnt=len(image_infos),
            )
            if log_progress
            else None
        )

        for chunk in sly.batched(image_infos, batch_size=batch_size):
            ids = [info.id for info in chunk]
            names = [info.name for info in chunk]

            # Fetch raw image bytes (with retries) and the matching annotations.
            raw_images = _download_batch_with_retry(api, ds_info.id, ids)
            anns = [ai.annotation for ai in api.annotation.download_batch(ds_info.id, ids)]

            for item_name, item_bytes, item_ann in zip(names, raw_images, anns):
                ds_fs.add_item_raw_bytes(item_name=item_name, item_raw_bytes=item_bytes, ann=item_ann)

            if progress is not None:
                progress.iters_done_report(len(chunk))

0 comments on commit 0d57194

Please sign in to comment.