diff --git a/.gitignore b/.gitignore index c389fee..3e8c02c 100644 --- a/.gitignore +++ b/.gitignore @@ -4,4 +4,4 @@ venv .venv/ dataset_tools supervisely -debug \ No newline at end of file +debug diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..adb9109 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,35 @@ +{ + "files.exclude": { + "**/__pycache__": true, + "build": true, + "supervisely.egg-info": true, + // ".venv": true + }, + "python.defaultInterpreterPath": ".venv/bin/python", + "editor.formatOnSave": true, + "editor.formatOnPaste": true, + "editor.formatOnType": true, + "black-formatter.args": ["--line-length", "100"], + "[html]": { + "editor.defaultFormatter": "esbenp.prettier-vscode" + }, + "[json]": { + "editor.defaultFormatter": "esbenp.prettier-vscode" + }, + "[python]": { + "editor.defaultFormatter": "ms-python.black-formatter", + "editor.formatOnSave": true, + "editor.codeActionsOnSave": { + "source.organizeImports": "explicit" + } + }, + "isort.args": ["--profile", "black"], + "debug.inlineValues": "off", + "python.analysis.typeCheckingMode": "off", + "python.analysis.autoImportCompletions": false, + "autoDocstring.docstringFormat": "sphinx", + "autoDocstring.customTemplatePath": "docs/.mustache", + "python.testing.pytestArgs": ["tests/inference_cache"], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true +} diff --git a/README.md b/README.md index 72a4d86..a8fb091 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,6 @@ How To Use

- [![](https://img.shields.io/badge/supervisely-ecosystem-brightgreen)](https://ecosystem.supervise.ly/apps/supervisely-ecosystem/export-to-supervisely-format) [![](https://img.shields.io/badge/slack-chat-green.svg?logo=slack)](https://supervise.ly/slack) ![GitHub release (latest SemVer)](https://img.shields.io/github/v/release/supervisely-ecosystem/export-to-supervisely-format) @@ -19,13 +18,15 @@ # Overview -Download images project or dataset in [Supervisely JSON format](https://docs.supervise.ly/data-organization/00_ann_format_navi). It is possible to download both images and annotations or only annotations. +ℹ️ Starting from version 2.7.7 the application will save images metadata in JSON format to `meta` directory in each dataset. +Download images project or dataset in [Supervisely JSON format](https://docs.supervise.ly/data-organization/00_ann_format_navi). It is possible to download both images and annotations or only annotations. # How To Use + **Step 1**: Add app to your team from [Ecosystem](https://ecosystem.supervise.ly/apps/export-to-supervisely-format) if it is not there -**Step 2**: Open context menu of images project (or images dataset) -> `Run App` -> `Download via app` -> `Export to Supervisely format` +**Step 2**: Open context menu of images project (or images dataset) -> `Run App` -> `Download via app` -> `Export to Supervisely format` diff --git a/config.json b/config.json index 2ab9fe9..9a480d7 100644 --- a/config.json +++ b/config.json @@ -6,8 +6,8 @@ "export" ], "description": "images and JSON annotations", - "docker_image": "supervisely/import-export:6.72.114", - "instance_version": "6.5.1", + "docker_image": "supervisely/import-export:6.72.205", + "instance_version": "6.8.48", "main_script": "src/main.py", "modal_template": "src/modal.html", "modal_template_state": { diff --git a/create_venv.sh b/create_venv.sh new file mode 100644 index 0000000..ba53ea2 --- /dev/null +++ b/create_venv.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +# learn more in 
documentation +# Official python docs: https://docs.python.org/3/library/venv.html +# Superviely developer portal: https://developer.supervise.ly/getting-started/installation#venv + +if [ -d ".venv" ]; then + echo "VENV already exists, will be removed" + rm -rf .venv +fi + +echo "VENV will be created" && \ +python3 -m venv .venv && \ +source .venv/bin/activate && \ + +echo "Install requirements..." && \ +pip3 install -r dev_requirements.txt && \ +echo "Requirements have been successfully installed" && \ +echo "Testing imports, please wait a minute ..." && \ +python3 -c "import supervisely as sly" && \ +echo "Success!" && \ +deactivate \ No newline at end of file diff --git a/debug.env b/debug.env deleted file mode 100644 index 2dc2f1b..0000000 --- a/debug.env +++ /dev/null @@ -1,15 +0,0 @@ -PYTHONUNBUFFERED=1 -TASK_ID=14507 - -context.teamId=9 -context.workspaceId=28 -modal.state.slyProjectId=1195 -modal.state.download=all -modal.state.fixExtension=true -DEBUG_APP_DIR="debug/app_debug_data" -DEBUG_CACHE_DIR="debug/app_debug_cache" - -LOG_LEVEL="debug" - -AGENT_ID = 4 - diff --git a/local.env b/local.env new file mode 100644 index 0000000..e0625e6 --- /dev/null +++ b/local.env @@ -0,0 +1,13 @@ +PYTHONUNBUFFERED=1 +TASK_ID=51612 #50046 + +context.teamId=449 #537 +context.workspaceId=691 #1029 +modal.state.slyProjectId=32796 #32318 +modal.state.download=all +modal.state.fixExtension=true + +DEBUG_APP_DIR=debug/app_debug_data +DEBUG_CACHE_DIR=debug/app_debug_cache + +LOG_LEVEL="debug" diff --git a/src/main.py b/src/main.py index 0b51648..655bf94 100644 --- a/src/main.py +++ b/src/main.py @@ -1,21 +1,23 @@ - -import os, json +import json +import os import tarfile +from distutils import util + import tqdm +from dotenv import load_dotenv +from PIL import Image + import supervisely as sly from supervisely.api.module_api import ApiField -from supervisely.io.fs import get_file_ext, get_file_name_with_ext from supervisely.app.v1.app_service import AppService -from distutils 
import util -from dotenv import load_dotenv - -from PIL import Image +from supervisely.io.fs import get_file_ext, get_file_name_with_ext Image.MAX_IMAGE_PIXELS = 1000000000 +from typing import Optional + from dataset_tools import ProjectRepo -from typing import Optional if sly.is_development(): load_dotenv("local.env") load_dotenv(os.path.expanduser("~/ninja.env")) @@ -123,9 +125,7 @@ def download_as_sly(api: sly.Api, task_id, context, state, app_logger): try: datasets = api.dataset.get_list(project.id) except Exception as e: - raise Exception( - f"Failed to get list of datasets from project ID:{project.id}. {e}" - ) + raise Exception(f"Failed to get list of datasets from project ID:{project.id}. {e}") dataset_ids = [dataset.id for dataset in datasets] if mode == "all": download_json_plus_images(api, project, dataset_ids) @@ -176,13 +176,13 @@ def download_json_plus_images(api, project, dataset_ids): sly.fs.clean_dir(download_dir) tf_urls_path = "/cache/released_datasets.json" - local_save_path = sly.app.get_data_dir() + '/tmp/released_datasets.json' + local_save_path = sly.app.get_data_dir() + "/tmp/released_datasets.json" if api.file.exists(TEAM_ID, tf_urls_path): api.file.download(TEAM_ID, tf_urls_path, local_save_path) with open(local_save_path, "r") as f: urls = json.load(f) else: - raise FileNotFoundError(f"File not found: '{tf_urls_path}'") + raise FileNotFoundError(f"File not found: '{tf_urls_path}'") sly.download_project( api, project.id, @@ -191,17 +191,15 @@ def download_json_plus_images(api, project, dataset_ids): log_progress=True, batch_size=batch_size, ) - sly.logger.info( - "Project {!r} has been successfully downloaded.".format(project.name) - ) + sly.logger.info("Project {!r} has been successfully downloaded.".format(project.name)) sly.logger.info("Start building files...") # sly.logger.info( # f"LICENSE: {urls[project.name].get('LICENSE', 'Please add license')}" # ) # sly.logger.info(f"README: {urls[project.name].get('README', 'Please add 
import os
import time

import supervisely as sly


def _download_batch_with_retry(api: sly.Api, dataset_id, image_ids, retry_cnt=5):
    """Download raw bytes for a batch of images, retrying on failure.

    One initial attempt is made, followed by up to ``retry_cnt`` retries.
    Retry number ``i`` is preceded by a pause of ``2 * i`` seconds. An attempt
    counts as failed when ``api.image.download_bytes`` raises, or when it
    returns a different number of items than ``image_ids`` contains.

    :param api: Supervisely API client.
    :param dataset_id: ID of the dataset passed to ``api.image.download_bytes``.
    :param image_ids: IDs of the images to download.
    :param retry_cnt: Maximum number of retries after the initial attempt.
    :return: The value returned by ``api.image.download_bytes``.
    :raises RuntimeError: If every attempt fails; chained to the last error.
    """
    last_error = None
    # Attempt 0 is the initial try; attempts 1..retry_cnt are the retries.
    for attempt in range(retry_cnt + 1):
        if attempt > 0:
            sly.logger.warning(f"Retry {attempt}/{retry_cnt} to download images")
            # Linear back-off: wait longer before each subsequent retry.
            time.sleep(2 * attempt)
        try:
            imgs_bytes = api.image.download_bytes(dataset_id, image_ids)
            if len(imgs_bytes) != len(image_ids):
                raise RuntimeError(
                    f"Downloaded {len(imgs_bytes)} images, but {len(image_ids)} expected."
                )
            return imgs_bytes
        except Exception as e:
            # Deliberately broad: any failure of this attempt should trigger a
            # retry. The error is logged and kept so it is never lost silently.
            last_error = e
            sly.logger.warning(f"Failed to download images... Error: {e}")
    raise RuntimeError(
        f"Failed to download images with ids {image_ids}. Check your data and try again later."
    ) from last_error


def download_project(
    api: sly.Api,
    project_id,
    dest_dir,
    dataset_ids=None,
    log_progress=True,
    batch_size=10,
    save_image_meta=True,
):
    """Download a project (images and JSON annotations) into ``dest_dir``.

    Creates a ``sly.Project`` at ``dest_dir``, stores the project meta
    (requested with ``with_settings=True``) and then, for every selected
    dataset, downloads images and annotations batch by batch.

    :param api: Supervisely API client.
    :param project_id: ID of the project to download.
    :param dest_dir: Local directory in which the project is created.
    :param dataset_ids: IDs of the datasets to download; ``None`` downloads
        every dataset returned by ``api.dataset.get_list``.
    :param log_progress: If true, report per-dataset progress via ``sly.Progress``.
    :param batch_size: Number of images requested per API call.
    :param save_image_meta: If true, dump ``image_info.meta`` of every image to
        ``<dest_dir>/<dataset name>/meta/<image name>.json``.
    :raises RuntimeError: If a batch of images cannot be downloaded, or the
        number of returned annotations differs from the number of images.
    """
    dataset_ids = set(dataset_ids) if dataset_ids is not None else None
    project_fs = sly.Project(dest_dir, sly.OpenMode.CREATE)
    meta = sly.ProjectMeta.from_json(api.project.get_meta(project_id, with_settings=True))
    project_fs.set_meta(meta)

    for dataset_info in api.dataset.get_list(project_id):
        dataset_id = dataset_info.id
        if dataset_ids is not None and dataset_id not in dataset_ids:
            continue

        dataset_fs = project_fs.create_dataset(dataset_info.name)
        images = api.image.get_list(dataset_id)

        if save_image_meta:
            meta_dir = os.path.join(dest_dir, dataset_info.name, "meta")
            sly.fs.mkdir(meta_dir)
            for image_info in images:
                meta_path = os.path.join(meta_dir, image_info.name + ".json")
                sly.json.dump_json_file(image_info.meta, meta_path)

        ds_progress = None
        if log_progress:
            ds_progress = sly.Progress(
                "Downloading dataset: {!r}".format(dataset_info.name),
                total_cnt=len(images),
            )

        for batch in sly.batched(images, batch_size=batch_size):
            image_ids = [image_info.id for image_info in batch]
            image_names = [image_info.name for image_info in batch]

            # download images
            batch_imgs_bytes = _download_batch_with_retry(api, dataset_id, image_ids)

            # download annotations in json format
            ann_infos = api.annotation.download_batch(dataset_id, image_ids)
            # zip() below would silently drop images if fewer annotations than
            # images came back, so fail loudly instead.
            if len(ann_infos) != len(image_ids):
                raise RuntimeError(
                    f"Downloaded {len(ann_infos)} annotations, but {len(image_ids)} expected."
                )
            # NOTE(review): items are paired by position; this assumes
            # download_batch returns annotations in the order of image_ids - confirm.
            ann_jsons = [ann_info.annotation for ann_info in ann_infos]

            for name, img_bytes, ann in zip(image_names, batch_imgs_bytes, ann_jsons):
                dataset_fs.add_item_raw_bytes(item_name=name, item_raw_bytes=img_bytes, ann=ann)

            if ds_progress is not None:
                ds_progress.iters_done_report(len(batch))