Skip to content

Commit

Permalink
Merge branch 'master' into dninja
Browse files Browse the repository at this point in the history
  • Loading branch information
grokhi committed Jan 17, 2024
2 parents d7da973 + ace8bf2 commit 0d57194
Show file tree
Hide file tree
Showing 9 changed files with 184 additions and 45 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ venv
.venv/
dataset_tools
supervisely
debug
debug
35 changes: 35 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"files.exclude": {
"**/__pycache__": true,
"build": true,
"supervisely.egg-info": true,
// ".venv": true
},
"python.defaultInterpreterPath": ".venv/bin/python",
"editor.formatOnSave": true,
"editor.formatOnPaste": true,
"editor.formatOnType": true,
"black-formatter.args": ["--line-length", "100"],
"[html]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
"[json]": {
"editor.defaultFormatter": "esbenp.prettier-vscode"
},
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.organizeImports": "explicit"
}
},
"isort.args": ["--profile", "black"],
"debug.inlineValues": "off",
"python.analysis.typeCheckingMode": "off",
"python.analysis.autoImportCompletions": false,
"autoDocstring.docstringFormat": "sphinx",
"autoDocstring.customTemplatePath": "docs/.mustache",
"python.testing.pytestArgs": ["tests/inference_cache"],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
}
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
<a href="#How-To-Use">How To Use</a>
</p>


[![](https://img.shields.io/badge/supervisely-ecosystem-brightgreen)](https://ecosystem.supervise.ly/apps/supervisely-ecosystem/export-to-supervisely-format)
[![](https://img.shields.io/badge/slack-chat-green.svg?logo=slack)](https://supervise.ly/slack)
![GitHub release (latest SemVer)](https://img.shields.io/github/v/release/supervisely-ecosystem/export-to-supervisely-format)
Expand All @@ -19,13 +18,15 @@

# Overview

Download images project or dataset in [Supervisely JSON format](https://docs.supervise.ly/data-organization/00_ann_format_navi). It is possible to download both images and annotations or only annotations.
ℹ️ Starting from version 2.7.7 the application will save images metadata in JSON format to `meta` directory in each dataset.

Download images project or dataset in [Supervisely JSON format](https://docs.supervise.ly/data-organization/00_ann_format_navi). It is possible to download both images and annotations or only annotations.

# How To Use

**Step 1**: Add app to your team from [Ecosystem](https://ecosystem.supervise.ly/apps/export-to-supervisely-format) if it is not there

**Step 2**: Open context menu of images project (or images dataset) -> `Run App` -> `Download via app` -> `Export to Supervisely format`
**Step 2**: Open context menu of images project (or images dataset) -> `Run App` -> `Download via app` -> `Export to Supervisely format`

<img src="https://i.imgur.com/6JNfu3g.png" width="600px"/>

Expand Down
4 changes: 2 additions & 2 deletions config.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
"export"
],
"description": "images and JSON annotations",
"docker_image": "supervisely/import-export:6.72.114",
"instance_version": "6.5.1",
"docker_image": "supervisely/import-export:6.72.205",
"instance_version": "6.8.48",
"main_script": "src/main.py",
"modal_template": "src/modal.html",
"modal_template_state": {
Expand Down
22 changes: 22 additions & 0 deletions create_venv.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash

# Create a fresh Python virtual environment in .venv and install the
# development requirements into it.
#
# learn more in documentation
# Official python docs: https://docs.python.org/3/library/venv.html
# Supervisely developer portal: https://developer.supervise.ly/getting-started/installation#venv

# Start from a clean slate: remove any previously created environment.
if [ -d ".venv" ]; then
echo "VENV already exists, will be removed"
rm -rf .venv
fi

# Steps are chained with && so the script stops at the first failing command.
echo "VENV will be created" && \
python3 -m venv .venv && \
source .venv/bin/activate && \

echo "Install requirements..." && \
pip3 install -r dev_requirements.txt && \
echo "Requirements have been successfully installed" && \
echo "Testing imports, please wait a minute ..." && \
python3 -c "import supervisely as sly" && \
echo "Success!" && \
deactivate
15 changes: 0 additions & 15 deletions debug.env

This file was deleted.

13 changes: 13 additions & 0 deletions local.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
PYTHONUNBUFFERED=1
TASK_ID=51612 #50046

context.teamId=449 #537
context.workspaceId=691 #1029
modal.state.slyProjectId=32796 #32318
modal.state.download=all
modal.state.fixExtension=true

DEBUG_APP_DIR=debug/app_debug_data
DEBUG_CACHE_DIR=debug/app_debug_cache

LOG_LEVEL="debug"
44 changes: 20 additions & 24 deletions src/main.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@

import os, json
import json
import os
import tarfile
from distutils import util

import tqdm
from dotenv import load_dotenv
from PIL import Image

import supervisely as sly
from supervisely.api.module_api import ApiField
from supervisely.io.fs import get_file_ext, get_file_name_with_ext
from supervisely.app.v1.app_service import AppService
from distutils import util
from dotenv import load_dotenv

from PIL import Image
from supervisely.io.fs import get_file_ext, get_file_name_with_ext

Image.MAX_IMAGE_PIXELS = 1000000000

from typing import Optional

from dataset_tools import ProjectRepo

from typing import Optional
if sly.is_development():
load_dotenv("local.env")
load_dotenv(os.path.expanduser("~/ninja.env"))
Expand Down Expand Up @@ -123,9 +125,7 @@ def download_as_sly(api: sly.Api, task_id, context, state, app_logger):
try:
datasets = api.dataset.get_list(project.id)
except Exception as e:
raise Exception(
f"Failed to get list of datasets from project ID:{project.id}. {e}"
)
raise Exception(f"Failed to get list of datasets from project ID:{project.id}. {e}")
dataset_ids = [dataset.id for dataset in datasets]
if mode == "all":
download_json_plus_images(api, project, dataset_ids)
Expand Down Expand Up @@ -176,13 +176,13 @@ def download_json_plus_images(api, project, dataset_ids):
sly.fs.clean_dir(download_dir)

tf_urls_path = "/cache/released_datasets.json"
local_save_path = sly.app.get_data_dir() + '/tmp/released_datasets.json'
local_save_path = sly.app.get_data_dir() + "/tmp/released_datasets.json"
if api.file.exists(TEAM_ID, tf_urls_path):
api.file.download(TEAM_ID, tf_urls_path, local_save_path)
with open(local_save_path, "r") as f:
urls = json.load(f)
else:
raise FileNotFoundError(f"File not found: '{tf_urls_path}'")
raise FileNotFoundError(f"File not found: '{tf_urls_path}'")
sly.download_project(
api,
project.id,
Expand All @@ -191,25 +191,23 @@ def download_json_plus_images(api, project, dataset_ids):
log_progress=True,
batch_size=batch_size,
)
sly.logger.info(
"Project {!r} has been successfully downloaded.".format(project.name)
)
sly.logger.info("Project {!r} has been successfully downloaded.".format(project.name))

sly.logger.info("Start building files...")
# sly.logger.info(
# f"LICENSE: {urls[project.name].get('LICENSE', 'Please add license')}"
# )
# sly.logger.info(f"README: {urls[project.name].get('README', 'Please add readme')}")
build_license(urls[project.name]['markdown']['LICENSE'], download_dir)
build_readme(urls[project.name]['markdown']['README'], download_dir)
build_license(urls[project.name]["markdown"]["LICENSE"], download_dir)
build_readme(urls[project.name]["markdown"]["README"], download_dir)
sly.logger.info("'LICENSE.md' and 'README.md' were successfully built.")


def download_only_json(api, project, dataset_ids):
sly.logger.info("DOWNLOAD_PROJECT", extra={"title": project.name})
download_dir = os.path.join(my_app.data_dir, f"{project.id}_{project.name}")
sly.fs.mkdir(download_dir)
meta_json = api.project.get_meta(project.id)
meta_json = api.project.get_meta(project.id, with_settings=True)
sly.io.json.dump_json_file(meta_json, os.path.join(download_dir, "meta.json"))

total_images = 0
Expand Down Expand Up @@ -242,9 +240,7 @@ def download_only_json(api, project, dataset_ids):
ds_progress.iters_done_report(len(batch))
total_images += len(batch)

sly.logger.info(
"Project {!r} has been successfully downloaded".format(project.name)
)
sly.logger.info("Project {!r} has been successfully downloaded".format(project.name))
sly.logger.info("Total number of images: {!r}".format(total_images))


Expand All @@ -255,8 +251,8 @@ def build_license(license_content: str, download_dir: str):
license_file.write(license_content)


def build_readme(readme_content:str, download_dir:str):
readme_path = os.path.join(download_dir, "README.md")
def build_readme(readme_content: str, download_dir: str):
readme_path = os.path.join(download_dir, "README.md")
with open(readme_path, "w") as license_file:
license_file.write(readme_content)

Expand Down
87 changes: 87 additions & 0 deletions src/sly_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
import os
import time

import supervisely as sly


def _download_batch_with_retry(api: sly.Api, dataset_id, image_ids):
    """Download raw bytes for a batch of images, retrying on failure.

    Makes one initial attempt plus up to ``retry_cnt`` retries with a linearly
    growing back-off (2s, 4s, ...). A response that contains fewer items than
    requested is treated as a failure.

    :param api: Supervisely API client.
    :param dataset_id: ID of the dataset the images belong to.
    :param image_ids: IDs of the images to download.
    :return: list of raw image bytes, one entry per requested ID.
    :raises RuntimeError: if every attempt fails or returns an incomplete batch.
    """
    retry_cnt = 5

    def _attempt():
        # Single download attempt; an incomplete batch counts as an error so
        # that callers never silently receive fewer images than they asked for.
        imgs_bytes = api.image.download_bytes(dataset_id, image_ids)
        if len(imgs_bytes) != len(image_ids):
            raise RuntimeError(
                f"Downloaded {len(imgs_bytes)} images, but {len(image_ids)} expected."
            )
        return imgs_bytes

    try:
        return _attempt()
    except Exception as e:
        sly.logger.warn(f"Failed to download images... Error: {e}")

    last_exc = None
    for curr_retry in range(1, retry_cnt + 1):
        try:
            sly.logger.warn(f"Retry {curr_retry}/{retry_cnt} to download images")
            time.sleep(2 * curr_retry)  # linear back-off between attempts
            return _attempt()
        except Exception as e:
            last_exc = e

    # Chain the last error so the root cause is preserved in the traceback
    # (the original raise dropped it entirely).
    raise RuntimeError(
        f"Failed to download images with ids {image_ids}. Check your data and try again later."
    ) from last_exc


def download_project(
    api: sly.Api,
    project_id,
    dest_dir,
    dataset_ids=None,
    log_progress=True,
    batch_size=10,
    save_image_meta=True,
):
    """Download a Supervisely project (images and annotations) to *dest_dir*.

    :param api: Supervisely API client.
    :param project_id: ID of the project to download.
    :param dest_dir: local directory where the project is created.
    :param dataset_ids: optional collection of dataset IDs to restrict the download.
    :param log_progress: if True, report per-dataset progress via ``sly.Progress``.
    :param batch_size: number of images fetched per API request.
    :param save_image_meta: if True, dump each image's metadata as JSON into a
        ``meta`` subdirectory of its dataset.
    """
    wanted = None if dataset_ids is None else set(dataset_ids)

    project_fs = sly.Project(dest_dir, sly.OpenMode.CREATE)
    project_meta = sly.ProjectMeta.from_json(api.project.get_meta(project_id, with_settings=True))
    project_fs.set_meta(project_meta)

    for ds_info in api.dataset.get_list(project_id):
        if wanted is not None and ds_info.id not in wanted:
            continue  # this dataset was not requested

        ds_fs = project_fs.create_dataset(ds_info.name)
        image_infos = api.image.get_list(ds_info.id)

        if save_image_meta:
            # One "<image name>.json" per image under <dataset>/meta.
            meta_dir = os.path.join(dest_dir, ds_info.name, "meta")
            sly.fs.mkdir(meta_dir)
            for info in image_infos:
                sly.json.dump_json_file(info.meta, os.path.join(meta_dir, info.name + ".json"))

        progress = (
            sly.Progress(
                "Downloading dataset: {!r}".format(ds_info.name),
                total_cnt=len(image_infos),
            )
            if log_progress
            else None
        )

        for chunk in sly.batched(image_infos, batch_size=batch_size):
            ids = [info.id for info in chunk]
            names = [info.name for info in chunk]

            # Fetch raw image bytes (with retries) and the matching annotations.
            raw_images = _download_batch_with_retry(api, ds_info.id, ids)
            anns = [ai.annotation for ai in api.annotation.download_batch(ds_info.id, ids)]

            for item_name, item_bytes, item_ann in zip(names, raw_images, anns):
                ds_fs.add_item_raw_bytes(item_name=item_name, item_raw_bytes=item_bytes, ann=item_ann)

            if progress is not None:
                progress.iters_done_report(len(chunk))

0 comments on commit 0d57194

Please sign in to comment.