From 568b37d11123b2450b11851bbb633454f755773a Mon Sep 17 00:00:00 2001
From: Ars Vladimirov
Date: Tue, 10 Oct 2023 22:27:02 +0600
Subject: [PATCH] fix max image size

---
 src/main.py | 134 ++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 92 insertions(+), 42 deletions(-)

diff --git a/src/main.py b/src/main.py
index f232c0d..95c8735 100644
--- a/src/main.py
+++ b/src/main.py
@@ -1,4 +1,6 @@
 import os
+import tarfile
+from tqdm import tqdm
 import supervisely as sly
 from supervisely.api.module_api import ApiField
 from supervisely.io.fs import get_file_ext
@@ -6,6 +8,10 @@
 from distutils import util
 from dotenv import load_dotenv
 
+from PIL import Image
+
+Image.MAX_IMAGE_PIXELS = 1000000000
+
 from dataset_tools import ProjectRepo
 
 if sly.is_development():
@@ -17,19 +23,30 @@
 
 
 my_app: AppService = AppService()
 
-TEAM_ID = int(os.environ['context.teamId'])
-WORKSPACE_ID = int(os.environ['context.workspaceId'])
-PROJECT_ID = int(os.environ['modal.state.slyProjectId'])
-DATASET_ID = os.environ.get('modal.state.slyDatasetId', None)
+TEAM_ID = int(os.environ["context.teamId"])
+WORKSPACE_ID = int(os.environ["context.workspaceId"])
+PROJECT_ID = int(os.environ["modal.state.slyProjectId"])
+DATASET_ID = os.environ.get("modal.state.slyDatasetId", None)
 if DATASET_ID is not None:
     DATASET_ID = int(DATASET_ID)
 task_id = int(os.environ["TASK_ID"])
-mode = os.environ['modal.state.download']
-replace_method = bool(util.strtobool(os.environ['modal.state.fixExtension']))
+mode = os.environ["modal.state.download"]
+replace_method = bool(util.strtobool(os.environ["modal.state.fixExtension"]))
 batch_size = 10
 
 
+def pack_directory_to_tar(source_dir, output_tar):
+    if not os.path.exists(source_dir):
+        raise FileNotFoundError(f"Source directory '{source_dir}' does not exist.")
+
+    with tarfile.open(output_tar, "w") as tar:
+        for root, _, files in os.walk(source_dir):
+            for file in tqdm(files, desc=f"Packing to '{output_tar}'", unit="file"):
+                file_path = os.path.join(root, file)
+                tar.add(file_path, arcname=os.path.relpath(file_path, source_dir))
+
+
 def ours_convert_json_info(self, info: dict, skip_missing=True):
     if info is None:
         return None
@@ -44,7 +61,7 @@ def ours_convert_json_info(self, info: dict, skip_missing=True):
         val = info[field_name]
         field_values.append(val)
         if field_name == ApiField.MIME:
-            temp_ext = val.split('/')[1]
+            temp_ext = val.split("/")[1]
             field_values.append(temp_ext)
     for idx, field_name in enumerate(self.info_sequence()):
         if field_name == ApiField.NAME:
@@ -52,7 +69,7 @@ def ours_convert_json_info(self, info: dict, skip_missing=True):
             if not cur_ext:
                 field_values[idx] = "{}.{}".format(field_values[idx], temp_ext)
                 break
-            if temp_ext == 'jpeg' and cur_ext in ['jpg', 'jpeg', 'mpo']:
+            if temp_ext == "jpeg" and cur_ext in ["jpg", "jpeg", "mpo"]:
                 break
             if temp_ext != cur_ext and cur_ext is not None:
                 pass
@@ -61,12 +78,12 @@ def ours_convert_json_info(self, info: dict, skip_missing=True):
 
 
 def init(data, state):
-    state['download'] = mode
-    state['fixExtension'] = replace_method
+    state["download"] = mode
+    state["fixExtension"] = replace_method
 
 
 if replace_method:
-    sly.logger.debug('change SDK method')
+    sly.logger.debug("change SDK method")
     sly.api.image_api.ImageApi._convert_json_info = ours_convert_json_info
 
 
@@ -86,45 +103,68 @@ def download_as_sly(api: sly.Api, task_id, context, state, app_logger):
     try:
         datasets = api.dataset.get_list(project.id)
     except Exception as e:
-        raise Exception(f"Failed to get list of datasets from project ID:{project.id}. {e}")
{e}") + raise Exception( + f"Failed to get list of datasets from project ID:{project.id}. {e}" + ) dataset_ids = [dataset.id for dataset in datasets] - if mode == 'all': + if mode == "all": download_json_plus_images(api, project, dataset_ids) else: download_only_json(api, project, dataset_ids) - download_dir = os.path.join(my_app.data_dir, f'{project.id}_{project.name}') - full_archive_name = str(project.id) + '_' + project.name + '.tar' + download_dir = os.path.join(my_app.data_dir, f"{project.id}_{project.name}") + full_archive_name = str(project.id) + "_" + project.name + ".tar" result_archive = os.path.join(my_app.data_dir, full_archive_name) - sly.fs.archive_directory(download_dir, result_archive) + pack_directory_to_tar(download_dir, result_archive) + # sly.fs.archive_directory(download_dir, result_archive) app_logger.info("Result directory is archived") upload_progress = [] remote_archive_path = os.path.join( - sly.team_files.RECOMMENDED_EXPORT_PATH, "export-to-supervisely-format/{}_{}".format(task_id, full_archive_name)) + sly.team_files.RECOMMENDED_EXPORT_PATH, + "export-to-supervisely-format/{}_{}".format(task_id, full_archive_name), + ) def _print_progress(monitor, upload_progress): if len(upload_progress) == 0: - upload_progress.append(sly.Progress(message="Upload {!r}".format(full_archive_name), - total_cnt=monitor.len, - ext_logger=app_logger, - is_size=True)) + upload_progress.append( + sly.Progress( + message="Upload {!r}".format(full_archive_name), + total_cnt=monitor.len, + ext_logger=app_logger, + is_size=True, + ) + ) upload_progress[0].set_current_value(monitor.bytes_read) - file_info = api.file.upload(TEAM_ID, result_archive, remote_archive_path, - lambda m: _print_progress(m, upload_progress)) + file_info = api.file.upload( + TEAM_ID, + result_archive, + remote_archive_path, + lambda m: _print_progress(m, upload_progress), + ) app_logger.info("Uploaded to Team-Files: {!r}".format(file_info.path)) - api.task.set_output_archive(task_id, file_info.id, full_archive_name, file_url=file_info.storage_path) + api.task.set_output_archive( + task_id, file_info.id, full_archive_name, file_url=file_info.storage_path + ) my_app.stop() def download_json_plus_images(api, project, dataset_ids): - sly.logger.info('###########DOWNLOAD_PROJECT', extra={'title': project.name}) - download_dir = os.path.join(my_app.data_dir, f'{project.id}_{project.name}') + sly.logger.info("DOWNLOAD_PROJECT", extra={"title": project.name}) + download_dir = os.path.join(my_app.data_dir, f"{project.id}_{project.name}") if os.path.exists(download_dir): sly.fs.clean_dir(download_dir) - sly.download_project(api, project.id, download_dir, dataset_ids=dataset_ids, - log_progress=True, batch_size=batch_size) - sly.logger.info('Project {!r} has been successfully downloaded.'.format(project.name)) + sly.download_project( + api, + project.id, + download_dir, + dataset_ids=dataset_ids, + log_progress=True, + batch_size=batch_size, + ) + sly.logger.info( + "Project {!r} has been successfully downloaded.".format(project.name) + ) sly.logger.info("Start building files...") print(project.custom_data) @@ -134,24 +174,28 @@ def download_json_plus_images(api, project, dataset_ids): def download_only_json(api, project, dataset_ids): - sly.logger.info('DOWNLOAD_PROJECT', extra={'title': project.name}) - download_dir = os.path.join(my_app.data_dir, f'{project.id}_{project.name}') + sly.logger.info("DOWNLOAD_PROJECT", extra={"title": project.name}) + download_dir = os.path.join(my_app.data_dir, f"{project.id}_{project.name}") 
     sly.fs.mkdir(download_dir)
 
     meta_json = api.project.get_meta(project.id)
-    sly.io.json.dump_json_file(meta_json, os.path.join(download_dir, 'meta.json'))
+    sly.io.json.dump_json_file(meta_json, os.path.join(download_dir, "meta.json"))
 
     total_images = 0
     dataset_info = (
         [api.dataset.get_info_by_id(ds_id) for ds_id in dataset_ids]
-        if (dataset_ids is not None) else api.dataset.get_list(project.id))
+        if (dataset_ids is not None)
+        else api.dataset.get_list(project.id)
+    )
 
     for dataset in dataset_info:
-        ann_dir = os.path.join(download_dir, dataset.name, 'ann')
+        ann_dir = os.path.join(download_dir, dataset.name, "ann")
         sly.fs.mkdir(ann_dir)
 
         images = api.image.get_list(dataset.id)
         ds_progress = sly.Progress(
-            'Downloading annotations for: {!r}/{!r}'.format(project.name, dataset.name), total_cnt=len(images))
+            "Downloading annotations for: {!r}/{!r}".format(project.name, dataset.name),
+            total_cnt=len(images),
+        )
         for batch in sly.batched(images, batch_size=10):
             image_ids = [image_info.id for image_info in batch]
             image_names = [image_info.name for image_info in batch]
@@ -160,14 +204,19 @@ def download_only_json(api, project, dataset_ids):
 
             ann_infos = api.annotation.download_batch(dataset.id, image_ids)
             for image_name, ann_info in zip(image_names, ann_infos):
-                sly.io.json.dump_json_file(ann_info.annotation, os.path.join(ann_dir, image_name + '.json'))
+                sly.io.json.dump_json_file(
+                    ann_info.annotation, os.path.join(ann_dir, image_name + ".json")
+                )
             ds_progress.iters_done_report(len(batch))
             total_images += len(batch)
 
-    sly.logger.info('Project {!r} has been successfully downloaded'.format(project.name))
-    sly.logger.info('Total number of images: {!r}'.format(total_images))
+    sly.logger.info(
+        "Project {!r} has been successfully downloaded".format(project.name)
+    )
+    sly.logger.info("Total number of images: {!r}".format(total_images))
 
-def build_license(license_content:str, download_dir:str):
+
+def build_license(license_content: str, download_dir: str):
     license_path = os.path.join(download_dir, "LICENSE.md")
     print(license_content)
     with open(license_path, "w") as license_file:
@@ -179,14 +228,15 @@ def build_readme(readme_content, download_dir):
     with open(readme_path, "w") as license_file:
         license_file.write(readme_content)
 
+
 def main():
     sly.logger.info(
         "Script arguments",
         extra={
-            "TEAM_ID": TEAM_ID,
+            "TEAM_ID": TEAM_ID,
             "WORKSPACE_ID": WORKSPACE_ID,
-            "PROJECT_ID": PROJECT_ID
-        }
+            "PROJECT_ID": PROJECT_ID,
+        },
     )
 
     data = {}
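
Note on the Image.MAX_IMAGE_PIXELS override: Pillow ships a decompression-bomb
guard, and the check runs when an image is opened. With the default limit of
about 89 million pixels, Image.open() emits a DecompressionBombWarning above
the limit and raises DecompressionBombError above roughly twice the limit;
raising the limit to 1e9 pixels is what lets this export handle very large
images. A minimal sketch of the behavior, with a hypothetical file name:

    from PIL import Image

    # Default guard: ~89 Mpx warns on open, ~179 Mpx raises
    # PIL.Image.DecompressionBombError.
    Image.MAX_IMAGE_PIXELS = 1000000000  # as in this patch; None disables the check

    img = Image.open("huge_orthophoto.png")  # hypothetical very large image
    print(img.size)  # opens without the bomb error for images up to 1e9 pixels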
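Note on pack_directory_to_tar: it swaps in for sly.fs.archive_directory so that
packing reports per-file tqdm progress and writes an uncompressed tar (mode
"w"). Because it walks regular files only, empty subdirectories are not added
to the archive. A usage sketch with hypothetical paths:

    import tarfile

    # Pack the downloaded project, then sanity-check the archive contents.
    pack_directory_to_tar("/sly_task_data/123_project", "/sly_task_data/123_project.tar")
    with tarfile.open("/sly_task_data/123_project.tar") as tar:
        print(len(tar.getnames()), "entries packed")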