Add preserve date option #2

Merged: 15 commits on Dec 30, 2024
config.json (2 changes: 1 addition & 1 deletion)
@@ -5,7 +5,7 @@
"type": "app",
"headless": true,
"main_script": "src/main.py",
"docker_image": "supervisely/base-py-sdk:6.73.252",
"docker_image": "supervisely/base-py-sdk:6.73.265",
"instance_version": "6.12.12",
"system": true
}
src/api_utils.py (131 changes: 129 additions & 2 deletions)
@@ -1,5 +1,5 @@
from ast import Dict
from typing import Optional
from typing import List, Optional
import pandas as pd
import supervisely as sly
from supervisely.api.api import ApiField
@@ -16,6 +16,7 @@ def create_project(
readme: str = None,
change_name_if_conflict: Optional[bool] = False,
created_at: Optional[str] = None,
updated_at: Optional[str] = None,
created_by: Optional[str] = None,
) -> sly.Project:

@@ -38,13 +39,139 @@ def create_project(
data[ApiField.README] = readme
if created_at is not None:
data[ApiField.CREATED_AT] = created_at
if updated_at is not None:
data[ApiField.UPDATED_AT] = updated_at
if created_by is not None:
data[ApiField.CREATED_BY_ID[0][0]] = created_by
response = api.post("projects.add", data)
try:
response = api.post("projects.add", data)
except Exception as e:
if "Some users not found in team" in str(e):
raise ValueError(
"Unable to create a project. Project creator is not a member of the destination team."
) from e
else:
raise e
return api.project._convert_json_info(response.json())


def get_project_activity(api: sly.Api, project_id: int):
activity = api.post("projects.activity", {ApiField.ID: project_id}).json()
df = pd.DataFrame(activity)
return df


def images_get_list(api: sly.Api, dataset_id, image_ids=None):
api_fields = [
ApiField.ID,
ApiField.NAME,
ApiField.HASH,
ApiField.DATASET_ID,
ApiField.CREATED_AT,
ApiField.UPDATED_AT,
ApiField.META,
ApiField.PATH_ORIGINAL,
ApiField.CREATED_BY_ID[0][0],
]
filters = None
if image_ids is not None:
filters = [{"field": ApiField.ID, "operator": "in", "value": image_ids}]
img_infos = api.image.get_list(
dataset_id, filters=filters, fields=api_fields, force_metadata_for_links=False
)
return img_infos


def create_dataset(
api: sly.Api,
project_id: int,
name: str,
description: Optional[str] = "",
change_name_if_conflict: Optional[bool] = False,
parent_id: Optional[int] = None,
created_at: Optional[str] = None,
updated_at: Optional[str] = None,
created_by: Optional[int] = None,
):
effective_name = api.dataset._get_effective_new_name(
project_id=project_id,
name=name,
change_name_if_conflict=change_name_if_conflict,
parent_id=parent_id,
)
data = {
ApiField.PROJECT_ID: project_id,
ApiField.NAME: effective_name,
ApiField.DESCRIPTION: description,
ApiField.PARENT_ID: parent_id,
}
if created_at is not None:
data[ApiField.CREATED_AT] = created_at
if updated_at is not None:
data[ApiField.UPDATED_AT] = updated_at
if created_by is not None:
data[ApiField.CREATED_BY_ID[0][0]] = created_by
try:
response = api.post(
"datasets.add",
data,
)
except Exception as e:
if "Some users not found in team" in str(e):
raise ValueError(
"Unable to create a dataset. Dataset creator is not a member of the destination team."
) from e
else:
raise e
return api.dataset._convert_json_info(response.json())


def images_bulk_add(
api: sly.Api,
dataset_id: int,
names: List[str],
image_infos: List[sly.ImageInfo],
perserve_dates: bool = False,
):
img_data = []
for name, img_info in zip(names, image_infos):
img_json = {
ApiField.NAME: name,
ApiField.META: img_info.meta,
}
if perserve_dates:
img_json[ApiField.CREATED_AT] = img_info.created_at
img_json[ApiField.UPDATED_AT] = img_info.updated_at
img_json[ApiField.CREATED_BY_ID[0][0]] = img_info.created_by
if img_info.link is not None:
img_json[ApiField.LINK] = img_info.link
elif img_info.hash is not None:
img_json[ApiField.HASH] = img_info.hash
img_data.append(img_json)

try:
response = api.post(
"images.bulk.add",
{
ApiField.DATASET_ID: dataset_id,
ApiField.IMAGES: img_data,
ApiField.FORCE_METADATA_FOR_LINKS: False,
ApiField.SKIP_VALIDATION: True,
},
)
except Exception as e:
if "Some users are not members of the destination group" in str(e):
raise ValueError(
"Unable to add images. Image creator is not a member of the destination team."
) from e
else:
raise e

results = []
for info_json in response.json():
info_json_copy = info_json.copy()
if info_json.get(ApiField.MIME, None) is not None:
info_json_copy[ApiField.EXT] = info_json[ApiField.MIME].split("/")[1]
# results.append(self.InfoType(*[info_json_copy[field_name] for field_name in self.info_sequence()]))
results.append(api.image._convert_json_info(info_json_copy))
return results
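Taken together, the new helpers form the image path of the preserve-date feature: `images_get_list` fetches source infos including timestamps and creator id, and `images_bulk_add` re-creates the images with those values. A minimal usage sketch, assuming an authenticated `sly.Api` and placeholder dataset ids:

```python
import supervisely as sly

import api_utils

api = sly.Api.from_env()  # assumes SERVER_ADDRESS and API_TOKEN in the environment

SRC_DATASET_ID = 111  # placeholder
DST_DATASET_ID = 222  # placeholder

# Source infos are fetched with the extra fields the copy needs
# (created_at, updated_at, creator id).
src_infos = api_utils.images_get_list(api, SRC_DATASET_ID)

# With perserve_dates=True the original timestamps and creator are sent
# along with each image; the creator must be a member of the destination
# team, otherwise the wrapped ValueError above is raised.
dst_infos = api_utils.images_bulk_add(
    api,
    DST_DATASET_ID,
    names=[info.name for info in src_infos],
    image_infos=src_infos,
    perserve_dates=True,
)
```

Each image entry is added by `link` when available and falls back to `hash`, so both linked and stored images survive the copy.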
src/main.py (67 changes: 39 additions & 28 deletions)
@@ -119,41 +119,34 @@ def clone_images_with_annotations(

def _copy_imgs(
names,
ids,
metas,
infos,
):
uploaded = api.image.upload_ids(
dst_dataset_id,
names=names,
ids=ids,
metas=metas,
batch_size=UPLOAD_IMAGES_BATCH_SIZE,
force_metadata_for_links=False,
infos=infos,
skip_validation=True, # TODO: check if it is needed
uploaded = api_utils.images_bulk_add(
api, dst_dataset_id, names, infos, perserve_dates=options[JSONKEYS.PRESERVE_SRC_DATE]
)
return infos, uploaded

def _copy_anns(src: List[sly.ImageInfo], dst: List[sly.ImageInfo]):
src_ann_infos = api.annotation.download_batch(
src_dataset_id, [info.id for info in src], force_metadata_for_links=False
) # not sure that the order is perserved
src_id_to_dst_id = {s.id: d.id for s, d in zip(src, dst)}
api.annotation.upload_jsons(
[src_id_to_dst_id[info.image_id] for info in src_ann_infos],
[info.annotation for info in src_ann_infos],
skip_bounds_validation=True,
)
try:
api.annotation.copy_batch_by_ids(
[i.id for i in src],
[i.id for i in dst],
save_source_date=options[JSONKEYS.PRESERVE_SRC_DATE],
)
except Exception as e:
if "Some users are not members of the destination group" in str(e):
raise ValueError(
"Unable to copy annotations. Annotation creator is not a member of the destination team."
) from e
else:
raise e

return src, dst

src_dataset_id = image_infos[0].dataset_id
to_rename = {} # {new_name: old_name}
upload_images_tasks = []
for src_image_infos_batch in sly.batched(image_infos, UPLOAD_IMAGES_BATCH_SIZE):
names = [info.name for info in src_image_infos_batch]
ids = [info.id for info in src_image_infos_batch]
metas = [info.meta for info in src_image_infos_batch]
now = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")

if options[JSONKEYS.CONFLICT_RESOLUTION_MODE] in [
@@ -171,8 +164,6 @@ def _copy_anns(src: List[sly.ImageInfo], dst: List[sly.ImageInfo]):
executor.submit(
_copy_imgs,
names=names,
ids=ids,
metas=metas,
infos=src_image_infos_batch,
)
)
@@ -643,7 +634,7 @@ def clone_items(
):
if project_type == str(sly.ProjectType.IMAGES):
if src_infos is None:
src_infos = run_in_executor(api.image.get_list, src_dataset_id)
src_infos = run_in_executor(api_utils.images_get_list, api, src_dataset_id)
clone_f = clone_images_with_annotations
elif project_type == str(sly.ProjectType.VIDEOS):
if src_infos is None:
@@ -696,6 +687,7 @@ def create_dataset_recursively(
)
tasks_queue = Queue()
local_executor = ThreadPoolExecutor()
perserve_date = options.get(JSONKEYS.PRESERVE_SRC_DATE, False)

def _create_rec(
dataset_info: sly.DatasetInfo, children: Dict[sly.DatasetInfo, Dict], dst_parent_id: int
@@ -710,12 +702,16 @@ def _create_rec(
if any(ds.name == dataset_info.name for ds in existing):
return
created_info = run_in_executor(
api.dataset.create,
api_utils.create_dataset,
api,
dst_project_id,
dataset_info.name,
dataset_info.description,
change_name_if_conflict=True,
parent_id=dst_parent_id,
created_at=dataset_info.created_at if perserve_date else None,
updated_at=dataset_info.updated_at if perserve_date else None,
created_by=dataset_info.created_by if perserve_date else None,
)

created_id = created_info.id
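For review context, this is roughly how the extended `create_dataset` helper is called with the flag applied; the ids are placeholders, and `ds_info` stands in for any source `DatasetInfo`. A sketch under those assumptions:

```python
# Sketch: ids are placeholders; options and JSONKEYS come from this module.
ds_info = api.dataset.get_info_by_id(333)  # hypothetical source dataset
preserve = options.get(JSONKEYS.PRESERVE_SRC_DATE, False)

created = api_utils.create_dataset(
    api,
    project_id=444,
    name=ds_info.name,
    description=ds_info.description,
    change_name_if_conflict=True,
    parent_id=None,  # top-level dataset
    # None keeps the server-assigned values, matching the ternaries above.
    created_at=ds_info.created_at if preserve else None,
    updated_at=ds_info.updated_at if preserve else None,
    created_by=ds_info.created_by if preserve else None,
)
```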
@@ -771,9 +767,11 @@ def create_project(
) -> Tuple[sly.ProjectInfo, sly.ProjectMeta]:
created_at = None
created_by = None
updated_at = None
if options.get(JSONKEYS.PRESERVE_SRC_DATE, False):
created_at = src_project_info.created_at
created_by = src_project_info.created_by_id
updated_at = src_project_info.updated_at
dst_project_info = api_utils.create_project(
api,
dst_workspace_id,
@@ -785,6 +783,7 @@
readme=src_project_info.readme,
change_name_if_conflict=True,
created_at=created_at,
updated_at=updated_at,
created_by=created_by,
)
sly.logger.info(
@@ -905,6 +904,7 @@ def copy_project_with_replace(
)
progress_cb(src_project_info.items_count)
return []
perserve_date = options.get(JSONKEYS.PRESERVE_SRC_DATE, False)
project_type = src_project_info.type
created_datasets = []
if datasets_tree is None:
@@ -919,6 +919,9 @@
src_project_info.description,
change_name_if_conflict=True,
parent_id=dst_dataset_id,
created_at=src_project_info.created_at if perserve_date else None,
updated_at=src_project_info.updated_at if perserve_date else None,
created_by=src_project_info.created_by_id if perserve_date else None,
)
existing_datasets = find_children_in_tree(datasets_tree, parent_id=dst_dataset_id)
for ds, children in datasets_tree.items():
@@ -983,6 +986,7 @@ def copy_project_with_skip(
existing_projects=None,
datasets_tree=None,
):
perserve_date = options.get(JSONKEYS.PRESERVE_SRC_DATE, False)
project_type = src_project_info.type
created_datasets = []
if dst_project_id is not None:
@@ -1001,6 +1005,9 @@
src_project_info.description,
change_name_if_conflict=True,
parent_id=dst_dataset_id,
created_at=src_project_info.created_at if perserve_date else None,
updated_at=src_project_info.updated_at if perserve_date else None,
created_by=src_project_info.created_by_id if perserve_date else None,
)
for ds, children in datasets_tree.items():
created_datasets.extend(
@@ -1082,6 +1089,7 @@ def copy_project(
existing_projects,
datasets_tree,
)
perserve_date = options.get(JSONKEYS.PRESERVE_SRC_DATE, False)
project_type = src_project_info.type
created_datasets = []
if dst_project_id is not None:
@@ -1093,6 +1101,9 @@
src_project_info.description,
change_name_if_conflict=True,
parent_id=dst_dataset_id,
created_at=src_project_info.created_at if perserve_date else None,
updated_at=src_project_info.updated_at if perserve_date else None,
created_by=src_project_info.created_by_id if perserve_date else None,
)
for ds, children in datasets_tree.items():
created_datasets.extend(
@@ -1237,7 +1248,7 @@ def move_datasets_tree(
def get_item_infos(dataset_id: int, item_ids: List[int], project_type: str):
filters = [{"field": "id", "operator": "in", "value": item_ids}]
if project_type == str(sly.ProjectType.IMAGES):
return api.image.get_info_by_id_batch(item_ids, force_metadata_for_links=False)
return api_utils.images_get_list(api, dataset_id, item_ids)
if project_type == str(sly.ProjectType.VIDEOS):
return api.video.get_info_by_id_batch(item_ids)
if project_type == str(sly.ProjectType.VOLUMES):
(diff truncated)
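End to end, one option gates every path touched by this PR. A condensed view (the literal string behind `JSONKEYS.PRESERVE_SRC_DATE` is defined elsewhere in the app and not shown in this diff):

```python
# How the single flag threads through each copy path in this PR.
preserve = options.get(JSONKEYS.PRESERVE_SRC_DATE, False)

# projects / datasets: forward created_at, updated_at, created_by (else None)
#   via api_utils.create_project / api_utils.create_dataset
# images:      api_utils.images_bulk_add(..., perserve_dates=preserve)
# annotations: api.annotation.copy_batch_by_ids(..., save_source_date=preserve)
```

One design note: each helper wraps the server's membership error in a `ValueError` with a path-specific message, so a failure points at the project, dataset, image, or annotation step whose creator is outside the destination team.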