From 7dcd85340dac37a51e0344ec34014f4dc4a17346 Mon Sep 17 00:00:00 2001 From: dk Date: Fri, 2 Aug 2024 15:48:59 +0700 Subject: [PATCH 1/3] [syft/dataset] improve error message when uploading unsupported datatypes --- packages/syft/src/syft/client/datasite_client.py | 3 +++ .../syft/src/syft/service/blob_storage/util.py | 6 +++++- .../syft/src/syft/types/syft_object_registry.py | 4 +++- packages/syft/src/syft/util/util.py | 14 ++++++++++++-- 4 files changed, 23 insertions(+), 4 deletions(-) diff --git a/packages/syft/src/syft/client/datasite_client.py b/packages/syft/src/syft/client/datasite_client.py index fdd30bbed03..6338b8f89e6 100644 --- a/packages/syft/src/syft/client/datasite_client.py +++ b/packages/syft/src/syft/client/datasite_client.py @@ -26,6 +26,7 @@ from ..service.dataset.dataset import CreateDataset from ..service.migration.object_migration_state import MigrationData from ..service.response import SyftError +from ..service.response import SyftException from ..service.response import SyftSuccess from ..service.response import SyftWarning from ..service.sync.diff_state import ResolvedSyncState @@ -143,6 +144,8 @@ def upload_dataset(self, dataset: CreateDataset) -> SyftSuccess | SyftError: res = twin._save_to_blob_storage(allow_empty=contains_empty) if isinstance(res, SyftError): return res + except SyftException as se: + return SyftError(message=f"{se}") except Exception as e: tqdm.write(f"Failed to create twin for {asset.name}. {e}") return SyftError(message=f"Failed to create twin. {e}") diff --git a/packages/syft/src/syft/service/blob_storage/util.py b/packages/syft/src/syft/service/blob_storage/util.py index df795c86b87..31e1117e43f 100644 --- a/packages/syft/src/syft/service/blob_storage/util.py +++ b/packages/syft/src/syft/service/blob_storage/util.py @@ -2,6 +2,7 @@ from typing import Any # relative +from ...service.response import SyftException from ...util.util import get_mb_serialized_size from ..metadata.server_metadata import ServerMetadata from ..metadata.server_metadata import ServerMetadataJSON @@ -16,4 +17,7 @@ def min_size_for_blob_storage_upload( def can_upload_to_blob_storage( data: Any, metadata: ServerMetadata | ServerMetadataJSON ) -> bool: - return get_mb_serialized_size(data) >= min_size_for_blob_storage_upload(metadata) + serialized_size = get_mb_serialized_size(data) + if serialized_size.is_err(): + raise SyftException(f"{serialized_size.err()}") + return serialized_size.ok() >= min_size_for_blob_storage_upload(metadata) diff --git a/packages/syft/src/syft/types/syft_object_registry.py b/packages/syft/src/syft/types/syft_object_registry.py index 3d0548f6cf1..dbbed869cc9 100644 --- a/packages/syft/src/syft/types/syft_object_registry.py +++ b/packages/syft/src/syft/types/syft_object_registry.py @@ -76,7 +76,9 @@ def get_canonical_name_version(cls, obj: Any) -> tuple[str, int]: obj_type = type(obj) if obj_type in cls.__type_to_canonical_name__: return cls.__type_to_canonical_name__[obj_type] - raise ValueError(f"Could not find canonical name for {obj}") + raise ValueError( + f"Could not find canonical name for '{obj_type.__module__}.{obj_type.__name__}'" + ) @classmethod def get_serde_properties(cls, canonical_name: str, version: int) -> tuple: diff --git a/packages/syft/src/syft/util/util.py b/packages/syft/src/syft/util/util.py index 43620c4cab5..c67b0ae04ef 100644 --- a/packages/syft/src/syft/util/util.py +++ b/packages/syft/src/syft/util/util.py @@ -42,6 +42,8 @@ from nacl.signing import VerifyKey import nh3 import requests +from result import Err +from result import Ok # relative from ..serde.serialize import _serialize as serialize @@ -97,8 +99,16 @@ def get_mb_size(data: Any) -> float: return sys.getsizeof(data) / (1024 * 1024) -def get_mb_serialized_size(data: Any) -> float: - return sys.getsizeof(serialize(data, to_bytes=True)) / (1024 * 1024) +def get_mb_serialized_size(data: Any) -> Ok[float] | Err[str]: + try: + serialized_data = serialize(data, to_bytes=True) + return Ok(sys.getsizeof(serialized_data) / (1024 * 1024)) + except Exception as e: + data_type = type(data) + return Err( + f"Failed to serialize data of type '{data_type.__module__}.{data_type.__name__}'. " + f"Data type not supported. Detailed error: {e}" + ) def extract_name(klass: type) -> str: From 08d831255aa69d8e6159d58cb5047ac208b87183 Mon Sep 17 00:00:00 2001 From: dk Date: Tue, 6 Aug 2024 11:24:48 +0700 Subject: [PATCH 2/3] [syft/action_obj] add a check if an object's type is supported before trying to create an `ActionObject` --- .../src/syft/service/action/action_object.py | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index bbad29396b9..0c04b3d7c50 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -19,6 +19,7 @@ from typing import TYPE_CHECKING # third party +from IPython.display import display from pydantic import ConfigDict from pydantic import Field from pydantic import field_validator @@ -46,6 +47,7 @@ from ...types.syft_object import SYFT_OBJECT_VERSION_1 from ...types.syft_object import SyftBaseObject from ...types.syft_object import SyftObject +from ...types.syft_object_registry import SyftObjectRegistry from ...types.syncable_object import SyncableSyftObject from ...types.uid import LineageID from ...types.uid import UID @@ -1410,6 +1412,25 @@ def from_obj( if id is not None and syft_lineage_id is not None and id != syft_lineage_id.id: raise ValueError("UID and LineageID should match") + # check if the object's type is supported + try: + canonical_name, version = SyftObjectRegistry.get_canonical_name_version( + syft_action_data + ) + except Exception: + obj_type = type(syft_action_data) + raise SyftException( + f"Error when creating action object for {syft_action_data}.\n" + f"Unsupported data type: '{obj_type.__module__}.{obj_type.__name__}'" + ) + can_be_serialized = SyftObjectRegistry.has_serde_class(canonical_name, version) + if not can_be_serialized: + warning = SyftWarning( + message=f"Object of type '{obj_type.__module__}.{obj_type.__name__}' " + f"is not supported by Syft serialization." + ) + display(warning) + action_type = action_type_for_object(syft_action_data) action_object = action_type(syft_action_data_cache=syft_action_data) action_object.syft_blob_storage_entry_id = syft_blob_storage_entry_id From 782b3b86d610a5465fa4436a36b00366f808e2b6 Mon Sep 17 00:00:00 2001 From: dk Date: Mon, 12 Aug 2024 14:40:49 +0700 Subject: [PATCH 3/3] [syft/action_obj] remove redundant display warning --- packages/syft/src/syft/service/action/action_object.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/packages/syft/src/syft/service/action/action_object.py b/packages/syft/src/syft/service/action/action_object.py index 1d596377c41..1196e3b5dc1 100644 --- a/packages/syft/src/syft/service/action/action_object.py +++ b/packages/syft/src/syft/service/action/action_object.py @@ -19,7 +19,6 @@ from typing import TYPE_CHECKING # third party -from IPython.display import display from pydantic import ConfigDict from pydantic import Field from pydantic import field_validator @@ -1425,13 +1424,6 @@ def from_obj( f"Error when creating action object for {syft_action_data}.\n" f"Unsupported data type: '{obj_type.__module__}.{obj_type.__name__}'" ) - can_be_serialized = SyftObjectRegistry.has_serde_class(canonical_name, version) - if not can_be_serialized: - warning = SyftWarning( - message=f"Object of type '{obj_type.__module__}.{obj_type.__name__}' " - f"is not supported by Syft serialization." - ) - display(warning) action_type = action_type_for_object(syft_action_data) action_object = action_type(syft_action_data_cache=syft_action_data)