From 1c65f65156d3954780b034a5cb5f676a3ce38193 Mon Sep 17 00:00:00 2001 From: yash1993 Date: Thu, 7 Sep 2023 01:20:27 +0200 Subject: [PATCH 1/6] Implemented checks for duplicate contributor email --- packages/syft/src/syft/service/dataset/dataset.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/packages/syft/src/syft/service/dataset/dataset.py b/packages/syft/src/syft/service/dataset/dataset.py index 1449678d609..1d60fea618a 100644 --- a/packages/syft/src/syft/service/dataset/dataset.py +++ b/packages/syft/src/syft/service/dataset/dataset.py @@ -9,6 +9,7 @@ from typing import Optional from typing import Tuple from typing import Union +from typing import Set # third party import itables @@ -99,6 +100,7 @@ class Asset(SyftObject): name: str description: Optional[str] contributors: List[Contributor] = [] + contributor_email_set: Set[str] = set() data_subjects: List[DataSubject] = [] mock_is_real: bool = False shape: Optional[Tuple] @@ -243,6 +245,7 @@ class CreateAsset(SyftObject): name: str description: Optional[str] contributors: List[Contributor] = [] + contributor_email_set: Set[str] = set() data_subjects: List[DataSubjectCreate] = [] node_uid: Optional[UID] action_id: Optional[UID] @@ -292,6 +295,9 @@ def add_contributor( contributor = Contributor( name=name, role=_role_str, email=email, phone=phone, note=note ) + if contributor.email in self.contributor_email_set: + raise Exception("Contributor with this email already exists.") + self.contributor_email_set.add(contributor.email) self.contributors.append(contributor) return SyftSuccess( message=f"Contributor '{name}' added to '{self.name}' Asset." @@ -374,6 +380,7 @@ class Dataset(SyftObject): node_uid: Optional[UID] asset_list: List[Asset] = [] contributors: List[Contributor] = [] + contributor_email_set: Set[str] = set() citation: Optional[str] url: Optional[str] description: Optional[str] @@ -569,6 +576,9 @@ def add_contributor( contributor = Contributor( name=name, role=_role_str, email=email, phone=phone, note=note ) + if contributor.email in self.contributor_email_set: + raise Exception("Contributor with this email already exists.") + self.contributor_email_set.add(contributor.email) self.contributors.append(contributor) return SyftSuccess( message=f"Contributor '{name}' added to '{self.name}' Dataset." From bbec16d3bb1ba9f336e28998bd61b879aefd77b4 Mon Sep 17 00:00:00 2001 From: yash1993 Date: Thu, 7 Sep 2023 01:32:02 +0200 Subject: [PATCH 2/6] Fixed linting test --- packages/syft/src/syft/service/dataset/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/syft/src/syft/service/dataset/dataset.py b/packages/syft/src/syft/service/dataset/dataset.py index 1d60fea618a..d79d233242a 100644 --- a/packages/syft/src/syft/service/dataset/dataset.py +++ b/packages/syft/src/syft/service/dataset/dataset.py @@ -7,9 +7,9 @@ from typing import Dict from typing import List from typing import Optional +from typing import Set from typing import Tuple from typing import Union -from typing import Set # third party import itables From 0ca50ff4625fbbe31d9ce714b4a929061185668a Mon Sep 17 00:00:00 2001 From: yash1993 Date: Fri, 22 Sep 2023 14:16:08 +0200 Subject: [PATCH 3/6] Added pytest for testing duplicate email contributor --- .../tests/syft/service/dataset/dataset_service_test.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/packages/syft/tests/syft/service/dataset/dataset_service_test.py b/packages/syft/tests/syft/service/dataset/dataset_service_test.py index 48d735111d1..316ef691e34 100644 --- a/packages/syft/tests/syft/service/dataset/dataset_service_test.py +++ b/packages/syft/tests/syft/service/dataset/dataset_service_test.py @@ -15,6 +15,7 @@ from syft.service.dataset.dataset import CreateDataset as Dataset from syft.service.dataset.dataset import _ASSET_WITH_NONE_MOCK_ERROR_MESSAGE from syft.types.twin_object import TwinMode +import syft as sy def random_hash() -> str: @@ -72,7 +73,6 @@ def test_mock_always_not_real_after_calling_no_mock( asset.no_mock() assert not asset.mock_is_real - def test_mock_always_not_real_after_set_mock_to_empty( asset_with_mock: dict[str, Any] ) -> None: @@ -216,3 +216,11 @@ def test_domain_client_cannot_upload_dataset_with_non_mock(worker: Worker) -> No root_domain_client.upload_dataset(dataset) assert _ASSET_WITH_NONE_MOCK_ERROR_MESSAGE in str(excinfo.value) + +def test_adding_contributors_with_duplicate_email(): + + dataset = Dataset(name = 'dummy dataset') + dataset.add_contributor(role = sy.roles.UPLOADER, name = 'Jim Carey', email = 'jim@69.com') + dataset.add_contributor(role = sy.roles.UPLOADER, name = 'Jim Car', email = 'jim@69.com') + + assert len(dataset.contributors) == 1 \ No newline at end of file From 7d7c25d5d423fe4f5c8e7a940622de1974bb879a Mon Sep 17 00:00:00 2001 From: yash1993 Date: Fri, 22 Sep 2023 14:21:07 +0200 Subject: [PATCH 4/6] Fixed linting --- .../syft/service/dataset/dataset_service_test.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/packages/syft/tests/syft/service/dataset/dataset_service_test.py b/packages/syft/tests/syft/service/dataset/dataset_service_test.py index 316ef691e34..6b6e83bb9a9 100644 --- a/packages/syft/tests/syft/service/dataset/dataset_service_test.py +++ b/packages/syft/tests/syft/service/dataset/dataset_service_test.py @@ -9,13 +9,13 @@ import pytest # syft absolute +import syft as sy from syft.node.worker import Worker from syft.service.action.action_object import ActionObject from syft.service.dataset.dataset import CreateAsset as Asset from syft.service.dataset.dataset import CreateDataset as Dataset from syft.service.dataset.dataset import _ASSET_WITH_NONE_MOCK_ERROR_MESSAGE from syft.types.twin_object import TwinMode -import syft as sy def random_hash() -> str: @@ -73,6 +73,7 @@ def test_mock_always_not_real_after_calling_no_mock( asset.no_mock() assert not asset.mock_is_real + def test_mock_always_not_real_after_set_mock_to_empty( asset_with_mock: dict[str, Any] ) -> None: @@ -217,10 +218,12 @@ def test_domain_client_cannot_upload_dataset_with_non_mock(worker: Worker) -> No assert _ASSET_WITH_NONE_MOCK_ERROR_MESSAGE in str(excinfo.value) + def test_adding_contributors_with_duplicate_email(): + dataset = Dataset(name="dummy dataset") + dataset.add_contributor( + role=sy.roles.UPLOADER, name="Jim Carey", email="jim@69.com" + ) + dataset.add_contributor(role=sy.roles.UPLOADER, name="Jim Car", email="jim@69.com") - dataset = Dataset(name = 'dummy dataset') - dataset.add_contributor(role = sy.roles.UPLOADER, name = 'Jim Carey', email = 'jim@69.com') - dataset.add_contributor(role = sy.roles.UPLOADER, name = 'Jim Car', email = 'jim@69.com') - - assert len(dataset.contributors) == 1 \ No newline at end of file + assert len(dataset.contributors) == 1 From ca75ef8eec4a038184e3f8224626494aa8e38753 Mon Sep 17 00:00:00 2001 From: rasswanth-s <43314053+rasswanth-s@users.noreply.github.com> Date: Thu, 5 Oct 2023 15:27:06 +0530 Subject: [PATCH 5/6] reformatted contributor email duplicate check --- .../syft/src/syft/service/dataset/dataset.py | 38 ++++++++++++------- packages/syft/src/syft/types/syft_object.py | 3 ++ .../service/dataset/dataset_service_test.py | 32 ++++++++++++++-- 3 files changed, 55 insertions(+), 18 deletions(-) diff --git a/packages/syft/src/syft/service/dataset/dataset.py b/packages/syft/src/syft/service/dataset/dataset.py index 545dfa62243..cc9fcc04ec8 100644 --- a/packages/syft/src/syft/service/dataset/dataset.py +++ b/packages/syft/src/syft/service/dataset/dataset.py @@ -90,6 +90,16 @@ def _repr_html_(self) -> Any: """ + def __eq__(self, value: object) -> bool: + if not isinstance(value, Contributor): + return False + + # We assoctiate two contributors as equal if they have the same email + return self.email == value.email + + def __hash__(self) -> int: + return hash(self.email) + @serializable() class MarkdownDescription(SyftObject): @@ -126,8 +136,7 @@ class Asset(SyftObject): node_uid: UID name: str description: Optional[MarkdownDescription] = None - contributors: List[Contributor] = [] - contributor_email_set: Set[str] = set() + contributors: Set[Contributor] = set() data_subjects: List[DataSubject] = [] mock_is_real: bool = False shape: Optional[Tuple] @@ -296,8 +305,7 @@ class CreateAsset(SyftObject): id: Optional[UID] = None name: str description: Optional[MarkdownDescription] = None - contributors: List[Contributor] = [] - contributor_email_set: Set[str] = set() + contributors: Set[Contributor] = set() data_subjects: List[DataSubjectCreate] = [] node_uid: Optional[UID] action_id: Optional[UID] @@ -350,10 +358,12 @@ def add_contributor( contributor = Contributor( name=name, role=_role_str, email=email, phone=phone, note=note ) - if contributor.email in self.contributor_email_set: - raise Exception("Contributor with this email already exists.") - self.contributor_email_set.add(contributor.email) - self.contributors.append(contributor) + if contributor in self.contributors: + return SyftError( + message=f"Contributor with email: '{email}' already exists in '{self.name}' Asset." + ) + self.contributors.add(contributor) + return SyftSuccess( message=f"Contributor '{name}' added to '{self.name}' Asset." ) @@ -434,8 +444,7 @@ class Dataset(SyftObject): name: str node_uid: Optional[UID] asset_list: List[Asset] = [] - contributors: List[Contributor] = [] - contributor_email_set: Set[str] = set() + contributors: Set[Contributor] = set() citation: Optional[str] url: Optional[str] description: Optional[MarkdownDescription] = None @@ -638,10 +647,11 @@ def add_contributor( contributor = Contributor( name=name, role=_role_str, email=email, phone=phone, note=note ) - if contributor.email in self.contributor_email_set: - raise Exception("Contributor with this email already exists.") - self.contributor_email_set.add(contributor.email) - self.contributors.append(contributor) + if contributor in self.contributors: + return SyftError( + message=f"Contributor with email: '{email}' already exists in '{self.name}' Dataset." + ) + self.contributors.add(contributor) return SyftSuccess( message=f"Contributor '{name}' added to '{self.name}' Dataset." ) diff --git a/packages/syft/src/syft/types/syft_object.py b/packages/syft/src/syft/types/syft_object.py index 74fe7ee5f2e..1799264cca3 100644 --- a/packages/syft/src/syft/types/syft_object.py +++ b/packages/syft/src/syft/types/syft_object.py @@ -566,6 +566,8 @@ def list_dict_repr_html(self) -> str: extra_fields = [] if isinstance(self, dict): values = list(self.values()) + elif isinstance(self, set): + values = list(self) else: values = self @@ -625,6 +627,7 @@ def list_dict_repr_html(self) -> str: # give lists and dicts a _repr_html_ if they contain SyftObject's aggressive_set_attr(type([]), "_repr_html_", list_dict_repr_html) aggressive_set_attr(type({}), "_repr_html_", list_dict_repr_html) +aggressive_set_attr(type(set()), "_repr_html_", list_dict_repr_html) class StorableObjectType: diff --git a/packages/syft/tests/syft/service/dataset/dataset_service_test.py b/packages/syft/tests/syft/service/dataset/dataset_service_test.py index 6b6e83bb9a9..e2d317bef2e 100644 --- a/packages/syft/tests/syft/service/dataset/dataset_service_test.py +++ b/packages/syft/tests/syft/service/dataset/dataset_service_test.py @@ -15,6 +15,8 @@ from syft.service.dataset.dataset import CreateAsset as Asset from syft.service.dataset.dataset import CreateDataset as Dataset from syft.service.dataset.dataset import _ASSET_WITH_NONE_MOCK_ERROR_MESSAGE +from syft.service.response import SyftError +from syft.service.response import SyftSuccess from syft.types.twin_object import TwinMode @@ -220,10 +222,32 @@ def test_domain_client_cannot_upload_dataset_with_non_mock(worker: Worker) -> No def test_adding_contributors_with_duplicate_email(): - dataset = Dataset(name="dummy dataset") - dataset.add_contributor( - role=sy.roles.UPLOADER, name="Jim Carey", email="jim@69.com" + # Datasets + + dataset = Dataset(name="Sample dataset") + res1 = dataset.add_contributor( + role=sy.roles.UPLOADER, name="Alice", email="alice@naboo.net" + ) + res2 = dataset.add_contributor( + role=sy.roles.UPLOADER, name="Alice Smith", email="alice@naboo.net" ) - dataset.add_contributor(role=sy.roles.UPLOADER, name="Jim Car", email="jim@69.com") + assert isinstance(res1, SyftSuccess) + assert isinstance(res2, SyftError) assert len(dataset.contributors) == 1 + + # Assets + asset = Asset(**make_asset_without_mock(), mock=ActionObject.empty()) + + res3 = asset.add_contributor( + role=sy.roles.UPLOADER, name="Bob", email="bob@naboo.net" + ) + + res4 = asset.add_contributor( + role=sy.roles.UPLOADER, name="Bob Abraham", email="bob@naboo.net" + ) + dataset.add_asset(asset) + + assert isinstance(res3, SyftSuccess) + assert isinstance(res4, SyftError) + assert len(asset.contributors) == 1 From b71c8c3d5ac0ce361af34376e201edec27f5242c Mon Sep 17 00:00:00 2001 From: rasswanth-s <43314053+rasswanth-s@users.noreply.github.com> Date: Thu, 5 Oct 2023 17:26:21 +0530 Subject: [PATCH 6/6] fixed syft unit tests --- packages/syft/src/syft/client/domain_client.py | 2 +- packages/syft/src/syft/service/dataset/dataset.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/packages/syft/src/syft/client/domain_client.py b/packages/syft/src/syft/client/domain_client.py index 7adaeaa8b39..9bf677359e3 100644 --- a/packages/syft/src/syft/client/domain_client.py +++ b/packages/syft/src/syft/client/domain_client.py @@ -49,7 +49,7 @@ def add_default_uploader( name=user.name, email=user.email, ) - obj.contributors.append(uploader) + obj.contributors.add(uploader) obj.uploader = uploader return obj diff --git a/packages/syft/src/syft/service/dataset/dataset.py b/packages/syft/src/syft/service/dataset/dataset.py index cc9fcc04ec8..45bb7059659 100644 --- a/packages/syft/src/syft/service/dataset/dataset.py +++ b/packages/syft/src/syft/service/dataset/dataset.py @@ -224,6 +224,21 @@ def _repr_markdown_(self) -> str: _repr_str += f"\t{contributor.name}: {contributor.email}\n" return as_markdown_python_code(_repr_str) + def __eq__(self, other: object) -> bool: + if not isinstance(other, Asset): + return False + return ( + self.action_id == other.action_id + and self.name == other.name + and self.contributors == other.contributors + and self.shape == other.shape + and self.description == other.description + and self.data_subjects == other.data_subjects + and self.mock_is_real == other.mock_is_real + and self.uploader == other.uploader + and self.created_at == other.created_at + ) + @property def pointer(self) -> Any: # relative