From 162f6518b8834e2fc33324670c51d1e89f25ff38 Mon Sep 17 00:00:00 2001 From: Andrew Trask Date: Sat, 3 Aug 2024 01:05:53 -0400 Subject: [PATCH 01/18] Bare-bones FL tutorial based on model-training tutorial --- .../00 - Setup FL Datasites.ipynb | 246 ++ .../01 - Submit FL Experiment.ipynb | 375 +++ ...02 - Data Owners Approve Experiments.ipynb | 2992 +++++++++++++++++ .../03 - Run Federated Learning.ipynb | 325 ++ .../federated-learning/mnist_dataset.py | 88 + 5 files changed, 4026 insertions(+) create mode 100644 notebooks/tutorials/federated-learning/00 - Setup FL Datasites.ipynb create mode 100644 notebooks/tutorials/federated-learning/01 - Submit FL Experiment.ipynb create mode 100644 notebooks/tutorials/federated-learning/02 - Data Owners Approve Experiments.ipynb create mode 100644 notebooks/tutorials/federated-learning/03 - Run Federated Learning.ipynb create mode 100644 notebooks/tutorials/federated-learning/mnist_dataset.py diff --git a/notebooks/tutorials/federated-learning/00 - Setup FL Datasites.ipynb b/notebooks/tutorials/federated-learning/00 - Setup FL Datasites.ipynb new file mode 100644 index 00000000000..476428bbe43 --- /dev/null +++ b/notebooks/tutorials/federated-learning/00 - Setup FL Datasites.ipynb @@ -0,0 +1,246 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ac36ba2d-3662-4637-a94b-bd78ce4cde22", + "metadata": {}, + "source": [ + "# Step 1: Launch some datasite servers" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "9a823a3e-3576-4fa2-881a-f4671a6a7966", + "metadata": {}, + "outputs": [], + "source": [ + "import syft as sy\n", + "from mnist_dataset import mnist" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "bb256760-8067-4410-a926-2ea522f17ffc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
SyftInfo:
You have launched a development server at http://0.0.0.0:None.It is intended only for local use.

" + ], + "text/plain": [ + "SyftInfo: You have launched a development server at http://0.0.0.0:None.It is intended only for local use." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Logged into as \n" + ] + }, + { + "data": { + "text/html": [ + "
SyftWarning:
You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`.

" + ], + "text/plain": [ + "SyftWarning: You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
SyftInfo:
You have launched a development server at http://0.0.0.0:None.It is intended only for local use.

" + ], + "text/plain": [ + "SyftInfo: You have launched a development server at http://0.0.0.0:None.It is intended only for local use." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Logged into as \n" + ] + }, + { + "data": { + "text/html": [ + "
SyftWarning:
You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`.

" + ], + "text/plain": [ + "SyftWarning: You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
SyftInfo:
You have launched a development server at http://0.0.0.0:None.It is intended only for local use.

" + ], + "text/plain": [ + "SyftInfo: You have launched a development server at http://0.0.0.0:None.It is intended only for local use." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Logged into as \n" + ] + }, + { + "data": { + "text/html": [ + "
SyftWarning:
You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`.

" + ], + "text/plain": [ + "SyftWarning: You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`." + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "datasites = list()\n", + "for i in range(3):\n", + " server = sy.orchestra.launch(name=\"fl-datasite-\"+str(i), reset=True)\n", + " client = server.login(email=\"info@openmined.org\", password=\"changethis\")\n", + " datasites.append(client)" + ] + }, + { + "cell_type": "markdown", + "id": "9e2d0623-8324-4fcf-863d-27ed149b14e1", + "metadata": {}, + "source": [ + "# Step 2: Split MNIST across datasites" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "455592f8-8af7-4e8e-af61-5319a77ee987", + "metadata": {}, + "outputs": [], + "source": [ + "train_images, train_labels, _, _ = mnist()\n", + "images = [train_images[0:20000], train_images[20000:40000],train_images[40000:60000]]\n", + "labels = [train_labels[0:20000], train_labels[20000:40000],train_labels[40000:60000]]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "2f3f3330-6ec4-4484-889a-b4be14501ceb", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Uploading: labels: 100%|\u001b[32m████████████████████████████████████████████████████████████████████\u001b[0m| 2/2 [00:00<00:00, 4.15it/s]\u001b[0m\n", + "Uploading: labels: 100%|\u001b[32m████████████████████████████████████████████████████████████████████\u001b[0m| 2/2 [00:00<00:00, 4.26it/s]\u001b[0m\n", + "Uploading: labels: 100%|\u001b[32m████████████████████████████████████████████████████████████████████\u001b[0m| 2/2 [00:00<00:00, 3.89it/s]\u001b[0m\n" + ] + } + ], + "source": [ + "for i, datasite in enumerate(datasites):\n", + " dataset = sy.Dataset(name=\"MNIST Dataset\")\n", + " dataset.add_asset(sy.Asset(name=\"images\", data=images[i], mock=0*images[i]))\n", + " dataset.add_asset(sy.Asset(name=\"labels\", data=labels[i], 
mock=0*labels[i])) \n", + " datasites[i].upload_dataset(dataset)" + ] + }, + { + "cell_type": "markdown", + "id": "9861eeb9-f29b-43c0-83b1-86ff53e022ad", + "metadata": {}, + "source": [ + "# Step 3: Create data scientist user accounts" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "502f63ba-1cd7-4942-95c0-bfc9a437561e", + "metadata": {}, + "outputs": [], + "source": [ + "for datasite in datasites:\n", + " register_result = datasite.register(\n", + " name=\"Sheldon Cooper\",\n", + " email=\"sheldon@caltech.edu\",\n", + " password=\"changethis\",\n", + " password_verify=\"changethis\",\n", + " institution=\"Caltech\",\n", + " website=\"https://www.caltech.edu/\",\n", + " )\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e410da12-2681-4582-b337-10b149b05d7c", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa20d4c3-a6af-442c-bb0a-e3ff410e9b60", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "285a3bdb-5e5e-4881-8cac-924844c04fd3", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/tutorials/federated-learning/01 - Submit FL Experiment.ipynb b/notebooks/tutorials/federated-learning/01 - Submit FL Experiment.ipynb new file mode 100644 index 00000000000..fbefa937a02 --- /dev/null +++ b/notebooks/tutorials/federated-learning/01 - Submit FL Experiment.ipynb @@ -0,0 +1,375 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id":
"2ddc5050-75bd-44f9-821e-2cf5b5b5f1f9", + "metadata": {}, + "source": [ + "# Step 1: Login as External Researcher" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "c2645f54-4db6-4760-9dc1-8f38f53f39b3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
SyftInfo:
You have launched a development server at http://0.0.0.0:None.It is intended only for local use.

" + ], + "text/plain": [ + "SyftInfo: You have launched a development server at http://0.0.0.0:None.It is intended only for local use." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Logged into as \n" + ] + }, + { + "data": { + "text/html": [ + "
SyftWarning:
You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`.

" + ], + "text/plain": [ + "SyftWarning: You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
SyftInfo:
You have launched a development server at http://0.0.0.0:None.It is intended only for local use.

" + ], + "text/plain": [ + "SyftInfo: You have launched a development server at http://0.0.0.0:None.It is intended only for local use." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Logged into as \n" + ] + }, + { + "data": { + "text/html": [ + "
SyftWarning:
You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`.

" + ], + "text/plain": [ + "SyftWarning: You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
SyftInfo:
You have launched a development server at http://0.0.0.0:None.It is intended only for local use.

" + ], + "text/plain": [ + "SyftInfo: You have launched a development server at http://0.0.0.0:None.It is intended only for local use." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Logged into as \n" + ] + }, + { + "data": { + "text/html": [ + "
SyftWarning:
You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`.

" + ], + "text/plain": [ + "SyftWarning: You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`." + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import syft as sy\n", + "\n", + "datasites = list()\n", + "for i in range(3):\n", + " server = sy.orchestra.launch(name=\"fl-datasite-\"+str(i)) # connects to same server\n", + " client = server.login(email=\"sheldon@caltech.edu\", password=\"changethis\")\n", + " datasites.append(client)" + ] + }, + { + "cell_type": "markdown", + "id": "2c636278-a868-417d-8683-04cf792af393", + "metadata": {}, + "source": [ + "# Step 2: Get mock data and test a neural network" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "5adcb65a-88be-4e37-8e88-c025c1647f5e", + "metadata": {}, + "outputs": [], + "source": [ + "mock_images = datasites[0].datasets['MNIST Dataset'].assets['images'].mock[0:100]\n", + "mock_labels = datasites[0].datasets['MNIST Dataset'].assets['labels'].mock[0:100]" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e768fa19-65f3-4bad-9350-7204a7479e38", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1, Loss: 0.0000\n", + "Epoch 2, Loss: 0.0000\n", + "Epoch 3, Loss: 0.0000\n", + "Epoch 4, Loss: 0.0000\n", + "Epoch 5, Loss: 0.0000\n", + "Epoch 6, Loss: 0.0000\n", + "Epoch 7, Loss: 0.0000\n", + "Epoch 8, Loss: 0.0000\n", + "Epoch 9, Loss: 0.0000\n", + "Epoch 10, Loss: 0.0000\n" + ] + } + ], + "source": [ + "# third party\n", + "import torch\n", + "import torch.nn as nn\n", + "import torch.optim as optim\n", + "from torch.utils.data import TensorDataset\n", + "\n", + "# Define the data loader\n", + "train_loader = torch.utils.data.DataLoader(\n", + " TensorDataset(torch.tensor(mock_images, dtype=torch.float32), \n", + " torch.tensor(mock_labels, dtype=torch.float32)), \n", + " batch_size=4, \n", + " shuffle=True\n", + ")\n", + 
"\n", + "# Define the neural network class\n", + "class MLP(nn.Module):\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.fc1 = nn.Linear(784, 10)\n", + "\n", + " def forward(self, x):\n", + " x = torch.log_softmax(self.fc1(x.view(-1, 784)), dim=1)\n", + " return x\n", + "\n", + "# Define the model, optimizer, and loss function\n", + "model = MLP()\n", + "optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)\n", + "criterion = nn.CrossEntropyLoss()\n", + "\n", + "# Train the model\n", + "for epoch in range(10):\n", + " running_loss = 0.0\n", + " for _, data in enumerate(train_loader, 0):\n", + " inputs, labels = data\n", + " optimizer.zero_grad()\n", + " outputs = model(inputs)\n", + " loss = criterion(outputs, labels)\n", + " loss.backward()\n", + " optimizer.step()\n", + " running_loss += loss.item()\n", + " print(\n", + " f\"Epoch {epoch + 1}, Loss: {(running_loss / len(train_loader)):.4f}\",\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "60c16944-e6ae-4858-8c02-ddc654162ae7", + "metadata": {}, + "source": [ + "# Step 3: Submit experiment to each datasites" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "ef3823a0-69c7-4347-8f69-d312067e024c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
SyftSuccess:
Syft function 'train' successfully created. To add a code request, please create a project using `project = syft.Project(...)`, then use command `project.create_code_request`.

" + ], + "text/plain": [ + "SyftSuccess: Syft function 'train' successfully created. To add a code request, please create a project using `project = syft.Project(...)`, then use command `project.create_code_request`." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
SyftSuccess:
Syft function 'train' successfully created. To add a code request, please create a project using `project = syft.Project(...)`, then use command `project.create_code_request`.

" + ], + "text/plain": [ + "SyftSuccess: Syft function 'train' successfully created. To add a code request, please create a project using `project = syft.Project(...)`, then use command `project.create_code_request`." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
SyftSuccess:
Syft function 'train' successfully created. To add a code request, please create a project using `project = syft.Project(...)`, then use command `project.create_code_request`.

" + ], + "text/plain": [ + "SyftSuccess: Syft function 'train' successfully created. To add a code request, please create a project using `project = syft.Project(...)`, then use command `project.create_code_request`." + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "for dasite in datasites:\n", + "\n", + " mock_images_ptr = dasite.datasets['MNIST Dataset'].assets['images']\n", + " mock_labels_ptr = dasite.datasets['MNIST Dataset'].assets['labels']\n", + " \n", + " \n", + " @sy.syft_function(\n", + " input_policy=sy.ExactMatch(\n", + " mnist_images=mock_images_ptr, \n", + " mnist_labels=mock_labels_ptr\n", + " ),\n", + " output_policy=sy.SingleExecutionExactOutput(),\n", + " )\n", + " def train(mnist_images, mnist_labels):\n", + " # third party\n", + " import torch\n", + " import torch.nn as nn\n", + " import torch.optim as optim\n", + " from torch.utils.data import TensorDataset\n", + "\n", + " # Define the data loader\n", + " train_loader = torch.utils.data.DataLoader(\n", + " TensorDataset(torch.tensor(mnist_images, dtype=torch.float32), \n", + " torch.tensor(mnist_labels, dtype=torch.float32)), \n", + " batch_size=4, \n", + " shuffle=True\n", + " )\n", + "\n", + " # Define the neural network class\n", + " class MLP(nn.Module):\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.fc1 = nn.Linear(784, 10)\n", + "\n", + " def forward(self, x):\n", + " x = torch.log_softmax(self.fc1(x.view(-1, 784)), dim=1)\n", + " return x\n", + "\n", + " # Define the model, optimizer, and loss function\n", + " model = MLP()\n", + " optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)\n", + " criterion = nn.CrossEntropyLoss()\n", + "\n", + " # Train the model\n", + " train_accs = []\n", + " for epoch in range(20):\n", + " running_loss = 0.0\n", + " for _, data in enumerate(train_loader, 0):\n", + " inputs, labels = data\n", + " optimizer.zero_grad()\n", + " outputs = model(inputs)\n", + " loss = criterion(outputs, 
labels)\n", + " loss.backward()\n", + " optimizer.step()\n", + " running_loss += loss.item()\n", + " print(\n", + " f\"Epoch {epoch + 1}, Loss: {(running_loss / len(train_loader)):.4f}\"\n", + " )\n", + " # Record the mean training loss for this epoch\n", + " train_accs.append((running_loss / len(train_loader)))\n", + "\n", + " # Get model parameters\n", + " params = model.state_dict()\n", + "\n", + " # Return per-epoch mean training losses and model parameters\n", + " return train_accs, params\n", + " new_project = sy.Project(\n", + " name=\"Training a 3-layer torch neural network on MNIST data\",\n", + " description=\"\"\"Hi, I would like to train my neural network on your MNIST data \n", + " (I can download it online too but I just want to use Syft coz it's cool)\"\"\",\n", + " members=[dasite],\n", + " )\n", + " new_project.create_code_request(obj=train, client=dasite)\n", + " project = new_project.send()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82224c82-d89d-41e1-82e2-8ffd130f0a63", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f2230949-3e89-4186-ad12-69b683254128", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/tutorials/federated-learning/02 - Data Owners Approve Experiments.ipynb b/notebooks/tutorials/federated-learning/02 - Data Owners Approve Experiments.ipynb new file mode 100644 index 00000000000..23435116bbc --- /dev/null +++ b/notebooks/tutorials/federated-learning/02 - Data Owners Approve Experiments.ipynb @@ -0,0
+1,2992 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "f6534fd6-ce55-492b-858b-9e25eca94986", + "metadata": {}, + "source": [ + "# Step 1: Login as Data Owner" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "dc9728f3-3f4b-4c44-924b-b2577733ecbc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
SyftInfo:
You have launched a development server at http://0.0.0.0:None.It is intended only for local use.

" + ], + "text/plain": [ + "SyftInfo: You have launched a development server at http://0.0.0.0:None.It is intended only for local use." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Logged into as \n" + ] + }, + { + "data": { + "text/html": [ + "
SyftWarning:
You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`.

" + ], + "text/plain": [ + "SyftWarning: You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
SyftInfo:
You have launched a development server at http://0.0.0.0:None.It is intended only for local use.

" + ], + "text/plain": [ + "SyftInfo: You have launched a development server at http://0.0.0.0:None.It is intended only for local use." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Logged into as \n" + ] + }, + { + "data": { + "text/html": [ + "
SyftWarning:
You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`.

" + ], + "text/plain": [ + "SyftWarning: You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
SyftInfo:
You have launched a development server at http://0.0.0.0:None.It is intended only for local use.

" + ], + "text/plain": [ + "SyftInfo: You have launched a development server at http://0.0.0.0:None.It is intended only for local use." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Logged into as \n" + ] + }, + { + "data": { + "text/html": [ + "
SyftWarning:
You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`.

" + ], + "text/plain": [ + "SyftWarning: You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`." + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import syft as sy\n", + "\n", + "datasites = list()\n", + "for i in range(3):\n", + " server = sy.orchestra.launch(name=\"fl-datasite-\"+str(i)) # connects to same server\n", + " client = server.login(email=\"info@openmined.org\", password=\"changethis\")\n", + " datasites.append(client)" + ] + }, + { + "cell_type": "markdown", + "id": "53ad4666-f5c1-4169-bf74-c7f113076a72", + "metadata": {}, + "source": [ + "# Step 2: Review projects and code\n", + "\n", + "(optionally — the code can be run/tested. skipping for brevity)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "0a353120-e826-4de3-9019-8019fc0d7563", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
\n", + "\n", + "
\n", + "
\n", + " \n", + "
\n", + "

Request List

\n", + "
\n", + "
\n", + "
\n", + " \n", + "
\n", + "
\n", + "

Total: 0

\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "datasites[0].requests" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "ed367067-f930-4301-87f2-a619c27b3a09", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + "
\n", + "

UserCode

\n", + "

id: UID = e1cdc3eb19734764932f5865a688c9b6

\n", + "

service_func_name: str = train

\n", + "

shareholders: list = ['fl-datasite-0']

\n", + "

status: list = ['Server: fl-datasite-0, Status: pending']

\n", + " \n", + " \n", + "

inputs: dict =

{\n",
+       "  \"assets\": {\n",
+       "    \"mnist_images\": {\n",
+       "      \"action_id\": \"03d031afb46c482aa37ea64912e94651\",\n",
+       "      \"source_asset\": \"images\",\n",
+       "      \"source_dataset\": null,\n",
+       "      \"source_server\": \"4d3cee6cb0d2450f84d56bbfdeb84197\"\n",
+       "    },\n",
+       "    \"mnist_labels\": {\n",
+       "      \"action_id\": \"618fdf7ee1334a539dc5a8df54e5c786\",\n",
+       "      \"source_asset\": \"labels\",\n",
+       "      \"source_dataset\": null,\n",
+       "      \"source_server\": \"4d3cee6cb0d2450f84d56bbfdeb84197\"\n",
+       "    }\n",
+       "  }\n",
+       "}

\n", + "

code:

\n", + "
\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/markdown": [ + "```python\n", + "@sy.syft_function(\n", + " input_policy=sy.ExactMatch(\n", + " mnist_images=mock_images_ptr, \n", + " mnist_labels=mock_labels_ptr\n", + " ),\n", + " output_policy=sy.SingleExecutionExactOutput(),\n", + ")\n", + "def train(mnist_images, mnist_labels):\n", + " # third party\n", + " import torch\n", + " import torch.nn as nn\n", + " import torch.optim as optim\n", + " from torch.utils.data import TensorDataset\n", + "\n", + " # Define the data loader\n", + " train_loader = torch.utils.data.DataLoader(\n", + " TensorDataset(torch.tensor(mnist_images, dtype=torch.float32), \n", + " torch.tensor(mnist_labels, dtype=torch.float32)), \n", + " batch_size=4, \n", + " shuffle=True\n", + " )\n", + "\n", + " # Define the neural network class\n", + " class MLP(nn.Module):\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.fc1 = nn.Linear(784, 10)\n", + "\n", + " def forward(self, x):\n", + " x = torch.log_softmax(self.fc1(x.view(-1, 784)), dim=1)\n", + " return x\n", + "\n", + " # Define the model, optimizer, and loss function\n", + " model = MLP()\n", + " optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)\n", + " criterion = nn.CrossEntropyLoss()\n", + "\n", + " # Train the model\n", + " train_accs = []\n", + " for epoch in range(20):\n", + " running_loss = 0.0\n", + " for _, data in enumerate(train_loader, 0):\n", + " inputs, labels = data\n", + " optimizer.zero_grad()\n", + " outputs = model(inputs)\n", + " loss = criterion(outputs, labels)\n", + " loss.backward()\n", + " optimizer.step()\n", + " running_loss += loss.item()\n", + " print(\n", + " f\"Epoch {epoch + 1}, Loss: {(running_loss / len(train_loader)):.4f}\"\n", + " )\n", + " # Calculate accuracy on the training set\n", + " train_accs.append((running_loss / len(train_loader)))\n", + "\n", + " # Get model parameters\n", + " 
params = model.state_dict()\n", + "\n", + " # Return training accuracy and model parameters\n", + " return train_accs, params\n", + "```" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "datasites[0].requests[0].changes[0].code" + ] + }, + { + "cell_type": "markdown", + "id": "445dd4f7-9788-42d8-9fcb-155f6106eb60", + "metadata": {}, + "source": [ + "# Step 3: Approve projects and code" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a66626f1-480f-4633-9ef8-27e09f686506", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Approving request on change train for datasite fl-datasite-0\n", + "Approving request on change train for datasite fl-datasite-1\n", + "Approving request on change train for datasite fl-datasite-2\n" + ] + } + ], + "source": [ + "for dasite in datasites:\n", + " dasite.requests[-1].approve()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d3fb7231-8a85-45fb-af7c-bbc2b2d619f6", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/tutorials/federated-learning/03 - Run Federated Learning.ipynb b/notebooks/tutorials/federated-learning/03 - Run Federated Learning.ipynb new file mode 100644 index 00000000000..ba7e1d4baf4 --- /dev/null +++ b/notebooks/tutorials/federated-learning/03 - Run Federated Learning.ipynb @@ -0,0 +1,325 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b2e0dacb-7dfc-42b4-8a7b-30a087c2db2e", + "metadata": {}, + "source": 
[ + "# Step 1: Login as External Researcher" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "4ef3a306-f6f8-46cd-8646-84702ac77336", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
SyftInfo:
You have launched a development server at http://0.0.0.0:None.It is intended only for local use.

" + ], + "text/plain": [ + "SyftInfo: You have launched a development server at http://0.0.0.0:None.It is intended only for local use." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Logged into as \n" + ] + }, + { + "data": { + "text/html": [ + "
SyftWarning:
You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`.

" + ], + "text/plain": [ + "SyftWarning: You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
SyftInfo:
You have launched a development server at http://0.0.0.0:None.It is intended only for local use.

" + ], + "text/plain": [ + "SyftInfo: You have launched a development server at http://0.0.0.0:None.It is intended only for local use." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Logged into as \n" + ] + }, + { + "data": { + "text/html": [ + "
SyftWarning:
You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`.

" + ], + "text/plain": [ + "SyftWarning: You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
SyftInfo:
You have launched a development server at http://0.0.0.0:None.It is intended only for local use.

" + ], + "text/plain": [ + "SyftInfo: You have launched a development server at http://0.0.0.0:None.It is intended only for local use." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Logged into as \n" + ] + }, + { + "data": { + "text/html": [ + "
SyftWarning:
You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`.

" + ], + "text/plain": [ + "SyftWarning: You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`." + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import syft as sy\n", + "\n", + "datasites = list()\n", + "for i in range(3):\n", + " server = sy.orchestra.launch(name=\"fl-datasite-\"+str(i)) # connects to same server\n", + " client = server.login(email=\"sheldon@caltech.edu\", password=\"changethis\")\n", + " datasites.append(client)" + ] + }, + { + "cell_type": "markdown", + "id": "00fb6bcf-8587-43f1-a3ea-1f15cdceb2af", + "metadata": {}, + "source": [ + "# Step 2: Train models on all datasites" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "1838fb85-4a73-462d-b6b7-bb9a4f4b0e92", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
SyftWarning:
Policy is no longer valid. count: 1 >= limit: 1. Loading results from cache.

" + ], + "text/plain": [ + "SyftWarning: Policy is no longer valid. count: 1 >= limit: 1. Loading results from cache." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
SyftWarning:
Policy is no longer valid. count: 1 >= limit: 1. Loading results from cache.

" + ], + "text/plain": [ + "SyftWarning: Policy is no longer valid. count: 1 >= limit: 1. Loading results from cache." + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
SyftWarning:
Policy is no longer valid. count: 1 >= limit: 1. Loading results from cache.

" + ], + "text/plain": [ + "SyftWarning: Policy is no longer valid. count: 1 >= limit: 1. Loading results from cache." + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "results = list()\n", + "\n", + "for dasite in datasites:\n", + " print()\n", + " print(dasite)\n", + " images_ptr = dasite.datasets['MNIST Dataset'].assets['images']\n", + " labels_ptr = dasite.datasets['MNIST Dataset'].assets['labels']\n", + " \n", + " results.append(dasite.code.train(mnist_images=images_ptr, \n", + " mnist_labels=labels_ptr).get_from(dasite))" + ] + }, + { + "cell_type": "markdown", + "id": "958e5514-209f-450e-9b89-fb46922c13b6", + "metadata": {}, + "source": [ + "# Step 3: Merge models" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "3359ac77-b582-4ac9-94b3-d7b756b1ef07", + "metadata": {}, + "outputs": [], + "source": [ + "from itertools import groupby\n", + "from collections import OrderedDict\n", + "\n", + "def ave(d):\n", + " _data = sorted([i for b in d for i in b.items()], key=lambda x:x[0])\n", + " _d = [(a, [j for _, j in b]) for a, b in groupby(_data, key=lambda x:x[0])]\n", + " return OrderedDict({a:ave(b) if isinstance(b[0], dict) else sum(b)/float(len(b)) for a, b in _d})\n" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "30fc8abb-0494-4857-83a2-e6eaea3a1625", + "metadata": {}, + "outputs": [], + "source": [ + "models_weights = list(map(lambda x:x[1], results))" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "e65a0050-6e4f-4f25-be26-3835a862eca7", + "metadata": {}, + "outputs": [], + "source": [ + "new_model_weights = ave(models_weights)" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "91abef0b-af16-4481-a449-c50c9922c179", + "metadata": {}, + "outputs": [], + "source": [ + "import torch as torch\n", + "from torch import nn\n", + "# Define the neural network class\n", + "class MLP(nn.Module):\n", + " def __init__(self):\n", + " 
super().__init__()\n", + " self.fc1 = nn.Linear(784, 10)\n", + "\n", + " def forward(self, x):\n", + " x = torch.log_softmax(self.fc1(x.view(-1, 784)), dim=1)\n", + " return x" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "id": "9c467d7e-fe2a-4f2d-8fbf-73e593b77fd8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_model = MLP()\n", + "new_model.load_state_dict(new_model_weights)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "089ecfaf-ab2f-4c8e-8909-6085dea18f9a", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/tutorials/federated-learning/mnist_dataset.py b/notebooks/tutorials/federated-learning/mnist_dataset.py new file mode 100644 index 00000000000..77b7c2c7afe --- /dev/null +++ b/notebooks/tutorials/federated-learning/mnist_dataset.py @@ -0,0 +1,88 @@ +""" +Code for the MNIST dataset +Source: https://github.com/google/jax/blob/main/examples/datasets.py +""" + +# stdlib +import array +import gzip +import os +from os import path +import struct +import urllib.request + +# third party +import numpy as np +from numpy import ndarray + +_DATA = "/tmp/mnist_data/" + + +def _download(url: str, filename: str) -> None: + """Download a url to a file in the JAX data temp directory.""" + if not path.exists(_DATA): + os.makedirs(_DATA) + out_file = path.join(_DATA, filename) + if not path.isfile(out_file): + urllib.request.urlretrieve(url, 
out_file) + print(f"downloaded {url} to {_DATA}") + + +def _partial_flatten(x) -> ndarray: + """Flatten all but the first dimension of an ndarray.""" + return np.reshape(x, (x.shape[0], -1)) + + +def _one_hot(x: ndarray, k: int, dtype: type = np.float32) -> ndarray: + """Create a one-hot encoding of x of size k.""" + return np.array(x[:, None] == np.arange(k), dtype) + + +def mnist_raw() -> tuple[ndarray, ndarray, ndarray, ndarray]: + """Download and parse the raw MNIST dataset.""" + # CVDF mirror of http://yann.lecun.com/exdb/mnist/ + base_url = "https://storage.googleapis.com/cvdf-datasets/mnist/" + + def parse_labels(filename): + with gzip.open(filename, "rb") as fh: + _ = struct.unpack(">II", fh.read(8)) + return np.array(array.array("B", fh.read()), dtype=np.uint8) + + def parse_images(filename): + with gzip.open(filename, "rb") as fh: + _, num_data, rows, cols = struct.unpack(">IIII", fh.read(16)) + return np.array(array.array("B", fh.read()), dtype=np.uint8).reshape( + num_data, rows, cols + ) + + for filename in [ + "train-images-idx3-ubyte.gz", + "train-labels-idx1-ubyte.gz", + "t10k-images-idx3-ubyte.gz", + "t10k-labels-idx1-ubyte.gz", + ]: + _download(base_url + filename, filename) + + train_images = parse_images(path.join(_DATA, "train-images-idx3-ubyte.gz")) + train_labels = parse_labels(path.join(_DATA, "train-labels-idx1-ubyte.gz")) + test_images = parse_images(path.join(_DATA, "t10k-images-idx3-ubyte.gz")) + test_labels = parse_labels(path.join(_DATA, "t10k-labels-idx1-ubyte.gz")) + + return train_images, train_labels, test_images, test_labels + + +def mnist(permute_train: bool = False) -> tuple[ndarray, ndarray, ndarray, ndarray]: + """Download, parse and process MNIST data to unit scale and one-hot labels.""" + train_images, train_labels, test_images, test_labels = mnist_raw() + + train_images = _partial_flatten(train_images) / np.float32(255.0) + test_images = _partial_flatten(test_images) / np.float32(255.0) + train_labels = 
_one_hot(train_labels, 10) + test_labels = _one_hot(test_labels, 10) + + if permute_train: + perm = np.random.RandomState(0).permutation(train_images.shape[0]) + train_images = train_images[perm] + train_labels = train_labels[perm] + + return train_images, train_labels, test_images, test_labels From 752bc20a4c6b511729e676ee621067c458e10528 Mon Sep 17 00:00:00 2001 From: Andrew Trask Date: Sat, 3 Aug 2024 21:55:15 -0400 Subject: [PATCH 02/18] Update datasite registry to work with datasites directly --- packages/syft/src/syft/client/registry.py | 92 ++++++++++++++++++++++- 1 file changed, 90 insertions(+), 2 deletions(-) diff --git a/packages/syft/src/syft/client/registry.py b/packages/syft/src/syft/client/registry.py index 48627172eeb..649b5e71362 100644 --- a/packages/syft/src/syft/client/registry.py +++ b/packages/syft/src/syft/client/registry.py @@ -27,7 +27,7 @@ ) NETWORK_REGISTRY_REPO = "https://github.com/OpenMined/NetworkRegistry" - +DATASITE_REGISTRY_URL = "https://raw.githubusercontent.com/OpenMined/NetworkRegistry/main/datasites.json" def _get_all_networks(network_json: dict, version: str) -> list[dict]: return network_json.get(version, {}).get("gateways", []) @@ -181,8 +181,96 @@ def __getitem__(self, key: str | int) -> Client: return self.create_client(network=network) raise KeyError(f"Invalid key: {key} for {on}") - class DatasiteRegistry: + + def __init__(self) -> None: + self.all_datasites: list[dict] = [] + try: + response = requests.get(DATASITE_REGISTRY_URL) # nosec + datasites_json = response.json() + self.all_datasites = datasites_json["datasites"] + except Exception as e: + logger.warning( + f"Failed to get Datasite Registry, go checkout: {DATASITE_REGISTRY_URL}. 
{e}" + ) + + @property + def online_datasites(self) -> list[dict]: + datasites = self.all_datasites + + def check_datasite(datasite: dict) -> dict[Any, Any] | None: + url = "http://" + datasite["host_or_ip"] + ":" + str(datasite["port"]) + "/" + online = "as;lfjasdfsadf" + try: + res = requests.get(url, timeout=DEFAULT_TIMEOUT) # nosec + if "status" in res.json(): + online = res.json()['status'] == 'ok' + elif "detail" in res.json(): + online = True + except Exception as e: + print(e) + online = False + if online: + version = datasite.get("version", None) + # Check if syft version was described in DatasiteRegistry + # If it's unknown, try to update it to an available version. + if not version or version == "unknown": + # If not defined, try to ask in /syft/version endpoint (supported by 0.7.0) + try: + version_url = url + "api/v2/metadata" + res = requests.get(version_url, timeout=DEFAULT_TIMEOUT) # nosec + if res.status_code == 200: + datasite["version"] = res.json()["syft_version"] + else: + datasite["version"] = "unknown" + except Exception: + datasite["version"] = "unknown" + return datasite + return None + # We can use a with statement to ensure threads are cleaned up promptly + with futures.ThreadPoolExecutor(max_workers=20) as executor: + # map + _online_datasites = list( + executor.map(lambda datasite: check_datasite(datasite), datasites) + ) + + online_datasites = [each for each in _online_datasites if each is not None] + return online_datasites + + def _repr_html_(self) -> str: + on = self.online_datasites + if len(on) == 0: + return "(no gateways online - try syft.gateways.all_networks to see offline gateways)" + df = pd.DataFrame(on) + + return df._repr_html_() # type: ignore + + @staticmethod + def create_client(datasite: dict[str, Any]) -> Client: + # relative + from syft.client.client import connect + + try: + port = int(datasite["port"]) + protocol = datasite["protocol"] + host_or_ip = datasite["host_or_ip"] + server_url = ServerURL(port=port, 
protocol=protocol, host_or_ip=host_or_ip) + client = connect(url=str(server_url)) + return client.guest() + except Exception as e: + raise SyftException(f"Failed to login with: {datasite}. {e}") + + def __getitem__(self, key: str | int) -> Client: + if isinstance(key, int): + return self.create_client(datasite=self.online_datasites[key]) + else: + on = self.online_datasites + for datasite in on: + if datasite["name"] == key: + return self.create_client(datasite=datasite) + raise KeyError(f"Invalid key: {key} for {on}") + +class NetworksOfDatasitesRegistry: def __init__(self) -> None: self.all_networks: list[dict] = [] self.all_datasites: dict[str, ServerPeer] = {} From fabe3f93a40f3b7112777441313d05652801f68c Mon Sep 17 00:00:00 2001 From: Andrew Trask Date: Sat, 3 Aug 2024 21:55:47 -0400 Subject: [PATCH 03/18] Move and update FL tutorial --- .../00 - Setup FL Datasites.ipynb | 159 +- .../01 - Submit FL Experiment.ipynb | 228 +- ...02 - Data Owners Approve Experiments.ipynb | 2923 +---------------- .../03 - Run Federated Learning.ipynb | 246 +- 4 files changed, 139 insertions(+), 3417 deletions(-) diff --git a/notebooks/tutorials/federated-learning/00 - Setup FL Datasites.ipynb b/notebooks/tutorials/federated-learning/00 - Setup FL Datasites.ipynb index 476428bbe43..b574b175de5 100644 --- a/notebooks/tutorials/federated-learning/00 - Setup FL Datasites.ipynb +++ b/notebooks/tutorials/federated-learning/00 - Setup FL Datasites.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "ac36ba2d-3662-4637-a94b-bd78ce4cde22", + "id": "0", "metadata": {}, "source": [ "# Step 1: Launch some datasite servers" @@ -10,126 +10,35 @@ }, { "cell_type": "code", - "execution_count": 1, - "id": "9a823a3e-3576-4fa2-881a-f4671a6a7966", + "execution_count": null, + "id": "1", "metadata": {}, "outputs": [], "source": [ - "import syft as sy\n", - "from mnist_dataset import mnist" + "# third party\n", + "from mnist_dataset import mnist\n", + "\n", + "# syft absolute\n", + 
"import syft as sy" ] }, { "cell_type": "code", - "execution_count": 2, - "id": "bb256760-8067-4410-a926-2ea522f17ffc", + "execution_count": null, + "id": "2", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
SyftInfo:
You have launched a development server at http://0.0.0.0:None.It is intended only for local use.

" - ], - "text/plain": [ - "SyftInfo: You have launched a development server at http://0.0.0.0:None.It is intended only for local use." - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Logged into as \n" - ] - }, - { - "data": { - "text/html": [ - "
SyftWarning:
You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`.

" - ], - "text/plain": [ - "SyftWarning: You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`." - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
SyftInfo:
You have launched a development server at http://0.0.0.0:None.It is intended only for local use.

" - ], - "text/plain": [ - "SyftInfo: You have launched a development server at http://0.0.0.0:None.It is intended only for local use." - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Logged into as \n" - ] - }, - { - "data": { - "text/html": [ - "
SyftWarning:
You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`.

" - ], - "text/plain": [ - "SyftWarning: You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`." - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
SyftInfo:
You have launched a development server at http://0.0.0.0:None.It is intended only for local use.

" - ], - "text/plain": [ - "SyftInfo: You have launched a development server at http://0.0.0.0:None.It is intended only for local use." - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Logged into as \n" - ] - }, - { - "data": { - "text/html": [ - "
SyftWarning:
You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`.

" - ], - "text/plain": [ - "SyftWarning: You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`." - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "datasites = list()\n", "for i in range(3):\n", - " server = sy.orchestra.launch(name=\"fl-datasite-\"+str(i), reset=True)\n", + " server = sy.orchestra.launch(name=\"fl-datasite-\" + str(i), reset=True)\n", " client = server.login(email=\"info@openmined.org\", password=\"changethis\")\n", " datasites.append(client)" ] }, { "cell_type": "markdown", - "id": "9e2d0623-8324-4fcf-863d-27ed149b14e1", + "id": "3", "metadata": {}, "source": [ "# Step 2: Split MNIST across datasites" @@ -137,43 +46,33 @@ }, { "cell_type": "code", - "execution_count": 3, - "id": "455592f8-8af7-4e8e-af61-5319a77ee987", + "execution_count": null, + "id": "4", "metadata": {}, "outputs": [], "source": [ "train_images, train_labels, _, _ = mnist()\n", - "images = [train_images[0:20000], train_images[20000:40000],train_images[40000:60000]]\n", - "labels = [train_labels[0:20000], train_labels[20000:40000],train_labels[40000:60000]]\n" + "images = [train_images[0:20000], train_images[20000:40000], train_images[40000:60000]]\n", + "labels = [train_labels[0:20000], train_labels[20000:40000], train_labels[40000:60000]]" ] }, { "cell_type": "code", - "execution_count": 4, - "id": "2f3f3330-6ec4-4484-889a-b4be14501ceb", + "execution_count": null, + "id": "5", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Uploading: labels: 100%|\u001b[32m████████████████████████████████████████████████████████████████████\u001b[0m| 2/2 [00:00<00:00, 4.15it/s]\u001b[0m\n", - "Uploading: labels: 100%|\u001b[32m████████████████████████████████████████████████████████████████████\u001b[0m| 2/2 [00:00<00:00, 4.26it/s]\u001b[0m\n", - "Uploading: labels: 
100%|\u001b[32m████████████████████████████████████████████████████████████████████\u001b[0m| 2/2 [00:00<00:00, 3.89it/s]\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "for i, datasite in enumerate(datasites):\n", " dataset = sy.Dataset(name=\"MNIST Dataset\")\n", - " dataset.add_asset(sy.Asset(name=\"images\", data=images[i], mock=0*images[i]))\n", - " dataset.add_asset(sy.Asset(name=\"labels\", data=labels[i], mock=0*labels[i])) \n", + " dataset.add_asset(sy.Asset(name=\"images\", data=images[i], mock=0 * images[i]))\n", + " dataset.add_asset(sy.Asset(name=\"labels\", data=labels[i], mock=0 * labels[i]))\n", " datasites[i].upload_dataset(dataset)" ] }, { "cell_type": "markdown", - "id": "9861eeb9-f29b-43c0-83b1-86ff53e022ad", + "id": "6", "metadata": {}, "source": [ "# Create data scientist user accounts" @@ -181,8 +80,8 @@ }, { "cell_type": "code", - "execution_count": 5, - "id": "502f63ba-1cd7-4942-95c0-bfc9a437561e", + "execution_count": null, + "id": "7", "metadata": {}, "outputs": [], "source": [ @@ -194,13 +93,13 @@ " password_verify=\"changethis\",\n", " institution=\"Caltech\",\n", " website=\"https://www.caltech.edu/\",\n", - " )\n" + " )" ] }, { "cell_type": "code", "execution_count": null, - "id": "e410da12-2681-4582-b337-10b149b05d7c", + "id": "8", "metadata": {}, "outputs": [], "source": [] @@ -208,7 +107,7 @@ { "cell_type": "code", "execution_count": null, - "id": "fa20d4c3-a6af-442c-bb0a-e3ff410e9b60", + "id": "9", "metadata": {}, "outputs": [], "source": [] @@ -216,7 +115,7 @@ { "cell_type": "code", "execution_count": null, - "id": "285a3bdb-5e5e-4881-8cac-924844c04fd3", + "id": "10", "metadata": {}, "outputs": [], "source": [] diff --git a/notebooks/tutorials/federated-learning/01 - Submit FL Experiment.ipynb b/notebooks/tutorials/federated-learning/01 - Submit FL Experiment.ipynb index fbefa937a02..5d490726b3d 100644 --- a/notebooks/tutorials/federated-learning/01 - Submit FL Experiment.ipynb +++ 
b/notebooks/tutorials/federated-learning/01 - Submit FL Experiment.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "2ddc5050-75bd-44f9-821e-2cf5b5b5f1f9", + "id": "0", "metadata": {}, "source": [ "# Step 1: Login as External Researcher" @@ -10,117 +10,26 @@ }, { "cell_type": "code", - "execution_count": 1, - "id": "c2645f54-4db6-4760-9dc1-8f38f53f39b3", + "execution_count": null, + "id": "1", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
SyftInfo:
You have launched a development server at http://0.0.0.0:None.It is intended only for local use.

" - ], - "text/plain": [ - "SyftInfo: You have launched a development server at http://0.0.0.0:None.It is intended only for local use." - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Logged into as \n" - ] - }, - { - "data": { - "text/html": [ - "
SyftWarning:
You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`.

" - ], - "text/plain": [ - "SyftWarning: You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`." - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
SyftInfo:
You have launched a development server at http://0.0.0.0:None.It is intended only for local use.

" - ], - "text/plain": [ - "SyftInfo: You have launched a development server at http://0.0.0.0:None.It is intended only for local use." - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Logged into as \n" - ] - }, - { - "data": { - "text/html": [ - "
SyftWarning:
You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`.

" - ], - "text/plain": [ - "SyftWarning: You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`." - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
SyftInfo:
You have launched a development server at http://0.0.0.0:None.It is intended only for local use.

" - ], - "text/plain": [ - "SyftInfo: You have launched a development server at http://0.0.0.0:None.It is intended only for local use." - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Logged into as \n" - ] - }, - { - "data": { - "text/html": [ - "
SyftWarning:
You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`.

" - ], - "text/plain": [ - "SyftWarning: You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`." - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ + "# syft absolute\n", "import syft as sy\n", "\n", "datasites = list()\n", "for i in range(3):\n", - " server = sy.orchestra.launch(name=\"fl-datasite-\"+str(i)) # connects to same server\n", + " server = sy.orchestra.launch(\n", + " name=\"fl-datasite-\" + str(i)\n", + " ) # connects to same server\n", " client = server.login(email=\"sheldon@caltech.edu\", password=\"changethis\")\n", " datasites.append(client)" ] }, { "cell_type": "markdown", - "id": "2c636278-a868-417d-8683-04cf792af393", + "id": "2", "metadata": {}, "source": [ "# Step 2: Get mock data and test a neural network" @@ -128,38 +37,21 @@ }, { "cell_type": "code", - "execution_count": 2, - "id": "5adcb65a-88be-4e37-8e88-c025c1647f5e", + "execution_count": null, + "id": "3", "metadata": {}, "outputs": [], "source": [ - "mock_images = datasites[0].datasets['MNIST Dataset'].assets['images'].mock[0:100]\n", - "mock_labels = datasites[0].datasets['MNIST Dataset'].assets['labels'].mock[0:100]" + "mock_images = datasites[0].datasets[\"MNIST Dataset\"].assets[\"images\"].mock[0:100]\n", + "mock_labels = datasites[0].datasets[\"MNIST Dataset\"].assets[\"labels\"].mock[0:100]" ] }, { "cell_type": "code", - "execution_count": 3, - "id": "e768fa19-65f3-4bad-9350-7204a7479e38", + "execution_count": null, + "id": "4", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1, Loss: 0.0000\n", - "Epoch 2, Loss: 0.0000\n", - "Epoch 3, Loss: 0.0000\n", - "Epoch 4, Loss: 0.0000\n", - "Epoch 5, Loss: 0.0000\n", - "Epoch 6, Loss: 0.0000\n", - "Epoch 7, Loss: 0.0000\n", - "Epoch 8, Loss: 0.0000\n", - "Epoch 9, Loss: 0.0000\n", - "Epoch 10, Loss: 0.0000\n" - ] - } - ], + "outputs": [], "source": [ "# third party\n", 
"import torch\n", @@ -169,13 +61,17 @@ "\n", "# Define the data loader\n", "train_loader = torch.utils.data.DataLoader(\n", - " TensorDataset(torch.tensor(mock_images, dtype=torch.float32), \n", - " torch.tensor(mock_labels, dtype=torch.float32)), \n", - " batch_size=4, \n", - " shuffle=True\n", + " TensorDataset(\n", + " torch.tensor(mock_images, dtype=torch.float32),\n", + " torch.tensor(mock_labels, dtype=torch.float32),\n", + " ),\n", + " batch_size=4,\n", + " shuffle=True,\n", ")\n", "\n", "# Define the neural network class\n", + "\n", + "\n", "class MLP(nn.Module):\n", " def __init__(self):\n", " super().__init__()\n", @@ -185,6 +81,7 @@ " x = torch.log_softmax(self.fc1(x.view(-1, 784)), dim=1)\n", " return x\n", "\n", + "\n", "# Define the model, optimizer, and loss function\n", "model = MLP()\n", "optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)\n", @@ -208,7 +105,7 @@ }, { "cell_type": "markdown", - "id": "60c16944-e6ae-4858-8c02-ddc654162ae7", + "id": "5", "metadata": {}, "source": [ "# Step 3: Submit experiment to each datasites" @@ -216,58 +113,18 @@ }, { "cell_type": "code", - "execution_count": 4, - "id": "ef3823a0-69c7-4347-8f69-d312067e024c", + "execution_count": null, + "id": "6", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
SyftSuccess:
Syft function 'train' successfully created. To add a code request, please create a project using `project = syft.Project(...)`, then use command `project.create_code_request`.

" - ], - "text/plain": [ - "SyftSuccess: Syft function 'train' successfully created. To add a code request, please create a project using `project = syft.Project(...)`, then use command `project.create_code_request`." - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
SyftSuccess:
Syft function 'train' successfully created. To add a code request, please create a project using `project = syft.Project(...)`, then use command `project.create_code_request`.

" - ], - "text/plain": [ - "SyftSuccess: Syft function 'train' successfully created. To add a code request, please create a project using `project = syft.Project(...)`, then use command `project.create_code_request`." - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
SyftSuccess:
Syft function 'train' successfully created. To add a code request, please create a project using `project = syft.Project(...)`, then use command `project.create_code_request`.

" - ], - "text/plain": [ - "SyftSuccess: Syft function 'train' successfully created. To add a code request, please create a project using `project = syft.Project(...)`, then use command `project.create_code_request`." - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "for dasite in datasites:\n", + " mock_images_ptr = dasite.datasets[\"MNIST Dataset\"].assets[\"images\"]\n", + " mock_labels_ptr = dasite.datasets[\"MNIST Dataset\"].assets[\"labels\"]\n", "\n", - " mock_images_ptr = dasite.datasets['MNIST Dataset'].assets['images']\n", - " mock_labels_ptr = dasite.datasets['MNIST Dataset'].assets['labels']\n", - " \n", - " \n", " @sy.syft_function(\n", " input_policy=sy.ExactMatch(\n", - " mnist_images=mock_images_ptr, \n", - " mnist_labels=mock_labels_ptr\n", + " mnist_images=mock_images_ptr, mnist_labels=mock_labels_ptr\n", " ),\n", " output_policy=sy.SingleExecutionExactOutput(),\n", " )\n", @@ -280,10 +137,12 @@ "\n", " # Define the data loader\n", " train_loader = torch.utils.data.DataLoader(\n", - " TensorDataset(torch.tensor(mnist_images, dtype=torch.float32), \n", - " torch.tensor(mnist_labels, dtype=torch.float32)), \n", - " batch_size=4, \n", - " shuffle=True\n", + " TensorDataset(\n", + " torch.tensor(mnist_images, dtype=torch.float32),\n", + " torch.tensor(mnist_labels, dtype=torch.float32),\n", + " ),\n", + " batch_size=4,\n", + " shuffle=True,\n", " )\n", "\n", " # Define the neural network class\n", @@ -313,17 +172,16 @@ " loss.backward()\n", " optimizer.step()\n", " running_loss += loss.item()\n", - " print(\n", - " f\"Epoch {epoch + 1}, Loss: {(running_loss / len(train_loader)):.4f}\"\n", - " )\n", + " print(f\"Epoch {epoch + 1}, Loss: {(running_loss / len(train_loader)):.4f}\")\n", " # Calculate accuracy on the training set\n", - " train_accs.append((running_loss / len(train_loader)))\n", + " train_accs.append(running_loss / len(train_loader))\n", "\n", " # Get model parameters\n", " params = 
model.state_dict()\n", "\n", " # Return training accuracy and model parameters\n", " return train_accs, params\n", + "\n", " new_project = sy.Project(\n", " name=\"Training a 3-layer torch neural network on MNIST data\",\n", " description=\"\"\"Hi, I would like to train my neural network on your MNIST data \n", @@ -337,7 +195,7 @@ { "cell_type": "code", "execution_count": null, - "id": "82224c82-d89d-41e1-82e2-8ffd130f0a63", + "id": "7", "metadata": {}, "outputs": [], "source": [] @@ -345,7 +203,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f2230949-3e89-4186-ad12-69b683254128", + "id": "8", "metadata": {}, "outputs": [], "source": [] diff --git a/notebooks/tutorials/federated-learning/02 - Data Owners Approve Experiments.ipynb b/notebooks/tutorials/federated-learning/02 - Data Owners Approve Experiments.ipynb index 23435116bbc..9ebc4ef25ff 100644 --- a/notebooks/tutorials/federated-learning/02 - Data Owners Approve Experiments.ipynb +++ b/notebooks/tutorials/federated-learning/02 - Data Owners Approve Experiments.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "f6534fd6-ce55-492b-858b-9e25eca94986", + "id": "0", "metadata": {}, "source": [ "# Step 1: Login as Data Owner" @@ -10,117 +10,26 @@ }, { "cell_type": "code", - "execution_count": 1, - "id": "dc9728f3-3f4b-4c44-924b-b2577733ecbc", + "execution_count": null, + "id": "1", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
SyftInfo:
You have launched a development server at http://0.0.0.0:None.It is intended only for local use.

" - ], - "text/plain": [ - "SyftInfo: You have launched a development server at http://0.0.0.0:None.It is intended only for local use." - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Logged into as \n" - ] - }, - { - "data": { - "text/html": [ - "
SyftWarning:
You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`.

" - ], - "text/plain": [ - "SyftWarning: You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`." - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
SyftInfo:
You have launched a development server at http://0.0.0.0:None.It is intended only for local use.

" - ], - "text/plain": [ - "SyftInfo: You have launched a development server at http://0.0.0.0:None.It is intended only for local use." - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Logged into as \n" - ] - }, - { - "data": { - "text/html": [ - "
SyftWarning:
You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`.

" - ], - "text/plain": [ - "SyftWarning: You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`." - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
SyftInfo:
You have launched a development server at http://0.0.0.0:None.It is intended only for local use.

" - ], - "text/plain": [ - "SyftInfo: You have launched a development server at http://0.0.0.0:None.It is intended only for local use." - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Logged into as \n" - ] - }, - { - "data": { - "text/html": [ - "
SyftWarning:
You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`.

" - ], - "text/plain": [ - "SyftWarning: You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`." - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ + "# syft absolute\n", "import syft as sy\n", "\n", "datasites = list()\n", "for i in range(3):\n", - " server = sy.orchestra.launch(name=\"fl-datasite-\"+str(i)) # connects to same server\n", + " server = sy.orchestra.launch(\n", + " name=\"fl-datasite-\" + str(i)\n", + " ) # connects to same server\n", " client = server.login(email=\"info@openmined.org\", password=\"changethis\")\n", " datasites.append(client)" ] }, { "cell_type": "markdown", - "id": "53ad4666-f5c1-4169-bf74-c7f113076a72", + "id": "2", "metadata": {}, "source": [ "# Step 2: Review projects and code\n", @@ -130,2809 +39,27 @@ }, { "cell_type": "code", - "execution_count": 2, - "id": "0a353120-e826-4de3-9019-8019fc0d7563", + "execution_count": null, + "id": "3", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "
\n", - "\n", - "
\n", - "
\n", - " \n", - "
\n", - "

Request List

\n", - "
\n", - "
\n", - "
\n", - " \n", - "
\n", - "
\n", - "

Total: 0

\n", - "
\n", - "
\n", - "
\n", - "\n", - "\n", - "\n", - "" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "datasites[0].requests" ] }, { "cell_type": "code", - "execution_count": 3, - "id": "ed367067-f930-4301-87f2-a619c27b3a09", + "execution_count": null, + "id": "4", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - " \n", - "
\n", - "

UserCode

\n", - "

id: UID = e1cdc3eb19734764932f5865a688c9b6

\n", - "

service_func_name: str = train

\n", - "

shareholders: list = ['fl-datasite-0']

\n", - "

status: list = ['Server: fl-datasite-0, Status: pending']

\n", - " \n", - " \n", - "

inputs: dict =

{\n",
-       "  \"assets\": {\n",
-       "    \"mnist_images\": {\n",
-       "      \"action_id\": \"03d031afb46c482aa37ea64912e94651\",\n",
-       "      \"source_asset\": \"images\",\n",
-       "      \"source_dataset\": null,\n",
-       "      \"source_server\": \"4d3cee6cb0d2450f84d56bbfdeb84197\"\n",
-       "    },\n",
-       "    \"mnist_labels\": {\n",
-       "      \"action_id\": \"618fdf7ee1334a539dc5a8df54e5c786\",\n",
-       "      \"source_asset\": \"labels\",\n",
-       "      \"source_dataset\": null,\n",
-       "      \"source_server\": \"4d3cee6cb0d2450f84d56bbfdeb84197\"\n",
-       "    }\n",
-       "  }\n",
-       "}

\n", - "

code:

\n", - "
\n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "```python\n", - "@sy.syft_function(\n", - " input_policy=sy.ExactMatch(\n", - " mnist_images=mock_images_ptr, \n", - " mnist_labels=mock_labels_ptr\n", - " ),\n", - " output_policy=sy.SingleExecutionExactOutput(),\n", - ")\n", - "def train(mnist_images, mnist_labels):\n", - " # third party\n", - " import torch\n", - " import torch.nn as nn\n", - " import torch.optim as optim\n", - " from torch.utils.data import TensorDataset\n", - "\n", - " # Define the data loader\n", - " train_loader = torch.utils.data.DataLoader(\n", - " TensorDataset(torch.tensor(mnist_images, dtype=torch.float32), \n", - " torch.tensor(mnist_labels, dtype=torch.float32)), \n", - " batch_size=4, \n", - " shuffle=True\n", - " )\n", - "\n", - " # Define the neural network class\n", - " class MLP(nn.Module):\n", - " def __init__(self):\n", - " super().__init__()\n", - " self.fc1 = nn.Linear(784, 10)\n", - "\n", - " def forward(self, x):\n", - " x = torch.log_softmax(self.fc1(x.view(-1, 784)), dim=1)\n", - " return x\n", - "\n", - " # Define the model, optimizer, and loss function\n", - " model = MLP()\n", - " optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)\n", - " criterion = nn.CrossEntropyLoss()\n", - "\n", - " # Train the model\n", - " train_accs = []\n", - " for epoch in range(20):\n", - " running_loss = 0.0\n", - " for _, data in enumerate(train_loader, 0):\n", - " inputs, labels = data\n", - " optimizer.zero_grad()\n", - " outputs = model(inputs)\n", - " loss = criterion(outputs, labels)\n", - " loss.backward()\n", - " optimizer.step()\n", - " running_loss += loss.item()\n", - " print(\n", - " f\"Epoch {epoch + 1}, Loss: {(running_loss / len(train_loader)):.4f}\"\n", - " )\n", - " # Calculate accuracy on the training set\n", - " train_accs.append((running_loss / len(train_loader)))\n", - "\n", - " # Get model parameters\n", - " 
params = model.state_dict()\n", - "\n", - " # Return training accuracy and model parameters\n", - " return train_accs, params\n", - "```" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "datasites[0].requests[0].changes[0].code" ] }, { "cell_type": "markdown", - "id": "445dd4f7-9788-42d8-9fcb-155f6106eb60", + "id": "5", "metadata": {}, "source": [ "# Step 3: Approve projects and code" @@ -2940,20 +67,10 @@ }, { "cell_type": "code", - "execution_count": 4, - "id": "a66626f1-480f-4633-9ef8-27e09f686506", + "execution_count": null, + "id": "6", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Approving request on change train for datasite fl-datasite-0\n", - "Approving request on change train for datasite fl-datasite-1\n", - "Approving request on change train for datasite fl-datasite-2\n" - ] - } - ], + "outputs": [], "source": [ "for dasite in datasites:\n", " dasite.requests[-1].approve()" @@ -2962,7 +79,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d3fb7231-8a85-45fb-af7c-bbc2b2d619f6", + "id": "7", "metadata": {}, "outputs": [], "source": [] diff --git a/notebooks/tutorials/federated-learning/03 - Run Federated Learning.ipynb b/notebooks/tutorials/federated-learning/03 - Run Federated Learning.ipynb index ba7e1d4baf4..efd87d26881 100644 --- a/notebooks/tutorials/federated-learning/03 - Run Federated Learning.ipynb +++ b/notebooks/tutorials/federated-learning/03 - Run Federated Learning.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "b2e0dacb-7dfc-42b4-8a7b-30a087c2db2e", + "id": "0", "metadata": {}, "source": [ "# Step 1: Login as External Researcher" @@ -10,117 +10,26 @@ }, { "cell_type": "code", - "execution_count": 1, - "id": "4ef3a306-f6f8-46cd-8646-84702ac77336", + "execution_count": null, + "id": "1", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
SyftInfo:
You have launched a development server at http://0.0.0.0:None.It is intended only for local use.

" - ], - "text/plain": [ - "SyftInfo: You have launched a development server at http://0.0.0.0:None.It is intended only for local use." - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Logged into as \n" - ] - }, - { - "data": { - "text/html": [ - "
SyftWarning:
You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`.

" - ], - "text/plain": [ - "SyftWarning: You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`." - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
SyftInfo:
You have launched a development server at http://0.0.0.0:None.It is intended only for local use.

" - ], - "text/plain": [ - "SyftInfo: You have launched a development server at http://0.0.0.0:None.It is intended only for local use." - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Logged into as \n" - ] - }, - { - "data": { - "text/html": [ - "
SyftWarning:
You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`.

" - ], - "text/plain": [ - "SyftWarning: You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`." - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
SyftInfo:
You have launched a development server at http://0.0.0.0:None.It is intended only for local use.

" - ], - "text/plain": [ - "SyftInfo: You have launched a development server at http://0.0.0.0:None.It is intended only for local use." - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Logged into as \n" - ] - }, - { - "data": { - "text/html": [ - "
SyftWarning:
You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`.

" - ], - "text/plain": [ - "SyftWarning: You are using a default password. Please change the password using `[your_client].account.set_password([new_password])`." - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ + "# syft absolute\n", "import syft as sy\n", "\n", "datasites = list()\n", "for i in range(3):\n", - " server = sy.orchestra.launch(name=\"fl-datasite-\"+str(i)) # connects to same server\n", + " server = sy.orchestra.launch(\n", + " name=\"fl-datasite-\" + str(i)\n", + " ) # connects to same server\n", " client = server.login(email=\"sheldon@caltech.edu\", password=\"changethis\")\n", " datasites.append(client)" ] }, { "cell_type": "markdown", - "id": "00fb6bcf-8587-43f1-a3ea-1f15cdceb2af", + "id": "2", "metadata": {}, "source": [ "# Step 2: Train models on all datasites" @@ -128,87 +37,29 @@ }, { "cell_type": "code", - "execution_count": 36, - "id": "1838fb85-4a73-462d-b6b7-bb9a4f4b0e92", + "execution_count": null, + "id": "3", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n" - ] - }, - { - "data": { - "text/html": [ - "
SyftWarning:
Policy is no longer valid. count: 1 >= limit: 1. Loading results from cache.

" - ], - "text/plain": [ - "SyftWarning: Policy is no longer valid. count: 1 >= limit: 1. Loading results from cache." - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n" - ] - }, - { - "data": { - "text/html": [ - "
SyftWarning:
Policy is no longer valid. count: 1 >= limit: 1. Loading results from cache.

" - ], - "text/plain": [ - "SyftWarning: Policy is no longer valid. count: 1 >= limit: 1. Loading results from cache." - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n" - ] - }, - { - "data": { - "text/html": [ - "
SyftWarning:
Policy is no longer valid. count: 1 >= limit: 1. Loading results from cache.

" - ], - "text/plain": [ - "SyftWarning: Policy is no longer valid. count: 1 >= limit: 1. Loading results from cache." - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "results = list()\n", "\n", "for dasite in datasites:\n", " print()\n", " print(dasite)\n", - " images_ptr = dasite.datasets['MNIST Dataset'].assets['images']\n", - " labels_ptr = dasite.datasets['MNIST Dataset'].assets['labels']\n", - " \n", - " results.append(dasite.code.train(mnist_images=images_ptr, \n", - " mnist_labels=labels_ptr).get_from(dasite))" + " images_ptr = dasite.datasets[\"MNIST Dataset\"].assets[\"images\"]\n", + " labels_ptr = dasite.datasets[\"MNIST Dataset\"].assets[\"labels\"]\n", + "\n", + " results.append(\n", + " dasite.code.train(mnist_images=images_ptr, mnist_labels=labels_ptr).get_from(\n", + " dasite\n", + " )\n", + " )" ] }, { "cell_type": "markdown", - "id": "958e5514-209f-450e-9b89-fb46922c13b6", + "id": "4", "metadata": {}, "source": [ "# Step 3: Merge models" @@ -216,34 +67,38 @@ }, { "cell_type": "code", - "execution_count": 55, - "id": "3359ac77-b582-4ac9-94b3-d7b756b1ef07", + "execution_count": null, + "id": "5", "metadata": {}, "outputs": [], "source": [ - "from itertools import groupby\n", + "# stdlib\n", "from collections import OrderedDict\n", + "from itertools import groupby\n", + "\n", "\n", "def ave(d):\n", - " _data = sorted([i for b in d for i in b.items()], key=lambda x:x[0])\n", - " _d = [(a, [j for _, j in b]) for a, b in groupby(_data, key=lambda x:x[0])]\n", - " return OrderedDict({a:ave(b) if isinstance(b[0], dict) else sum(b)/float(len(b)) for a, b in _d})\n" + " _data = sorted([i for b in d for i in b.items()], key=lambda x: x[0])\n", + " _d = [(a, [j for _, j in b]) for a, b in groupby(_data, key=lambda x: x[0])]\n", + " return OrderedDict(\n", + " {a: ave(b) if isinstance(b[0], dict) else sum(b) / float(len(b)) for a, b in _d}\n", + " )" ] }, { "cell_type": "code", - "execution_count": 59, - "id": 
"30fc8abb-0494-4857-83a2-e6eaea3a1625", + "execution_count": null, + "id": "6", "metadata": {}, "outputs": [], "source": [ - "models_weights = list(map(lambda x:x[1], results))" + "models_weights = list(map(lambda x: x[1], results))" ] }, { "cell_type": "code", - "execution_count": 60, - "id": "e65a0050-6e4f-4f25-be26-3835a862eca7", + "execution_count": null, + "id": "7", "metadata": {}, "outputs": [], "source": [ @@ -252,14 +107,18 @@ }, { "cell_type": "code", - "execution_count": 63, - "id": "91abef0b-af16-4481-a449-c50c9922c179", + "execution_count": null, + "id": "8", "metadata": {}, "outputs": [], "source": [ - "import torch as torch\n", + "# third party\n", + "import torch\n", "from torch import nn\n", + "\n", "# Define the neural network class\n", + "\n", + "\n", "class MLP(nn.Module):\n", " def __init__(self):\n", " super().__init__()\n", @@ -272,21 +131,10 @@ }, { "cell_type": "code", - "execution_count": 70, - "id": "9c467d7e-fe2a-4f2d-8fbf-73e593b77fd8", + "execution_count": null, + "id": "9", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 70, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "new_model = MLP()\n", "new_model.load_state_dict(new_model_weights)" @@ -295,7 +143,7 @@ { "cell_type": "code", "execution_count": null, - "id": "089ecfaf-ab2f-4c8e-8909-6085dea18f9a", + "id": "10", "metadata": {}, "outputs": [], "source": [] From 898743852f9df898c467017fa50ca9aafe09e0a0 Mon Sep 17 00:00:00 2001 From: Andrew Trask Date: Sat, 3 Aug 2024 21:56:51 -0400 Subject: [PATCH 04/18] Add whitespace --- packages/syft/src/syft/client/registry.py | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/syft/src/syft/client/registry.py b/packages/syft/src/syft/client/registry.py index 649b5e71362..3810e1df351 100644 --- a/packages/syft/src/syft/client/registry.py +++ b/packages/syft/src/syft/client/registry.py @@ -27,6 +27,7 @@ ) NETWORK_REGISTRY_REPO = 
"https://github.com/OpenMined/NetworkRegistry" + DATASITE_REGISTRY_URL = "https://raw.githubusercontent.com/OpenMined/NetworkRegistry/main/datasites.json" def _get_all_networks(network_json: dict, version: str) -> list[dict]: From d84d118c0daae1c67af3107b6e1ea8c440463836 Mon Sep 17 00:00:00 2001 From: Andrew Trask Date: Sat, 3 Aug 2024 21:58:08 -0400 Subject: [PATCH 05/18] fix imprecise bool/str ambiguity --- packages/syft/src/syft/client/registry.py | 30 +++++++++++++---------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/packages/syft/src/syft/client/registry.py b/packages/syft/src/syft/client/registry.py index 3810e1df351..100bc0af96d 100644 --- a/packages/syft/src/syft/client/registry.py +++ b/packages/syft/src/syft/client/registry.py @@ -28,7 +28,10 @@ NETWORK_REGISTRY_REPO = "https://github.com/OpenMined/NetworkRegistry" -DATASITE_REGISTRY_URL = "https://raw.githubusercontent.com/OpenMined/NetworkRegistry/main/datasites.json" +DATASITE_REGISTRY_URL = ( + "https://raw.githubusercontent.com/OpenMined/NetworkRegistry/main/datasites.json" +) + def _get_all_networks(network_json: dict, version: str) -> list[dict]: return network_json.get(version, {}).get("gateways", []) @@ -182,8 +185,8 @@ def __getitem__(self, key: str | int) -> Client: return self.create_client(network=network) raise KeyError(f"Invalid key: {key} for {on}") + class DatasiteRegistry: - def __init__(self) -> None: self.all_datasites: list[dict] = [] try: @@ -194,18 +197,17 @@ def __init__(self) -> None: logger.warning( f"Failed to get Datasite Registry, go checkout: {DATASITE_REGISTRY_URL}. 
{e}" ) - + @property def online_datasites(self) -> list[dict]: datasites = self.all_datasites - + def check_datasite(datasite: dict) -> dict[Any, Any] | None: url = "http://" + datasite["host_or_ip"] + ":" + str(datasite["port"]) + "/" - online = "as;lfjasdfsadf" try: res = requests.get(url, timeout=DEFAULT_TIMEOUT) # nosec if "status" in res.json(): - online = res.json()['status'] == 'ok' + online = res.json()["status"] == "ok" elif "detail" in res.json(): online = True except Exception as e: @@ -228,7 +230,8 @@ def check_datasite(datasite: dict) -> dict[Any, Any] | None: datasite["version"] = "unknown" return datasite return None - # We can use a with statement to ensure threads are cleaned up promptly + + # We can use a with statement to ensure threads are cleaned up promptly with futures.ThreadPoolExecutor(max_workers=20) as executor: # map _online_datasites = list( @@ -237,19 +240,19 @@ def check_datasite(datasite: dict) -> dict[Any, Any] | None: online_datasites = [each for each in _online_datasites if each is not None] return online_datasites - + def _repr_html_(self) -> str: on = self.online_datasites if len(on) == 0: return "(no gateways online - try syft.gateways.all_networks to see offline gateways)" df = pd.DataFrame(on) - + return df._repr_html_() # type: ignore - + @staticmethod def create_client(datasite: dict[str, Any]) -> Client: # relative - from syft.client.client import connect + from .client import connect try: port = int(datasite["port"]) @@ -259,8 +262,8 @@ def create_client(datasite: dict[str, Any]) -> Client: client = connect(url=str(server_url)) return client.guest() except Exception as e: - raise SyftException(f"Failed to login with: {datasite}. {e}") - + raise SyftException(f"Failed to login with: {datasite}. 
{e}") + def __getitem__(self, key: str | int) -> Client: if isinstance(key, int): return self.create_client(datasite=self.online_datasites[key]) @@ -271,6 +274,7 @@ def __getitem__(self, key: str | int) -> Client: return self.create_client(datasite=datasite) raise KeyError(f"Invalid key: {key} for {on}") + class NetworksOfDatasitesRegistry: def __init__(self) -> None: self.all_networks: list[dict] = [] From ec692a9e2521247effa9e8168608f744666224f2 Mon Sep 17 00:00:00 2001 From: Andrew Trask Date: Sat, 3 Aug 2024 22:12:04 -0400 Subject: [PATCH 06/18] Fix ruff issues --- .../federated-learning/00 - Setup FL Datasites.ipynb | 4 ++-- .../federated-learning/01 - Submit FL Experiment.ipynb | 2 +- .../02 - Data Owners Approve Experiments.ipynb | 2 +- .../federated-learning/03 - Run Federated Learning.ipynb | 6 +++--- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/notebooks/tutorials/federated-learning/00 - Setup FL Datasites.ipynb b/notebooks/tutorials/federated-learning/00 - Setup FL Datasites.ipynb index b574b175de5..ad9e5486c1d 100644 --- a/notebooks/tutorials/federated-learning/00 - Setup FL Datasites.ipynb +++ b/notebooks/tutorials/federated-learning/00 - Setup FL Datasites.ipynb @@ -29,7 +29,7 @@ "metadata": {}, "outputs": [], "source": [ - "datasites = list()\n", + "datasites = []\n", "for i in range(3):\n", " server = sy.orchestra.launch(name=\"fl-datasite-\" + str(i), reset=True)\n", " client = server.login(email=\"info@openmined.org\", password=\"changethis\")\n", @@ -67,7 +67,7 @@ " dataset = sy.Dataset(name=\"MNIST Dataset\")\n", " dataset.add_asset(sy.Asset(name=\"images\", data=images[i], mock=0 * images[i]))\n", " dataset.add_asset(sy.Asset(name=\"labels\", data=labels[i], mock=0 * labels[i]))\n", - " datasites[i].upload_dataset(dataset)" + " datasite.upload_dataset(dataset)" ] }, { diff --git a/notebooks/tutorials/federated-learning/01 - Submit FL Experiment.ipynb b/notebooks/tutorials/federated-learning/01 - Submit FL Experiment.ipynb index 
5d490726b3d..7d477a3c10e 100644 --- a/notebooks/tutorials/federated-learning/01 - Submit FL Experiment.ipynb +++ b/notebooks/tutorials/federated-learning/01 - Submit FL Experiment.ipynb @@ -18,7 +18,7 @@ "# syft absolute\n", "import syft as sy\n", "\n", - "datasites = list()\n", + "datasites = []\n", "for i in range(3):\n", " server = sy.orchestra.launch(\n", " name=\"fl-datasite-\" + str(i)\n", diff --git a/notebooks/tutorials/federated-learning/02 - Data Owners Approve Experiments.ipynb b/notebooks/tutorials/federated-learning/02 - Data Owners Approve Experiments.ipynb index 9ebc4ef25ff..512bbda62f4 100644 --- a/notebooks/tutorials/federated-learning/02 - Data Owners Approve Experiments.ipynb +++ b/notebooks/tutorials/federated-learning/02 - Data Owners Approve Experiments.ipynb @@ -18,7 +18,7 @@ "# syft absolute\n", "import syft as sy\n", "\n", - "datasites = list()\n", + "datasites = []\n", "for i in range(3):\n", " server = sy.orchestra.launch(\n", " name=\"fl-datasite-\" + str(i)\n", diff --git a/notebooks/tutorials/federated-learning/03 - Run Federated Learning.ipynb b/notebooks/tutorials/federated-learning/03 - Run Federated Learning.ipynb index efd87d26881..f6ccb1af89d 100644 --- a/notebooks/tutorials/federated-learning/03 - Run Federated Learning.ipynb +++ b/notebooks/tutorials/federated-learning/03 - Run Federated Learning.ipynb @@ -18,7 +18,7 @@ "# syft absolute\n", "import syft as sy\n", "\n", - "datasites = list()\n", + "datasites = []\n", "for i in range(3):\n", " server = sy.orchestra.launch(\n", " name=\"fl-datasite-\" + str(i)\n", @@ -42,7 +42,7 @@ "metadata": {}, "outputs": [], "source": [ - "results = list()\n", + "results = []\n", "\n", "for dasite in datasites:\n", " print()\n", @@ -92,7 +92,7 @@ "metadata": {}, "outputs": [], "source": [ - "models_weights = list(map(lambda x: x[1], results))" + "models_weights = [(lambda x: x[1])(x) for x in results]" ] }, { From 1ea8ef40bae8ff4ce39622ca429bb5dce1a23ac7 Mon Sep 17 00:00:00 2001 From: Andrew 
Trask Date: Sat, 3 Aug 2024 22:26:16 -0400 Subject: [PATCH 07/18] Deactivate Search for now --- packages/syft/src/syft/__init__.py | 9 +- packages/syft/src/syft/client/search.py | 104 +++++++++++------------- 2 files changed, 54 insertions(+), 59 deletions(-) diff --git a/packages/syft/src/syft/__init__.py b/packages/syft/src/syft/__init__.py index 44f2efc202c..641d0865a3c 100644 --- a/packages/syft/src/syft/__init__.py +++ b/packages/syft/src/syft/__init__.py @@ -19,8 +19,9 @@ from .client.registry import DatasiteRegistry from .client.registry import EnclaveRegistry from .client.registry import NetworkRegistry -from .client.search import Search -from .client.search import SearchResults + +# from .client.search import Search +# from .client.search import SearchResults from .client.syncing import compare_clients from .client.syncing import compare_states from .client.syncing import sync @@ -147,5 +148,5 @@ def hello_baby() -> None: print("Welcome to the world. \u2764\ufe0f") -def search(name: str) -> SearchResults: - return Search(_datasites()).search(name=name) +# def search(name: str) -> SearchResults: +# return Search(_datasites()).search(name=name) diff --git a/packages/syft/src/syft/client/search.py b/packages/syft/src/syft/client/search.py index e4450987aff..24a6648dc9c 100644 --- a/packages/syft/src/syft/client/search.py +++ b/packages/syft/src/syft/client/search.py @@ -1,17 +1,11 @@ # stdlib -from concurrent.futures import ThreadPoolExecutor # third party -from IPython.display import display # relative from ..service.dataset.dataset import Dataset -from ..service.metadata.server_metadata import ServerMetadataJSON -from ..service.network.network_service import ServerPeer -from ..service.response import SyftWarning from ..types.uid import UID from .client import SyftClient -from .registry import DatasiteRegistry class SearchResults: @@ -57,52 +51,52 @@ def __len__(self) -> int: return len(self._datasets) -class Search: - def __init__(self, datasites: 
DatasiteRegistry) -> None: - self.datasites: list[tuple[ServerPeer, ServerMetadataJSON | None]] = ( - datasites.online_datasites - ) - - @staticmethod - def __search_one_server( - peer_tuple: tuple[ServerPeer, ServerMetadataJSON], name: str - ) -> tuple[SyftClient | None, list[Dataset]]: - try: - peer, server_metadata = peer_tuple - client = peer.guest_client - results = client.api.services.dataset.search(name=name) - return (client, results) - except Exception as e: # noqa - warning = SyftWarning( - message=f"Got exception {e} at server {server_metadata.name}" - ) - display(warning) - return (None, []) - - def __search(self, name: str) -> list[tuple[SyftClient, list[Dataset]]]: - with ThreadPoolExecutor(max_workers=20) as executor: - # results: list[tuple[SyftClient | None, list[Dataset]]] = [ - # self.__search_one_server(peer_tuple, name) for peer_tuple in self.datasites - # ] - results: list[tuple[SyftClient | None, list[Dataset]]] = list( - executor.map( - lambda peer_tuple: self.__search_one_server(peer_tuple, name), - self.datasites, - ) - ) - # filter out SyftError - filtered = [(client, result) for client, result in results if client and result] - - return filtered - - def search(self, name: str) -> SearchResults: - """ - Searches for a specific dataset by name. - - Args: - name (str): The name of the dataset to search for. - - Returns: - SearchResults: An object containing the search results. 
- """ - return SearchResults(self.__search(name)) +# class Search: +# def __init__(self, datasites: DatasiteRegistry) -> None: +# self.datasites: list[tuple[ServerPeer, ServerMetadataJSON | None]] = ( +# datasites.online_datasites +# ) + +# @staticmethod +# def __search_one_server( +# peer_tuple: tuple[ServerPeer, ServerMetadataJSON], name: str +# ) -> tuple[SyftClient | None, list[Dataset]]: +# try: +# peer, server_metadata = peer_tuple +# client = peer.guest_client +# results = client.api.services.dataset.search(name=name) +# return (client, results) +# except Exception as e: # noqa +# warning = SyftWarning( +# message=f"Got exception {e} at server {server_metadata.name}" +# ) +# display(warning) +# return (None, []) + +# def __search(self, name: str) -> list[tuple[SyftClient, list[Dataset]]]: +# with ThreadPoolExecutor(max_workers=20) as executor: +# # results: list[tuple[SyftClient | None, list[Dataset]]] = [ +# # self.__search_one_server(peer_tuple, name) for peer_tuple in self.datasites +# # ] +# results: list[tuple[SyftClient | None, list[Dataset]]] = list( +# executor.map( +# lambda peer_tuple: self.__search_one_server(peer_tuple, name), +# self.datasites, +# ) +# ) +# # filter out SyftError +# filtered = [(client, result) for client, result in results if client and result] + +# return filtered + +# def search(self, name: str) -> SearchResults: +# """ +# Searches for a specific dataset by name. + +# Args: +# name (str): The name of the dataset to search for. + +# Returns: +# SearchResults: An object containing the search results. +# """ +# return SearchResults(self.__search(name)) From b3ead9f7344e23b535de388e09e1fee191a03f2a Mon Sep 17 00:00:00 2001 From: Andrew Trask Date: Sat, 3 Aug 2024 22:44:11 -0400 Subject: [PATCH 08/18] Rename notebooks to try to fix a testing bug --- ...tup FL Datasites.ipynb => 00-setup-fl-datasites.ipynb} | 0 ... 
FL Experiment.ipynb => 01-submit-fl-experiment.ipynb} | 0 ...nts.ipynb => 02-data-owners-approve-experiments.ipynb} | 0 ...ted Learning.ipynb => 03-run-federated-learning.ipynb} | 8 ++++++++ 4 files changed, 8 insertions(+) rename notebooks/tutorials/federated-learning/{00 - Setup FL Datasites.ipynb => 00-setup-fl-datasites.ipynb} (100%) rename notebooks/tutorials/federated-learning/{01 - Submit FL Experiment.ipynb => 01-submit-fl-experiment.ipynb} (100%) rename notebooks/tutorials/federated-learning/{02 - Data Owners Approve Experiments.ipynb => 02-data-owners-approve-experiments.ipynb} (100%) rename notebooks/tutorials/federated-learning/{03 - Run Federated Learning.ipynb => 03-run-federated-learning.ipynb} (96%) diff --git a/notebooks/tutorials/federated-learning/00 - Setup FL Datasites.ipynb b/notebooks/tutorials/federated-learning/00-setup-fl-datasites.ipynb similarity index 100% rename from notebooks/tutorials/federated-learning/00 - Setup FL Datasites.ipynb rename to notebooks/tutorials/federated-learning/00-setup-fl-datasites.ipynb diff --git a/notebooks/tutorials/federated-learning/01 - Submit FL Experiment.ipynb b/notebooks/tutorials/federated-learning/01-submit-fl-experiment.ipynb similarity index 100% rename from notebooks/tutorials/federated-learning/01 - Submit FL Experiment.ipynb rename to notebooks/tutorials/federated-learning/01-submit-fl-experiment.ipynb diff --git a/notebooks/tutorials/federated-learning/02 - Data Owners Approve Experiments.ipynb b/notebooks/tutorials/federated-learning/02-data-owners-approve-experiments.ipynb similarity index 100% rename from notebooks/tutorials/federated-learning/02 - Data Owners Approve Experiments.ipynb rename to notebooks/tutorials/federated-learning/02-data-owners-approve-experiments.ipynb diff --git a/notebooks/tutorials/federated-learning/03 - Run Federated Learning.ipynb b/notebooks/tutorials/federated-learning/03-run-federated-learning.ipynb similarity index 96% rename from 
notebooks/tutorials/federated-learning/03 - Run Federated Learning.ipynb rename to notebooks/tutorials/federated-learning/03-run-federated-learning.ipynb index f6ccb1af89d..58fb26d9077 100644 --- a/notebooks/tutorials/federated-learning/03 - Run Federated Learning.ipynb +++ b/notebooks/tutorials/federated-learning/03-run-federated-learning.ipynb @@ -147,6 +147,14 @@ "metadata": {}, "outputs": [], "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { From ce4b11ba03e8469a95556047e1ebe57bcb39b0e7 Mon Sep 17 00:00:00 2001 From: Andrew Trask Date: Sun, 4 Aug 2024 03:59:10 +0100 Subject: [PATCH 09/18] Delete notebooks/tutorials/federated-learning/00-setup-fl-datasites.ipynb --- .../00-setup-fl-datasites.ipynb | 145 ------------------ 1 file changed, 145 deletions(-) delete mode 100644 notebooks/tutorials/federated-learning/00-setup-fl-datasites.ipynb diff --git a/notebooks/tutorials/federated-learning/00-setup-fl-datasites.ipynb b/notebooks/tutorials/federated-learning/00-setup-fl-datasites.ipynb deleted file mode 100644 index ad9e5486c1d..00000000000 --- a/notebooks/tutorials/federated-learning/00-setup-fl-datasites.ipynb +++ /dev/null @@ -1,145 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "0", - "metadata": {}, - "source": [ - "# Step 1: Launch some datasite servers" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "# third party\n", - "from mnist_dataset import mnist\n", - "\n", - "# syft absolute\n", - "import syft as sy" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "datasites = []\n", - "for i in range(3):\n", - " server = sy.orchestra.launch(name=\"fl-datasite-\" + str(i), reset=True)\n", - " client = server.login(email=\"info@openmined.org\", password=\"changethis\")\n", - " 
datasites.append(client)" - ] - }, - { - "cell_type": "markdown", - "id": "3", - "metadata": {}, - "source": [ - "# Step 2: Split MNIST across datasites" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [ - "train_images, train_labels, _, _ = mnist()\n", - "images = [train_images[0:20000], train_images[20000:40000], train_images[40000:60000]]\n", - "labels = [train_labels[0:20000], train_labels[20000:40000], train_labels[40000:60000]]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [ - "for i, datasite in enumerate(datasites):\n", - " dataset = sy.Dataset(name=\"MNIST Dataset\")\n", - " dataset.add_asset(sy.Asset(name=\"images\", data=images[i], mock=0 * images[i]))\n", - " dataset.add_asset(sy.Asset(name=\"labels\", data=labels[i], mock=0 * labels[i]))\n", - " datasite.upload_dataset(dataset)" - ] - }, - { - "cell_type": "markdown", - "id": "6", - "metadata": {}, - "source": [ - "# Create data scientist user accounts" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [ - "for datasite in datasites:\n", - " register_result = datasite.register(\n", - " name=\"Sheldon Cooper\",\n", - " email=\"sheldon@caltech.edu\",\n", - " password=\"changethis\",\n", - " password_verify=\"changethis\",\n", - " institution=\"Caltech\",\n", - " website=\"https://www.caltech.edu/\",\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "10", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - 
"name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From bb03a0bb1cd3c310fd79945f480b5b4cbaafcb2a Mon Sep 17 00:00:00 2001 From: Andrew Trask Date: Sun, 4 Aug 2024 03:59:25 +0100 Subject: [PATCH 10/18] Delete notebooks/tutorials/federated-learning/01-submit-fl-experiment.ipynb --- .../01-submit-fl-experiment.ipynb | 233 ------------------ 1 file changed, 233 deletions(-) delete mode 100644 notebooks/tutorials/federated-learning/01-submit-fl-experiment.ipynb diff --git a/notebooks/tutorials/federated-learning/01-submit-fl-experiment.ipynb b/notebooks/tutorials/federated-learning/01-submit-fl-experiment.ipynb deleted file mode 100644 index 7d477a3c10e..00000000000 --- a/notebooks/tutorials/federated-learning/01-submit-fl-experiment.ipynb +++ /dev/null @@ -1,233 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "0", - "metadata": {}, - "source": [ - "# Step 1: Login as External Researcher" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "# syft absolute\n", - "import syft as sy\n", - "\n", - "datasites = []\n", - "for i in range(3):\n", - " server = sy.orchestra.launch(\n", - " name=\"fl-datasite-\" + str(i)\n", - " ) # connects to same server\n", - " client = server.login(email=\"sheldon@caltech.edu\", password=\"changethis\")\n", - " datasites.append(client)" - ] - }, - { - "cell_type": "markdown", - "id": "2", - "metadata": {}, - "source": [ - "# Step 2: Get mock data and test a neural network" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "mock_images = datasites[0].datasets[\"MNIST 
Dataset\"].assets[\"images\"].mock[0:100]\n", - "mock_labels = datasites[0].datasets[\"MNIST Dataset\"].assets[\"labels\"].mock[0:100]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [ - "# third party\n", - "import torch\n", - "import torch.nn as nn\n", - "import torch.optim as optim\n", - "from torch.utils.data import TensorDataset\n", - "\n", - "# Define the data loader\n", - "train_loader = torch.utils.data.DataLoader(\n", - " TensorDataset(\n", - " torch.tensor(mock_images, dtype=torch.float32),\n", - " torch.tensor(mock_labels, dtype=torch.float32),\n", - " ),\n", - " batch_size=4,\n", - " shuffle=True,\n", - ")\n", - "\n", - "# Define the neural network class\n", - "\n", - "\n", - "class MLP(nn.Module):\n", - " def __init__(self):\n", - " super().__init__()\n", - " self.fc1 = nn.Linear(784, 10)\n", - "\n", - " def forward(self, x):\n", - " x = torch.log_softmax(self.fc1(x.view(-1, 784)), dim=1)\n", - " return x\n", - "\n", - "\n", - "# Define the model, optimizer, and loss function\n", - "model = MLP()\n", - "optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)\n", - "criterion = nn.CrossEntropyLoss()\n", - "\n", - "# Train the model\n", - "for epoch in range(10):\n", - " running_loss = 0.0\n", - " for _, data in enumerate(train_loader, 0):\n", - " inputs, labels = data\n", - " optimizer.zero_grad()\n", - " outputs = model(inputs)\n", - " loss = criterion(outputs, labels)\n", - " loss.backward()\n", - " optimizer.step()\n", - " running_loss += loss.item()\n", - " print(\n", - " f\"Epoch {epoch + 1}, Loss: {(running_loss / len(train_loader)):.4f}\",\n", - " )" - ] - }, - { - "cell_type": "markdown", - "id": "5", - "metadata": {}, - "source": [ - "# Step 3: Submit experiment to each datasites" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": {}, - "outputs": [], - "source": [ - "for dasite in datasites:\n", - " mock_images_ptr = 
dasite.datasets[\"MNIST Dataset\"].assets[\"images\"]\n", - " mock_labels_ptr = dasite.datasets[\"MNIST Dataset\"].assets[\"labels\"]\n", - "\n", - " @sy.syft_function(\n", - " input_policy=sy.ExactMatch(\n", - " mnist_images=mock_images_ptr, mnist_labels=mock_labels_ptr\n", - " ),\n", - " output_policy=sy.SingleExecutionExactOutput(),\n", - " )\n", - " def train(mnist_images, mnist_labels):\n", - " # third party\n", - " import torch\n", - " import torch.nn as nn\n", - " import torch.optim as optim\n", - " from torch.utils.data import TensorDataset\n", - "\n", - " # Define the data loader\n", - " train_loader = torch.utils.data.DataLoader(\n", - " TensorDataset(\n", - " torch.tensor(mnist_images, dtype=torch.float32),\n", - " torch.tensor(mnist_labels, dtype=torch.float32),\n", - " ),\n", - " batch_size=4,\n", - " shuffle=True,\n", - " )\n", - "\n", - " # Define the neural network class\n", - " class MLP(nn.Module):\n", - " def __init__(self):\n", - " super().__init__()\n", - " self.fc1 = nn.Linear(784, 10)\n", - "\n", - " def forward(self, x):\n", - " x = torch.log_softmax(self.fc1(x.view(-1, 784)), dim=1)\n", - " return x\n", - "\n", - " # Define the model, optimizer, and loss function\n", - " model = MLP()\n", - " optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)\n", - " criterion = nn.CrossEntropyLoss()\n", - "\n", - " # Train the model\n", - " train_accs = []\n", - " for epoch in range(20):\n", - " running_loss = 0.0\n", - " for _, data in enumerate(train_loader, 0):\n", - " inputs, labels = data\n", - " optimizer.zero_grad()\n", - " outputs = model(inputs)\n", - " loss = criterion(outputs, labels)\n", - " loss.backward()\n", - " optimizer.step()\n", - " running_loss += loss.item()\n", - " print(f\"Epoch {epoch + 1}, Loss: {(running_loss / len(train_loader)):.4f}\")\n", - " # Calculate accuracy on the training set\n", - " train_accs.append(running_loss / len(train_loader))\n", - "\n", - " # Get model parameters\n", - " params = 
model.state_dict()\n", - "\n", - " # Return training accuracy and model parameters\n", - " return train_accs, params\n", - "\n", - " new_project = sy.Project(\n", - " name=\"Training a 3-layer torch neural network on MNIST data\",\n", - " description=\"\"\"Hi, I would like to train my neural network on your MNIST data \n", - " (I can download it online too but I just want to use Syft coz it's cool)\"\"\",\n", - " members=[dasite],\n", - " )\n", - " new_project.create_code_request(obj=train, client=dasite)\n", - " project = new_project.send()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From de7a3d32d02197761184d47b0f815999497547d2 Mon Sep 17 00:00:00 2001 From: Andrew Trask Date: Sun, 4 Aug 2024 03:59:36 +0100 Subject: [PATCH 11/18] Delete notebooks/tutorials/federated-learning/02-data-owners-approve-experiments.ipynb --- .../02-data-owners-approve-experiments.ipynb | 109 ------------------ 1 file changed, 109 deletions(-) delete mode 100644 notebooks/tutorials/federated-learning/02-data-owners-approve-experiments.ipynb diff --git a/notebooks/tutorials/federated-learning/02-data-owners-approve-experiments.ipynb b/notebooks/tutorials/federated-learning/02-data-owners-approve-experiments.ipynb deleted file mode 100644 index 512bbda62f4..00000000000 --- a/notebooks/tutorials/federated-learning/02-data-owners-approve-experiments.ipynb +++ 
/dev/null @@ -1,109 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "0", - "metadata": {}, - "source": [ - "# Step 1: Login as Data Owner" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "# syft absolute\n", - "import syft as sy\n", - "\n", - "datasites = []\n", - "for i in range(3):\n", - " server = sy.orchestra.launch(\n", - " name=\"fl-datasite-\" + str(i)\n", - " ) # connects to same server\n", - " client = server.login(email=\"info@openmined.org\", password=\"changethis\")\n", - " datasites.append(client)" - ] - }, - { - "cell_type": "markdown", - "id": "2", - "metadata": {}, - "source": [ - "# Step 2: Review projects and code\n", - "\n", - "(optionally — the code can be run/tested. skipping for brevity)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "datasites[0].requests" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [ - "datasites[0].requests[0].changes[0].code" - ] - }, - { - "cell_type": "markdown", - "id": "5", - "metadata": {}, - "source": [ - "# Step 3: Approve projects and code" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": {}, - "outputs": [], - "source": [ - "for dasite in datasites:\n", - " dasite.requests[-1].approve()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - } - }, - "nbformat": 4, - 
"nbformat_minor": 5 -} From 53e1eff4c092e11cfa56922b733da7a071535494 Mon Sep 17 00:00:00 2001 From: Andrew Trask Date: Sun, 4 Aug 2024 03:59:48 +0100 Subject: [PATCH 12/18] Delete notebooks/tutorials/federated-learning/03-run-federated-learning.ipynb --- .../03-run-federated-learning.ipynb | 181 ------------------ 1 file changed, 181 deletions(-) delete mode 100644 notebooks/tutorials/federated-learning/03-run-federated-learning.ipynb diff --git a/notebooks/tutorials/federated-learning/03-run-federated-learning.ipynb b/notebooks/tutorials/federated-learning/03-run-federated-learning.ipynb deleted file mode 100644 index 58fb26d9077..00000000000 --- a/notebooks/tutorials/federated-learning/03-run-federated-learning.ipynb +++ /dev/null @@ -1,181 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "0", - "metadata": {}, - "source": [ - "# Step 1: Login as External Researcher" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "# syft absolute\n", - "import syft as sy\n", - "\n", - "datasites = []\n", - "for i in range(3):\n", - " server = sy.orchestra.launch(\n", - " name=\"fl-datasite-\" + str(i)\n", - " ) # connects to same server\n", - " client = server.login(email=\"sheldon@caltech.edu\", password=\"changethis\")\n", - " datasites.append(client)" - ] - }, - { - "cell_type": "markdown", - "id": "2", - "metadata": {}, - "source": [ - "# Step 2: Train models on all datasites" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "results = []\n", - "\n", - "for dasite in datasites:\n", - " print()\n", - " print(dasite)\n", - " images_ptr = dasite.datasets[\"MNIST Dataset\"].assets[\"images\"]\n", - " labels_ptr = dasite.datasets[\"MNIST Dataset\"].assets[\"labels\"]\n", - "\n", - " results.append(\n", - " dasite.code.train(mnist_images=images_ptr, mnist_labels=labels_ptr).get_from(\n", - " dasite\n", - " )\n", 
- " )" - ] - }, - { - "cell_type": "markdown", - "id": "4", - "metadata": {}, - "source": [ - "# Step 3: Merge models" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [ - "# stdlib\n", - "from collections import OrderedDict\n", - "from itertools import groupby\n", - "\n", - "\n", - "def ave(d):\n", - " _data = sorted([i for b in d for i in b.items()], key=lambda x: x[0])\n", - " _d = [(a, [j for _, j in b]) for a, b in groupby(_data, key=lambda x: x[0])]\n", - " return OrderedDict(\n", - " {a: ave(b) if isinstance(b[0], dict) else sum(b) / float(len(b)) for a, b in _d}\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": {}, - "outputs": [], - "source": [ - "models_weights = [(lambda x: x[1])(x) for x in results]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [ - "new_model_weights = ave(models_weights)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8", - "metadata": {}, - "outputs": [], - "source": [ - "# third party\n", - "import torch\n", - "from torch import nn\n", - "\n", - "# Define the neural network class\n", - "\n", - "\n", - "class MLP(nn.Module):\n", - " def __init__(self):\n", - " super().__init__()\n", - " self.fc1 = nn.Linear(784, 10)\n", - "\n", - " def forward(self, x):\n", - " x = torch.log_softmax(self.fc1(x.view(-1, 784)), dim=1)\n", - " return x" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9", - "metadata": {}, - "outputs": [], - "source": [ - "new_model = MLP()\n", - "new_model.load_state_dict(new_model_weights)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "10", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "11", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - 
"kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From 5bf1da8a2f088ff26757faf7b2c35ca3741dedce Mon Sep 17 00:00:00 2001 From: Andrew Trask Date: Sun, 4 Aug 2024 04:00:00 +0100 Subject: [PATCH 13/18] Delete notebooks/tutorials/federated-learning/mnist_dataset.py --- .../federated-learning/mnist_dataset.py | 88 ------------------- 1 file changed, 88 deletions(-) delete mode 100644 notebooks/tutorials/federated-learning/mnist_dataset.py diff --git a/notebooks/tutorials/federated-learning/mnist_dataset.py b/notebooks/tutorials/federated-learning/mnist_dataset.py deleted file mode 100644 index 77b7c2c7afe..00000000000 --- a/notebooks/tutorials/federated-learning/mnist_dataset.py +++ /dev/null @@ -1,88 +0,0 @@ -""" -Code for the MNIST dataset -Source: https://github.com/google/jax/blob/main/examples/datasets.py -""" - -# stdlib -import array -import gzip -import os -from os import path -import struct -import urllib.request - -# third party -import numpy as np -from numpy import ndarray - -_DATA = "/tmp/mnist_data/" - - -def _download(url: str, filename: str) -> None: - """Download a url to a file in the JAX data temp directory.""" - if not path.exists(_DATA): - os.makedirs(_DATA) - out_file = path.join(_DATA, filename) - if not path.isfile(out_file): - urllib.request.urlretrieve(url, out_file) - print(f"downloaded {url} to {_DATA}") - - -def _partial_flatten(x) -> ndarray: - """Flatten all but the first dimension of an ndarray.""" - return np.reshape(x, (x.shape[0], -1)) - - -def _one_hot(x: ndarray, k: int, dtype: type = np.float32) -> ndarray: - """Create a one-hot encoding of x of size k.""" - return np.array(x[:, 
None] == np.arange(k), dtype) - - -def mnist_raw() -> tuple[ndarray, ndarray, ndarray, ndarray]: - """Download and parse the raw MNIST dataset.""" - # CVDF mirror of http://yann.lecun.com/exdb/mnist/ - base_url = "https://storage.googleapis.com/cvdf-datasets/mnist/" - - def parse_labels(filename): - with gzip.open(filename, "rb") as fh: - _ = struct.unpack(">II", fh.read(8)) - return np.array(array.array("B", fh.read()), dtype=np.uint8) - - def parse_images(filename): - with gzip.open(filename, "rb") as fh: - _, num_data, rows, cols = struct.unpack(">IIII", fh.read(16)) - return np.array(array.array("B", fh.read()), dtype=np.uint8).reshape( - num_data, rows, cols - ) - - for filename in [ - "train-images-idx3-ubyte.gz", - "train-labels-idx1-ubyte.gz", - "t10k-images-idx3-ubyte.gz", - "t10k-labels-idx1-ubyte.gz", - ]: - _download(base_url + filename, filename) - - train_images = parse_images(path.join(_DATA, "train-images-idx3-ubyte.gz")) - train_labels = parse_labels(path.join(_DATA, "train-labels-idx1-ubyte.gz")) - test_images = parse_images(path.join(_DATA, "t10k-images-idx3-ubyte.gz")) - test_labels = parse_labels(path.join(_DATA, "t10k-labels-idx1-ubyte.gz")) - - return train_images, train_labels, test_images, test_labels - - -def mnist(permute_train: bool = False) -> tuple[ndarray, ndarray, ndarray, ndarray]: - """Download, parse and process MNIST data to unit scale and one-hot labels.""" - train_images, train_labels, test_images, test_labels = mnist_raw() - - train_images = _partial_flatten(train_images) / np.float32(255.0) - test_images = _partial_flatten(test_images) / np.float32(255.0) - train_labels = _one_hot(train_labels, 10) - test_labels = _one_hot(test_labels, 10) - - if permute_train: - perm = np.random.RandomState(0).permutation(train_images.shape[0]) - train_images = train_images[perm] - train_labels = train_labels[perm] - - return train_images, train_labels, test_images, test_labels From f70a991951ed557d96b09750a0f3bd9445ce5909 Mon Sep 17 
00:00:00 2001 From: Andrew Trask Date: Sat, 3 Aug 2024 23:29:35 -0400 Subject: [PATCH 14/18] Remove some tests that shouldn't be working --- .../00-setup-fl-datasites.ipynb | 145 ----------- .../01-submit-fl-experiment.ipynb | 233 ------------------ .../02-data-owners-approve-experiments.ipynb | 109 -------- .../03-run-federated-learning.ipynb | 181 -------------- .../federated-learning/mnist_dataset.py | 88 ------- .../00-data-owner-upload-data.ipynb | 2 +- .../01-data-scientist-submit-code.ipynb | 2 +- .../02-data-owner-review-approve-code.ipynb | 2 +- .../03-data-scientist-download-results.ipynb | 2 +- tests/integration/network/gateway_test.py | 46 ++-- 10 files changed, 27 insertions(+), 783 deletions(-) delete mode 100644 notebooks/tutorials/federated-learning/00-setup-fl-datasites.ipynb delete mode 100644 notebooks/tutorials/federated-learning/01-submit-fl-experiment.ipynb delete mode 100644 notebooks/tutorials/federated-learning/02-data-owners-approve-experiments.ipynb delete mode 100644 notebooks/tutorials/federated-learning/03-run-federated-learning.ipynb delete mode 100644 notebooks/tutorials/federated-learning/mnist_dataset.py diff --git a/notebooks/tutorials/federated-learning/00-setup-fl-datasites.ipynb b/notebooks/tutorials/federated-learning/00-setup-fl-datasites.ipynb deleted file mode 100644 index ad9e5486c1d..00000000000 --- a/notebooks/tutorials/federated-learning/00-setup-fl-datasites.ipynb +++ /dev/null @@ -1,145 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "0", - "metadata": {}, - "source": [ - "# Step 1: Launch some datasite servers" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "# third party\n", - "from mnist_dataset import mnist\n", - "\n", - "# syft absolute\n", - "import syft as sy" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "datasites = []\n", - "for i in 
range(3):\n", - " server = sy.orchestra.launch(name=\"fl-datasite-\" + str(i), reset=True)\n", - " client = server.login(email=\"info@openmined.org\", password=\"changethis\")\n", - " datasites.append(client)" - ] - }, - { - "cell_type": "markdown", - "id": "3", - "metadata": {}, - "source": [ - "# Step 2: Split MNIST across datasites" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [ - "train_images, train_labels, _, _ = mnist()\n", - "images = [train_images[0:20000], train_images[20000:40000], train_images[40000:60000]]\n", - "labels = [train_labels[0:20000], train_labels[20000:40000], train_labels[40000:60000]]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [ - "for i, datasite in enumerate(datasites):\n", - " dataset = sy.Dataset(name=\"MNIST Dataset\")\n", - " dataset.add_asset(sy.Asset(name=\"images\", data=images[i], mock=0 * images[i]))\n", - " dataset.add_asset(sy.Asset(name=\"labels\", data=labels[i], mock=0 * labels[i]))\n", - " datasite.upload_dataset(dataset)" - ] - }, - { - "cell_type": "markdown", - "id": "6", - "metadata": {}, - "source": [ - "# Create data scientist user accounts" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [ - "for datasite in datasites:\n", - " register_result = datasite.register(\n", - " name=\"Sheldon Cooper\",\n", - " email=\"sheldon@caltech.edu\",\n", - " password=\"changethis\",\n", - " password_verify=\"changethis\",\n", - " institution=\"Caltech\",\n", - " website=\"https://www.caltech.edu/\",\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - 
"execution_count": null, - "id": "10", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/tutorials/federated-learning/01-submit-fl-experiment.ipynb b/notebooks/tutorials/federated-learning/01-submit-fl-experiment.ipynb deleted file mode 100644 index 7d477a3c10e..00000000000 --- a/notebooks/tutorials/federated-learning/01-submit-fl-experiment.ipynb +++ /dev/null @@ -1,233 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "0", - "metadata": {}, - "source": [ - "# Step 1: Login as External Researcher" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "# syft absolute\n", - "import syft as sy\n", - "\n", - "datasites = []\n", - "for i in range(3):\n", - " server = sy.orchestra.launch(\n", - " name=\"fl-datasite-\" + str(i)\n", - " ) # connects to same server\n", - " client = server.login(email=\"sheldon@caltech.edu\", password=\"changethis\")\n", - " datasites.append(client)" - ] - }, - { - "cell_type": "markdown", - "id": "2", - "metadata": {}, - "source": [ - "# Step 2: Get mock data and test a neural network" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "mock_images = datasites[0].datasets[\"MNIST Dataset\"].assets[\"images\"].mock[0:100]\n", - "mock_labels = datasites[0].datasets[\"MNIST Dataset\"].assets[\"labels\"].mock[0:100]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - 
"source": [ - "# third party\n", - "import torch\n", - "import torch.nn as nn\n", - "import torch.optim as optim\n", - "from torch.utils.data import TensorDataset\n", - "\n", - "# Define the data loader\n", - "train_loader = torch.utils.data.DataLoader(\n", - " TensorDataset(\n", - " torch.tensor(mock_images, dtype=torch.float32),\n", - " torch.tensor(mock_labels, dtype=torch.float32),\n", - " ),\n", - " batch_size=4,\n", - " shuffle=True,\n", - ")\n", - "\n", - "# Define the neural network class\n", - "\n", - "\n", - "class MLP(nn.Module):\n", - " def __init__(self):\n", - " super().__init__()\n", - " self.fc1 = nn.Linear(784, 10)\n", - "\n", - " def forward(self, x):\n", - " x = torch.log_softmax(self.fc1(x.view(-1, 784)), dim=1)\n", - " return x\n", - "\n", - "\n", - "# Define the model, optimizer, and loss function\n", - "model = MLP()\n", - "optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)\n", - "criterion = nn.CrossEntropyLoss()\n", - "\n", - "# Train the model\n", - "for epoch in range(10):\n", - " running_loss = 0.0\n", - " for _, data in enumerate(train_loader, 0):\n", - " inputs, labels = data\n", - " optimizer.zero_grad()\n", - " outputs = model(inputs)\n", - " loss = criterion(outputs, labels)\n", - " loss.backward()\n", - " optimizer.step()\n", - " running_loss += loss.item()\n", - " print(\n", - " f\"Epoch {epoch + 1}, Loss: {(running_loss / len(train_loader)):.4f}\",\n", - " )" - ] - }, - { - "cell_type": "markdown", - "id": "5", - "metadata": {}, - "source": [ - "# Step 3: Submit experiment to each datasites" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": {}, - "outputs": [], - "source": [ - "for dasite in datasites:\n", - " mock_images_ptr = dasite.datasets[\"MNIST Dataset\"].assets[\"images\"]\n", - " mock_labels_ptr = dasite.datasets[\"MNIST Dataset\"].assets[\"labels\"]\n", - "\n", - " @sy.syft_function(\n", - " input_policy=sy.ExactMatch(\n", - " mnist_images=mock_images_ptr, 
mnist_labels=mock_labels_ptr\n", - " ),\n", - " output_policy=sy.SingleExecutionExactOutput(),\n", - " )\n", - " def train(mnist_images, mnist_labels):\n", - " # third party\n", - " import torch\n", - " import torch.nn as nn\n", - " import torch.optim as optim\n", - " from torch.utils.data import TensorDataset\n", - "\n", - " # Define the data loader\n", - " train_loader = torch.utils.data.DataLoader(\n", - " TensorDataset(\n", - " torch.tensor(mnist_images, dtype=torch.float32),\n", - " torch.tensor(mnist_labels, dtype=torch.float32),\n", - " ),\n", - " batch_size=4,\n", - " shuffle=True,\n", - " )\n", - "\n", - " # Define the neural network class\n", - " class MLP(nn.Module):\n", - " def __init__(self):\n", - " super().__init__()\n", - " self.fc1 = nn.Linear(784, 10)\n", - "\n", - " def forward(self, x):\n", - " x = torch.log_softmax(self.fc1(x.view(-1, 784)), dim=1)\n", - " return x\n", - "\n", - " # Define the model, optimizer, and loss function\n", - " model = MLP()\n", - " optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)\n", - " criterion = nn.CrossEntropyLoss()\n", - "\n", - " # Train the model\n", - " train_accs = []\n", - " for epoch in range(20):\n", - " running_loss = 0.0\n", - " for _, data in enumerate(train_loader, 0):\n", - " inputs, labels = data\n", - " optimizer.zero_grad()\n", - " outputs = model(inputs)\n", - " loss = criterion(outputs, labels)\n", - " loss.backward()\n", - " optimizer.step()\n", - " running_loss += loss.item()\n", - " print(f\"Epoch {epoch + 1}, Loss: {(running_loss / len(train_loader)):.4f}\")\n", - " # Calculate accuracy on the training set\n", - " train_accs.append(running_loss / len(train_loader))\n", - "\n", - " # Get model parameters\n", - " params = model.state_dict()\n", - "\n", - " # Return training accuracy and model parameters\n", - " return train_accs, params\n", - "\n", - " new_project = sy.Project(\n", - " name=\"Training a 3-layer torch neural network on MNIST data\",\n", - " 
description=\"\"\"Hi, I would like to train my neural network on your MNIST data \n", - " (I can download it online too but I just want to use Syft coz it's cool)\"\"\",\n", - " members=[dasite],\n", - " )\n", - " new_project.create_code_request(obj=train, client=dasite)\n", - " project = new_project.send()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/tutorials/federated-learning/02-data-owners-approve-experiments.ipynb b/notebooks/tutorials/federated-learning/02-data-owners-approve-experiments.ipynb deleted file mode 100644 index 512bbda62f4..00000000000 --- a/notebooks/tutorials/federated-learning/02-data-owners-approve-experiments.ipynb +++ /dev/null @@ -1,109 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "0", - "metadata": {}, - "source": [ - "# Step 1: Login as Data Owner" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "# syft absolute\n", - "import syft as sy\n", - "\n", - "datasites = []\n", - "for i in range(3):\n", - " server = sy.orchestra.launch(\n", - " name=\"fl-datasite-\" + str(i)\n", - " ) # connects to same server\n", - " client = server.login(email=\"info@openmined.org\", password=\"changethis\")\n", - " datasites.append(client)" - ] - }, - { - "cell_type": "markdown", - "id": "2", - "metadata": {}, - 
"source": [ - "# Step 2: Review projects and code\n", - "\n", - "(optionally — the code can be run/tested. skipping for brevity)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "datasites[0].requests" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [ - "datasites[0].requests[0].changes[0].code" - ] - }, - { - "cell_type": "markdown", - "id": "5", - "metadata": {}, - "source": [ - "# Step 3: Approve projects and code" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": {}, - "outputs": [], - "source": [ - "for dasite in datasites:\n", - " dasite.requests[-1].approve()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/tutorials/federated-learning/03-run-federated-learning.ipynb b/notebooks/tutorials/federated-learning/03-run-federated-learning.ipynb deleted file mode 100644 index 58fb26d9077..00000000000 --- a/notebooks/tutorials/federated-learning/03-run-federated-learning.ipynb +++ /dev/null @@ -1,181 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "0", - "metadata": {}, - "source": [ - "# Step 1: Login as External Researcher" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "# syft absolute\n", - "import syft as sy\n", - "\n", - "datasites = []\n", - "for i 
in range(3):\n", - " server = sy.orchestra.launch(\n", - " name=\"fl-datasite-\" + str(i)\n", - " ) # connects to same server\n", - " client = server.login(email=\"sheldon@caltech.edu\", password=\"changethis\")\n", - " datasites.append(client)" - ] - }, - { - "cell_type": "markdown", - "id": "2", - "metadata": {}, - "source": [ - "# Step 2: Train models on all datasites" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "results = []\n", - "\n", - "for dasite in datasites:\n", - " print()\n", - " print(dasite)\n", - " images_ptr = dasite.datasets[\"MNIST Dataset\"].assets[\"images\"]\n", - " labels_ptr = dasite.datasets[\"MNIST Dataset\"].assets[\"labels\"]\n", - "\n", - " results.append(\n", - " dasite.code.train(mnist_images=images_ptr, mnist_labels=labels_ptr).get_from(\n", - " dasite\n", - " )\n", - " )" - ] - }, - { - "cell_type": "markdown", - "id": "4", - "metadata": {}, - "source": [ - "# Step 3: Merge models" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [ - "# stdlib\n", - "from collections import OrderedDict\n", - "from itertools import groupby\n", - "\n", - "\n", - "def ave(d):\n", - " _data = sorted([i for b in d for i in b.items()], key=lambda x: x[0])\n", - " _d = [(a, [j for _, j in b]) for a, b in groupby(_data, key=lambda x: x[0])]\n", - " return OrderedDict(\n", - " {a: ave(b) if isinstance(b[0], dict) else sum(b) / float(len(b)) for a, b in _d}\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": {}, - "outputs": [], - "source": [ - "models_weights = [(lambda x: x[1])(x) for x in results]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [ - "new_model_weights = ave(models_weights)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8", - "metadata": 
{}, - "outputs": [], - "source": [ - "# third party\n", - "import torch\n", - "from torch import nn\n", - "\n", - "# Define the neural network class\n", - "\n", - "\n", - "class MLP(nn.Module):\n", - " def __init__(self):\n", - " super().__init__()\n", - " self.fc1 = nn.Linear(784, 10)\n", - "\n", - " def forward(self, x):\n", - " x = torch.log_softmax(self.fc1(x.view(-1, 784)), dim=1)\n", - " return x" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9", - "metadata": {}, - "outputs": [], - "source": [ - "new_model = MLP()\n", - "new_model.load_state_dict(new_model_weights)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "10", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "11", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/tutorials/federated-learning/mnist_dataset.py b/notebooks/tutorials/federated-learning/mnist_dataset.py deleted file mode 100644 index 77b7c2c7afe..00000000000 --- a/notebooks/tutorials/federated-learning/mnist_dataset.py +++ /dev/null @@ -1,88 +0,0 @@ -""" -Code for the MNIST dataset -Source: https://github.com/google/jax/blob/main/examples/datasets.py -""" - -# stdlib -import array -import gzip -import os -from os import path -import struct -import urllib.request - -# third party -import numpy as np -from numpy import ndarray - -_DATA = "/tmp/mnist_data/" - - -def _download(url: str, filename: str) -> None: - """Download a url to a file in the JAX data temp directory.""" - 
if not path.exists(_DATA): - os.makedirs(_DATA) - out_file = path.join(_DATA, filename) - if not path.isfile(out_file): - urllib.request.urlretrieve(url, out_file) - print(f"downloaded {url} to {_DATA}") - - -def _partial_flatten(x) -> ndarray: - """Flatten all but the first dimension of an ndarray.""" - return np.reshape(x, (x.shape[0], -1)) - - -def _one_hot(x: ndarray, k: int, dtype: type = np.float32) -> ndarray: - """Create a one-hot encoding of x of size k.""" - return np.array(x[:, None] == np.arange(k), dtype) - - -def mnist_raw() -> tuple[ndarray, ndarray, ndarray, ndarray]: - """Download and parse the raw MNIST dataset.""" - # CVDF mirror of http://yann.lecun.com/exdb/mnist/ - base_url = "https://storage.googleapis.com/cvdf-datasets/mnist/" - - def parse_labels(filename): - with gzip.open(filename, "rb") as fh: - _ = struct.unpack(">II", fh.read(8)) - return np.array(array.array("B", fh.read()), dtype=np.uint8) - - def parse_images(filename): - with gzip.open(filename, "rb") as fh: - _, num_data, rows, cols = struct.unpack(">IIII", fh.read(16)) - return np.array(array.array("B", fh.read()), dtype=np.uint8).reshape( - num_data, rows, cols - ) - - for filename in [ - "train-images-idx3-ubyte.gz", - "train-labels-idx1-ubyte.gz", - "t10k-images-idx3-ubyte.gz", - "t10k-labels-idx1-ubyte.gz", - ]: - _download(base_url + filename, filename) - - train_images = parse_images(path.join(_DATA, "train-images-idx3-ubyte.gz")) - train_labels = parse_labels(path.join(_DATA, "train-labels-idx1-ubyte.gz")) - test_images = parse_images(path.join(_DATA, "t10k-images-idx3-ubyte.gz")) - test_labels = parse_labels(path.join(_DATA, "t10k-labels-idx1-ubyte.gz")) - - return train_images, train_labels, test_images, test_labels - - -def mnist(permute_train: bool = False) -> tuple[ndarray, ndarray, ndarray, ndarray]: - """Download, parse and process MNIST data to unit scale and one-hot labels.""" - train_images, train_labels, test_images, test_labels = mnist_raw() - - train_images = 
_partial_flatten(train_images) / np.float32(255.0) - test_images = _partial_flatten(test_images) / np.float32(255.0) - train_labels = _one_hot(train_labels, 10) - test_labels = _one_hot(test_labels, 10) - - if permute_train: - perm = np.random.RandomState(0).permutation(train_images.shape[0]) - train_images = train_images[perm] - train_labels = train_labels[perm] - - return train_images, train_labels, test_images, test_labels diff --git a/notebooks/tutorials/model-training/00-data-owner-upload-data.ipynb b/notebooks/tutorials/model-training/00-data-owner-upload-data.ipynb index adca3805b12..2d5078fa614 100644 --- a/notebooks/tutorials/model-training/00-data-owner-upload-data.ipynb +++ b/notebooks/tutorials/model-training/00-data-owner-upload-data.ipynb @@ -386,7 +386,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.2" + "version": "3.10.9" }, "toc": { "base_numbering": 1, diff --git a/notebooks/tutorials/model-training/01-data-scientist-submit-code.ipynb b/notebooks/tutorials/model-training/01-data-scientist-submit-code.ipynb index 13e52c83015..c66092b49b4 100644 --- a/notebooks/tutorials/model-training/01-data-scientist-submit-code.ipynb +++ b/notebooks/tutorials/model-training/01-data-scientist-submit-code.ipynb @@ -548,7 +548,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.2" + "version": "3.10.9" }, "toc": { "base_numbering": 1, diff --git a/notebooks/tutorials/model-training/02-data-owner-review-approve-code.ipynb b/notebooks/tutorials/model-training/02-data-owner-review-approve-code.ipynb index 5606ec79111..f4a75cb669b 100644 --- a/notebooks/tutorials/model-training/02-data-owner-review-approve-code.ipynb +++ b/notebooks/tutorials/model-training/02-data-owner-review-approve-code.ipynb @@ -296,7 +296,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.13" + "version": "3.10.9" }, "toc": { 
"base_numbering": 1, diff --git a/notebooks/tutorials/model-training/03-data-scientist-download-results.ipynb b/notebooks/tutorials/model-training/03-data-scientist-download-results.ipynb index 250a9f23dcc..2277e6ad2f2 100644 --- a/notebooks/tutorials/model-training/03-data-scientist-download-results.ipynb +++ b/notebooks/tutorials/model-training/03-data-scientist-download-results.ipynb @@ -282,7 +282,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.2" + "version": "3.10.9" }, "toc": { "base_numbering": 1, diff --git a/tests/integration/network/gateway_test.py b/tests/integration/network/gateway_test.py index fd4b9751c42..b9b9eee866b 100644 --- a/tests/integration/network/gateway_test.py +++ b/tests/integration/network/gateway_test.py @@ -16,8 +16,6 @@ from syft.client.datasite_client import DatasiteClient from syft.client.gateway_client import GatewayClient from syft.client.registry import NetworkRegistry -from syft.client.search import SearchResults -from syft.service.dataset.dataset import Dataset from syft.service.network.association_request import AssociationRequestChange from syft.service.network.network_service import ServerPeerAssociationStatus from syft.service.network.routes import HTTPServerRoute @@ -133,9 +131,11 @@ def test_datasite_connect_to_gateway( assert len(gateway_client.peers) == 1 time.sleep(PeerHealthCheckTask.repeat_time * 2 + 1) + + # this is the wrong test — sy.datasites checks the gateway registry # check that the datasite is online on the network - assert len(sy.datasites.all_datasites) == 1 - assert len(sy.datasites.online_datasites) == 1 + # assert len(sy.datasites.all_datasites) == 1 + # assert len(sy.datasites.online_datasites) == 1 proxy_datasite_client = gateway_client.peers[0] datasite_peer = datasite_client.peers[0] @@ -215,25 +215,25 @@ def test_dataset_search(set_env_var, gateway_port: int, datasite_1_port: int) -> # we need to wait to make sure peers health check is done 
time.sleep(PeerHealthCheckTask.repeat_time * 2 + 1) # test if the dataset can be searched by the syft network - right_search = sy.search(dataset_name) - assert isinstance(right_search, SearchResults) - assert len(right_search) == 1 - dataset = right_search[0] - assert isinstance(dataset, Dataset) - assert len(dataset.assets) == 1 - assert isinstance(dataset.assets[0].mock, np.ndarray) - assert dataset.assets[0].data is None - - # search a wrong dataset should return an empty list - wrong_search = sy.search(_random_hash()) - assert len(wrong_search) == 0 + # right_search = sy.search(dataset_name) + # assert isinstance(right_search, SearchResults) + # assert len(right_search) == 1 + # dataset = right_search[0] + # assert isinstance(dataset, Dataset) + # assert len(dataset.assets) == 1 + # assert isinstance(dataset.assets[0].mock, np.ndarray) + # assert dataset.assets[0].data is None - # the datasite client delete the dataset - datasite_client.api.services.dataset.delete(uid=dataset.id) + # # search a wrong dataset should return an empty list + # wrong_search = sy.search(_random_hash()) + # assert len(wrong_search) == 0 - # Remove existing peers - assert isinstance(_remove_existing_peers(datasite_client), SyftSuccess) - assert isinstance(_remove_existing_peers(gateway_client), SyftSuccess) + # # the datasite client delete the dataset + # datasite_client.api.services.dataset.delete(uid=dataset.id) + + # # Remove existing peers + # assert isinstance(_remove_existing_peers(datasite_client), SyftSuccess) + # assert isinstance(_remove_existing_peers(gateway_client), SyftSuccess) @pytest.mark.skip(reason="Possible bug") @@ -352,8 +352,8 @@ def test_deleting_peers(set_env_var, datasite_1_port: int, gateway_port: int) -> # check that the online datasites and gateways are updated time.sleep(PeerHealthCheckTask.repeat_time * 2 + 1) assert len(sy.gateways.all_networks) == 1 - assert len(sy.datasites.all_datasites) == 0 - assert len(sy.datasites.online_datasites) == 0 + # assert 
len(sy.datasites.all_datasites) == 0 + # assert len(sy.datasites.online_datasites) == 0 # reconnect the datasite to the gateway result = datasite_client.connect_to_gateway(gateway_client) From 1e988972826ffd99a931a1914811c01e6881e8ce Mon Sep 17 00:00:00 2001 From: Andrew Trask Date: Sun, 4 Aug 2024 00:11:19 -0400 Subject: [PATCH 15/18] Add some logging --- packages/syft/src/syft/client/registry.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/syft/src/syft/client/registry.py b/packages/syft/src/syft/client/registry.py index 100bc0af96d..39414f1bfdb 100644 --- a/packages/syft/src/syft/client/registry.py +++ b/packages/syft/src/syft/client/registry.py @@ -210,8 +210,7 @@ def check_datasite(datasite: dict) -> dict[Any, Any] | None: online = res.json()["status"] == "ok" elif "detail" in res.json(): online = True - except Exception as e: - print(e) + except Exception: online = False if online: version = datasite.get("version", None) @@ -246,7 +245,9 @@ def _repr_html_(self) -> str: if len(on) == 0: return "(no gateways online - try syft.gateways.all_networks to see offline gateways)" df = pd.DataFrame(on) - + print( + "Add your datasite to this list: https://github.com/OpenMined/NetworkRegistry/" + ) return df._repr_html_() # type: ignore @staticmethod From 624637caea1713b0dab8bc0234b8e276fa4faa0c Mon Sep 17 00:00:00 2001 From: Andrew Trask Date: Sun, 4 Aug 2024 01:07:45 -0400 Subject: [PATCH 16/18] Improve logging and remove test section which is no longer valid --- packages/syft/src/syft/client/registry.py | 52 ++++++++++++++++++- .../src/syft/protocol/protocol_version.json | 11 ++++ tests/integration/local/gateway_local_test.py | 24 +++++---- 3 files changed, 74 insertions(+), 13 deletions(-) diff --git a/packages/syft/src/syft/client/registry.py b/packages/syft/src/syft/client/registry.py index 39414f1bfdb..ed16c078df9 100644 --- a/packages/syft/src/syft/client/registry.py +++ b/packages/syft/src/syft/client/registry.py @@ -18,6 
+18,7 @@ from ..service.network.server_peer import ServerPeerConnectionStatus from ..service.response import SyftException from ..types.server_url import ServerURL +from ..types.syft_object import SyftObject from ..util.constants import DEFAULT_TIMEOUT from .client import SyftClient as Client @@ -186,6 +187,51 @@ def __getitem__(self, key: str | int) -> Client: raise KeyError(f"Invalid key: {key} for {on}") +class Datasite(SyftObject): + __canonical_name__ = "ServerMetadata" + # __version__ = SYFT_OBJECT_VERSION_1 + + name: str + host_or_ip: str + protocol: str + admin_email: str + website: str + slack: str + slack_channel: str + + __attr_searchable__ = [ + "name", + "host_or_ip", + "protocol", + "port", + "admin_email", + "website", + "slack", + "slack_channel", + ] + __attr_unique__ = [ + "name", + "host_or_ip", + "protocol", + "port", + "admin_email", + "website", + "slack", + "slack_channel", + ] + __repr_attrs__ = [ + "name", + "host_or_ip", + "protocol", + "port", + "admin_email", + "website", + "slack", + "slack_channel", + ] + __table_sort_attr__ = "name" + + class DatasiteRegistry: def __init__(self) -> None: self.all_datasites: list[dict] = [] @@ -244,11 +290,13 @@ def _repr_html_(self) -> str: on = self.online_datasites if len(on) == 0: return "(no gateways online - try syft.gateways.all_networks to see offline gateways)" - df = pd.DataFrame(on) + + # df = pd.DataFrame(on) print( "Add your datasite to this list: https://github.com/OpenMined/NetworkRegistry/" ) - return df._repr_html_() # type: ignore + # return df._repr_html_() # type: ignore + return ([Datasite(**ds) for ds in on])._repr_html_() @staticmethod def create_client(datasite: dict[str, Any]) -> Client: diff --git a/packages/syft/src/syft/protocol/protocol_version.json b/packages/syft/src/syft/protocol/protocol_version.json index 49f2dcd4d8e..223ad28d2e9 100644 --- a/packages/syft/src/syft/protocol/protocol_version.json +++ b/packages/syft/src/syft/protocol/protocol_version.json @@ -7,5 +7,16 
@@ }, "3": { "release_name": "0.9.0.json" + }, + "dev": { + "object_versions": { + "EnclaveMetadata": { + "1": { + "version": 1, + "hash": "8d2dfafa01ec909c080a790cf15a8fc78e00382d3bfe6207098ceb25a60b9c53", + "action": "add" + } + } + } } } diff --git a/tests/integration/local/gateway_local_test.py b/tests/integration/local/gateway_local_test.py index e10e9cb1540..909bb3dd598 100644 --- a/tests/integration/local/gateway_local_test.py +++ b/tests/integration/local/gateway_local_test.py @@ -15,7 +15,6 @@ from syft.client.gateway_client import GatewayClient from syft.service.network.network_service import ServerPeerAssociationStatus from syft.service.network.server_peer import ServerPeer -from syft.service.network.server_peer import ServerPeerConnectionStatus from syft.service.network.utils import PeerHealthCheckTask from syft.service.request.request import Request from syft.service.response import SyftSuccess @@ -164,16 +163,19 @@ def test_create_gateway( assert isinstance(result, SyftSuccess) time.sleep(PeerHealthCheckTask.repeat_time * 2 + 1) - assert len(sy.datasites.all_datasites) == 2 - assert len(sy.datasites.online_datasites) == 2 - # check for peer connection status - for peer in gateway_client.api.services.network.get_all_peers(): - assert peer.ping_status == ServerPeerConnectionStatus.ACTIVE - - # check the guest client - client = gateway_webserver.client - assert isinstance(client, GatewayClient) - assert client.metadata.server_type == ServerType.GATEWAY.value + + # TRASK: i've changed the functionality here so that + # sy.datasites always goes out to the network + # assert len(sy.datasites.all_datasites) == 2 + # assert len(sy.datasites.online_datasites) == 2 + # # check for peer connection status + # for peer in gateway_client.api.services.network.get_all_peers(): + # assert peer.ping_status == ServerPeerConnectionStatus.ACTIVE + + # # check the guest client + # client = gateway_webserver.client + # assert isinstance(client, GatewayClient) + # assert 
client.metadata.server_type == ServerType.GATEWAY.value @pytest.mark.local_server From b0ff13721eb9f5dc118e6a28d218f8c7308dcfd9 Mon Sep 17 00:00:00 2001 From: Andrew Trask Date: Sun, 4 Aug 2024 01:11:44 -0400 Subject: [PATCH 17/18] Remove protocol_Version change --- packages/syft/src/syft/protocol/protocol_version.json | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/packages/syft/src/syft/protocol/protocol_version.json b/packages/syft/src/syft/protocol/protocol_version.json index 223ad28d2e9..49f2dcd4d8e 100644 --- a/packages/syft/src/syft/protocol/protocol_version.json +++ b/packages/syft/src/syft/protocol/protocol_version.json @@ -7,16 +7,5 @@ }, "3": { "release_name": "0.9.0.json" - }, - "dev": { - "object_versions": { - "EnclaveMetadata": { - "1": { - "version": 1, - "hash": "8d2dfafa01ec909c080a790cf15a8fc78e00382d3bfe6207098ceb25a60b9c53", - "action": "add" - } - } - } } } From 7946ca625123f8b362f474baff2ac77de5e646ab Mon Sep 17 00:00:00 2001 From: Andrew Trask Date: Sun, 4 Aug 2024 01:31:52 -0400 Subject: [PATCH 18/18] Add version to printout --- packages/syft/src/syft/client/registry.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/packages/syft/src/syft/client/registry.py b/packages/syft/src/syft/client/registry.py index ed16c078df9..1d06166d1b3 100644 --- a/packages/syft/src/syft/client/registry.py +++ b/packages/syft/src/syft/client/registry.py @@ -193,6 +193,7 @@ class Datasite(SyftObject): name: str host_or_ip: str + version: str protocol: str admin_email: str website: str @@ -202,32 +203,35 @@ class Datasite(SyftObject): __attr_searchable__ = [ "name", "host_or_ip", - "protocol", + "version", "port", "admin_email", "website", "slack", "slack_channel", + "protocol", ] __attr_unique__ = [ "name", "host_or_ip", - "protocol", + "version", "port", "admin_email", "website", "slack", "slack_channel", + "protocol", ] __repr_attrs__ = [ "name", "host_or_ip", - "protocol", + "version", "port", "admin_email", 
"website", "slack", "slack_channel", + "protocol", ] __table_sort_attr__ = "name"