From d4258294210d8a76db682cd93165f722f57be593 Mon Sep 17 00:00:00 2001 From: Shailesh Tanwar <135304487+tanwarsh@users.noreply.github.com> Date: Fri, 13 Dec 2024 14:07:05 +0530 Subject: [PATCH] Updated Keras and TensorFlow Task Runner and related workspaces. (#1174) * keras and tf updated Signed-off-by: yes * skipped rebuild model for round 0 Signed-off-by: yes * formatting issues fixed Signed-off-by: yes * added keras in workflow Signed-off-by: yes * pipeline tensorflowversion check Signed-off-by: yes * revert changes Signed-off-by: yes * revert changes Signed-off-by: yes * removed extra line Signed-off-by: yes * workspace changes Signed-off-by: yes * removed python 3.8 for taskrunner workflow Signed-off-by: yes * updated python version to 3.9 Signed-off-by: yes * updated python version to 3.9 Signed-off-by: yes * saved model name changes in tests Signed-off-by: yes * code change to save model Signed-off-by: yes * keras_nlp workspace changes Signed-off-by: yes * keras and tf updated Signed-off-by: yes * skipped rebuild model for round 0 Signed-off-by: yes * formatting issues fixed Signed-off-by: yes * added keras in workflow Signed-off-by: yes * pipeline tensorflowversion check Signed-off-by: yes * revert changes Signed-off-by: yes * revert changes Signed-off-by: yes * removed extra line Signed-off-by: yes * workspace changes Signed-off-by: yes * removed python 3.8 for taskrunner workflow Signed-off-by: yes * updated python version to 3.9 Signed-off-by: yes * updated python version to 3.9 Signed-off-by: yes * saved model name changes in tests Signed-off-by: yes * code change to save model Signed-off-by: yes * keras_nlp workspace changes Signed-off-by: yes * removed duplicate code Signed-off-by: yes * removed oython 3.8 from taskrunnner e2e workflow Signed-off-by: yes * code changes as per comments Signed-off-by: yes * fix for formatting issue Signed-off-by: yes * keras cnn with compression code changes Signed-off-by: yes * remove version changes Signed-off-by: yes * revert version changes Signed-off-by: yes * keep keras runner for tensorflow workspaces Signed-off-by: yes * formatting issue Signed-off-by: yes * removed keras as ke Signed-off-by: yes * comment changes Signed-off-by: yes * code changes Signed-off-by: yes * code changes Signed-off-by: yes * code changes Signed-off-by: yes * code changes Signed-off-by: yes --------- Signed-off-by: yes --- docs/developer_ref/troubleshooting.rst | 2 +- .../.workspace | 0 .../{tf_2dunet => keras_2dunet}/README.md | 0 .../plan/cols.yaml | 0 .../plan/data.yaml | 4 +- .../plan/defaults | 0 .../plan/plan.yaml | 37 +- .../requirements.txt | 3 +- .../src/__init__.py | 0 .../src/brats_utils.py | 0 .../src/dataloader.py} | 6 +- .../src/nii_reader.py | 0 .../keras_2dunet/src/taskrunner.py | 158 ++++++ .../keras_cnn_mnist/plan/plan.yaml | 14 +- .../keras_cnn_mnist/requirements.txt | 5 +- .../{tfmnist_inmemory.py => dataloader.py} | 6 +- .../src/{keras_cnn.py => taskrunner.py} | 25 +- .../keras_cnn_with_compression/plan/cols.yaml | 5 - .../keras_cnn_with_compression/plan/data.yaml | 7 - .../keras_cnn_with_compression/plan/plan.yaml | 47 -- .../requirements.txt | 1 - .../src/keras_cnn.py | 85 --- .../src/mnist_utils.py | 118 ----- .../src/tfmnist_inmemory.py | 46 -- openfl-workspace/keras_nlp/plan/plan.yaml | 13 +- openfl-workspace/keras_nlp/requirements.txt | 3 +- .../src/{nlp_dataloader.py => dataloader.py} | 13 +- .../keras_nlp/src/nlp_taskrunner.py | 73 --- openfl-workspace/keras_nlp/src/taskrunner.py | 72 +++ openfl-workspace/tf_2dunet/.workspace | 2 - 
openfl-workspace/tf_2dunet/plan/data.yaml | 8 - openfl-workspace/tf_2dunet/plan/defaults | 2 - openfl-workspace/tf_2dunet/src/__init__.py | 3 - openfl-workspace/tf_2dunet/src/tf_2dunet.py | 250 --------- openfl-workspace/tf_3dunet_brats/.workspace | 2 - openfl-workspace/tf_3dunet_brats/README.md | 166 ------ .../tf_3dunet_brats/plan/cols.yaml | 4 - .../tf_3dunet_brats/plan/data.yaml | 16 - .../plan/defaults/aggregator.yaml | 4 - .../plan/defaults/assigner.yaml | 9 - .../plan/defaults/collaborator.yaml | 5 - .../plan/defaults/compression_pipeline.yaml | 1 - .../plan/defaults/data_loader.yaml | 1 - .../tf_3dunet_brats/plan/defaults/defaults | 2 - .../plan/defaults/network.yaml | 9 - .../plan/defaults/task_runner.yaml | 1 - .../plan/defaults/tasks_fast_estimator.yaml | 22 - .../plan/defaults/tasks_keras.yaml | 23 - .../plan/defaults/tasks_tensorflow.yaml | 23 - .../plan/defaults/tasks_torch.yaml | 19 - .../tf_3dunet_brats/plan/plan.yaml | 73 --- .../tf_3dunet_brats/requirements.txt | 6 - .../tf_3dunet_brats/split_directory.sh | 70 --- .../tf_3dunet_brats/src/__init__.py | 3 - .../tf_3dunet_brats/src/dataloader.py | 303 ----------- .../tf_3dunet_brats/src/define_model.py | 158 ------ .../tf_3dunet_brats/src/tf_3dunet_model.py | 215 -------- .../src/tf_brats_dataloader.py | 99 ---- openfl-workspace/tf_cnn_histology/.workspace | 2 - .../tf_cnn_histology/plan/cols.yaml | 4 - .../tf_cnn_histology/plan/defaults | 2 - .../tf_cnn_histology/plan/plan.yaml | 64 --- .../tf_cnn_histology/requirements.txt | 3 - .../tf_cnn_histology/src/__init__.py | 3 - .../tf_cnn_histology/src/tf_cnn.py | 108 ---- .../tf_cnn_histology/src/tfds_utils.py | 126 ----- .../src/tfhistology_inmemory.py | 34 -- .../workspace/plan/defaults/tasks_keras.yaml | 6 +- .../plan/defaults/tasks_tensorflow.yaml | 23 - openfl/federated/__init__.py | 6 +- openfl/federated/data/__init__.py | 3 +- openfl/federated/data/loader_tf.py | 130 ----- openfl/federated/task/__init__.py | 3 +- openfl/federated/task/runner_keras.py | 130 +++-- openfl/federated/task/runner_tf.py | 485 ------------------ tests/github/test_hello_federation.py | 2 +- 76 files changed, 373 insertions(+), 3003 deletions(-) rename openfl-workspace/{keras_cnn_with_compression => keras_2dunet}/.workspace (100%) rename openfl-workspace/{tf_2dunet => keras_2dunet}/README.md (100%) rename openfl-workspace/{tf_2dunet => keras_2dunet}/plan/cols.yaml (100%) rename openfl-workspace/{tf_cnn_histology => keras_2dunet}/plan/data.yaml (91%) rename openfl-workspace/{keras_cnn_with_compression => keras_2dunet}/plan/defaults (100%) rename openfl-workspace/{tf_2dunet => keras_2dunet}/plan/plan.yaml (55%) rename openfl-workspace/{tf_2dunet => keras_2dunet}/requirements.txt (74%) rename openfl-workspace/{keras_cnn_with_compression => keras_2dunet}/src/__init__.py (100%) rename openfl-workspace/{tf_2dunet => keras_2dunet}/src/brats_utils.py (100%) rename openfl-workspace/{tf_2dunet/src/tfbrats_inmemory.py => keras_2dunet/src/dataloader.py} (89%) rename openfl-workspace/{tf_2dunet => keras_2dunet}/src/nii_reader.py (100%) create mode 100644 openfl-workspace/keras_2dunet/src/taskrunner.py rename openfl-workspace/keras_cnn_mnist/src/{tfmnist_inmemory.py => dataloader.py} (90%) rename openfl-workspace/keras_cnn_mnist/src/{keras_cnn.py => taskrunner.py} (74%) delete mode 100644 openfl-workspace/keras_cnn_with_compression/plan/cols.yaml delete mode 100644 openfl-workspace/keras_cnn_with_compression/plan/data.yaml delete mode 100644 openfl-workspace/keras_cnn_with_compression/plan/plan.yaml delete mode 
100644 openfl-workspace/keras_cnn_with_compression/requirements.txt delete mode 100644 openfl-workspace/keras_cnn_with_compression/src/keras_cnn.py delete mode 100644 openfl-workspace/keras_cnn_with_compression/src/mnist_utils.py delete mode 100644 openfl-workspace/keras_cnn_with_compression/src/tfmnist_inmemory.py rename openfl-workspace/keras_nlp/src/{nlp_dataloader.py => dataloader.py} (93%) delete mode 100644 openfl-workspace/keras_nlp/src/nlp_taskrunner.py create mode 100644 openfl-workspace/keras_nlp/src/taskrunner.py delete mode 100644 openfl-workspace/tf_2dunet/.workspace delete mode 100644 openfl-workspace/tf_2dunet/plan/data.yaml delete mode 100644 openfl-workspace/tf_2dunet/plan/defaults delete mode 100644 openfl-workspace/tf_2dunet/src/__init__.py delete mode 100644 openfl-workspace/tf_2dunet/src/tf_2dunet.py delete mode 100644 openfl-workspace/tf_3dunet_brats/.workspace delete mode 100644 openfl-workspace/tf_3dunet_brats/README.md delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/cols.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/data.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/aggregator.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/assigner.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/collaborator.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/compression_pipeline.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/data_loader.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/defaults delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/network.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/task_runner.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_fast_estimator.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_keras.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_tensorflow.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_torch.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/plan/plan.yaml delete mode 100644 openfl-workspace/tf_3dunet_brats/requirements.txt delete mode 100755 openfl-workspace/tf_3dunet_brats/split_directory.sh delete mode 100644 openfl-workspace/tf_3dunet_brats/src/__init__.py delete mode 100644 openfl-workspace/tf_3dunet_brats/src/dataloader.py delete mode 100644 openfl-workspace/tf_3dunet_brats/src/define_model.py delete mode 100644 openfl-workspace/tf_3dunet_brats/src/tf_3dunet_model.py delete mode 100644 openfl-workspace/tf_3dunet_brats/src/tf_brats_dataloader.py delete mode 100644 openfl-workspace/tf_cnn_histology/.workspace delete mode 100644 openfl-workspace/tf_cnn_histology/plan/cols.yaml delete mode 100644 openfl-workspace/tf_cnn_histology/plan/defaults delete mode 100644 openfl-workspace/tf_cnn_histology/plan/plan.yaml delete mode 100644 openfl-workspace/tf_cnn_histology/requirements.txt delete mode 100644 openfl-workspace/tf_cnn_histology/src/__init__.py delete mode 100644 openfl-workspace/tf_cnn_histology/src/tf_cnn.py delete mode 100644 openfl-workspace/tf_cnn_histology/src/tfds_utils.py delete mode 100644 openfl-workspace/tf_cnn_histology/src/tfhistology_inmemory.py delete mode 100644 openfl-workspace/workspace/plan/defaults/tasks_tensorflow.yaml delete mode 100644 openfl/federated/data/loader_tf.py delete mode 100644 openfl/federated/task/runner_tf.py diff --git a/docs/developer_ref/troubleshooting.rst 
b/docs/developer_ref/troubleshooting.rst index 005792f96d..dae59bc763 100644 --- a/docs/developer_ref/troubleshooting.rst +++ b/docs/developer_ref/troubleshooting.rst @@ -10,7 +10,7 @@ The following is a list of commonly reported issues in Open Federated Learning (|productName|). If you don't see your issue reported here, please submit a `Github issue `_ or contact us directly on `Slack `_. -1. I see the error :code:`Cannot import name TensorFlowDataLoader from openfl.federated` +1. I see the error :code:`Cannot import name KerasDataLoader from openfl.federated` |productName| currently uses conditional imports to attempt to be framework agnostic. If your task runner is derived from `KerasTaskRunner` or `TensorflowTaskRunner`, this error could come up if TensorFlow\*\ was not installed in your collaborator's virtual environment. If running on multi-node experiment, we recommend using the :code:`fx workspace export` and :code:`fx workspace import` commands, as this will ensure consistent modules between aggregator and collaborators. diff --git a/openfl-workspace/keras_cnn_with_compression/.workspace b/openfl-workspace/keras_2dunet/.workspace similarity index 100% rename from openfl-workspace/keras_cnn_with_compression/.workspace rename to openfl-workspace/keras_2dunet/.workspace diff --git a/openfl-workspace/tf_2dunet/README.md b/openfl-workspace/keras_2dunet/README.md similarity index 100% rename from openfl-workspace/tf_2dunet/README.md rename to openfl-workspace/keras_2dunet/README.md diff --git a/openfl-workspace/tf_2dunet/plan/cols.yaml b/openfl-workspace/keras_2dunet/plan/cols.yaml similarity index 100% rename from openfl-workspace/tf_2dunet/plan/cols.yaml rename to openfl-workspace/keras_2dunet/plan/cols.yaml diff --git a/openfl-workspace/tf_cnn_histology/plan/data.yaml b/openfl-workspace/keras_2dunet/plan/data.yaml similarity index 91% rename from openfl-workspace/tf_cnn_histology/plan/data.yaml rename to openfl-workspace/keras_2dunet/plan/data.yaml index ce40a6ec6c..37c4bfd193 100644 --- a/openfl-workspace/tf_cnn_histology/plan/data.yaml +++ b/openfl-workspace/keras_2dunet/plan/data.yaml @@ -1,4 +1,4 @@ -# Copyright (C) 2020-2021 Intel Corporation +# Copyright (C) 2020-2024 Intel Corporation # Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. # all keys under 'collaborators' corresponds to a specific colaborator name the corresponding dictionary has data_name, data_path pairs. 
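The troubleshooting entry above refers to OpenFL's conditional imports: Keras-backed classes such as `KerasDataLoader` are only exposed when TensorFlow is importable in the collaborator's environment. A minimal, hypothetical sketch of that pattern (names and module layout are illustrative only, not the actual `openfl.federated` source):

```python
# Illustrative sketch only -- the class below is a placeholder, not openfl code.
try:
    import tensorflow  # noqa: F401  # present only when TF/Keras is installed
    _HAVE_TF = True
except ImportError:
    _HAVE_TF = False

if _HAVE_TF:
    class KerasDataLoader:
        """Stand-in for the real loader; only defined when TensorFlow imports."""

# In an environment without TensorFlow, `from this_module import KerasDataLoader`
# fails with "cannot import name 'KerasDataLoader'", matching the error above.
```

As the note says, exporting the workspace with `fx workspace export`/`import` keeps the installed modules consistent between aggregator and collaborators, which avoids this class of import error.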
@@ -8,4 +8,4 @@ # collaborator_name,data_directory_path one,1 -two,2 +two,2 \ No newline at end of file diff --git a/openfl-workspace/keras_cnn_with_compression/plan/defaults b/openfl-workspace/keras_2dunet/plan/defaults similarity index 100% rename from openfl-workspace/keras_cnn_with_compression/plan/defaults rename to openfl-workspace/keras_2dunet/plan/defaults diff --git a/openfl-workspace/tf_2dunet/plan/plan.yaml b/openfl-workspace/keras_2dunet/plan/plan.yaml similarity index 55% rename from openfl-workspace/tf_2dunet/plan/plan.yaml rename to openfl-workspace/keras_2dunet/plan/plan.yaml index 2d00302208..fa26d97b9a 100644 --- a/openfl-workspace/tf_2dunet/plan/plan.yaml +++ b/openfl-workspace/keras_2dunet/plan/plan.yaml @@ -5,9 +5,9 @@ aggregator : defaults : plan/defaults/aggregator.yaml template : openfl.component.Aggregator settings : - init_state_path : save/tf_2dunet_brats_init.pbuf - last_state_path : save/tf_2dunet_brats_latest.pbuf - best_state_path : save/tf_2dunet_brats_best.pbuf + init_state_path : save/init.pbuf + last_state_path : save/latest.pbuf + best_state_path : save/best.pbuf rounds_to_train : 10 db_store_rounds : 2 @@ -20,7 +20,7 @@ collaborator : data_loader : defaults : plan/defaults/data_loader.yaml - template : src.tfbrats_inmemory.TensorFlowBratsInMemory + template : src.dataloader.KerasBratsInMemory settings : batch_size: 64 percent_train: 0.8 @@ -29,7 +29,7 @@ data_loader : task_runner : defaults : plan/defaults/task_runner.yaml - template : src.tf_2dunet.TensorFlow2DUNet + template : src.taskrunner.Keras2DUNet network : defaults : plan/defaults/network.yaml @@ -38,7 +38,30 @@ assigner : defaults : plan/defaults/assigner.yaml tasks : - defaults : plan/defaults/tasks_tensorflow.yaml + aggregated_model_validation: + function : validate_task + kwargs : + batch_size : 32 + apply : global + metrics : + - acc + + locally_tuned_model_validation: + function : validate_task + kwargs : + batch_size : 32 + apply : local + metrics : + - acc + + train: + function : train_task + kwargs : + batch_size : 32 + metrics : + - loss + epochs : 1 + compression_pipeline : - defaults : plan/defaults/compression_pipeline.yaml + defaults : plan/defaults/compression_pipeline.yaml \ No newline at end of file diff --git a/openfl-workspace/tf_2dunet/requirements.txt b/openfl-workspace/keras_2dunet/requirements.txt similarity index 74% rename from openfl-workspace/tf_2dunet/requirements.txt rename to openfl-workspace/keras_2dunet/requirements.txt index 640dad7154..d790f6ff87 100644 --- a/openfl-workspace/tf_2dunet/requirements.txt +++ b/openfl-workspace/keras_2dunet/requirements.txt @@ -1,3 +1,4 @@ +keras==3.6.0 nibabel setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability -tensorflow==2.13 +tensorflow==2.18.0 diff --git a/openfl-workspace/keras_cnn_with_compression/src/__init__.py b/openfl-workspace/keras_2dunet/src/__init__.py similarity index 100% rename from openfl-workspace/keras_cnn_with_compression/src/__init__.py rename to openfl-workspace/keras_2dunet/src/__init__.py diff --git a/openfl-workspace/tf_2dunet/src/brats_utils.py b/openfl-workspace/keras_2dunet/src/brats_utils.py similarity index 100% rename from openfl-workspace/tf_2dunet/src/brats_utils.py rename to openfl-workspace/keras_2dunet/src/brats_utils.py diff --git a/openfl-workspace/tf_2dunet/src/tfbrats_inmemory.py b/openfl-workspace/keras_2dunet/src/dataloader.py similarity index 89% rename from openfl-workspace/tf_2dunet/src/tfbrats_inmemory.py rename to 
openfl-workspace/keras_2dunet/src/dataloader.py index 49b4484fc2..a0bc12f1dd 100644 --- a/openfl-workspace/tf_2dunet/src/tfbrats_inmemory.py +++ b/openfl-workspace/keras_2dunet/src/dataloader.py @@ -3,12 +3,12 @@ """You may copy this file as the starting point of your own model.""" -from openfl.federated import TensorFlowDataLoader +from openfl.federated import KerasDataLoader from .brats_utils import load_from_nifti -class TensorFlowBratsInMemory(TensorFlowDataLoader): - """TensorFlow Data Loader for the BraTS dataset.""" +class KerasBratsInMemory(KerasDataLoader): + """Keras Data Loader for the BraTS dataset.""" def __init__(self, data_path, batch_size, percent_train=0.8, pre_split_shuffle=True, **kwargs): """Initialize. diff --git a/openfl-workspace/tf_2dunet/src/nii_reader.py b/openfl-workspace/keras_2dunet/src/nii_reader.py similarity index 100% rename from openfl-workspace/tf_2dunet/src/nii_reader.py rename to openfl-workspace/keras_2dunet/src/nii_reader.py diff --git a/openfl-workspace/keras_2dunet/src/taskrunner.py b/openfl-workspace/keras_2dunet/src/taskrunner.py new file mode 100644 index 0000000000..38ecef4ba7 --- /dev/null +++ b/openfl-workspace/keras_2dunet/src/taskrunner.py @@ -0,0 +1,158 @@ +# Copyright (C) 2020-2021 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +"""You may copy this file as the starting point of your own model.""" + +import tensorflow as tf +import keras + +from openfl.federated import KerasTaskRunner + + +class Keras2DUNet(KerasTaskRunner): + """Initialize. + + Args: + **kwargs: Additional parameters to pass to the function + + """ + + def __init__(self, **kwargs): + """Initialize. + + Args: + **kwargs: Additional parameters to pass to the function + + """ + super().__init__(**kwargs) + + self.model = self.build_model(self.data_loader.get_feature_shape(), use_upsampling=True, **kwargs) + self.model.summary(print_fn=self.logger.info, line_length=120) + self.initialize_tensorkeys_for_functions() + + + def build_model(self, input_shape, + use_upsampling=False, + n_cl_out=1, + dropout=0.2, + activation_function='relu', + seed=0xFEEDFACE, + depth=5, + dropout_at=None, + initial_filters=32, + batch_norm=True, + **kwargs): + """Define the TensorFlow model. 
+ + Args: + input_shape: input shape of the model + use_upsampling (bool): True = use bilinear interpolation; + False = use transposed convolution (Default=False) + n_cl_out (int): Number of channels in input layer (Default=1) + dropout (float): Dropout percentage (Default=0.2)(Default = True) + activation_function: The activation function to use after convolutional layers (Default='relu') + seed: random seed (Default=0xFEEDFACE) + depth (int): Number of max pooling layers in encoder (Default=5) + dropout_at: Layers to perform dropout after (Default=[2,3]) + initial_filters (int): Number of filters in first convolutional layer (Default=32) + batch_norm (bool): True = use batch normalization (Default=True) + **kwargs: Additional parameters to pass to the function + + """ + if dropout_at is None: + dropout_at = [2, 3] + + inputs = keras.layers.Input(shape=input_shape, name='Images') + + if activation_function == 'relu': + activation = tf.nn.relu + elif activation_function == 'leakyrelu': + activation = tf.nn.leaky_relu + + params = { + 'activation': activation, + 'kernel_initializer': keras.initializers.he_uniform(seed=seed), + 'kernel_size': (3, 3), + 'padding': 'same', + } + + convb_layers = {} + + net = inputs + filters = initial_filters + for i in range(depth): + name = f'conv{i + 1}a' + net = keras.layers.Conv2D(name=name, filters=filters, **params)(net) + if i in dropout_at: + net = keras.layers.Dropout(dropout)(net) + name = f'conv{i + 1}b' + net = keras.layers.Conv2D(name=name, filters=filters, **params)(net) + if batch_norm: + net = keras.layers.BatchNormalization()(net) + convb_layers[name] = net + # only pool if not last level + if i != depth - 1: + name = f'pool{i + 1}' + net = keras.layers.MaxPooling2D(name=name, pool_size=(2, 2))(net) + filters *= 2 + + # do the up levels + filters //= 2 + for i in range(depth - 1): + if use_upsampling: + up = keras.layers.UpSampling2D( + name=f'up{depth + i + 1}', size=(2, 2))(net) + else: + up = keras.layers.Conv2DTranspose( + name='transConv6', filters=filters, + kernel_size=(2, 2), strides=(2, 2), padding='same')(net) + net = keras.layers.concatenate( + [up, convb_layers[f'conv{depth - i - 1}b']], + axis=-1 + ) + net = keras.layers.Conv2D( + name=f'conv{depth + i + 1}a', + filters=filters, **params)(net) + net = keras.layers.Conv2D( + name=f'conv{depth + i + 1}b', + filters=filters, **params)(net) + filters //= 2 + net = keras.layers.Conv2D(name='Mask', filters=n_cl_out, + kernel_size=(1, 1), + activation='sigmoid')(net) + model = keras.models.Model(inputs=[inputs], outputs=[net]) + + + self.optimizer = keras.optimizers.RMSprop(1e-2) + model.compile( + loss=self.dice_coef_loss, + optimizer=self.optimizer, + metrics=["acc"] + ) + + return model + + def dice_coef_loss(self, y_true, y_pred, smooth=1.0): + """Dice coefficient loss. 
+ + Calculate the -log(Dice Coefficient) loss + + Args: + y_true: Ground truth annotation array + y_pred: Prediction array from model + smooth (float): Laplace smoothing factor (Default=1.0) + Returns: + float: -log(Dice cofficient) metric + """ + intersection = tf.reduce_sum(y_true * y_pred, axis=(1, 2, 3)) + + term1 = -tf.math.log(tf.constant(2.0) * intersection + smooth) + term2 = tf.math.log(tf.reduce_sum(y_true, axis=(1, 2, 3)) + + tf.reduce_sum(y_pred, axis=(1, 2, 3)) + smooth) + + term1 = tf.reduce_mean(term1) + term2 = tf.reduce_mean(term2) + + loss = term1 + term2 + + return loss diff --git a/openfl-workspace/keras_cnn_mnist/plan/plan.yaml b/openfl-workspace/keras_cnn_mnist/plan/plan.yaml index e1c661343e..54867f4578 100644 --- a/openfl-workspace/keras_cnn_mnist/plan/plan.yaml +++ b/openfl-workspace/keras_cnn_mnist/plan/plan.yaml @@ -5,9 +5,9 @@ aggregator : defaults : plan/defaults/aggregator.yaml template : openfl.component.Aggregator settings : - init_state_path : save/keras_cnn_mnist_init.pbuf - best_state_path : save/keras_cnn_mnist_best.pbuf - last_state_path : save/keras_cnn_mnist_last.pbuf + init_state_path : save/init.pbuf + best_state_path : save/best.pbuf + last_state_path : save/last.pbuf rounds_to_train : 10 collaborator : @@ -19,7 +19,7 @@ collaborator : data_loader : defaults : plan/defaults/data_loader.yaml - template : src.tfmnist_inmemory.TensorFlowMNISTInMemory + template : src.dataloader.KerasMNISTInMemory settings : collaborator_count : 2 data_group_name : mnist @@ -27,7 +27,7 @@ data_loader : task_runner : defaults : plan/defaults/task_runner.yaml - template : src.keras_cnn.KerasCNN + template : src.taskrunner.KerasCNN network : defaults : plan/defaults/network.yaml @@ -40,3 +40,7 @@ tasks : compression_pipeline : defaults : plan/defaults/compression_pipeline.yaml + # To use different Compression Pipeline, uncomment the following lines + # template : openfl.pipelines.KCPipeline + # settings : + # n_clusters : 6 diff --git a/openfl-workspace/keras_cnn_mnist/requirements.txt b/openfl-workspace/keras_cnn_mnist/requirements.txt index 4ce0afb4ce..5fa9907811 100644 --- a/openfl-workspace/keras_cnn_mnist/requirements.txt +++ b/openfl-workspace/keras_cnn_mnist/requirements.txt @@ -1,2 +1,3 @@ -numpy==1.23.5 -tensorflow==2.13 +keras==3.6.0 +tensorflow==2.18.0 + diff --git a/openfl-workspace/keras_cnn_mnist/src/tfmnist_inmemory.py b/openfl-workspace/keras_cnn_mnist/src/dataloader.py similarity index 90% rename from openfl-workspace/keras_cnn_mnist/src/tfmnist_inmemory.py rename to openfl-workspace/keras_cnn_mnist/src/dataloader.py index 51f4ccf739..040e8091c9 100644 --- a/openfl-workspace/keras_cnn_mnist/src/tfmnist_inmemory.py +++ b/openfl-workspace/keras_cnn_mnist/src/dataloader.py @@ -3,12 +3,12 @@ """You may copy this file as the starting point of your own model.""" -from openfl.federated import TensorFlowDataLoader +from openfl.federated import KerasDataLoader from .mnist_utils import load_mnist_shard -class TensorFlowMNISTInMemory(TensorFlowDataLoader): - """TensorFlow Data Loader for MNIST Dataset.""" +class KerasMNISTInMemory(KerasDataLoader): + """Data Loader for MNIST Dataset.""" def __init__(self, data_path, batch_size, **kwargs): """ diff --git a/openfl-workspace/keras_cnn_mnist/src/keras_cnn.py b/openfl-workspace/keras_cnn_mnist/src/taskrunner.py similarity index 74% rename from openfl-workspace/keras_cnn_mnist/src/keras_cnn.py rename to openfl-workspace/keras_cnn_mnist/src/taskrunner.py index 35a71f7734..165861033c 100644 --- 
a/openfl-workspace/keras_cnn_mnist/src/keras_cnn.py +++ b/openfl-workspace/keras_cnn_mnist/src/taskrunner.py @@ -1,13 +1,12 @@ -# Copyright (C) 2020-2021 Intel Corporation +# Copyright (C) 2020-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 """You may copy this file as the starting point of your own model.""" -import tensorflow.keras as ke -from tensorflow.keras import Sequential -from tensorflow.keras.layers import Conv2D -from tensorflow.keras.layers import Dense -from tensorflow.keras.layers import Flatten +from keras.models import Sequential +from keras.layers import Conv2D +from keras.layers import Dense +from keras.layers import Flatten from openfl.federated import KerasTaskRunner @@ -50,7 +49,7 @@ def build_model(self, num_classes (int): The number of classes of the dataset Returns: - tensorflow.python.keras.engine.sequential.Sequential: The model defined in Keras + keras.models.Sequential: The model defined in Keras """ model = Sequential() @@ -72,14 +71,8 @@ def build_model(self, model.add(Dense(num_classes, activation='softmax')) - model.compile(loss=ke.losses.categorical_crossentropy, - optimizer=ke.optimizers.legacy.Adam(), - metrics=['accuracy']) - - # initialize the optimizer variables - opt_vars = model.optimizer.variables() - - for v in opt_vars: - v.initializer.run(session=self.sess) + model.compile(loss="categorical_crossentropy", + optimizer="adam", + metrics=["accuracy"]) return model diff --git a/openfl-workspace/keras_cnn_with_compression/plan/cols.yaml b/openfl-workspace/keras_cnn_with_compression/plan/cols.yaml deleted file mode 100644 index 95307de3bc..0000000000 --- a/openfl-workspace/keras_cnn_with_compression/plan/cols.yaml +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. - -collaborators: - \ No newline at end of file diff --git a/openfl-workspace/keras_cnn_with_compression/plan/data.yaml b/openfl-workspace/keras_cnn_with_compression/plan/data.yaml deleted file mode 100644 index 257c7825fe..0000000000 --- a/openfl-workspace/keras_cnn_with_compression/plan/data.yaml +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. - -# collaborator_name,data_directory_path -one,1 - - diff --git a/openfl-workspace/keras_cnn_with_compression/plan/plan.yaml b/openfl-workspace/keras_cnn_with_compression/plan/plan.yaml deleted file mode 100644 index f528396b48..0000000000 --- a/openfl-workspace/keras_cnn_with_compression/plan/plan.yaml +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. 
- -aggregator : - defaults : plan/defaults/aggregator.yaml - template : openfl.component.Aggregator - settings : - init_state_path : save/keras_cnn_mnist_init.pbuf - best_state_path : save/keras_cnn_mnist_best.pbuf - last_state_path : save/keras_cnn_mnist_last.pbuf - db_store_rounds: 2 - rounds_to_train : 10 - -collaborator : - defaults : plan/defaults/collaborator.yaml - template : openfl.component.Collaborator - settings : - db_store_rounds: 2 - delta_updates : true - opt_treatment : RESET - -data_loader : - defaults : plan/defaults/data_loader.yaml - template : src.tfmnist_inmemory.TensorFlowMNISTInMemory - settings : - collaborator_count : 2 - data_group_name : mnist - batch_size : 256 - -task_runner : - defaults : plan/defaults/task_runner.yaml - template : src.keras_cnn.KerasCNN - -network : - defaults : plan/defaults/network.yaml - -assigner : - defaults : plan/defaults/assigner.yaml - -tasks : - defaults : plan/defaults/tasks_keras.yaml - -compression_pipeline : - defaults : plan/defaults/compression_pipeline.yaml - template : openfl.pipelines.KCPipeline - settings : - n_clusters : 6 diff --git a/openfl-workspace/keras_cnn_with_compression/requirements.txt b/openfl-workspace/keras_cnn_with_compression/requirements.txt deleted file mode 100644 index af80212eeb..0000000000 --- a/openfl-workspace/keras_cnn_with_compression/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -tensorflow==2.13 diff --git a/openfl-workspace/keras_cnn_with_compression/src/keras_cnn.py b/openfl-workspace/keras_cnn_with_compression/src/keras_cnn.py deleted file mode 100644 index 35a71f7734..0000000000 --- a/openfl-workspace/keras_cnn_with_compression/src/keras_cnn.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""You may copy this file as the starting point of your own model.""" - -import tensorflow.keras as ke -from tensorflow.keras import Sequential -from tensorflow.keras.layers import Conv2D -from tensorflow.keras.layers import Dense -from tensorflow.keras.layers import Flatten - -from openfl.federated import KerasTaskRunner - - -class KerasCNN(KerasTaskRunner): - """A basic convolutional neural network model.""" - - def __init__(self, **kwargs): - """ - Initialize. - - Args: - **kwargs: Additional parameters to pass to the function - """ - super().__init__(**kwargs) - - self.model = self.build_model(self.feature_shape, self.data_loader.num_classes, **kwargs) - - self.initialize_tensorkeys_for_functions() - - self.model.summary(print_fn=self.logger.info) - - self.logger.info(f'Train Set Size : {self.get_train_data_size()}') - self.logger.info(f'Valid Set Size : {self.get_valid_data_size()}') - - def build_model(self, - input_shape, - num_classes, - conv_kernel_size=(4, 4), - conv_strides=(2, 2), - conv1_channels_out=16, - conv2_channels_out=32, - final_dense_inputsize=100, - **kwargs): - """ - Define the model architecture. 
- - Args: - input_shape (numpy.ndarray): The shape of the data - num_classes (int): The number of classes of the dataset - - Returns: - tensorflow.python.keras.engine.sequential.Sequential: The model defined in Keras - - """ - model = Sequential() - - model.add(Conv2D(conv1_channels_out, - kernel_size=conv_kernel_size, - strides=conv_strides, - activation='relu', - input_shape=input_shape)) - - model.add(Conv2D(conv2_channels_out, - kernel_size=conv_kernel_size, - strides=conv_strides, - activation='relu')) - - model.add(Flatten()) - - model.add(Dense(final_dense_inputsize, activation='relu')) - - model.add(Dense(num_classes, activation='softmax')) - - model.compile(loss=ke.losses.categorical_crossentropy, - optimizer=ke.optimizers.legacy.Adam(), - metrics=['accuracy']) - - # initialize the optimizer variables - opt_vars = model.optimizer.variables() - - for v in opt_vars: - v.initializer.run(session=self.sess) - - return model diff --git a/openfl-workspace/keras_cnn_with_compression/src/mnist_utils.py b/openfl-workspace/keras_cnn_with_compression/src/mnist_utils.py deleted file mode 100644 index d19e13d9dd..0000000000 --- a/openfl-workspace/keras_cnn_with_compression/src/mnist_utils.py +++ /dev/null @@ -1,118 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""You may copy this file as the starting point of your own model.""" - -from logging import getLogger - -import numpy as np -from tensorflow.python.keras.utils.data_utils import get_file - -logger = getLogger(__name__) - - -def one_hot(labels, classes): - """ - One Hot encode a vector. - - Args: - labels (list): List of labels to onehot encode - classes (int): Total number of categorical classes - - Returns: - np.array: Matrix of one-hot encoded labels - """ - return np.eye(classes)[labels] - - -def _load_raw_datashards(shard_num, collaborator_count): - """ - Load the raw data by shard. - - Returns tuples of the dataset shard divided into training and validation. - - Args: - shard_num (int): The shard number to use - collaborator_count (int): The number of collaborators in the federation - - Returns: - 2 tuples: (image, label) of the training, validation dataset - """ - origin_folder = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/' - path = get_file('mnist.npz', - origin=origin_folder + 'mnist.npz', - file_hash='731c5ac602752760c8e48fbffcf8c3b850d9dc2a2aedcf2cc48468fc17b673d1') - - with np.load(path) as f: - # get all of mnist - X_train_tot = f['x_train'] - y_train_tot = f['y_train'] - - X_valid_tot = f['x_test'] - y_valid_tot = f['y_test'] - - # create the shards - shard_num = int(shard_num) - X_train = X_train_tot[shard_num::collaborator_count] - y_train = y_train_tot[shard_num::collaborator_count] - - X_valid = X_valid_tot[shard_num::collaborator_count] - y_valid = y_valid_tot[shard_num::collaborator_count] - - return (X_train, y_train), (X_valid, y_valid) - - -def load_mnist_shard(shard_num, collaborator_count, categorical=True, - channels_last=True, **kwargs): - """ - Load the MNIST dataset. 
- - Args: - shard_num (int): The shard to use from the dataset - collaborator_count (int): The number of collaborators in the federation - categorical (bool): True = convert the labels to one-hot encoded - vectors (Default = True) - channels_last (bool): True = The input images have the channels - last (Default = True) - **kwargs: Additional parameters to pass to the function - - Returns: - list: The input shape - int: The number of classes - numpy.ndarray: The training data - numpy.ndarray: The training labels - numpy.ndarray: The validation data - numpy.ndarray: The validation labels - """ - img_rows, img_cols = 28, 28 - num_classes = 10 - - (X_train, y_train), (X_valid, y_valid) = _load_raw_datashards( - shard_num, collaborator_count - ) - - if channels_last: - X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1) - X_valid = X_valid.reshape(X_valid.shape[0], img_rows, img_cols, 1) - input_shape = (img_rows, img_cols, 1) - else: - X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols) - X_valid = X_valid.reshape(X_valid.shape[0], 1, img_rows, img_cols) - input_shape = (1, img_rows, img_cols) - - X_train = X_train.astype('float32') - X_valid = X_valid.astype('float32') - X_train /= 255 - X_valid /= 255 - - logger.info(f'MNIST > X_train Shape : {X_train.shape}') - logger.info(f'MNIST > y_train Shape : {y_train.shape}') - logger.info(f'MNIST > Train Samples : {X_train.shape[0]}') - logger.info(f'MNIST > Valid Samples : {X_valid.shape[0]}') - - if categorical: - # convert class vectors to binary class matrices - y_train = one_hot(y_train, num_classes) - y_valid = one_hot(y_valid, num_classes) - - return input_shape, num_classes, X_train, y_train, X_valid, y_valid diff --git a/openfl-workspace/keras_cnn_with_compression/src/tfmnist_inmemory.py b/openfl-workspace/keras_cnn_with_compression/src/tfmnist_inmemory.py deleted file mode 100644 index 80b913e5f5..0000000000 --- a/openfl-workspace/keras_cnn_with_compression/src/tfmnist_inmemory.py +++ /dev/null @@ -1,46 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""You may copy this file as the starting point of your own model.""" - -from openfl.federated import TensorFlowDataLoader -from .mnist_utils import load_mnist_shard - - -class TensorFlowMNISTInMemory(TensorFlowDataLoader): - """TensorFlow Data Loader for MNIST Dataset.""" - - def __init__(self, data_path, batch_size, **kwargs): - """ - Initialize. - - Args: - data_path: File path for the dataset - batch_size (int): The batch size for the data loader - **kwargs: Additional arguments, passed to super init and load_mnist_shard - """ - super().__init__(batch_size, **kwargs) - - # TODO: We should be downloading the dataset shard into a directory - # TODO: There needs to be a method to ask how many collaborators and - # what index/rank is this collaborator. - # Then we have a way to automatically shard based on rank and size of - # collaborator list. 
- try: - int(data_path) - except: - raise ValueError( - "Expected `%s` to be representable as `int`, as it refers to the data shard " + - "number used by the collaborator.", - data_path - ) - - _, num_classes, X_train, y_train, X_valid, y_valid = load_mnist_shard( - shard_num=int(data_path), **kwargs - ) - self.X_train = X_train - self.y_train = y_train - self.X_valid = X_valid - self.y_valid = y_valid - - self.num_classes = num_classes diff --git a/openfl-workspace/keras_nlp/plan/plan.yaml b/openfl-workspace/keras_nlp/plan/plan.yaml index 67e335aa02..ce7476ab85 100644 --- a/openfl-workspace/keras_nlp/plan/plan.yaml +++ b/openfl-workspace/keras_nlp/plan/plan.yaml @@ -5,23 +5,22 @@ aggregator : defaults : plan/defaults/aggregator.yaml template : openfl.component.Aggregator settings : - init_state_path : save/keras_nlp_init.pbuf - best_state_path : save/keras_nlp_best.pbuf - last_state_path : save/keras_nlp_last.pbuf + init_state_path : save/init.pbuf + best_state_path : save/best.pbuf + last_state_path : save/last.pbuf rounds_to_train : 10 collaborator : defaults : plan/defaults/collaborator.yaml template : openfl.component.Collaborator settings : - epochs_per_round : 10 - polling_interval : 4 + db_store_rounds: 2 delta_updates : false opt_treatment : RESET data_loader : defaults : plan/defaults/data_loader.yaml - template : src.nlp_dataloader.NLPDataLoader + template : src.dataloader.NLPDataLoader settings : collaborator_count : 2 batch_size : 64 @@ -30,7 +29,7 @@ data_loader : task_runner : defaults : plan/defaults/task_runner.yaml - template : src.nlp_taskrunner.KerasNLP + template : src.taskrunner.KerasNLP settings : latent_dim : 256 diff --git a/openfl-workspace/keras_nlp/requirements.txt b/openfl-workspace/keras_nlp/requirements.txt index af80212eeb..6bba4a277d 100644 --- a/openfl-workspace/keras_nlp/requirements.txt +++ b/openfl-workspace/keras_nlp/requirements.txt @@ -1 +1,2 @@ -tensorflow==2.13 +keras==3.6.0 +tensorflow==2.18.0 diff --git a/openfl-workspace/keras_nlp/src/nlp_dataloader.py b/openfl-workspace/keras_nlp/src/dataloader.py similarity index 93% rename from openfl-workspace/keras_nlp/src/nlp_dataloader.py rename to openfl-workspace/keras_nlp/src/dataloader.py index 2a60e68266..d7180b4d3c 100644 --- a/openfl-workspace/keras_nlp/src/nlp_dataloader.py +++ b/openfl-workspace/keras_nlp/src/dataloader.py @@ -5,9 +5,8 @@ license agreement between Intel Corporation and you. """ from logging import getLogger -from typing import Iterator -from typing import List from typing import Optional +from typing import Iterator from typing import Tuple from typing import Union @@ -57,7 +56,7 @@ def get_feature_shape(self) -> Tuple[int, ...]: """Get the shape of an example feature array.""" return self.X_train[0].shape - def get_train_loader(self, batch_size: Optional[int] = None) -> Iterator[List[np.ndarray]]: + def get_train_loader(self, batch_size: Optional[int] = None) -> Iterator[Tuple[np.ndarray]]: """ Get training data loader. @@ -68,7 +67,7 @@ def get_train_loader(self, batch_size: Optional[int] = None) -> Iterator[List[np return self._get_batch_generator(X1=self.X_train[0], X2=self.X_train[1], y=self.y_train, batch_size=batch_size) - def get_valid_loader(self, batch_size: Optional[int] = None) -> Iterator[List[np.ndarray]]: + def get_valid_loader(self, batch_size: Optional[int] = None) -> Iterator[Tuple[np.ndarray]]: """ Get validation data loader. 
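In the `keras_nlp` dataloader change above, the batch generator now yields a tuple of the two input arrays together with the target array (`(X1, X2), y` instead of a list), which is the `(inputs, targets)` structure Keras accepts from a generator for a multi-input model. A self-contained NumPy sketch of the same pattern, with toy shapes that are assumptions rather than values from the patch:

```python
import numpy as np

def batch_generator(x1, x2, y, batch_size, seed=0):
    """Yield ((x1_batch, x2_batch), y_batch) tuples over shuffled indices."""
    idxs = np.random.default_rng(seed).permutation(len(y))
    for start in range(0, len(y) - batch_size + 1, batch_size):
        sel = idxs[start:start + batch_size]
        # Multi-input model: a tuple of input arrays plus one target array.
        yield (x1[sel], x2[sel]), y[sel]

# Hypothetical usage with placeholder encoder/decoder shapes
x1 = np.zeros((128, 20, 71), dtype="float32")   # encoder inputs
x2 = np.zeros((128, 18, 93), dtype="float32")   # decoder inputs
y = np.zeros((128, 18, 93), dtype="float32")    # decoder targets
for (b1, b2), by in batch_generator(x1, x2, y, batch_size=64):
    assert b1.shape[0] == b2.shape[0] == by.shape[0] == 64
```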
@@ -100,7 +99,7 @@ def get_valid_data_size(self) -> int: def _batch_generator(X1: np.ndarray, X2: np.ndarray, y: np.ndarray, idxs: np.ndarray, batch_size: int, - num_batches: int) -> Iterator[List[np.ndarray]]: + num_batches: int) -> Iterator[Tuple[np.ndarray]]: """ Generate batch of data. @@ -116,11 +115,11 @@ def _batch_generator(X1: np.ndarray, X2: np.ndarray, for i in range(num_batches): a = i * batch_size b = a + batch_size - yield [X1[idxs[a:b]], X2[idxs[a:b]]], y[idxs[a:b]] + yield (X1[idxs[a:b]], X2[idxs[a:b]]), y[idxs[a:b]] def _get_batch_generator(self, X1: np.ndarray, X2: np.ndarray, y: np.ndarray, - batch_size: Union[int, None]) -> Iterator[List[np.ndarray]]: + batch_size: Union[int, None]): """ Return the dataset generator. diff --git a/openfl-workspace/keras_nlp/src/nlp_taskrunner.py b/openfl-workspace/keras_nlp/src/nlp_taskrunner.py deleted file mode 100644 index 7dc53716f4..0000000000 --- a/openfl-workspace/keras_nlp/src/nlp_taskrunner.py +++ /dev/null @@ -1,73 +0,0 @@ -"""Copyright (C) 2020-2021 Intel Corporation - SPDX-License-Identifier: Apache-2.0 - -Licensed subject to the terms of the separately executed evaluation -license agreement between Intel Corporation and you. -""" -from tensorflow import keras - -from openfl.federated import KerasTaskRunner - - -def build_model(latent_dim, num_encoder_tokens, num_decoder_tokens, **kwargs): - """ - Define the model architecture. - - Args: - input_shape (numpy.ndarray): The shape of the data - num_classes (int): The number of classes of the dataset - Returns: - tensorflow.python.keras.engine.sequential.Sequential: The model defined in Keras - """ - encoder_inputs = keras.Input(shape=(None, num_encoder_tokens)) - encoder = keras.layers.LSTM(latent_dim, return_state=True) - encoder_outputs, state_h, state_c = encoder(encoder_inputs) - - # We discard `encoder_outputs` and only keep the states. - encoder_states = [state_h, state_c] - - # Set up the decoder, using `encoder_states` as initial state. - decoder_inputs = keras.Input(shape=(None, num_decoder_tokens)) - - # We set up our decoder to return full output sequences, - # and to return internal states as well. We don't use the - # return states in the training model, but we will use them in inference. - decoder_lstm = keras.layers.LSTM(latent_dim, return_sequences=True, return_state=True) - decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states) - decoder_dense = keras.layers.Dense(num_decoder_tokens, activation='softmax') - decoder_outputs = decoder_dense(decoder_outputs) - - # Define the model that will turn - # `encoder_input_data` & `decoder_input_data` into `decoder_target_data` - model = keras.Model([encoder_inputs, decoder_inputs], decoder_outputs) - - model.compile( - optimizer=keras.optimizers.legacy.RMSprop(), - loss='categorical_crossentropy', metrics=['accuracy'] - ) - - return model - - -class KerasNLP(KerasTaskRunner): - """A basic convolutional neural network model.""" - - def __init__(self, latent_dim, **kwargs): - """ - Init taskrunner. 
- - Args: - **kwargs: Additional parameters to pass to the function - """ - super().__init__(**kwargs) - - self.model = build_model(latent_dim, - self.data_loader.num_encoder_tokens, - self.data_loader.num_decoder_tokens, - **kwargs) - - self.initialize_tensorkeys_for_functions() - - self.model.summary(print_fn=self.logger.info) - - self.logger.info(f'Train Set Size : {self.get_train_data_size()}') diff --git a/openfl-workspace/keras_nlp/src/taskrunner.py b/openfl-workspace/keras_nlp/src/taskrunner.py new file mode 100644 index 0000000000..88563452f7 --- /dev/null +++ b/openfl-workspace/keras_nlp/src/taskrunner.py @@ -0,0 +1,72 @@ +"""Copyright (C) 2020-2024 Intel Corporation + SPDX-License-Identifier: Apache-2.0 + +Licensed subject to the terms of the separately executed evaluation +license agreement between Intel Corporation and you. +""" +import keras + +from openfl.federated import KerasTaskRunner + + +class KerasNLP(KerasTaskRunner): + """A basic convolutional neural network model.""" + + def __init__(self, latent_dim, **kwargs): + """ + Init taskrunner. + + Args: + **kwargs: Additional parameters to pass to the function + """ + super().__init__(**kwargs) + + self.model = self.build_model(latent_dim, + self.data_loader.num_encoder_tokens, + self.data_loader.num_decoder_tokens, + **kwargs) + + self.initialize_tensorkeys_for_functions() + + self.model.summary(print_fn=self.logger.info) + + self.logger.info(f'Train Set Size : {self.get_train_data_size()}') + + def build_model(self, latent_dim, num_encoder_tokens, num_decoder_tokens, **kwargs): + """ + Define the model architecture. + + Args: + input_shape (numpy.ndarray): The shape of the data + num_classes (int): The number of classes of the dataset + Returns: + tensorflow.python.keras.engine.sequential.Sequential: The model defined in Keras + """ + encoder_inputs = keras.Input(shape=(None, num_encoder_tokens)) + encoder = keras.layers.LSTM(latent_dim, return_state=True) + encoder_outputs, state_h, state_c = encoder(encoder_inputs) + + # We discard `encoder_outputs` and only keep the states. + encoder_states = [state_h, state_c] + + # Set up the decoder, using `encoder_states` as initial state. + decoder_inputs = keras.Input(shape=(None, num_decoder_tokens)) + + # We set up our decoder to return full output sequences, + # and to return internal states as well. We don't use the + # return states in the training model, but we will use them in inference. 
+ decoder_lstm = keras.layers.LSTM(latent_dim, return_sequences=True, return_state=True) + decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states) + decoder_dense = keras.layers.Dense(num_decoder_tokens, activation='softmax') + decoder_outputs = decoder_dense(decoder_outputs) + + # Define the model that will turn + # `encoder_input_data` & `decoder_input_data` into `decoder_target_data` + model = keras.Model([encoder_inputs, decoder_inputs], decoder_outputs) + + model.compile( + optimizer="RMSprop", + loss='categorical_crossentropy', metrics=['accuracy'] + ) + + return model diff --git a/openfl-workspace/tf_2dunet/.workspace b/openfl-workspace/tf_2dunet/.workspace deleted file mode 100644 index 3c2c5d08b4..0000000000 --- a/openfl-workspace/tf_2dunet/.workspace +++ /dev/null @@ -1,2 +0,0 @@ -current_plan_name: default - diff --git a/openfl-workspace/tf_2dunet/plan/data.yaml b/openfl-workspace/tf_2dunet/plan/data.yaml deleted file mode 100644 index 69a3568b14..0000000000 --- a/openfl-workspace/tf_2dunet/plan/data.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. - -# all keys under 'collaborators' corresponds to a specific colaborator name the corresponding dictionary has data_name, data_path pairs. -# Note that in the mnist case we do not store the data locally, and the data_path is used to pass an integer that helps the data object -# construct the shard of the mnist dataset to be use for this collaborator. -one,/raid/datasets/MICCAI_BraTS_2019_Data_Training/HGG/0 -two,/raid/datasets/MICCAI_BraTS_2019_Data_Training/HGG/1 diff --git a/openfl-workspace/tf_2dunet/plan/defaults b/openfl-workspace/tf_2dunet/plan/defaults deleted file mode 100644 index fb82f9c5b6..0000000000 --- a/openfl-workspace/tf_2dunet/plan/defaults +++ /dev/null @@ -1,2 +0,0 @@ -../../workspace/plan/defaults - diff --git a/openfl-workspace/tf_2dunet/src/__init__.py b/openfl-workspace/tf_2dunet/src/__init__.py deleted file mode 100644 index f1410b1298..0000000000 --- a/openfl-workspace/tf_2dunet/src/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""You may copy this file as the starting point of your own model.""" diff --git a/openfl-workspace/tf_2dunet/src/tf_2dunet.py b/openfl-workspace/tf_2dunet/src/tf_2dunet.py deleted file mode 100644 index 5073344050..0000000000 --- a/openfl-workspace/tf_2dunet/src/tf_2dunet.py +++ /dev/null @@ -1,250 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""You may copy this file as the starting point of your own model.""" - -import tensorflow.compat.v1 as tf - -from openfl.federated import TensorFlowTaskRunner - -tf.disable_v2_behavior() - - -class TensorFlow2DUNet(TensorFlowTaskRunner): - """Initialize. - - Args: - **kwargs: Additional parameters to pass to the function - - """ - - def __init__(self, **kwargs): - """Initialize. - - Args: - **kwargs: Additional parameters to pass to the function - - """ - super().__init__(**kwargs) - - self.create_model(**kwargs) - self.initialize_tensorkeys_for_functions() - - def create_model(self, - training_smoothing=32.0, - validation_smoothing=1.0, - **kwargs): - """Create the TensorFlow 2D U-Net model. 
- - Args: - training_smoothing (float): (Default=32.0) - validation_smoothing (float): (Default=1.0) - **kwargs: Additional parameters to pass to the function - - """ - config = tf.ConfigProto() - config.gpu_options.allow_growth = True - config.intra_op_parallelism_threads = 112 - config.inter_op_parallelism_threads = 1 - self.sess = tf.Session(config=config) - - self.X = tf.placeholder(tf.float32, self.input_shape) - self.y = tf.placeholder(tf.float32, self.input_shape) - self.output = define_model(self.X, use_upsampling=True, **kwargs) - - self.loss = dice_coef_loss(self.y, self.output, smooth=training_smoothing) - self.loss_name = dice_coef_loss.__name__ - self.validation_metric = dice_coef( - self.y, self.output, smooth=validation_smoothing) - self.validation_metric_name = dice_coef.__name__ - - self.global_step = tf.train.get_or_create_global_step() - - self.tvars = tf.trainable_variables() - - self.optimizer = tf.train.RMSPropOptimizer(1e-2) - - self.gvs = self.optimizer.compute_gradients(self.loss, self.tvars) - self.train_step = self.optimizer.apply_gradients(self.gvs, - global_step=self.global_step) - - self.opt_vars = self.optimizer.variables() - - # FIXME: Do we really need to share the opt_vars? - # Two opt_vars for one tvar: gradient and square sum for RMSprop. - self.fl_vars = self.tvars + self.opt_vars - - self.initialize_globals() - - -def dice_coef(y_true, y_pred, smooth=1.0, **kwargs): - """Dice coefficient. - - Calculate the Dice Coefficient - - Args: - y_true: Ground truth annotation array - y_pred: Prediction array from model - smooth (float): Laplace smoothing factor (Default=1.0) - **kwargs: Additional parameters to pass to the function - - Returns: - float: Dice cofficient metric - - """ - intersection = tf.reduce_sum(y_true * y_pred, axis=[1, 2, 3]) - coef = ( - (tf.constant(2.) * intersection + tf.constant(smooth)) - / (tf.reduce_sum(y_true, axis=[1, 2, 3]) - + tf.reduce_sum(y_pred, axis=[1, 2, 3]) + tf.constant(smooth)) - ) - return tf.reduce_mean(coef) - - -def dice_coef_loss(y_true, y_pred, smooth=1.0, **kwargs): - """Dice coefficient loss. - - Calculate the -log(Dice Coefficient) loss - - Args: - y_true: Ground truth annotation array - y_pred: Prediction array from model - smooth (float): Laplace smoothing factor (Default=1.0) - **kwargs: Additional parameters to pass to the function - - Returns: - float: -log(Dice cofficient) metric - - """ - intersection = tf.reduce_sum(y_true * y_pred, axis=(1, 2, 3)) - - term1 = -tf.log(tf.constant(2.0) * intersection + smooth) - term2 = tf.log(tf.reduce_sum(y_true, axis=(1, 2, 3)) - + tf.reduce_sum(y_pred, axis=(1, 2, 3)) + smooth) - - term1 = tf.reduce_mean(term1) - term2 = tf.reduce_mean(term2) - - loss = term1 + term2 - - return loss - - -CHANNEL_LAST = True -if CHANNEL_LAST: - concat_axis = -1 - data_format = 'channels_last' -else: - concat_axis = 1 - data_format = 'channels_first' - -tf.keras.backend.set_image_data_format(data_format) - - -def define_model(input_tensor, - use_upsampling=False, - n_cl_out=1, - dropout=0.2, - print_summary=True, - activation_function='relu', - seed=0xFEEDFACE, - depth=5, - dropout_at=None, - initial_filters=32, - batch_norm=True, - **kwargs): - """Define the TensorFlow model. 
- - Args: - input_tensor: input shape ot the model - use_upsampling (bool): True = use bilinear interpolation; - False = use transposed convolution (Default=False) - n_cl_out (int): Number of channels in input layer (Default=1) - dropout (float): Dropout percentage (Default=0.2) - print_summary (bool): True = print the model summary (Default = True) - activation_function: The activation function to use after convolutional - layers (Default='relu') - seed: random seed (Default=0xFEEDFACE) - depth (int): Number of max pooling layers in encoder (Default=5) - dropout_at: Layers to perform dropout after (Default=[2,3]) - initial_filters (int): Number of filters in first convolutional - layer (Default=32) - batch_norm (bool): True = use batch normalization (Default=True) - **kwargs: Additional parameters to pass to the function - - """ - if dropout_at is None: - dropout_at = [2, 3] - # Set keras learning phase to train - tf.keras.backend.set_learning_phase(True) - - # Don't initialize variables on the fly - tf.keras.backend.manual_variable_initialization(False) - - inputs = tf.keras.layers.Input(tensor=input_tensor, name='Images') - - if activation_function == 'relu': - activation = tf.nn.relu - elif activation_function == 'leakyrelu': - activation = tf.nn.leaky_relu - - params = { - 'activation': activation, - 'data_format': data_format, - 'kernel_initializer': tf.keras.initializers.he_uniform(seed=seed), - 'kernel_size': (3, 3), - 'padding': 'same', - } - - convb_layers = {} - - net = inputs - filters = initial_filters - for i in range(depth): - name = f'conv{i + 1}a' - net = tf.keras.layers.Conv2D(name=name, filters=filters, **params)(net) - if i in dropout_at: - net = tf.keras.layers.Dropout(dropout)(net) - name = f'conv{i + 1}b' - net = tf.keras.layers.Conv2D(name=name, filters=filters, **params)(net) - if batch_norm: - net = tf.keras.layers.BatchNormalization()(net) - convb_layers[name] = net - # only pool if not last level - if i != depth - 1: - name = f'pool{i + 1}' - net = tf.keras.layers.MaxPooling2D(name=name, pool_size=(2, 2))(net) - filters *= 2 - - # do the up levels - filters //= 2 - for i in range(depth - 1): - if use_upsampling: - up = tf.keras.layers.UpSampling2D( - name=f'up{depth + i + 1}', size=(2, 2))(net) - else: - up = tf.keras.layers.Conv2DTranspose( - name='transConv6', filters=filters, data_format=data_format, - kernel_size=(2, 2), strides=(2, 2), padding='same')(net) - net = tf.keras.layers.concatenate( - [up, convb_layers[f'conv{depth - i - 1}b']], - axis=concat_axis - ) - net = tf.keras.layers.Conv2D( - name=f'conv{depth + i + 1}a', - filters=filters, **params)(net) - net = tf.keras.layers.Conv2D( - name=f'conv{depth + i + 1}b', - filters=filters, **params)(net) - filters //= 2 - - net = tf.keras.layers.Conv2D(name='Mask', filters=n_cl_out, - kernel_size=(1, 1), data_format=data_format, - activation='sigmoid')(net) - - model = tf.keras.models.Model(inputs=[inputs], outputs=[net]) - - if print_summary: - print(model.summary()) - - return net diff --git a/openfl-workspace/tf_3dunet_brats/.workspace b/openfl-workspace/tf_3dunet_brats/.workspace deleted file mode 100644 index 3c2c5d08b4..0000000000 --- a/openfl-workspace/tf_3dunet_brats/.workspace +++ /dev/null @@ -1,2 +0,0 @@ -current_plan_name: default - diff --git a/openfl-workspace/tf_3dunet_brats/README.md b/openfl-workspace/tf_3dunet_brats/README.md deleted file mode 100644 index 573800a132..0000000000 --- a/openfl-workspace/tf_3dunet_brats/README.md +++ /dev/null @@ -1,166 +0,0 @@ -# TensorFlow 3D U-Net for the 
BraTS dataset - -This is a full example for training the Brain Tumor Segmentation 2020 ([BraTS2020](https://www.med.upenn.edu/cbica/brats2020/data.html)) with OpenFL. - -*Note: This is **not** the 3D U-Net model that was used in the paper and not the sharding used. Nevertheless, it should make a good template for how to train using OpenFL.* - -The files `src\dataloader.py` and `src\define_model.py` are where we define the TensorFlow [dataset loader](https://www.tensorflow.org/api_docs/python/tf/data/Dataset) and the 3D U-Net model. In `src\dataloader.py` we demonstrate how to use an out-of-memory data loader that pulls batches of data from files as needed. - -## Steps to run - -1. Download the [BraTS 2020 dataset](https://www.med.upenn.edu/cbica/brats2020/registration.html). It should be the one labeled **BraTS'20 Training Data: Segmentation Task**. - -2. Extract the `MICCAI_BraTS2020_TrainingData.zip` zip file to any folder. Let's call that folder `${DATA_PATH}`. The file structure of `${DATA_PATH}` should look like this: - -```bash -user@localhost ~$ tree ${DATA_PATH} -L 2 -${DATA_PATH}/MICCAI_BraTS2020_TrainingData -├── BraTS20_Training_001 -│   ├── BraTS20_Training_001_flair.nii.gz <── The MRI FLAIR channel (best one for prediction) -│   ├── BraTS20_Training_001_seg.nii.gz <── The ground truth label -│   ├── BraTS20_Training_001_t1.nii.gz <── The T1-weighted MRI channel -│   ├── BraTS20_Training_001_t1ce.nii.gz <── The T1-Contrast Enhanced-weighted MRI channel -│   └── BraTS20_Training_001_t2.nii.gz <── The T2-weighted MRI channel -├── BraTS20_Training_002 -│   ├── BraTS20_Training_002_flair.nii.gz -│   ├── BraTS20_Training_002_seg.nii.gz -│   ├── BraTS20_Training_002_t1.nii.gz -│   ├── BraTS20_Training_002_t1ce.nii.gz -│   └── BraTS20_Training_002_t2.nii.gz -├── ... -├── BraTS20_Training_369 -│   ├── BraTS20_Training_369_flair.nii.gz -│   ├── BraTS20_Training_369_seg.nii.gz -│   ├── BraTS20_Training_369_t1.nii.gz -│   ├── BraTS20_Training_369_t1ce.nii.gz -│   └── BraTS20_Training_369_t2.nii.gz -├── name_mapping.csv -└── survival_info.csv -``` -If `tree` is not installed, then run `sudo apt-get install tree` to install it (Ubuntu). - -3. In order for each collaborator to use separate slice of data, we split main folder into subfolders, one for each collaborator. **NOTE:** In the real world each collaborator will have it's own data and this split already exists. We're splitting here to simulate a federation with different participants. - -#### `split_directory.sh` -```bash -#!/bin/bash -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# Split the BraTS data directory into NUM_COLLABORATORS - -SOURCE=${1} # The directory where the BraTS dataset is located (e.g. ~/data/MICCAI_BraTS2020_TrainingData) -DESTINATION=${2} # The destination directory for the randomized, split training data folders -NUM_COLLABORATORS=${3:-2} # The number of collaborator splits for the subdirectories - -help() { - echo - echo "=======================================================================" - echo "~$ split_directory.sh BRATS_DATA_SOURCE_DIRECTORY DESTINATION_DIRECTORY" - echo "=======================================================================" - echo - echo "BRATS_DATA_SOURCE_DIRECTORY: The directory where the BraTS dataset is located (e.g. ~/data/MICCAI_BraTS2020_TrainingData)" - echo "DESTINATION DIRECTORY: The destination directory for the randomized, split training data folders (e.g. 
~/brats_data_split)" - echo "NUM_COLLABORATORS: The number of collaborator splits for the subdirectories (default: 2)" - echo "-h, --help display this help and exit" - echo - echo -} - -if [ "$#" -lt 2 ] || ! [ -d ${1} ]; then - help - exit 1 -fi - -get_seeded_random() -{ - seed="$1" - openssl enc -aes-256-ctr -pass pass:"$seed" -nosalt \ - /dev/null -} - -# Remove the destination directory if it exists -if [ -d ${DESTINATION} ] -then - echo "Removing existing directory." - rm -r ${DESTINATION} -fi - -printf "Shard into ${NUM_COLLABORATORS} directories under ${DESTINATION}." -echo ' ' -spin='-\|/' - -n=0 -i=0 -# Find the subdirectories under the SOURCE directory and randomly shuffle them (seed is the same) -for f in `find ${SOURCE} -mindepth 1 -maxdepth 2 -type d | shuf --random-source=<(get_seeded_random 816)`; do - - ((n++)) - - # The folder to put the folder - idx=$((n % ${NUM_COLLABORATORS})) - - i=$(( (i+1) %4 )) - printf "\r${spin:$i:1} ${f}" - - d=${DESTINATION}/split_${idx}/ - - # Make the directory (if it doesn't exist) and copy the folder to it. - mkdir -p ${d} - cp -r ${f} ${d} - -done - -echo ' ' -echo ' ' -``` - -`~$ bash split_directory.sh ${DATA_PATH} ${NEW_PATH} ${NUMBER OF COLLABORATORS}` - -where `${NEW_PATH}` is where you want to copy the original data (and split it randomly into subdirectories). The default is 2 collaborators (so 2 splits). - -The new directories for the data are: -``` -${NEW_PATH} -├── split_0 -│   ├── BraTS20_Training_001 -│   ├── BraTS20_Training_002 -│   ├── BraTS20_Training_003 -│   ├── ... -└── split_1 - ├── BraTS20_Training_009 - ├── BraTS20_Training_014 - ├── BraTS20_Training_015 -    ├── ... -``` - -4. Now update the `plan/data.yaml` file to reflect the new data directories: - -``` -$ cat plan/data.yaml -# Copyright (C) 2020-2021 Intel Corporation -# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. - -# all keys under 'collaborators' corresponds to a specific colaborator name the corresponding dictionary has data_name, data_path pairs. -# Note that in the mnist case we do not store the data locally, and the data_path is used to pass an integer that helps the data object -# construct the shard of the mnist dataset to be use for this collaborator. -# -# collaborator_name,data_directory_path - -# You'll need to shard as necessary -# Symbolically link the ./data directory to whereever you have BraTS stored. -# e.g. ln -s ~/data/MICCAI_BraTS2020_TrainingData ./data/one - -one,${NEW_PATH}/split_0 -two,${NEW_PATH}/split_1 - -``` - -where you replace `${NEW_PATH}` by the new directory path - -5. We are ready to train! Try executing the [Hello Federation](https://openfl.readthedocs.io/en/latest/running_the_federation.baremetal.html#hello-federation-your-first-federated-learning-training) steps. Make sure you have `openfl` installed in your Python virtual environment. All you have to do is to specify collaborator data paths to slice folders. We have combined all 'Hello Federation' steps in a single bash script, so it is easier to test: - -```bash -bash tests/github/test_hello_federation.sh tf_3dunet_brats fed_work12345alpha81671 one123dragons beta34unicorns localhost --col1-data-path $NEW_PATH/split_0 --col2-data-path $NEW_PATH/$SUBFOLDER/split_1 --rounds-to-train 5 -``` -The result of the execution of the command above is 5 completed training rounds. 
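For reference, the same sharding described in the README above can be reproduced without the shell script. The snippet below is an illustrative sketch only (the `split_brats` helper and its defaults are assumptions, not part of the workspace); it produces the same `split_0`, `split_1`, ... layout that `plan/data.yaml` expects.

```python
# Illustrative Python equivalent of split_directory.sh (a sketch, not part of the workspace).
# Assumes the BraTS layout shown above: one sub-directory per case under SOURCE.
import argparse
import random
import shutil
from pathlib import Path


def split_brats(source: Path, destination: Path, num_collaborators: int = 2, seed: int = 816) -> None:
    """Copy each case folder into destination/split_<i>, round-robin after a seeded shuffle."""
    cases = sorted(p for p in source.iterdir() if p.is_dir())
    random.Random(seed).shuffle(cases)  # deterministic shuffle, analogous to the seeded shuf above
    for n, case in enumerate(cases):
        shard_dir = destination / f"split_{n % num_collaborators}"
        shard_dir.mkdir(parents=True, exist_ok=True)
        shutil.copytree(case, shard_dir / case.name, dirs_exist_ok=True)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Shard BraTS case folders across collaborators")
    parser.add_argument("source", type=Path)
    parser.add_argument("destination", type=Path)
    parser.add_argument("--num-collaborators", type=int, default=2)
    args = parser.parse_args()
    split_brats(args.source, args.destination, args.num_collaborators)
```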
diff --git a/openfl-workspace/tf_3dunet_brats/plan/cols.yaml b/openfl-workspace/tf_3dunet_brats/plan/cols.yaml deleted file mode 100644 index bdf28471ee..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/cols.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. - -collaborators: diff --git a/openfl-workspace/tf_3dunet_brats/plan/data.yaml b/openfl-workspace/tf_3dunet_brats/plan/data.yaml deleted file mode 100644 index d006410dda..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/data.yaml +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. - -# all keys under 'collaborators' corresponds to a specific colaborator name the corresponding dictionary has data_name, data_path pairs. -# Note that in the mnist case we do not store the data locally, and the data_path is used to pass an integer that helps the data object -# construct the shard of the mnist dataset to be use for this collaborator. -# -# collaborator_name,data_directory_path - -# You'll need to shard as necessary -# Symbolically link the ./data directory to whereever you have BraTS stored. -# e.g. ln -s ~/data/MICCAI_BraTS2020_TrainingData ./data/one - -one,~/MICCAI_BraTS2020_TrainingData/split_0 -two,~/MICCAI_BraTS2020_TrainingData/split_1 - diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/aggregator.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/aggregator.yaml deleted file mode 100644 index d3ef6e5082..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/aggregator.yaml +++ /dev/null @@ -1,4 +0,0 @@ -template : openfl.component.Aggregator -settings : - db_store_rounds : 1 - diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/assigner.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/assigner.yaml deleted file mode 100644 index 0b7e744475..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/assigner.yaml +++ /dev/null @@ -1,9 +0,0 @@ -template : openfl.component.RandomGroupedAssigner -settings : - task_groups : - - name : train_and_validate - percentage : 1.0 - tasks : - - aggregated_model_validation - - train - - locally_tuned_model_validation diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/collaborator.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/collaborator.yaml deleted file mode 100644 index a9c2e6eb7b..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/collaborator.yaml +++ /dev/null @@ -1,5 +0,0 @@ -template : openfl.component.Collaborator -settings : - opt_treatment : 'CONTINUE_LOCAL' - delta_updates : True - db_store_rounds : 1 diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/compression_pipeline.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/compression_pipeline.yaml deleted file mode 100644 index a508f94fd2..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/compression_pipeline.yaml +++ /dev/null @@ -1 +0,0 @@ -template: openfl.pipelines.NoCompressionPipeline diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/data_loader.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/data_loader.yaml deleted file mode 100644 index 33accd5ab2..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/data_loader.yaml +++ /dev/null @@ -1 +0,0 @@ -template: openfl.federated.DataLoader diff --git 
a/openfl-workspace/tf_3dunet_brats/plan/defaults/defaults b/openfl-workspace/tf_3dunet_brats/plan/defaults/defaults deleted file mode 100644 index fb82f9c5b6..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/defaults +++ /dev/null @@ -1,2 +0,0 @@ -../../workspace/plan/defaults - diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/network.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/network.yaml deleted file mode 100644 index 9528631585..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/network.yaml +++ /dev/null @@ -1,9 +0,0 @@ -template: openfl.federation.Network -settings: - agg_addr : auto - agg_port : auto - hash_salt : auto - disable_tls : False - client_reconnect_interval : 5 - disable_client_auth : False - cert_folder : cert diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/task_runner.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/task_runner.yaml deleted file mode 100644 index b162724693..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/task_runner.yaml +++ /dev/null @@ -1 +0,0 @@ -template: openfl.federated.task_runner.CoreTaskRunner diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_fast_estimator.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_fast_estimator.yaml deleted file mode 100644 index 1548d4b225..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_fast_estimator.yaml +++ /dev/null @@ -1,22 +0,0 @@ -aggregated_model_validation: - function : validate - kwargs : - batch_size : 32 - apply : global - metrics : - - accuracy - -locally_tuned_model_validation: - function : validate - kwargs : - batch_size : 32 - apply : local - metrics : - - accuracy -train: - function : train - kwargs : - batch_size : 32 - epochs : 1 - metrics : - - loss diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_keras.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_keras.yaml deleted file mode 100644 index 79d067d8d2..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_keras.yaml +++ /dev/null @@ -1,23 +0,0 @@ -aggregated_model_validation: - function : validate - kwargs : - batch_size : 32 - apply : global - metrics : - - accuracy - -locally_tuned_model_validation: - function : validate - kwargs : - batch_size : 32 - apply : local - metrics : - - accuracy - -train: - function : train - kwargs : - batch_size : 32 - epochs : 1 - metrics : - - loss diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_tensorflow.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_tensorflow.yaml deleted file mode 100644 index 586a885b40..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_tensorflow.yaml +++ /dev/null @@ -1,23 +0,0 @@ -aggregated_model_validation: - function : validate - kwargs : - batch_size : 32 - apply : global - metrics : - - acc - -locally_tuned_model_validation: - function : validate - kwargs : - batch_size : 32 - apply : local - metrics : - - acc - -train: - function : train_batches - kwargs : - batch_size : 32 - num_batches : 1 - metrics : - - loss diff --git a/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_torch.yaml b/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_torch.yaml deleted file mode 100644 index a240c2003b..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/defaults/tasks_torch.yaml +++ /dev/null @@ -1,19 +0,0 @@ -aggregated_model_validation: - function : validate - kwargs : - apply : global - metrics : - - acc - -locally_tuned_model_validation: - function : 
validate - kwargs : - apply: local - metrics : - - acc - -train: - function : train_batches - kwargs : - metrics : - - loss diff --git a/openfl-workspace/tf_3dunet_brats/plan/plan.yaml b/openfl-workspace/tf_3dunet_brats/plan/plan.yaml deleted file mode 100644 index 84f63f8067..0000000000 --- a/openfl-workspace/tf_3dunet_brats/plan/plan.yaml +++ /dev/null @@ -1,73 +0,0 @@ -aggregator: - defaults: plan/defaults/aggregator.yaml - settings: - best_state_path: save/tf_3dunet_brats_best.pbuf - init_state_path: save/tf_3dunet_brats_init.pbuf - last_state_path: save/tf_3dunet_brats_latest.pbuf - db_store_rounds: 2 - rounds_to_train: 10 - template: openfl.component.Aggregator -assigner: - defaults: plan/defaults/assigner.yaml - settings: - task_groups: - - name: train_and_validate - percentage: 1.0 - tasks: - - aggregated_model_validation - - train - - locally_tuned_model_validation - template: openfl.component.RandomGroupedAssigner -collaborator: - defaults: plan/defaults/collaborator.yaml - settings: - db_store_rounds: 2 - delta_updates: true - opt_treatment: RESET - template: openfl.component.Collaborator -data_loader: - defaults: plan/defaults/data_loader.yaml - settings: - batch_size: 4 - crop_dim: 64 - num_classes: 1 - number_input_channels: 1 - percent_train: 0.8 - template: src.tf_brats_dataloader.TensorFlowBratsDataLoader -network: - defaults: plan/defaults/network.yaml -task_runner: - defaults: plan/defaults/task_runner.yaml - settings: - batch_norm: true - batch_size: 4 - depth: 4 - initial_filters: 16 - use_upsampling: false - template: src.tf_3dunet_model.TensorFlow3dUNet -tasks: - aggregated_model_validation: - function: validate - kwargs: - apply: global - batch_size: 4 - metrics: - - dice_coef - - soft_dice_coef - defaults: plan/defaults/tasks_tensorflow.yaml - locally_tuned_model_validation: - function: validate - kwargs: - apply: local - batch_size: 4 - metrics: - - dice_coef - - soft_dice_coef - settings: {} - train: - function: train - kwargs: - batch_size: 4 - epochs: 1 - metrics: - - loss diff --git a/openfl-workspace/tf_3dunet_brats/requirements.txt b/openfl-workspace/tf_3dunet_brats/requirements.txt deleted file mode 100644 index a540f97940..0000000000 --- a/openfl-workspace/tf_3dunet_brats/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -keras==2.13.1 -nibabel -numpy - -setuptools>=65.5.1 # not directly required, pinned by Snyk to avoid a vulnerability -tensorflow==2.13.0 diff --git a/openfl-workspace/tf_3dunet_brats/split_directory.sh b/openfl-workspace/tf_3dunet_brats/split_directory.sh deleted file mode 100755 index 425c19ae42..0000000000 --- a/openfl-workspace/tf_3dunet_brats/split_directory.sh +++ /dev/null @@ -1,70 +0,0 @@ -#!/bin/bash -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -# Split the BraTS data directory into NUM_COLLABORATORS - -SOURCE=${1} # The directory where the BraTS dataset is located (e.g. ~/data/MICCAI_BraTS2020_TrainingData) -DESTINATION=${2} # The destination directory for the randomized, split training data folders -NUM_COLLABORATORS=${3:-2} # The number of collaborator splits for the subdirectories - -help() { - echo - echo "=======================================================================" - echo "~$ split_directory.sh BRATS_DATA_SOURCE_DIRECTORY DESTINATION_DIRECTORY" - echo "=======================================================================" - echo - echo "BRATS_DATA_SOURCE_DIRECTORY: The directory where the BraTS dataset is located (e.g. 
~/data/MICCAI_BraTS2020_TrainingData)" - echo "DESTINATION DIRECTORY: The destination directory for the randomized, split training data folders (e.g. ~/brats_data_split)" - echo "NUM_COLLABORATORS: The number of collaborator splits for the subdirectories (default: 2)" - echo "-h, --help display this help and exit" - echo - echo -} - -if [ "$#" -lt 2 ] || ! [ -d ${1} ]; then - help - exit 1 -fi - -get_seeded_random() -{ - seed="$1" - openssl enc -aes-256-ctr -pass pass:"$seed" -nosalt \ - /dev/null -} - -# Remove the destination directory if it exists -if [ -d ${DESTINATION} ] -then - echo "Removing existing directory." - rm -r ${DESTINATION} -fi - -printf "Shard into ${NUM_COLLABORATORS} directories under ${DESTINATION}." -echo ' ' -spin='-\|/' - -n=0 -i=0 -# Find the subdirectories under the SOURCE directory and randomly shuffle them (seed is the same) -for f in `find ${SOURCE} -mindepth 1 -maxdepth 2 -type d | shuf --random-source=<(get_seeded_random 816)`; do - - ((n++)) - - # The folder to put the folder - idx=$((n % ${NUM_COLLABORATORS})) - - i=$(( (i+1) %4 )) - printf "\r${spin:$i:1} ${f}" - - d=${DESTINATION}/split_${idx}/ - - # Make the directory (if it doesn't exist) and copy the folder to it. - mkdir -p ${d} - cp -r ${f} ${d} - -done - -echo ' ' -echo ' ' \ No newline at end of file diff --git a/openfl-workspace/tf_3dunet_brats/src/__init__.py b/openfl-workspace/tf_3dunet_brats/src/__init__.py deleted file mode 100644 index f1410b1298..0000000000 --- a/openfl-workspace/tf_3dunet_brats/src/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""You may copy this file as the starting point of your own model.""" diff --git a/openfl-workspace/tf_3dunet_brats/src/dataloader.py b/openfl-workspace/tf_3dunet_brats/src/dataloader.py deleted file mode 100644 index c91dca5495..0000000000 --- a/openfl-workspace/tf_3dunet_brats/src/dataloader.py +++ /dev/null @@ -1,303 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""You may copy this file as the starting point of your own model.""" - -import os - -import nibabel as nib -import numpy as np -import tensorflow as tf - - -class DatasetGenerator: - """Generate a TensorFlow data loader from the BraTS .nii.gz files.""" - - def __init__(self, crop_dim, - data_path, - batch_size=4, - number_input_channels=1, - num_classes=1, - train_test_split=0.80, - validate_test_split=0.5, - random_seed=816, - shard=0): - """Initialize the class.""" - self.data_path = os.path.abspath(os.path.expanduser(data_path)) - self.batch_size = batch_size - self.crop_dim = [crop_dim, crop_dim, crop_dim, number_input_channels] - self.num_input_channels = number_input_channels - self.num_classes = num_classes - self.random_seed = random_seed - - self.train_test_split = train_test_split - self.validate_test_split = validate_test_split - self.shard = shard - - self.create_file_list() - - if self.num_files > 0: - - self.ds_train, self.ds_val, self.ds_test = self.get_dataset() - - else: - - self.ds_train = None - self.ds_val = None - self.ds_test = None - raise ValueError(f'ERROR: No BraTS datafiles found under directory {self.data_path}') - - def create_file_list(self): - """ - Get list of the files from the BraTS raw data. - - Split into training and testing sets. 
- """ - extension = '_seg.nii.gz' - flair_extension = '_flair.nii.gz' - searchpath = os.path.join(self.data_path, "*/*" + extension) - filenames = tf.io.gfile.glob(searchpath) - - # check for uncompressed files - if not filenames: - extension = '_seg.nii' - flair_extension = '_flair.nii' - searchpath = os.path.join(self.data_path, "*/*" + extension) - filenames = tf.io.gfile.glob(searchpath) - - # Create a dictionary of tuples with image filename and label filename - - self.num_files = len(filenames) - self.filenames = {} - for idx, filename in enumerate(filenames): - self.filenames[idx] = [filename.replace(extension, flair_extension), filename] - - def z_normalize_img(self, img): - """ - Normalize the image. - - The mean value for each image is 0 and the standard deviation is 1. - """ - # TODO: Correct this for multiple MRI channels - return (img - np.mean(img)) / np.std(img) - - def crop(self, img, msk, randomize): - """Randomly crop the image and mask.""" - slices = [] - - # Do we randomize? - is_random = randomize and np.random.rand() > 0.5 - - for idx in range(len(img.shape) - 1): # Go through each dimension - - croplen = self.crop_dim[idx] - imglen = img.shape[idx] - - start = (imglen - croplen) // 2 - - ratio_crop = 0.20 # Crop up this this % of pixels for offset - # Number of pixels to offset crop in this dimension - offset = int(np.floor(start * ratio_crop)) - - if offset > 0: - if is_random: - start += np.random.choice(range(-offset, offset)) - if ((start + croplen) > imglen): # Don't fall off the image - start = (imglen - croplen) // 2 - else: - start = 0 - - slices.append(slice(start, start + croplen)) - - return img[tuple(slices)], msk[tuple(slices)] - - def augment_data(self, img, msk): - """ - Augmentation the input images. - - Flip image and mask. Rotate image and mask. - """ - # Determine if axes are equal and can be rotated - # If the axes aren't equal then we can't rotate them. 
- equal_dim_axis = [] - for idx in range(0, len(self.crop_dim)): - for jdx in range(idx + 1, len(self.crop_dim)): - if self.crop_dim[idx] == self.crop_dim[jdx]: - equal_dim_axis.append([idx, jdx]) # Valid rotation axes - dim_to_rotate = equal_dim_axis - - if np.random.rand() > 0.5: - # Random 0,1 (axes to flip) - ax = np.random.choice(np.arange(len(self.crop_dim) - 1)) - img = np.flip(img, ax) - msk = np.flip(msk, ax) - - elif (len(dim_to_rotate) > 0) and (np.random.rand() > 0.5): - rot = np.random.choice([1, 2, 3]) # 90, 180, or 270 degrees - - # This will choose the axes to rotate - # Axes must be equal in size - random_axis = dim_to_rotate[np.random.choice(len(dim_to_rotate))] - - img = np.rot90(img, rot, axes=random_axis) # Rotate axes 0 and 1 - msk = np.rot90(msk, rot, axes=random_axis) # Rotate axes 0 and 1 - - return img, msk - - def read_nifti_file(self, idx, randomize=False): - """Read Nifti file.""" - idx = idx.numpy() - imgfile = self.filenames[idx][0] - mskfile = self.filenames[idx][1] - - img_temp = np.array(nib.load(imgfile).dataobj) - img_temp = np.rot90(img_temp) - - img = np.zeros(list(img_temp.shape) + [self.num_input_channels]) - # Normalize - img_temp = self.z_normalize_img(img_temp) - - img[..., 0] = img_temp - - for channel in range(1, self.num_input_channels): - - if channel == 1: - imgfile = self.filenames[idx][1].replace('_flair', '_t1') - elif channel == 2: - imgfile = self.filenames[idx][1].replace('_flair', '_t1ce') - elif channel == 3: - imgfile = self.filenames[idx][1].replace('_flair', '_t2') - - img_temp = np.array(nib.load(imgfile).dataobj) - - img_temp = np.rot90(img_temp) - - # Normalize - img_temp = self.z_normalize_img(img_temp) - - img[..., channel] = img_temp - - msk = np.rot90(np.array(nib.load(mskfile).dataobj)) - msk = np.expand_dims(msk, -1) - - # labels: { - # 0: background, - # 1: edema, - # 2: non-enhancing tumor, - # 3: enhancing tumour} - - # Combine all masks but background - if self.num_classes == 1: - msk[msk > 0] = 1.0 - else: - msk_temp = np.zeros(list(msk.shape) + [self.num_classes]) - for channel in range(self.num_classes): - msk_temp[msk == channel, channel] = 1.0 - msk = msk_temp - - # Crop - img, msk = self.crop(img, msk, randomize) - - # Randomly rotate - if randomize: - img, msk = self.augment_data(img, msk) - - return img, msk - - def get_input_shape(self): - """Get the shape of the input.""" - return self.crop_dim - - def plot_images(self, ds, slice_num=90): - """Plot images from dataset.""" - import matplotlib.pyplot as plt - - plt.figure(figsize=(20, 20)) - - num_cols = 2 - - msk_channel = 0 - img_channel = 0 - - for img, msk in ds.take(1): - bs = img.shape[0] - - for idx in range(bs): - plt.subplot(bs, num_cols, idx * num_cols + 1) - plt.imshow(img[idx, :, :, slice_num, img_channel], cmap='bone') - plt.title('MRI', fontsize=18) - plt.subplot(bs, num_cols, idx * num_cols + 2) - plt.imshow(msk[idx, :, :, slice_num, msk_channel], cmap='bone') - plt.title('Tumor', fontsize=18) - - plt.show() - - print(f'Mean pixel value of image = {np.mean(img[0, :, :, :, 0])}') - - def display_train_images(self, slice_num=90): - """Plot some training images.""" - self.plot_images(self.ds_train, slice_num) - - def display_validation_images(self, slice_num=90): - """Plot some validation images.""" - self.plot_images(self.ds_val, slice_num) - - def display_test_images(self, slice_num=90): - """Plot some test images.""" - self.plot_images(self.ds_test, slice_num) - - def get_train(self): - """Return train dataset.""" - return self.ds_train - - def 
get_test(self): - """Return test dataset.""" - return self.ds_test - - def get_validate(self): - """Return validation dataset.""" - return self.ds_val - - def get_dataset(self): - """Create a TensorFlow data loader.""" - self.num_train = int(self.num_files * self.train_test_split) - numvaltest = self.num_files - self.num_train - - ds = tf.data.Dataset.range(self.num_files).shuffle( - self.num_files, self.random_seed) # Shuffle the dataset - - # Horovod Sharding - # Here we are not actually dividing the dataset into shards - # but instead just reshuffling the training dataset for every - # shard. Then in the training loop we just go through the training - # dataset but the number of steps is divided by the number of shards. - ds_train = ds.take(self.num_train).shuffle( - self.num_train, self.shard) # Reshuffle based on shard - ds_val_test = ds.skip(self.num_train) - self.num_val = int(numvaltest * self.validate_test_split) - self.num_test = self.num_train - self.num_val - ds_val = ds_val_test.take(self.num_val) - ds_test = ds_val_test.skip(self.num_val) - - ds_train = ds_train.map(lambda x: tf.py_function(self.read_nifti_file, - [x, True], [tf.float32, tf.float32]), - num_parallel_calls=tf.data.experimental.AUTOTUNE) - ds_val = ds_val.map(lambda x: tf.py_function(self.read_nifti_file, - [x, False], [tf.float32, tf.float32]), - num_parallel_calls=tf.data.experimental.AUTOTUNE) - ds_test = ds_test.map(lambda x: tf.py_function(self.read_nifti_file, - [x, False], [tf.float32, tf.float32]), - num_parallel_calls=tf.data.experimental.AUTOTUNE) - - ds_train = ds_train.batch(self.batch_size, drop_remainder=True) - ds_train = ds_train.prefetch(tf.data.experimental.AUTOTUNE) - - batch_size_val = max(1, self.batch_size // 2) # Could be any batch size you'd like - ds_val = ds_val.batch(batch_size_val, drop_remainder=True) - ds_val = ds_val.prefetch(tf.data.experimental.AUTOTUNE) - - batch_size_test = max(1, self.batch_size // 2) # Could be any batch size you'd like - ds_test = ds_test.batch(batch_size_test, drop_remainder=True) - ds_test = ds_test.prefetch(tf.data.experimental.AUTOTUNE) - - return ds_train, ds_val, ds_test diff --git a/openfl-workspace/tf_3dunet_brats/src/define_model.py b/openfl-workspace/tf_3dunet_brats/src/define_model.py deleted file mode 100644 index 148d66e9ad..0000000000 --- a/openfl-workspace/tf_3dunet_brats/src/define_model.py +++ /dev/null @@ -1,158 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""You may copy this file as the starting point of your own model.""" - -import tensorflow as tf - - -def dice_coef(target, prediction, axis=(1, 2, 3), smooth=0.0001): - """ - Sorenson Dice. - - Returns - ------- - dice coefficient (float) - """ - prediction = tf.round(prediction) # Round to 0 or 1 - - intersection = tf.reduce_sum(target * prediction, axis=axis) - union = tf.reduce_sum(target + prediction, axis=axis) - numerator = tf.constant(2.) * intersection + smooth - denominator = union + smooth - coef = numerator / denominator - - return tf.reduce_mean(coef) - - -def soft_dice_coef(target, prediction, axis=(1, 2, 3), smooth=0.0001): - """ - Soft Sorenson Dice. - - Does not round the predictions to either 0 or 1. - - Returns - ------- - soft dice coefficient (float) - """ - intersection = tf.reduce_sum(target * prediction, axis=axis) - union = tf.reduce_sum(target + prediction, axis=axis) - numerator = tf.constant(2.) 
* intersection + smooth - denominator = union + smooth - coef = numerator / denominator - - return tf.reduce_mean(coef) - - -def dice_loss(target, prediction, axis=(1, 2, 3), smooth=0.0001): - """ - Sorenson (Soft) Dice loss. - - Using -log(Dice) as the loss since it is better behaved. - Also, the log allows avoidance of the division which - can help prevent underflow when the numbers are very small. - - Returns - ------- - dice loss (float) - """ - intersection = tf.reduce_sum(prediction * target, axis=axis) - p = tf.reduce_sum(prediction, axis=axis) - t = tf.reduce_sum(target, axis=axis) - numerator = tf.reduce_mean(intersection + smooth) - denominator = tf.reduce_mean(t + p + smooth) - dice_loss = -tf.math.log(2. * numerator) + tf.math.log(denominator) - - return dice_loss - - -def build_model(input_shape, - n_cl_out=1, - use_upsampling=False, - dropout=0.2, - print_summary=True, - seed=816, - depth=5, - dropout_at=(2, 3), - initial_filters=16, - batch_norm=True, - **kwargs): - """Build the TensorFlow model. - - Args: - input_tensor: input shape ot the model - use_upsampling (bool): True = use bilinear interpolation; - False = use transposed convolution (Default=False) - n_cl_out (int): Number of channels in output layer (Default=1) - dropout (float): Dropout percentage (Default=0.2) - print_summary (bool): True = print the model summary (Default = True) - seed: random seed (Default=816) - depth (int): Number of max pooling layers in encoder (Default=5) - dropout_at: Layers to perform dropout after (Default=[2,3]) - initial_filters (int): Number of filters in first convolutional - layer (Default=16) - batch_norm (bool): True = use batch normalization (Default=True) - **kwargs: Additional parameters to pass to the function - """ - if (input_shape[0] % (2**depth)) > 0: - raise ValueError(f'Crop dimension must be a multiple of 2^(depth of U-Net) = {2**depth}') - - inputs = tf.keras.layers.Input(input_shape, name='brats_mr_image') - - activation = tf.keras.activations.relu - - params = {'kernel_size': (3, 3, 3), 'activation': activation, - 'padding': 'same', - 'kernel_initializer': tf.keras.initializers.he_uniform(seed=seed)} - - convb_layers = {} - - net = inputs - filters = initial_filters - for i in range(depth): - name = f'conv{i + 1}a' - net = tf.keras.layers.Conv3D(name=name, filters=filters, **params)(net) - if i in dropout_at: - net = tf.keras.layers.Dropout(dropout)(net) - name = f'conv{i + 1}b' - net = tf.keras.layers.Conv3D(name=name, filters=filters, **params)(net) - if batch_norm: - net = tf.keras.layers.BatchNormalization()(net) - convb_layers[name] = net - # only pool if not last level - if i != depth - 1: - name = f'pool{i + 1}' - net = tf.keras.layers.MaxPooling3D(name=name, pool_size=(2, 2, 2))(net) - filters *= 2 - - # do the up levels - filters //= 2 - for i in range(depth - 1): - if use_upsampling: - up = tf.keras.layers.UpSampling3D( - name=f'up{depth + i + 1}', size=(2, 2, 2))(net) - else: - up = tf.keras.layers.Conv3DTranspose(name=f'transConv{depth + i + 1}', - filters=filters, - kernel_size=(2, 2, 2), - strides=(2, 2, 2), - padding='same')(net) - net = tf.keras.layers.concatenate( - [up, convb_layers[f'conv{depth - i - 1}b']], - axis=-1 - ) - net = tf.keras.layers.Conv3D( - name=f'conv{depth + i + 1}a', - filters=filters, **params)(net) - net = tf.keras.layers.Conv3D( - name=f'conv{depth + i + 1}b', - filters=filters, **params)(net) - filters //= 2 - - net = tf.keras.layers.Conv3D(name='prediction', filters=n_cl_out, - kernel_size=(1, 1, 1), - 
activation='sigmoid')(net) - - model = tf.keras.models.Model(inputs=[inputs], outputs=[net]) - - return model diff --git a/openfl-workspace/tf_3dunet_brats/src/tf_3dunet_model.py b/openfl-workspace/tf_3dunet_brats/src/tf_3dunet_model.py deleted file mode 100644 index 25b3a4fdb1..0000000000 --- a/openfl-workspace/tf_3dunet_brats/src/tf_3dunet_model.py +++ /dev/null @@ -1,215 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""You may copy this file as the starting point of your own model.""" - -import tensorflow as tf - -from openfl.federated import KerasTaskRunner -from .define_model import build_model -from .define_model import dice_coef -from .define_model import dice_loss -from .define_model import soft_dice_coef - - -class TensorFlow3dUNet(KerasTaskRunner): - """Initialize. - - Args: - **kwargs: Additional parameters to pass to the function - - """ - - def __init__(self, initial_filters=16, - depth=5, - batch_norm=True, - use_upsampling=False, - **kwargs): - """Initialize. - - Args: - **kwargs: Additional parameters to pass to the function - - """ - super().__init__(**kwargs) - - self.model = self.create_model( - input_shape=self.feature_shape, - n_cl_out=self.data_loader.num_classes, - initial_filters=initial_filters, - use_upsampling=use_upsampling, - depth=depth, - batch_norm=batch_norm, - **kwargs - ) - self.initialize_tensorkeys_for_functions() - - self.model.summary(print_fn=self.logger.info, line_length=120) - - def create_model(self, - input_shape, - n_cl_out=1, - use_upsampling=False, - dropout=0.2, - print_summary=True, - seed=816, - depth=5, - dropout_at=(2, 3), - initial_filters=16, - batch_norm=True, - **kwargs): - """Create the TensorFlow 3D U-Net CNN model. - - Args: - input_shape (list): input shape of the data - n_cl_out (int): Number of output classes in label (Default=1) - **kwargs: Additional parameters to pass to the function - - """ - # - # Define Model - # - model = build_model(input_shape, - n_cl_out=n_cl_out, - use_upsampling=use_upsampling, - dropout=dropout, - print_summary=print_summary, - seed=seed, - depth=depth, - dropout_at=dropout_at, - initial_filters=initial_filters, - batch_norm=batch_norm) - - self.optimizer = tf.keras.optimizers.legacy.Adam() - - model.compile( - loss=dice_loss, - optimizer=self.optimizer, - metrics=[dice_coef, soft_dice_coef], - ) - - self.tvars = model.layers - print(f'layer names: {[var.name for var in self.tvars]}') - - self.opt_vars = self.optimizer.variables() - print(f'optimizer vars: {self.opt_vars}') - - # Two opt_vars for one tvar: gradient and square sum for RMSprop. 
- self.fl_vars = self.tvars + self.opt_vars - - return model - - -if __name__ == '__main__': - - from tf_brats_dataloader import DatasetGenerator - import os - - import argparse - - parser = argparse.ArgumentParser( - description='Train 3D U-Net model', add_help=True, - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - parser.add_argument('--data_path', - default='~/data/MICCAI_BraTS2020_TrainingData/', - # Or wherever you unzipped the BraTS datset, - help='Root directory for BraTS 2020 dataset') - parser.add_argument('--epochs', - type=int, - default=5, - help='Number of epochs') - parser.add_argument('--crop_dim', - type=int, - default=64, - help='Crop all dimensions to this (height, width, depth)') - parser.add_argument('--batch_size', - type=int, - default=4, - help='Training batch size') - parser.add_argument('--train_test_split', - type=float, - default=0.80, - help='Train/test split (0-1)') - parser.add_argument('--validate_test_split', - type=float, - default=0.50, - help='Validation/test split (0-1)') - parser.add_argument('--number_input_channels', - type=int, - default=1, - help='Number of input channels') - parser.add_argument('--num_classes', - type=int, - default=1, - help='Number of output classes/channels') - parser.add_argument('--random_seed', - default=816, - help='Random seed for determinism') - parser.add_argument('--print_model', - action='store_true', - default=True, - help='Print the summary of the model layers') - parser.add_argument('--filters', - type=int, - default=16, - help='Number of filters in the first convolutional layer') - parser.add_argument('--use_upsampling', - action='store_true', - default=False, - help='Use upsampling instead of transposed convolution') - parser.add_argument('--use_batchnorm', - action='store_true', - default=True, - help='Use batch normalization') - parser.add_argument('--saved_model_name', - default='saved_model_3DUnet', - help='Save model to this path') - - args = parser.parse_args() - - print(args) - - brats_data = DatasetGenerator(args.crop_dim, - data_path=os.path.abspath(os.path.expanduser(args.data_path)), - batch_size=args.batch_size, - train_test_split=args.train_test_split, - validate_test_split=args.validate_test_split, - number_input_channels=args.number_input_channels, - num_classes=args.num_classes, - random_seed=args.random_seed - ) - - model = build_model([args.crop_dim, args.crop_dim, args.crop_dim, args.number_input_channels], - use_upsampling=args.use_upsampling, - n_cl_out=args.num_classes, - dropout=0.2, - print_summary=args.print_model, - seed=args.random_seed, - depth=5, - dropout_at=[2, 3], - initial_filters=args.filters, - batch_norm=args.use_batchnorm - ) - - model.compile(loss=dice_loss, - optimizer=tf.keras.optimizers.legacy.Adam(learning_rate=0.01), - metrics=[dice_coef, soft_dice_coef] - ) - - checkpoint = tf.keras.callbacks.ModelCheckpoint(args.saved_model_name, - verbose=1, - save_best_only=True) - - # TensorBoard - import datetime - logs_dir = os.path.join('tensorboard_logs', - datetime.datetime.now().strftime('%Y%m%d-%H%M%S')) - tb_logs = tf.keras.callbacks.TensorBoard(log_dir=logs_dir) - - callbacks = [checkpoint, tb_logs] - - history = model.fit(brats_data.ds_train, - validation_data=brats_data.ds_val, - epochs=args.epochs, - callbacks=callbacks) diff --git a/openfl-workspace/tf_3dunet_brats/src/tf_brats_dataloader.py b/openfl-workspace/tf_3dunet_brats/src/tf_brats_dataloader.py deleted file mode 100644 index 85e5c576c3..0000000000 --- 
a/openfl-workspace/tf_3dunet_brats/src/tf_brats_dataloader.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""You may copy this file as the starting point of your own model.""" - - -import os - -from openfl.federated import TensorFlowDataLoader -from .dataloader import DatasetGenerator - - -class TensorFlowBratsDataLoader(TensorFlowDataLoader): - """TensorFlow Data Loader for the BraTS dataset.""" - - def __init__(self, data_path, batch_size=4, - crop_dim=64, percent_train=0.8, - pre_split_shuffle=True, - number_input_channels=1, - num_classes=1, - **kwargs): - """Initialize. - - Args: - data_path: The file path for the BraTS dataset - batch_size (int): The batch size to use - crop_dim (int): Crop the original image to this size on each dimension - percent_train (float): The percentage of the data to use for training (Default=0.8) - pre_split_shuffle (bool): True= shuffle the dataset before - performing the train/validate split (Default=True) - **kwargs: Additional arguments, passed to super init - - Returns: - Data loader with BraTS data - """ - super().__init__(batch_size, **kwargs) - - self.data_path = os.path.abspath(os.path.expanduser(data_path)) - self.batch_size = batch_size - self.crop_dim = [crop_dim, crop_dim, crop_dim, number_input_channels] - self.num_input_channels = number_input_channels - self.num_classes = num_classes - - self.train_test_split = percent_train - - self.brats_data = DatasetGenerator(crop_dim, - data_path=data_path, - number_input_channels=number_input_channels, - batch_size=batch_size, - train_test_split=percent_train, - validate_test_split=0.5, - num_classes=num_classes, - random_seed=816) - - def get_feature_shape(self): - """ - Get the shape of an example feature array. - - Returns: - tuple: shape of an example feature array - """ - return tuple(self.brats_data.get_input_shape()) - - def get_train_loader(self, batch_size=None, num_batches=None): - """ - Get training data loader. - - Returns - ------- - loader object - """ - return self.brats_data.ds_train - - def get_valid_loader(self, batch_size=None): - """ - Get validation data loader. - - Returns: - loader object - """ - return self.brats_data.ds_val - - def get_train_data_size(self): - """ - Get total number of training samples. - - Returns: - int: number of training samples - """ - return self.brats_data.num_train - - def get_valid_data_size(self): - """ - Get total number of validation samples. - - Returns: - int: number of validation samples - """ - return self.brats_data.num_val diff --git a/openfl-workspace/tf_cnn_histology/.workspace b/openfl-workspace/tf_cnn_histology/.workspace deleted file mode 100644 index 3c2c5d08b4..0000000000 --- a/openfl-workspace/tf_cnn_histology/.workspace +++ /dev/null @@ -1,2 +0,0 @@ -current_plan_name: default - diff --git a/openfl-workspace/tf_cnn_histology/plan/cols.yaml b/openfl-workspace/tf_cnn_histology/plan/cols.yaml deleted file mode 100644 index bdf28471ee..0000000000 --- a/openfl-workspace/tf_cnn_histology/plan/cols.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. 
- -collaborators: diff --git a/openfl-workspace/tf_cnn_histology/plan/defaults b/openfl-workspace/tf_cnn_histology/plan/defaults deleted file mode 100644 index fb82f9c5b6..0000000000 --- a/openfl-workspace/tf_cnn_histology/plan/defaults +++ /dev/null @@ -1,2 +0,0 @@ -../../workspace/plan/defaults - diff --git a/openfl-workspace/tf_cnn_histology/plan/plan.yaml b/openfl-workspace/tf_cnn_histology/plan/plan.yaml deleted file mode 100644 index f834794113..0000000000 --- a/openfl-workspace/tf_cnn_histology/plan/plan.yaml +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you. - -aggregator : - defaults : plan/defaults/aggregator.yaml - template : openfl.component.Aggregator - settings : - init_state_path : save/tf_cnn_histology_init.pbuf - last_state_path : save/tf_cnn_histology_latest.pbuf - best_state_path : save/tf_cnn_histology_best.pbuf - db_store_rounds: 2 - rounds_to_train : 10 - -collaborator : - defaults : plan/defaults/collaborator.yaml - template : openfl.component.Collaborator - settings : - delta_updates : true - db_store_rounds: 2 - opt_treatment : RESET - -data_loader : - defaults : plan/defaults/data_loader.yaml - template : src.tfhistology_inmemory.TensorFlowHistologyInMemory - settings : - batch_size: 64 - percent_train: 0.8 - collaborator_count : 2 - data_group_name : histology - -task_runner : - defaults : plan/defaults/task_runner.yaml - template : src.tf_cnn.TensorFlowCNN - -network : - defaults : plan/defaults/network.yaml - -assigner : - defaults : plan/defaults/assigner.yaml - -tasks: - defaults: plan/defaults/tasks_tensorflow.yaml - aggregated_model_validation: - function: validate - kwargs: - apply: global - batch_size: 32 - metrics: - - sparse_categorical_accuracy - locally_tuned_model_validation: - function: validate - kwargs: - apply: local - batch_size: 32 - metrics: - - sparse_categorical_accuracy - settings: {} - train: - function: train - kwargs: - batch_size: 32 - epochs: 1 - metrics: - - loss \ No newline at end of file diff --git a/openfl-workspace/tf_cnn_histology/requirements.txt b/openfl-workspace/tf_cnn_histology/requirements.txt deleted file mode 100644 index 59ee6430c8..0000000000 --- a/openfl-workspace/tf_cnn_histology/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -pillow -tensorflow==2.13 -tensorflow-datasets diff --git a/openfl-workspace/tf_cnn_histology/src/__init__.py b/openfl-workspace/tf_cnn_histology/src/__init__.py deleted file mode 100644 index f1410b1298..0000000000 --- a/openfl-workspace/tf_cnn_histology/src/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -"""You may copy this file as the starting point of your own model.""" diff --git a/openfl-workspace/tf_cnn_histology/src/tf_cnn.py b/openfl-workspace/tf_cnn_histology/src/tf_cnn.py deleted file mode 100644 index 29e40a842c..0000000000 --- a/openfl-workspace/tf_cnn_histology/src/tf_cnn.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""You may copy this file as the starting point of your own model.""" - -import tensorflow as tf - -from openfl.federated import KerasTaskRunner - - -class TensorFlowCNN(KerasTaskRunner): - """Initialize. - - Args: - **kwargs: Additional parameters to pass to the function - - """ - - def __init__(self, **kwargs): - """Initialize. 
- - Args: - **kwargs: Additional parameters to pass to the function - - """ - super().__init__(**kwargs) - - self.model = self.create_model( - self.feature_shape, - self.data_loader.num_classes, - **kwargs - ) - self.initialize_tensorkeys_for_functions() - - def create_model(self, - input_shape, - num_classes, - training_smoothing=32.0, - validation_smoothing=1.0, - **kwargs): - """Create the TensorFlow CNN Histology model. - - Args: - training_smoothing (float): (Default=32.0) - validation_smoothing (float): (Default=1.0) - **kwargs: Additional parameters to pass to the function - - """ - print(tf.config.threading.get_intra_op_parallelism_threads()) - print(tf.config.threading.get_inter_op_parallelism_threads()) - # ## Define Model - # - # Convolutional neural network model - - inputs = tf.keras.layers.Input(shape=input_shape) - conv = tf.keras.layers.Conv2D( - filters=16, kernel_size=(3, 3), padding='same', activation='relu')(inputs) - conv = tf.keras.layers.Conv2D( - filters=32, kernel_size=(3, 3), padding='same', activation='relu')(conv) - maxpool = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(conv) - - conv = tf.keras.layers.Conv2D( - filters=64, kernel_size=(3, 3), padding='same', activation='relu')(maxpool) - conv = tf.keras.layers.Conv2D( - filters=128, kernel_size=(3, 3), padding='same', activation='relu')(conv) - concat = tf.keras.layers.concatenate([maxpool, conv]) - maxpool = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(concat) - - conv = tf.keras.layers.Conv2D( - filters=256, kernel_size=(3, 3), padding='same', activation='relu')(maxpool) - conv = tf.keras.layers.Conv2D( - filters=512, kernel_size=(3, 3), padding='same', activation='relu')(conv) - concat = tf.keras.layers.concatenate([maxpool, conv]) - maxpool = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(concat) - - conv = tf.keras.layers.Conv2D( - filters=256, kernel_size=(3, 3), padding='same', activation='relu')(maxpool) - conv = tf.keras.layers.Conv2D( - filters=512, kernel_size=(3, 3), padding='same', activation='relu')(conv) - concat = tf.keras.layers.concatenate([maxpool, conv]) - maxpool = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(concat) - - flat = tf.keras.layers.Flatten()(maxpool) - dense = tf.keras.layers.Dense(128)(flat) - drop = tf.keras.layers.Dropout(0.5)(dense) - - predict = tf.keras.layers.Dense(num_classes)(drop) - - model = tf.keras.models.Model(inputs=[inputs], outputs=[predict]) - - self.optimizer = tf.keras.optimizers.legacy.Adam() - - model.compile( - loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), - optimizer=self.optimizer, - metrics=[tf.keras.metrics.SparseCategoricalAccuracy()], - ) - - self.tvars = model.layers - print(f'layer names: {[var.name for var in self.tvars]}') - - self.opt_vars = self.optimizer.variables() - print(f'optimizer vars: {self.opt_vars}') - - # Two opt_vars for one tvar: gradient and square sum for RMSprop. 
- self.fl_vars = self.tvars + self.opt_vars - - return model diff --git a/openfl-workspace/tf_cnn_histology/src/tfds_utils.py b/openfl-workspace/tf_cnn_histology/src/tfds_utils.py deleted file mode 100644 index 92977ebad0..0000000000 --- a/openfl-workspace/tf_cnn_histology/src/tfds_utils.py +++ /dev/null @@ -1,126 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""You may copy this file as the starting point of your own model.""" - -from logging import getLogger - -import numpy as np -import tensorflow_datasets as tfds - -logger = getLogger(__name__) - - -def one_hot(labels, classes): - """ - One Hot encode a vector. - - Args: - labels (list): List of labels to onehot encode - classes (int): Total number of categorical classes - - Returns: - np.array: Matrix of one-hot encoded labels - """ - return np.eye(classes)[labels] - - -def _load_raw_datashards(shard_num, collaborator_count): - """ - Load the raw data by shard. - - Returns tuples of the dataset shard divided into training and validation. - - Args: - shard_num (int): The shard number to use - collaborator_count (int): The number of collaborators in the federation - - Returns: - 2 tuples: (image, label) of the training, validation dataset - """ - (ds), metadata = tfds.load('colorectal_histology', data_dir='.', - shuffle_files=False, split='train', batch_size=-1, - with_info=True, as_supervised=True) - - image, label = tfds.as_numpy(ds) - - np.random.seed(42) - shuf = np.random.permutation(len(image)) - image = image[shuf] - label = label[shuf] - - split = int(len(image) * 0.8) - - X_train_tot = image[:split] - y_train_tot = label[:split] - - X_valid_tot = image[split:] - y_valid_tot = label[split:] - - shard_num = int(shard_num) - - # create the shards - X_train = X_train_tot[shard_num::collaborator_count] - y_train = y_train_tot[shard_num::collaborator_count] - - X_valid = X_valid_tot[shard_num::collaborator_count] - y_valid = y_valid_tot[shard_num::collaborator_count] - - return (X_train, y_train), (X_valid, y_valid) - - -def load_histology_shard(shard_num, collaborator_count, categorical=True, - channels_last=True, **kwargs): - """ - Load the colorectal histology dataset. 
- - Args: - shard_num (int): The shard to use from the dataset - collaborator_count (int): The number of collaborators in the federation - categorical (bool): True = convert the labels to one-hot encoded - vectors (Default = True) - channels_last (bool): True = The input images have the channels last - (Default = True) - **kwargs: Additional parameters to pass to the function - - Returns: - list: The input shape - int: The number of classes - numpy.ndarray: The training data - numpy.ndarray: The training labels - numpy.ndarray: The validation data - numpy.ndarray: The validation labels - """ - num_classes = 8 - img_rows = 150 - img_cols = 150 - channels = 3 - - (X_train, y_train), (X_valid, y_valid) = _load_raw_datashards( - shard_num, collaborator_count) - - if channels_last: - X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, channels) - X_valid = X_valid.reshape(X_valid.shape[0], img_rows, img_cols, channels) - input_shape = (img_rows, img_cols, channels) - else: - X_train = X_train.reshape(X_train.shape[0], channels, img_rows, img_cols) - X_valid = X_valid.reshape(X_valid.shape[0], channels, img_rows, img_cols) - input_shape = (channels, img_rows, img_cols) - - X_train = X_train.astype('float32') - X_valid = X_valid.astype('float32') - X_train /= 255 - X_valid /= 255 - - logger.info(f'Histology > X_train Shape : {X_train.shape}') - logger.info(f'Histology > y_train Shape : {y_train.shape}') - logger.info(f'Histology > Train Samples : {X_train.shape[0]}') - logger.info(f'Histology > Valid Samples : {X_valid.shape[0]}') - - if categorical: - # convert class vectors to binary class matrices - y_train = one_hot(y_train, num_classes) - y_valid = one_hot(y_valid, num_classes) - - return input_shape, num_classes, X_train, y_train, X_valid, y_valid diff --git a/openfl-workspace/tf_cnn_histology/src/tfhistology_inmemory.py b/openfl-workspace/tf_cnn_histology/src/tfhistology_inmemory.py deleted file mode 100644 index 69cf5fc7e6..0000000000 --- a/openfl-workspace/tf_cnn_histology/src/tfhistology_inmemory.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (C) 2020-2021 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -"""You may copy this file as the starting point of your own model.""" - -from openfl.federated import TensorFlowDataLoader -from .tfds_utils import load_histology_shard - - -class TensorFlowHistologyInMemory(TensorFlowDataLoader): - """TensorFlow Data Loader for Colorectal Histology Dataset.""" - - def __init__(self, data_path, batch_size, **kwargs): - """ - Initialize. 
- - Args: - data_path: File path for the dataset - batch_size (int): The batch size for the data loader - **kwargs: Additional arguments, passed to super init and load_mnist_shard - """ - super().__init__(batch_size, **kwargs) - - _, num_classes, X_train, y_train, X_valid, y_valid = load_histology_shard( - shard_num=data_path, - categorical=False, **kwargs - ) - - self.X_train = X_train - self.y_train = y_train - self.X_valid = X_valid - self.y_valid = y_valid - - self.num_classes = num_classes diff --git a/openfl-workspace/workspace/plan/defaults/tasks_keras.yaml b/openfl-workspace/workspace/plan/defaults/tasks_keras.yaml index 79d067d8d2..0ef460da87 100644 --- a/openfl-workspace/workspace/plan/defaults/tasks_keras.yaml +++ b/openfl-workspace/workspace/plan/defaults/tasks_keras.yaml @@ -1,5 +1,5 @@ aggregated_model_validation: - function : validate + function : validate_task kwargs : batch_size : 32 apply : global @@ -7,7 +7,7 @@ aggregated_model_validation: - accuracy locally_tuned_model_validation: - function : validate + function : validate_task kwargs : batch_size : 32 apply : local @@ -15,7 +15,7 @@ locally_tuned_model_validation: - accuracy train: - function : train + function : train_task kwargs : batch_size : 32 epochs : 1 diff --git a/openfl-workspace/workspace/plan/defaults/tasks_tensorflow.yaml b/openfl-workspace/workspace/plan/defaults/tasks_tensorflow.yaml deleted file mode 100644 index 6d000cc618..0000000000 --- a/openfl-workspace/workspace/plan/defaults/tasks_tensorflow.yaml +++ /dev/null @@ -1,23 +0,0 @@ -aggregated_model_validation: - function : validate - kwargs : - batch_size : 32 - apply : global - metrics : - - acc - -locally_tuned_model_validation: - function : validate - kwargs : - batch_size : 32 - apply : local - metrics : - - acc - -train: - function : train_batches - kwargs : - batch_size : 32 - metrics : - - loss - epochs : 1 diff --git a/openfl/federated/__init__.py b/openfl/federated/__init__.py index 369ca3b54f..9a47b60633 100644 --- a/openfl/federated/__init__.py +++ b/openfl/federated/__init__.py @@ -10,11 +10,11 @@ from openfl.federated.plan import Plan # NOQA from openfl.federated.task import TaskRunner # NOQA -if util.find_spec("tensorflow") is not None: +if util.find_spec("keras") is not None: from openfl.federated.data import FederatedDataSet # NOQA - from openfl.federated.data import KerasDataLoader, TensorFlowDataLoader + from openfl.federated.data import KerasDataLoader from openfl.federated.task import FederatedModel # NOQA - from openfl.federated.task import KerasTaskRunner, TensorFlowTaskRunner + from openfl.federated.task import KerasTaskRunner if util.find_spec("torch") is not None: from openfl.federated.data import FederatedDataSet # NOQA from openfl.federated.data import PyTorchDataLoader diff --git a/openfl/federated/data/__init__.py b/openfl/federated/data/__init__.py index 67cfcc094e..fe12bd9f5a 100644 --- a/openfl/federated/data/__init__.py +++ b/openfl/federated/data/__init__.py @@ -15,10 +15,9 @@ from openfl.federated.data.loader import DataLoader # NOQA -if util.find_spec("tensorflow") is not None: +if util.find_spec("keras") is not None: from openfl.federated.data.federated_data import FederatedDataSet # NOQA from openfl.federated.data.loader_keras import KerasDataLoader # NOQA - from openfl.federated.data.loader_tf import TensorFlowDataLoader # NOQA if util.find_spec("torch") is not None: from openfl.federated.data.federated_data import FederatedDataSet # NOQA diff --git a/openfl/federated/data/loader_tf.py 
b/openfl/federated/data/loader_tf.py deleted file mode 100644 index 23736ead8b..0000000000 --- a/openfl/federated/data/loader_tf.py +++ /dev/null @@ -1,130 +0,0 @@ -# Copyright 2020-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -"""TensorflowDataLoader module.""" - -import numpy as np - -from openfl.federated.data.loader import DataLoader - - -class TensorFlowDataLoader(DataLoader): - """A class used to represent a Federation Data Loader for TensorFlow - models. - - Attributes: - batch_size (int): Size of batches used for all data loaders. - X_train (np.array): Training features. - y_train (np.array): Training labels. - X_valid (np.array): Validation features. - y_valid (np.array): Validation labels. - """ - - def __init__(self, batch_size, **kwargs): - """Initializes the TensorFlowDataLoader object with the batch size and - any additional arguments. - - Args: - batch_size (int): The size of batches used for all data loaders. - kwargs: Additional arguments to pass to the function. - """ - self.batch_size = batch_size - self.X_train = None - self.y_train = None - self.X_valid = None - self.y_valid = None - - # Child classes should have init signature: - # (self, batch_size, **kwargs), should call this __init__ and then - # define self.X_train, self.y_train, self.X_valid, and self.y_valid - - def get_feature_shape(self): - """Returns the shape of an example feature array. - - Returns: - tuple: The shape of an example feature array. - """ - return self.X_train[0].shape - - def get_train_loader(self, batch_size=None): - """Returns the data loader for the training data. - - Args: - batch_size (int, optional): The batch size for the data loader - (default is None). - - Returns: - DataLoader: The DataLoader object for the training data. - """ - return self._get_batch_generator(X=self.X_train, y=self.y_train, batch_size=batch_size) - - def get_valid_loader(self, batch_size=None): - """Returns the data loader for the validation data. - - Args: - batch_size (int, optional): The batch size for the data loader - (default is None). - - Returns: - DataLoader: The DataLoader object for the validation data. - """ - return self._get_batch_generator(X=self.X_valid, y=self.y_valid, batch_size=batch_size) - - def get_train_data_size(self): - """Returns the total number of training samples. - - Returns: - int: The total number of training samples. - """ - return self.X_train.shape[0] - - def get_valid_data_size(self): - """Returns the total number of validation samples. - - Returns: - int: The total number of validation samples. - """ - return self.X_valid.shape[0] - - @staticmethod - def _batch_generator(X, y, idxs, batch_size, num_batches): - """Generates batches of data. - - Args: - X (np.array): The input data. - y (np.array): The label data. - idxs (np.array): The index of the dataset. - batch_size (int): The batch size for the data loader. - num_batches (int): The number of batches. - - Yields: - tuple: The input data and label data for each batch. - """ - for i in range(num_batches): - a = i * batch_size - b = a + batch_size - yield X[idxs[a:b]], y[idxs[a:b]] - - def _get_batch_generator(self, X, y, batch_size): - """Returns the dataset generator. - - Args: - X (np.array): The input data. - y (np.array): The label data. - batch_size (int): The batch size for the data loader. - - Returns: - generator: The dataset generator. 
- """ - if batch_size is None: - batch_size = self.batch_size - - # shuffle data indices - idxs = np.random.permutation(np.arange(X.shape[0])) - - # compute the number of batches - num_batches = int(np.ceil(X.shape[0] / batch_size)) - - # build the generator and return it - return self._batch_generator(X, y, idxs, batch_size, num_batches) diff --git a/openfl/federated/task/__init__.py b/openfl/federated/task/__init__.py index bdb6313e61..5b79f28193 100644 --- a/openfl/federated/task/__init__.py +++ b/openfl/federated/task/__init__.py @@ -15,10 +15,9 @@ from openfl.federated.task.runner import TaskRunner # NOQA -if util.find_spec("tensorflow") is not None: +if util.find_spec("keras") is not None: from openfl.federated.task.fl_model import FederatedModel # NOQA from openfl.federated.task.runner_keras import KerasTaskRunner # NOQA - from openfl.federated.task.runner_tf import TensorFlowTaskRunner # NOQA if util.find_spec("torch") is not None: from openfl.federated.task.fl_model import FederatedModel # NOQA from openfl.federated.task.runner_pt import PyTorchTaskRunner # NOQA diff --git a/openfl/federated/task/runner_keras.py b/openfl/federated/task/runner_keras.py index 2d992c83bb..e2dd069f72 100644 --- a/openfl/federated/task/runner_keras.py +++ b/openfl/federated/task/runner_keras.py @@ -3,11 +3,12 @@ """ -Base classes for developing a ke.Model() Federated Learning model. +Base classes for developing a keras.Model() Federated Learning model. You may copy this file as the starting point of your own keras model. """ +import copy from warnings import catch_warnings, simplefilter import numpy as np @@ -18,15 +19,14 @@ with catch_warnings(): simplefilter(action="ignore") - import tensorflow as tf - import tensorflow.keras as ke + import keras class KerasTaskRunner(TaskRunner): """The base model for Keras models in the federation. Attributes: - model (ke.Model): The Keras model. + model (keras.Model): The Keras model. model_tensor_names (list): List of model tensor names. required_tensorkeys_for_function (dict): A map of all of the required tensors for each of the public functions in KerasTaskRunner. @@ -40,14 +40,13 @@ def __init__(self, **kwargs): """ super().__init__(**kwargs) - self.model = ke.Model() + self.model = keras.models.Model() self.model_tensor_names = [] # this is a map of all of the required tensors for each of the public # functions in KerasTaskRunner self.required_tensorkeys_for_function = {} - ke.backend.clear_session() def rebuild_model(self, round_num, input_tensor_dict, validation=False): """Parse tensor names and update weights of model. Handles the @@ -67,7 +66,7 @@ def rebuild_model(self, round_num, input_tensor_dict, validation=False): else: self.set_tensor_dict(input_tensor_dict, with_opt_vars=False) - def train( + def train_task( self, col_name, round_num, @@ -102,7 +101,7 @@ def train( self.rebuild_model(round_num, input_tensor_dict) for epoch in range(epochs): self.logger.info("Run %s epoch of %s round", epoch, round_num) - results = self.train_iteration( + results = self.train_( self.data_loader.get_train_loader(batch_size), metrics=metrics, **kwargs, @@ -162,9 +161,10 @@ def train( if self.opt_treatment == "CONTINUE_GLOBAL": self.initialize_tensorkeys_for_functions(with_opt_vars=True) + self.update_tensorkeys_for_functions() return global_tensor_dict, local_tensor_dict - def train_iteration(self, batch_generator, metrics: list = None, **kwargs): + def train_(self, batch_generator, metrics: list = None, **kwargs): """Train single epoch. 
Override this function for custom training. Args: @@ -182,16 +182,16 @@ def train_iteration(self, batch_generator, metrics: list = None, **kwargs): # initialization (build_model). # If metrics are added (i.e. not a subset of what was originally # defined) then the model must be recompiled. - model_metrics_names = self.model.metrics_names + results = self.model.get_metrics_result() # TODO if there are new metrics in the flplan that were not included # in the originally # compiled model, that behavior is not currently handled. for param in metrics: - if param not in model_metrics_names: + if param not in results: raise ValueError( f"KerasTaskRunner does not support specifying new metrics. " - f"Param_metrics = {metrics}, model_metrics_names = {model_metrics_names}" + f"Param_metrics = {metrics}" ) history = self.model.fit(batch_generator, verbose=2, **kwargs) @@ -201,7 +201,7 @@ def train_iteration(self, batch_generator, metrics: list = None, **kwargs): results.append(Metric(name=metric, value=np.array(value))) return results - def validate(self, col_name, round_num, input_tensor_dict, **kwargs): + def validate_task(self, col_name, round_num, input_tensor_dict, **kwargs): """Run the trained model on validation data; report results. Args: @@ -224,20 +224,17 @@ def validate(self, col_name, round_num, input_tensor_dict, **kwargs): self.rebuild_model(round_num, input_tensor_dict, validation=True) param_metrics = kwargs["metrics"] - vals = self.model.evaluate(self.data_loader.get_valid_loader(batch_size), verbose=2) - model_metrics_names = self.model.metrics_names - if type(vals) is not list: - vals = [vals] - ret_dict = dict(zip(model_metrics_names, vals)) + self.model.evaluate(self.data_loader.get_valid_loader(batch_size), verbose=1) + results = self.model.get_metrics_result() # TODO if there are new metrics in the flplan that were not included in # the originally compiled model, that behavior is not currently # handled. for param in param_metrics: - if param not in model_metrics_names: + if param not in results: raise ValueError( f"KerasTaskRunner does not support specifying new metrics. " - f"Param_metrics = {param_metrics}, model_metrics_names = {model_metrics_names}" + f"Param_metrics = {param_metrics}" ) origin = col_name @@ -249,7 +246,7 @@ def validate(self, col_name, round_num, input_tensor_dict, **kwargs): tags = ("metric",) tags = change_tags(tags, add_field=suffix) output_tensor_dict = { - TensorKey(metric, origin, round_num, True, tags): np.array(ret_dict[metric]) + TensorKey(metric, origin, round_num, True, tags): np.array(results[metric]) for metric in param_metrics } @@ -261,7 +258,7 @@ def save_native(self, filepath): Args: filepath (str): The file path to save the model. """ - self.model.save(filepath) + self.model.export(filepath) def load_native(self, filepath): """Load model. @@ -269,7 +266,7 @@ def load_native(self, filepath): Args: filepath (str): The file path to load the model. """ - self.model = ke.models.load_model(filepath) + self.model = keras.models.load_model(filepath) @staticmethod def _get_weights_names(obj): @@ -282,7 +279,12 @@ def _get_weights_names(obj): Returns: weight_names (list): The weight name list. 
""" - weight_names = [weight.name for weight in obj.weights] + if isinstance(obj, keras.optimizers.Optimizer): + weight_names = [weight.name for weight in obj.variables] + else: + weight_names = [ + layer.name + "/" + weight.name for layer in obj.layers for weight in layer.weights + ] return weight_names @staticmethod @@ -299,10 +301,19 @@ def _get_weights_dict(obj, suffix=""): weights_dict (dict): The weight dictionary. """ weights_dict = {} - weight_names = [weight.name for weight in obj.weights] - weight_values = obj.get_weights() - for name, value in zip(weight_names, weight_values): - weights_dict[name + suffix] = value + weight_names = KerasTaskRunner._get_weights_names(obj) + if isinstance(obj, keras.optimizers.Optimizer): + weights_dict = { + weight_names[i] + suffix: weight.numpy() + for i, weight in enumerate(copy.deepcopy(obj.variables)) + } + else: + weight_name_index = 0 + for layer in obj.layers: + if weight_name_index < len(weight_names) and len(layer.get_weights()) > 0: + for weight in layer.get_weights(): + weights_dict[weight_names[weight_name_index] + suffix] = weight + weight_name_index += 1 return weights_dict @staticmethod @@ -314,7 +325,7 @@ def _set_weights_dict(obj, weights_dict): the weights. weights_dict (dict): The weight dictionary. """ - weight_names = [weight.name for weight in obj.weights] + weight_names = KerasTaskRunner._get_weights_names(obj) weight_values = [weights_dict[name] for name in weight_names] obj.set_weights(weight_values) @@ -349,45 +360,20 @@ def set_tensor_dict(self, tensor_dict, with_opt_vars): if with_opt_vars is False: # It is possible to pass in opt variables from the input tensor # dict. This will make sure that the correct layers are updated - model_weight_names = [weight.name for weight in self.model.weights] + model_weight_names = self._get_weights_names(self.model) model_weights_dict = {name: tensor_dict[name] for name in model_weight_names} self._set_weights_dict(self.model, model_weights_dict) else: - model_weight_names = [weight.name for weight in self.model.weights] + model_weight_names = self._get_weights_names(self.model) model_weights_dict = {name: tensor_dict[name] for name in model_weight_names} - opt_weight_names = [weight.name for weight in self.model.optimizer.weights] + opt_weight_names = self._get_weights_names(self.model.optimizer) opt_weights_dict = {name: tensor_dict[name] for name in opt_weight_names} self._set_weights_dict(self.model, model_weights_dict) self._set_weights_dict(self.model.optimizer, opt_weights_dict) def reset_opt_vars(self): """Resets the optimizer variables.""" - for var in self.model.optimizer.variables(): - var.assign(tf.zeros_like(var)) - self.logger.debug("Optimizer variables reset") - - def set_required_tensorkeys_for_function(self, func_name, tensor_key, **kwargs): - """ - Set the required tensors for specified function that could be called as part of a task. - - By default, this is just all of the layers and optimizer of the model. - Custom tensors should be added to this function. - - Args: - func_name (str): The function name. - tensor_key (TensorKey): The tensor key. - **kwargs: Any function arguments. - """ - # TODO there should be a way to programmatically iterate through all - # of the methods in the class and declare the tensors. 
- # For now this is done manually - - if func_name == "validate": - # Should produce 'apply=global' or 'apply=local' - local_model = "apply" + kwargs["apply"] - self.required_tensorkeys_for_function[func_name][local_model].append(tensor_key) - else: - self.required_tensorkeys_for_function[func_name].append(tensor_key) + pass def get_required_tensorkeys_for_function(self, func_name, **kwargs): """Get the required tensors for specified function that could be called @@ -402,7 +388,7 @@ def get_required_tensorkeys_for_function(self, func_name, **kwargs): Returns: list: List of TensorKey objects. """ - if func_name == "validate": + if func_name == "validate_task": local_model = "apply=" + str(kwargs["apply"]) return self.required_tensorkeys_for_function[func_name][local_model] else: @@ -423,18 +409,18 @@ def update_tensorkeys_for_functions(self): opt_names = self._get_weights_names(self.model.optimizer) tensor_names = model_layer_names + opt_names self.logger.debug("Updating model tensor names: %s", tensor_names) - self.required_tensorkeys_for_function["train"] = [ - TensorKey(tensor_name, "GLOBAL", 0, ("model",)) for tensor_name in tensor_names + self.required_tensorkeys_for_function["train_task"] = [ + TensorKey(tensor_name, "GLOBAL", 0, False, ("model",)) for tensor_name in tensor_names ] # Validation may be performed on local or aggregated (global) model, # so there is an extra lookup dimension for kwargs - self.required_tensorkeys_for_function["validate"] = {} - self.required_tensorkeys_for_function["validate"]["local_model=True"] = [ - TensorKey(tensor_name, "LOCAL", 0, ("trained",)) for tensor_name in tensor_names + self.required_tensorkeys_for_function["validate_task"] = {} + self.required_tensorkeys_for_function["validate_task"]["apply=local"] = [ + TensorKey(tensor_name, "LOCAL", 0, False, ("trained",)) for tensor_name in tensor_names ] - self.required_tensorkeys_for_function["validate"]["local_model=False"] = [ - TensorKey(tensor_name, "GLOBAL", 0, ("model",)) for tensor_name in tensor_names + self.required_tensorkeys_for_function["validate_task"]["apply=global"] = [ + TensorKey(tensor_name, "GLOBAL", 0, False, ("model",)) for tensor_name in tensor_names ] def initialize_tensorkeys_for_functions(self, with_opt_vars=False): @@ -467,28 +453,28 @@ def initialize_tensorkeys_for_functions(self, with_opt_vars=False): **self.tensor_dict_split_fn_kwargs, ) - self.required_tensorkeys_for_function["train"] = [ + self.required_tensorkeys_for_function["train_task"] = [ TensorKey(tensor_name, "GLOBAL", 0, False, ("model",)) for tensor_name in global_model_dict ] - self.required_tensorkeys_for_function["train"] += [ + self.required_tensorkeys_for_function["train_task"] += [ TensorKey(tensor_name, "LOCAL", 0, False, ("model",)) for tensor_name in local_model_dict ] # Validation may be performed on local or aggregated (global) model, # so there is an extra lookup dimension for kwargs - self.required_tensorkeys_for_function["validate"] = {} + self.required_tensorkeys_for_function["validate_task"] = {} # TODO This is not stateless. 
The optimizer will not be - self.required_tensorkeys_for_function["validate"]["apply=local"] = [ + self.required_tensorkeys_for_function["validate_task"]["apply=local"] = [ TensorKey(tensor_name, "LOCAL", 0, False, ("trained",)) for tensor_name in {**global_model_dict_val, **local_model_dict_val} ] - self.required_tensorkeys_for_function["validate"]["apply=global"] = [ + self.required_tensorkeys_for_function["validate_task"]["apply=global"] = [ TensorKey(tensor_name, "GLOBAL", 0, False, ("model",)) for tensor_name in global_model_dict_val ] - self.required_tensorkeys_for_function["validate"]["apply=global"] += [ + self.required_tensorkeys_for_function["validate_task"]["apply=global"] += [ TensorKey(tensor_name, "LOCAL", 0, False, ("model",)) for tensor_name in local_model_dict_val ] diff --git a/openfl/federated/task/runner_tf.py b/openfl/federated/task/runner_tf.py deleted file mode 100644 index 7f21f2eb29..0000000000 --- a/openfl/federated/task/runner_tf.py +++ /dev/null @@ -1,485 +0,0 @@ -# Copyright 2020-2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - - -"""TensorFlowTaskRunner module.""" - -import numpy as np -import tensorflow.compat.v1 as tf -from tqdm import tqdm - -from openfl.federated.task.runner import TaskRunner -from openfl.utilities import TensorKey -from openfl.utilities.split import split_tensor_dict_for_holdouts - - -class TensorFlowTaskRunner(TaskRunner): - """Base class for TensorFlow models in the Federated Learning solution. - - Attributes: - assign_ops (tf.Operation): TensorFlow operations for assignment. - placeholders (tf.Tensor): TensorFlow placeholders for tensors. - tvar_assign_ops (tf.Operation): TensorFlow operations for assignment - of trainable variables. - tvar_placeholders (tf.Tensor): TensorFlow placeholders for trainable - variables. - input_shape (tuple): Shape of the input features. - required_tensorkeys_for_function (dict): Required tensorkeys for all - public functions in TensorFlowTaskRunner. - sess (tf.Session): TensorFlow session. - X (tf.Tensor): Input features to the model. - y (tf.Tensor): Input labels to the model. - train_step (tf.Operation): Optimizer train step operation. - loss (tf.Tensor): Model loss function. - output (tf.Tensor): Model output tensor. - validation_metric (tf.Tensor): Function used to validate the model - outputs against labels. - tvars (list): TensorFlow trainable variables. - opt_vars (list): Optimizer variables. - fl_vars (list): Trainable variables and optimizer variables. - - .. note:: - Child classes should have __init__ function signature (self, data, - kwargs), - and should overwrite at least the following while defining the model. - """ - - def __init__(self, **kwargs): - """Initializes the TensorFlowTaskRunner object. - - Args: - **kwargs: Additional parameters to pass to the function. 
- """ - tf.disable_v2_behavior() - - super().__init__(**kwargs) - - self.assign_ops = None - self.placeholders = None - - self.tvar_assign_ops = None - self.tvar_placeholders = None - - # construct the shape needed for the input features - self.input_shape = (None,) + self.data_loader.get_feature_shape() - - # Required tensorkeys for all public functions in TensorFlowTaskRunner - self.required_tensorkeys_for_function = {} - - # tensorflow session - self.sess = None - # input featrures to the model - self.X = None - # input labels to the model - self.y = None - # optimizer train step operation - self.train_step = None - # model loss function - self.loss = None - # model output tensor - self.output = None - # function used to validate the model outputs against labels - self.validation_metric = None - # tensorflow trainable variables - self.tvars = None - # self.optimizer.variables() once self.optimizer is defined - self.opt_vars = None - # self.tvars + self.opt_vars - self.fl_vars = None - - def rebuild_model(self, round_num, input_tensor_dict, validation=False): - """Parse tensor names and update weights of model. Handles the - optimizer treatment. - - Args: - round_num (int): The round number. - input_tensor_dict (dict): The input tensor dictionary. - validation (bool): If True, perform validation. Default is False. - - Returns: - None - """ - if self.opt_treatment == "RESET": - self.reset_opt_vars() - self.set_tensor_dict(input_tensor_dict, with_opt_vars=False) - elif round_num > 0 and self.opt_treatment == "CONTINUE_GLOBAL" and not validation: - self.set_tensor_dict(input_tensor_dict, with_opt_vars=True) - else: - self.set_tensor_dict(input_tensor_dict, with_opt_vars=False) - - def train_batches( - self, col_name, round_num, input_tensor_dict, epochs=1, use_tqdm=False, **kwargs - ): - """ - Perform the training. - - Is expected to perform draws randomly, without replacement until data - is exausted. Then data is replaced and shuffled and draws continue. - - Args: - col_name (str): The column name. - round_num (int): The round number. - input_tensor_dict (dict): The input tensor dictionary. - epochs (int): Number of epochs to train. Default is 1. - use_tqdm (bool): If True, use tqdm to print a progress bar. - Default is False. - **kwargs: Additional parameters to pass to the function. - - Returns: - float: loss metric. 
- """ - batch_size = self.data_loader.batch_size - - if kwargs["batch_size"]: - batch_size = kwargs["batch_size"] - - # rebuild model with updated weights - self.rebuild_model(round_num, input_tensor_dict) - - tf.keras.backend.set_learning_phase(True) - losses = [] - - for epoch in range(epochs): - self.logger.info("Run %s epoch of %s round", epoch, round_num) - # get iterator for batch draws (shuffling happens here) - gen = self.data_loader.get_train_loader(batch_size) - if use_tqdm: - gen = tqdm.tqdm(gen, desc="training epoch") - - for X, y in gen: - losses.append(self.train_batch(X, y)) - - # Output metric tensors (scalar) - origin = col_name - tags = ("trained",) - output_metric_dict = { - TensorKey(self.loss_name, origin, round_num, True, ("metric",)): np.array( - np.mean(losses) - ) - } - - # output model tensors (Doesn't include TensorKey) - output_model_dict = self.get_tensor_dict(with_opt_vars=True) - global_model_dict, local_model_dict = split_tensor_dict_for_holdouts( - self.logger, output_model_dict, **self.tensor_dict_split_fn_kwargs - ) - - # Create global tensorkeys - global_tensorkey_model_dict = { - TensorKey(tensor_name, origin, round_num, False, tags): nparray - for tensor_name, nparray in global_model_dict.items() - } - # Create tensorkeys that should stay local - local_tensorkey_model_dict = { - TensorKey(tensor_name, origin, round_num, False, tags): nparray - for tensor_name, nparray in local_model_dict.items() - } - # The train/validate aggregated function of the next round will - # look for the updated model parameters. - # This ensures they will be resolved locally - next_local_tensorkey_model_dict = { - TensorKey(tensor_name, origin, round_num + 1, False, ("model",)): nparray - for tensor_name, nparray in local_model_dict.items() - } - - global_tensor_dict = { - **output_metric_dict, - **global_tensorkey_model_dict, - } - local_tensor_dict = { - **local_tensorkey_model_dict, - **next_local_tensorkey_model_dict, - } - - # Update the required tensors if they need to be pulled from - # the aggregator - # TODO this logic can break if different collaborators have different - # roles between rounds. - # For example, if a collaborator only performs validation in the first - # round but training in the second, it has no way of knowing the - # optimizer state tensor names to request from the aggregator because - # these are only created after training occurs. A work around could - # involve doing a single epoch of training on random data to get the - # optimizer names, and then throwing away the model. - if self.opt_treatment == "CONTINUE_GLOBAL": - self.initialize_tensorkeys_for_functions(with_opt_vars=True) - - return global_tensor_dict, local_tensor_dict - - def train_batch(self, X, y): - """Train the model on a single batch. - - Args: - X (tf.Tensor): Input to the model. - y (tf.Tensor): Ground truth label to the model. - - Returns: - loss (float): loss metric. - """ - feed_dict = {self.X: X, self.y: y} - - # run the train step and return the loss - _, loss = self.sess.run([self.train_step, self.loss], feed_dict=feed_dict) - - return loss - - def validate(self, col_name, round_num, input_tensor_dict, use_tqdm=False, **kwargs): - """ - Run validation. - - Args: - col_name (str): The column name. - round_num (int): The round number. - input_tensor_dict (dict): The input tensor dictionary. - use_tqdm (bool): If True, use tqdm to print a progress bar. - Default is False. - **kwargs: Additional parameters to pass to the function. 
- - Returns: - output_tensor_dict (dict): {: }. - """ - batch_size = self.data_loader.batch_size - - if kwargs["batch_size"]: - batch_size = kwargs["batch_size"] - - self.rebuild_model(round_num, input_tensor_dict, validation=True) - - tf.keras.backend.set_learning_phase(False) - - score = 0 - - gen = self.data_loader.get_valid_loader(batch_size) - if use_tqdm: - gen = tqdm.tqdm(gen, desc="validating") - - for X, y in gen: - weight = X.shape[0] / self.data_loader.get_valid_data_size() - _, s = self.validate_batch(X, y) - score += s * weight - - origin = col_name - suffix = "validate" - if kwargs["apply"] == "local": - suffix += "_local" - else: - suffix += "_agg" - tags = ("metric", suffix) - output_tensor_dict = { - TensorKey(self.validation_metric_name, origin, round_num, True, tags): np.array(score) - } - - # return empty dict for local metrics - return output_tensor_dict, {} - - def validate_batch(self, X, y): - """Validate the model on a single local batch. - - Args: - X (tf.Tensor): Input to the model. - y (tf.Tensor): Ground truth label to the model. - - Returns: - float: loss metric. - """ - feed_dict = {self.X: X, self.y: y} - - return self.sess.run([self.output, self.validation_metric], feed_dict=feed_dict) - - def get_tensor_dict(self, with_opt_vars=True): - """Get the dictionary weights. - - Get the weights from the tensor. - - Args: - with_opt_vars (bool): Specify if we also want to get the variables - of the optimizer. Default is True. - - Returns: - dict: The weight dictionary {: }. - """ - if with_opt_vars is True: - variables = self.fl_vars - else: - variables = self.tvars - - # FIXME: do this in one call? - return {var.name: val for var, val in zip(variables, self.sess.run(variables))} - - def set_tensor_dict(self, tensor_dict, with_opt_vars): - """Set the tensor dictionary. - - Set the model weights with a tensor dictionary: - {: }. - - Args: - tensor_dict (dict): The model weights dictionary. - with_opt_vars (bool): Specify if we also want to set the variables - of the optimizer. - - Returns: - None - """ - if with_opt_vars: - self.assign_ops, self.placeholders = tf_set_tensor_dict( - tensor_dict, - self.sess, - self.fl_vars, - self.assign_ops, - self.placeholders, - ) - else: - self.tvar_assign_ops, self.tvar_placeholders = tf_set_tensor_dict( - tensor_dict, - self.sess, - self.tvars, - self.tvar_assign_ops, - self.tvar_placeholders, - ) - - def reset_opt_vars(self): - """Reinitialize the optimizer variables. - - Returns: - None - """ - for v in self.opt_vars: - v.initializer.run(session=self.sess) - - def initialize_globals(self): - """Initialize Global Variables. - - Initialize all global variables - - Returns: - None - """ - self.sess.run(tf.global_variables_initializer()) - - def _get_weights_names(self, with_opt_vars=True): - """Get the weights. - - Args: - with_opt_vars (bool): Specify if we also want to get the variables - of the optimizer. Default is True. - - Returns: - list: The weight names list. - """ - if with_opt_vars is True: - variables = self.fl_vars - else: - variables = self.tvars - - return [var.name for var in variables] - - def get_required_tensorkeys_for_function(self, func_name, **kwargs): - """Get the required tensors for specified function that could be called - as part of a task. - - By default, this is just all of the layers and optimizer of the model. - - Args: - func_name (str): The function name. - **kwargs: Additional parameters to pass to the function. - - Returns: - required_tensorkeys_for_function (list): List of required - TensorKey. 
[TensorKey]. - """ - if func_name == "validate": - local_model = "apply=" + str(kwargs["apply"]) - return self.required_tensorkeys_for_function[func_name][local_model] - else: - return self.required_tensorkeys_for_function[func_name] - - def initialize_tensorkeys_for_functions(self, with_opt_vars=False): - """Set the required tensors for all publicly accessible methods that - could be called as part of a task. - - By default, this is just all of the layers and optimizer of the model. - Custom tensors should be added to this function - - Args: - with_opt_vars (bool): Specify if we also want to set the variables - of the optimizer. Default is False. - - Returns: - None - """ - # TODO there should be a way to programmatically iterate through - # all of the methods in the class and declare the tensors. - # For now this is done manually - - output_model_dict = self.get_tensor_dict(with_opt_vars=with_opt_vars) - global_model_dict, local_model_dict = split_tensor_dict_for_holdouts( - self.logger, output_model_dict, **self.tensor_dict_split_fn_kwargs - ) - if not with_opt_vars: - global_model_dict_val = global_model_dict - local_model_dict_val = local_model_dict - else: - output_model_dict = self.get_tensor_dict(with_opt_vars=False) - global_model_dict_val, local_model_dict_val = split_tensor_dict_for_holdouts( - self.logger, output_model_dict, **self.tensor_dict_split_fn_kwargs - ) - - self.required_tensorkeys_for_function["train_batches"] = [ - TensorKey(tensor_name, "GLOBAL", 0, False, ("model",)) - for tensor_name in global_model_dict - ] - self.required_tensorkeys_for_function["train_batches"] += [ - TensorKey(tensor_name, "LOCAL", 0, False, ("model",)) - for tensor_name in local_model_dict - ] - - # Validation may be performed on local or aggregated (global) - # model, so there is an extra lookup dimension for kwargs - self.required_tensorkeys_for_function["validate"] = {} - # TODO This is not stateless. The optimizer will not be - self.required_tensorkeys_for_function["validate"]["apply=local"] = [ - TensorKey(tensor_name, "LOCAL", 0, False, ("trained",)) - for tensor_name in {**global_model_dict_val, **local_model_dict_val} - ] - self.required_tensorkeys_for_function["validate"]["apply=global"] = [ - TensorKey(tensor_name, "GLOBAL", 0, False, ("model",)) - for tensor_name in global_model_dict_val - ] - self.required_tensorkeys_for_function["validate"]["apply=global"] += [ - TensorKey(tensor_name, "LOCAL", 0, False, ("model",)) - for tensor_name in local_model_dict_val - ] - - -# FIXME: what's a nicer construct than this? ugly interface. Perhaps we -# get an object with an assumed interface that lets is set/get these? -# Note that this will return the assign_ops and placeholder nodes it uses -# if called with None, it will create them. -# to avoid inflating the graph, caller should keep these and pass them back -# What if we want to set a different group of vars in the middle? -# It is good if it is the subset of the original variables. -def tf_set_tensor_dict(tensor_dict, session, variables, assign_ops=None, placeholders=None): - """Tensorflow set tensor dictionary. - - Args: - tensor_dict (dict): Dictionary of tensors. - session (tf.Session): TensorFlow session. - variables (list): List of TensorFlow variables. - assign_ops (tf.Operation, optional): TensorFlow operations for - assignment. Default is None. - placeholders (tf.Tensor, optional): TensorFlow placeholders for - tensors. Default is None. - - Returns: - assign_ops (tf.Operation): TensorFlow operations for assignment. 
- placeholders (tf.Tensor): TensorFlow placeholders for tensors. - """ - if placeholders is None: - placeholders = {v.name: tf.placeholder(v.dtype, shape=v.shape) for v in variables} - if assign_ops is None: - assign_ops = {v.name: tf.assign(v, placeholders[v.name]) for v in variables} - - for k, v in tensor_dict.items(): - session.run(assign_ops[k], feed_dict={placeholders[k]: v}) - - return assign_ops, placeholders diff --git a/tests/github/test_hello_federation.py b/tests/github/test_hello_federation.py index e9071eed1c..e6b84b8de2 100644 --- a/tests/github/test_hello_federation.py +++ b/tests/github/test_hello_federation.py @@ -75,7 +75,7 @@ def main(): # Convert model to native format if save_model: check_call( - ['fx', 'model', 'save', '-i', f'./save/{template}_last.pbuf', '-o', save_model], + ['fx', 'model', 'save', '-i', f'./save/last.pbuf', '-o', save_model], cwd=workspace_root) os.chdir(origin_dir)
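
The import-guard hunks above switch openfl/federated/__init__.py, data/__init__.py, and task/__init__.py from probing for "tensorflow" to probing for "keras" before exporting the Keras data loader and task runner. A minimal stdlib-only sketch of that guard pattern; the printed messages are illustrative, not OpenFL output:

from importlib import util

# Export Keras-specific symbols only when the "keras" package is importable,
# mirroring the updated guard in openfl/federated/__init__.py.
if util.find_spec("keras") is not None:
    import keras  # safe: find_spec already confirmed the package exists

    print("keras available:", keras.__version__)
else:
    print("keras not installed; KerasTaskRunner and KerasDataLoader would not be exported")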
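
tasks_keras.yaml now points the default tasks at train_task and validate_task, and get_required_tensorkeys_for_function resolves validate_task through an extra "apply=local" / "apply=global" dimension. A rough, stdlib-only sketch of that lookup; TensorKey is reduced here to a namedtuple and the tensor names are invented for illustration:

from collections import namedtuple

# Simplified stand-in for openfl.utilities.TensorKey.
TensorKey = namedtuple("TensorKey", ["tensor_name", "origin", "round_number", "report", "tags"])

required_tensorkeys_for_function = {
    "train_task": [
        TensorKey("hidden/kernel", "GLOBAL", 0, False, ("model",)),
    ],
    "validate_task": {
        "apply=local": [TensorKey("hidden/kernel", "LOCAL", 0, False, ("trained",))],
        "apply=global": [TensorKey("hidden/kernel", "GLOBAL", 0, False, ("model",))],
    },
}

def get_required_tensorkeys_for_function(func_name, **kwargs):
    # validate_task keys its requirements by the "apply" kwarg; train_task is a flat list.
    if func_name == "validate_task":
        return required_tensorkeys_for_function[func_name]["apply=" + str(kwargs["apply"])]
    return required_tensorkeys_for_function[func_name]

print(get_required_tensorkeys_for_function("validate_task", apply="global"))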
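
_get_weights_names and _get_weights_dict now key model weights as "<layer name>/<variable name>", since Keras 3 variable names ("kernel", "bias") are no longer unique across layers, while optimizer weights are read from optimizer.variables. A sketch of the layer-scoped naming, assuming Keras 3 with any backend installed; the layer names and shapes are arbitrary:

import keras

# Two Dense layers so that plain "kernel"/"bias" names would collide without the layer prefix.
model = keras.Sequential([
    keras.Input(shape=(4,)),
    keras.layers.Dense(3, name="hidden"),
    keras.layers.Dense(2, name="output"),
])

weights_dict = {}
for layer in model.layers:
    # layer.weights and layer.get_weights() are aligned, so zip keeps names and values paired.
    for variable, value in zip(layer.weights, layer.get_weights()):
        weights_dict[f"{layer.name}/{variable.name}"] = value

print(sorted(weights_dict))                 # ['hidden/bias', 'hidden/kernel', 'output/bias', 'output/kernel']
print(weights_dict["hidden/kernel"].shape)  # (4, 3)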
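
validate_task and train_ no longer zip model.metrics_names against the list returned by evaluate(); they call model.get_metrics_result() and index the resulting dict, raising if a metric requested in the plan was never compiled into the model. A sketch of that flow on random data, assuming Keras 3; the toy model and metric choice are illustrative:

import numpy as np
import keras

model = keras.Sequential([keras.Input(shape=(8,)), keras.layers.Dense(1, activation="sigmoid")])
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

x = np.random.rand(32, 8).astype("float32")
y = np.random.randint(0, 2, size=(32, 1)).astype("float32")

model.evaluate(x, y, verbose=0)
results = model.get_metrics_result()  # dict keyed by metric name, e.g. {"loss": ..., "accuracy": ...}

for param in ["accuracy"]:            # metrics requested by the plan
    if param not in results:
        raise ValueError(f"Metric {param} was not compiled into the model")
print(results)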
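
The deleted TensorFlowDataLoader fed training through an index-shuffled NumPy batch generator, and the surviving KerasDataLoader follows a similar pattern, so the behavior is worth keeping in view. A self-contained sketch of that generator on synthetic data; array sizes are arbitrary:

import numpy as np

def batch_generator(X, y, batch_size):
    """Yield (X, y) batches over a shuffled index permutation, as the removed
    _get_batch_generator / _batch_generator pair did."""
    idxs = np.random.permutation(np.arange(X.shape[0]))
    num_batches = int(np.ceil(X.shape[0] / batch_size))
    for i in range(num_batches):
        a = i * batch_size
        b = a + batch_size
        yield X[idxs[a:b]], y[idxs[a:b]]

X = np.random.rand(10, 4)
y = np.random.randint(0, 2, size=(10,))
for xb, yb in batch_generator(X, y, batch_size=4):
    print(xb.shape, yb.shape)  # (4, 4) (4,), (4, 4) (4,), then the (2, 4) (2,) remainder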