Merge branch 'develop' into tensorflow
tanwarsh authored Feb 7, 2025
2 parents c72efff + b18e978 commit 6cc7747
Showing 49 changed files with 981 additions and 1,529 deletions.
1 change: 1 addition & 0 deletions .gitattributes
@@ -0,0 +1 @@
*.ipynb linguist-vendored
9 changes: 9 additions & 0 deletions docs/about/features_index/taskrunner.rst
@@ -162,6 +162,15 @@ STEP 1: Create a Workspace
- :code:`tf_cnn_histology`: a workspace with a simple `TensorFlow <http://tensorflow.org>`__ CNN model that will download the `Colorectal Histology <https://zenodo.org/record/53169#.XGZemKwzbmG>`_ dataset and train in a federation.
- :code:`keras/histology`: a workspace with a simple `Keras <http://keras.io/>`__ CNN model that will download the `Colorectal Histology <https://zenodo.org/record/53169#.XGZemKwzbmG>`_ dataset and train in a federation.
- :code:`torch/mnist`: a workspace with a simple `PyTorch <http://pytorch.org>`__ CNN model that will download the `MNIST <http://yann.lecun.com/exdb/mnist/>`_ dataset and train in a federation.
- :code:`keras/jax/mnist`: a workspace with a simple `Keras <http://keras.io/>`__ CNN model that will download the `MNIST <http://yann.lecun.com/exdb/mnist/>`_ dataset and train in a federation with JAX as the backend. You can export the environment variable KERAS_BACKEND to configure your backend. Available backend options are: "jax", "tensorflow", "torch". Example:

.. code-block:: shell

    $ export KERAS_BACKEND="jax"

.. note::

    Please ensure KERAS_BACKEND is set in the environment where you plan on using OpenFL before executing any fx command.
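
For example, assuming the backend has already been exported as shown above, a workspace can then be created from this template (the workspace path below is illustrative):

.. code-block:: shell

    $ fx workspace create --prefix ~/my_federation --template keras/jax/mnist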

See the complete list of available templates.

115 changes: 0 additions & 115 deletions docs/developer_guide/advanced_topics/overriding_agg_fn.rst
@@ -26,121 +26,6 @@ Choose from the following predefined aggregation functions:
- ``openfl.interface.aggregation_functions.YogiAdaptiveAggregation``


.. _adaptive_aggregation_functions:

Adaptive Aggregation Functions
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

.. note::
    To create adaptive aggregation functions,
    the user must specify parameters for the aggregation optimizer
    (``NumPyAdagrad``, ``NumPyAdam``, or ``NumPyYogi``) that will aggregate
    the global model. These parameters are passed via **keywords**.

    Also, the user must pass one of the following arguments: ``params``
    - model parameters (a dictionary with named model parameters
    in the form of numpy arrays), or ``model_interface``
    - an instance of the `ModelInterface <https://github.com/intel/openfl/blob/develop/openfl/interface/interactive_api/experiment.py>`_ class.
    If the user passes both ``params`` and ``model_interface``,
    the optimizer parameters are initialized via
    ``params`` and the ``model_interface`` argument is ignored.

See the `AdagradAdaptiveAggregation
<https://github.com/intel/openfl/blob/develop/openfl/interface/aggregation_functions/adagrad_adaptive_aggregation.py>`_
definition for details.

See also the original `Adaptive federated optimization <https://arxiv.org/pdf/2003.00295.pdf>`_ paper.

``AdagradAdaptiveAggregation`` usage example:

.. code-block:: python

    from openfl.interface.interactive_api.experiment import TaskInterface, ModelInterface
    from openfl.interface.aggregation_functions import AdagradAdaptiveAggregation

    TI = TaskInterface()
    MI = ModelInterface(model=model,
                        optimizer=optimizer,
                        framework_plugin=framework_adapter)
    ...

    # Creating aggregation function
    agg_fn = AdagradAdaptiveAggregation(model_interface=MI,
                                        learning_rate=0.4)

    # Define training task
    @TI.register_fl_task(model='model', data_loader='train_loader', \
                         device='device', optimizer='optimizer')
    @TI.set_aggregation_function(agg_fn)
    def train(...):
        ...

You can define your own NumPy-based optimizer,
which will be used for global model aggregation:

.. code-block:: python

    from typing import Any, Dict, Optional

    import numpy as np

    from openfl.utilities.optimizers.numpy.base_optimizer import Optimizer


    class MyOpt(Optimizer):
        """My optimizer implementation."""

        def __init__(
            self,
            *,
            params: Optional[Dict[str, np.ndarray]] = None,
            model_interface=None,
            learning_rate: float = 0.001,
            param1: Any = None,
            param2: Any = None
        ) -> None:
            """Initialize.

            Args:
                params: Parameters to be stored for optimization.
                model_interface: Model interface instance to provide parameters.
                learning_rate: Tuning parameter that determines
                    the step size at each iteration.
                param1: My own defined parameter.
                param2: My own defined parameter.
            """
            super().__init__()
            pass  # Your code here!

        def step(self, gradients: Dict[str, np.ndarray]) -> None:
            """Perform a single step for parameter update.

            Implement your own optimizer weights update rule.

            Args:
                gradients: Partial derivatives with respect to optimized parameters.
            """
            pass  # Your code here!

    ...

    from openfl.interface.aggregation_functions import WeightedAverage
    from openfl.interface.aggregation_functions.core import AdaptiveAggregation

    # Creating your implemented optimizer instance based on numpy:
    my_own_optimizer = MyOpt(model_interface=MI, learning_rate=0.01)

    # Creating aggregation function
    agg_fn = AdaptiveAggregation(optimizer=my_own_optimizer,
                                 agg_func=WeightedAverage())  # WeightedAverage() is used for aggregating
                                                              # parameters that are not inside the given optimizer.

    # Define training task
    @TI.register_fl_task(model='model', data_loader='train_loader', \
                         device='device', optimizer='optimizer')
    @TI.set_aggregation_function(agg_fn)
    def train(...):
        ...

.. note::
    If you do not understand how to write your own NumPy-based optimizer, please see the `NumPyAdagrad <https://github.com/intel/openfl/blob/develop/openfl/utilities/optimizers/numpy/adagrad_optimizer.py>`_ and
    `AdaptiveAggregation <https://github.com/intel/openfl/blob/develop/openfl/interface/aggregation_functions/core/adaptive_aggregation.py>`_ definitions for details.

Custom Aggregation Functions
----------------------------

54 changes: 1 addition & 53 deletions docs/developer_guide/structure/plugins.rst
@@ -46,56 +46,4 @@ implement the :code:`serialization_setup` method to prepare the model object for

.. code-block:: python

    def serialization_setup():
.. _serializer_plugin:

Experiment Serializer
######################

The Serializer plugin is used on the frontend Python API to serialize the Experiment components and then on Envoys to deserialize them.
Currently, the default serializer plugin is based on pickling. It is a **required** plugin.

The serializer plugin must implement the :code:`serialize` method that creates a Python object representation on disk.

.. code-block:: python

    @staticmethod
    def serialize(object_, filename: str) -> None:

The plugin must also implement the :code:`restore_object` method that will load a previously serialized object from disk.

.. code-block:: python

    @staticmethod
    def restore_object(filename: str):
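
For illustration, a minimal pickle-based serializer satisfying this interface could look like the sketch below. The class name ``PickleSerializer`` is hypothetical and the plugin base-class wiring is omitted; see the default serializer plugin shipped with OpenFL for the authoritative implementation.

.. code-block:: python

    import pickle


    class PickleSerializer:
        """A hypothetical serializer plugin based on pickling."""

        @staticmethod
        def serialize(object_, filename: str) -> None:
            """Write a pickled representation of object_ to disk."""
            with open(filename, 'wb') as f:
                pickle.dump(object_, f)

        @staticmethod
        def restore_object(filename: str):
            """Load a previously serialized object from disk."""
            with open(filename, 'rb') as f:
                return pickle.load(f)
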
.. _device_monitor_plugin:

CUDA Device Monitor
######################

The CUDA Device Monitor plugin is an **optional** plugin for Envoys that can gather status information about GPU devices.
This information may be used by Envoys and included in a healthcheck message that is sent to the Director.
Therefore, you can query this Envoy Registry information from the Director to determine the status of CUDA devices.

CUDA Device Monitor plugin must implement the following interface:

.. code-block:: python

    class CUDADeviceMonitor:

        def get_driver_version(self) -> str:
            ...

        def get_device_memory_total(self, index: int) -> int:
            ...

        def get_device_memory_utilized(self, index: int) -> int:
            ...

        def get_device_utilization(self, index: int) -> str:
            """It is just a general method that returns a string that may be shown to the frontend user."""
            ...
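
As a rough sketch, such a monitor could be backed by the ``pynvml`` package. The class below is an illustration of the interface, not necessarily the plugin shipped with OpenFL, and assumes ``pynvml`` is installed and an NVIDIA driver is available:

.. code-block:: python

    import pynvml


    class PynvmlCUDADeviceMonitor:
        """Hypothetical CUDA Device Monitor backed by pynvml."""

        def __init__(self) -> None:
            # Initialize the NVML library once per process
            pynvml.nvmlInit()

        def get_driver_version(self) -> str:
            version = pynvml.nvmlSystemGetDriverVersion()
            # Older pynvml releases return bytes rather than str
            return version.decode() if isinstance(version, bytes) else version

        def get_device_memory_total(self, index: int) -> int:
            handle = pynvml.nvmlDeviceGetHandleByIndex(index)
            return pynvml.nvmlDeviceGetMemoryInfo(handle).total

        def get_device_memory_utilized(self, index: int) -> int:
            handle = pynvml.nvmlDeviceGetHandleByIndex(index)
            return pynvml.nvmlDeviceGetMemoryInfo(handle).used

        def get_device_utilization(self, index: int) -> str:
            handle = pynvml.nvmlDeviceGetHandleByIndex(index)
            return f"{pynvml.nvmlDeviceGetUtilizationRates(handle).gpu}%"
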
17 changes: 2 additions & 15 deletions docs/developer_guide/utilities/splitters_data.rst
@@ -13,21 +13,8 @@ OpenFL allows you to specify custom data splits **for simulation runs on a singl
You may apply data splitters differently depending on the OpenFL workflow that you follow.


OPTION 1: Use **Native Python API** (Aggregator-Based Workflow) Functions to Split the Data (Deprecated)
==========================================================================================================

The predefined OpenFL data splitters are as follows:

- ``openfl.utilities.data_splitters.EqualNumPyDataSplitter`` (default)
- ``openfl.utilities.data_splitters.RandomNumPyDataSplitter``
- ``openfl.utilities.data_splitters.LogNormalNumPyDataSplitter``, which assumes the ``data`` argument is an ``np.ndarray`` of integers (labels)
- ``openfl.utilities.data_splitters.DirichletNumPyDataSplitter``, which assumes the ``data`` argument is an ``np.ndarray`` of integers (labels)

Alternatively, you can create your own `implementation <https://github.com/intel/openfl/blob/develop/openfl/utilities/data_splitters/numpy.py>`_ of :class:`openfl.utilities.data_splitters.NumPyDataSplitter` and pass it to the :code:`FederatedDataset` constructor as either the ``train_splitter`` or ``valid_splitter`` keyword argument, as shown in the sketch below.
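
As a rough illustration, a custom splitter might look like the following sketch. It assumes the base class exposes a single ``split(data, num_collaborators)`` method returning one list of sample indices per collaborator; verify the exact import path and signature against the linked implementation.

.. code-block:: python

    from typing import List

    import numpy as np

    from openfl.utilities.data_splitters import NumPyDataSplitter


    class ShuffledEqualSplitter(NumPyDataSplitter):
        """Hypothetical splitter: shuffles sample indices and splits them equally."""

        def split(self, data: np.ndarray, num_collaborators: int) -> List[np.ndarray]:
            idx = np.random.permutation(len(data))
            # One index array per collaborator, sizes differing by at most one sample
            return list(np.array_split(idx, num_collaborators))

    # Hypothetical usage with FederatedDataset:
    # fl_data = FederatedDataset(..., train_splitter=ShuffledEqualSplitter())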


OPTION 2: Use Dataset Splitters in your Shard Descriptor
========================================================
Use Dataset Splitters in your Shard Descriptor
===================================================

Apply one of the previously mentioned splitting functions to your data to perform a simulation.

2 changes: 2 additions & 0 deletions openfl-workspace/keras/jax/mnist/.workspace
@@ -0,0 +1,2 @@
current_plan_name: default

5 changes: 5 additions & 0 deletions openfl-workspace/keras/jax/mnist/plan/cols.yaml
@@ -0,0 +1,5 @@
# Copyright (C) 2020-2025 Intel Corporation
# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you.

collaborators:

7 changes: 7 additions & 0 deletions openfl-workspace/keras/jax/mnist/plan/data.yaml
@@ -0,0 +1,7 @@
# Copyright (C) 2020-2025 Intel Corporation
# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you.

# collaborator_name,data_directory_path
one,1


2 changes: 2 additions & 0 deletions openfl-workspace/keras/jax/mnist/plan/defaults
@@ -0,0 +1,2 @@
../../workspace/plan/defaults

42 changes: 42 additions & 0 deletions openfl-workspace/keras/jax/mnist/plan/plan.yaml
@@ -0,0 +1,42 @@
# Copyright (C) 2020-2025 Intel Corporation
# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you.

aggregator :
defaults : plan/defaults/aggregator.yaml
template : openfl.component.Aggregator
settings :
init_state_path : save/init.pbuf
best_state_path : save/best.pbuf
last_state_path : save/last.pbuf
rounds_to_train : 10

collaborator :
defaults : plan/defaults/collaborator.yaml
template : openfl.component.Collaborator
settings :
delta_updates : false
opt_treatment : RESET

data_loader :
defaults : plan/defaults/data_loader.yaml
template : src.dataloader.JAXMNISTInMemory
settings :
collaborator_count : 2
data_group_name : mnist
batch_size : 256

task_runner :
defaults : plan/defaults/task_runner.yaml
template : src.taskrunner.JAXCNN

network :
defaults : plan/defaults/network.yaml

assigner :
defaults : plan/defaults/assigner.yaml

tasks :
defaults : plan/defaults/tasks_keras.yaml

compression_pipeline :
defaults : plan/defaults/compression_pipeline.yaml
3 changes: 3 additions & 0 deletions openfl-workspace/keras/jax/mnist/requirements.txt
@@ -0,0 +1,3 @@
jax==0.5.0
keras==3.8.0
tensorflow==2.18.0
3 changes: 3 additions & 0 deletions openfl-workspace/keras/jax/mnist/src/__init__.py
@@ -0,0 +1,3 @@
# Copyright (C) 2020-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
"""You may copy this file as the starting point of your own model."""
42 changes: 42 additions & 0 deletions openfl-workspace/keras/jax/mnist/src/dataloader.py
@@ -0,0 +1,42 @@
# Copyright (C) 2020-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

"""You may copy this file as the starting point of your own model."""

from openfl.federated import KerasDataLoader
from .mnist_utils import load_mnist_shard


class JAXMNISTInMemory(KerasDataLoader):
"""Data Loader for MNIST Dataset."""

def __init__(self, data_path, batch_size, **kwargs):
"""
Initialize.
Args:
data_path: File path for the dataset
batch_size (int): The batch size for the data loader
**kwargs: Additional arguments, passed to super init and load_mnist_shard
"""
super().__init__(batch_size, **kwargs)

        try:
            int(data_path)
        except (ValueError, TypeError) as exc:
            raise ValueError(
                f"Expected `{data_path}` to be representable as `int`, as it refers to the data shard "
                "number used by the collaborator."
            ) from exc

_, num_classes, X_train, y_train, X_valid, y_valid = load_mnist_shard(
shard_num=int(data_path), **kwargs
)

self.X_train = X_train
self.y_train = y_train
self.X_valid = X_valid
self.y_valid = y_valid

self.num_classes = num_classes