diff --git a/openfl-workspace/torch_template/src/dataloader.py b/openfl-workspace/torch_template/src/dataloader.py
index 070561f864..fcdc342588 100644
--- a/openfl-workspace/torch_template/src/dataloader.py
+++ b/openfl-workspace/torch_template/src/dataloader.py
@@ -1,5 +1,6 @@
 # Copyright (C) 2024 Intel Corporation
-# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you.
+# Licensed subject to the terms of the separately executed evaluation license agreement between
+# Intel Corporation and you.

 from openfl.federated import PyTorchDataLoader

@@ -13,14 +14,18 @@ class TemplateDataLoader(PyTorchDataLoader):
     The `plan.yaml` modifications should be done under the `/plan/plan.yaml` section:
     ```
     data_loader:
-        defaults : plan/defaults/data_loader.yaml
-        template: src.dataloader.TemplateDataLoader # Modify this line appropriately if you change the class name
+        defaults: plan/defaults/data_loader.yaml
+        template: src.dataloader.TemplateDataLoader # Modify this line appropriately if you
+        # change the class name
         settings:
-            # Add additional arguments (such as batch_size) that you wish to pass through `def __init__():`
+            # Add additional arguments (such as batch_size) that you wish
+            # to pass through `def __init__():`
             # You do not need to pass in data_path here. It will be set by the collaborators
     ```
-    `batch_size` is passed to the `super().`__init__` method to ensure that the superclass is properly initialized with the specified batch size.
-    After calling `super().__init__`, define `self.X_train`, `self.y_train`, `self.X_valid`, and `self.y_valid`.
+    `batch_size` is passed to the `super().__init__` method to ensure that the superclass
+    is properly initialized with the specified batch size.
+    After calling `super().__init__`, define `self.X_train`, `self.y_train`,
+    `self.X_valid`, and `self.y_valid`.
     """

     def __init__(self, data_path, batch_size, **kwargs):
@@ -67,4 +72,5 @@ def load_dataset(data_path, **kwargs):
     return X_train, y_train, X_valid, y_valid


-raise NotImplementedError("Use /src/dataloader.py template to create a custom dataloader. Then remove this line.")
+raise NotImplementedError("Use /src/dataloader.py template to "
+                          "create a custom dataloader. Then remove this line.")
diff --git a/openfl-workspace/torch_template/src/taskrunner.py b/openfl-workspace/torch_template/src/taskrunner.py
index b008d02914..5f3e7393bf 100644
--- a/openfl-workspace/torch_template/src/taskrunner.py
+++ b/openfl-workspace/torch_template/src/taskrunner.py
@@ -1,5 +1,6 @@
 # Copyright (C) 2024 Intel Corporation
-# Licensed subject to the terms of the separately executed evaluation license agreement between Intel Corporation and you.
+# Licensed subject to the terms of the separately executed evaluation license agreement between
+# Intel Corporation and you.

 import numpy as np

@@ -12,17 +13,21 @@ class TemplateTaskRunner(PyTorchTaskRunner):
     """Template Task Runner for PyTorch.

-    This class should be used as a template to create a custom Task Runner for your specific model and training workload.
+    This class should be used as a template to create a custom Task Runner for your specific model
+    and training workload.
     After generating this template, you should:
-    1. Define your model, optimizer, and loss function as you would in PyTorch. PyTorchTaskRunner inherits from torch.nn.Module.
-    2. Implement the `train_` and `validate_` functions to define a single train and validate epoch of your workload.
+    1. Define your model, optimizer, and loss function as you would in PyTorch.
+       PyTorchTaskRunner inherits from torch.nn.Module.
+    2. Implement the `train_` and `validate_` functions to define a single train
+       and validate epoch of your workload.
     3. Modify the `plan.yaml` file to use this Task Runner.

     The `plan.yaml` modifications should be done under the `/plan/plan.yaml` section:
     ```
     task_runner:
         defaults : plan/defaults/task_runner.yaml
-        template: src.taskrunner.TemplateTaskRunner # Modify this line appropriately if you change the class name
+        template: src.taskrunner.TemplateTaskRunner # Modify this line appropriately
+        # if you change the class name
         settings:
             # Add additional arguments that you wish to pass through `__init__`
     ```

@@ -87,8 +92,8 @@ def validate_(
         """Single validation epoch.

         Args:
-            validation_dataloader: Validation dataset batch generator. Yields (samples, targets) tuples.
-                of size = `self.validation_dataloader.batch_size`.
+            validation_dataloader: Validation dataset batch generator. Yields (samples, targets)
+                tuples of size = `self.validation_dataloader.batch_size`.

         Returns:
             Metric: An object containing the name of the metric and its value as an np.ndarray.
diff --git a/openfl/interface/cli.py b/openfl/interface/cli.py
index ba7b9c2133..ff26eddc57 100755
--- a/openfl/interface/cli.py
+++ b/openfl/interface/cli.py
@@ -182,9 +182,20 @@ def cli(context, log_level, no_warnings):
     # This will be overridden later with user selected debugging level
     disable_warnings()
     log_file = os.getenv("LOG_FILE")
-    # Validate log_file using allow list approach
-    if log_file and not re.match(r"^[\w\-.]+$", log_file):
-        raise ValueError("Invalid log file path")
+    # Validate log_file with tighter restrictions
+    if log_file:
+        log_file = os.path.normpath(log_file)
+        if (
+            not re.match(r"^logs/[\w\-.]+$", log_file)
+            or ".." in log_file
+            or log_file.startswith("/")
+        ):
+            raise ValueError("Invalid log file path")
+        # Ensure the log file is in the 'logs' directory
+        allowed_directory = Path("logs").resolve()
+        full_path = (allowed_directory / log_file).resolve()
+        if not str(full_path).startswith(str(allowed_directory)):
+            raise ValueError("Log file path is not allowed")
     setup_logging(log_level, log_file)
     sys.stdout.reconfigure(encoding="utf-8")

diff --git a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/dataset.py b/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/dataset.py
index 80fec58638..47939f9951 100644
--- a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/dataset.py
+++ b/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/dataset.py
@@ -3,7 +3,7 @@
 from torch.utils.data import Dataset, DataLoader
 from torchvision import transforms as tsf

-from tests.github.interactive_api_director.experiments.pytorch_kvasir_unet.data_loader import read_data
+from tests.github.interactive_api_director.experiments.pytorch_kvasir_unet.data_loader import read_data  # noqa: E501
 from openfl.interface.interactive_api.experiment import DataInterface


diff --git a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/experiment.py b/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/experiment.py
index 4b8e0b48fd..da57352169 100644
--- a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/experiment.py
+++ b/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/experiment.py
@@ -4,7 +4,7 @@ import PIL
 import torch.optim as optim

 from tests.github.interactive_api_director.experiments.pytorch_kvasir_unet.model import UNet
-from tests.github.interactive_api_director.experiments.pytorch_kvasir_unet.tasks import task_interface
+from tests.github.interactive_api_director.experiments.pytorch_kvasir_unet.tasks import task_interface  # noqa: E501
 from tests.github.interactive_api_director.experiments.pytorch_kvasir_unet.tasks import validate
 from torch.utils.data import DataLoader
 from torch.utils.data import Dataset
@@ -37,7 +37,8 @@ def run():
     optimizer_adam = optim.Adam(model_unet.parameters(), lr=1e-4)

     framework_adapter = 'openfl.plugins.frameworks_adapters.pytorch_adapter.FrameworkAdapterPlugin'
-    MI = ModelInterface(model=model_unet, optimizer=optimizer_adam, framework_plugin=framework_adapter)
+    MI = ModelInterface(model=model_unet, optimizer=optimizer_adam,
+                        framework_plugin=framework_adapter)

     # Save the initial model state
     initial_model = deepcopy(model_unet)
@@ -60,7 +61,8 @@ def run():


 # Register dataset
-# We extract User dataset class implementation. Is it convinient? What if the dataset is not a class?
+# We extract User dataset class implementation.
+# Is it convenient? What if the dataset is not a class?
 class KvasirShardDataset(Dataset):

     def __init__(self, dataset):
@@ -113,7 +115,8 @@ def shard_descriptor(self, shard_descriptor):

         validation_size = max(1, int(len(self._shard_dataset) * self.validation_fraction))
         self.train_indeces = np.arange(len(self._shard_dataset) - validation_size)
-        self.val_indeces = np.arange(len(self._shard_dataset) - validation_size, len(self._shard_dataset))
+        self.val_indeces = np.arange(len(self._shard_dataset) - validation_size,
+                                     len(self._shard_dataset))

     def get_train_loader(self, **kwargs):
         """
diff --git a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/model.py b/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/model.py
index f8102c1ad4..ab5ce27a84 100644
--- a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/model.py
+++ b/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/model.py
@@ -1,7 +1,7 @@
 import torch
 import torch.nn as nn

-from tests.github.interactive_api_director.experiments.pytorch_kvasir_unet.layers import DoubleConv, Down, Up
+from tests.github.interactive_api_director.experiments.pytorch_kvasir_unet.layers import DoubleConv, Down, Up  # noqa: E501

 """
 UNet model definition
diff --git a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/tasks.py b/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/tasks.py
index ca9adb339a..555240dd6c 100644
--- a/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/tasks.py
+++ b/tests/github/interactive_api_director/experiments/pytorch_kvasir_unet/tasks.py
@@ -3,7 +3,7 @@
 import numpy as np

 from openfl.interface.interactive_api.experiment import TaskInterface
-from tests.github.interactive_api_director.experiments.pytorch_kvasir_unet.layers import soft_dice_loss, soft_dice_coef
+from tests.github.interactive_api_director.experiments.pytorch_kvasir_unet.layers import soft_dice_loss, soft_dice_coef  # noqa: E501

 task_interface = TaskInterface()

@@ -58,8 +58,8 @@ def validate(unet_model, val_loader, device):
         for data, target in val_loader:
             samples = target.shape[0]
             total_samples += samples
-            data, target = torch.tensor(data).to(device), \
-                torch.tensor(target).to(device, dtype=torch.int64)
+            data = torch.tensor(data).to(device)
+            target = torch.tensor(target).to(device, dtype=torch.int64)
             output = unet_model(data)
             val = soft_dice_coef(output, target)
             val_score += val.sum().cpu().numpy()
diff --git a/tests/github/interactive_api_director/experiments/tensorflow_mnist/dataset.py b/tests/github/interactive_api_director/experiments/tensorflow_mnist/dataset.py
index 94456ed917..f1967bfeb0 100644
--- a/tests/github/interactive_api_director/experiments/tensorflow_mnist/dataset.py
+++ b/tests/github/interactive_api_director/experiments/tensorflow_mnist/dataset.py
@@ -25,7 +25,8 @@ def shard_descriptor(self, shard_descriptor):
         self._shard_descriptor = shard_descriptor
         validation_size = len(self.shard_descriptor) // 10
         self.train_indices = np.arange(len(self.shard_descriptor) - validation_size)
-        self.val_indices = np.arange(len(self.shard_descriptor) - validation_size, len(self.shard_descriptor))
+        self.val_indices = np.arange(len(self.shard_descriptor) - validation_size,
+                                     len(self.shard_descriptor))

     def get_train_loader(self, **kwargs):
         """
diff --git a/tests/github/interactive_api_director/experiments/tensorflow_mnist/experiment.py b/tests/github/interactive_api_director/experiments/tensorflow_mnist/experiment.py
index 6007b5103d..ec7bed09da 100644
--- a/tests/github/interactive_api_director/experiments/tensorflow_mnist/experiment.py
+++ b/tests/github/interactive_api_director/experiments/tensorflow_mnist/experiment.py
@@ -6,9 +6,9 @@
 from tests.github.interactive_api_director.experiments.tensorflow_mnist.settings import model
 from tests.github.interactive_api_director.experiments.tensorflow_mnist.settings import optimizer
 from tests.github.interactive_api_director.experiments.tensorflow_mnist.settings import loss_fn
-from tests.github.interactive_api_director.experiments.tensorflow_mnist.settings import train_acc_metric
-from tests.github.interactive_api_director.experiments.tensorflow_mnist.settings import val_acc_metric
-from tests.github.interactive_api_director.experiments.tensorflow_mnist.envoy.shard_descriptor import MNISTShardDescriptor
+from tests.github.interactive_api_director.experiments.tensorflow_mnist.settings import train_acc_metric  # noqa: E501
+from tests.github.interactive_api_director.experiments.tensorflow_mnist.settings import val_acc_metric  # noqa: E501
+from tests.github.interactive_api_director.experiments.tensorflow_mnist.envoy.shard_descriptor import MNISTShardDescriptor  # noqa: E501


 def run():
@@ -16,19 +16,22 @@ def run():
     client_id = 'frontend'

     # 1) Run with API layer - Director mTLS
-    # If the user wants to enable mTLS their must provide CA root chain, and signed key pair to the federation interface
+    # If the user wants to enable mTLS they must provide CA root chain,
+    # and signed key pair to the federation interface
     # cert_chain = 'cert/root_ca.crt'
     # API_certificate = 'cert/frontend.crt'
     # API_private_key = 'cert/frontend.key'
-    # federation = Federation(client_id='frontend', director_node_fqdn='localhost', director_port='50051',
-    #                         cert_chain=cert_chain, api_cert=API_certificate, api_private_key=API_private_key)
+    # federation = Federation(client_id='frontend', director_node_fqdn='localhost',
+    #                         director_port='50051', cert_chain=cert_chain,
+    #                         api_cert=API_certificate, api_private_key=API_private_key)

-    # --------------------------------------------------------------------------------------------------------------------
+    # ---------------------------------------------------------------------------------------------

     # 2) Run with TLS disabled (trusted environment)
     # Federation can also determine local fqdn automatically
-    federation = Federation(client_id=client_id, director_node_fqdn='localhost', director_port='50051', tls=False)
+    federation = Federation(client_id=client_id, director_node_fqdn='localhost',
+                            director_port='50051', tls=False)

     shard_registry = federation.get_shard_registry()
     print(shard_registry)

@@ -49,7 +52,8 @@ def function_defined_in_notebook(some_parameter):
     TI = TaskInterface()

     # Task interface currently supports only standalone functions.
-    @TI.register_fl_task(model='model', data_loader='train_dataset', device='device', optimizer='optimizer')
+    @TI.register_fl_task(model='model', data_loader='train_dataset',
+                         device='device', optimizer='optimizer')
     def train(model, train_dataset, optimizer, device, loss_fn=loss_fn, warmup=False):
         # Iterate over the batches of the dataset.
         for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
@@ -101,14 +105,14 @@ def validate(model, val_dataset, device):
     # create an experimnet in federation
     experiment_name = 'mnist_test_experiment'
-    fl_experiment = FLExperiment(
-        federation=federation,
-        experiment_name=experiment_name,
-        serializer_plugin='openfl.plugins.interface_serializer.'
-                          'keras_serializer.KerasSerializer')
+    fl_experiment = FLExperiment(federation=federation,
+                                 experiment_name=experiment_name,
+                                 serializer_plugin='openfl.plugins.interface_serializer.'
+                                                   'keras_serializer.KerasSerializer')

     # If I use autoreload I got a pickling error

-    # The following command zips the workspace and python requirements to be transfered to collaborator nodes
+    # The following command zips the workspace and python requirements
+    # to be transferred to collaborator nodes
     fl_experiment.start(model_provider=MI,
                         task_keeper=TI,
                         data_loader=fed_dataset,
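Aside from the line wrapping and `# noqa: E501` markers, the main behavioral change in this patch is the tightened `LOG_FILE` validation in `openfl/interface/cli.py`. Below is a minimal standalone sketch of that logic; the `check_log_file` helper name and the example paths are illustrative and not part of the patch.

```python
import os
import re
from pathlib import Path


def check_log_file(log_file: str) -> Path:
    """Sketch of the validation added to cli() above (helper name is hypothetical)."""
    log_file = os.path.normpath(log_file)
    # Only accept simple file names under a 'logs/' prefix, with no traversal or absolute paths.
    if (
        not re.match(r"^logs/[\w\-.]+$", log_file)
        or ".." in log_file
        or log_file.startswith("/")
    ):
        raise ValueError("Invalid log file path")
    # Resolve against ./logs; note that the accepted value already carries the 'logs/' prefix,
    # so the resolved path lands under logs/logs/.
    allowed_directory = Path("logs").resolve()
    full_path = (allowed_directory / log_file).resolve()
    if not str(full_path).startswith(str(allowed_directory)):
        raise ValueError("Log file path is not allowed")
    return full_path


# Example: check_log_file("logs/cli.log") passes, while "cli.log", "../etc/passwd",
# and "/tmp/x.log" are all rejected by the regex check.
```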