change opt treatment and device policy to enum values #374

Status: Open. Wants to merge 10 commits into base: develop.
Changes from 2 commits
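
This PR replaces the free-form string values for opt_treatment and device_assignment_policy with the OptTreatment and DevicePolicy enums from openfl.component.collaborator.collaborator, updates the tutorial notebooks to pass enum members, and keeps the legacy string spellings working in the Collaborator constructor. A minimal sketch of the new call style, assuming the fl_experiment, MI, TI, and fed_dataset objects set up earlier in the affected tutorial notebooks:

    from openfl.component.collaborator.collaborator import DevicePolicy, OptTreatment

    # Enum members replace the previous string spellings such as
    # 'CONTINUE_GLOBAL' and 'CUDA_PREFERRED'.
    fl_experiment.start(
        model_provider=MI,
        task_keeper=TI,
        data_loader=fed_dataset,
        rounds_to_train=5,
        opt_treatment=OptTreatment.CONTINUE_GLOBAL,
        device_assignment_policy=DevicePolicy.CUDA_PREFERRED,
    )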
7 changes: 4 additions & 3 deletions openfl-tutorials/Federated_FedProx_Keras_MNIST_Tutorial.ipynb
@@ -299,7 +299,8 @@
"outputs": [],
"source": [
"#Run experiment, return trained FederatedModel\n",
"final_fl_model = fx.run_experiment(collaborators,override_config={'aggregator.settings.rounds_to_train':5, 'collaborator.settings.opt_treatment': 'CONTINUE_GLOBAL'})"
"from openfl.component.collaborator.collaborator import OptTreatment\n",
"final_fl_model = fx.run_experiment(collaborators,override_config={'aggregator.settings.rounds_to_train':5, 'collaborator.settings.opt_treatment': OptTreatment.CONTINUE_GLOBAL})"
]
},
{
@@ -354,7 +355,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -368,7 +369,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.8"
"version": "3.8.9"
Review comment (Contributor): Please clear the notebook metadata (see the nbformat sketch after this file's diff).

}
},
"nbformat": 4,
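One hedged way to address the reviewer's request to clear the notebook metadata, using nbformat (a sketch; exactly which fields the maintainers want stripped is their call):

    import nbformat

    path = 'openfl-tutorials/Federated_FedProx_Keras_MNIST_Tutorial.ipynb'
    nb = nbformat.read(path, as_version=4)

    # Drop environment-specific details such as the local Python patch version
    # and the '(ipykernel)' kernel display name.
    nb.metadata.pop('language_info', None)
    nb.metadata.get('kernelspec', {}).pop('display_name', None)

    nbformat.write(nb, path)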
Changes to another tutorial notebook (file path not captured)
@@ -429,13 +429,15 @@
"outputs": [],
"source": [
"# The following command zips the workspace and python requirements to be transfered to collaborator nodes\n",
"from openfl.component.collaborator.collaborator import DevicePolicy, OptTreatment\n",
"\n",
"fl_experiment.start(\n",
" model_provider=MI,\n",
" task_keeper=TI,\n",
" data_loader=fed_dataset,\n",
" rounds_to_train=10,\n",
" opt_treatment=\"CONTINUE_GLOBAL\",\n",
" device_assignment_policy=\"CUDA_PREFERRED\",\n",
" opt_treatment=OptTreatment.CONTINUE_GLOBAL,\n",
" device_assignment_policy=DevicePolicy.CUDA_PREFERRED,\n",
")"
]
},
Changes to another tutorial notebook (file path not captured)
@@ -510,12 +510,14 @@
"outputs": [],
"source": [
"# The following command zips the workspace and python requirements to be transfered to collaborator nodes\n",
"from openfl.component.collaborator.collaborator import DevicePolicy, OptTreatment\n",
"\n",
"fl_experiment.start(model_provider=MI,\n",
" task_keeper=TI,\n",
" data_loader=fed_dataset,\n",
" rounds_to_train=5,\n",
" opt_treatment='CONTINUE_GLOBAL',\n",
" device_assignment_policy='CUDA_PREFERRED')"
" opt_treatment=OptTreatment.CONTINUE_GLOBAL,\n",
" device_assignment_policy=DevicePolicy.CUDA_PREFERRED)"
]
},
{
Changes to another tutorial notebook (file path not captured)
@@ -463,12 +463,15 @@
"outputs": [],
"source": [
"# The following command zips the workspace and python requirements to be transfered to collaborator nodes\n",
"from openfl.component.collaborator.collaborator import DevicePolicy, OptTreatment\n",
"\n",
"fl_experiment.start(\n",
" model_provider=model_interface, \n",
" task_keeper=task_interface,\n",
" data_loader=fed_dataset,\n",
" rounds_to_train=5,\n",
" opt_treatment='CONTINUE_GLOBAL'\n",
" opt_treatment=OptTreatment.CONTINUE_GLOBAL,\n",
" device_assignment_policy=DevicePolicy.CUDA_PREFERRED\n",
")"
]
},
Changes to another tutorial notebook (file path not captured)
@@ -474,12 +474,15 @@
"outputs": [],
"source": [
"# The following command zips the workspace and python requirements to be transfered to collaborator nodes\n",
"from openfl.component.collaborator.collaborator import DevicePolicy, OptTreatment\n",
"\n",
"fl_experiment.start(\n",
" model_provider=model_interface, \n",
" task_keeper=task_interface,\n",
" data_loader=fed_dataset,\n",
" rounds_to_train=5,\n",
" opt_treatment='CONTINUE_GLOBAL'\n",
" opt_treatment=OptTreatment.CONTINUE_GLOBAL,\n",
" device_assignment_policy=DevicePolicy.CUDA_PREFERRED\n",
")"
]
},
Changes to another tutorial notebook (file path not captured)
@@ -438,13 +438,15 @@
"metadata": {},
"outputs": [],
"source": [
"from openfl.component.collaborator.collaborator import DevicePolicy, OptTreatment\n",
"\n",
"fl_experiment.start(\n",
" model_provider=MI,\n",
" task_keeper=TI,\n",
" data_loader=fed_dataset,\n",
" rounds_to_train=2,\n",
" opt_treatment=\"CONTINUE_GLOBAL\",\n",
" device_assignment_policy=\"CUDA_PREFERRED\",\n",
" opt_treatment=OptTreatment.CONTINUE_GLOBAL,\n",
" device_assignment_policy=DevicePolicy.CUDA_PREFERRED,\n",
")"
]
},
Changes to another tutorial notebook (file path not captured)
@@ -484,12 +484,14 @@
"# If I use autoreload I got a pickling error\n",
"\n",
"# The following command zips the workspace and python requirements to be transfered to collaborator nodes\n",
"from openfl.component.collaborator.collaborator import DevicePolicy, OptTreatment\n",
"\n",
"fl_experiment.start(model_provider=MI, \n",
" task_keeper=TI,\n",
" data_loader=fed_dataset,\n",
" rounds_to_train=2,\n",
" opt_treatment='CONTINUE_GLOBAL',\n",
" device_assignment_policy='CUDA_PREFERRED')\n"
" opt_treatment=OptTreatment.CONTINUE_GLOBAL,\n",
" device_assignment_policy=DevicePolicy.CUDA_PREFERRED)\n"
]
},
{
@@ -584,7 +586,7 @@
"source": [
"MI = ModelInterface(model=best_model, optimizer=optimizer_adam, framework_plugin=framework_adapter)\n",
"fl_experiment.start(model_provider=MI, task_keeper=TI, data_loader=fed_dataset, rounds_to_train=4, \\\n",
" opt_treatment='CONTINUE_GLOBAL')"
" opt_treatment=OptTreatment.CONTINUE_GLOBAL)"
]
},
{
Changes to another tutorial notebook (file path not captured)
@@ -596,13 +596,15 @@
"metadata": {},
"outputs": [],
"source": [
"from openfl.component.collaborator.collaborator import DevicePolicy, OptTreatment\n",
"\n",
"fl_experiment.start(\n",
" model_provider=MI,\n",
" task_keeper=TI,\n",
" data_loader=fed_dataset,\n",
" rounds_to_train=10,\n",
" opt_treatment=\"CONTINUE_GLOBAL\",\n",
" device_assignment_policy=\"CUDA_PREFERRED\",\n",
" opt_treatment=OptTreatment.CONTINUE_GLOBAL,\n",
" device_assignment_policy=DevicePolicy.CUDA_PREFERRED,\n",
")"
]
},
Changes to another tutorial notebook (file path not captured)
@@ -965,12 +965,14 @@
"# If I use autoreload I got a pickling error\n",
"\n",
"# The following command zips the workspace and python requirements to be transfered to collaborator nodes\n",
"from openfl.component.collaborator.collaborator import DevicePolicy, OptTreatment\n",
"\n",
"fl_experiment.start(model_provider=MI, \n",
" task_keeper=TI,\n",
" data_loader=fed_dataset,\n",
" rounds_to_train=10,\n",
" opt_treatment='CONTINUE_GLOBAL',\n",
" device_assignment_policy='CUDA_PREFERRED')\n"
" opt_treatment=OptTreatment.CONTINUE_GLOBAL,\n",
" device_assignment_policy=DevicePolicy.CUDA_PREFERRED)\n"
]
},
{
Changes to another tutorial notebook (file path not captured)
@@ -543,11 +543,14 @@
"# If I use autoreload I got a pickling error\n",
"\n",
"# The following command zips the workspace and python requirements to be transfered to collaborator nodes\n",
"from openfl.component.collaborator.collaborator import DevicePolicy, OptTreatment\n",
"\n",
"fl_experiment.start(model_provider=MI, \n",
" task_keeper=TI,\n",
" data_loader=fed_dataset,\n",
" rounds_to_train=3,\n",
" opt_treatment='RESET')"
" opt_treatment=OptTreatment.RESET,\n",
" device_assignment_policy=DevicePolicy.CUDA_PREFERRED)"
]
},
{
@@ -590,4 +593,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}
Changes to another tutorial notebook (file path not captured)
@@ -450,12 +450,15 @@
"outputs": [],
"source": [
"# The following command zips the workspace and python requirements to be transfered to collaborator nodes\n",
"from openfl.component.collaborator.collaborator import DevicePolicy, OptTreatment\n",
"\n",
"fl_experiment.start(\n",
" model_provider=model_interface, \n",
" task_keeper=task_interface,\n",
" data_loader=fed_dataset,\n",
" rounds_to_train=5,\n",
" opt_treatment='CONTINUE_GLOBAL'\n",
" opt_treatment=OptTreatment.CONTINUE_GLOBAL,\n",
" device_assignment_policy=DevicePolicy.CUDA_PREFERRED\n",
")"
]
},
Changes to another tutorial notebook (file path not captured)
@@ -360,11 +360,14 @@
"outputs": [],
"source": [
"# The following command zips the workspace and python requirements to be transfered to collaborator nodes\n",
"from openfl.component.collaborator.collaborator import DevicePolicy, OptTreatment\n",
"\n",
"fl_experiment.start(model_provider=MI, \n",
" task_keeper=TI,\n",
" data_loader=fed_dataset,\n",
" rounds_to_train=5,\n",
" opt_treatment='CONTINUE_GLOBAL')"
" opt_treatment=OptTreatment.CONTINUE_GLOBAL,\n",
" device_assignment_policy=DevicePolicy.CUDA_PREFERRED)"
]
},
{
Changes to another tutorial notebook (file path not captured)
@@ -376,11 +376,14 @@
"# If I use autoreload I got a pickling error\n",
"\n",
"# The following command zips the workspace and python requirements to be transfered to collaborator nodes\n",
"from openfl.component.collaborator.collaborator import DevicePolicy, OptTreatment\n",
"\n",
"fl_experiment.start(model_provider=MI, \n",
" task_keeper=TI,\n",
" data_loader=fed_dataset,\n",
" rounds_to_train=20,\n",
" opt_treatment='RESET')"
" opt_treatment=OptTreatment.RESET,\n",
" device_assignment_policy=DevicePolicy.CUDA_PREFERRED)"
]
},
{
14 changes: 9 additions & 5 deletions openfl/component/collaborator/collaborator.py
@@ -19,7 +19,6 @@ class DevicePolicy(Enum):
"""Device assignment policy."""

CPU_ONLY = 1

CUDA_PREFERRED = 2


@@ -74,8 +73,8 @@ def __init__(self,
client,
task_runner,
task_config,
opt_treatment='RESET',
device_assignment_policy='CPU_ONLY',
opt_treatment=OptTreatment.RESET,
device_assignment_policy=DevicePolicy.CPU_ONLY,
delta_updates=False,
compression_pipeline=None,
db_store_rounds=1,
@@ -106,14 +105,19 @@ def __init__(self,
self.logger = getLogger(__name__)

# RESET/CONTINUE_LOCAL/CONTINUE_GLOBAL
if hasattr(OptTreatment, opt_treatment):
if isinstance(opt_treatment, str) and hasattr(OptTreatment, opt_treatment):
self.opt_treatment = OptTreatment[opt_treatment]
elif isinstance(opt_treatment, Enum):
self.opt_treatment = opt_treatment
else:
self.logger.error(f'Unknown opt_treatment: {opt_treatment.name}.')
raise NotImplementedError(f'Unknown opt_treatment: {opt_treatment}.')

if hasattr(DevicePolicy, device_assignment_policy):
if isinstance(device_assignment_policy, str) and hasattr(
DevicePolicy, device_assignment_policy):
self.device_assignment_policy = DevicePolicy[device_assignment_policy]
elif isinstance(device_assignment_policy, Enum):
self.device_assignment_policy = device_assignment_policy
else:
self.logger.error('Unknown device_assignment_policy: '
f'{device_assignment_policy.name}.')
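With the change above, the constructor accepts either an enum member or the legacy string spelling for both settings. A standalone sketch of that normalization pattern (the enum's numeric values and the resolve_opt_treatment helper are illustrative, not part of the PR):

    from enum import Enum

    class OptTreatment(Enum):
        # Mirrors the options named in the collaborator docstring; values are illustrative.
        RESET = 1
        CONTINUE_LOCAL = 2
        CONTINUE_GLOBAL = 3

    def resolve_opt_treatment(value):
        # Enum members pass through unchanged.
        if isinstance(value, OptTreatment):
            return value
        # Legacy string spellings such as 'CONTINUE_GLOBAL' still resolve by name.
        if isinstance(value, str) and hasattr(OptTreatment, value):
            return OptTreatment[value]
        raise NotImplementedError(f'Unknown opt_treatment: {value}.')

    assert resolve_opt_treatment('RESET') is OptTreatment.RESET
    assert resolve_opt_treatment(OptTreatment.CONTINUE_GLOBAL) is OptTreatment.CONTINUE_GLOBAL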
3 changes: 2 additions & 1 deletion openfl/federated/task/runner_fe.py
@@ -4,6 +4,7 @@

import numpy as np

from openfl.component.collaborator.collaborator import OptTreatment
from openfl.utilities import split_tensor_dict_for_holdouts
from openfl.utilities import TensorKey
from .runner import TaskRunner
@@ -148,7 +149,7 @@ def train(self, col_name, round_num, input_tensor_dict, epochs, **kwargs):
# A work around could involve doing a single epoch of training
# on random data to get the optimizer names, and then throwing away
# the model.
if self.opt_treatment == 'CONTINUE_GLOBAL':
if self.opt_treatment == OptTreatment.CONTINUE_GLOBAL:
self.initialize_tensorkeys_for_functions(with_opt_vars=True)

return global_tensor_dict, local_tensor_dict
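These task-runner updates are required rather than cosmetic: once the collaborator stores an enum member, comparing it against the old string silently evaluates to False and the with_opt_vars branches would never run. A quick illustration, assuming the import path shown in the diff:

    from openfl.component.collaborator.collaborator import OptTreatment

    opt_treatment = OptTreatment.CONTINUE_GLOBAL

    print(opt_treatment == 'CONTINUE_GLOBAL')             # False: an enum member never equals a plain string
    print(opt_treatment == OptTreatment.CONTINUE_GLOBAL)  # True
    print(opt_treatment.name == 'CONTINUE_GLOBAL')        # True: a name-based check would also work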
7 changes: 4 additions & 3 deletions openfl/federated/task/runner_keras.py
@@ -11,6 +11,7 @@

import numpy as np

from openfl.component.collaborator.collaborator import OptTreatment
from openfl.utilities import Metric
from openfl.utilities import split_tensor_dict_for_holdouts
from openfl.utilities import TensorKey
@@ -51,10 +52,10 @@ def rebuild_model(self, round_num, input_tensor_dict, validation=False):
-------
None
"""
if self.opt_treatment == 'RESET':
if self.opt_treatment == OptTreatment.RESET:
self.reset_opt_vars()
self.set_tensor_dict(input_tensor_dict, with_opt_vars=False)
elif (round_num > 0 and self.opt_treatment == 'CONTINUE_GLOBAL'
elif (round_num > 0 and self.opt_treatment == OptTreatment.CONTINUE_GLOBAL
and not validation):
self.set_tensor_dict(input_tensor_dict, with_opt_vars=True)
else:
@@ -139,7 +140,7 @@ def train(self, col_name, round_num, input_tensor_dict,
# these are only created after training occurs. A work around could
# involve doing a single epoch of training on random data to get the
# optimizer names, and then throwing away the model.
if self.opt_treatment == 'CONTINUE_GLOBAL':
if self.opt_treatment == OptTreatment.CONTINUE_GLOBAL:
self.initialize_tensorkeys_for_functions(with_opt_vars=True)

return global_tensor_dict, local_tensor_dict
7 changes: 4 additions & 3 deletions openfl/federated/task/runner_pt.py
@@ -12,6 +12,7 @@
import torch.nn as nn
import tqdm

from openfl.component.collaborator.collaborator import OptTreatment
from openfl.utilities import Metric
from openfl.utilities import split_tensor_dict_for_holdouts
from openfl.utilities import TensorKey
@@ -63,11 +64,11 @@ def rebuild_model(self, round_num, input_tensor_dict, validation=False):
Returns:
None
"""
if self.opt_treatment == 'RESET':
if self.opt_treatment == OptTreatment.RESET:
self.reset_opt_vars()
self.set_tensor_dict(input_tensor_dict, with_opt_vars=False)
elif (self.training_round_completed
and self.opt_treatment == 'CONTINUE_GLOBAL' and not validation):
and self.opt_treatment == OptTreatment.CONTINUE_GLOBAL and not validation):
self.set_tensor_dict(input_tensor_dict, with_opt_vars=True)
else:
self.set_tensor_dict(input_tensor_dict, with_opt_vars=False)
@@ -207,7 +208,7 @@ def train_batches(self, col_name, round_num, input_tensor_dict,
# these are only created after training occurs. A work around could
# involve doing a single epoch of training on random data to get the
# optimizer names, and then throwing away the model.
if self.opt_treatment == 'CONTINUE_GLOBAL':
if self.opt_treatment == OptTreatment.CONTINUE_GLOBAL:
self.initialize_tensorkeys_for_functions(with_opt_vars=True)

# This will signal that the optimizer values are now present,
7 changes: 4 additions & 3 deletions openfl/federated/task/runner_tf.py
@@ -7,6 +7,7 @@
import tensorflow.compat.v1 as tf
from tqdm import tqdm

from openfl.component.collaborator.collaborator import OptTreatment
from openfl.utilities import split_tensor_dict_for_holdouts
from openfl.utilities import TensorKey
from .runner import TaskRunner
@@ -74,10 +75,10 @@ def rebuild_model(self, round_num, input_tensor_dict, validation=False):
Returns:
None
"""
if self.opt_treatment == 'RESET':
if self.opt_treatment == OptTreatment.RESET:
self.reset_opt_vars()
self.set_tensor_dict(input_tensor_dict, with_opt_vars=False)
elif (round_num > 0 and self.opt_treatment == 'CONTINUE_GLOBAL'
elif (round_num > 0 and self.opt_treatment == OptTreatment.CONTINUE_GLOBAL
and not validation):
self.set_tensor_dict(input_tensor_dict, with_opt_vars=True)
else:
@@ -172,7 +173,7 @@ def train_batches(self, col_name, round_num, input_tensor_dict,
# these are only created after training occurs. A work around could
# involve doing a single epoch of training on random data to get the
# optimizer names, and then throwing away the model.
if self.opt_treatment == 'CONTINUE_GLOBAL':
if self.opt_treatment == OptTreatment.CONTINUE_GLOBAL:
self.initialize_tensorkeys_for_functions(with_opt_vars=True)

return global_tensor_dict, local_tensor_dict