Skip to content

Commit

Permalink
Add system generators
Browse files Browse the repository at this point in the history
  • Loading branch information
bacox committed Mar 11, 2022
1 parent dbcf49b commit 849021f
Show file tree
Hide file tree
Showing 22 changed files with 444 additions and 34 deletions.
1 change: 1 addition & 0 deletions configs/dev_mnist/descr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,4 @@ system:
# nic: 'enp3s0'
clients:
amount: 2
num_clients: 2
9 changes: 6 additions & 3 deletions configs/dev_mnist/exps/fedavg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ profiling_time: 100
warmup_round: false
output_location: 'output/dev_p2'
tensor_board_active: true
clients_per_round: 2
clients_per_round: 4
node_groups:
slow: [1, 1]
medium: [2, 2]
Expand All @@ -28,8 +28,11 @@ system:
nic: 'eth0'
# nic: 'enp3s0'
clients:
amount: 2
amount: 4
num_clients: 4
# Individual configuration
offload_stategy: vanilla
deadline: 500
experiment_prefix: 'dev_mnist_fedavg'
single_machine: false
real_time: true
experiment_prefix: 'util_fedavg'
38 changes: 38 additions & 0 deletions configs/dev_mnist/exps/fedavg_direct.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
---
# Experiment configuration
total_epochs: 3
epochs_per_cycle: 1
wait_for_clients: true
net: MNISTCNN
dataset: mnist
# Use CUDA if available; setting to false will force CPU
cuda: false
profiling_time: 100
warmup_round: false
output_location: 'output/dev_p2'
tensor_board_active: true
clients_per_round: 2
node_groups:
slow: [1, 1]
medium: [2, 2]
fast: [3, 3]
sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default)
#sampler: "uniform" # "limit labels" || "q sampler" || "dirichlet" || "uniform" (default)
sampler_args:
- 0.07 # label limit || q probability || alpha || unused
- 42 # random seed || random seed || random seed || unused
system:
federator:
# hostname: '131.180.203.94'
hostname: '10.5.0.11'
nic: 'eth0'
# nic: 'enp3s0'
clients:
amount: 2
num_clients: 2
# Individual configuration
offload_stategy: vanilla
deadline: 500
single_machine: true
real_time: false
experiment_prefix: 'util_fedavg_direct'
4 changes: 3 additions & 1 deletion configs/dev_mnist/fedavg.cfg.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Individual configuration
offload_stategy: vanilla
deadline: 500
deadline: 500
single_machine: false
real_time: true
5 changes: 5 additions & 0 deletions configs/dev_mnist/fedavg_direct.cfg.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Individual configuration
offload_stategy: vanilla
deadline: 500
single_machine: true
real_time: false
9 changes: 4 additions & 5 deletions configs/dev_mnist/gen.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
from pathlib import Path

if __name__ == '__main__':
base_path = f'configs/{Path(__file__).parent.name}'
path = Path(base_path)
descr_path = path / 'descr.yaml'
base_path = Path(__file__).parent
descr_path = base_path / 'descr.yaml'

exp_cfg_list = [x for x in path.iterdir() if '.cfg' in x.suffixes]
exp_cfg_list = [x for x in base_path.iterdir() if '.cfg' in x.suffixes]
descr_data = ''
with open(descr_path) as descr_f:
descr_data = descr_f.read()
exps_path = path / 'exps'
exps_path = base_path / 'exps'
exps_path.mkdir(parents=True, exist_ok=True)
for exp_cfg in exp_cfg_list:
exp_cfg_data = ''
Expand Down
2 changes: 1 addition & 1 deletion configs/dev_mnist/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
if __name__ == '__main__':
name = 'dev'
generate_docker(name)
base_path = f'configs/{Path(__file__).parent.name}'
base_path = f'{Path(__file__).parent}'
exp_list = [
'fedavg.yaml',
]
Expand Down
1 change: 1 addition & 0 deletions configs/dev_mnist_all/exps/fedavg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,4 @@ system:
offload_stategy: vanilla
deadline: 500000
experiment_prefix: 'dev_mnist_all_fedavg'
single_machine: false
2 changes: 2 additions & 0 deletions deploy/dev/client_stub_default.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ client_name: # name can be anything
- RANK={rank}
- WORLD_SIZE={world_size}
- EXP_CONFIG=${EXP_CONFIG_FILE}
- MASTER_HOSTNAME=10.5.0.11
- NIC=eth0
ports:
- "5002:5000" # {machine-port}:{docker-port}
depends_on:
Expand Down
2 changes: 2 additions & 0 deletions deploy/dev/client_stub_fast.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ client_name: # name can be anything
- RANK={rank}
- WORLD_SIZE={world_size}
- EXP_CONFIG=${EXP_CONFIG_FILE}
- MASTER_HOSTNAME=10.5.0.11
- NIC=eth0
ports:
- "5002:5000" # {machine-port}:{docker-port}
depends_on:
Expand Down
2 changes: 2 additions & 0 deletions deploy/dev/client_stub_medium.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ client_name: # name can be anything
- RANK={rank}
- WORLD_SIZE={world_size}
- EXP_CONFIG=${EXP_CONFIG_FILE}
- MASTER_HOSTNAME=10.5.0.11
- NIC=eth0
ports:
- "5002:5000" # {machine-port}:{docker-port}
depends_on:
Expand Down
2 changes: 2 additions & 0 deletions deploy/dev/client_stub_slow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ client_name: # name can be anything
- RANK={rank}
- WORLD_SIZE={world_size}
- EXP_CONFIG=${EXP_CONFIG_FILE}
- MASTER_HOSTNAME=10.5.0.11
- NIC=eth0
ports:
- "5002:5000" # {machine-port}:{docker-port}
depends_on:
Expand Down
2 changes: 2 additions & 0 deletions deploy/dev/system_stub.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ services:
- RANK=0
- WORLD_SIZE={world_size}
- EXP_CONFIG=${EXP_CONFIG_FILE}
- MASTER_HOSTNAME=10.5.0.11
- NIC=eth0
ports:
- "5000:5000" # {machine-port}:{docker-port}
networks:
Expand Down
26 changes: 26 additions & 0 deletions deploy/dev_generate/client_stub_medium.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
client_name: # name can be anything
# container_name: federation-lab-client2 # what the name for this container would be
cpuset: {cpu_set}
restart: "no" # never restart the container automatically, e.g. after a crash
build: . # look for the docker file where this file is currently located
volumes:
# - ./docker_data:/opt/federation-lab/data
- ./data:/opt/federation-lab/data
- ./default_models:/opt/federation-lab/default_models
- ./data_loaders:/opt/federation-lab/data_loaders
- ./fltk:/opt/federation-lab/fltk
environment:
- PYTHONUNBUFFERED=1
- RANK={rank}
- WORLD_SIZE={world_size}
- EXP_CONFIG=${EXP_CONFIG_FILE}
- MASTER_HOSTNAME=10.5.0.11
- NIC=eth0
ports:
- "5002:5000" # {machine-port}:{docker-port}
depends_on:
- "fl_server"
deploy:
resources:
limits:
cpus: '2'
19 changes: 19 additions & 0 deletions deploy/dev_generate/description.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
federator:
stub-name: system_stub.yml
pin-cores: true
num-cores: 1
clients:
fast:
stub-name: stub_default.yml
amount: 20
pin-cores: true
num-cores: 1
cpu-speed: 0.5
cpu-variation: 0.16
slow:
stub-name: stub_default.yml
amount: 0
pin-cores: true
num-cores: 1
cpu-speed: 1
cpu-variation: 0
26 changes: 26 additions & 0 deletions deploy/dev_generate/stub_default.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
client_name: # name can be anything
# container_name: federation-lab-client2 # what the name for this container would be
cpuset: '{cpu_set}'
restart: "no" # never restart the container automatically, e.g. after a crash
build: . # look for the docker file where this file is currently located
volumes:
- ./data:/opt/federation-lab/data
# - ./docker_data:/opt/federation-lab/data
- ./default_models:/opt/federation-lab/default_models
- ./data_loaders:/opt/federation-lab/data_loaders
- ./fltk:/opt/federation-lab/fltk
environment:
- PYTHONUNBUFFERED=1
- RANK={rank}
- WORLD_SIZE={world_size}
- EXP_CONFIG=${EXP_CONFIG_FILE}
- MASTER_HOSTNAME=10.5.0.11
- NIC=eth0
ports:
- "5002:5000" # {machine-port}:{docker-port}
depends_on:
- "fl_server"
deploy:
resources:
limits:
cpus: '{num_cpus}'
25 changes: 25 additions & 0 deletions deploy/dev_generate/stub_fast.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
client_name: # name can be anything
cpuset: {cpu_set}
restart: "no" # never restart the container automatically, e.g. after a crash
build: . # look for the docker file where this file is currently located
volumes:
# - ./docker_data:/opt/federation-lab/data
- ./data:/opt/federation-lab/data
- ./default_models:/opt/federation-lab/default_models
- ./data_loaders:/opt/federation-lab/data_loaders
- ./fltk:/opt/federation-lab/fltk
environment:
- PYTHONUNBUFFERED=1
- RANK={rank}
- WORLD_SIZE={world_size}
- EXP_CONFIG=${EXP_CONFIG_FILE}
- MASTER_HOSTNAME=10.5.0.11
- NIC=eth0
ports:
- "5002:5000" # {machine-port}:{docker-port}
depends_on:
- "fl_server"
deploy:
resources:
limits:
cpus: {num_cpus}
29 changes: 29 additions & 0 deletions deploy/dev_generate/system_stub.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Docker Compose definition for a multi-container deployment
version: "3.3"
services:
fl_server: # name can be anything
container_name: federation-lab-server # what the name for this container would be
cpuset: '0-2'
restart: "no" # never restart the container automatically, e.g. after a crash
build: . # look for the docker file where this file is currently located
volumes:
# - ./data/MNIST:/opt/federation-lab/data/MNIST
- ./data:/opt/federation-lab/data
- ./output:/opt/federation-lab/output
- ./fltk:/opt/federation-lab/fltk
environment:
- PYTHONUNBUFFERED=1
- RANK=0
- WORLD_SIZE={world_size}
- EXP_CONFIG=${EXP_CONFIG_FILE}
- MASTER_HOSTNAME=10.5.0.11
- NIC=eth0
ports:
- "5000:5000" # {machine-port}:{docker-port}
networks:
default:
ipv4_address: 10.5.0.11
networks:
default:
external:
name: local_network_dev
35 changes: 26 additions & 9 deletions fltk/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,16 +93,18 @@

from fltk.core.client import Client

print(sys.path)
# print(sys.path)
# from fltk.core.federator import Federator as Fed
print(list(Path.cwd().iterdir()))
# print(list(Path.cwd().iterdir()))
import argparse
from enum import Enum
from pathlib import Path

from fltk.core.federator import Federator
from fltk.util.config import Config
from fltk.util.definitions import Aggregations, Optimizations
from fltk.util.generate_experiments import generate


def run_single(config_path: Path):

Expand Down Expand Up @@ -152,7 +154,7 @@ def run_remote(config_path: Path, rank: int, nic=None, host=None):
init_method=f'tcp://{os.environ["MASTER_ADDR"]}:{os.environ["MASTER_PORT"]}'
)
if rank != 0:
print(f'Starting worker {rank}')
print(f'Starting worker {rank} with world size={config.world_size}')
rpc.init_rpc(
f"client{rank}",
rank=rank,
Expand Down Expand Up @@ -194,10 +196,20 @@ def add_default_arguments(parser):
parser = argparse.ArgumentParser(prog='fltk', description='Experiment launcher for the Federated Learning Testbed (fltk)')
subparsers = parser.add_subparsers(dest="action", required=True)

launch_parser = subparsers.add_parser('launch-util')
util_docker_parser = subparsers.add_parser('util-docker')
util_docker_parser.add_argument('name', type=str)
util_docker_parser.add_argument('--clients', type=int)
util_generate_parser = subparsers.add_parser('util-generate')
util_generate_parser.add_argument('path', type=str)
util_run_parser = subparsers.add_parser('util-run')
util_run_parser.add_argument('path', type=str)

# launch_parser.add_argument('action', choices=['docker', 'generate', 'run'])
# launch_parser.add_argument('path', help='path or key')

remote_parser = subparsers.add_parser('remote')
single_machine_parser = subparsers.add_parser('single')
add_default_arguments(launch_parser)
# add_default_arguments(launch_parser)
add_default_arguments(remote_parser)
add_default_arguments(single_machine_parser)

Expand All @@ -211,10 +223,15 @@ def add_default_arguments(parser):
# util_parser.add_argument('action')
# print(sys.argv)
args = parser.parse_args()
if args.action == 'launch-util':
pass
# run_single(Path(args.config))
if args.action == 'remote':
if args.action == 'util-docker':
print('docker')
elif args.action == 'util-generate':
path = Path(args.path)
print(f'generate for {path}')
generate(path)
elif args.action == 'util-run':
print('run') # run_single(Path(args.config))
elif args.action == 'remote':
run_remote(Path(args.config), args.rank, args.nic, args.host)
else:
# Run single machine mode
Expand Down
Loading

0 comments on commit 849021f

Please sign in to comment.