Skip to content

Commit

Permalink
Update to minimal working version, so many 'features'
Browse files Browse the repository at this point in the history
  • Loading branch information
JMGaljaard committed May 30, 2021
1 parent db7d3aa commit ffd2aed
Show file tree
Hide file tree
Showing 15 changed files with 232 additions and 95 deletions.
1 change: 0 additions & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
venv
default_models
data_loaders
data/cifar-10-batches-py
data/cifar-100-python.tar.gz
Expand Down
6 changes: 3 additions & 3 deletions charts/templates/client-slow.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@ spec:
- name: PYTHONUNBUFFERED
value: "1"
- name: RANK
value: {{ $rank }}
value: {{ quote $rank }}
- name: TP_SOCKET_IFNAME
value: eth0
value: "eth0"
- name: WORLD_SIZE
value: {{ $worldsize }}
value: {{ quote $worldsize }}
image: gcr.io/cs4290-dml/fltk:latest
name: client-slow
resources:
Expand Down
4 changes: 2 additions & 2 deletions charts/templates/fl-server-pod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ spec:
- name: RANK
value: "0"
- name: TP_SOCKET_IFNAME
value: eth0
value: "eth0"
- name: WORLD_SIZE
value: {{ (.Values.fltk.worldsize | int) }}
value: {{ quote (.Values.fltk.worldsize | int) }}
image: gcr.io/cs4290-dml/fltk:latest
name: federation-lab-server
ports:
Expand Down
29 changes: 16 additions & 13 deletions configs/cloud_experiment.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
---
# Experiment configuration
# Experiment configuration
total_epochs: 5
epochs_per_cycle: 1
wait_for_clients: true
net: Cifar10CNN
dataset: cifar10
net: FashionMNISTCNN
dataset: fashion-mnist
sampler: "uniform"
sampler_args:
- 0.5 # p degree
Expand All @@ -14,20 +15,22 @@ cuda: false
experiment_prefix: 'experiment_single_machine'
output_location: 'output'
tensor_board_active: true
clients_per_round: 1
clients_per_round: 20
system:
federator:
hostname: '10.128.0.2'
nic: 'ens4'
# Use the SERVICE provided by the fl-server to connect
hostname: 'fl-server.default.svc.cluster.local'
# Default NIC is eth0
nic: 'eth0'
clients:
# TODO set this in environment variable
amount: 1
amount: 20
# A simple example config is provided in configs/poison.example.yaml
poison:
experiments:
min_ratio: 0.0
max_ratio: 0.25
steps: 2
seed: 420
ratio: 0.2
attack:
scenario: "fraction"
type: "LABEL_FLIP"
type: "flip"
config:
- 4: 6
- 6: 4

6 changes: 3 additions & 3 deletions configs/local_experiment.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
---
# Experiment configuration
# Experiment configuration
total_epochs: 5
total_epochs: 2
epochs_per_cycle: 1
wait_for_clients: true
net: Cifar10CNN
Expand All @@ -15,13 +15,13 @@ cuda: false
experiment_prefix: 'experiment_single_machine'
output_location: 'output'
tensor_board_active: true
clients_per_round: 1
clients_per_round: 2
system:
federator:
hostname: '172.18.0.2'
nic: 'eth0'
clients:
amount: 1
amount: 2
# A simple example config is provided in configs/poison.example.yaml
poison:
seed: 420
Expand Down
44 changes: 42 additions & 2 deletions docker-compose-gcloud.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ services:
- MASTER_PORT=5000
- PYTHONUNBUFFERED=1
- RANK=1
- WORLD_SIZE=2
- WORLD_SIZE=20
- GLOO_SOCKET_IFNAME=eth0
- TP_SOCKET_IFNAME=eth0
depends_on:
Expand All @@ -44,7 +44,26 @@ services:
- MASTER_PORT=5000
- PYTHONUNBUFFERED=1
- RANK=2
- WORLD_SIZE=2
- WORLD_SIZE=20
- GLOO_SOCKET_IFNAME=eth0
- TP_SOCKET_IFNAME=eth0
depends_on:
- fl_server
deploy:
resources:
limits:
cpus: '0.5'
memory: 1024M

client_slow_3:
restart: 'no'
image: gcr.io/cs4290-dml/fltk:latest
command: python3 -m fltk single configs/local_experiment.yaml --rank=3
environment:
- MASTER_PORT=5000
- PYTHONUNBUFFERED=1
- RANK=3
- WORLD_SIZE=20
- GLOO_SOCKET_IFNAME=eth0
- TP_SOCKET_IFNAME=eth0
depends_on:
Expand All @@ -54,3 +73,24 @@ services:
limits:
cpus: '0.5'
memory: 1024M

client_slow_4:
restart: 'no'
image: gcr.io/cs4290-dml/fltk:latest
command: python3 -m fltk single configs/local_experiment.yaml --rank=4
environment:
- MASTER_PORT=5000
- PYTHONUNBUFFERED=1
- RANK=4
- WORLD_SIZE=20
- GLOO_SOCKET_IFNAME=eth0
- TP_SOCKET_IFNAME=eth0
depends_on:
- fl_server
deploy:
resources:
limits:
cpus: '0.5'
memory: 1024M

8 changes: 6 additions & 2 deletions fltk/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ def main():
yaml_data = yaml.load(config_file, Loader=yaml.FullLoader)
cfg.merge_yaml(yaml_data)
if args.mode == 'poison':
perform_poison_experiment(args, cfg, parser, yaml_data)
for ratio in [0.0, 0.05, 0.1, 0.15, 0.2]:
perform_poison_experiment(args, cfg, parser, yaml_data, ratio)
elif args.mode == 'single':
perform_single_experiment(args, cfg, parser, yaml_data)
else:
Expand Down Expand Up @@ -106,7 +107,7 @@ def perform_single_experiment(args, cfg, parser, yaml_data):



def perform_poison_experiment(args, cfg, parser, yaml_data):
def perform_poison_experiment(args, cfg, parser, yaml_data, ratio=None):
"""
Function to start poisoned experiment.
"""
Expand All @@ -129,6 +130,9 @@ def perform_poison_experiment(args, cfg, parser, yaml_data):
if not nic:
nic = yaml_data['system']['federator']['nic']
print(f'rank={args.rank}, world_size={world_size}, host={master_address}, args=cfg, nic={nic}')
if ratio:
print(f'Setting ratio to {ratio}')
attack.ratio = ratio
run_single(rank=args.rank, world_size=world_size, host=master_address, args=cfg, nic=nic, attack=attack)


Expand Down
27 changes: 26 additions & 1 deletion fltk/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,32 @@ def init_device(self):
else:
return torch.device("cpu")

def reset_model(self):
    """
    Restore the client to a fresh training state.

    Reloads the default model weights and rebuilds the loss function,
    optimizer and learning-rate scheduler, so any internal counters
    (e.g. step-based decay) start over. The epoch counter is reset as well.
    @return: None
    @rtype: None
    """
    args = self.args
    # Start again from the default (untrained) model weights.
    self.set_net(self.load_default_model())
    self.loss_function = args.get_loss_function()()
    self.optimizer = torch.optim.SGD(
        self.net.parameters(),
        lr=args.get_learning_rate(),
        momentum=args.get_momentum(),
    )
    self.scheduler = MinCapableStepLR(
        args.get_logger(),
        self.optimizer,
        args.get_scheduler_step_size(),
        args.get_scheduler_gamma(),
        args.get_min_lr(),
    )
    # Restart epoch bookkeeping from scratch.
    self.epoch_counter = 0

def ping(self):
    """
    Liveness probe used by the federator while clients initialize.
    @return: The answer to the important question 'ping?', which is 'pong'.
    @rtype: str
    """
    return 'pong'

def rpc_test(self):
Expand Down Expand Up @@ -300,7 +325,7 @@ def run_epochs(self, num_epoch, pill: PoisonPill = None):
# Copy GPU tensors to CPU
for k, v in weights.items():
weights[k] = v.cpu()
return data, weights
return data, self.net.state_dict()

def save_model(self, epoch, suffix):
"""
Expand Down
23 changes: 11 additions & 12 deletions fltk/datasets/distributed/cifar10.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
import logging

from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader, DistributedSampler

from fltk.datasets.distributed.dataset import DistDataset
from fltk.strategy.data_samplers import get_sampler
import logging

from fltk.util.poison.poisonpill import PoisonPill


class DistCIFAR10Dataset(DistDataset):

def __init__(self, args, pill: PoisonPill = None):
super(DistCIFAR10Dataset, self).__init__(args, pill)
self.get_args().get_logger().debug(f"Instantiated CIFAR10 train data, with pill: {pill}")
self.init_train_dataset()
self.init_test_dataset()
if pill:
self.ingest_pill(pill)

def init_train_dataset(self, pill: PoisonPill = None):

def init_train_dataset(self):
dist_loader_text = "distributed" if self.args.get_distributed() else ""
self.get_args().get_logger().debug(f"Loading '{dist_loader_text}' CIFAR10 train data")
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
Expand All @@ -28,12 +28,11 @@ def init_train_dataset(self, pill: PoisonPill = None):
transforms.ToTensor(),
normalize
])

self.train_dataset = datasets.CIFAR10(root=self.get_args().get_data_path(), train=True, download=True,
transform=transform)
transform=transform,
target_transform=None if not self.pill else self.pill.poison_targets())

# Poison
if pill:
self.ingest_pill(pill)
self.train_sampler = get_sampler(self.train_dataset, self.args)
self.train_loader = DataLoader(self.train_dataset, batch_size=16, sampler=self.train_sampler)
logging.info("this client gets {} samples".format(len(self.train_sampler)))
Expand All @@ -46,8 +45,8 @@ def init_test_dataset(self):
transforms.ToTensor(),
normalize
])
# TODO: decide on whether to poison test or not target_transform=None if not self.pill else self.pill.poison_targets()
self.test_dataset = datasets.CIFAR10(root=self.get_args().get_data_path(), train=False, download=True,
transform=transform)
transform=transform)
self.test_sampler = get_sampler(self.test_dataset, self.args)
self.test_loader = DataLoader(self.test_dataset, batch_size=16, sampler=self.test_sampler)

2 changes: 1 addition & 1 deletion fltk/datasets/distributed/cifar100.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def init_train_dataset(self):
normalize
])
self.train_dataset = datasets.CIFAR100(root=self.get_args().get_data_path(), train=True, download=True,
transform=transform)
transform=transform, target_transform=self.pill.poison_targets())
self.train_sampler = get_sampler(self.train_dataset, self.args)
self.train_loader = DataLoader(self.train_dataset, batch_size=16, sampler=self.train_sampler)

Expand Down
11 changes: 0 additions & 11 deletions fltk/datasets/distributed/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,14 +139,3 @@ def init_test_dataset(self):
# :return: tuple
# """
# return (next(iter(data_loader))[0].numpy(), next(iter(data_loader))[1].numpy())
def ingest_pill(self, pill: PoisonPill):
"""
Drink the CoolAid, apply poison to the input regarding the pill. Note that the pill may implement a noop,
meaning that this has no real result.
@param pill:
@type pill:
@return:
@rtype:
"""
self.train_dataset.targets = pill.poison_targets(self.train_dataset.targets)
self.test_dataset.targets = pill.poison_targets(self.test_dataset)
23 changes: 15 additions & 8 deletions fltk/datasets/distributed/fashion_mnist.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
from fltk.datasets.distributed import DistDataset
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader, DistributedSampler

from fltk.datasets.distributed import DistDataset
from fltk.strategy.data_samplers import get_sampler
from fltk.util.poison.poisonpill import PoisonPill


class DistFashionMNISTDataset(DistDataset):

def __init__(self, args):
super(DistFashionMNISTDataset, self).__init__(args)
def __init__(self, args, pill: PoisonPill = None):
    """
    Construct a (distributed) Fashion-MNIST dataset wrapper.
    @param args: Experiment argument/configuration object passed to the base dataset.
    @param pill: Optional poison pill used to tamper with training targets; None for a clean run.
    """
    super().__init__(args, pill)
    # Eagerly build both splits so train/test loaders are ready immediately.
    self.init_train_dataset()
    self.init_test_dataset()

Expand All @@ -18,22 +19,26 @@ def init_train_dataset(self):
self.get_args().get_logger().debug(f"Loading '{dist_loader_text}' Fashion MNIST train data")

self.train_dataset = datasets.FashionMNIST(root=self.get_args().get_data_path(), train=True, download=True,
transform=transforms.Compose([transforms.ToTensor()]))
transform=transforms.Compose([transforms.ToTensor()]),
target_transform=None if not self.pill else self.pill.poison_targets())
self.train_sampler = get_sampler(self.train_dataset, self.args)
self.train_loader = DataLoader(self.train_dataset, batch_size=16, sampler=self.train_sampler)

def init_test_dataset(self):
dist_loader_text = "distributed" if self.args.get_distributed() else ""
self.get_args().get_logger().debug(f"Loading '{dist_loader_text}' Fashion MNIST test data")
self.test_dataset = datasets.FashionMNIST(root=self.get_args().get_data_path(), train=False, download=True,
transform=transforms.Compose([transforms.ToTensor()]))
transform=transforms.Compose([transforms.ToTensor()]),
target_transform=None if not self.pill else self.pill.poison_targets())
self.test_sampler = get_sampler(self.test_dataset, self.args)
self.test_loader = DataLoader(self.test_dataset, batch_size=16, sampler=self.test_sampler)

def load_train_dataset(self):
self.get_args().get_logger().debug("Loading Fashion MNIST train data")

train_dataset = datasets.FashionMNIST(self.get_args().get_data_path(), train=True, download=True, transform=transforms.Compose([transforms.ToTensor()]))
train_dataset = datasets.FashionMNIST(self.get_args().get_data_path(), train=True, download=True,
transform=transforms.Compose([transforms.ToTensor()]),
target_transform=None if not self.pill else self.pill.poison_targets())
train_loader = DataLoader(train_dataset, batch_size=len(train_dataset))

train_data = self.get_tuple_from_data_loader(train_loader)
Expand All @@ -45,7 +50,9 @@ def load_train_dataset(self):
def load_test_dataset(self):
self.get_args().get_logger().debug("Loading Fashion MNIST test data")

test_dataset = datasets.FashionMNIST(self.get_args().get_data_path(), train=False, download=True, transform=transforms.Compose([transforms.ToTensor()]))
# TODO: target_transform=None if not self.pill else self.pill.poison_targets()
test_dataset = datasets.FashionMNIST(self.get_args().get_data_path(), train=False, download=True,
transform=transforms.Compose([transforms.ToTensor()]))
test_loader = DataLoader(test_dataset, batch_size=len(test_dataset))

test_data = self.get_tuple_from_data_loader(test_loader)
Expand Down
Loading

0 comments on commit ffd2aed

Please sign in to comment.