diff --git a/.dockerignore b/.dockerignore index d6cda2dc..e69de29b 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,8 +0,0 @@ -venv -data_loaders -data/cifar-10-batches-py -data/cifar-100-python.tar.gz -data/FashionMNIST -data/cifar-100-python -data/cifar-10-python.tar.gz -simple_example diff --git a/Dockerfile b/Dockerfile index df0a0ffb..4e249583 100644 --- a/Dockerfile +++ b/Dockerfile @@ -18,8 +18,10 @@ WORKDIR /opt/federation-lab RUN apt-get update \ && apt-get install -y vim curl python3 python3-pip net-tools iproute2 +COPY data/ ./data +COPY default_models ./default_models # Copy the current folder to the working directory -ADD setup.py default_models requirements.txt ./ +ADD setup.py requirements.txt ./ # Use cache for pip, otherwise we repeatedly pull from repository RUN --mount=type=cache,target=/root/.cache/pip python3 -m pip install -r requirements.txt diff --git a/charts/templates/fl-server-claim-persistentvolumeclaim.yaml b/charts/federator/templates/fl-server-claim-persistentvolumeclaim.yaml similarity index 92% rename from charts/templates/fl-server-claim-persistentvolumeclaim.yaml rename to charts/federator/templates/fl-server-claim-persistentvolumeclaim.yaml index ca546ada..076cea03 100644 --- a/charts/templates/fl-server-claim-persistentvolumeclaim.yaml +++ b/charts/federator/templates/fl-server-claim-persistentvolumeclaim.yaml @@ -10,5 +10,5 @@ spec: - ReadWriteOnce resources: requests: - storage: 10Gi + storage: 20Gi status: {} diff --git a/charts/templates/fl-server-pod.yaml b/charts/federator/templates/fl-server-pod.yaml similarity index 58% rename from charts/templates/fl-server-pod.yaml rename to charts/federator/templates/fl-server-pod.yaml index 3a76fec9..847a87db 100644 --- a/charts/templates/fl-server-pod.yaml +++ b/charts/federator/templates/fl-server-pod.yaml @@ -2,7 +2,7 @@ apiVersion: v1 kind: Pod metadata: annotations: - kompose.cmd: kompose convert -c -f ../docker-compose-gcloud.yml + kompose.cmd: kompose convert -f 
../docker-compose.yml kompose.version: 1.22.0 (HEAD) creationTimestamp: null labels: @@ -10,19 +10,18 @@ metadata: name: fl-server spec: containers: - - env: - - name: GLOO_SOCKET_IFNAME - value: eth0 + - args: + - python3 + - -m + - fltk + - single + - configs/cloud_experiment.yaml + - --rank=0 + env: - name: MASTER_PORT value: "5000" - name: PYTHONUNBUFFERED value: "1" - - name: RANK - value: "0" - - name: TP_SOCKET_IFNAME - value: "eth0" - - name: WORLD_SIZE - value: {{ quote (.Values.fltk.worldsize | int) }} image: gcr.io/cs4290-dml/fltk:latest name: federation-lab-server ports: @@ -30,10 +29,10 @@ spec: resources: {} volumeMounts: - mountPath: /opt/federation-lab/output - name: fl-server-claim + name: fl-server-claim0 restartPolicy: Never volumes: - - name: fl-server-claim + - name: fl-server-claim0 persistentVolumeClaim: - claimName: fl-server-claim + claimName: fl-server-claim0 status: {} diff --git a/charts/templates/fl-server-service.yaml b/charts/federator/templates/fl-server-service.yaml similarity index 100% rename from charts/templates/fl-server-service.yaml rename to charts/federator/templates/fl-server-service.yaml diff --git a/charts/worker/Chart.yaml b/charts/worker/Chart.yaml new file mode 100644 index 00000000..1384f9b5 --- /dev/null +++ b/charts/worker/Chart.yaml @@ -0,0 +1,9 @@ +name: ../docker-compose-gcloud +description: A generated Helm Chart for ../docker-compose-gcloud from Skippbox Kompose +version: 0.0.1 +apiVersion: v1 +appVersion: 1.16.0 +keywords: + - ../docker-compose-gcloud +sources: +home: diff --git a/charts/templates/client-slow.yaml b/charts/worker/templates/client-slow.yaml similarity index 94% rename from charts/templates/client-slow.yaml rename to charts/worker/templates/client-slow.yaml index 32036d06..677ba786 100644 --- a/charts/templates/client-slow.yaml +++ b/charts/worker/templates/client-slow.yaml @@ -21,7 +21,7 @@ spec: - -m - fltk - single - - configs/local_experiment.yaml + - configs/cloud_experiment.yaml - 
--rank={{ . }} env: - name: GLOO_SOCKET_IFNAME @@ -33,7 +33,7 @@ spec: - name: RANK value: {{ quote $rank }} - name: TP_SOCKET_IFNAME - value: "eth0" + value: eth0 - name: WORLD_SIZE value: {{ quote $worldsize }} image: gcr.io/cs4290-dml/fltk:latest diff --git a/charts/worker/values.yaml b/charts/worker/values.yaml new file mode 100644 index 00000000..61f1fe33 --- /dev/null +++ b/charts/worker/values.yaml @@ -0,0 +1,7 @@ +fltk: + worldsize: 10 + config: cloud_experiment.yaml + port: 5001 +worker: + cpu: 500m + memory: 1073742000 diff --git a/fltk/nets/util/__init__.py b/fltk/nets/util/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/fltk/nets/util/utils.py b/fltk/nets/util/utils.py new file mode 100644 index 00000000..15ecb95a --- /dev/null +++ b/fltk/nets/util/utils.py @@ -0,0 +1,57 @@ +from collections import OrderedDict +from typing import Union + +import torch + +from fltk.util.base_config import BareConfig +import os + +def flatten_params(model_description: Union[torch.nn.Module, OrderedDict]): + """ + flattens all parameters into a single column vector. 
Returns the parameters flattened into a single column tensor +    :param model_description: a torch module, or an OrderedDict of its parameters +    :return: a flat tensor of shape [#params, 1] holding +    all parameters concatenated in iteration order +    """ +    if isinstance(model_description, torch.nn.Module): +        parameters = model_description.parameters() +    else: +        parameters = model_description.values() +    l = [torch.flatten(p) for p in parameters] +    flat = torch.cat(l).view(-1, 1) +    return flat + + +def recover_flattened(flat_params, model): +    """ +    Gives a list of recovered parameters from their flattened form +    :param flat_params: [#params, 1] +    (recovery indices are computed internally, one (start, end) pair per param; end is exclusive) +    :param model: the model that gives the params with correct shapes +    :return: the params, reshaped to the ones in the model, with the same order as those in the model +    """ +    indices = [] +    s = 0 +    for p in model.parameters(): +        size = p.shape[0] +        indices.append((s, s+size)) +        s += size +    l = [flat_params[s:e] for (s, e) in indices] +    for i, p in enumerate(model.parameters()): +        l[i] = l[i].view(*p.shape) +    return l + +def initialize_default_model(config: BareConfig, model_class) -> torch.nn.Module: +    """ +    Load a default model dictionary into a torch model. +    @param config: configuration object providing the default model folder path +    @type config: BareConfig +    @param model_class: class of the torch model to instantiate +    @type model_class: type +    @return: model instance initialized with the stored default weights +    @rtype: torch.nn.Module +    """ +    model = model_class() +    default_model_path = f"{config.get_default_model_folder_path()}/{model_class.__name__}.model" +    model.load_state_dict(torch.load(default_model_path)) +    return model \ No newline at end of file