-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathserver.py
558 lines (423 loc) · 17.7 KB
/
server.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
import json
import pickle
import sys
import traceback
import random
import uuid
from flask import Flask, request, jsonify, render_template
from flask_socketio import SocketIO
from flask_mqtt import Mqtt
from flask_cors import CORS
from common import person_classifier
from common.aggregation_scheme import get_aggregation_scheme
from common.clientblock import ClientBlock
from common.clusterblock import ClusterBlock
from common.datablock import Datablock
from common.models import PersonBinaryClassifier
from common.networkblock import Networkblock
from common.result_data import as_result_data
from utils import constants
from utils.enums import LearningType, ClientState
from utils.model_helper import encode_state_dictionary
from utils.mqtt_helper import MessageType, send_typed_message
import sqlite3
import uuid
import datetime
sys.path.append('.')

app = Flask(__name__)

# MQTT broker configuration. Switch back to 'localhost' for local testing.
#app.config['MQTT_BROKER_URL'] = 'localhost'
app.config['MQTT_BROKER_URL'] = 'broker.hivemq.com'
app.config['MQTT_BROKER_PORT'] = 1883
# BUG FIX: MQTT_KEEPALIVE was assigned twice with the same value; the
# duplicate line has been removed.
app.config['MQTT_KEEPALIVE'] = 1000
app.config['MQTT_REFRESH_TIME'] = 1.0  # refresh time in seconds
app.config['SECRET_KEY'] = 'secret!'

cors = CORS(app)
socketio = SocketIO(app, cors_allowed_origins="*")
mqtt = Mqtt(app, mqtt_logging=True)

# global variables
PACKET_SIZE = 3000          # chunk size constant (not referenced in this file)
CLIENTS = {}                # client_id -> ClientBlock (state + learning type)
CLIENT_DATABLOCKS = {}      # client_id -> Datablock (centralized image uploads)
CLIENT_NETWORKS = {}        # client_id -> Networkblock (federated model chunks)
CLUSTERS = {}               # cluster name -> ClusterBlock
NETWORK = None              # aggregated model used by hybrid learning
TEST_DATABLOCKS = dict()    # cluster name -> Datablock of held-out test images
CENTRALIZED_EPOCHS = 5      # epochs per centralized training round
RUN_ID = None               # uuid of the current run; set by initialize_server()
#CLUSTER_NAMES = ["water", "ground", "solid", "sky", "plant", "structural", "building", "food-stuff", "textile", "furniture-stuff", "window", "floor", "ceiling", "wall", "raw-material"]
CLUSTER_NAMES = ["water", "ground"]
@app.route('/getAllRuns', methods=['GET'])
def get_runs():
    """Return every row of the ``runs`` results table as a JSON array."""
    conn = sqlite3.connect("runs.db")
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM runs")
        rows = cursor.fetchall()
    finally:
        # BUG FIX: close the connection even if the query raises, so a bad
        # request cannot leak a database handle.
        conn.close()
    return jsonify(rows)
@app.route('/executeRun', methods=['POST'])
def execute_run():
    """Start a learning run from a JSON request body.

    Body fields: ``numDevices`` (clients to use), ``numClusters`` (clusters
    to create) and ``operationMode`` (a LearningType value shared by all
    clients). Randomly samples cluster names, assigns free clients to them
    via initialize_server(), and broadcasts the start-learning signal.

    Returns JSON with the run id and the client/cluster assignments.
    """
    global CLUSTER_NAMES
    if request.method == 'POST':
        body = request.get_json()
        num_clients = int(body.get('numDevices', 2))
        num_clusters = int(body.get('numClusters', 1))
        # Every client in this run uses the same learning type.
        operation_modes = [LearningType(
            int(body.get('operationMode', 0)))] * num_clients
        chosen_cluster = random.sample(CLUSTER_NAMES, num_clusters)
        clusters = dict(zip(chosen_cluster, operation_modes))
        print(clusters)
        assignments = initialize_server(clusters, num_clients)
        send_typed_message(
            mqtt,
            'server/general',
            constants.START_LEARNING_MESSAGE,
            MessageType.SIMPLE)
        # BUG FIX: previously returned a fresh uuid4() that did not match the
        # RUN_ID initialize_server() stores with every database row; return
        # the actual run id so callers can look up their results.
        return json.dumps({'run_id': RUN_ID,
                           'assignments': assignments})
@app.route('/test', methods=['GET'])
def test():
    """Smoke-test endpoint: start one centralized 'ground' cluster of 2 clients."""
    cluster_config = {'ground': LearningType.CENTRALIZED}
    initialize_server(cluster_config, 2)
    send_typed_message(
        mqtt,
        'server/general',
        constants.START_LEARNING_MESSAGE,
        MessageType.SIMPLE)
    return 'TEST - server initialized and msg sent'
@app.route('/', methods=['GET', 'POST'])
def index():
    """Serve the dashboard (GET) or start a default single-cluster run (POST).

    POST body fields: ``numDevices`` and ``operationMode`` (LearningType
    value). Returns JSON with the run id and client/cluster assignments.
    """
    if request.method == 'GET':
        return render_template("index.html")
    if request.method == 'POST':
        body = request.get_json()
        print(body)
        num_clients = body.get('numDevices', 2)
        operation_mode = LearningType(body.get('operationMode', 1))
        clusters = {
            "ground": operation_mode,
            # "outdoor": operation_mode
        }
        assignments = initialize_server(clusters, num_clients)
        send_typed_message(
            mqtt,
            'server/general',
            constants.START_LEARNING_MESSAGE,
            MessageType.SIMPLE)
        # BUG FIX: return the RUN_ID set by initialize_server() instead of an
        # unrelated fresh uuid, so the response matches the database rows.
        return json.dumps({'run_id': RUN_ID,
                           'assignments': assignments})
@socketio.on('connect')
def connection():
    # Log websocket connections and emit a probe event the dashboard can
    # listen for to confirm the channel is live.
    print('websocket connect')
    socketio.emit("FromAPI", "test")
@mqtt.on_connect()
def handle_connect(client, userdata, flags, rc):
    # On (re)connecting to the broker, subscribe to the topic where new
    # clients announce themselves; per-client topics are subscribed later
    # in initialize_new_clients().
    print('connected')
    mqtt.subscribe(constants.NEW_CLIENT_INITIALIZATION_TOPIC)
@mqtt.on_message()
def handle_mqtt_message(client, userdata, msg):
    """Central MQTT dispatcher for all client traffic.

    Handles, in order: new-client announcements, result reports,
    iteration-end notices, then per-learning-type data collection.
    After every message it checks whether any cluster's clients have all
    FINISHED and, if so, runs that cluster's aggregation/training round.
    """
    global CLUSTERS, CLIENTS
    payload = json.loads(msg.payload.decode())
    # Fields are optional; which ones are present depends on the message type.
    dimensions = payload.get("dimensions", None)
    label = payload.get("label", None)
    data = payload.get("data", None)
    message = payload.get("message", None)
    # Add a new client and subscribe to appropriate topic
    if msg.topic == constants.NEW_CLIENT_INITIALIZATION_TOPIC:
        initialize_new_clients(message)
        return
    # All remaining topics look like 'client/<client_name>'.
    client_name = msg.topic.split("/")[1]
    if message == constants.RESULT_DATA_MESSAGE_SIGNAL:
        receive_result_data(client_name, payload['data'])
        # NOTE(review): no early return here — the payload falls through to
        # the learning-type handlers below; confirm that is intentional.
    if message == constants.DEFAULT_ITERATION_END:
        CLIENTS[client_name].set_state(ClientState.FREE)
    if client_name in CLIENTS:
        if CLIENTS[client_name].get_learning_type() == LearningType.FEDERATED:
            collect_federated_data(data, message, client_name)
        elif CLIENTS[client_name].get_learning_type() == LearningType.CENTRALIZED:
            collect_centralized_data(
                data, message, client_name, dimensions, label)
    else:
        print("Client not initialized correctly (client not in CLIENT_IDS)")
    # Trigger aggregation/training for any cluster whose clients are all done.
    finished_clusters = get_completed_clusters()
    for cluster in finished_clusters:
        learning_type = CLUSTERS[cluster].get_learning_type()
        clients = CLUSTERS[cluster].get_clients()
        if learning_type == LearningType.CENTRALIZED:
            perform_centralized_learning(clients, cluster)
        elif learning_type == LearningType.FEDERATED:
            perform_federated_learning(clients, cluster)
        elif learning_type == LearningType.HYBRID:
            # this needs to be fixed, not sure if we're including this in final
            # demo
            perform_hybrid_learning()
# This function resets the server and assigns available clients to a cluster.
# If there are not enough free clients, the server will assign the rest that is available.
def initialize_server(required_clusters, num_clients):
    """Reset server state, create clusters and assign free clients to them.

    Args:
        required_clusters: mapping of cluster name -> LearningType.
        num_clients: total number of clients to distribute across clusters.

    Returns:
        A list of assignment dicts (device, topic, learning_type), one per
        assigned client. Also sets the global RUN_ID for this run and sends
        each client its cluster-subscription message over MQTT.
    """
    global CLUSTERS, CLIENTS, RUN_ID
    reset()
    RUN_ID = str(uuid.uuid4())
    # BUG FIX: use integer division. get_free_clients() compares this count
    # with len() of a list, which never equals a non-integral float, so a
    # true division silently made every cluster claim all remaining clients.
    clients_per_cluster = num_clients // len(required_clusters)
    print("clients per cluster: {}".format(clients_per_cluster))
    generate_test_datablocks(required_clusters)
    assignments = []
    for cluster_name in required_clusters:
        free_clients = get_free_clients(clients_per_cluster)
        CLUSTERS[cluster_name] = ClusterBlock(
            free_clients,
            'cluster/' + cluster_name,
            required_clusters[cluster_name])
        # Human-readable learning-type label passed down to the clients.
        if required_clusters[cluster_name] == LearningType.CENTRALIZED:
            learning_type = 'centralized'
        elif required_clusters[cluster_name] == LearningType.FEDERATED:
            learning_type = 'federated'
        else:
            learning_type = 'personalized'
        for client_id in free_clients:
            CLIENTS[client_id].set_learning_type(
                required_clusters[cluster_name])
            # Centralized clients stream images, so they need a datablock.
            if required_clusters[cluster_name] == LearningType.CENTRALIZED:
                initialize_datablocks(client_id)
        # send msg to those clients saying this your cluster (for subscription)
        client_index = 0
        for client_id in free_clients:
            topic = CLUSTERS[cluster_name].get_mqtt_topic_name()
            assignment = {
                'device': client_id,
                'topic': topic,
                'learning_type': learning_type
            }
            assignments.append(assignment)
            message = {
                'message': constants.SUBSCRIBE_TO_CLUSTER,
                constants.CLUSTER_TOPIC_NAME: topic,
                'learning_type': learning_type,
                'client_id': client_id,
                'num_clients_in_cluster': len(free_clients),
                'client_index_in_cluster': client_index
            }
            send_typed_message(
                mqtt,
                'server/general',
                message,
                MessageType.SIMPLE)
            client_index += 1
    return assignments
def generate_test_datablocks(clusters):
    """Build a held-out test Datablock for each cluster.

    Loads the shared image pickle and reserves its last 20% as test data,
    stored in the global TEST_DATABLOCKS keyed by cluster name.
    """
    global TEST_DATABLOCKS
    # BUG FIX: use a context manager so the pickle file handle is always
    # closed (the original passed an anonymous open() to pickle.load).
    with open('./data/federated-learning-data.pkl', 'rb') as fp:
        data = pickle.load(fp)
    num_images = len(data)
    split_index = int(num_images * 4 / 5)  # 20% for testing
    test_data = data[split_index:]
    for cluster in clusters:
        TEST_DATABLOCKS[cluster] = Datablock()
        TEST_DATABLOCKS[cluster].add_images_for_cluster(
            test_data, "cluster/" + cluster)
# Grabs [num_required] free clients and sets status of clients to STALE
def get_free_clients(num_required):
    """Claim up to *num_required* FREE clients, marking each one STALE.

    Returns the claimed client ids; if fewer than requested are free, a
    warning is printed and the shorter list is returned.
    """
    global CLIENTS
    claimed = []
    for client_id, client_block in CLIENTS.items():
        if client_block.get_state() != ClientState.FREE:
            continue
        claimed.append(client_id)
        client_block.set_state(ClientState.STALE)
        if len(claimed) == num_required:
            return claimed
    print("WARNING: Not enough clients")
    return claimed
def reset():
    """Clear all per-run state, free every client and broadcast a reset."""
    global CLIENT_NETWORKS, CLIENT_DATABLOCKS, CLUSTERS, CLIENTS, RUN_ID
    RUN_ID = None
    # Drop everything accumulated during the previous run.
    for store in (CLIENT_NETWORKS, CLIENT_DATABLOCKS, CLUSTERS, TEST_DATABLOCKS):
        store.clear()
    # Connected clients survive a reset; they just become claimable again.
    for client_block in CLIENTS.values():
        client_block.set_state(ClientState.FREE)
    send_typed_message(
        mqtt,
        'server/general',
        constants.RESET_CLIENT_MESSAGE,
        MessageType.SIMPLE)
# Takes the clients that need to be aggregated as input and sends the averaged/whatever aggregation scheme model back to
# clients in the cluster.
def perform_federated_learning(clients, cluster):
    """Aggregate the clients' uploaded models and broadcast the result.

    The aggregated state dict is stored on the cluster, each client's
    chunk buffer is cleared and its state set back to STALE, and the new
    model is sent to the cluster's MQTT topic.
    """
    global CLUSTERS, CLIENTS, CLIENT_NETWORKS
    print("averaging for cluster: {}".format(cluster))
    merged_state = get_aggregation_scheme(clients, CLIENT_NETWORKS)
    CLUSTERS[cluster].set_state_dict(merged_state)
    for client_id in clients:
        CLIENT_NETWORKS[client_id].reset_network_data()
        CLIENTS[client_id].set_state(ClientState.STALE)
    send_network_model(
        encode_state_dictionary(merged_state),
        CLUSTERS[cluster].get_mqtt_topic_name())
def perform_centralized_learning(clients, cluster):
    """Train the cluster's model on all images uploaded by its clients.

    Builds a model runner over the datablocks of *clients*, warm-starts it
    from the cluster's previous state dict (if any), trains for
    CENTRALIZED_EPOCHS, stores the new weights on the cluster and
    broadcasts them to the cluster's MQTT topic.
    """
    global CLIENTS, CLIENT_DATABLOCKS, CLUSTERS
    # Only train on datablocks belonging to this cluster's clients.
    applicable_client_datablocks = {
        k: v for (k, v) in CLIENT_DATABLOCKS.items() if k in clients}
    test_datablock_dict = {
        'test_datablock': TEST_DATABLOCKS[cluster]
    }
    runner = person_classifier.get_model_runner(
        client_data=applicable_client_datablocks,
        test_data=test_datablock_dict,
        num_epochs=CENTRALIZED_EPOCHS)
    # Warm start: continue from the previous round's weights when available.
    if CLUSTERS[cluster].get_state_dict() is not None:
        runner.model.load_state_dictionary(CLUSTERS[cluster].get_state_dict())
    runner.train_model()
    CLUSTERS[cluster].set_state_dict(runner.model.get_state_dictionary())
    # STALE means the client has been claimed and a new round can begin.
    for client_id in clients:
        CLIENTS[client_id].set_state(ClientState.STALE)
    send_network_model(
        encode_state_dictionary(
            runner.model.get_state_dictionary()),
        CLUSTERS[cluster].get_mqtt_topic_name())
# This is incomplete and does not work. This function is supposed to first average the federated model and then
# train that model with additional centralized data.
def perform_hybrid_learning():
    """Average federated models, then fine-tune with centralized data.

    Known incomplete: the final send_network_model call is commented out
    (marked BROKEN below), so the resulting model is never delivered.
    """
    global NETWORK, CLIENTS, CLIENT_NETWORKS
    # Current method averages and then trains
    try:
        # Average models
        averaged_state_dict = get_aggregation_scheme(
            CLIENTS, CLIENT_NETWORKS)
        if averaged_state_dict is not None:
            NETWORK = PersonBinaryClassifier()
            NETWORK.load_state_dictionary(averaged_state_dict)
            print("Averaging Finished")
        # runner = person_classifier.get_model_runner()
        # runner.model.load_state_dictionary(
        #     NETWORK.get_state_dictionary())
        # runner.test_model()
        # reset models to stale and delete old data
        for client in CLIENTS:
            print("Resetting network data for client {}..".format(client))
            CLIENT_NETWORKS[client].reset_network_data()
        if len(CLIENT_DATABLOCKS) != 0:
            # Fine-tune the averaged model on the centralized image data.
            runner = person_classifier.get_model_runner(
                client_data=CLIENT_DATABLOCKS, num_epochs=1)
            if averaged_state_dict is not None:
                runner.model.load_state_dictionary(
                    NETWORK.get_state_dictionary())
            runner.train_model()
            encoded = encode_state_dictionary(
                runner.model.get_state_dictionary())
        else:
            encoded = encode_state_dictionary(NETWORK.get_state_dictionary())
        # send_network_model(encoded) # ======== BROKEN =========
        for client in CLIENTS:
            CLIENTS[client].set_state(ClientState.STALE)
    except Exception as e:
        # Broad catch keeps a failed hybrid round from killing the MQTT
        # handler; the traceback is printed for debugging.
        print(traceback.format_exc())
def receive_result_data(client_id, data):
    """Persist one client's test results for the current run.

    Decodes *data* into a result-data object, forwards the raw payload to
    the dashboard over the websocket, and inserts a row into the ``runs``
    table keyed by (RUN_ID, iteration, client id).
    """
    result_data_object = as_result_data(data)
    # Push the raw result to the dashboard; each client has its own event name.
    socketio.emit(client_id, json.dumps(data))
    # TODO: Fix ClientHardware to the six destructured specs
    # FIX: renamed the tuple from `data` to `row` — it previously shadowed
    # the `data` parameter.
    row = (
        RUN_ID,
        datetime.datetime.utcnow().isoformat(),
        client_id,
        result_data_object.system,
        result_data_object.node,
        result_data_object.release,
        result_data_object.version,
        result_data_object.machine,
        result_data_object.processor,
        CLIENTS[client_id].get_learning_type().name,
        result_data_object.model_accuracy,
        result_data_object.test_loss,
        result_data_object.epochs,
        result_data_object.iteration)
    conn = sqlite3.connect("runs.db")
    try:
        cursor = conn.cursor()
        cursor.execute("""INSERT INTO runs(RunID, UTCDateTime, ClientID, ClientSystem, ClientNode, ClientRelease, ClientVersion, ClientMachine, ClientProcessor, LearningType, ModelAccuracy, TestLoss, NumEpochs, Iteration) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?)""", row)
        conn.commit()
    finally:
        # BUG FIX: close the connection even if the insert fails.
        conn.close()
    print(
        "{}: Test Loss: {}".format(
            client_id,
            result_data_object.test_loss))
    print(
        "{}: Accuracy: {}".format(
            client_id,
            result_data_object.model_accuracy))
# Check if any of clusters are complete, meaning all clients in that cluster has their state set to FINISHED.
def get_completed_clusters():
    """Return the names of clusters whose clients have all FINISHED."""
    return [
        cluster for cluster in CLUSTERS
        if all(
            CLIENTS[client_id].get_state() == ClientState.FINISHED
            for client_id in CLUSTERS[cluster].get_clients())
    ]
# Method for collecting federated data (adds model chunk to appropriate client)
def collect_federated_data(data, message, client_id):
    """Accumulate a federated client's model upload, chunk by chunk.

    On the init signal a fresh Networkblock is created; chunks are appended
    until the end signal, at which point the state dict is reconstructed
    and the client is marked FINISHED (triggering aggregation once its
    whole cluster is done).
    """
    global CLIENT_NETWORKS, CLIENTS
    # get model
    if message == constants.DEFAULT_NETWORK_INIT:
        CLIENT_NETWORKS[client_id] = Networkblock()
        CLIENT_NETWORKS[client_id].reset_network_data()
    elif message == constants.DEFAULT_NETWORK_CHUNK:
        CLIENT_NETWORKS[client_id].add_network_chunk(data)
    elif message == constants.DEFAULT_NETWORK_END:
        print("All chunks received")
        state_dict = CLIENT_NETWORKS[client_id].reconstruct_state_dict()
        # NOTE(review): this classifier is built and loaded but never stored
        # or returned — presumably a validity check on the reconstructed
        # state dict; confirm before removing.
        person_binary_classifier = PersonBinaryClassifier()
        person_binary_classifier.load_state_dictionary(state_dict)
        CLIENTS[client_id].set_state(ClientState.FINISHED)
# Method for collecting centralized data. Adds image chunk to appropriate client datablock.
def collect_centralized_data(data, message, client_name, dimensions, label):
    """Route one centralized-learning MQTT message to the right handler."""
    global CLIENTS
    if message == 'all_images_sent':
        # The client finished uploading; flag it so the cluster can train.
        CLIENTS[client_name].set_state(ClientState.FINISHED)
        print("All images received from client: {}".format(client_name))
        return
    if message == constants.DEFAULT_IMAGE_INIT:
        initialize_new_image(client_name, dimensions, label)
    elif message == constants.DEFAULT_IMAGE_CHUNK:
        add_data_chunk(client_name, data)
    elif message == constants.DEFAULT_IMAGE_END:
        convert_data(client_name)
# ===== Methods for sending image and model data from server to client ===== #
def initialize_new_clients(client_id):
    # Register a newly announced client as FREE and start listening to its
    # dedicated 'client/<id>' topic.
    print("New client connected: {}".format(client_id))
    CLIENTS[client_id] = ClientBlock(ClientState.FREE)
    mqtt.subscribe('client/' + client_id)
def initialize_new_image(client_name, dimensions, label):
    """Start a new image entry in the client's datablock."""
    global CLIENT_DATABLOCKS
    # The original rebound the return value to a local that was never used;
    # the side effect on the stored datablock is what matters.
    CLIENT_DATABLOCKS[client_name].init_new_image(dimensions, label)
def add_data_chunk(client_name, chunk):
    """Append a raw image chunk to the client's in-progress image."""
    global CLIENT_DATABLOCKS
    CLIENT_DATABLOCKS[client_name].add_image_chunk(chunk)
def convert_data(client_name):
    """Finalize the client's in-progress image by converting it to a matrix."""
    global CLIENT_DATABLOCKS
    CLIENT_DATABLOCKS[client_name].convert_current_image_to_matrix()
def send_network_model(payload, topic):
    # Broadcast an encoded state dict to *topic*, chunked by the MQTT helper.
    send_typed_message(
        mqtt,
        topic,
        payload,
        MessageType.NETWORK_CHUNK)
def initialize_datablocks(client):
    # Give a centralized-learning client an empty Datablock to stream images into.
    global CLIENT_DATABLOCKS
    CLIENT_DATABLOCKS[client] = Datablock()
def initialize_database():
    """Create the ``runs`` results table in runs.db if it does not exist.

    Idempotent: safe to call on every server start. One row per
    (RunID, Iteration, ClientID) result report.
    """
    # establish db connection
    conn = sqlite3.connect("runs.db")
    try:
        cursor = conn.cursor()
        # create table if it doesn't exist
        cursor.execute("""CREATE TABLE IF NOT EXISTS runs(RunID VARCHAR(255), UTCDateTime VARCHAR(255), ClientID VARCHAR(255), ClientSystem TEXT, ClientNode TEXT, ClientRelease TEXT, ClientVersion TEXT, ClientMachine TEXT, ClientProcessor TEXT, LearningType VARCHAR(255), ModelAccuracy FLOAT, TestLoss FLOAT, NumEpochs INT, Iteration INT, PRIMARY KEY (RunID, Iteration, ClientID))""")
        conn.commit()
    finally:
        # BUG FIX: close the connection even if table creation fails.
        conn.close()
if __name__ == '__main__':
    # Ensure the results table exists, then serve HTTP + websocket traffic.
    initialize_database()
    socketio.run(app, port=5000)#, host='0.0.0.0')