"""
Script to compare performance of Vanilla SGD, FederatedAvergaging, and LASG-WK2 on
the Nietzsche corpus.
Character-LSTM implementation follows this example at
https://keras.io/examples/generative/lstm_character_level_text_generation/
"""
import io

import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

# Local modules: the experiment driver and the three training algorithms being compared
import runner
import lasg
import federated_avg
import vanilla_sgd


def run():
    """
    Execute the script.
    """
    ############################################################################
    # SETUP
    ############################################################################
    path = keras.utils.get_file(
        "nietzsche.txt", origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt"
    )
    with io.open(path, encoding="utf-8") as f:
        text = f.read().lower()
    text = text.replace("\n", " ")  # Remove newline chars for nicer display
    print("Corpus length:", len(text))

    chars = sorted(list(set(text)))
    print("Total chars:", len(chars))
    char_indices = dict((c, i) for i, c in enumerate(chars))
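    # char_indices maps each distinct character to an integer id, e.g. {' ': 0, '!': 1, ...};
    # the exact ids depend on which characters appear in the corpus.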
    # Cut the text into semi-redundant sequences of maxlen characters
    maxlen = 40
    step = 3
    sentences = []
    next_chars = []
    for i in range(0, len(text) - maxlen, step):
        sentences.append(text[i : i + maxlen])
        next_chars.append(text[i + maxlen])
    print("Number of sequences:", len(sentences))

    # Vectorize: one-hot encode the input sequences and store integer labels for the targets
    x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
    y = np.zeros((len(sentences)), dtype=np.int32)
    for i, sentence in enumerate(sentences):
        for t, char in enumerate(sentence):
            x[i, t, char_indices[char]] = 1
        y[i] = char_indices[next_chars[i]]
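    # x has shape (num_sequences, maxlen, num_chars) with one-hot rows, and y holds the
    # integer index of the character that follows each sequence, matching the
    # SparseCategoricalCrossentropy loss used below. For example, the sentence
    # text[0:40] is paired with the label char_indices[text[40]].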

    # Hold out 5% of the sequences as a test set
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.05, random_state=100)
    # Define the model, loss, and optimizer
    def model_constructor(hparams):
        model = keras.Sequential([
            keras.Input(shape=(maxlen, len(chars))),
            layers.LSTM(128),
            layers.Dense(len(chars), activation="softmax"),
        ])
        sgd_optimizer = keras.optimizers.SGD(learning_rate=hparams.eta)
        # The Dense layer already applies a softmax, so the loss must treat its inputs
        # as probabilities rather than logits.
        model.compile(loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                      optimizer=sgd_optimizer, metrics=["accuracy"])
        return model
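    # Illustrative usage (not executed here): each algorithm is expected to call
    # model_constructor(hparams) to build a fresh, compiled model, e.g.
    #   model = model_constructor(vanilla_sgd_hparams)
    #   model.summary()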

    ############################################################################
    # RUN EXPERIMENTS
    ############################################################################
    # Run each experiment until this target test accuracy is reached
    target_test_accuracy = 0.17

    vanilla_sgd_hparams = vanilla_sgd.vanilla_sgd_hparams(eta=1.5, batch_size=32, epochs=100, evaluation_interval=5,
                                                          target_val_accuracy=target_test_accuracy)
    # The hyperparameter names for the federated learning algorithms follow their respective papers.
    # This can cause confusion: for example, the hparam 'K' is the number of workers for
    # FederatedAveraging but the number of iterations for LASG.
    # See the constructor docstrings for a reference on what each hyperparameter does for each algorithm.
    fedavg_hparams = federated_avg.fedavg_hparams(K=100, C=0.1, E=1, B=10, eta=1.5, MAX_T=10000, evaluation_interval=2,
                                                  target_val_accuracy=target_test_accuracy)
    lasg_wk2_hparams = lasg.lasg_wk2_hparams(M=10, K=10000, D=50, c=0.1, c_range=10, eta=1.5, B=100, evaluation_interval=5,
                                             target_val_accuracy=target_test_accuracy,
                                             print_lasg_wk2_condition=True)
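    # In the FederatedAveraging notation of McMahan et al., K is the number of clients,
    # C the fraction of clients selected per round, E the number of local epochs, and
    # B the local batch size; eta is the learning rate. (MAX_T is assumed here to cap
    # the number of communication rounds.)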

    # Run with the training set distributed I.I.D.
    print()
    print("Running Experiments with training set distributed I.I.D")
    print()
    vanilla_sgd_hparams.iid = True
    fedavg_hparams.iid = True
    lasg_wk2_hparams.iid = True
    iid_vanilla_sgd_log, iid_fedavg_log, iid_lasg_wk2_log = runner.run_experiments(
        X_train, y_train, X_test, y_test, model_constructor,
        vanilla_sgd_hparams, fedavg_hparams, lasg_wk2_hparams, seed=100)

    # Run with the training set distributed non-I.I.D.
    # We use the technique of McMahan, et al. to split the data among workers in a
    # "pathologically" non-IID manner, where each worker may have only a few labels
    # represented in its local data. Additionally, we randomize the volume of data
    # each worker receives.
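    # Roughly, the pathological split sorts the training examples by label, slices them
    # into contiguous shards, and assigns each worker a handful of shards, so most workers
    # see only a small subset of the character classes. The split itself is assumed to be
    # implemented inside runner.run_experiments when the hparams' iid flag is False.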
    print()
    print("Running Experiments with training set distributed non-I.I.D")
    print()
    vanilla_sgd_hparams.iid = False
    fedavg_hparams.iid = False
    lasg_wk2_hparams.iid = False
    non_iid_vanilla_sgd_log, non_iid_fedavg_log, non_iid_lasg_wk2_log = runner.run_experiments(
        X_train, y_train, X_test, y_test, model_constructor,
        vanilla_sgd_hparams, fedavg_hparams, lasg_wk2_hparams, seed=100)

    print()
    print("Done!")
    print()

    ############################################################################
    # PLOT RESULTS
    ############################################################################
    runner.plot_results(iid_vanilla_sgd_log, iid_fedavg_log, iid_lasg_wk2_log,
                        target_test_accuracy, iid=True, dataset="Nietzsche", modeltype="LSTM", mode="save")
    runner.plot_results(non_iid_vanilla_sgd_log, non_iid_fedavg_log, non_iid_lasg_wk2_log,
                        target_test_accuracy, iid=False, dataset="Nietzsche", modeltype="LSTM", mode="save")
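    # mode="save" presumably writes the comparison plots to disk rather than displaying
    # them interactively; see runner.plot_results for the exact behavior.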


if __name__ == "__main__":
    run()