-
Notifications
You must be signed in to change notification settings - Fork 0
/
broadcast
executable file
·261 lines (224 loc) · 9.93 KB
/
broadcast
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
#!/usr/bin/env python3
#
# TODO show detailed progress during long running procedure
# TODO it may be better to just launch a remote background process (or a
# systemd service?) to perform "the task" and do periodic polling to see
# the current status (the progress and if it is finished) of the task
#
# ssh remote true
# ssh remote sudo -n true
# ssh remote curl -fsL https://fast-crypto-lab.github.io/cluster/bootstrap
# ssh remote sudo -n bash /tmp/bootstrap
#
# if the following command keeps reporting its progress (using its stdout)
#
# _update_settings_on_this_machine.py apply-json
#
# then /tmp/bootstrap could show the detailed progress
#
# Remote command of the first pass: it succeeds only when the login user can
# run sudo without being asked for a password ("-n" = never prompt).
TEST_SUDO_COMMAND = ' sudo -n true ; '

# Remote command of the second pass: fetch the bootstrap script and pipe it
# into a root shell.  "pipefail" makes a failed download fail the pipeline.
REMOTE_BASH_COMMAND = (
    ' set -o pipefail ;'
    ' curl -fsL --retry 5 --connect-timeout 3 --max-time 4 --retry-delay 1'
    ' https://fast-crypto-lab.github.io/cluster/bootstrap'
    ' | sudo -n bash ; '
)
# Harmless stand-in for the command above, handy while testing this script:
#REMOTE_BASH_COMMAND = ' sleep 10; echo hi; '
def proc(args):
    """Start the command `args` as a child process and return its handle.

    `args` must be a non-empty list of strings (the argv of the command).
    The child's stdin, stdout and stderr are all connected to the null
    device, so it can neither read input nor print anything.

    Returns the running `subprocess.Popen` object; the caller is responsible
    for waiting on it.
    """
    from subprocess import DEVNULL, Popen

    is_argv_list = type(args) is list and args and all(type(a) is str for a in args)
    assert is_argv_list

    null_streams = {'stdin': DEVNULL, 'stdout': DEVNULL, 'stderr': DEVNULL}
    return Popen(args, **null_streams)
def wait_all_procs(procs, timeout, show_progress=False):
    """Wait until every process in `procs` has exited or `timeout` expires.

    The processes are polled about four times per second.  Every process
    that is still running once the deadline has passed is terminated, and
    killed if it has not exited a few seconds after the termination request,
    so on return each process has a `returncode`.

    Args:
        procs: list of `subprocess.Popen` objects to wait for.
        timeout: maximum number of seconds (int or float, >= 0) to wait.
        show_progress: when true, draw a one-line spinner with the number of
            completed processes on stdout.

    Returns:
        None.
    """
    from subprocess import Popen
    from subprocess import TimeoutExpired
    from time import monotonic
    from time import sleep
    import sys

    POLL_INTERVAL = 0.25  # seconds between two polls
    KILL_GRACE = 5        # seconds a terminated process gets before it is killed
    SPINNER = '|/-\\'

    assert type(procs) is list and all(type(p) is Popen for p in procs)
    assert type(timeout) in {int, float} and timeout >= 0

    # Push out anything already buffered so it cannot mix with the spinner.
    sys.stdout.flush()
    sys.stderr.flush()

    total_number = len(procs)
    # A monotonic deadline bounds the wait by `timeout` in real elapsed time,
    # independent of how long polling and sleeping actually take.
    deadline = monotonic() + timeout
    counter = 0
    while True:
        num_complete = sum(1 for p in procs if p.poll() is not None)
        if show_progress:
            print('\r{} {}/{} completed'.format(SPINNER[counter], num_complete, total_number), flush=True, end='')
        if num_complete == total_number:
            if show_progress:
                print('\r', flush=True)
            return
        remaining = deadline - monotonic()
        if remaining <= 0:
            break
        sleep(min(POLL_INTERVAL, remaining))
        counter = (counter + 1) % len(SPINNER)

    if show_progress:
        # Finish the spinner line exactly as the success path does, so that
        # later output never gets appended to the progress text.
        print('\r', flush=True)

    # Ask every straggler to stop first, then reap them; a process that
    # ignores the request is killed so this function cannot hang forever.
    stragglers = [p for p in procs if p.poll() is None]
    for p in stragglers:
        p.terminate()
    for p in stragglers:
        try:
            p.wait(timeout=KILL_GRACE)
        except TimeoutExpired:
            p.kill()
            p.wait()
def load_fcl_json_to_global():
    """Fill the module-level `FCL` with the validated cluster description.

    The description is read from `cluster-info/fcl.json`, looked up in the
    directory that holds the launched script (after resolving symlinks).
    """
    global FCL
    from os.path import dirname, realpath
    from sys import argv

    script_dir = dirname(realpath(argv[0]))
    FCL = read_fcl_json(script_dir + '/cluster-info/fcl.json')
def read_fcl_json(filename):
    """Load and validate the cluster description stored in `filename`.

    The file must contain a UTF-8 encoded JSON object with at least the keys
    'hosts' and 'users', both lists of objects:

    * a host has 'name', 'private-ip' (string or null), 'public-ip-port'
      ("a.b.c.d:port") and 'public-keys'; host names, non-null private
      addresses and public address/port pairs must each be unique;
    * a user has 'id' (10000..19999), 'name', 'public-keys' and
      'permit-sudo'; ids and names must be unique, and at least one user
      must have both sudo permission and a public key.

    Returns:
        The decoded dict.  Each host's 'public-keys' list is sorted so that
        ed25519 keys come first, then ecdsa, then rsa.

    Raises:
        OSError: if the file cannot be opened or read.
        ValueError: if the content is not valid UTF-8, not valid JSON, or
            does not satisfy the rules above.
    """
    import json
    import re

    NAME = 'name'            # host/user, name
    KEYS = 'public-keys'     # host/user, public keys
    PRIP = 'private-ip'      # host, null if this machine is not in 302
    PUBA = 'public-ip-port'  # host, public tcp port for ssh connection
    NUID = 'id'              # user, numeric uid
    SUDO = 'permit-sudo'     # user, sudo permission

    # Accepted key types, in the order used to sort a host's keys.
    KEY_TYPES = ['ssh-ed25519', 'ecdsa-sha2-nistp256', 'ssh-rsa']
    RE_PUBKEY = (
        r'^ssh-ed25519 [A-Za-z0-9+/]{68}$|'
        r'^ecdsa-sha2-nistp256 [A-Za-z0-9+/]{139}=$|'
        r'^ssh-rsa [A-Za-z0-9+/=]{372,}$')
    RE_NAME = r'^[a-z][a-z0-9]{1,30}$'

    ################################################################

    def is_good_hosts_and_users(dd):
        return (type(dd) is dict
                and dd.keys() >= {'hosts', 'users'}
                and is_good_hosts(dd['hosts'])
                and is_good_users(dd['users']))

    def is_good_hosts(hh):
        return (type(hh) is list
                and all(is_good_host(h) for h in hh)
                and contains_no_duplicates(h[NAME] for h in hh)
                and contains_no_duplicates(h[PRIP] for h in hh if h[PRIP])
                and contains_no_duplicates(h[PUBA] for h in hh))

    def is_good_users(uu):
        return (type(uu) is list
                and all(is_good_user(u) for u in uu)
                and contains_no_duplicates(u[NUID] for u in uu)
                and contains_no_duplicates(u[NAME] for u in uu)
                and any(u[SUDO] and u[KEYS] for u in uu))

    def contains_no_duplicates(it):
        vals = list(it)
        return len(vals) == len(set(vals))

    def is_good_host(h):
        return (type(h) is dict and h.keys() >= {NAME, PRIP, PUBA, KEYS}
                and type(h[NAME]) is str
                and type(h[PRIP]) in {str, type(None)}
                and type(h[PUBA]) is str
                and type(h[KEYS]) is list and all(type(k) is str for k in h[KEYS])
                and re.fullmatch(RE_NAME, h[NAME])
                and is_good_private_ip_address(h[PRIP])
                and is_good_public_ip_address_and_port(h[PUBA])
                and all(re.fullmatch(RE_PUBKEY, k) for k in h[KEYS]))

    def is_good_user(u):
        return (type(u) is dict and u.keys() >= {NUID, NAME, KEYS, SUDO}
                and type(u[NUID]) is int
                and type(u[NAME]) is str
                and type(u[SUDO]) is bool
                and type(u[KEYS]) is list and all(type(k) is str for k in u[KEYS])
                and 10000 <= u[NUID] <= 19999
                and re.fullmatch(RE_NAME, u[NAME])
                and all(re.fullmatch(RE_PUBKEY, k) for k in u[KEYS]))

    def is_good_private_ip_address(addr):
        # 10.3.2.1 .. 10.3.2.254, or None for "no private address"
        PAT = r'^10\.3\.2\.([1-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-4])$'
        return addr is None or re.fullmatch(PAT, addr)

    def is_good_public_ip_address_and_port(ss):
        OCTET = r'([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])'
        PAT = '^' + r'\.'.join([OCTET] * 4) + ':' + r'([0-9]|[1-9][0-9]{1,4})' + '$'
        # The pattern admits up to five digits, so the port range is checked
        # numerically as well.
        return re.fullmatch(PAT, ss) and int(ss.split(':')[1]) < 65536

    def invalid_content():
        return ValueError('the content of {} is invalid'.format(filename))

    ################################################################

    with open(filename, 'rb') as f:
        raw_bytes = f.read()

    # UnicodeDecodeError and json.JSONDecodeError are both ValueError
    # subclasses; RecursionError covers absurdly deep nesting.
    try:
        hosts_and_users = json.loads(raw_bytes.decode())
    except (ValueError, RecursionError) as err:
        raise invalid_content() from err

    # An explicit check instead of `assert`, so that the validation is not
    # stripped when Python runs with -O.
    if not is_good_hosts_and_users(hosts_and_users):
        raise invalid_content()

    for h in hosts_and_users['hosts']:
        h[KEYS].sort(key=lambda k: KEY_TYPES.index(k.split(' ')[0]))
    return hosts_and_users
def parse_cmdline_args():
    """Parse the command line of this program.

    Returns an `argparse.Namespace` with three attributes:
      login    -- remote user name; defaults to the current local user
      identity -- private key file for ssh, or None to use ssh defaults
      ignore   -- comma-separated host names to skip; defaults to ''

    Option abbreviations are not accepted.
    """
    import argparse
    from getpass import getuser

    current_user = getuser()

    # (flags, keyword arguments) for every supported option
    option_table = [
        (('-l', '--login'), {
            'metavar': '<name>',
            'default': current_user,
            'help': 'specify the user to log in as on the remote machine; the default is your current username (' + current_user + ')',
        }),
        (('-i', '--identity'), {
            'metavar': '<identity>',
            'help': 'specify the private key for public key authentication instead of ssh defaults',
        }),
        (('--ignore',), {
            'metavar': '<ignore>',
            'default': '',
            'help': 'specify a comma-separated list of host names to ignore',
        }),
    ]

    parser = argparse.ArgumentParser(allow_abbrev=False)
    for flags, settings in option_table:
        parser.add_argument(*flags, **settings)
    return parser.parse_args()
def build_ssh_command(hostname, username, identity, CONFIG, KNOWNH, remote_command):
    """Return the argv list of the ssh call that runs `remote_command`.

    Args:
        hostname: host to connect to.
        username: login name on the remote host (ssh -l).
        identity: private key file (ssh -i), or None to omit the option.
        CONFIG: ssh configuration file to use (ssh -F).
        KNOWNH: file used as the global known-hosts database; the per-user
            known-hosts file is replaced by /dev/null.
        remote_command: command string passed to the remote side.
    """
    fixed_options = [
        'GlobalKnownHostsFile {}'.format(KNOWNH),
        'UserKnownHostsFile /dev/null',
        'ConnectTimeout 8',
        'HostbasedAuthentication yes',
    ]

    argv = ['ssh', '-F', CONFIG]
    for option in fixed_options:
        argv.extend(('-o', option))
    argv.extend(('-l', username))
    if identity is not None:
        argv.extend(('-i', identity))
    argv.append(hostname)
    argv.append(remote_command)
    return argv
def main():
    """Run the bootstrap procedure on every host of the cluster.

    Steps, in order:
      1. check that cluster-info/ssh_config.txt and
         cluster-info/ssh_known_hosts.txt exist next to the script, and load
         the cluster description into the global `FCL`;
      2. parse the command line and drop the hosts listed in --ignore;
      3. run TEST_SUDO_COMMAND on all remaining hosts at once, waiting at
         most 16 seconds;
      4. run REMOTE_BASH_COMMAND on all of them, waiting at most two seconds
         per user in the cluster description.

    The program exits with status 1 when an ignored host name is unknown or
    when any host fails step 3 or step 4.
    """
    import os
    import sys
    from math import ceil

    script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
    CONFIG = script_dir + '/cluster-info/ssh_config.txt'
    KNOWNH = script_dir + '/cluster-info/ssh_known_hosts.txt'
    assert os.path.isfile(CONFIG)
    assert os.path.isfile(KNOWNH)

    load_fcl_json_to_global()

    options = parse_cmdline_args()
    identity = options.identity
    username = options.login

    # An empty --ignore value splits into [''], which means "ignore nothing".
    requested = options.ignore.split(',')
    ignore_list_of_hostnames = [] if requested == [''] else requested

    known_hostnames = [h['name'] for h in FCL['hosts']]
    for name in ignore_list_of_hostnames:
        if name in known_hostnames:
            continue
        print('Command-line argument error:')
        print('You mentioned that you want to ignore the host `{}`, but there is no such host'.format(name))
        sys.exit(1)

    FCL['hosts'] = [h for h in FCL['hosts'] if h['name'] not in ignore_list_of_hostnames]
    hostnames = [h['name'] for h in FCL['hosts']]

    def launch_on_all_hosts(remote_command):
        # One ssh child per remaining host, all started before any is awaited.
        return [
            proc(build_ssh_command(host, username, identity, CONFIG, KNOWNH, remote_command))
            for host in hostnames
        ]

    def exit_if_any_failed(procs, headline):
        # Report the hosts whose ssh child did not exit with status 0.
        codes = [p.returncode for p in procs]
        print()
        if all(code == 0 for code in codes):
            return
        print(codes)
        print(headline)
        print(' '.join(host for host, code in zip(hostnames, codes) if code != 0))
        sys.exit(1)

    print('The username we will use to log in the cluster is: "{}"'.format(username))
    print()
    print('The cluster currently contains these hosts:')
    print(' '.join(hostnames))
    print()

    print('Trying to connect to all hosts and run "sudo -n true"...')
    sudo_checks = launch_on_all_hosts(TEST_SUDO_COMMAND)
    print('Waiting {} ssh processes to complete for at most 16 seconds...'.format(len(sudo_checks)))
    wait_all_procs(sudo_checks, 16, show_progress=True)
    exit_if_any_failed(sudo_checks, 'Error: Failed to run "sudo -n true" on these hosts:')

    print('Trying to run bootstrap command on all hosts...')
    bootstraps = launch_on_all_hosts(REMOTE_BASH_COMMAND)
    timeout_seconds = ceil(2.0 * len(FCL['users']))
    print('Waiting {} ssh processes to complete for at most {} seconds...'.format(len(bootstraps), timeout_seconds))
    wait_all_procs(bootstraps, timeout_seconds, show_progress=True)
    exit_if_any_failed(bootstraps, 'Error: Failed to run bootstrap procedure on these hosts:')

    print('broadcast completed')
# Run the broadcast only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()