12
12
import pyfiglet
13
13
14
14
15
+ # Get cerberus status
16
def cerberus_integration(config):
    """Fetch the go/no-go signal published by Cerberus, when enabled.

    Args:
        config: Parsed kraken configuration. Reads
            ``config["cerberus"]["cerberus_enabled"]`` and, when that is
            truthy, ``config["cerberus"]["cerberus_url"]``.

    Returns:
        True when Cerberus integration is disabled, or when the body
        returned by the Cerberus URL is exactly ``b'True'``.

    Raises:
        SystemExit: with status 1 when Cerberus is enabled but no URL is
            configured, or when the response body is anything other than
            ``b'True'``.
        requests.exceptions.RequestException: when the HTTP request fails
            or does not complete within the timeout.
    """
    if not config["cerberus"]["cerberus_enabled"]:
        return True

    cerberus_url = config["cerberus"]["cerberus_url"]
    if not cerberus_url:
        logging.error("url where Cerberus publishes True/False signal is not provided.")
        sys.exit(1)

    # Without a timeout an unresponsive Cerberus endpoint would block the
    # chaos run forever; bound the wait so the failure becomes visible.
    response = requests.get(cerberus_url, timeout=60)
    cerberus_status = response.content == b'True'
    if not cerberus_status:
        logging.error("Received a no-go signal from Cerberus, looks like "
                      "the cluster is unhealthy. Please check the Cerberus "
                      "report for more details. Test failed.")
        sys.exit(1)

    logging.info("Received a go signal from Ceberus, the cluster is healthy. "
                 "Test passed.")
    return cerberus_status
34
+
35
+
36
+ # Check the Cerberus signal and report (or exit on) post action scenarios that are still failing
37
def publish_kraken_status(config, failed_post_scenarios):
    """Report failing post action scenarios alongside the Cerberus status.

    The Cerberus signal is always queried first via
    ``cerberus_integration(config)``. Nothing more happens when
    ``failed_post_scenarios`` is empty.

    Args:
        config: Parsed kraken configuration. ``config['kraken']['exit_on_failure']``
            is read only when there are failing post action scenarios.
        failed_post_scenarios: Collection of post action scenarios that are
            still failing; only its truthiness is inspected here.

    Raises:
        SystemExit: with status 1 when scenarios are still failing and
            ``exit_on_failure`` is truthy.
    """
    healthy = cerberus_integration(config)
    if not failed_post_scenarios:
        return

    # Both health outcomes log the same sentence apart from this lead-in.
    if healthy:
        lead_in = "Cerberus status is healthy but post action scenarios "
    else:
        lead_in = "Cerberus status is not healthy and post action scenarios "

    if config['kraken']['exit_on_failure']:
        logging.info(lead_in + "are still failing, exiting kraken run")
        sys.exit(1)

    logging.info(lead_in + "are still failing")
58
+
59
+
60
def run_post_action(kubeconfig_path, scenario, pre_action_output=""):
    """Run a pre/post action check for a scenario and report its outcome.

    The kind of check is chosen from the ``scenario`` file name:

    * ``.yaml`` / ``.yml``: run as a powerfulseal policy file; the check
      fails when the command output contains ``"ERROR"``.
    * ``.py``: run with ``python3``.
    * anything else: invoked directly as a command.

    For the last two kinds, the stripped output is compared with
    ``pre_action_output`` when one is supplied.

    Args:
        kubeconfig_path: Path passed to powerfulseal via ``--kubeconfig``.
        scenario: Path of the check to run.
        pre_action_output: Output captured from an earlier run of the same
            check; an empty value means there is nothing to compare against.

    Returns:
        The command output on success (stripped for non-policy checks), or
        ``False`` when the check failed. Callers must test ``is False``
        because a successful check may legitimately return an empty string.
    """
    if scenario.endswith((".yaml", ".yml")):
        action_output = runcommand.invoke("powerfulseal autonomous "
                                          "--use-pod-delete-instead-of-ssh-kill"
                                          " --policy-file %s --kubeconfig %s --no-cloud"
                                          " --inventory-kubernetes --headless"
                                          % (scenario, kubeconfig_path))
        # Read the output to make sure powerfulseal reported no error.
        if "ERROR" in action_output:
            # The original computed this first error line and discarded it;
            # surface it so the failure can be diagnosed from the log.
            error_detail = action_output.split("ERROR", 1)[1].split("\n", 1)[0].strip()
            logging.error("Powerful seal reported an error for %s: %s",
                          scenario, error_detail)
            if not pre_action_output:
                logging.info("Powerful seal pre action check failed for " + str(scenario))
            # NOTE(review): assumes any ERROR fails the check regardless of
            # pre_action_output — confirm against the unmangled source.
            return False
        logging.info(scenario + " post action checks passed")
        return action_output

    # Python checks run through the interpreter; everything else is treated
    # as a custom script and invoked as-is.
    command = "python3 " + scenario if scenario.endswith(".py") else scenario
    action_output = runcommand.invoke(command).strip()
    if pre_action_output:
        if pre_action_output != action_output:
            logging.info(scenario + ' post action response did not match pre check output')
            return False
        logging.info(scenario + " post action checks passed")
    return action_output
96
+
97
+
98
+ # Perform the post scenario actions to see if components recovered
99
def post_actions(kubeconfig_path, scenario, failed_post_scenarios, pre_action_output):
    """Re-check previously failing post actions, then check the current one.

    Args:
        kubeconfig_path: Path forwarded to ``run_post_action``.
        scenario: Sequence describing the current scenario; when it has more
            than one element, ``scenario[1]`` is run as its post action.
        failed_post_scenarios: List of ``[post_action, pre_action_output]``
            entries that failed earlier. It is updated in place.
        pre_action_output: Output captured before the current scenario ran,
            used as the expected post action output.

    Returns:
        The same ``failed_post_scenarios`` list, holding only the entries
        that are still failing plus the current post action if it failed.
    """
    # Build the survivors separately: removing entries from the list while
    # iterating over it skips the element that follows each removal.
    still_failing = []
    for failed_scenario in failed_post_scenarios:
        post_action_output = run_post_action(kubeconfig_path,
                                             failed_scenario[0], failed_scenario[1])
        if post_action_output is False:
            logging.info('Post action scenario ' + str(failed_scenario) + " is still failing")
            still_failing.append(failed_scenario)
    # Slice-assign so callers holding a reference to the list see the update.
    failed_post_scenarios[:] = still_failing

    # Check the post action of the current scenario, if it defines one.
    if len(scenario) > 1:
        post_action_output = run_post_action(kubeconfig_path, scenario[1], pre_action_output)
        if post_action_output is False:
            failed_post_scenarios.append([scenario[1], pre_action_output])

    return failed_post_scenarios
116
+
117
+
15
118
# Main function
16
119
def main (cfg ):
17
120
# Start kraken
@@ -24,7 +127,6 @@ def main(cfg):
24
127
config = yaml .full_load (f )
25
128
kubeconfig_path = config ["kraken" ]["kubeconfig_path" ]
26
129
scenarios = config ["kraken" ]["scenarios" ]
27
- cerberus_enabled = config ["cerberus" ]["cerberus_enabled" ]
28
130
wait_duration = config ["tunings" ]["wait_duration" ]
29
131
iterations = config ["tunings" ]["iterations" ]
30
132
daemon_mode = config ["tunings" ]['daemon_mode' ]
@@ -59,41 +161,34 @@ def main(cfg):
59
161
% str (iterations ))
60
162
iterations = int (iterations )
61
163
164
+ failed_post_scenarios = []
62
165
# Loop to run the chaos starts here
63
166
while (int (iteration ) < iterations ):
64
167
# Inject chaos scenarios specified in the config
168
+ logging .info ("Executing scenarios for iteration " + str (iteration ))
65
169
try :
66
170
# Loop to run the scenarios starts here
67
171
for scenario in scenarios :
68
- logging . info ( "Injecting scenario: %s" % ( scenario ) )
172
+ pre_action_output = run_post_action ( kubeconfig_path , scenario [ 1 ] )
69
173
runcommand .invoke ("powerfulseal autonomous --use-pod-delete-instead-of-ssh-kill"
70
174
" --policy-file %s --kubeconfig %s --no-cloud"
71
175
" --inventory-kubernetes --headless"
72
- % (scenario , kubeconfig_path ))
73
- logging .info ("Scenario: %s has been successfully injected!" % (scenario ))
74
-
75
- if cerberus_enabled :
76
- cerberus_url = config ["cerberus" ]["cerberus_url" ]
77
- if not cerberus_url :
78
- logging .error ("url where Cerberus publishes True/False signal "
79
- "is not provided." )
80
- sys .exit (1 )
81
- cerberus_status = requests .get (cerberus_url ).content
82
- cerberus_status = True if cerberus_status == b'True' else False
83
- if not cerberus_status :
84
- logging .error ("Received a no-go signal from Cerberus, looks like the"
85
- " cluster is unhealthy. Please check the Cerberus report"
86
- " for more details. Test failed." )
87
- sys .exit (1 )
88
- else :
89
- logging .info ("Received a go signal from Ceberus, the cluster is "
90
- "healthy. Test passed." )
176
+ % (scenario [0 ], kubeconfig_path ))
177
+
178
+ logging .info ("Scenario: %s has been successfully injected!" % (scenario [0 ]))
91
179
logging .info ("Waiting for the specified duration: %s" % (wait_duration ))
92
180
time .sleep (wait_duration )
181
+ failed_post_scenarios = post_actions (kubeconfig_path , scenario ,
182
+ failed_post_scenarios , pre_action_output )
183
+ publish_kraken_status (config , failed_post_scenarios )
93
184
except Exception as e :
94
185
logging .error ("Failed to run scenario: %s. Encountered the following exception: %s"
95
- % (scenario , e ))
186
+ % (scenario [ 0 ] , e ))
96
187
iteration += 1
188
+ logging .info ("" )
189
+ if failed_post_scenarios :
190
+ logging .error ("Post scenarios are still failing at the end of all iterations" )
191
+ sys .exit (1 )
97
192
else :
98
193
logging .error ("Cannot find a config at %s, please check" % (cfg ))
99
194
sys .exit (1 )
0 commit comments