From 7ac79b8c086d177fafcd2bf01f6f6fb2e3949e98 Mon Sep 17 00:00:00 2001
From: Oliver Gould
Date: Mon, 18 Apr 2022 11:13:25 -0700
Subject: [PATCH] policy-test: Explicitly wait for init containers (#8272)

The policy integration tests sometimes fail in CI with timeouts while
waiting for a curl pod's exit code, and it's not clear why. This change
separates waiting for the pod's init containers from waiting for the
curl container's exit code so that a stuck init container is logged
explicitly, and extends pod status logging to include init container
states.

`curl` invocations now use a timeout and retries to limit flakiness,
and `curl-lock` deletions now wait for the resource to be fully
removed.

Signed-off-by: Oliver Gould
---
 policy-test/src/curl.rs | 50 ++++++++++++++++++++++++++++++++++++-----
 policy-test/src/lib.rs  |  3 +++
 2 files changed, 48 insertions(+), 5 deletions(-)

diff --git a/policy-test/src/curl.rs b/policy-test/src/curl.rs
index bc5694a159322..b88ebd2554d88 100644
--- a/policy-test/src/curl.rs
+++ b/policy-test/src/curl.rs
@@ -49,11 +49,15 @@ impl Runner {
     /// Deletes the lock configmap, allowing curl pods to execute.
     pub async fn delete_lock(&self) {
         tracing::trace!(ns = %self.namespace, "Deleting curl-lock");
-        kube::Api::<k8s::api::core::v1::ConfigMap>::namespaced(
+        let api = kube::Api::<k8s::api::core::v1::ConfigMap>::namespaced(
             self.client.clone(),
             &self.namespace,
+        );
+        kube::runtime::wait::delete::delete_and_finalize(
+            api,
+            "curl-lock",
+            &kube::api::DeleteParams::foreground(),
         )
-        .delete("curl-lock", &kube::api::DeleteParams::foreground())
         .await
         .expect("curl-lock must be deleted");
         tracing::debug!(ns = %self.namespace, "Deleted curl-lock");
@@ -159,10 +163,12 @@ impl Runner {
             // after the configmap is deleted, even with a long timeout.
             // Instead, we use a relatively short timeout and retry the
             // wait to get a better chance.
-            command: Some(vec!["sh".to_string(), "-c".to_string()]),
+            command: Some(vec!["sh".to_string(), "-c".to_string()]),
             args: Some(vec![format!(
                 "for i in $(seq 12) ; do \
+                    echo waiting 10s for curl-lock to be deleted ; \
                     if kubectl wait --timeout=10s --for=delete --namespace={ns} cm/curl-lock ; then \
+                        echo curl-lock deleted ; \
                         exit 0 ; \
                     fi ; \
                 done ; \
@@ -174,7 +180,7 @@ impl Runner {
                 name: "curl".to_string(),
                 image: Some("docker.io/curlimages/curl:latest".to_string()),
                 args: Some(
-                    vec!["curl", "-sSfv", target_url]
+                    vec!["curl", "-sSfv", "--max-time", "10", "--retry", "12", target_url]
                         .into_iter()
                         .map(Into::into)
                         .collect(),
@@ -201,6 +207,8 @@ impl Running {
 
     /// Waits for the curl container to complete and returns its exit code.
     pub async fn exit_code(self) -> i32 {
+        self.inits_complete().await;
+
         fn get_exit_code(pod: &k8s::Pod) -> Option<i32> {
             let c = pod
                 .status
@@ -220,7 +228,7 @@ impl Running {
             &self.name,
             |obj: Option<&k8s::Pod>| -> bool { obj.and_then(get_exit_code).is_some() },
         );
-        match time::timeout(time::Duration::from_secs(120), finished).await {
+        match time::timeout(time::Duration::from_secs(30), finished).await {
             Ok(Ok(())) => {}
             Ok(Err(error)) => panic!("Failed to wait for exit code: {}: {}", self.name, error),
             Err(_timeout) => {
@@ -241,4 +249,36 @@ impl Running {
 
         code
     }
+
+    async fn inits_complete(&self) {
+        let api = kube::Api::namespaced(self.client.clone(), &self.namespace);
+        let init_complete = kube::runtime::wait::await_condition(
+            api,
+            &self.name,
+            |pod: Option<&k8s::Pod>| -> bool {
+                if let Some(pod) = pod {
+                    if let Some(status) = pod.status.as_ref() {
+                        return status.init_container_statuses.iter().flatten().all(|init| {
+                            init.state
+                                .as_ref()
+                                .map(|s| s.terminated.is_some())
+                                .unwrap_or(false)
+                        });
+                    }
+                }
+                false
+            },
+        );
+
+        match time::timeout(time::Duration::from_secs(120), init_complete).await {
+            Ok(Ok(())) => {}
+            Ok(Err(error)) => panic!("Failed to watch pod status: {}: {}", self.name, error),
+            Err(_timeout) => {
+                panic!(
+                    "Timeout waiting for init containers to complete: {}",
+                    self.name
+                );
+            }
+        };
+    }
 }
diff --git a/policy-test/src/lib.rs b/policy-test/src/lib.rs
index 85c7460c88ed9..8b159cd39a7bb 100644
--- a/policy-test/src/lib.rs
+++ b/policy-test/src/lib.rs
@@ -149,6 +149,9 @@ where
         if let Some(status) = p.status {
             let _span = tracing::info_span!("pod", ns = %ns.name(), name = %pod).entered();
             tracing::trace!(reason = ?status.reason, message = ?status.message);
+            for c in status.init_container_statuses.into_iter().flatten() {
+                tracing::trace!(init_container = %c.name, ready = %c.ready, state = ?c.state);
+            }
             for c in status.container_statuses.into_iter().flatten() {
                 tracing::trace!(container = %c.name, ready = %c.ready, state = ?c.state);
             }
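
For reference, the init-container wait introduced by `inits_complete` boils down to
`kube::runtime::wait::await_condition` wrapped in a `tokio::time::timeout`. The sketch
below is a minimal standalone illustration of that pattern, assuming plain `k8s-openapi`
types and a `kube::Client` supplied by the caller rather than the crate's `k8s` alias and
test harness; the helper name `wait_for_init_containers` is illustrative and not part of
the policy-test API.

// Standalone sketch of the init-container wait pattern (illustrative only).
// Assumes dependencies: kube with the "runtime" feature, k8s-openapi, and tokio.
use k8s_openapi::api::core::v1::Pod;
use tokio::time;

// Hypothetical helper: waits until every init container of pod `name` in namespace
// `ns` has terminated, or panics after two minutes.
async fn wait_for_init_containers(client: kube::Client, ns: &str, name: &str) {
    let api = kube::Api::<Pod>::namespaced(client, ns);

    // Resolves once the condition closure returns true for the watched pod.
    let init_complete =
        kube::runtime::wait::await_condition(api, name, |pod: Option<&Pod>| -> bool {
            pod.and_then(|p| p.status.as_ref())
                .map(|status| {
                    // `init_container_statuses` is optional; a pod with no init
                    // containers is trivially complete, matching the patch's
                    // `.iter().flatten().all(..)` behavior.
                    status.init_container_statuses.iter().flatten().all(|init| {
                        init.state
                            .as_ref()
                            .map(|state| state.terminated.is_some())
                            .unwrap_or(false)
                    })
                })
                .unwrap_or(false)
        });

    // Bound the wait so a stuck init container fails loudly rather than hanging,
    // mirroring the 120s timeout used in the patch.
    match time::timeout(time::Duration::from_secs(120), init_complete).await {
        Ok(Ok(_)) => {}
        Ok(Err(error)) => panic!("failed to watch pod {}/{}: {}", ns, name, error),
        Err(_elapsed) => panic!("timed out waiting for init containers of {}/{}", ns, name),
    }
}

In the patch itself, `Running::exit_code` performs this wait first via `inits_complete`,
which is why the subsequent wait for the curl container's exit code can drop to the
shorter 30-second timeout.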