Skip to content

Commit

Permalink
policy-test: Explicitly wait for init containers (#8272)
Browse files Browse the repository at this point in the history
The policy integration tests sometimes fail in CI due to timeouts while
waiting for a curl pod's exit code. It is not yet clear why this
happens.

This change separates waiting for the init containers from waiting for
the exit code so we can log that situation explicitly. Furthermore,
logging has been enhanced to include information about init containers.

`curl` invocations now use a timeout and retries to help limit flakiness.
Also, `curl-lock` deletions now wait for the resource to be fully removed.

Signed-off-by: Oliver Gould <[email protected]>
  • Loading branch information
olix0r authored Apr 18, 2022
1 parent 458cd0f commit 7ac79b8
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 5 deletions.
50 changes: 45 additions & 5 deletions policy-test/src/curl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,15 @@ impl Runner {
/// Deletes the lock configmap, allowing curl pods to execute.
pub async fn delete_lock(&self) {
tracing::trace!(ns = %self.namespace, "Deleting curl-lock");
kube::Api::<k8s::api::core::v1::ConfigMap>::namespaced(
let api = kube::Api::<k8s::api::core::v1::ConfigMap>::namespaced(
self.client.clone(),
&self.namespace,
);
kube::runtime::wait::delete::delete_and_finalize(
api,
"curl-lock",
&kube::api::DeleteParams::foreground(),
)
.delete("curl-lock", &kube::api::DeleteParams::foreground())
.await
.expect("curl-lock must be deleted");
tracing::debug!(ns = %self.namespace, "Deleted curl-lock");
Expand Down Expand Up @@ -159,10 +163,12 @@ impl Runner {
// after the configmap is deleted, even with a long timeout.
// Instead, we use a relatively short timeout and retry the
// wait to get a better chance.
command: Some(vec!["sh".to_string(), "-c".to_string()]),
command: Some(vec!["sh".to_string(), "-c".to_string()]),
args: Some(vec![format!(
"for i in $(seq 12) ; do \
echo waiting 10s for curl-lock to be deleted ; \
if kubectl wait --timeout=10s --for=delete --namespace={ns} cm/curl-lock ; then \
echo curl-lock deleted ; \
exit 0 ; \
fi ; \
done ; \
Expand All @@ -174,7 +180,7 @@ impl Runner {
name: "curl".to_string(),
image: Some("docker.io/curlimages/curl:latest".to_string()),
args: Some(
vec!["curl", "-sSfv", target_url]
vec!["curl", "-sSfv", "--max-time", "10", "--retry", "12", target_url]
.into_iter()
.map(Into::into)
.collect(),
Expand All @@ -201,6 +207,8 @@ impl Running {

/// Waits for the curl container to complete and returns its exit code.
pub async fn exit_code(self) -> i32 {
self.inits_complete().await;

fn get_exit_code(pod: &k8s::Pod) -> Option<i32> {
let c = pod
.status
Expand All @@ -220,7 +228,7 @@ impl Running {
&self.name,
|obj: Option<&k8s::Pod>| -> bool { obj.and_then(get_exit_code).is_some() },
);
match time::timeout(time::Duration::from_secs(120), finished).await {
match time::timeout(time::Duration::from_secs(30), finished).await {
Ok(Ok(())) => {}
Ok(Err(error)) => panic!("Failed to wait for exit code: {}: {}", self.name, error),
Err(_timeout) => {
Expand All @@ -241,4 +249,36 @@ impl Running {

code
}

/// Blocks until every init container on this pod has terminated.
///
/// Panics if the pod watch fails or if the init containers do not all
/// finish within 120 seconds. A pod with no init containers satisfies
/// the condition trivially.
async fn inits_complete(&self) {
    let api = kube::Api::namespaced(self.client.clone(), &self.namespace);

    // True once every init container reports a `terminated` state.
    // `iter().flatten()` handles a missing/empty status list, in which
    // case `all` is vacuously true.
    let all_inits_terminated = |pod: Option<&k8s::Pod>| -> bool {
        pod.and_then(|p| p.status.as_ref())
            .map(|status| {
                status
                    .init_container_statuses
                    .iter()
                    .flatten()
                    .all(|init| {
                        init.state
                            .as_ref()
                            .map(|state| state.terminated.is_some())
                            .unwrap_or(false)
                    })
            })
            .unwrap_or(false)
    };

    let watch = kube::runtime::wait::await_condition(api, &self.name, all_inits_terminated);
    match time::timeout(time::Duration::from_secs(120), watch).await {
        Ok(Ok(())) => {}
        Ok(Err(error)) => panic!("Failed to watch pod status: {}: {}", self.name, error),
        Err(_timeout) => {
            panic!(
                "Timeout waiting for init containers to complete: {}",
                self.name
            );
        }
    };
}
}
3 changes: 3 additions & 0 deletions policy-test/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,9 @@ where
if let Some(status) = p.status {
let _span = tracing::info_span!("pod", ns = %ns.name(), name = %pod).entered();
tracing::trace!(reason = ?status.reason, message = ?status.message);
for c in status.init_container_statuses.into_iter().flatten() {
tracing::trace!(init_container = %c.name, ready = %c.ready, state = ?c.state);
}
for c in status.container_statuses.into_iter().flatten() {
tracing::trace!(container = %c.name, ready = %c.ready, state = ?c.state);
}
Expand Down

0 comments on commit 7ac79b8

Please sign in to comment.