Skip to content

Commit

Permalink
Merge pull request open-horizon#3945 from LiilyZhang/zhangl/Issue3944
Browse files Browse the repository at this point in the history
Issue open-horizon#3944 - Bug: agent-install.sh breaks on some k8s en…
  • Loading branch information
LiilyZhang authored Nov 14, 2023
2 parents 2c78aea + 656a1fd commit 5e94844
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 14 deletions.
6 changes: 3 additions & 3 deletions agent-install/agent-install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3537,7 +3537,7 @@ function check_cluster_agent_scope() {

IFS="," read -ra namespace_array <<< "$namespaces_have_agent"
namespace_to_check=${namespace_array[0]}
local namespace_scoped_env_value_in_use=$($KUBECTL get deployment agent -n ${namespace_to_check} -o jsonpath='{.spec.template.spec.containers[0].env}' | jq -r '.[] | select(.name=="HZN_NAMESPACE_SCOPED").value')
local namespace_scoped_env_value_in_use=$($KUBECTL get deployment agent -n ${namespace_to_check} -o json | jq '.spec.template.spec.containers[0].env' | jq -r '.[] | select(.name=="HZN_NAMESPACE_SCOPED").value')
log_debug "Current HZN_NAMESPACE_SCOPED in agent deployment under namespace $namespace_to_check is: $namespace_scoped_env_value_in_use"
log_debug "NAMESPACE_SCOPED passed to this script is: $NAMESPACE_SCOPED" # namespace scoped

Expand Down Expand Up @@ -3574,7 +3574,7 @@ function check_agent_deployment_exist() {
log_fatal 3 "Previous agent pod in not in RUNNING status, please run agent-uninstall.sh to clean up and re-run the agent-install.sh"
else
# check 0) agent scope in deployment
local namespace_scoped_env_value_in_use=$($KUBECTL get deployment agent -n ${AGENT_NAMESPACE} -o jsonpath='{.spec.template.spec.containers[0].env}' | jq -r '.[] | select(.name=="HZN_NAMESPACE_SCOPED").value')
local namespace_scoped_env_value_in_use=$($KUBECTL get deployment agent -n ${AGENT_NAMESPACE} -o json | jq '.spec.template.spec.containers[0].env' | jq -r '.[] | select(.name=="HZN_NAMESPACE_SCOPED").value')
log_debug "Current HZN_NAMESPACE_SCOPED in agent deployment is $namespace_scoped_env_value_in_use"
log_debug "NAMESPACE_SCOPED passed to this script is: $NAMESPACE_SCOPED"

Expand Down Expand Up @@ -3637,7 +3637,7 @@ function check_agent_deployment_exist() {
fi

# check 3) HZN_ORG_ID set in deployment
local horizon_org_id_env_value_in_use=$($KUBECTL get deployment agent -n ${AGENT_NAMESPACE} -o jsonpath='{.spec.template.spec.containers[0].env}' | jq -r '.[] | select(.name=="HZN_ORG_ID").value')
local horizon_org_id_env_value_in_use=$($KUBECTL get deployment agent -n ${AGENT_NAMESPACE} -o json | jq '.spec.template.spec.containers[0].env' | jq -r '.[] | select(.name=="HZN_ORG_ID").value')
log_debug "Current HZN_ORG_ID in agent deployment is: $horizon_org_id_env_value_in_use"
log_debug "HZN_ORG_ID passed to this script is: $HZN_ORG_ID"

Expand Down
31 changes: 20 additions & 11 deletions agent-install/agent-uninstall.sh
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ function validate_positive_int() {

function get_agent_pod_id() {
log_debug "get_agent_pod_id() begin"
if [[ $($KUBECTL get pods -n ${AGENT_NAMESPACE} -l app=agent -o 'jsonpath={..status.conditions[?(@.type=="Ready")].status}') != "True" ]]; then
if [[ $($KUBECTL get pods -n ${AGENT_NAMESPACE} -l app=agent,type!=auto-upgrade-cronjob -o 'jsonpath={..status.conditions[?(@.type=="Ready")].status}') != "True" ]]; then
AGENT_POD_READY="false"
else
AGENT_POD_READY="true"
Expand Down Expand Up @@ -335,23 +335,23 @@ function deleteAgentResources() {
# give pods some time to terminate by themselves
sleep 10

log_info "Checking if pods are deleted"
PODS=$($KUBECTL get pod -n $AGENT_NAMESPACE 2>/dev/null)
log_info "Checking if agent pods are deleted"
PODS=$($KUBECTL get pod -l app=agent -n $AGENT_NAMESPACE 2>/dev/null)
if [[ -n "$PODS" ]]; then
log_info "Pods are not deleted by deleting deployment, delete pods now"
log_info "Agent pods are not deleted by deleting deployment, delete pods now"
if [ "$USE_DELETE_FORCE" != true ]; then
$KUBECTL delete --all pods --namespace=$AGENT_NAMESPACE --grace-period=$DELETE_TIMEOUT
$KUBECTL delete pods -l app=agent --namespace=$AGENT_NAMESPACE --grace-period=$DELETE_TIMEOUT

PODS=$($KUBECTL get pod -n $AGENT_NAMESPACE 2>/dev/null)
PODS=$($KUBECTL get pod -l app=agent -n $AGENT_NAMESPACE 2>/dev/null)
if [[ -n "$PODS" ]]; then
log_info "Pods still exist"
log_info "Agent pods still exist"
PODS_STILL_EXIST="true"
fi
fi

if [ "$USE_DELETE_FORCE" == true ] || [ "$PODS_STILL_EXIST" == true ]; then
log_info "Force deleting all the pods under $AGENT_NAMESPACE"
$KUBECTL delete --all pods --namespace=$AGENT_NAMESPACE --force=true --grace-period=0
log_info "Force deleting agent pods under $AGENT_NAMESPACE"
$KUBECTL delete pods -l app=agent --namespace=$AGENT_NAMESPACE --force=true --grace-period=0
pkill -f anax.service
fi
fi
Expand Down Expand Up @@ -383,8 +383,15 @@ function deleteAgentResources() {
log_info "Deleting serviceaccount..."
$KUBECTL delete serviceaccount $SERVICE_ACCOUNT_NAME -n $AGENT_NAMESPACE

log_info "Deleting namespace..."
$KUBECTL delete namespace $AGENT_NAMESPACE --force=true --grace-period=0
log_info "Checking deployment and statefulset under namespace $AGENT_NAMESPACE"
deployment=$($KUBECTL get deployment -n $AGENT_NAMESPACE)
statefulset=$($KUBECTL get statefulset -n $AGENT_NAMESPACE)
if [[ -z "$deployment" ]] && [[ -z "$statefulset" ]]; then
log_info "No deployment and statefulset left under namespace $AGENT_NAMESPACE, deleting it..."
$KUBECTL delete namespace $AGENT_NAMESPACE --force=true --grace-period=0
else
log_info "Deployment or statefulset exists in the namespace $AGENT_NAMESPACE, skip deleting namespace $AGENT_NAMESPACE. Please delete namespace manually"
fi

log_info "Deleting cert file from /etc/default/cert ..."
rm /etc/default/cert/agent-install.crt
Expand All @@ -402,6 +409,8 @@ function uninstall_cluster() {

if [[ "$AGENT_POD_READY" == "true" ]]; then
removeNodeFromLocalAndManagementHub
else
log_info "agent pod under $AGENT_NAMESPACE is not ready, skip unregister process. Please remove node from management hub later if needed"
fi

deleteAgentResources
Expand Down
2 changes: 2 additions & 0 deletions agent-install/k8s/auto-upgrade-cronjob-template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ metadata:
labels:
app: agent
openhorizon.org/component: agent
type: auto-upgrade-cronjob
spec:
schedule: '*/1 * * * *'
concurrencyPolicy: Forbid
Expand All @@ -18,6 +19,7 @@ spec:
labels:
app: agent
openhorizon.org/component: agent
type: auto-upgrade-cronjob
spec:
volumes:
- name: agent-pvc-storage
Expand Down

0 comments on commit 5e94844

Please sign in to comment.