Skip to content

Commit

Permalink
Merge pull request #4204 from LiilyZhang/zhangl/cherrypickDec
Browse files Browse the repository at this point in the history
Zhangl/cherrypick dec
  • Loading branch information
LiilyZhang authored Dec 13, 2024
2 parents 2a70088 + b9b6e0a commit 27a900c
Show file tree
Hide file tree
Showing 13 changed files with 276 additions and 112 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/build-push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ jobs:
mac-build:
needs: offset-build-number

runs-on: ${{ (matrix.architecture == 'amd64') && 'macos-12' || 'macos-14' }}
runs-on: ${{ (matrix.architecture == 'amd64') && 'macos-13' || 'macos-14' }}

strategy:
matrix:
Expand Down Expand Up @@ -450,7 +450,7 @@ jobs:

# Upload created package to artifacts to be used in next job
- name: Upload MacOS Package to Artifacts
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: macos-${{ matrix.architecture }}-package
path: ${{ env.GOPATH }}/src/github.com/${{ github.repository }}/pkg/mac/build/*.pkg
Expand Down Expand Up @@ -517,7 +517,7 @@ jobs:

# Retrieve artifact from previous job
- name: Download our MacOS Package Artifact
uses: actions/download-artifact@v3
uses: actions/download-artifact@v4
with:
name: macos-${{ matrix.architecture }}-package
path: ${{ steps.workdir_setup.outputs.PKG_PATH }}
Expand Down
57 changes: 44 additions & 13 deletions agent-install/agent-install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2056,7 +2056,7 @@ function install_macos() {
fi

if [[ $AGENT_AUTO_UPGRADE != 'true' ]]; then
check_existing_exch_node_is_correct_type "device"
check_existing_exch_node_info "device"
fi

if is_agent_registered && (! is_horizon_defaults_correct || ! is_registration_correct); then
Expand Down Expand Up @@ -2304,7 +2304,7 @@ function install_debian() {
check_and_set_anax_port # sets ANAX_PORT

if [[ $AGENT_AUTO_UPGRADE != 'true' ]]; then
check_existing_exch_node_is_correct_type "device"
check_existing_exch_node_info "device"
fi

if is_agent_registered && (! is_horizon_defaults_correct "$ANAX_PORT" || ! is_registration_correct); then
Expand Down Expand Up @@ -2566,7 +2566,7 @@ function install_redhat() {
if [[ $AGENT_ONLY_CLI != 'true' ]]; then
check_and_set_anax_port # sets ANAX_PORT
if [[ $AGENT_AUTO_UPGRADE != 'true' ]]; then
check_existing_exch_node_is_correct_type "device"
check_existing_exch_node_info "device"
fi

if is_agent_registered && (! is_horizon_defaults_correct "$ANAX_PORT" || ! is_registration_correct); then
Expand Down Expand Up @@ -3392,13 +3392,8 @@ function find_node_ip_address() {
fi
}

# If node exist in management hub, verify it is correct type (device or cluster)
function check_existing_exch_node_is_correct_type() {
log_debug "check_existing_exch_node_is_correct_type() begin"

local expected_type=$1

log_info "Verifying that node $NODE_ID in the exchange is type $expected_type (if it exists)..."
# check the node with $NODE_ID in the exchange, return the output from the exchange
function get_existing_exch_node() {
local exch_creds cert_flag
if [[ -n $HZN_EXCHANGE_USER_AUTH ]]; then exch_creds="$HZN_ORG_ID/$HZN_EXCHANGE_USER_AUTH"
else exch_creds="$HZN_ORG_ID/$HZN_EXCHANGE_NODE_AUTH" # input checking requires either user creds or node creds
Expand All @@ -3407,7 +3402,32 @@ function check_existing_exch_node_is_correct_type() {
if [[ -n $AGENT_CERT_FILE && -f $AGENT_CERT_FILE ]]; then
cert_flag="--cacert $AGENT_CERT_FILE"
fi
local exch_output=$(curl -fsS ${CURL_RETRY_PARMS} $cert_flag $HZN_EXCHANGE_URL/orgs/$HZN_ORG_ID/nodes/$NODE_ID -u "$exch_creds" 2>/dev/null) || true
exch_output=$(curl -fsS ${CURL_RETRY_PARMS} $cert_flag $HZN_EXCHANGE_URL/orgs/$HZN_ORG_ID/nodes/$NODE_ID -u "$exch_creds" 2>/dev/null) || true
echo "$exch_output"
}

# Abort the install when a node with $NODE_ID already exists in the exchange and
# its public (encryption) key is set.
# Globals:   NODE_ID (read)
# Calls:     get_existing_exch_node (its stdout is taken as the exchange response),
#            log_debug, log_fatal
# Returns:   normally when the exchange response is empty or the node's publicKey
#            is empty/absent; otherwise calls log_fatal with code 2.
function check_node_existing_and_active() {
    log_debug "check_node_existing_and_active() begin"

    # Declare separately from the assignments so 'local' does not mask exit codes.
    local exch_output exch_node_public_key
    exch_output=$(get_existing_exch_node)

    if [[ -n "$exch_output" ]]; then
        # Quote the JSON so it is not word-split or glob-expanded on its way to jq.
        # '// empty' turns a missing or null publicKey into no output instead of the
        # literal string "null", which would otherwise be mistaken for a real key.
        # A jq failure (e.g. unparsable response) is deliberately treated as "no key".
        exch_node_public_key=$(printf '%s\n' "$exch_output" | jq -r '.nodes | .[].publicKey // empty' 2>/dev/null) || true
        if [[ -n "$exch_node_public_key" ]]; then
            log_fatal 2 "node $NODE_ID already exists in the exchange and encryption key is set. To continue, use a different node id or delete existing node from the exchange"
        fi
    fi

    log_debug "check_node_existing_and_active() end"
}

# Check if the node exists in the management hub; if it does, verify 1) it is the correct type (device or cluster), 2) for a cluster node, its namespace
function check_existing_exch_node_info() {
log_debug "check_existing_exch_node_info() begin"

local expected_type=$1
local expected_namespace=$2

log_info "Verifying that node $NODE_ID in the exchange is type $expected_type (if it exists)..."
local exch_output=$(get_existing_exch_node)

if [[ -n "$exch_output" ]]; then
local exch_node_type=$(echo $exch_output | jq -re '.nodes | .[].nodeType')
Expand All @@ -3416,9 +3436,17 @@ function check_existing_exch_node_is_correct_type() {
elif [[ "$exch_node_type" == "cluster" ]] && [[ "$expected_type" != "cluster" ]]; then
log_fatal 2 "Node id ${NODE_ID} has already been created as nodeType cluster. Remove the node from the exchange and run this script again."
fi

local exch_node_namespace=$(echo $exch_output | jq -re '.nodes | .[].clusterNamespace')
local exch_node_public_key=$(echo $exch_output | jq -re '.nodes | .[].publicKey')
if [[ "$exch_node_type" == "cluster" ]] && [[ "$exch_node_public_key" != "" ]] && [[ "$expected_namespace" != "$exch_node_namespace" ]]; then
log_fatal 2 "Cluster node: $NODE_ID already exists in namespace $exch_node_namespace. To continue, use a different node id or delete existing node from the exchange"
elif [[ "$exch_node_type" == "cluster" ]] && [[ "$exch_node_public_key" == "" ]]; then
log_info "The node in the exchange ($exch_node_namespace) has empty encryption key, continue on cluster install/update"
fi
fi

log_debug "check_existing_exch_node_is_correct_type() end"
log_debug "check_existing_exch_node_info() end"
}

# make sure the new exchange url and cert are good.
Expand Down Expand Up @@ -4505,7 +4533,7 @@ function install_update_cluster() {

confirmCmds jq

check_existing_exch_node_is_correct_type "cluster"
check_existing_exch_node_info "cluster" $AGENT_NAMESPACE

check_cluster_agent_scope # sets AGENT_DEPLOYMENT_EXIST_IN_SAME_NAMESPACE

Expand Down Expand Up @@ -4551,6 +4579,8 @@ function install_update_cluster() {
function install_cluster() {
log_debug "install_cluster() begin"

check_node_existing_and_active

# generate files based on templates
generate_installation_files

Expand Down Expand Up @@ -4713,3 +4743,4 @@ elif is_cluster; then
else
log_fatal 1 "AGENT_DEPLOY_TYPE must be 'device' or 'cluster'"
fi

58 changes: 27 additions & 31 deletions agent-install/agent-uninstall.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,6 @@ SKIP_DELETE_AGENT_NAMESPACE=false
USE_DELETE_FORCE=false
DELETE_TIMEOUT=10 # Default delete timeout

function now() {
echo `date '+%Y-%m-%d %H:%M:%S'`
}

# Exit handling
function quit(){
case $1 in
Expand Down Expand Up @@ -215,7 +211,7 @@ function get_agent_pod_id() {
fi

if [ "$AGENT_POD_READY" == "true" ]; then
POD_ID=$($KUBECTL get pod -n ${AGENT_NAMESPACE} 2> /dev/null | grep "agent-" | cut -d " " -f1 2> /dev/null)
POD_ID=$($KUBECTL get pod -n ${AGENT_NAMESPACE} -l app=agent,type!=auto-upgrade-cronjob 2> /dev/null | grep "agent-" | cut -d " " -f1 2> /dev/null)
if [ -n "${POD_ID}" ]; then
log_info "get pod: ${POD_ID}"
else
Expand All @@ -230,7 +226,7 @@ function removeNodeFromLocalAndManagementHub() {
log_debug "removeNodeFromLocalAndManagementHub() begin"
log_info "Check node status for agent pod: ${POD_ID}"

NODE_INFO=$($KUBECTL exec -it ${POD_ID} -n ${AGENT_NAMESPACE} -- bash -c "hzn node list")
NODE_INFO=$($KUBECTL exec ${POD_ID} -n ${AGENT_NAMESPACE} -c "anax" -- bash -c "hzn node list")
NODE_STATE=$(echo $NODE_INFO | jq -r .configstate.state | sed 's/[^a-z]*//g')
NODE_ID=$(echo $NODE_INFO | jq -r .id | sed 's/\r//g')
log_debug "NODE config state for ${NODE_ID} is ${NODE_STATE}"
Expand Down Expand Up @@ -273,11 +269,11 @@ function unregister() {
fi

set +e
$KUBECTL exec -it ${POD_ID} -n ${AGENT_NAMESPACE} -- bash -c "${HZN_UNREGISTER_CMD}"
$KUBECTL exec ${POD_ID} -n ${AGENT_NAMESPACE} -c "anax" -- bash -c "${HZN_UNREGISTER_CMD}"
set -e

# verify the node is unregistered
NODE_STATE=$($KUBECTL exec -it ${POD_ID} -n ${AGENT_NAMESPACE} -- bash -c "hzn node list | jq -r .configstate.state" | sed 's/[^a-z]*//g')
NODE_STATE=$($KUBECTL exec ${POD_ID} -n ${AGENT_NAMESPACE} -c "anax" -- bash -c "hzn node list | jq -r .configstate.state" | sed 's/[^a-z]*//g')
log_debug "NODE config state is ${NODE_STATE}"

if [[ "$NODE_STATE" != "unconfigured" ]] && [[ "$NODE_STATE" != "unconfiguring" ]]; then
Expand All @@ -287,8 +283,9 @@ function unregister() {
log_debug "unregister() end"
}

# escape: ;, $, &, |, (, )
function getEscapedExchangeUserAuth() {
local escaped_auth=$( echo "${HZN_EXCHANGE_USER_AUTH}" | sed 's/;/\\;/g;s/\$/\\$/g;s/\&/\\&/g;s/|/\\|/g' )
local escaped_auth=$( echo "${HZN_EXCHANGE_USER_AUTH}" | sed 's/;/\\;/g;s/\$/\\$/g;s/\&/\\&/g;s/|/\\|/g;s/(/\\(/g;s/)/\\)/g' )
echo "${escaped_auth}"
}

Expand All @@ -302,7 +299,7 @@ function deleteNodeFromManagementHub() {
log_info "Deleting node ${node_id} from the management hub..."

set +e
$KUBECTL exec -it ${POD_ID} -n ${AGENT_NAMESPACE} -- bash -c "${EXPORT_EX_USER_AUTH_CMD}; hzn exchange node remove ${node_id} -f"
$KUBECTL exec ${POD_ID} -n ${AGENT_NAMESPACE} -c "anax" -- bash -c "${EXPORT_EX_USER_AUTH_CMD}; hzn exchange node remove ${node_id} -f"
set -e

log_debug "deleteNodeFromManagementHub() end"
Expand All @@ -318,7 +315,7 @@ function verifyNodeRemovedFromManagementHub() {
log_info "Verifying node ${node_id} is removed from the management hub..."

set +e
$KUBECTL exec -it ${POD_ID} -n ${AGENT_NAMESPACE} -- bash -c "${EXPORT_EX_USER_AUTH_CMD}; hzn exchange node list ${node_id}" >/dev/null 2>&1
$KUBECTL exec ${POD_ID} -n ${AGENT_NAMESPACE} -c "anax" -- bash -c "${EXPORT_EX_USER_AUTH_CMD}; hzn exchange node list ${node_id}" >/dev/null 2>&1
if [ $? -ne 8 ]; then
log_warning "Node was not removed from the management hub"
fi
Expand Down Expand Up @@ -347,6 +344,13 @@ function deleteAgentResources() {
$KUBECTL delete deployment $DEPLOYMENT_NAME -n $AGENT_NAMESPACE --force=true --grace-period=0
fi

log_info "Deleting auto-upgrade cronjob..."
if $KUBECTL get cronjob ${CRONJOB_AUTO_UPGRADE_NAME} -n ${AGENT_NAMESPACE} 2>/dev/null; then
$KUBECTL delete cronjob $CRONJOB_AUTO_UPGRADE_NAME -n $AGENT_NAMESPACE
else
log_info "cronjob ${CRONJOB_AUTO_UPGRADE_NAME} does not exist, skip deleting cronjob"
fi

# give pods some time to terminate by themselves
sleep 10

Expand All @@ -372,31 +376,23 @@ function deleteAgentResources() {
fi

log_info "Deleting configmap..."
$KUBECTL delete configmap $CONFIGMAP_NAME -n $AGENT_NAMESPACE
$KUBECTL delete configmap ${CONFIGMAP_NAME}-backup -n $AGENT_NAMESPACE
$KUBECTL delete configmap $CONFIGMAP_NAME -n $AGENT_NAMESPACE --ignore-not-found
$KUBECTL delete configmap ${CONFIGMAP_NAME}-backup -n $AGENT_NAMESPACE --ignore-not-found

log_info "Deleting secret..."
$KUBECTL delete secret $SECRET_NAME -n $AGENT_NAMESPACE
$KUBECTL delete secret $IMAGE_REGISTRY_SECRET_NAME -n $AGENT_NAMESPACE
$KUBECTL delete secret $IMAGE_PULL_SECRET_NAME -n $AGENT_NAMESPACE
$KUBECTL delete secret ${SECRET_NAME}-backup -n $AGENT_NAMESPACE
set -e

log_info "Deleting auto-upgrade cronjob..."
if $KUBECTL get cronjob ${CRONJOB_AUTO_UPGRADE_NAME} -n ${AGENT_NAMESPACE} 2>/dev/null; then
$KUBECTL delete cronjob $CRONJOB_AUTO_UPGRADE_NAME -n $AGENT_NAMESPACE
else
log_info "cronjob ${CRONJOB_AUTO_UPGRADE_NAME} does not exist, skip deleting cronjob"
fi

set +e
$KUBECTL delete clusterrolebinding ${AGENT_NAMESPACE}-${CLUSTER_ROLE_BINDING_NAME}
$KUBECTL delete secret $SECRET_NAME -n $AGENT_NAMESPACE --ignore-not-found
$KUBECTL delete secret $IMAGE_REGISTRY_SECRET_NAME -n $AGENT_NAMESPACE --ignore-not-found
$KUBECTL delete secret $IMAGE_PULL_SECRET_NAME -n $AGENT_NAMESPACE --ignore-not-found
$KUBECTL delete secret ${SECRET_NAME}-backup -n $AGENT_NAMESPACE --ignore-not-found

log_info "Deleting persistent volume..."
$KUBECTL delete pvc $PVC_NAME -n $AGENT_NAMESPACE
$KUBECTL delete pvc $PVC_NAME -n $AGENT_NAMESPACE --ignore-not-found

log_info "Deleting clusterrolebinding..."
$KUBECTL delete clusterrolebinding ${AGENT_NAMESPACE}-${CLUSTER_ROLE_BINDING_NAME} --ignore-not-found

log_info "Deleting serviceaccount..."
$KUBECTL delete serviceaccount $SERVICE_ACCOUNT_NAME -n $AGENT_NAMESPACE
$KUBECTL delete serviceaccount $SERVICE_ACCOUNT_NAME -n $AGENT_NAMESPACE --ignore-not-found

if [[ "$SKIP_DELETE_AGENT_NAMESPACE" != "true" ]]; then
log_info "Checking deployment and statefulset under namespace $AGENT_NAMESPACE"
Expand All @@ -413,7 +409,7 @@ function deleteAgentResources() {
fi

log_info "Deleting cert file from /etc/default/cert ..."
rm /etc/default/cert/agent-install.crt
rm -f /etc/default/cert/agent-install.crt
set -e

log_debug "deleteAgentResources() end"
Expand Down
9 changes: 6 additions & 3 deletions agreementbot/agreementbot.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ func NewAgreementBotWorker(name string, cfg *config.HorizonConfig, db persistenc
newMessagesToProcess: false,
nodeSearch: NewNodeSearch(),
secretProvider: s,
secretUpdateManager: NewSecretUpdateManager(),
secretUpdateManager: NewSecretUpdateManager(cfg.AgreementBot.SecretsUpdateCheckInterval, cfg.AgreementBot.SecretsUpdateCheckInterval, cfg.AgreementBot.SecretsUpdateCheckMaxInterval, cfg.AgreementBot.SecretsUpdateCheckIncrement),
}

patternManager = NewPatternManager()
Expand Down Expand Up @@ -1634,7 +1634,7 @@ func (w *AgreementBotWorker) secretsProviderMaintenance() int {

// This function is called by the secrets update sub worker to learn about secrets that have been updated.
func (w *AgreementBotWorker) secretsUpdate() int {

nextRunWait := w.secretUpdateManager.PollInterval
secretUpdates, err := w.secretUpdateManager.CheckForUpdates(w.secretProvider, w.db)
if err != nil {
glog.Errorf(AWlogString(err))
Expand All @@ -1643,10 +1643,13 @@ func (w *AgreementBotWorker) secretsUpdate() int {
// Send out an event with the changed secrets and affected policies in it.
if secretUpdates != nil && secretUpdates.Length() != 0 {
w.Messages() <- events.NewSecretUpdatesMessage(events.UPDATED_SECRETS, secretUpdates)
nextRunWait = w.secretUpdateManager.AdjustSecretsPollingInterval(secretUpdates.Length())
} else {
nextRunWait = w.secretUpdateManager.AdjustSecretsPollingInterval(0)
}
}

return 0
return nextRunWait
}

func (w *AgreementBotWorker) monitorHAGroupNMPUpdates() int {
Expand Down
31 changes: 25 additions & 6 deletions agreementbot/consumer_protocol_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -392,18 +392,29 @@ func (b *BaseConsumerProtocolHandler) HandlePolicyChangeForAgreement(ag persiste
glog.Infof(BCPHlogstring(b.Name(), fmt.Sprintf("attempting to update agreement %v due to change in policy", ag.CurrentAgreementId)))
}

msgPrinter := i18n.GetMessagePrinter()

svcAllPol := externalpolicy.ExternalPolicy{}
svcPolicyHandler := exchange.GetHTTPServicePolicyHandler(b)
svcResolveHandler := exchange.GetHTTPServiceDefResolverHandler(b)

for _, svcId := range ag.ServiceId {
if svcPol, err := exchange.GetServicePolicyWithId(b, svcId); err != nil {
glog.Errorf(BCPHlogstring(b.Name(), fmt.Sprintf("failed to get service policy for %v from the exchange: %v", svcId, err)))
if svcDef, err := exchange.GetServiceWithId(b, svcId); err != nil {
glog.Errorf(BCPHlogstring(b.Name(), fmt.Sprintf("failed to get service %v, error: %v", svcId, err)))
return false, false, false
} else if svcPol != nil {
svcAllPol.MergeWith(&svcPol.ExternalPolicy, false)
} else if svcDef != nil {
if mergedSvcPol, _, _, _, _, err := compcheck.GetServicePolicyWithDefaultProperties(svcPolicyHandler, svcResolveHandler, svcDef.URL, exchange.GetOrg(svcId), svcDef.Version, svcDef.Arch, msgPrinter); err != nil {
glog.Errorf(BCPHlogstring(b.Name(), fmt.Sprintf("failed to get merged service policy for %v, error: %v", svcId, err)))
return false, false, false
} else if mergedSvcPol != nil {
svcAllPol.MergeWith(mergedSvcPol, false)
}
}
}

msgPrinter := i18n.GetMessagePrinter()
if glog.V(5) {
glog.Infof(BCPHlogstring(b.Name(), fmt.Sprintf("For agreement %v merged svc policy is %v", ag.CurrentAgreementId, svcAllPol)))
}

busPolHandler := exchange.GetHTTPBusinessPoliciesHandler(b)
_, busPol, err := compcheck.GetBusinessPolicy(busPolHandler, ag.PolicyName, true, msgPrinter)
Expand Down Expand Up @@ -510,7 +521,7 @@ func (b *BaseConsumerProtocolHandler) HandlePolicyChangeForAgreement(ag persiste
}
return true, true, false
}
// new cluster namespace is still compatible
// cluster namespace remains same
}
}

Expand All @@ -535,6 +546,13 @@ func (b *BaseConsumerProtocolHandler) HandlePolicyChangeForAgreement(ag persiste
}
}

if same, msg := consumerPol.IsSamePolicy(oldPolicy); same {
glog.V(3).Infof("business policy(producerPol) %v content remains same with old policy; no update to agreement %s", ag.PolicyName, ag.CurrentAgreementId)
return true, true, true
} else {
glog.V(3).Infof("business policy %v content is changed in agreement %v: %v", ag.PolicyName, ag.CurrentAgreementId, msg)
}

newTsCs, err := policy.Create_Terms_And_Conditions(producerPol, consumerPol, wl, ag.CurrentAgreementId, b.config.AgreementBot.DefaultWorkloadPW, b.config.AgreementBot.NoDataIntervalS, basicprotocol.PROTOCOL_CURRENT_VERSION)
if err != nil {
glog.Errorf(BCPHlogstring(b.Name(), fmt.Sprintf("error creating new terms and conditions: %v", err)))
Expand All @@ -543,6 +561,7 @@ func (b *BaseConsumerProtocolHandler) HandlePolicyChangeForAgreement(ag persiste

ag.LastPolicyUpdateTime = uint64(time.Now().Unix())

// this function will send out "basicagreementupdate"
b.UpdateAgreement(&ag, basicprotocol.MsgUpdateTypePolicyChange, newTsCs, cph)

return true, true, true
Expand Down
Loading

0 comments on commit 27a900c

Please sign in to comment.