Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CAPZ Win FV fixes #9150

Merged
merged 8 commits into from
Aug 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .semaphore/semaphore-scheduled-builds.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion .semaphore/semaphore.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion .semaphore/semaphore.yml.d/blocks/20-felix.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,15 @@

- name: "Felix: Windows FV capz"
run:
when: "false or change_in(['/*', '/api/', '/libcalico-go/', '/typha/', '/felix/', '/node', '/hack/test/certs/', '/process/testing/winfv/'], {exclude: ['/**/.gitignore', '/**/README.md', '/**/LICENSE']})"
when: "false or change_in(['/*', '/api/', '/libcalico-go/', '/typha/', '/felix/', '/node', '/hack/test/certs/', '/process/testing/winfv-felix/'], {exclude: ['/**/.gitignore', '/**/README.md', '/**/LICENSE']})"
dependencies: ["Felix: Build Windows binaries"]
task:
secrets:
- name: banzai-secrets
- name: private-repo
prologue:
commands:
- az login --service-principal -u "${AZ_SP_ID}" -p "${AZ_SP_PASSWORD}" --tenant "${AZ_TENANT_ID}" --output none
- export REPORT_DIR=/home/semaphore/report
- export AZURE_SUBSCRIPTION_ID=$AZ_SUBSCRIPTION_ID
- export AZURE_TENANT_ID=$AZ_TENANT_ID
Expand Down
2 changes: 1 addition & 1 deletion cni-plugin/.semaphore/cleanup.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ blocks:
- name: Clean up winfv aws resources
commands:
- aws ec2 delete-key-pair --key-name ${KEYPAIR_NAME} || true
- cd ~/calico/process/testing/winfv && NAME_PREFIX="${CLUSTER_NAME}-containerd" ./setup-fv.sh -q -u || true
- cd ~/calico/process/testing/winfv-cni-plugin && NAME_PREFIX="${CLUSTER_NAME}-containerd" ./setup-fv.sh -q -u || true
# Best-effort cleanup of the "-docker" resources: `|| true` ignores a failure of setup-fv.sh without piping its output away.
- NAME_PREFIX="${CLUSTER_NAME}-docker" ./setup-fv.sh -q -u || true
env_vars:
- name: AWS_DEFAULT_REGION
Expand Down
4 changes: 2 additions & 2 deletions cni-plugin/.semaphore/semaphore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ blocks:
- artifact push job ${REPORT_DIR} --destination semaphore/test-results --expire-in ${SEMAPHORE_ARTIFACT_EXPIRY} || true
- artifact push job ${LOGS_DIR} --destination semaphore/logs --expire-in ${SEMAPHORE_ARTIFACT_EXPIRY} || true
- aws ec2 delete-key-pair --key-name ${KEYPAIR_NAME} || true
- cd ~/calico/process/testing/winfv && NAME_PREFIX="${CLUSTER_NAME}" ./setup-fv.sh -q -u
- cd ~/calico/process/testing/winfv-cni-plugin && NAME_PREFIX="${CLUSTER_NAME}" ./setup-fv.sh -q -u
env_vars:
- name: SEMAPHORE_ARTIFACT_EXPIRY
value: 2w
Expand Down Expand Up @@ -152,7 +152,7 @@ blocks:
- artifact push job ${REPORT_DIR} --destination semaphore/test-results --expire-in ${SEMAPHORE_ARTIFACT_EXPIRY} || true
- artifact push job ${LOGS_DIR} --destination semaphore/logs --expire-in ${SEMAPHORE_ARTIFACT_EXPIRY} || true
- aws ec2 delete-key-pair --key-name ${KEYPAIR_NAME} || true
- cd ~/calico/process/testing/winfv && NAME_PREFIX="${CLUSTER_NAME}" ./setup-fv.sh -q -u
- cd ~/calico/process/testing/winfv-cni-plugin && NAME_PREFIX="${CLUSTER_NAME}" ./setup-fv.sh -q -u
env_vars:
- name: SEMAPHORE_ARTIFACT_EXPIRY
value: 2w
Expand Down
39 changes: 18 additions & 21 deletions felix/.semaphore/run-win-fv
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

set -e
set -ex

FV_DIR="/home/semaphore/calico/process/testing/winfv"
ERROR_CODE=0
FV_DIR="/home/semaphore/calico/process/testing/winfv-felix"
EXIT_CODE=0

pushd ${FV_DIR}
# Prepare local files
Expand Down Expand Up @@ -51,18 +51,23 @@ if [[ $FV_PROVISIONER == "aws" ]]; then
${SCP_CMD} -r ubuntu@${MASTER_IP}:/home/ubuntu/report /home/semaphore
elif [[ $FV_PROVISIONER == "capz" ]]; then
export KUBE_VERSION="$K8S_VERSION"
/bin/bash -x ./setup-fv-capz.sh -q | tee setup-fv.log
if [[ ${PIPESTATUS[0]} != 0 ]]; then
ERROR_CODE=${PIPESTATUS[0]}
/bin/bash -x ./setup-fv-capz.sh -q | tee setup-fv.log; pstat=${PIPESTATUS[0]}
if [[ $pstat != 0 ]]; then
EXIT_CODE=$pstat
fi
mv ./report /home/semaphore/report
mv ./setup-fv.log /home/semaphore/report/setup-fv.log
mv ./report /home/semaphore
mv ./setup-fv.log /home/semaphore/report
popd
fi
ls -ltr ./report
mkdir /home/semaphore/fv.log
# check if *.log glob contains any files so that mv doesn't fail
compgen -G /home/semaphore/report/*.log > /dev/null && mv /home/semaphore/report/*.log /home/semaphore/fv.log
ls -ltr /home/semaphore/report

# Print relevant snippets from logs.
# Each report log is converted from UTF-16 to UTF-8 and has trailing carriage
# returns stripped; every line matching log_regexps is then printed with 2 lines
# of leading and 15 lines of trailing context, each output line prefixed with
# the name of the log file it came from.
log_regexps='(?<!Decode)Failure|SUCCESS|FV-TEST-START'
# The glob is quoted so that compgen (not the shell) expands it; the loop is
# skipped entirely when no *.log file exists.
if compgen -G "/home/semaphore/report/*.log" > /dev/null; then
  for log_file in /home/semaphore/report/*.log; do
    prefix="[$(basename "${log_file}")]"
    iconv -f UTF-16 -t UTF-8 < "${log_file}" \
      | sed 's/\r$//g' \
      | grep --line-buffered --perl "${log_regexps}" -B 2 -A 15 \
      | sed 's/.*/'"${prefix}"' &/g'
  done
fi

# Stop for debug
echo "Check for pause file..."
Expand All @@ -72,16 +77,8 @@ do
sleep 30
done

# Print relevant snippets from logs
log_regexps='(?<!Decode)Failure|SUCCESS|FV-TEST-START'
compgen -G /home/semaphore/fv.log/*.log > /dev/null && \
for log_file in /home/semaphore/fv.log/*.log; do
prefix="[$(basename ${log_file})]"
cat ${log_file} | iconv -f UTF-16 -t UTF-8 | sed 's/\r$//g' | grep --line-buffered --perl ${log_regexps} -B 2 -A 15 | sed 's/.*/'"${prefix}"' &/g'
done;

# Search for error code file
if [[ -f /home/semaphore/report/error-codes || $ERROR_CODE != 0 ]];
if [[ -f /home/semaphore/report/error-codes || $EXIT_CODE != 0 ]];
then
echo "Windows FV returned error(s)."
exit 1
Expand Down
2 changes: 1 addition & 1 deletion felix/.semaphore/semaphore.yml
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ blocks:
- artifact push job ${REPORT_DIR} --destination semaphore/test-results --expire-in ${SEMAPHORE_ARTIFACT_EXPIRY} || true
- artifact push job ${LOGS_DIR} --destination semaphore/logs --expire-in ${SEMAPHORE_ARTIFACT_EXPIRY} || true
- aws ec2 delete-key-pair --key-name ${KEYPAIR_NAME} || true
- cd ~/calico/process/testing/winfv && NAME_PREFIX="${CLUSTER_NAME}" /bin/bash -x ./setup-fv.sh -q -u
- cd ~/calico/process/testing/winfv-felix && NAME_PREFIX="${CLUSTER_NAME}" /bin/bash -x ./setup-fv.sh -q -u
env_vars:
- name: SEMAPHORE_ARTIFACT_EXPIRY
value: 2w
Expand Down
4 changes: 2 additions & 2 deletions metadata.mk
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ K8S_VERSION=v1.29.7
COREDNS_VERSION=1.5.2
ETCD_VERSION=v3.5.6
HELM_VERSION=v3.11.3
KINDEST_NODE_VERSION=v1.29.7
KIND_VERSION=v0.22.0
KINDEST_NODE_VERSION=v1.29.2
coutinhop marked this conversation as resolved.
Show resolved Hide resolved
KIND_VERSION=v0.24.0
PROTOC_VER=v0.1
UBI_VERSION=8.10

Expand Down
4 changes: 2 additions & 2 deletions process/testing/winfv-cni-plugin/aso/utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ function retry_command() {
local CMD=$2
echo

for i in `seq 1 $RETRY`; do
echo Trying $CMD, attempt ${i}
for i in $(seq 1 $RETRY); do
echo "Trying '$CMD', attempt ${i}"
$CMD && return 0 || sleep 10
done
echo "Command '${CMD}' failed after $RETRY attempts"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
.calico_installed
bin/*
*.log
.cluster_created
.calico_installed
.sshkey
.sshkey.pub
kubeconfig
scp-from-node.sh
scp-to-node.sh
ssh-node.sh
tigera-operator.yaml
win-capz.yaml
tigera-operator.yaml
tigera-prometheus-operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ CLUSTER_CREATED_MARKER:=.cluster_created
.PHONY: create-cluster
create-cluster: $(CLUSTER_CREATED_MARKER)

$(CLUSTER_CREATED_MARKER): $(BINDIR)/kind $(BINDIR)/kubectl $(BINDIR)/clusterctl
$(CLUSTER_CREATED_MARKER): $(BINDIR)/kind $(BINDIR)/kubectl $(BINDIR)/clusterctl $(BINDIR)/yq
@echo "Creating cluster $(CLUSTER_NAME_CAPZ) ..."
./create-cluster.sh
$(MAKE) generate-helpers
Expand All @@ -18,19 +18,16 @@ $(CLUSTER_CREATED_MARKER): $(BINDIR)/kind $(BINDIR)/kubectl $(BINDIR)/clusterctl

.PHONY: delete-cluster
delete-cluster: $(BINDIR)/kind $(BINDIR)/kubectl
ifeq (,$(wildcard $(CLUSTER_CREATED_MARKER)))
@echo "Cluster marker '$(CLUSTER_CREATED_MARKER)' does not exist, doing nothing"
else
@echo "Azure resources for cluster $(CLUSTER_NAME_CAPZ) will now be deleted, this can take up to 20 minutes"
-$(BINDIR)/kubectl delete cluster $(CLUSTER_NAME_CAPZ)
-$(BINDIR)/kind delete cluster --name kind${SUFFIX}
-az group delete --name $(CI_RG) -y
-rm -f kubeconfig
-rm -f win-capz.yaml
-rm -f tigera-operator.yaml
-rm -f tigera-prometheus-operator.yaml
-rm -f $(HELPERS)
-rm -f $(CLUSTER_CREATED_MARKER) $(CALICO_INSTALLED_MARKER)
endif

CALICO_INSTALLED_MARKER:=.calico_installed

Expand Down Expand Up @@ -70,15 +67,23 @@ $(BINDIR)/clusterctl:
touch $@
$(BINDIR)/clusterctl version

# Download the yq Linux release binary for $(YQ_VERSION) and $(ARCH) into
# $(BINDIR) and mark it executable. curl is told to fail on HTTP errors (-f),
# follow redirects (-L) and retry up to 5 times.
$(BINDIR)/yq:
mkdir -p $(@D)
curl -sSf -L --retry 5 https://github.com/mikefarah/yq/releases/download/$(YQ_VERSION)/yq_linux_$(ARCH) -o $(BINDIR)/yq
chmod +x $@
touch $@

# Remove kubeconfig, win-capz.yaml, tigera-operator.yaml, the SSH key pair,
# the files listed in $(HELPERS) and az-output.log.
# The leading '-' on each recipe line tells make to ignore a failing command.
.PHONY: clean
clean:
-rm -f kubeconfig
-rm -f win-capz.yaml
-rm -f tigera-operator.yaml
-rm -f .sshkey .sshkey.pub
-rm -f $(HELPERS)
-rm -f az-output.log

# Run clean first, then also remove the whole $(BINDIR) directory, the cluster
# and Calico marker files, and every *.log file in the current directory.
.PHONY: dist-clean
dist-clean: clean
-rm -rf $(BINDIR)
-rm -f $(CLUSTER_CREATED_MARKER) $(CALICO_INSTALLED_MARKER)
-rm -f *.log
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ Optionally, define `PRODUCT`, `RELEASE_STREAM` and/or `HASH_RELEASE`:
make install-calico PRODUCT=calient RELEASE_STREAM=master HASH_RELEASE=true
```

(Set `RELEASE_STREAM=local` to install from the local manifests in the monorepo instead of downloading them.)

To access your cluster, run `kubectl --kubeconfig=./kubeconfig ...`

### Access Linux or Windows nodes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,24 @@ set -o pipefail
export AZURE_CONTROL_PLANE_MACHINE_TYPE
export AZURE_NODE_MACHINE_TYPE

# Number of Linux node is same as number of Windows nodes
export AZURE_CLIENT_ID_USER_ASSIGNED_IDENTITY=$AZURE_CLIENT_ID # for compatibility with CAPZ v1.16 templates

# Create the resource group and managed identity for the cluster CI.
# Each az command line is echoed before it runs, and everything the group
# prints (stdout and stderr) is appended to az-output.log.
rm -f az-output.log
{
  echo "az group create --name ${CI_RG} --location ${AZURE_LOCATION}"
  az group create --name "${CI_RG}" --location "${AZURE_LOCATION}"
  echo
  echo "az identity create --name ${USER_IDENTITY} --resource-group ${CI_RG} --location ${AZURE_LOCATION}"
  az identity create --name "${USER_IDENTITY}" --resource-group "${CI_RG}" --location "${AZURE_LOCATION}"
  # NOTE(review): presumably gives Azure time to propagate the new identity before it is queried — confirm.
  sleep 10s
  # Assign first and export afterwards so that a failing lookup is not masked
  # by the exit status of `export` itself.
  USER_IDENTITY_ID=$(az identity show --resource-group "${CI_RG}" --name "${USER_IDENTITY}" | jq -r .principalId)
  export USER_IDENTITY_ID
  echo
  echo az role assignment create --assignee-object-id "${USER_IDENTITY_ID}" --assignee-principal-type "ServicePrincipal" --role "Contributor" --scope "/subscriptions/${AZURE_SUBSCRIPTION_ID}/resourceGroups/${CI_RG}"
  az role assignment create --assignee-object-id "${USER_IDENTITY_ID}" --assignee-principal-type "ServicePrincipal" --role "Contributor" --scope "/subscriptions/${AZURE_SUBSCRIPTION_ID}/resourceGroups/${CI_RG}"
} >> az-output.log 2>&1

# Number of Linux worker nodes is the same as number of Windows worker nodes
: ${WIN_NODE_COUNT:=2}
TOTAL_NODES=$((WIN_NODE_COUNT*2+1))
SEMAPHORE="${SEMAPHORE:="false"}"
Expand All @@ -57,6 +74,7 @@ echo ' WIN_NODE_COUNT='${WIN_NODE_COUNT}
: ${KIND:=./bin/kind}
: ${KUBECTL:=./bin/kubectl}
: ${CLUSTERCTL:=./bin/clusterctl}
: ${YQ:=./bin/yq}
: ${KCAPZ:="${KUBECTL} --kubeconfig=./kubeconfig"}

# Base64 encode the variables
Expand All @@ -69,8 +87,6 @@ else
export SUFFIX="-${RAND}"
fi



# Settings needed for AzureClusterIdentity used by the AzureCluster
export AZURE_CLUSTER_IDENTITY_SECRET_NAME="cluster-identity-secret"
export CLUSTER_IDENTITY_NAME="cluster-identity"
Expand Down Expand Up @@ -118,10 +134,14 @@ ${CLUSTERCTL} generate cluster ${CLUSTER_NAME_CAPZ} \
--flavor machinepool-windows \
> win-capz.yaml

# Cluster templates authenticate with Workload Identity by default. Modify the AzureClusterIdentity for ServicePrincipal authentication.
# See https://capz.sigs.k8s.io/topics/identities for more details.
${YQ} -i "with(. | select(.kind == \"AzureClusterIdentity\"); .spec.type |= \"ServicePrincipal\" | .spec.clientSecret.name |= \"${AZURE_CLUSTER_IDENTITY_SECRET_NAME}\" | .spec.clientSecret.namespace |= \"${AZURE_CLUSTER_IDENTITY_SECRET_NAMESPACE}\")" win-capz.yaml

retry_command 600 "${KUBECTL} apply -f win-capz.yaml"

# Wait for CAPZ deployments
${KUBECTL} wait --for=condition=Available --timeout=5m -n capz-system deployment -l cluster.x-k8s.io/provider=infrastructure-azure
timeout --foreground 600 bash -c "while ! ${KUBECTL} wait --for=condition=Available --timeout=30s -n capz-system deployment -l cluster.x-k8s.io/provider=infrastructure-azure; do sleep 5; done"

# Wait for the kubeconfig to become available.
timeout --foreground 600 bash -c "while ! ${KUBECTL} get secrets | grep ${CLUSTER_NAME_CAPZ}-kubeconfig; do sleep 5; done"
Expand All @@ -140,9 +160,9 @@ retry_command 300 "${KCAPZ} taint nodes --selector=!node-role.kubernetes.io/cont

echo "Done creating cluster"

ID0=$(${KCAPZ} get node -o wide | grep win-p-win000000 | awk '{print $6}' | awk -F '.' '{print $4}')
echo "ID0: $ID0"
if [[ ${WIN_NODE_COUNT} -gt 1 ]]; then
ID1=$(${KCAPZ} get node -o wide | grep win-p-win000001 | awk '{print $6}' | awk -F '.' '{print $4}')
echo "ID1:$ID1"
fi
# Collect, for every node labelled kubernetes.io/os=windows, the 4th
# dot-separated field of the 6th column of `get nodes -o wide`
# (presumably the last octet of the node's internal IP — confirm), sorted.
WIN_NODES=$(
  ${KCAPZ} get nodes -o wide -l kubernetes.io/os=windows --no-headers \
    | awk '{print $6}' \
    | awk -F '.' '{print $4}' \
    | sort
)
# Print each collected value as ID<index>, counting from 0.
# WIN_NODES is left unquoted on purpose so that it splits into one word per node.
i=0
for n in ${WIN_NODES}; do
  echo "ID$i: $n"
  i=$((i + 1))
done
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
export CLUSTER_NAME_CAPZ="${CLUSTER_NAME_CAPZ:=${USER}-capz-win}"
export AZURE_LOCATION="${AZURE_LOCATION:="westcentralus"}"
export AZURE_LOCATION="${AZURE_LOCATION:="westus2"}"

# [Optional] Select resource group. The default value is ${CLUSTER_NAME_CAPZ}-rg.
export AZURE_RESOURCE_GROUP="${AZURE_RESOURCE_GROUP:=${CLUSTER_NAME_CAPZ}-rg}"
# These are required by the machinepool-windows template
export CI_RG="${AZURE_RESOURCE_GROUP}-ci"
export USER_IDENTITY="cloud-provider-user-identity"

# Optional, can be windows-2019 or windows-2022 (default)
# https://capz.sigs.k8s.io/developers/development.html
Expand All @@ -13,17 +16,10 @@ export WINDOWS_SERVER_VERSION="${WINDOWS_SERVER_VERSION:="windows-2022"}"
export AZURE_CONTROL_PLANE_MACHINE_TYPE="${AZURE_CONTROL_PLANE_MACHINE_TYPE:="Standard_D2s_v3"}"
export AZURE_NODE_MACHINE_TYPE="${AZURE_NODE_MACHINE_TYPE:="Standard_D2s_v3"}"

# Get KINDEST_NODE_VERSION variable from metadata.mk, default to a value if it cannot be found
SCRIPT_CURRENT_DIR="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 && pwd -P )"
METADATAMK=${SCRIPT_CURRENT_DIR}/../../../../metadata.mk
if [ -f "${METADATAMK}" ]; then
export KUBE_VERSION=$(grep KINDEST_NODE_VERSION= ${METADATAMK} | cut -d "=" -f 2)
export KIND_VERSION=$(grep KIND_VERSION= ${METADATAMK} | cut -d "=" -f 2)
else
export KUBE_VERSION=v1.27.11
export KIND_VERSION=v0.22.0
fi
export CLUSTER_API_VERSION="${CLUSTER_API_VERSION:="v1.6.3"}"
export KUBE_VERSION=v1.28.9
export KIND_VERSION=v0.24.0
export CLUSTER_API_VERSION="${CLUSTER_API_VERSION:="v1.8.1"}"
export AZURE_PROVIDER_VERSION="${AZURE_PROVIDER_VERSION:="v1.13.2"}"
export CONTAINERD_VERSION="${CONTAINERD_VERSION:="v1.7.13"}"
export CONTAINERD_VERSION="${CONTAINERD_VERSION:="v1.7.20"}"
export CALICO_VERSION="${CALICO_VERSION:="v3.28.1"}"
export YQ_VERSION="${YQ_VERSION:="v4.44.3"}"
Original file line number Diff line number Diff line change
@@ -1,4 +1,21 @@
#!/bin/bash
# Copyright (c) 2024 Tigera, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -o errexit
set -o nounset
set -o pipefail

set -e
LOCAL_PATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
Expand All @@ -7,11 +24,12 @@ LOCAL_PATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
# with windows ssh servers, but older versions don't know about that flag. Only use
# it when necessary (i.e. supported).
OFLAG="-O "
if scp -O 2>&1 | grep -q "unknown option -- O"; then
if [ "$(scp -O 2>&1 | grep -c 'unknown option -- O')" -gt 0 ]; then
OFLAG=""
fi

: ${KUBECTL:=${LOCAL_PATH}/bin/kubectl}
: ${WIN_NODE_COUNT:=2}

KCAPZ="${KUBECTL} --kubeconfig=./kubeconfig"

Expand Down
Loading