diff --git a/.github/workflows/e2e-test.yaml b/.github/workflows/e2e-test.yaml index 1a31f9c8..5dd80779 100644 --- a/.github/workflows/e2e-test.yaml +++ b/.github/workflows/e2e-test.yaml @@ -81,6 +81,7 @@ jobs: - name: Run test run: | make e2e-test + make webhook-e2e-test - name: Cleanup if: always() run: | diff --git a/Makefile b/Makefile index ce88c464..ac465aeb 100644 --- a/Makefile +++ b/Makefile @@ -101,16 +101,8 @@ docker-push: ## Push docker image with the manager. # also generates a placeholder cert for the webhook - this cert is not intended to be valid .PHONY: build-deploy build-deploy: ## Create a deployment file that can be applied with `kubectl apply -f deploy.yaml` - $(eval TEMP_KEY := $(shell mktemp)) - $(eval TEMP_CERT := $(shell mktemp)) cd config/manager && kustomize edit set image controller=${ECRIMAGES} kustomize build config/default > deploy.yaml - openssl req -x509 -nodes -days 1 -newkey rsa:2048 -keyout $(TEMP_KEY) -out $(TEMP_CERT) -subj "/CN=not-a-real-cn/O=not-a-real-o" > /dev/null 2>&1 - export KEY_B64=`cat $(TEMP_KEY) | base64` && \ - export CERT_B64=`cat $(TEMP_CERT) | base64` && \ - yq -i e '(.[] as $$item | select(.metadata.name == "webhook-cert" and .kind == "Secret") | .data."tls.crt") = env(CERT_B64)' deploy.yaml && \ - yq -i e '(.[] as $$item | select(.metadata.name == "webhook-cert" and .kind == "Secret") | .data."tls.key") = env(KEY_B64)' deploy.yaml 2>&1 - rm $(TEMP_KEY) $(TEMP_CERT) .PHONY: manifest manifest: ## Generate CRD manifest @@ -155,11 +147,8 @@ docs: mkdocs build # NB webhook tests can only run if the controller is deployed to the cluster -webhook-e2e-test-namespace := "webhook-e2e-test" - .PHONY: webhook-e2e-test webhook-e2e-test: - @kubectl create namespace $(webhook-e2e-test-namespace) > /dev/null 2>&1 || true # ignore already exists error LOG_LEVEL=debug cd test && go test \ -p 1 \ diff --git a/cmd/aws-application-networking-k8s/main.go b/cmd/aws-application-networking-k8s/main.go index ab2a8f6b..744da27a 100644 
--- a/cmd/aws-application-networking-k8s/main.go +++ b/cmd/aws-application-networking-k8s/main.go @@ -23,6 +23,7 @@ import ( "go.uber.org/zap/zapcore" "os" k8swebhook "sigs.k8s.io/controller-runtime/pkg/webhook" + "strings" "github.com/aws/aws-application-networking-k8s/pkg/aws" "github.com/aws/aws-application-networking-k8s/pkg/utils/gwlog" @@ -132,14 +133,17 @@ func main() { // do not create the webhook server when running locally var webhookServer k8swebhook.Server - isLocalDev := config.DevMode != "" - if !isLocalDev { + enableWebhook := strings.ToLower(config.WebhookEnabled) == "true" + if enableWebhook { + setupLog.Info("Webhook is enabled, 'webhook-cert' secret must contain a valid TLS key and cert") webhookServer = k8swebhook.NewServer(k8swebhook.Options{ Port: 9443, CertDir: "/etc/webhook-cert/", CertName: "tls.crt", KeyName: "tls.key", }) + } else { + setupLog.Infof("Webhook is disabled, value: '%s'", config.WebhookEnabled) } mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{ @@ -156,8 +160,7 @@ func main() { setupLog.Fatal("manager setup failed:", err) } - if !isLocalDev { - // register webhook handlers + if enableWebhook { readinessGateInjector := webhook.NewPodReadinessGateInjector( mgr.GetClient(), log.Named("pod-readiness-gate-injector"), diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 54b71ded..88b84b18 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -51,6 +51,9 @@ spec: port: 8081 initialDelaySeconds: 5 periodSeconds: 10 + env: + - name: WEBHOOK_ENABLED + value: "" # TODO(user): Configure the resources accordingly based on the project requirements. 
# More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ resources: diff --git a/config/webhook/manifests.yaml b/config/webhook/manifests.yaml index 0718d682..f74f1ab1 100644 --- a/config/webhook/manifests.yaml +++ b/config/webhook/manifests.yaml @@ -2,7 +2,6 @@ apiVersion: admissionregistration.k8s.io/v1 kind: MutatingWebhookConfiguration metadata: - creationTimestamp: null name: aws-appnet-gwc-mutating-webhook webhooks: - admissionReviewVersions: diff --git a/docs/guides/environment.md b/docs/guides/environment.md index f8532b81..80c43c6f 100644 --- a/docs/guides/environment.md +++ b/docs/guides/environment.md @@ -75,3 +75,17 @@ Default: "" When set as "true", the controller will run in "single service network" mode that will override all gateways to point to default service network, instead of searching for service network with the same name. Can be used for small setups and conformance tests. + +--- + +#### `WEBHOOK_ENABLED` + +Type: string + +Default: "" + +When set as "true", the controller will start the webhook listener responsible for pod readiness gate injection +(see ```pod-readiness-gates.md```). This is disabled by default for ```deploy.yaml``` because the controller will not start +successfully without the TLS certificate for the webhook in place. While this can be fixed by running +```scripts/gen-webhook-secret.sh```, it requires manual action. The webhook is enabled by default for the Helm install +as the Helm install will also generate the necessary certificate. 
diff --git a/docs/guides/pod-readiness-gates.md b/docs/guides/pod-readiness-gates.md index 4ffb57db..d7b13d2e 100644 --- a/docs/guides/pod-readiness-gates.md +++ b/docs/guides/pod-readiness-gates.md @@ -18,53 +18,45 @@ This prevents the rolling update of a deployment from terminating old pods until ## Setup Pod readiness gates rely on [»admission webhooks«](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/), where the Kubernetes API server makes calls to the AWS Gateway API controller as part of pod creation. This call is made using TLS, so the controller must present a TLS certificate. This certificate is stored as a standard Kubernetes secret. If you are using Helm, the certificate will automatically be configured as part of the Helm install. -If you are manually deploying the controller, for example using the ```deploy.yaml``` file, you will need to create the tls secret for the webhook in the controller namespace. The ```deploy.yaml``` file includes a placeholder secret, but it must be updated if you wish to use the webhook. The placeholder secret _will not_ pass API server validations, but will ensure the controller container is able to start. +If you are manually deploying the controller using the ```deploy.yaml``` file, you will need to either patch the ```deploy.yaml``` file before installing it (see ```scripts/patch-deploy-yaml.sh```), or generate the secret after installation (see ```scripts/gen-webhook-secret.sh```) and then manually enable the webhook via the ```WEBHOOK_ENABLED``` environment variable. 
-### Webhook secret requirements -The webhook requires a specific kubernetes secret to exist in the same namespace as the webhook itself: -* secret name: ```webhook-cert``` -* default controller namespace: ```aws-application-networking-system``` -```console -# example create-secret command, assumes tls.crt and tls.key exist in current directory -# if the placeholder secret exists, you will need to delete it before setting the new value -kubectl create secret tls webhook-cert --namespace aws-application-networking-system --cert=tls.crt --key=tls.key +Note that, without the secret in place, the controller cannot start successfully, and you will see an error message like the following: ``` - -### Webhook secret configuration example -The below example creates an unsigned certificate, adds it as the webhook secret, then patches the webhook configuration so the API server trusts the certificate. - -If your cluster uses its own PKI and includes appropriate trust configuration for the API server, the certificate issued would be signed by your internal certificate authority and therefore not require the ```kubectl patch``` command below. 
-```console -# Example commands to configure the webhook to use an unsigned certificate -CERT_FILE=tls.crt -KEY_FILE=tls.key - -WEBHOOK_SVC_NAME=webhook-service -WEBHOOK_NAME=aws-appnet-gwc-mutating-webhook -WEBHOOK_NAMESPACE=aws-application-networking-system -WEBHOOK_SECRET_NAME=webhook-cert - -# Step 1: generate a certificate if needed, can also be provisioned through orgnanizational PKI, etc -# This cert includes a 100 year expiry -HOST=${WEBHOOK_SVC_NAME}.${WEBHOOK_NAMESPACE}.svc -openssl req -x509 -nodes -days 36500 -newkey rsa:2048 -keyout ${KEY_FILE} -out ${CERT_FILE} -subj "/CN=${HOST}/O=${HOST}" \ - -addext "subjectAltName = DNS:${HOST}, DNS:${HOST}.cluster.local" - -# Step 2: replace the placeholder secret from deploy.yaml -kubectl delete secret $WEBHOOK_SECRET_NAME --namespace $WEBHOOK_NAMESPACE -kubectl create secret tls $WEBHOOK_SECRET_NAME --namespace $WEBHOOK_NAMESPACE --cert=${CERT_FILE} --key=${KEY_FILE} - -# Step 3: Patch the webhook CA bundle to exactly the cert being used. -# This will ensure Kubernetes API server is able to trust the certificate presented by the webhook. -# This step would not be required if you are using a signed certificate that is already trusted by the API server -CERT_B64=$(cat tls.crt | base64) -kubectl patch mutatingwebhookconfigurations.admissionregistration.k8s.io $WEBHOOK_NAME \ - --namespace $WEBHOOK_NAMESPACE --type='json' \ - -p="[{'op': 'replace', 'path': '/webhooks/0/clientConfig/caBundle', 'value': '${CERT_B64}'}]" +{"level":"error","ts":"...","logger":"setup","caller":"workspace/main.go:240","msg":"tls: failed to find any PEM data in certificate inputproblem running manager"} +``` +For this reason, the webhook is ```DISABLED``` by default in the controller for the non-Helm install. You can enable the webhook by setting the ```WEBHOOK_ENABLED``` environment variable to "true" in the ```deploy.yaml``` file. 
+```yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: gateway-api-controller + namespace: aws-application-networking-system + labels: + control-plane: gateway-api-controller +spec: + ... + template: + metadata: + annotations: + kubectl.kubernetes.io/default-container: manager + labels: + control-plane: gateway-api-controller + spec: + securityContext: + runAsNonRoot: true + containers: + - command: + ... + name: manager + ... + env: + - name: WEBHOOK_ENABLED + value: "true" # <-- value of "true" enables the webhook in the controller ``` +If you run ```scripts/patch-deploy-yaml.sh``` prior to installing ```deploy.yaml```, the script will create the necessary TLS certificates and configuration and will enable the webhook in the controller. Note that, even with the webhook enabled, the webhook will only run for namespaces labeled with `application-networking.k8s.aws/pod-readiness-gate-inject: enabled`. -## Configuration -Pod readiness gate support is enabled by default on the AWS Gateway API controller. To enable the feature, you must apply a label to each of the namespaces you would like to use this feature. You can create and label a namespace as follows - +## Enabling the readiness gate +After a Helm install or manually configuring and enabling the webhook, you are ready to begin using pod readiness gates. Apply a label to each namespace you would like to use this feature. You can create and label a namespace as follows - ``` $ kubectl create namespace example-ns @@ -81,7 +73,7 @@ Annotations: Status: Active ``` -Once labelled, the controller will add the pod readiness gates to all subsequently created pods. +Once labelled, the controller will add the pod readiness gates to all subsequently created pods in the namespace. The readiness gates have the condition type ```application-networking.k8s.aws/pod-readiness-gate``` and the controller injects the config to the pod spec only during pod creation. 
diff --git a/helm/templates/_helpers.tpl b/helm/templates/_helpers.tpl index ba90cd16..986b4fa8 100644 --- a/helm/templates/_helpers.tpl +++ b/helm/templates/_helpers.tpl @@ -30,3 +30,21 @@ If release name contains chart name it will be used as a full name. {{- define "service-account.name" -}} {{ default "default" .Values.serviceAccount.name }} {{- end -}} + +{{/* Import or generate certificates for webhook */}} +{{- define "aws-gateway-controller.webhookTLS" -}} +{{- if (and .Values.webhookTLS.caCert .Values.webhookTLS.cert .Values.webhookTLS.key) -}} +caCert: {{ .Values.webhookTLS.caCert }} +cert: {{ .Values.webhookTLS.cert }} +key: {{ .Values.webhookTLS.key }} +{{- else -}} +{{- $ca := genCA "aws-gateway-controller-ca" 36500 -}} +{{- $serviceDefaultName:= printf "webhook-service.%s.svc" .Release.Namespace -}} +{{- $secretName := "webhook-cert" -}} +{{- $altNames := list ($serviceDefaultName) (printf "%s.cluster.local" $serviceDefaultName) -}} +{{- $cert := genSignedCert $serviceDefaultName nil $altNames 36500 $ca -}} +caCert: {{ $ca.Cert | b64enc }} +cert: {{ $cert.Cert | b64enc }} +key: {{ $cert.Key | b64enc }} +{{- end -}} +{{- end -}} diff --git a/helm/templates/deployment.yaml b/helm/templates/deployment.yaml index 8aca7bdd..6687e504 100644 --- a/helm/templates/deployment.yaml +++ b/helm/templates/deployment.yaml @@ -52,6 +52,8 @@ spec: ports: - name: http containerPort: {{ .Values.deployment.containerPort }} + - name: webhook-server + containerPort: 9443 resources: {{- toYaml .Values.resources | nindent 10 }} livenessProbe: @@ -72,6 +74,10 @@ spec: drop: - ALL readOnlyRootFilesystem: true + volumeMounts: + - mountPath: /etc/webhook-cert + name: webhook-cert + readOnly: true env: - name: REGION value: {{ .Values.awsRegion | quote }} @@ -87,7 +93,14 @@ spec: value: {{ .Values.defaultServiceNetwork | quote }} - name: LOG_LEVEL value: {{ .Values.log.level | quote }} + - name: WEBHOOK_ENABLED + value: {{ .Values.webhookEnabled | quote }} 
terminationGracePeriodSeconds: 10 + volumes: + - name: webhook-cert + secret: + defaultMode: 420 + secretName: webhook-cert nodeSelector: {{ toYaml .Values.deployment.nodeSelector | nindent 8 }} {{ if .Values.deployment.tolerations -}} tolerations: {{ toYaml .Values.deployment.tolerations | nindent 8 }} @@ -97,4 +110,4 @@ spec: {{ end -}} {{ if .Values.deployment.priorityClassName -}} priorityClassName: {{ .Values.deployment.priorityClassName }} - {{ end -}} + {{ end -}} \ No newline at end of file diff --git a/helm/templates/webhook.yaml b/helm/templates/webhook.yaml new file mode 100644 index 00000000..e8047fe3 --- /dev/null +++ b/helm/templates/webhook.yaml @@ -0,0 +1,62 @@ +{{ $tls := fromYaml ( include "aws-gateway-controller.webhookTLS" . ) }} +--- +apiVersion: admissionregistration.k8s.io/v1 +kind: MutatingWebhookConfiguration +metadata: + name: aws-appnet-gwc-mutating-webhook +webhooks: + - admissionReviewVersions: + - v1 + clientConfig: + caBundle: {{ $tls.caCert }} + service: + name: webhook-service + namespace: {{ .Release.Namespace }} + path: /mutate-pod + failurePolicy: Fail + name: mpod.gwc.k8s.aws + rules: + - apiGroups: + - "" + apiVersions: + - v1 + operations: + - CREATE + resources: + - pods + sideEffects: None + namespaceSelector: + matchExpressions: + - key: application-networking.k8s.aws/pod-readiness-gate-inject + operator: In + values: + - enabled + objectSelector: + matchExpressions: + - key: app.kubernetes.io/name + operator: NotIn + values: + - gateway-api-controller +--- +apiVersion: v1 +kind: Service +metadata: + name: webhook-service + namespace: {{ .Release.Namespace }} +spec: + ports: + - port: 443 + targetPort: webhook-server + selector: + control-plane: gateway-api-controller +--- +apiVersion: v1 +kind: Secret +metadata: + name: webhook-cert + namespace: {{ .Release.Namespace }} +type: kubernetes.io/tls +data: + ca.crt: {{ $tls.caCert }} + tls.crt: {{ $tls.cert }} + tls.key: {{ $tls.key }} \ No newline at end of file diff --git 
a/helm/values.yaml b/helm/values.yaml index 3f7de481..bba9322e 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -76,4 +76,11 @@ awsAccountId: clusterVpcId: clusterName: defaultServiceNetwork: -latticeEndpoint: \ No newline at end of file +latticeEndpoint: +webhookEnabled: true + +# TLS cert/key for the webhook. If specified, values must be base64 encoded +webhookTLS: + caCert: + cert: + key: \ No newline at end of file diff --git a/pkg/config/controller_config.go b/pkg/config/controller_config.go index 133bd3a6..43af0679 100644 --- a/pkg/config/controller_config.go +++ b/pkg/config/controller_config.go @@ -26,6 +26,7 @@ const ( ENABLE_SERVICE_NETWORK_OVERRIDE = "ENABLE_SERVICE_NETWORK_OVERRIDE" AWS_ACCOUNT_ID = "AWS_ACCOUNT_ID" DEV_MODE = "DEV_MODE" + WEBHOOK_ENABLED = "WEBHOOK_ENABLED" ) var VpcID = "" @@ -34,6 +35,7 @@ var Region = "" var DefaultServiceNetwork = "" var ClusterName = "" var DevMode = "" +var WebhookEnabled = "" var ServiceNetworkOverrideMode = false @@ -47,6 +49,7 @@ func configInit(sess *session.Session, metadata EC2Metadata) error { var err error DevMode = os.Getenv(DEV_MODE) + WebhookEnabled = os.Getenv(WEBHOOK_ENABLED) VpcID = os.Getenv(CLUSTER_VPC_ID) if VpcID == "" { diff --git a/scripts/gen-webhook-secret.sh b/scripts/gen-webhook-secret.sh new file mode 100755 index 00000000..12c4d675 --- /dev/null +++ b/scripts/gen-webhook-secret.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +# Use this script to update the webhook secret post-deployment. Alternatively, +# you can use patch-deploy-yaml.sh prior to deployment. 
+# +# Generates a certificate for use by the controller webhook +# You can also manually re-create the webhook secret using your own PKI + +TEMP_KEY=`mktemp` +TEMP_CERT=`mktemp` + +WEBHOOK_SVC_NAME=webhook-service +WEBHOOK_NAME=aws-appnet-gwc-mutating-webhook +WEBHOOK_NAMESPACE=aws-application-networking-system +WEBHOOK_SECRET_NAME=webhook-cert + +echo "Generating certificate for webhook" +HOST=${WEBHOOK_SVC_NAME}.${WEBHOOK_NAMESPACE}.svc +openssl req -x509 -nodes -days 36500 -newkey rsa:2048 -keyout ${TEMP_KEY} -out ${TEMP_CERT} -subj "/CN=${HOST}/O=${HOST}" \ + -addext "subjectAltName = DNS:${HOST}, DNS:${HOST}.cluster.local" + +CERT_B64=`cat $TEMP_CERT | base64` + +echo "Recreating webhook secret" +kubectl delete secret $WEBHOOK_SECRET_NAME --namespace $WEBHOOK_NAMESPACE +kubectl create secret tls $WEBHOOK_SECRET_NAME --namespace $WEBHOOK_NAMESPACE --cert=${TEMP_CERT} --key=${TEMP_KEY} + +echo "Patching webhook with new cert" +kubectl patch mutatingwebhookconfigurations.admissionregistration.k8s.io $WEBHOOK_NAME \ + --namespace $WEBHOOK_NAMESPACE --type='json' \ + -p="[{'op': 'replace', 'path': '/webhooks/0/clientConfig/caBundle', 'value': '${CERT_B64}'}]" + +rm $TEMP_KEY $TEMP_CERT +echo "Done" \ No newline at end of file diff --git a/scripts/patch-deploy-yaml.sh b/scripts/patch-deploy-yaml.sh new file mode 100755 index 00000000..3d2e1e8a --- /dev/null +++ b/scripts/patch-deploy-yaml.sh @@ -0,0 +1,86 @@ +#!/usr/bin/env bash + +# Updates deploy.yaml with a certificate for use by the controller webhook +# AND enables the webhook via environment variable. +# You can also provide a certificate using your own PKI, or use the +# gen-webhook-secret.sh script after deploying. +# +# Usage: +# ./patch-deploy-yaml.sh [-k KEY_FILE -c CERT_FILE -a CA_FILE] DEPLOY_YAML + +KEY="" +CERT="" +CA="" +TEMP_KEY=`mktemp` +TEMP_CERT=`mktemp` + +while getopts "k:c:a:" opt; do + case $opt in + k) + KEY=${OPTARG} + ;; + c) + CERT=${OPTARG} + ;; + a) + CA=${OPTARG} + ;; + \?) 
+ echo "Invalid option: -$OPTARG" >&2 + exit 1 + ;; + :) + echo "Option -$OPTARG requires an argument." >&2 + exit 1 + ;; + esac +done + +shift $((OPTIND - 1)) +DEPLOY_YAML=$1 + +if [ -z "${DEPLOY_YAML}" ]; then + echo "Target deploy.yaml file not specified" >&2 + exit 1 +fi + +generate_cert() { + KEY=$TEMP_KEY + CERT=$TEMP_CERT + # point the CA to the cert itself, this will ensure it is trusted by API server + CA=$CERT + + WEBHOOK_SVC_NAME=webhook-service + WEBHOOK_NAME=aws-appnet-gwc-mutating-webhook + WEBHOOK_NAMESPACE=aws-application-networking-system + WEBHOOK_SECRET_NAME=webhook-cert + + echo "Generating certificate for webhook" + HOST=${WEBHOOK_SVC_NAME}.${WEBHOOK_NAMESPACE}.svc + openssl req -x509 -nodes -days 36500 -newkey rsa:2048 -keyout ${KEY} -out ${CERT} -subj "/CN=${HOST}/O=${HOST}" \ + -addext "subjectAltName = DNS:${HOST}, DNS:${HOST}.cluster.local" +} + +if [ -z "${KEY}" ]; then + echo "Key not specified. Will generate..." + REMOVE_KEY_FILES="1" + generate_cert +fi + +export KEY_B64=`cat $KEY | base64` +export CERT_B64=`cat $CERT | base64` +export CA_B64=`cat $CA | base64` + +echo "Patching webhook secret" +yq -i e '(.[] as $item | select(.metadata.name == "webhook-cert" and .kind == "Secret") | .data."tls.crt") = env(CERT_B64)' $DEPLOY_YAML 2>&1 +yq -i e '(.[] as $item | select(.metadata.name == "webhook-cert" and .kind == "Secret") | .data."tls.key") = env(KEY_B64)' $DEPLOY_YAML 2>&1 +yq -i e '(.[] as $item | select(.metadata.name == "webhook-cert" and .kind == "Secret") | .data."ca.crt") = env(CA_B64)' $DEPLOY_YAML 2>&1 + +echo "Patching webhook" +yq -i e '(.[] as $item | select(.metadata.name == "aws-appnet-gwc-mutating-webhook" and .kind == "MutatingWebhookConfiguration") | .webhooks[0].clientConfig.caBundle) = env(CA_B64)' $DEPLOY_YAML 2>&1 + +echo "Enabling webhook" +yq -i -e '(.[] as $item | select(.metadata.name == "gateway-api-controller" and .kind == "Deployment") | .spec.template.spec.containers[] | select(.name == "manager") | .env[] 
| select(.name == "WEBHOOK_ENABLED") | .value) = "true"' $DEPLOY_YAML 2>&1 + +rm $TEMP_KEY $TEMP_CERT +echo "Done" \ No newline at end of file