From ba7fb9da548bc6a8edb207341a06b796fd816c90 Mon Sep 17 00:00:00 2001
From: miampf
Date: Thu, 21 Nov 2024 12:12:36 +0100
Subject: [PATCH] e2e: improve log collection (#985)

---
 .github/workflows/e2e.yml                     |  9 ++-
 .github/workflows/e2e_aks_runtime.yml         |  9 ++-
 .../by-name/k8s-log-collector/package.nix     | 66 +++++++++++++++++++
 packages/log-collector.yaml                   | 51 ++++++++++++++
 packages/scripts.nix                          | 63 +++++++++++-------
 5 files changed, 172 insertions(+), 26 deletions(-)
 create mode 100644 packages/by-name/k8s-log-collector/package.nix
 create mode 100644 packages/log-collector.yaml

diff --git a/.github/workflows/e2e.yml b/.github/workflows/e2e.yml
index 1e3ed5d19..55be78ccb 100644
--- a/.github/workflows/e2e.yml
+++ b/.github/workflows/e2e.yml
@@ -73,18 +73,23 @@ jobs:
           just coordinator initializer port-forwarder openssl cryptsetup service-mesh-proxy node-installer ${{ inputs.platform }}
       - name: E2E Test
         run: |
-          nix run .#scripts.get-logs workspace/e2e.namespace &
+          nix run .#scripts.get-logs start workspace/e2e.namespace &
           nix shell -L .#contrast.e2e --command ${{ inputs.test-name }}.test -test.v \
             --image-replacements workspace/just.containerlookup \
             --namespace-file workspace/e2e.namespace \
             --platform ${{ inputs.platform }} \
             --skip-undeploy="${{ inputs.skip-undeploy && 'true' || 'false' }}"
+      # Runs even when the test step fails, which is when the logs are needed.
+      - name: Download logs
+        if: always()
+        run: |
+          nix run .#scripts.get-logs download workspace/e2e.namespace
       - name: Upload logs
         if: always()
         uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
         with:
           name: e2e_pod_logs-${{ inputs.platform }}-${{ inputs.test-name }}
-          path: workspace/namespace-logs
+          path: workspace/logs/export/logs
       - name: Notify teams channel of failure
         if: ${{ failure() && github.event_name == 'schedule' && github.run_attempt == 1 }}
         uses: ./.github/actions/post_to_teams
diff --git a/.github/workflows/e2e_aks_runtime.yml b/.github/workflows/e2e_aks_runtime.yml
index 064e4a3cb..23e5ddfa7 100644
--- a/.github/workflows/e2e_aks_runtime.yml
+++ b/.github/workflows/e2e_aks_runtime.yml
@@ -80,19 +80,24 @@ jobs:
           az extension add --name confcom
       - name: E2E test
         run: |
-          nix run .#scripts.get-logs workspace/e2e.namespace &
+          nix run .#scripts.get-logs start workspace/e2e.namespace &
           nix build .#contrast.e2e
           ./result/bin/aks-runtime.test -test.v \
             --image-replacements workspace/just.containerlookup \
             --namespace-file workspace/e2e.namespace \
             --platform AKS-CLH-SNP \
             --skip-undeploy="false"
+      # Runs even when the test step fails, which is when the logs are needed.
+      - name: Download logs
+        if: always()
+        run: |
+          nix run .#scripts.get-logs download workspace/e2e.namespace
       - name: Upload logs
         if: always()
         uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
         with:
           name: e2e_pod_logs-AKS-CLH-SNP-aks-runtime
-          path: workspace/namespace-logs
+          path: workspace/logs/export/logs
       - name: Notify teams channel of failure
         if: ${{ failure() && github.event_name == 'schedule' && github.run_attempt == 1 }}
         uses: ./.github/actions/post_to_teams
diff --git a/packages/by-name/k8s-log-collector/package.nix b/packages/by-name/k8s-log-collector/package.nix
new file mode 100644
index 000000000..ecd08e6f0
--- /dev/null
+++ b/packages/by-name/k8s-log-collector/package.nix
@@ -0,0 +1,66 @@
+# Copyright 2024 Edgeless Systems GmbH
+# SPDX-License-Identifier: AGPL-3.0-only
+
+{
+  dockerTools,
+  writeShellApplication,
+  buildEnv,
+  inotify-tools,
+  coreutils,
+  findutils,
+  bash,
+  gnutar,
+  gzip,
+}:
+
+let
+  collection-script = writeShellApplication {
+    name = "collect-logs";
+    runtimeInputs = [
+      inotify-tools
+      coreutils
+      findutils
+    ];
+    text = ''
+      set -euo pipefail
+      mkdir /export
+      # collect all logs that may have been missed during startup
+      find /logs -name "*.log" |
+        while read -r file; do
+          if [[ -f "$file" && "$file" == *"$POD_NAMESPACE"* ]]; then
+            mkdir -p "/export$(dirname "$file")"
+            # -n +1: copy from the first line; tail defaults to the last 10 lines only
+            tail -n +1 --follow=name "$file" >"/export$file" &
+          fi
+        done
+      inotifywait -m /logs -r -e create -e moved_to |
+        while read -r path _action file; do
+          filepath="$path$file"
+          if [[ -f "$filepath" && "$filepath" == *"$POD_NAMESPACE"* ]]; then
+            mkdir -p "/export$path"
+            tail -n +1 --follow=name "$filepath" >"/export$filepath" &
+          fi
+        done
+    '';
+  };
+in
+dockerTools.buildImage {
+  name = "k8s-log-collector";
+  tag = "0.1.0";
+  copyToRoot = buildEnv {
+    name = "bin";
+    paths = [
+      bash
+      coreutils
+      gnutar
+      gzip
+    ];
+    pathsToLink = "/bin";
+  };
+  config = {
+    Cmd = [ "${collection-script}/bin/collect-logs" ];
+    Volumes = {
+      "/logs" = { };
+    };
+  };
+}
diff --git a/packages/log-collector.yaml b/packages/log-collector.yaml
new file mode 100644
index 000000000..ab31e585d
--- /dev/null
+++ b/packages/log-collector.yaml
@@ -0,0 +1,51 @@
+# Copyright 2024 Edgeless Systems GmbH
+# SPDX-License-Identifier: AGPL-3.0-only
+
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: log-collector
+  namespace: "@@NAMESPACE@@"
+spec:
+  selector:
+    matchLabels:
+      name: log-collector
+  template:
+    metadata:
+      labels:
+        name: log-collector
+    spec:
+      priorityClassName: high-priority-logcollector
+      tolerations:
+        - key: node-role.kubernetes.io/control-plane
+          operator: Exists
+          effect: NoSchedule
+        - key: node-role.kubernetes.io/master
+          operator: Exists
+          effect: NoSchedule
+      containers:
+        - name: log-collector
+          image: "ghcr.io/edgelesssys/k8s-log-collector@sha256:fd173230870b9e19a342627e31a50a0d6e45e7c8770c133b62e72cb4e898bc3e"
+          volumeMounts:
+            - mountPath: /logs
+              name: log-volume
+              readOnly: true
+          env:
+            - name: POD_NAMESPACE
+              valueFrom:
+                fieldRef:
+                  fieldPath: metadata.namespace
+      volumes:
+        - name: log-volume
+          # mount the node's pod logs into the container
+          hostPath:
+            path: /var/log/pods
+            type: Directory
+---
+apiVersion: scheduling.k8s.io/v1
+kind: PriorityClass
+metadata:
+  name: high-priority-logcollector
+value: 10000000
+globalDefault: false
+description: "This priority class is used to prioritise the log collector pod creation before anything else"
diff --git a/packages/scripts.nix b/packages/scripts.nix
index a116e9343..3f977ec2a 100644
--- a/packages/scripts.nix
+++ b/packages/scripts.nix
@@ -424,33 +424,52 @@
     '';
   };
 
-  # Usage: get-logs $namespaceFile
+  # Usage: get-logs [start | download] $namespaceFile
   get-logs = writeShellApplication {
     name = "get-logs";
-    runtimeInputs = with pkgs; [ kubectl ];
+    runtimeInputs = with pkgs; [
+      kubectl
+    ];
     text = ''
       set -euo pipefail
-      # wait until namespace file is populated
-      while ! [[ -s "$1" ]]; do
-        sleep 1
-      done
-      namespace="$(head -n1 "$1")"
-      while kubectl get ns "$namespace" 1>/dev/null 2>/dev/null; do
-        pods="$(kubectl get pods -n "$namespace" | awk '!/^NAME/{print $1}')"
-        mkdir -p "workspace/namespace-logs"
-        for pod in $pods; do
-          logfile="workspace/namespace-logs/$pod.log"
-          if ! [[ -f "$logfile" ]]; then
-            {
-              touch "$logfile" # prevents creation of to much processes
-              # wait for all containers of the pod to come online, then collect the logs
-              kubectl wait pod --all --for=condition=Ready --timeout="-1s" -n "$namespace" "$pod" 1>/dev/null 2>/dev/null
-              kubectl logs -f --all-containers=true -n "$namespace" "$pod" > "$logfile"
-            } &
-          fi
+
+      if [[ $# -lt 2 ]]; then
+        echo "Usage: get-logs [start | download] namespaceFile"
+        exit 1
+      fi
+      case $1 in
+      start)
+        # wait until namespace file is populated
+        while ! [[ -s "$2" ]]; do
+          sleep 1
         done
-      done
-      wait
+        namespace="$(head -n1 "$2")"
+        cp ./packages/log-collector.yaml ./workspace/log-collector.yaml
+        sed -i "s/@@NAMESPACE@@/''${namespace}/g" ./workspace/log-collector.yaml
+        kubectl apply -f ./workspace/log-collector.yaml 1>/dev/null
+        ;;
+      download)
+        namespace="$(head -n1 "$2")"
+        mkdir -p ./workspace/logs
+        # the DaemonSet runs one collector pod per node, so fetch the export of every pod
+        pods="$(kubectl get pods -o name -n "$namespace" -l name=log-collector | cut -c 5-)"
+        if [[ -z "$pods" ]]; then
+          echo "No log-collector pod found in namespace $namespace" >&2
+          exit 1
+        fi
+        for pod in $pods; do
+          archive="./workspace/logs/exported-logs-$pod.tar.gz"
+          kubectl wait --for=condition=Ready -n "$namespace" "pod/$pod" 1>/dev/null
+          kubectl exec -n "$namespace" "$pod" -- /bin/bash -c "rm -f /exported-logs.tar.gz; tar zcvf /exported-logs.tar.gz /export" 1>/dev/null
+          kubectl cp -n "$namespace" "$pod:/exported-logs.tar.gz" "$archive" 1>/dev/null
+          tar xzvf "$archive" --directory ./workspace/logs 1>/dev/null
+        done
+        ;;
+      *)
+        echo "Unknown option $1"
+        echo "Usage: get-logs [start | download] namespaceFile"
+        exit 1
+      esac
     '';
   };
 