Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

e2e: improve log collection #985

Merged
merged 22 commits into from
Nov 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,18 +73,19 @@ jobs:
just coordinator initializer port-forwarder openssl cryptsetup service-mesh-proxy node-installer ${{ inputs.platform }}
- name: E2E Test
  run: |
    # Deploy the log collector in the background; it blocks until the test
    # has written the namespace file.
    nix run .#scripts.get-logs start workspace/e2e.namespace &
    nix shell -L .#contrast.e2e --command ${{ inputs.test-name }}.test -test.v \
      --image-replacements workspace/just.containerlookup \
      --namespace-file workspace/e2e.namespace \
      --platform ${{ inputs.platform }} \
      --skip-undeploy="${{ inputs.skip-undeploy && 'true' || 'false' }}"
- name: Download logs
  # Kept as a separate always() step: run scripts abort on the first failing
  # command, so a download placed after the test command in the same script
  # would be skipped exactly when the test fails and the logs are needed.
  if: always()
  run: |
    nix run .#scripts.get-logs download workspace/e2e.namespace
- name: Upload logs
  if: always()
  uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
  with:
    name: e2e_pod_logs-${{ inputs.platform }}-${{ inputs.test-name }}
    # get-logs download unpacks the collector's /export tree below workspace/logs.
    path: workspace/logs/export/logs
- name: Notify teams channel of failure
if: ${{ failure() && github.event_name == 'schedule' && github.run_attempt == 1 }}
uses: ./.github/actions/post_to_teams
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/e2e_aks_runtime.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,19 +80,20 @@ jobs:
az extension add --name confcom
- name: E2E test
  run: |
    # Deploy the log collector in the background; it blocks until the test
    # has written the namespace file.
    nix run .#scripts.get-logs start workspace/e2e.namespace &
    nix build .#contrast.e2e
    ./result/bin/aks-runtime.test -test.v \
      --image-replacements workspace/just.containerlookup \
      --namespace-file workspace/e2e.namespace \
      --platform AKS-CLH-SNP \
      --skip-undeploy="false"
- name: Download logs
  # Kept as a separate always() step: run scripts abort on the first failing
  # command, so a download placed after the test command in the same script
  # would be skipped exactly when the test fails and the logs are needed.
  if: always()
  run: |
    nix run .#scripts.get-logs download workspace/e2e.namespace
- name: Upload logs
  if: always()
  uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
  with:
    name: e2e_pod_logs-AKS-CLH-SNP-aks-runtime
    # get-logs download unpacks the collector's /export tree below workspace/logs.
    path: workspace/logs/export/logs
- name: Notify teams channel of failure
if: ${{ failure() && github.event_name == 'schedule' && github.run_attempt == 1 }}
uses: ./.github/actions/post_to_teams
Expand Down
65 changes: 65 additions & 0 deletions packages/by-name/k8s-log-collector/package.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Copyright 2024 Edgeless Systems GmbH
# SPDX-License-Identifier: AGPL-3.0-only

{
  dockerTools,
  writeShellApplication,
  buildEnv,
  inotify-tools,
  coreutils,
  findutils,
  bash,
  gnutar,
  gzip,
}:

let
  # Entry point of the container image. Mirrors every log file below /logs
  # whose path contains $POD_NAMESPACE into /export, keeping the directory
  # layout, and keeps following the files as they grow.
  collection-script = writeShellApplication {
    name = "collect-logs";
    runtimeInputs = [
      inotify-tools
      coreutils
      findutils
    ];
    text = ''
      set -euo pipefail

      # follow_log FILE
      # Starts a background tail that copies FILE to /export/FILE, provided
      # FILE is a regular file whose path contains $POD_NAMESPACE.
      follow_log() {
        local src="$1"
        if [[ -f "$src" && "$src" == *"$POD_NAMESPACE"* ]]; then
          mkdir -p "/export$(dirname "$src")"
          # -n +1 copies the file from its first line. Without it, tail starts
          # at the last 10 lines and the beginning of the log is lost.
          tail -n +1 --follow=name "$src" >"/export$src" &
        fi
      }

      mkdir -p /export

      # collect all logs that may have been missed during startup
      find /logs -name "*.log" |
        while read -r file; do
          follow_log "$file"
        done

      # follow every file that is created in or moved into /logs from now on;
      # inotifywait prints "<directory>/ <events> <filename>" per event
      inotifywait -m /logs -r -e create -e moved_to |
        while read -r path _action file; do
          follow_log "$path$file"
        done
    '';
  };
in
# Container image that runs the collection script. bash, tar and gzip are
# linked into /bin so that the collected /export tree can be archived from
# inside the running container.
dockerTools.buildImage {
  name = "k8s-log-collector";
  tag = "0.1.0";
  copyToRoot = buildEnv {
    name = "bin";
    paths = [
      bash
      coreutils
      gnutar
      gzip
    ];
    pathsToLink = "/bin";
  };
  config = {
    Cmd = [ "${collection-script}/bin/collect-logs" ];
    Volumes = {
      "/logs" = { };
    };
  };
}
51 changes: 51 additions & 0 deletions packages/log-collector.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Copyright 2024 Edgeless Systems GmbH
# SPDX-License-Identifier: AGPL-3.0-only

# The PriorityClass is listed first so that it already exists when the
# DaemonSet's pods are created; pods that reference a missing PriorityClass
# are rejected at admission, which would delay the collector.
apiVersion: scheduling.k8s.io/v1
kind: PriorityClass
metadata:
  name: high-priority-logcollector
value: 10000000
globalDefault: false
description: "This priority class is used to prioritise the log collector pod creation before anything else"
---
# Runs one log-collector pod per node, including control-plane nodes.
# "@@NAMESPACE@@" is a placeholder that must be substituted before applying.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: log-collector
  namespace: "@@NAMESPACE@@"
spec:
  selector:
    matchLabels:
      name: log-collector
  template:
    metadata:
      labels:
        name: log-collector
    spec:
      priorityClassName: high-priority-logcollector
      tolerations:
        - key: node-role.kubernetes.io/control-plane
          operator: Exists
          effect: NoSchedule
        - key: node-role.kubernetes.io/master
          operator: Exists
          effect: NoSchedule
      containers:
        - name: log-collector
          image: "ghcr.io/edgelesssys/k8s-log-collector@sha256:fd173230870b9e19a342627e31a50a0d6e45e7c8770c133b62e72cb4e898bc3e"
          volumeMounts:
            - mountPath: /logs
              name: log-volume
              readOnly: true
          env:
            # the namespace the collector pod itself runs in
            - name: POD_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
      volumes:
        - name: log-volume
          # mount the node's pod logs into the container
          hostPath:
            path: /var/log/pods
            type: Directory
54 changes: 32 additions & 22 deletions packages/scripts.nix
Original file line number Diff line number Diff line change
Expand Up @@ -424,33 +424,43 @@
'';
};

# Usage: get-logs [start | download] $namespaceFile
#
# start:    waits until $namespaceFile is non-empty, then deploys
#           packages/log-collector.yaml into the namespace named on its
#           first line.
# download: archives /export inside every log-collector pod of that
#           namespace and unpacks the archives below ./workspace/logs.
get-logs = writeShellApplication {
  name = "get-logs";
  runtimeInputs = with pkgs; [
    kubectl
  ];
  text = ''
    set -euo pipefail

    if [[ $# -lt 2 ]]; then
      echo "Usage: get-logs [start | download] namespaceFile"
      exit 1
    fi

    case $1 in
    start)
      # wait until namespace file is populated
      while ! [[ -s "$2" ]]; do
        sleep 1
      done
      namespace="$(head -n1 "$2")"
      # instantiate the manifest template for this namespace
      cp ./packages/log-collector.yaml ./workspace/log-collector.yaml
      sed -i "s/@@NAMESPACE@@/''${namespace}/g" ./workspace/log-collector.yaml
      kubectl apply -f ./workspace/log-collector.yaml 1>/dev/null 2>/dev/null
      ;;
    download)
      namespace="$(head -n1 "$2")"
      # The collector is a DaemonSet, so there can be one pod per node.
      # "cut -c 5-" strips the "pod/" prefix printed by "-o name".
      pods="$(kubectl get pods -o name -n "$namespace" | grep log-collector | cut -c 5-)" || {
        echo "No log-collector pod found in namespace $namespace" >&2
        exit 1
      }
      mkdir -p ./workspace/logs
      for pod in $pods; do
        kubectl wait --for=condition=Ready -n "$namespace" "pod/$pod" 1>/dev/null 2>/dev/null
        kubectl exec -n "$namespace" "$pod" -- /bin/bash -c "rm -f /exported-logs.tar.gz; tar zcvf /exported-logs.tar.gz /export" 1>/dev/null 2>/dev/null
        kubectl cp -n "$namespace" "$pod:/exported-logs.tar.gz" ./workspace/logs/exported-logs.tar.gz 1>/dev/null 2>/dev/null
        # every archive unpacks into the same export/ tree, so the logs of
        # all collector pods end up merged below ./workspace/logs
        tar xzvf ./workspace/logs/exported-logs.tar.gz --directory ./workspace/logs 1>/dev/null 2>/dev/null
      done
      ;;
    *)
      echo "Unknown option $1"
      echo "Usage: get-logs [start | download] namespaceFile"
      exit 1
      ;;
    esac
  '';
};

Expand Down