diff --git a/.github/actions/cdbg_deploy/action.yml b/.github/actions/cdbg_deploy/action.yml new file mode 100644 index 00000000000..fa9c402e1fb --- /dev/null +++ b/.github/actions/cdbg_deploy/action.yml @@ -0,0 +1,100 @@ +name: Cdbg deploy +description: Deploy the Constellation Bootstrapper to the cluster via the debugd. + +inputs: + test: + description: "The e2e test payload." + required: true + azureClusterCreateCredentials: + description: "Azure credentials authorized to create a Constellation cluster." + required: true + azureIAMCreateCredentials: + description: "Azure credentials authorized to create an IAM configuration." + required: true + cloudProvider: + description: "The cloud provider to use." + required: true + kubernetesVersion: + description: "Kubernetes version to create the cluster from." + required: true + refStream: + description: "The refStream of the image the test runs on." + required: true + +runs: + using: "composite" + steps: + - name: Login to Azure (IAM service principal) + if: inputs.cloudProvider == 'azure' + uses: ./.github/actions/login_azure + with: + azure_credentials: ${{ inputs.azureIAMCreateCredentials }} + + - name: Add Azure Keyvault access role + if: inputs.cloudProvider == 'azure' + shell: bash + run: | + UAMI=$(yq eval ".provider.azure.userAssignedIdentity | upcase" constellation-conf.yaml) + PRINCIPAL_ID=$(az identity list | yq ".[] | select(.id | test(\"(?i)$UAMI\"; \"g\")) | .principalId") + az role assignment create --role "Key Vault Secrets User" \ + --assignee "$PRINCIPAL_ID" \ + --scope /subscriptions/0d202bbb-4fa7-4af8-8125-58c269a05435/resourceGroups/e2e-test-creds/providers/Microsoft.KeyVault/vaults/opensearch-creds + + - name: Login to Azure (Cluster service principal) + if: inputs.cloudProvider == 'azure' + uses: ./.github/actions/login_azure + with: + azure_credentials: ${{ inputs.azureClusterCreateCredentials }} + + - name: Login to AWS (IAM service principal) + if: inputs.cloudProvider == 'aws' + uses: 
aws-actions/configure-aws-credentials@5fd3084fc36e372ff1fff382a39b10d03659f355 # v2.2.0 + with: + role-to-assume: arn:aws:iam::795746500882:role/GithubActionsE2EIAM + aws-region: eu-central-1 + # extend token expiry to 6 hours to ensure constellation can terminate + role-duration-seconds: 21600 + + - name: Add AWS Secrets Manager access role + if: inputs.cloudProvider == 'aws' + shell: bash + run: | + INSTANCE_PROFILE=$(yq eval ".provider.aws.iamProfileControlPlane" constellation-conf.yaml) + ROLE_NAME=$(aws iam get-instance-profile --instance-profile-name "$INSTANCE_PROFILE" | yq ".InstanceProfile.Roles[0].RoleName") + aws iam attach-role-policy \ + --role-name "$ROLE_NAME" \ + --policy-arn arn:aws:iam::795746500882:policy/GitHubActionsOSCredAccess + + - name: Login to AWS (Cluster service principal) + if: inputs.cloudProvider == 'aws' + uses: aws-actions/configure-aws-credentials@5fd3084fc36e372ff1fff382a39b10d03659f355 # v2.2.0 + with: + role-to-assume: arn:aws:iam::795746500882:role/GithubActionsE2ECluster + aws-region: eu-central-1 + # extend token expiry to 6 hours to ensure constellation can terminate + role-duration-seconds: 21600 + + - name: Cdbg deploy + shell: bash + run: | + echo "::group::cdbg deploy" + chmod +x $GITHUB_WORKSPACE/build/cdbg + cdbg deploy \ + --bootstrapper "${{ github.workspace }}/build/bootstrapper" \ + --upgrade-agent "${{ github.workspace }}/build/upgrade-agent" \ + --info logcollect=true \ + --info logcollect.github.actor="${{ github.triggering_actor }}" \ + --info logcollect.github.workflow="${{ github.workflow }}" \ + --info logcollect.github.run-id="${{ github.run_id }}" \ + --info logcollect.github.run-attempt="${{ github.run_attempt }}" \ + --info logcollect.github.ref-name="${{ github.ref_name }}" \ + --info logcollect.github.sha="${{ github.sha }}" \ + --info logcollect.github.runner-os="${{ runner.os }}" \ + --info logcollect.github.e2e-test-payload="${{ inputs.test }}" \ + --info logcollect.github.is-debug-cluster=false \ 
+ --info logcollect.github.ref-stream="${{ inputs.refStream }}" \ + --info logcollect.github.kubernetes-version="${{ inputs.kubernetesVersion }}" \ + --info logcollect.deployment-type="debugd" \ + --verbosity=-1 \ + --force + echo "::endgroup::" diff --git a/.github/actions/constellation_create/action.yml b/.github/actions/constellation_create/action.yml index e62d898a494..a045332475c 100644 --- a/.github/actions/constellation_create/action.yml +++ b/.github/actions/constellation_create/action.yml @@ -38,6 +38,15 @@ inputs: test: description: "The e2e test payload." required: true + azureClusterCreateCredentials: + description: "Azure credentials authorized to create a Constellation cluster." + required: true + azureIAMCreateCredentials: + description: "Azure credentials authorized to create an IAM configuration." + required: true + refStream: + description: "Reference and stream of the image in use" + required: false outputs: kubeconfig: @@ -119,29 +128,14 @@ runs: - name: Cdbg deploy if: inputs.isDebugImage == 'true' - shell: bash - run: | - echo "::group::cdbg deploy" - chmod +x $GITHUB_WORKSPACE/build/cdbg - cdbg deploy \ - --bootstrapper "${{ github.workspace }}/build/bootstrapper" \ - --upgrade-agent "${{ github.workspace }}/build/upgrade-agent" \ - --info logcollect=true \ - --info logcollect.github.actor="${{ github.triggering_actor }}" \ - --info logcollect.github.workflow="${{ github.workflow }}" \ - --info logcollect.github.run-id="${{ github.run_id }}" \ - --info logcollect.github.run-attempt="${{ github.run_attempt }}" \ - --info logcollect.github.ref-name="${{ github.ref_name }}" \ - --info logcollect.github.sha="${{ github.sha }}" \ - --info logcollect.github.runner-os="${{ runner.os }}" \ - --info logcollect.github.e2e-test-payload="${{ inputs.test }}" \ - --info logcollect.github.is-debug-cluster=false \ - --info logcollect.github.ref-stream="${{ inputs.refStream }}" \ - --info logcollect.github.kubernetes-version="${{ inputs.kubernetesVersion }}" 
\ - --info logcollect.deployment-type="debugd" \ - --verbosity=-1 \ - --force - echo "::endgroup::" + uses: ./.github/actions/cdbg_deploy + with: + cloudProvider: ${{ inputs.cloudProvider }} + test: ${{ inputs.test }} + azureClusterCreateCredentials: ${{ inputs.azureClusterCreateCredentials }} + azureIAMCreateCredentials: ${{ inputs.azureIAMCreateCredentials }} + refStream: ${{ inputs.refStream }} + kubernetesVersion: ${{ inputs.kubernetesVersion }} - name: Constellation init id: constellation-init diff --git a/.github/actions/deploy_logcollection/action.yml b/.github/actions/deploy_logcollection/action.yml index 3cb03e8c765..7f68f82f42f 100644 --- a/.github/actions/deploy_logcollection/action.yml +++ b/.github/actions/deploy_logcollection/action.yml @@ -50,7 +50,7 @@ runs: --fields github.sha="${{ github.sha }}" \ --fields github.runner-os="${{ runner.os }}" \ --fields github.e2e-test-payload="${{ inputs.test }}" \ - --fields github.isDebugImage="${{ inputs.isDebugImage }}" \ + --fields github.is-debug-cluster="${{ inputs.isDebugImage }}" \ --fields github.e2e-test-provider="${{ inputs.provider }}" \ --fields github.ref-stream="${{ inputs.refStream }}" \ --fields github.kubernetes-version="${{ inputs.kubernetesVersion }}" \ @@ -86,3 +86,17 @@ runs: helm repo update helm install filebeat elastic/filebeat \ --wait --timeout=1200s --values values.yml + + - name: Deploy Metricbeat + id: deploy-metricbeat + shell: bash + working-directory: ./metricbeat + env: + KUBECONFIG: ${{ inputs.kubeconfig }} + run: | + helm repo add elastic https://helm.elastic.co + helm repo update + helm install metricbeat-k8s elastic/metricbeat \ + --wait --timeout=1200s --values values-control-plane.yml + helm install metricbeat-system elastic/metricbeat \ + --wait --timeout=1200s --values values-all-nodes.yml diff --git a/.github/actions/e2e_test/action.yml b/.github/actions/e2e_test/action.yml index 8e512458c24..8bba5bf3da1 100644 --- a/.github/actions/e2e_test/action.yml +++ 
b/.github/actions/e2e_test/action.yml @@ -249,12 +249,14 @@ runs: fetchMeasurements: ${{ inputs.fetchMeasurements }} cliVersion: ${{ inputs.cliVersion }} azureSNPEnforcementPolicy: ${{ inputs.azureSNPEnforcementPolicy }} + azureIAMCreateCredentials: ${{ inputs.azureIAMCreateCredentials }} + azureClusterCreateCredentials: ${{ inputs.azureClusterCreateCredentials }} + kubernetesVersion: ${{ inputs.kubernetesVersion }} + refStream: ${{ inputs.refStream }} - - name: Deploy logcollection + - name: Deploy log- and metrics-collection (Kubernetes) id: deploy-logcollection - # TODO(msanft):temporarily deploy in debug clusters too to resolve "missing logs"-bug - # see https://dev.azure.com/Edgeless/Edgeless/_workitems/edit/3227 - # if: inputs.isDebugImage == 'false' + if: inputs.isDebugImage == 'false' uses: ./.github/actions/deploy_logcollection with: kubeconfig: ${{ steps.constellation-create.outputs.kubeconfig }} diff --git a/.github/workflows/build-logcollector-images.yml b/.github/workflows/build-logcollector-images.yml index 8311d6a7529..aff25729d22 100644 --- a/.github/workflows/build-logcollector-images.yml +++ b/.github/workflows/build-logcollector-images.yml @@ -37,3 +37,10 @@ jobs: name: filebeat-debugd dockerfile: debugd/filebeat/Dockerfile githubToken: ${{ secrets.GITHUB_TOKEN }} + + - name: Build and upload Metricbeat container image + uses: ./.github/actions/build_micro_service + with: + name: metricbeat-debugd + dockerfile: debugd/metricbeat/Dockerfile + githubToken: ${{ secrets.GITHUB_TOKEN }} diff --git a/debugd/filebeat/BUILD.bazel b/debugd/filebeat/BUILD.bazel index a04d140253b..16f5afcbe3a 100644 --- a/debugd/filebeat/BUILD.bazel +++ b/debugd/filebeat/BUILD.bazel @@ -3,10 +3,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library") go_library( name = "filebeat", srcs = ["assets.go"], - embedsrcs = [ - "templates/filebeat.yml", - "inputs.yml", - ], + embedsrcs = ["templates/filebeat.yml"], importpath = 
"github.com/edgelesssys/constellation/v2/debugd/filebeat", visibility = ["//visibility:public"], ) diff --git a/debugd/filebeat/Dockerfile b/debugd/filebeat/Dockerfile index d75565a27cd..2a0c3cb4fc9 100644 --- a/debugd/filebeat/Dockerfile +++ b/debugd/filebeat/Dockerfile @@ -2,7 +2,6 @@ FROM fedora:38@sha256:6fc00f83a1b6526b1c6562e30f552d109ba8e269259c6742a26efab1b7 RUN dnf install -y https://artifacts.elastic.co/downloads/beats/filebeat/filebeat-8.6.2-x86_64.rpm -COPY debugd/filebeat/inputs.yml /usr/share/filebeat/inputs.yml -COPY debugd/filebeat/templates/ /usr/share/filebeat/templates/ +COPY debugd/filebeat/templates/ /usr/share/constellogs/templates/ ENTRYPOINT ["/usr/share/filebeat/bin/filebeat", "-e", "--path.home", "/usr/share/filebeat", "--path.data", "/usr/share/filebeat/data"] diff --git a/debugd/filebeat/assets.go b/debugd/filebeat/assets.go index 0a0ec10eb48..744ef37998b 100644 --- a/debugd/filebeat/assets.go +++ b/debugd/filebeat/assets.go @@ -10,6 +10,5 @@ import "embed" // Assets are the exported Filebeat template files. 
// -//go:embed *.yml //go:embed templates/* var Assets embed.FS diff --git a/debugd/filebeat/inputs.yml b/debugd/filebeat/inputs.yml deleted file mode 100644 index bfc3d09201f..00000000000 --- a/debugd/filebeat/inputs.yml +++ /dev/null @@ -1,9 +0,0 @@ -- type: journald - enabled: true - id: journald - -- type: filestream - enabled: true - id: container - paths: - - /var/log/pods/*/*/*.log diff --git a/debugd/filebeat/templates/filebeat.yml b/debugd/filebeat/templates/filebeat.yml index 4eaedeb2086..463c1a3246a 100644 --- a/debugd/filebeat/templates/filebeat.yml +++ b/debugd/filebeat/templates/filebeat.yml @@ -9,12 +9,15 @@ logging: metrics.enabled: false level: warning -filebeat.config: - inputs: +filebeat.inputs: + - type: journald enabled: true - path: /usr/share/filebeat/inputs.yml - # reload.enabled: true - # reload.period: 10s + id: journald + - type: filestream + enabled: true + id: container + paths: + - /var/log/pods/*/*/*.log timestamp.precision: nanosecond @@ -27,3 +30,6 @@ processors: field: "log.file.path" target_prefix: "kubernetes" ignore_failure: true + {{ if .AddCloudMetadata }} + - add_cloud_metadata: ~ + {{ end }} diff --git a/debugd/internal/debugd/logcollector/logcollector.go b/debugd/internal/debugd/logcollector/logcollector.go index 7a7354a2523..28b04a521a0 100644 --- a/debugd/internal/debugd/logcollector/logcollector.go +++ b/debugd/internal/debugd/logcollector/logcollector.go @@ -32,7 +32,7 @@ const ( // NewStartTrigger returns a trigger func can be registered with an infos instance. // The trigger is called when infos changes to received state and starts a log collection pod -// with filebeat and logstash in case the flags are set. +// with filebeat, metricbeat and logstash in case the flags are set. // // This requires podman to be installed. 
func NewStartTrigger(ctx context.Context, wg *sync.WaitGroup, provider cloudprovider.Provider, @@ -74,7 +74,7 @@ func NewStartTrigger(ctx context.Context, wg *sync.WaitGroup, provider cloudprov return } - logger.Infof("Getting logstash pipeline template") + logger.Infof("Getting logstash pipeline template from image %s", versions.LogstashImage) tmpl, err := getTemplate(ctx, logger, versions.LogstashImage, "/run/logstash/templates/pipeline.conf", "/run/logstash") if err != nil { logger.Errorf("Getting logstash pipeline template: %v", err) @@ -97,28 +97,46 @@ func NewStartTrigger(ctx context.Context, wg *sync.WaitGroup, provider cloudprov InfoMap: infoMapM, Credentials: creds, } - if err := writeTemplate("/run/filebeat/filebeat.yml", tmpl, pipelineConf); err != nil { - logger.Errorf("Writing logstash pipeline: %v", err) + if err := writeTemplate("/run/logstash/pipeline/pipeline.conf", tmpl, pipelineConf); err != nil { + logger.Errorf("Writing logstash config: %v", err) return } - logger.Infof("Getting logstash config template") + logger.Infof("Getting filebeat config template from image %s", versions.FilebeatImage) tmpl, err = getTemplate(ctx, logger, versions.FilebeatImage, "/run/filebeat/templates/filebeat.yml", "/run/filebeat") if err != nil { logger.Errorf("Getting filebeat config template: %v", err) return } filebeatConf := filebeatConfInput{ - LogstashHost: "localhost:5044", + LogstashHost: "localhost:5044", + AddCloudMetadata: true, } - if err := writeTemplate("/run/logstash/pipeline/pipeline.conf", tmpl, filebeatConf); err != nil { - logger.Errorf("Writing filebeat config: %v", err) + if err := writeTemplate("/run/filebeat/filebeat.yml", tmpl, filebeatConf); err != nil { + logger.Errorf("Writing filebeat pipeline: %v", err) + return + } + + logger.Infof("Getting metricbeat config template from image %s", versions.MetricbeatImage) + tmpl, err = getTemplate(ctx, logger, versions.MetricbeatImage, "/run/metricbeat/templates/metricbeat.yml", "/run/metricbeat") + 
if err != nil { + logger.Errorf("Getting metricbeat config template: %v", err) + return + } + metricbeatConf := metricbeatConfInput{ + LogstashHost: "localhost:5044", + Port: 5066, + CollectSystemMetrics: true, + AddCloudMetadata: true, + } + if err := writeTemplate("/run/metricbeat/metricbeat.yml", tmpl, metricbeatConf); err != nil { + logger.Errorf("Writing metricbeat pipeline: %v", err) return } logger.Infof("Starting log collection pod") if err := startPod(ctx, logger); err != nil { - logger.Errorf("Starting filebeat: %v", err) + logger.Errorf("Starting log collection: %v", err) } }() } @@ -170,7 +188,7 @@ func getTemplate(ctx context.Context, logger *logger.Logger, image, templateDir, } func startPod(ctx context.Context, logger *logger.Logger) error { - // create a shared pod for filebeat and logstash + // create a shared pod for filebeat, metricbeat and logstash createPodArgs := []string{ "pod", "create", @@ -215,7 +233,7 @@ func startPod(ctx context.Context, logger *logger.Logger) error { "--volume=/run/systemd:/run/systemd:ro", "--volume=/run/systemd/journal/socket:/run/systemd/journal/socket:rw", "--volume=/run/state/var/log:/var/log:ro", - "--volume=/run/filebeat:/usr/share/filebeat/:ro", + "--volume=/run/filebeat/filebeat.yml:/usr/share/filebeat/filebeat.yml:ro", versions.FilebeatImage, } runFilebeatCmd := exec.CommandContext(ctx, "podman", runFilebeatArgs...) 
@@ -226,6 +244,28 @@ func startPod(ctx context.Context, logger *logger.Logger) error { return fmt.Errorf("failed to run filebeat: %w", err) } + // start metricbeat container + metricbeatLog := newCmdLogger(logger.Named("metricbeat")) + runMetricbeatArgs := []string{ + "run", + "--rm", + "--name=metricbeat", + "--pod=logcollection", + "--privileged", + "--log-driver=none", + "--volume=/proc:/hostfs/proc:ro", + "--volume=/sys/fs/cgroup:/hostfs/sys/fs/cgroup:ro", + "--volume=/run/metricbeat/metricbeat.yml:/usr/share/metricbeat/metricbeat.yml:ro", + versions.MetricbeatImage, + } + runMetricbeatCmd := exec.CommandContext(ctx, "podman", runMetricbeatArgs...) + logger.Infof("Run metricbeat command: %v", runMetricbeatCmd.String()) + runMetricbeatCmd.Stdout = metricbeatLog + runMetricbeatCmd.Stderr = metricbeatLog + if err := runMetricbeatCmd.Start(); err != nil { + return fmt.Errorf("failed to run metricbeat: %w", err) + } + return nil } @@ -238,7 +278,16 @@ type logstashConfInput struct { } type filebeatConfInput struct { - LogstashHost string + LogstashHost string + AddCloudMetadata bool +} + +type metricbeatConfInput struct { + Port int + LogstashHost string + CollectEtcdMetrics bool + CollectSystemMetrics bool + AddCloudMetadata bool } func writeTemplate(path string, templ *template.Template, in any) error { diff --git a/debugd/internal/debugd/logcollector/logstash/templates/pipeline.conf b/debugd/internal/debugd/logcollector/logstash/templates/pipeline.conf deleted file mode 100644 index 2f6e5d59c6e..00000000000 --- a/debugd/internal/debugd/logcollector/logstash/templates/pipeline.conf +++ /dev/null @@ -1,66 +0,0 @@ -input { - beats { - host => "0.0.0.0" - port => 5044 - } -} - -filter { - mutate { - # Remove some fields that are not needed. - remove_field => [ - "[agent]", - "[journald]", - "[syslog]", - "[systemd][invocation_id]", - "[event][original]", - "[log][offset]", - "[log][syslog]" - ] - - # Tag with the provided metadata. 
- add_field => { - {{ range $key, $value := .InfoMap }} - "[metadata][{{ $key }}]" => "{{ $value }}" - {{ end }} - } - } - - # Parse structured logs for following systemd units. - if [systemd][unit] in ["bootstrapper.service", "constellation-bootstrapper.service"] { - # skip_on_invalid_json below does not skip the whole filter, so let's use a cheap workaround here. - # See: - # https://discuss.elastic.co/t/skip-on-invalid-json-skipping-all-filters/215195 - # https://discuss.elastic.co/t/looking-for-a-way-to-detect-json/102263 - if [message] =~ "\A\{.+\}\z" { - json { - source => "message" - target => "logs" - skip_on_invalid_json => true - } - mutate { - replace => { - "message" => "%{[logs][msg]}" - } - remove_field => [ - "[logs][msg]", - "[logs][ts]" - ] - } - de_dot { - fields => ["[logs][peer.address]"] - } - } - } -} - -output { - opensearch { - hosts => "{{ .Host }}" - index => "{{ .IndexPrefix }}-%{+YYYY.MM.dd}" - user => "{{ .Credentials.Username }}" - password => "{{ .Credentials.Password }}" - ssl => true - ssl_certificate_verification => true - } -} diff --git a/debugd/logstash/templates/pipeline.conf b/debugd/logstash/templates/pipeline.conf index fc7319ae141..b67b4f6ab84 100644 --- a/debugd/logstash/templates/pipeline.conf +++ b/debugd/logstash/templates/pipeline.conf @@ -57,7 +57,7 @@ filter { output { opensearch { hosts => "{{ .Host }}" - index => "systemd-logs-%{+YYYY.MM.dd}" + index => "{{ .IndexPrefix }}-%{+YYYY.MM.dd}" user => "{{ .Credentials.Username }}" password => "{{ .Credentials.Password }}" ssl => true diff --git a/debugd/metricbeat/BUILD.bazel b/debugd/metricbeat/BUILD.bazel new file mode 100644 index 00000000000..3904d498d80 --- /dev/null +++ b/debugd/metricbeat/BUILD.bazel @@ -0,0 +1,9 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "metricbeat", + srcs = ["assets.go"], + embedsrcs = ["templates/metricbeat.yml"], + importpath = "github.com/edgelesssys/constellation/v2/debugd/metricbeat", + visibility = 
["//visibility:public"], +) diff --git a/debugd/metricbeat/Dockerfile b/debugd/metricbeat/Dockerfile new file mode 100644 index 00000000000..121bf14c648 --- /dev/null +++ b/debugd/metricbeat/Dockerfile @@ -0,0 +1,15 @@ +FROM fedora:38@sha256:61f921e0c7b51e162e6f94b14ef4e6b0d38eac5987286fe4f52a2c1158cc2399 AS release + +RUN dnf install -y https://artifacts.elastic.co/downloads/beats/metricbeat/metricbeat-8.9.2-x86_64.rpm + +COPY debugd/metricbeat/templates/ /usr/share/constellogs/templates/ + +# -e enables logging to stderr +# -E http.enabled=true enables http endpoint as seen in https://github.com/elastic/helm-charts/blob/main/metricbeat/templates/daemonset.yaml +# --path.home and --path.data set the paths to the metricbeat binary and data directory +ENTRYPOINT [ "/usr/share/metricbeat/bin/metricbeat", \ + "-e", \ + "-E", "http.enabled=true", \ + "--path.home", "/usr/share/metricbeat", \ + "--path.data", "/usr/share/metricbeat/data" \ +] diff --git a/debugd/metricbeat/assets.go b/debugd/metricbeat/assets.go new file mode 100644 index 00000000000..8f3f954f1b7 --- /dev/null +++ b/debugd/metricbeat/assets.go @@ -0,0 +1,14 @@ +/* +Copyright (c) Edgeless Systems GmbH + +SPDX-License-Identifier: AGPL-3.0-only +*/ + +package metricbeat + +import "embed" + +// Assets are the exported Metricbeat template files. 
+// +//go:embed templates/* +var Assets embed.FS diff --git a/debugd/metricbeat/templates/metricbeat.yml b/debugd/metricbeat/templates/metricbeat.yml new file mode 100644 index 00000000000..7278c39125e --- /dev/null +++ b/debugd/metricbeat/templates/metricbeat.yml @@ -0,0 +1,63 @@ +http.port: {{ .Port }} + +output.logstash: + hosts: ["{{ .LogstashHost }}"] + +output.console: + enabled: false + +logging: + to_files: false + metrics.enabled: false + level: warning + +timestamp.precision: nanosecond + +# https://www.elastic.co/guide/en/beats/metricbeat/current/configuration-path.html#_system_hostfs +system.hostfs: "/hostfs" + +metricbeat.modules: + {{ if .CollectSystemMetrics }} + - module: system + enabled: true + metricsets: + - cpu # CPU usage + - load # CPU load averages + - memory # Memory usage + - network # Network IO + - process # Per process metrics + - process_summary # Process summary + #- uptime # System Uptime + #- socket_summary # Socket summary + #- core # Per CPU core usage + - diskio # Disk IO + - filesystem # File system usage for each mountpoint + - fsstat # File system summary metrics + #- raid # Raid + #- socket # Sockets and connection info (linux only) + #- service # systemd service information + cpu.metrics: ["percentages","normalized_percentages"] + period: 10s + processes: ['.*'] + # To monitor host metrics from within a container. 
As per: + # https://www.elastic.co/guide/en/beats/metricbeat/current/metricbeat-module-system.html#_example_configuration_59 + hostfs: "/hostfs" + {{ end }} + {{ if .CollectEtcdMetrics }} + - module: etcd + enabled: true + metricsets: ["metrics"] + period: 30s + hosts: ["https://localhost:2379"] + ssl: + certificate_authorities: ["/etc/kubernetes/pki/etcd/ca.crt"] + certificate: "/etc/kubernetes/pki/etcd/peer.crt" + key: "/etc/kubernetes/pki/etcd/peer.key" + verification_mode: "full" + {{ end }} + + +processors: + {{ if .AddCloudMetadata }} + - add_cloud_metadata: ~ + {{ end }} diff --git a/dev-docs/workflows/logcollection.md b/dev-docs/workflows/logcollection.md index b5f543f7bb6..6e0445b33a2 100644 --- a/dev-docs/workflows/logcollection.md +++ b/dev-docs/workflows/logcollection.md @@ -8,6 +8,14 @@ The logcollection functionality can be deployed to both [debug](./debug-cluster. In debug clusters, logcollection functionality should be deployed automatically through the debug daemon `debugd`, which runs *before* the bootstrapper and can therefore, contrary to non-debug clusters, also collect logs of the bootstrapper. +> [!WARNING] +> If logs from an E2E test run for a debug-cluster with a bootstrapping-failure are missing in OpenSearch, this might be caused by a race condition +> between the termination of the cluster and the start-up of the logcollection containers in the debugd. +> If the failure can be reproduced manually, it is best to do so and observe the serial console of the bootstrapping node with the following command until the logcollection containers have started. +> ```bash +> journalctl _SYSTEMD_UNIT=debugd.service | grep logcollect +> ``` + ## Deployment in Non-Debug Clusters In non-debug clusters, logcollection functionality needs to be explicitly deployed as a Kubernetes Deployment through Helm. 
To do that, a few steps need to be followed: @@ -29,28 +37,39 @@ In non-debug clusters, logcollection functionality needs to be explicitly deploy One can add additional key-value pairs to the configuration by appending `--info key=value` to the command. These key-value pairs will be attached to the log entries and can be used to filter them in OpenSearch. For example, it might be helpful to add a `test=` tag to be able to filter out logs from a specific test run. +2. Add the Elastic Helm repository + ```bash + helm repo add elastic https://helm.elastic.co + helm repo update + ``` 2. Deploy Logstash ```bash cd logstash - make add - make install + helm install logstash elastic/logstash \ + --wait --timeout=1200s --values values.yml cd .. ``` This will add the required Logstash Helm charts and deploy them to your cluster. -3. Deploy Filebeat +2. Deploy Beats ```bash + cd metricbeat + helm install metricbeat-k8s elastic/metricbeat \ + --wait --timeout=1200s --values values-control-plane.yml + helm install metricbeat-system elastic/metricbeat \ + --wait --timeout=1200s --values values-all-nodes.yml + cd .. cd filebeat - make add - make install + helm install filebeat elastic/filebeat \ + --wait --timeout=1200s --values values.yml cd .. ``` - This will add the required Filebeat Helm charts and deploy them to your cluster. + This will add the required Filebeat and Metricbeat Helm charts and deploy them to your cluster. -To remove Logstash or Filebeat, `cd` into the corresponding directory and run `make remove`. +To remove Logstash or one of the beats, run `helm uninstall` with the corresponding release name, i.e. one of `logstash`, `filebeat`, `metricbeat-k8s` or `metricbeat-system`. 
## Inspecting Logs in OpenSearch diff --git a/docs/screencasts/README.md b/docs/screencasts/README.md index 889aa570e84..2c50153fa83 100644 --- a/docs/screencasts/README.md +++ b/docs/screencasts/README.md @@ -2,7 +2,7 @@ [Asciinema](https://github.com/asciinema/asciinema) is used to automatically generate terminal session recordings for our documentation. To fully automate this we use scripts -that utilize [expect](https://linux.die.net/man/1/expect) to interface with different +that utilize [expect](https://manpages.debian.org/testing/expect/expect.1.en.html) to interface with different CLI tools, and run them inside a [container](docker/Dockerfile). ## Usage diff --git a/hack/logcollector/cmd/template.go b/hack/logcollector/cmd/template.go index 2e170f9b777..951ffb08d40 100644 --- a/hack/logcollector/cmd/template.go +++ b/hack/logcollector/cmd/template.go @@ -63,6 +63,13 @@ func runTemplate(cmd *cobra.Command, _ []string) error { return fmt.Errorf("prepare filebeat: %w", err) } + metricbeatPreparer := internal.NewMetricbeatPreparer( + flags.port, + ) + if err := metricbeatPreparer.Prepare(flags.dir); err != nil { + return fmt.Errorf("prepare metricbeat: %w", err) + } + return nil } diff --git a/hack/logcollector/internal/BUILD.bazel b/hack/logcollector/internal/BUILD.bazel index a9145de7a82..a24bd36d4e5 100644 --- a/hack/logcollector/internal/BUILD.bazel +++ b/hack/logcollector/internal/BUILD.bazel @@ -5,17 +5,21 @@ go_library( srcs = [ "filebeat.go", "logstash.go", + "metricbeat.go", "prepare.go", ], embedsrcs = [ "templates/filebeat/values.yml", "templates/logstash/values.yml", + "templates/metricbeat/values-all-nodes.yml", + "templates/metricbeat/values-control-plane.yml", ], importpath = "github.com/edgelesssys/constellation/v2/hack/logcollector/internal", visibility = ["//hack/logcollector:__subpackages__"], deps = [ "//debugd/filebeat", "//debugd/logstash", + "//debugd/metricbeat", "//internal/file", "@com_github_spf13_afero//:afero", 
"@in_gopkg_yaml_v3//:yaml_v3", diff --git a/hack/logcollector/internal/filebeat.go b/hack/logcollector/internal/filebeat.go index 07ed42076f6..bb7fd84938a 100644 --- a/hack/logcollector/internal/filebeat.go +++ b/hack/logcollector/internal/filebeat.go @@ -41,17 +41,13 @@ func NewFilebeatPreparer(port int) *FilebeatPreparer { // Prepare prepares the Filebeat Helm chart by templating the filebeat.yml and inputs.yml files and placing them in the specified directory. func (p *FilebeatPreparer) Prepare(dir string) error { templatedFilebeatYaml, err := p.template(filebeatAssets, "templates/filebeat.yml", FilebeatTemplateData{ - LogstashHost: fmt.Sprintf("logstash-logstash:%d", p.port), + LogstashHost: fmt.Sprintf("logstash-logstash:%d", p.port), + AddCloudMetadata: true, }) if err != nil { return fmt.Errorf("template filebeat.yml: %w", err) } - inputsYaml, err := filebeatAssets.ReadFile("inputs.yml") - if err != nil { - return fmt.Errorf("read log4j2.properties: %w", err) - } - rawHelmValues, err := filebeatHelmAssets.ReadFile("templates/filebeat/values.yml") if err != nil { return fmt.Errorf("read values.yml: %w", err) @@ -63,8 +59,6 @@ func (p *FilebeatPreparer) Prepare(dir string) error { } helmValuesYaml.Daemonset.FilebeatConfig.FilebeatYml = templatedFilebeatYaml.String() - helmValuesYaml.Daemonset.FilebeatConfig.InputsYml = string(inputsYaml) - helmValues, err := yaml.Marshal(helmValuesYaml) if err != nil { return fmt.Errorf("marshal values.yml: %w", err) @@ -79,7 +73,8 @@ func (p *FilebeatPreparer) Prepare(dir string) error { // FilebeatTemplateData is template data. type FilebeatTemplateData struct { - LogstashHost string + LogstashHost string + AddCloudMetadata bool } // FilebeatHelmValues repesents the Helm values.yml. 
@@ -90,7 +85,6 @@ type FilebeatHelmValues struct { Enabled bool `yaml:"enabled"` FilebeatConfig struct { FilebeatYml string `yaml:"filebeat.yml"` - InputsYml string `yaml:"inputs.yml"` } `yaml:"filebeatConfig"` ExtraEnvs []interface{} `yaml:"extraEnvs"` SecretMounts []interface{} `yaml:"secretMounts"` diff --git a/hack/logcollector/internal/metricbeat.go b/hack/logcollector/internal/metricbeat.go new file mode 100644 index 00000000000..603f2dcd4fb --- /dev/null +++ b/hack/logcollector/internal/metricbeat.go @@ -0,0 +1,164 @@ +/* +Copyright (c) Edgeless Systems GmbH + +SPDX-License-Identifier: AGPL-3.0-only +*/ +package internal + +import ( + "embed" + "fmt" + "path/filepath" + + "github.com/edgelesssys/constellation/v2/debugd/metricbeat" + "github.com/edgelesssys/constellation/v2/internal/file" + "github.com/spf13/afero" + "gopkg.in/yaml.v3" +) + +var ( + //go:embed templates/metricbeat/* + metricbeatHelmAssets embed.FS + + metricbeatAssets = metricbeat.Assets +) + +// MetricbeatPreparer prepares the Metricbeat Helm chart. +type MetricbeatPreparer struct { + fh file.Handler + port int + templatePreparer +} + +// NewMetricbeatPreparer returns a new MetricbeatPreparer. +func NewMetricbeatPreparer(port int) *MetricbeatPreparer { + return &MetricbeatPreparer{ + fh: file.NewHandler(afero.NewOsFs()), + port: port, + } +} + +// Prepare prepares the Metricbeat Helm chart by templating the metricbeat.yml file and placing it +// in the specified directory. 
+func (p *MetricbeatPreparer) Prepare(dir string) error { + templatedSystemMetricbeatYaml, err := p.template(metricbeatAssets, "templates/metricbeat.yml", MetricbeatTemplateData{ + LogstashHost: fmt.Sprintf("logstash-logstash:%d", p.port), + Port: 5066, + CollectSystemMetrics: true, + AddCloudMetadata: true, + }) + if err != nil { + return fmt.Errorf("template system metricbeat.yml: %w", err) + } + templatedK8sMetricbeatYaml, err := p.template(metricbeatAssets, "templates/metricbeat.yml", MetricbeatTemplateData{ + LogstashHost: fmt.Sprintf("logstash-logstash:%d", p.port), + Port: 5067, + CollectEtcdMetrics: true, + AddCloudMetadata: true, + }) + if err != nil { + return fmt.Errorf("template k8s metricbeat.yml: %w", err) + } + + rawAllNodesHelmValues, err := metricbeatHelmAssets.ReadFile("templates/metricbeat/values-all-nodes.yml") + if err != nil { + return fmt.Errorf("read values-all-nodes.yml: %w", err) + } + rawControlPlaneHelmValues, err := metricbeatHelmAssets.ReadFile("templates/metricbeat/values-control-plane.yml") + if err != nil { + return fmt.Errorf("read values-control-plane.yml: %w", err) + } + + allNodesHelmValuesYaml := &MetricbeatHelmValues{} + if err := yaml.Unmarshal(rawAllNodesHelmValues, allNodesHelmValuesYaml); err != nil { + return fmt.Errorf("unmarshal values-all-nodes.yml: %w", err) + } + controlPlaneHelmValuesYaml := &MetricbeatHelmValues{} + if err := yaml.Unmarshal(rawControlPlaneHelmValues, controlPlaneHelmValuesYaml); err != nil { + return fmt.Errorf("unmarshal values-control-plane.yml: %w", err) + } + + allNodesHelmValuesYaml.Daemonset.MetricbeatConfig.MetricbeatYml = templatedSystemMetricbeatYaml.String() + controlPlaneHelmValuesYaml.Daemonset.MetricbeatConfig.MetricbeatYml = templatedK8sMetricbeatYaml.String() + + allNodesHelmValues, err := yaml.Marshal(allNodesHelmValuesYaml) + if err != nil { + return fmt.Errorf("marshal values-all-nodes.yml: %w", err) + } + controlPlaneHelmValues, err := yaml.Marshal(controlPlaneHelmValuesYaml) + if 
err != nil { + return fmt.Errorf("marshal values-control-plane.yml: %w", err) + } + + if err = p.fh.Write(filepath.Join(dir, "metricbeat", "values-all-nodes.yml"), allNodesHelmValues, file.OptMkdirAll); err != nil { + return fmt.Errorf("write values-all-nodes.yml: %w", err) + } + if err = p.fh.Write(filepath.Join(dir, "metricbeat", "values-control-plane.yml"), controlPlaneHelmValues, file.OptMkdirAll); err != nil { + return fmt.Errorf("write values-control-plane.yml: %w", err) + } + + return nil +} + +// MetricbeatTemplateData is template data. +type MetricbeatTemplateData struct { + LogstashHost string + Port int + CollectEtcdMetrics bool + CollectSystemMetrics bool + CollectK8sMetrics bool + AddK8sMetadata bool + AddCloudMetadata bool +} + +// MetricbeatHelmValues represents the Helm values.yml. +type MetricbeatHelmValues struct { + Image string `yaml:"image"` + ImageTag string `yaml:"imageTag"` + KubeStateMetrics struct { + Enabled bool `yaml:"enabled"` + } `yaml:"kube_state_metrics"` + Deployment struct { + Enabled bool `yaml:"enabled"` + } `yaml:"deployment"` + Daemonset Daemonset `yaml:"daemonset"` + ClusterRoleRules []struct { + APIGroups []string `yaml:"apiGroups,omitempty"` + Resources []string `yaml:"resources,omitempty"` + Verbs []string `yaml:"verbs"` + NonResourceURLs []string `yaml:"nonResourceURLs,omitempty"` + } `yaml:"clusterRoleRules"` +} + +// Daemonset represents the nested daemonset for the Helm values.yml. 
+type Daemonset struct { + Enabled bool `yaml:"enabled"` + HostNetworking bool `yaml:"hostNetworking"` + MetricbeatConfig struct { + MetricbeatYml string `yaml:"metricbeat.yml"` + } `yaml:"metricbeatConfig"` + ExtraEnvs []any `yaml:"extraEnvs"` + SecretMounts []any `yaml:"secretMounts"` + NodeSelector any `yaml:"nodeSelector"` + Tolerations []struct { + Key string `yaml:"key"` + Operator string `yaml:"operator"` + Effect string `yaml:"effect"` + } `yaml:"tolerations"` + SecurityContext struct { + Privileged bool `yaml:"privileged"` + RunAsUser int `yaml:"runAsUser"` + } `yaml:"securityContext"` + ExtraVolumeMounts []struct { + Name string `yaml:"name"` + MountPath string `yaml:"mountPath"` + ReadOnly bool `yaml:"readOnly"` + } `yaml:"extraVolumeMounts"` + ExtraVolumes []struct { + Name string `yaml:"name"` + HostPath struct { + Path string `yaml:"path"` + Type string `yaml:"type"` + } `yaml:"hostPath"` + } `yaml:"extraVolumes"` +} diff --git a/hack/logcollector/internal/templates/metricbeat/values-all-nodes.yml b/hack/logcollector/internal/templates/metricbeat/values-all-nodes.yml new file mode 100644 index 00000000000..aa884b7c45d --- /dev/null +++ b/hack/logcollector/internal/templates/metricbeat/values-all-nodes.yml @@ -0,0 +1,79 @@ +# Helm Values for the DaemonSet that is deployed on all nodes in the cluster and +# collects system metrics. 
+image: ghcr.io/edgelesssys/beats/metricbeat-oss +imageTag: 8.11.0@sha256:c2488378e794467f2a7214a56da0de017db1f2c28198721f12d74ad9cc263d08 + +kube_state_metrics: + enabled: false + +deployment: + enabled: false + +daemonset: + enabled: true + metricbeatConfig: + metricbeat.yml: "" + hostNetworking: true + extraEnvs: [] + secretMounts: [] + tolerations: + - key: node-role.kubernetes.io/control-plane + operator: Exists + effect: NoSchedule + securityContext: + privileged: true + runAsUser: 0 + extraVolumeMounts: + - name: runsystemd + mountPath: /run/systemd + readOnly: true + - name: varrundbus + mountPath: /var/run/dbus + readOnly: true + extraVolumes: + - name: runsystemd + hostPath: + path: /run/systemd + type: "" + - name: varrundbus + hostPath: + path: /var/run/dbus + type: "" + +clusterRoleRules: + - apiGroups: [""] + resources: + - nodes + - namespaces + - events + - pods + - services + verbs: ["get", "list", "watch"] + - apiGroups: ["extensions"] + resources: + - replicasets + verbs: ["get", "list", "watch"] + - apiGroups: ["apps"] + resources: + - statefulsets + - deployments + - replicasets + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: + - nodes/stats + verbs: ["get"] + - apiGroups: ["batch"] + resources: + - jobs + verbs: ["get", "list", "watch"] + - apiGroups: + - "" + resources: + - nodes/stats + verbs: + - get + - nonResourceURLs: + - "/metrics" + verbs: + - get diff --git a/hack/logcollector/internal/templates/metricbeat/values-control-plane.yml b/hack/logcollector/internal/templates/metricbeat/values-control-plane.yml new file mode 100644 index 00000000000..8182359b290 --- /dev/null +++ b/hack/logcollector/internal/templates/metricbeat/values-control-plane.yml @@ -0,0 +1,88 @@ +# Helm Values for the DaemonSet that is deployed on all control plane nodes in the cluster and +# collects Kubernetes and etcd metrics. 
+image: ghcr.io/edgelesssys/beats/metricbeat-oss +imageTag: 8.11.0@sha256:c2488378e794467f2a7214a56da0de017db1f2c28198721f12d74ad9cc263d08 + +kube_state_metrics: + enabled: false + +deployment: + enabled: false + +daemonset: + enabled: true + metricbeatConfig: + metricbeat.yml: "" + hostNetworking: true + extraEnvs: [] + secretMounts: [] + nodeSelector: + node-role.kubernetes.io/control-plane: "" + tolerations: + - key: node-role.kubernetes.io/control-plane + operator: Exists + effect: NoSchedule + securityContext: + privileged: true + runAsUser: 0 + extraVolumeMounts: + - name: runsystemd + mountPath: /run/systemd + readOnly: true + - name: varrundbus + mountPath: /var/run/dbus + readOnly: true + - name: etcdcerts + mountPath: /etc/kubernetes/pki/etcd + readOnly: true + extraVolumes: + - name: runsystemd + hostPath: + path: /run/systemd + type: "" + - name: varrundbus + hostPath: + path: /var/run/dbus + type: "" + - name: etcdcerts + hostPath: + path: /etc/kubernetes/pki/etcd + type: "" + +clusterRoleRules: + - apiGroups: [""] + resources: + - nodes + - namespaces + - events + - pods + - services + verbs: ["get", "list", "watch"] + - apiGroups: ["extensions"] + resources: + - replicasets + verbs: ["get", "list", "watch"] + - apiGroups: ["apps"] + resources: + - statefulsets + - deployments + - replicasets + verbs: ["get", "list", "watch"] + - apiGroups: [""] + resources: + - nodes/stats + verbs: ["get"] + - apiGroups: ["batch"] + resources: + - jobs + verbs: ["get", "list", "watch"] + - apiGroups: + - "" + resources: + - nodes/stats + verbs: + - get + - nonResourceURLs: + - "/metrics" + verbs: + - get diff --git a/internal/osimage/aws/awsupload.go b/internal/osimage/aws/awsupload.go index fee5fbd1dfb..407a5ac4080 100644 --- a/internal/osimage/aws/awsupload.go +++ b/internal/osimage/aws/awsupload.go @@ -588,7 +588,7 @@ func toPtr[T any](v T) *T { const ( waitInterval = 15 * time.Second - maxWait = 15 * time.Minute + maxWait = 30 * time.Minute timestampFormat = 
"20060102150405" ) diff --git a/internal/versions/versions.go b/internal/versions/versions.go index c01b35b383d..576331d5431 100644 --- a/internal/versions/versions.go +++ b/internal/versions/versions.go @@ -170,9 +170,11 @@ const ( // NodeMaintenanceOperatorImage is the image for the node maintenance operator. NodeMaintenanceOperatorImage = "quay.io/medik8s/node-maintenance-operator:v0.15.0@sha256:8cb8dad93283268282c30e75c68f4bd76b28def4b68b563d2f9db9c74225d634" // renovate:container // LogstashImage is the container image of logstash, used for log collection by debugd. - LogstashImage = "ghcr.io/edgelesssys/constellation/filebeat-debugd:v2.11.0-pre.0.20230821060133-60bf770e62bc@sha256:89ea1925345922a5471f26de6bc2344a83a76f2f908a6f048230699f8b810114" // renovate:container + LogstashImage = "ghcr.io/edgelesssys/constellation/logstash-debugd:v2.12.0-pre.0.20230922130827-dbc6d7cec47b@sha256:d2258bd6f02394b33cca26b4565a8e1f44b29d85d0dec76027bac6afb7da2bee" // renovate:container // FilebeatImage is the container image of filebeat, used for log collection by debugd. - FilebeatImage = "ghcr.io/edgelesssys/constellation/filebeat-debugd:v2.11.0-pre.0.20230821060133-60bf770e62bc@sha256:89ea1925345922a5471f26de6bc2344a83a76f2f908a6f048230699f8b810114" // renovate:container + FilebeatImage = "ghcr.io/edgelesssys/constellation/filebeat-debugd:v2.12.0-pre.0.20230922130827-dbc6d7cec47b@sha256:460a5e754438d97ece2e1672ea469055f2bdfdd99290b6c727c493d030d0c382" // renovate:container + // MetricbeatImage is the container image of metricbeat, used for metrics collection by debugd. + MetricbeatImage = "ghcr.io/edgelesssys/constellation/metricbeat-debugd:v2.12.0-pre.0.20230922130827-dbc6d7cec47b@sha256:63ba8b5aa06b8186e9b6d1f37967363c2807aef05d998a5db70df08ee6734259" // renovate:container // currently supported versions. //nolint:revive