diff --git a/.github/actions/cdbg_deploy/action.yml b/.github/actions/cdbg_deploy/action.yml new file mode 100644 index 0000000000..d3e4a0d219 --- /dev/null +++ b/.github/actions/cdbg_deploy/action.yml @@ -0,0 +1,92 @@ +name: Cdbg deploy +description: Deploy the Constellation Bootstrapper to the cluster via the debugd. + +inputs: + test: + description: "The e2e test payload." + required: true + azureClusterCreateCredentials: + description: "Azure credentials authorized to create a Constellation cluster." + required: true + azureIAMCreateCredentials: + description: "Azure credentials authorized to create an IAM configuration." + required: true + cloudProvider: + description: "The cloud provider to use." + required: true + +runs: + using: "composite" + steps: + - name: Login to Azure (IAM service principal) + if: inputs.cloudProvider == 'azure' + uses: ./.github/actions/login_azure + with: + azure_credentials: ${{ inputs.azureIAMCreateCredentials }} + + - name: Add Azure Keyvault access role + if: inputs.cloudProvider == 'azure' + shell: bash + run: | + UAMI=$(yq eval ".provider.azure.userAssignedIdentity | upcase" constellation-conf.yaml) + PRINCIPAL_ID=$(az identity list | yq ".[] | select(.id | test(\"(?i)$UAMI\"; \"g\")) | .principalId") + az role assignment create --role "Key Vault Secrets User" \ + --assignee "$PRINCIPAL_ID" \ + --scope /subscriptions/0d202bbb-4fa7-4af8-8125-58c269a05435/resourceGroups/e2e-test-creds/providers/Microsoft.KeyVault/vaults/opensearch-creds + + - name: Login to Azure (Cluster service principal) + if: inputs.cloudProvider == 'azure' + uses: ./.github/actions/login_azure + with: + azure_credentials: ${{ inputs.azureClusterCreateCredentials }} + + - name: Login to AWS (IAM service principal) + if: inputs.cloudProvider == 'aws' + uses: aws-actions/configure-aws-credentials@5fd3084fc36e372ff1fff382a39b10d03659f355 # v2.2.0 + with: + role-to-assume: arn:aws:iam::795746500882:role/GithubActionsE2EIAM + aws-region: eu-central-1 + # extend 
token expiry to 6 hours to ensure constellation can terminate + role-duration-seconds: 21600 + + - name: Add AWS Secrets Manager access role + if: inputs.cloudProvider == 'aws' + shell: bash + run: | + INSTANCE_PROFILE=$(yq eval ".provider.aws.iamProfileControlPlane" constellation-conf.yaml) + ROLE_NAME=$(aws iam get-instance-profile --instance-profile-name "$INSTANCE_PROFILE" | yq ".InstanceProfile.Roles[0].RoleName") + aws iam attach-role-policy \ + --role-name "$ROLE_NAME" \ + --policy-arn arn:aws:iam::795746500882:policy/GitHubActionsOSCredAccess + + - name: Login to AWS (Cluster service principal) + if: inputs.cloudProvider == 'aws' + uses: aws-actions/configure-aws-credentials@5fd3084fc36e372ff1fff382a39b10d03659f355 # v2.2.0 + with: + role-to-assume: arn:aws:iam::795746500882:role/GithubActionsE2ECluster + aws-region: eu-central-1 + # extend token expiry to 6 hours to ensure constellation can terminate + role-duration-seconds: 21600 + + - name: Cdbg deploy + shell: bash + run: | + echo "::group::cdbg deploy" + chmod +x $GITHUB_WORKSPACE/build/cdbg + cdbg deploy \ + --bootstrapper "${{ github.workspace }}/build/bootstrapper" \ + --upgrade-agent "${{ github.workspace }}/build/upgrade-agent" \ + --info logcollect=true \ + --info logcollect.github.actor="${{ github.triggering_actor }}" \ + --info logcollect.github.workflow="${{ github.workflow }}" \ + --info logcollect.github.run-id="${{ github.run_id }}" \ + --info logcollect.github.run-attempt="${{ github.run_attempt }}" \ + --info logcollect.github.ref-name="${{ github.ref_name }}" \ + --info logcollect.github.sha="${{ github.sha }}" \ + --info logcollect.github.runner-os="${{ runner.os }}" \ + --info logcollect.github.e2e-test-payload="${{ inputs.test }}" \ + --info logcollect.github.is-debug-cluster=false \ + --info logcollect.deployment-type="debugd" \ + --verbosity=-1 \ + --force + echo "::endgroup::" diff --git a/.github/actions/constellation_create/action.yml 
b/.github/actions/constellation_create/action.yml index 2935c0690f..67869cf626 100644 --- a/.github/actions/constellation_create/action.yml +++ b/.github/actions/constellation_create/action.yml @@ -38,6 +38,12 @@ inputs: test: description: "The e2e test payload." required: true + azureClusterCreateCredentials: + description: "Azure credentials authorized to create a Constellation cluster." + required: true + azureIAMCreateCredentials: + description: "Azure credentials authorized to create an IAM configuration." + required: true outputs: kubeconfig: @@ -119,27 +125,12 @@ runs: - name: Cdbg deploy if: inputs.isDebugImage == 'true' - shell: bash - run: | - echo "::group::cdbg deploy" - chmod +x $GITHUB_WORKSPACE/build/cdbg - cdbg deploy \ - --bootstrapper "${{ github.workspace }}/build/bootstrapper" \ - --upgrade-agent "${{ github.workspace }}/build/upgrade-agent" \ - --info logcollect=true \ - --info logcollect.github.actor="${{ github.triggering_actor }}" \ - --info logcollect.github.workflow="${{ github.workflow }}" \ - --info logcollect.github.run-id="${{ github.run_id }}" \ - --info logcollect.github.run-attempt="${{ github.run_attempt }}" \ - --info logcollect.github.ref-name="${{ github.ref_name }}" \ - --info logcollect.github.sha="${{ github.sha }}" \ - --info logcollect.github.runner-os="${{ runner.os }}" \ - --info logcollect.github.e2e-test-payload="${{ inputs.test }}" \ - --info logcollect.github.is-debug-cluster=false \ - --info logcollect.deployment-type="debugd" \ - --verbosity=-1 \ - --force - echo "::endgroup::" + uses: ./.github/actions/cdbg_deploy + with: + cloudProvider: ${{ inputs.cloudProvider }} + test: ${{ inputs.test }} + azureClusterCreateCredentials: ${{ inputs.azureClusterCreateCredentials }} + azureIAMCreateCredentials: ${{ inputs.azureIAMCreateCredentials }} - name: Constellation init id: constellation-init diff --git a/.github/actions/e2e_test/action.yml b/.github/actions/e2e_test/action.yml index b800814b06..aac401071f 100644 --- 
a/.github/actions/e2e_test/action.yml +++ b/.github/actions/e2e_test/action.yml @@ -247,8 +247,10 @@ runs: fetchMeasurements: ${{ inputs.fetchMeasurements }} cliVersion: ${{ inputs.cliVersion }} azureSNPEnforcementPolicy: ${{ inputs.azureSNPEnforcementPolicy }} + azureIAMCreateCredentials: ${{ inputs.azureIAMCreateCredentials }} + azureClusterCreateCredentials: ${{ inputs.azureClusterCreateCredentials }} - - name: Deploy log- and metrics-collection + - name: Deploy log- and metrics-collection (Kubernetes) id: deploy-logcollection if: inputs.isDebugImage == 'false' uses: ./.github/actions/deploy_logcollection diff --git a/.github/workflows/build-logcollector-images.yml b/.github/workflows/build-logcollector-images.yml index d60fa44703..aff25729d2 100644 --- a/.github/workflows/build-logcollector-images.yml +++ b/.github/workflows/build-logcollector-images.yml @@ -38,7 +38,7 @@ jobs: dockerfile: debugd/filebeat/Dockerfile githubToken: ${{ secrets.GITHUB_TOKEN }} - - name: Build and upload Filebeat container image + - name: Build and upload Metricbeat container image uses: ./.github/actions/build_micro_service with: name: metricbeat-debugd diff --git a/debugd/filebeat/BUILD.bazel b/debugd/filebeat/BUILD.bazel index a04d140253..16f5afcbe3 100644 --- a/debugd/filebeat/BUILD.bazel +++ b/debugd/filebeat/BUILD.bazel @@ -3,10 +3,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library") go_library( name = "filebeat", srcs = ["assets.go"], - embedsrcs = [ - "templates/filebeat.yml", - "inputs.yml", - ], + embedsrcs = ["templates/filebeat.yml"], importpath = "github.com/edgelesssys/constellation/v2/debugd/filebeat", visibility = ["//visibility:public"], ) diff --git a/debugd/filebeat/Dockerfile b/debugd/filebeat/Dockerfile index 5db49fd1d6..83b8ede8b3 100644 --- a/debugd/filebeat/Dockerfile +++ b/debugd/filebeat/Dockerfile @@ -2,7 +2,6 @@ FROM fedora:38@sha256:61f921e0c7b51e162e6f94b14ef4e6b0d38eac5987286fe4f52a2c1158 RUN dnf install -y 
https://artifacts.elastic.co/downloads/beats/filebeat/filebeat-8.6.2-x86_64.rpm -COPY debugd/filebeat/inputs.yml /usr/share/filebeat/inputs.yml -COPY debugd/filebeat/templates/ /usr/share/filebeat/templates/ +COPY debugd/filebeat/templates/ /usr/share/constellogs/templates/ ENTRYPOINT ["/usr/share/filebeat/bin/filebeat", "-e", "--path.home", "/usr/share/filebeat", "--path.data", "/usr/share/filebeat/data"] diff --git a/debugd/filebeat/assets.go b/debugd/filebeat/assets.go index 0a0ec10eb4..744ef37998 100644 --- a/debugd/filebeat/assets.go +++ b/debugd/filebeat/assets.go @@ -10,6 +10,5 @@ import "embed" // Assets are the exported Filebeat template files. // -//go:embed *.yml //go:embed templates/* var Assets embed.FS diff --git a/debugd/filebeat/inputs.yml b/debugd/filebeat/inputs.yml deleted file mode 100644 index bfc3d09201..0000000000 --- a/debugd/filebeat/inputs.yml +++ /dev/null @@ -1,9 +0,0 @@ -- type: journald - enabled: true - id: journald - -- type: filestream - enabled: true - id: container - paths: - - /var/log/pods/*/*/*.log diff --git a/debugd/filebeat/templates/filebeat.yml b/debugd/filebeat/templates/filebeat.yml index 4eaedeb208..463c1a3246 100644 --- a/debugd/filebeat/templates/filebeat.yml +++ b/debugd/filebeat/templates/filebeat.yml @@ -9,12 +9,15 @@ logging: metrics.enabled: false level: warning -filebeat.config: - inputs: +filebeat.inputs: + - type: journald enabled: true - path: /usr/share/filebeat/inputs.yml - # reload.enabled: true - # reload.period: 10s + id: journald + - type: filestream + enabled: true + id: container + paths: + - /var/log/pods/*/*/*.log timestamp.precision: nanosecond @@ -27,3 +30,6 @@ processors: field: "log.file.path" target_prefix: "kubernetes" ignore_failure: true + {{ if .AddCloudMetadata }} + - add_cloud_metadata: ~ + {{ end }} diff --git a/debugd/internal/debugd/logcollector/logcollector.go b/debugd/internal/debugd/logcollector/logcollector.go index 1d6e9b4ab5..28b04a521a 100644 --- 
a/debugd/internal/debugd/logcollector/logcollector.go +++ b/debugd/internal/debugd/logcollector/logcollector.go @@ -74,7 +74,7 @@ func NewStartTrigger(ctx context.Context, wg *sync.WaitGroup, provider cloudprov return } - logger.Infof("Getting logstash pipeline template") + logger.Infof("Getting logstash pipeline template from image %s", versions.LogstashImage) tmpl, err := getTemplate(ctx, logger, versions.LogstashImage, "/run/logstash/templates/pipeline.conf", "/run/logstash") if err != nil { logger.Errorf("Getting logstash pipeline template: %v", err) @@ -102,21 +102,22 @@ func NewStartTrigger(ctx context.Context, wg *sync.WaitGroup, provider cloudprov return } - logger.Infof("Getting filebeat config template") + logger.Infof("Getting filebeat config template from image %s", versions.FilebeatImage) tmpl, err = getTemplate(ctx, logger, versions.FilebeatImage, "/run/filebeat/templates/filebeat.yml", "/run/filebeat") if err != nil { logger.Errorf("Getting filebeat config template: %v", err) return } filebeatConf := filebeatConfInput{ - LogstashHost: "localhost:5044", + LogstashHost: "localhost:5044", + AddCloudMetadata: true, } if err := writeTemplate("/run/filebeat/filebeat.yml", tmpl, filebeatConf); err != nil { logger.Errorf("Writing filebeat pipeline: %v", err) return } - logger.Infof("Getting metricbeat config template") + logger.Infof("Getting metricbeat config template from image %s", versions.MetricbeatImage) tmpl, err = getTemplate(ctx, logger, versions.MetricbeatImage, "/run/metricbeat/templates/metricbeat.yml", "/run/metricbeat") if err != nil { logger.Errorf("Getting metricbeat config template: %v", err) @@ -126,6 +127,7 @@ func NewStartTrigger(ctx context.Context, wg *sync.WaitGroup, provider cloudprov LogstashHost: "localhost:5044", Port: 5066, CollectSystemMetrics: true, + AddCloudMetadata: true, } if err := writeTemplate("/run/metricbeat/metricbeat.yml", tmpl, metricbeatConf); err != nil { logger.Errorf("Writing metricbeat pipeline: %v", err) @@ 
-231,7 +233,7 @@ func startPod(ctx context.Context, logger *logger.Logger) error { "--volume=/run/systemd:/run/systemd:ro", "--volume=/run/systemd/journal/socket:/run/systemd/journal/socket:rw", "--volume=/run/state/var/log:/var/log:ro", - "--volume=/run/filebeat:/usr/share/filebeat/:ro", + "--volume=/run/filebeat/filebeat.yml:/usr/share/filebeat/filebeat.yml:ro", versions.FilebeatImage, } runFilebeatCmd := exec.CommandContext(ctx, "podman", runFilebeatArgs...) @@ -253,7 +255,7 @@ func startPod(ctx context.Context, logger *logger.Logger) error { "--log-driver=none", "--volume=/proc:/hostfs/proc:ro", "--volume=/sys/fs/cgroup:/hostfs/sys/fs/cgroup:ro", - "--volume=/run/metricbeat:/usr/share/metricbeat/:ro", + "--volume=/run/metricbeat/metricbeat.yml:/usr/share/metricbeat/metricbeat.yml:ro", versions.MetricbeatImage, } runMetricbeatCmd := exec.CommandContext(ctx, "podman", runMetricbeatArgs...) @@ -276,7 +278,8 @@ type logstashConfInput struct { } type filebeatConfInput struct { - LogstashHost string + LogstashHost string + AddCloudMetadata bool } type metricbeatConfInput struct { diff --git a/debugd/metricbeat/Dockerfile b/debugd/metricbeat/Dockerfile index ccccb5f03a..121bf14c64 100644 --- a/debugd/metricbeat/Dockerfile +++ b/debugd/metricbeat/Dockerfile @@ -2,7 +2,7 @@ FROM fedora:38@sha256:61f921e0c7b51e162e6f94b14ef4e6b0d38eac5987286fe4f52a2c1158 RUN dnf install -y https://artifacts.elastic.co/downloads/beats/metricbeat/metricbeat-8.9.2-x86_64.rpm -COPY debugd/metricbeat/templates/ /usr/share/metricbeat/templates/ +COPY debugd/metricbeat/templates/ /usr/share/constellogs/templates/ # -e enables logging to stderr # -E http.enabled=true enables http endpoint as seen in https://github.com/elastic/helm-charts/blob/main/metricbeat/templates/daemonset.yaml diff --git a/dev-docs/workflows/logcollection.md b/dev-docs/workflows/logcollection.md index 35a22c04fc..6e0445b33a 100644 --- a/dev-docs/workflows/logcollection.md +++ b/dev-docs/workflows/logcollection.md @@ -8,6 
+8,14 @@ The logcollection functionality can be deployed to both [debug](./debug-cluster. In debug clusters, logcollection functionality should be deployed automatically through the debug daemon `debugd`, which runs *before* the bootstrapper and can therefore, contrary to non-debug clusters, also collect logs of the bootstrapper. +> [!WARNING] +> If logs from an E2E test run for a debug-cluster with a bootstrapping-failure are missing in OpenSearch, this might be caused by a race condition +> between the termination of the cluster and the start-up of the logcollection containers in the debugd. +> If the failure can be reproduced manually, it is best to do so and observe the serial console of the bootstrapping node with the following command until the logcollection containers have started. +> ```bash +> journalctl _SYSTEMD_UNIT=debugd.service | grep logcollect +> ``` + ## Deployment in Non-Debug Clusters In non-debug clusters, logcollection functionality needs to be explicitly deployed as a Kubernetes Deployment through Helm. To do that, a few steps need to be followed: diff --git a/hack/logcollector/internal/filebeat.go b/hack/logcollector/internal/filebeat.go index 07ed42076f..d4c83b6962 100644 --- a/hack/logcollector/internal/filebeat.go +++ b/hack/logcollector/internal/filebeat.go @@ -41,7 +41,8 @@ func NewFilebeatPreparer(port int) *FilebeatPreparer { // Prepare prepares the Filebeat Helm chart by templating the filebeat.yml and inputs.yml files and placing them in the specified directory.
func (p *FilebeatPreparer) Prepare(dir string) error { templatedFilebeatYaml, err := p.template(filebeatAssets, "templates/filebeat.yml", FilebeatTemplateData{ - LogstashHost: fmt.Sprintf("logstash-logstash:%d", p.port), + LogstashHost: fmt.Sprintf("logstash-logstash:%d", p.port), + AddCloudMetadata: true, }) if err != nil { return fmt.Errorf("template filebeat.yml: %w", err) @@ -79,7 +80,8 @@ func (p *FilebeatPreparer) Prepare(dir string) error { // FilebeatTemplateData is template data. type FilebeatTemplateData struct { - LogstashHost string + LogstashHost string + AddCloudMetadata bool } // FilebeatHelmValues repesents the Helm values.yml. diff --git a/internal/osimage/aws/awsupload.go b/internal/osimage/aws/awsupload.go index fee5fbd1df..407a5ac408 100644 --- a/internal/osimage/aws/awsupload.go +++ b/internal/osimage/aws/awsupload.go @@ -588,7 +588,7 @@ func toPtr[T any](v T) *T { const ( waitInterval = 15 * time.Second - maxWait = 15 * time.Minute + maxWait = 30 * time.Minute timestampFormat = "20060102150405" ) diff --git a/internal/versions/versions.go b/internal/versions/versions.go index a037399a11..576331d543 100644 --- a/internal/versions/versions.go +++ b/internal/versions/versions.go @@ -170,11 +170,11 @@ const ( // NodeMaintenanceOperatorImage is the image for the node maintenance operator. NodeMaintenanceOperatorImage = "quay.io/medik8s/node-maintenance-operator:v0.15.0@sha256:8cb8dad93283268282c30e75c68f4bd76b28def4b68b563d2f9db9c74225d634" // renovate:container // LogstashImage is the container image of logstash, used for log collection by debugd. 
- LogstashImage = "ghcr.io/edgelesssys/constellation/filebeat-debugd:v2.11.0-pre.0.20230821060133-60bf770e62bc@sha256:89ea1925345922a5471f26de6bc2344a83a76f2f908a6f048230699f8b810114" // renovate:container + LogstashImage = "ghcr.io/edgelesssys/constellation/logstash-debugd:v2.12.0-pre.0.20230922130827-dbc6d7cec47b@sha256:d2258bd6f02394b33cca26b4565a8e1f44b29d85d0dec76027bac6afb7da2bee" // renovate:container // FilebeatImage is the container image of filebeat, used for log collection by debugd. - FilebeatImage = "ghcr.io/edgelesssys/constellation/filebeat-debugd:v2.11.0-pre.0.20230821060133-60bf770e62bc@sha256:89ea1925345922a5471f26de6bc2344a83a76f2f908a6f048230699f8b810114" // renovate:container + FilebeatImage = "ghcr.io/edgelesssys/constellation/filebeat-debugd:v2.12.0-pre.0.20230922130827-dbc6d7cec47b@sha256:460a5e754438d97ece2e1672ea469055f2bdfdd99290b6c727c493d030d0c382" // renovate:container // MetricbeatImage is the container image of filebeat, used for log collection by debugd. - MetricbeatImage = "ghcr.io/edgelesssys/constellation/metricbeat-debugd:v2.12.0-pre.0.20230918092128-9bc15334c100@sha256:5941e91aebdbf5a93c041fab03d020aade0715a9f7aa7723cca631b49065b1e0" // renovate:container + MetricbeatImage = "ghcr.io/edgelesssys/constellation/metricbeat-debugd:v2.12.0-pre.0.20230922130827-dbc6d7cec47b@sha256:63ba8b5aa06b8186e9b6d1f37967363c2807aef05d998a5db70df08ee6734259" // renovate:container // currently supported versions. //nolint:revive