Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ci: fix debugd logcollection #2355

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 92 additions & 0 deletions .github/actions/cdbg_deploy/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Composite action that grants the cluster's cloud identity access to the
# log-collection credentials and then runs `cdbg deploy` with log collection
# enabled.
name: Cdbg deploy
description: Deploy the Constellation Bootstrapper to the cluster via the debugd.

inputs:
  test:
    description: "The e2e test payload."
    required: true
  azureClusterCreateCredentials:
    description: "Azure credentials authorized to create a Constellation cluster."
    required: true
  azureIAMCreateCredentials:
    description: "Azure credentials authorized to create an IAM configuration."
    required: true
  cloudProvider:
    description: "The cloud provider to use."
    required: true

runs:
  using: "composite"
  steps:
    # --- Azure: the role assignment is done with the IAM credentials, then
    # --- the action logs back in with the cluster credentials.
    - name: Login to Azure (IAM service principal)
      if: inputs.cloudProvider == 'azure'
      uses: ./.github/actions/login_azure
      with:
        azure_credentials: ${{ inputs.azureIAMCreateCredentials }}

    # Looks up the principal ID of the user-assigned identity named in
    # constellation-conf.yaml and assigns it the "Key Vault Secrets User" role
    # on the opensearch-creds vault.
    - name: Add Azure Keyvault access role
      if: inputs.cloudProvider == 'azure'
      shell: bash
      run: |
        UAMI=$(yq eval ".provider.azure.userAssignedIdentity | upcase" constellation-conf.yaml)
        PRINCIPAL_ID=$(az identity list | yq ".[] | select(.id | test(\"(?i)$UAMI\"; \"g\")) | .principalId")
        az role assignment create --role "Key Vault Secrets User" \
          --assignee "$PRINCIPAL_ID" \
          --scope /subscriptions/0d202bbb-4fa7-4af8-8125-58c269a05435/resourceGroups/e2e-test-creds/providers/Microsoft.KeyVault/vaults/opensearch-creds

    - name: Login to Azure (Cluster service principal)
      if: inputs.cloudProvider == 'azure'
      uses: ./.github/actions/login_azure
      with:
        azure_credentials: ${{ inputs.azureClusterCreateCredentials }}

    # --- AWS: the policy attachment is done with the IAM role, then the
    # --- action assumes the cluster role again.
    - name: Login to AWS (IAM service principal)
      if: inputs.cloudProvider == 'aws'
      uses: aws-actions/configure-aws-credentials@5fd3084fc36e372ff1fff382a39b10d03659f355 # v2.2.0
      with:
        role-to-assume: arn:aws:iam::795746500882:role/GithubActionsE2EIAM
        aws-region: eu-central-1
        # extend token expiry to 6 hours to ensure constellation can terminate
        role-duration-seconds: 21600

    # Resolves the first role of the control-plane instance profile named in
    # constellation-conf.yaml and attaches the GitHubActionsOSCredAccess
    # policy to it.
    - name: Add AWS Secrets Manager access role
      if: inputs.cloudProvider == 'aws'
      shell: bash
      run: |
        INSTANCE_PROFILE=$(yq eval ".provider.aws.iamProfileControlPlane" constellation-conf.yaml)
        ROLE_NAME=$(aws iam get-instance-profile --instance-profile-name "$INSTANCE_PROFILE" | yq ".InstanceProfile.Roles[0].RoleName")
        aws iam attach-role-policy \
          --role-name "$ROLE_NAME" \
          --policy-arn arn:aws:iam::795746500882:policy/GitHubActionsOSCredAccess

    - name: Login to AWS (Cluster service principal)
      if: inputs.cloudProvider == 'aws'
      uses: aws-actions/configure-aws-credentials@5fd3084fc36e372ff1fff382a39b10d03659f355 # v2.2.0
      with:
        role-to-assume: arn:aws:iam::795746500882:role/GithubActionsE2ECluster
        aws-region: eu-central-1
        # extend token expiry to 6 hours to ensure constellation can terminate
        role-duration-seconds: 21600

    # Runs on every provider (no `if:`). The workflow metadata is attached to
    # the deployment as `--info logcollect.*` key/value pairs.
    - name: Cdbg deploy
      shell: bash
      # Context values (branch name, workflow name, test payload, ...) are
      # handed to the script as environment variables instead of being
      # expanded into the script text by `${{ }}`. A value containing quotes,
      # `$(...)` or backticks is then treated as data by bash, not as code.
      env:
        LOGCOLLECT_ACTOR: ${{ github.triggering_actor }}
        LOGCOLLECT_WORKFLOW: ${{ github.workflow }}
        LOGCOLLECT_RUN_ID: ${{ github.run_id }}
        LOGCOLLECT_RUN_ATTEMPT: ${{ github.run_attempt }}
        LOGCOLLECT_REF_NAME: ${{ github.ref_name }}
        LOGCOLLECT_SHA: ${{ github.sha }}
        LOGCOLLECT_RUNNER_OS: ${{ runner.os }}
        LOGCOLLECT_TEST_PAYLOAD: ${{ inputs.test }}
      run: |
        echo "::group::cdbg deploy"
        chmod +x "$GITHUB_WORKSPACE/build/cdbg"
        cdbg deploy \
          --bootstrapper "${{ github.workspace }}/build/bootstrapper" \
          --upgrade-agent "${{ github.workspace }}/build/upgrade-agent" \
          --info logcollect=true \
          --info logcollect.github.actor="$LOGCOLLECT_ACTOR" \
          --info logcollect.github.workflow="$LOGCOLLECT_WORKFLOW" \
          --info logcollect.github.run-id="$LOGCOLLECT_RUN_ID" \
          --info logcollect.github.run-attempt="$LOGCOLLECT_RUN_ATTEMPT" \
          --info logcollect.github.ref-name="$LOGCOLLECT_REF_NAME" \
          --info logcollect.github.sha="$LOGCOLLECT_SHA" \
          --info logcollect.github.runner-os="$LOGCOLLECT_RUNNER_OS" \
          --info logcollect.github.e2e-test-payload="$LOGCOLLECT_TEST_PAYLOAD" \
          --info logcollect.github.is-debug-cluster=false \
          --info logcollect.deployment-type="debugd" \
          --verbosity=-1 \
          --force
        echo "::endgroup::"
33 changes: 12 additions & 21 deletions .github/actions/constellation_create/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ inputs:
test:
description: "The e2e test payload."
required: true
azureClusterCreateCredentials:
description: "Azure credentials authorized to create a Constellation cluster."
required: true
azureIAMCreateCredentials:
description: "Azure credentials authorized to create an IAM configuration."
required: true

outputs:
kubeconfig:
Expand Down Expand Up @@ -119,27 +125,12 @@ runs:

- name: Cdbg deploy
if: inputs.isDebugImage == 'true'
shell: bash
run: |
echo "::group::cdbg deploy"
chmod +x $GITHUB_WORKSPACE/build/cdbg
cdbg deploy \
--bootstrapper "${{ github.workspace }}/build/bootstrapper" \
--upgrade-agent "${{ github.workspace }}/build/upgrade-agent" \
--info logcollect=true \
--info logcollect.github.actor="${{ github.triggering_actor }}" \
--info logcollect.github.workflow="${{ github.workflow }}" \
--info logcollect.github.run-id="${{ github.run_id }}" \
--info logcollect.github.run-attempt="${{ github.run_attempt }}" \
--info logcollect.github.ref-name="${{ github.ref_name }}" \
--info logcollect.github.sha="${{ github.sha }}" \
--info logcollect.github.runner-os="${{ runner.os }}" \
--info logcollect.github.e2e-test-payload="${{ inputs.test }}" \
--info logcollect.github.is-debug-cluster=false \
--info logcollect.deployment-type="debugd" \
--verbosity=-1 \
--force
echo "::endgroup::"
uses: ./.github/actions/cdbg_deploy
with:
cloudProvider: ${{ inputs.cloudProvider }}
test: ${{ inputs.test }}
azureClusterCreateCredentials: ${{ inputs.azureClusterCreateCredentials }}
azureIAMCreateCredentials: ${{ inputs.azureIAMCreateCredentials }}

- name: Constellation init
id: constellation-init
Expand Down
4 changes: 3 additions & 1 deletion .github/actions/e2e_test/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -247,8 +247,10 @@ runs:
fetchMeasurements: ${{ inputs.fetchMeasurements }}
cliVersion: ${{ inputs.cliVersion }}
azureSNPEnforcementPolicy: ${{ inputs.azureSNPEnforcementPolicy }}
azureIAMCreateCredentials: ${{ inputs.azureIAMCreateCredentials }}
azureClusterCreateCredentials: ${{ inputs.azureClusterCreateCredentials }}

- name: Deploy log- and metrics-collection
- name: Deploy log- and metrics-collection (Kubernetes)
id: deploy-logcollection
if: inputs.isDebugImage == 'false'
uses: ./.github/actions/deploy_logcollection
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build-logcollector-images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ jobs:
dockerfile: debugd/filebeat/Dockerfile
githubToken: ${{ secrets.GITHUB_TOKEN }}

- name: Build and upload Filebeat container image
- name: Build and upload Metricbeat container image
uses: ./.github/actions/build_micro_service
with:
name: metricbeat-debugd
Expand Down
5 changes: 1 addition & 4 deletions debugd/filebeat/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,7 @@ load("@io_bazel_rules_go//go:def.bzl", "go_library")
go_library(
name = "filebeat",
srcs = ["assets.go"],
embedsrcs = [
"templates/filebeat.yml",
"inputs.yml",
],
embedsrcs = ["templates/filebeat.yml"],
importpath = "github.com/edgelesssys/constellation/v2/debugd/filebeat",
visibility = ["//visibility:public"],
)
3 changes: 1 addition & 2 deletions debugd/filebeat/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ FROM fedora:38@sha256:61f921e0c7b51e162e6f94b14ef4e6b0d38eac5987286fe4f52a2c1158

RUN dnf install -y https://artifacts.elastic.co/downloads/beats/filebeat/filebeat-8.6.2-x86_64.rpm

COPY debugd/filebeat/inputs.yml /usr/share/filebeat/inputs.yml
COPY debugd/filebeat/templates/ /usr/share/filebeat/templates/
COPY debugd/filebeat/templates/ /usr/share/constellogs/templates/

ENTRYPOINT ["/usr/share/filebeat/bin/filebeat", "-e", "--path.home", "/usr/share/filebeat", "--path.data", "/usr/share/filebeat/data"]
1 change: 0 additions & 1 deletion debugd/filebeat/assets.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,5 @@ import "embed"

// Assets are the exported Filebeat template files.
//
//go:embed *.yml
//go:embed templates/*
var Assets embed.FS
9 changes: 0 additions & 9 deletions debugd/filebeat/inputs.yml

This file was deleted.

16 changes: 11 additions & 5 deletions debugd/filebeat/templates/filebeat.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,15 @@ logging:
metrics.enabled: false
level: warning

filebeat.config:
inputs:
filebeat.inputs:
- type: journald
enabled: true
path: /usr/share/filebeat/inputs.yml
# reload.enabled: true
# reload.period: 10s
id: journald
- type: filestream
enabled: true
id: container
paths:
- /var/log/pods/*/*/*.log

timestamp.precision: nanosecond

Expand All @@ -27,3 +30,6 @@ processors:
field: "log.file.path"
target_prefix: "kubernetes"
ignore_failure: true
{{ if .AddCloudMetadata }}
- add_cloud_metadata: ~
{{ end }}
17 changes: 10 additions & 7 deletions debugd/internal/debugd/logcollector/logcollector.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ func NewStartTrigger(ctx context.Context, wg *sync.WaitGroup, provider cloudprov
return
}

logger.Infof("Getting logstash pipeline template")
logger.Infof("Getting logstash pipeline template from image %s", versions.LogstashImage)
tmpl, err := getTemplate(ctx, logger, versions.LogstashImage, "/run/logstash/templates/pipeline.conf", "/run/logstash")
if err != nil {
logger.Errorf("Getting logstash pipeline template: %v", err)
Expand Down Expand Up @@ -102,21 +102,22 @@ func NewStartTrigger(ctx context.Context, wg *sync.WaitGroup, provider cloudprov
return
}

logger.Infof("Getting filebeat config template")
logger.Infof("Getting filebeat config template from image %s", versions.FilebeatImage)
tmpl, err = getTemplate(ctx, logger, versions.FilebeatImage, "/run/filebeat/templates/filebeat.yml", "/run/filebeat")
if err != nil {
logger.Errorf("Getting filebeat config template: %v", err)
return
}
filebeatConf := filebeatConfInput{
LogstashHost: "localhost:5044",
LogstashHost: "localhost:5044",
AddCloudMetadata: true,
}
if err := writeTemplate("/run/filebeat/filebeat.yml", tmpl, filebeatConf); err != nil {
logger.Errorf("Writing filebeat pipeline: %v", err)
return
}

logger.Infof("Getting metricbeat config template")
logger.Infof("Getting metricbeat config template from image %s", versions.MetricbeatImage)
tmpl, err = getTemplate(ctx, logger, versions.MetricbeatImage, "/run/metricbeat/templates/metricbeat.yml", "/run/metricbeat")
if err != nil {
logger.Errorf("Getting metricbeat config template: %v", err)
Expand All @@ -126,6 +127,7 @@ func NewStartTrigger(ctx context.Context, wg *sync.WaitGroup, provider cloudprov
LogstashHost: "localhost:5044",
Port: 5066,
CollectSystemMetrics: true,
AddCloudMetadata: true,
}
if err := writeTemplate("/run/metricbeat/metricbeat.yml", tmpl, metricbeatConf); err != nil {
logger.Errorf("Writing metricbeat pipeline: %v", err)
Expand Down Expand Up @@ -231,7 +233,7 @@ func startPod(ctx context.Context, logger *logger.Logger) error {
"--volume=/run/systemd:/run/systemd:ro",
"--volume=/run/systemd/journal/socket:/run/systemd/journal/socket:rw",
"--volume=/run/state/var/log:/var/log:ro",
"--volume=/run/filebeat:/usr/share/filebeat/:ro",
"--volume=/run/filebeat/filebeat.yml:/usr/share/filebeat/filebeat.yml:ro",
versions.FilebeatImage,
}
runFilebeatCmd := exec.CommandContext(ctx, "podman", runFilebeatArgs...)
Expand All @@ -253,7 +255,7 @@ func startPod(ctx context.Context, logger *logger.Logger) error {
"--log-driver=none",
"--volume=/proc:/hostfs/proc:ro",
"--volume=/sys/fs/cgroup:/hostfs/sys/fs/cgroup:ro",
"--volume=/run/metricbeat:/usr/share/metricbeat/:ro",
"--volume=/run/metricbeat/metricbeat.yml:/usr/share/metricbeat/metricbeat.yml:ro",
versions.MetricbeatImage,
}
runMetricbeatCmd := exec.CommandContext(ctx, "podman", runMetricbeatArgs...)
Expand All @@ -276,7 +278,8 @@ type logstashConfInput struct {
}

type filebeatConfInput struct {
LogstashHost string
LogstashHost string
AddCloudMetadata bool
}

type metricbeatConfInput struct {
Expand Down
2 changes: 1 addition & 1 deletion debugd/metricbeat/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ FROM fedora:38@sha256:61f921e0c7b51e162e6f94b14ef4e6b0d38eac5987286fe4f52a2c1158

RUN dnf install -y https://artifacts.elastic.co/downloads/beats/metricbeat/metricbeat-8.9.2-x86_64.rpm

COPY debugd/metricbeat/templates/ /usr/share/metricbeat/templates/
COPY debugd/metricbeat/templates/ /usr/share/constellogs/templates/

# -e enables logging to stderr
# -E http.enabled=true enables http endpoint as seen in https://github.com/elastic/helm-charts/blob/main/metricbeat/templates/daemonset.yaml
Expand Down
8 changes: 8 additions & 0 deletions dev-docs/workflows/logcollection.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,14 @@ The logcollection functionality can be deployed to both [debug](./debug-cluster.
In debug clusters, logcollection functionality should be deployed automatically through the debug daemon `debugd`, which runs *before* the bootstrapper
and can therefore, contrary to non-debug clusters, also collect logs of the bootstrapper.

> [!WARNING]
> If logs from an E2E test run for a debug-cluster with a bootstrapping-failure are missing in OpenSearch, this might be caused by a race condition
> between the termination of the cluster and the start-up of the logcollection containers in the debugd.
> If the failure can be reproduced manually, it is best to do so and observe the serial console of the bootstrapping node with the following command until the logcollection containers have started.
> ```bash
> journalctl _SYSTEMD_UNIT=debugd.service | grep logcollect
> ```

## Deployment in Non-Debug Clusters

In non-debug clusters, logcollection functionality needs to be explicitly deployed as a Kubernetes Deployment through Helm. To do that, a few steps need to be followed:
Expand Down
6 changes: 4 additions & 2 deletions hack/logcollector/internal/filebeat.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ func NewFilebeatPreparer(port int) *FilebeatPreparer {
// Prepare prepares the Filebeat Helm chart by templating the filebeat.yml and inputs.yml files and placing them in the specified directory.
func (p *FilebeatPreparer) Prepare(dir string) error {
templatedFilebeatYaml, err := p.template(filebeatAssets, "templates/filebeat.yml", FilebeatTemplateData{
LogstashHost: fmt.Sprintf("logstash-logstash:%d", p.port),
LogstashHost: fmt.Sprintf("logstash-logstash:%d", p.port),
AddCloudMetadata: true,
})
if err != nil {
return fmt.Errorf("template filebeat.yml: %w", err)
Expand Down Expand Up @@ -79,7 +80,8 @@ func (p *FilebeatPreparer) Prepare(dir string) error {

// FilebeatTemplateData is template data.
type FilebeatTemplateData struct {
LogstashHost string
LogstashHost string
AddCloudMetadata bool
}

// FilebeatHelmValues represents the Helm values.yml.
Expand Down
2 changes: 1 addition & 1 deletion internal/osimage/aws/awsupload.go
Original file line number Diff line number Diff line change
Expand Up @@ -588,7 +588,7 @@ func toPtr[T any](v T) *T {

const (
waitInterval = 15 * time.Second
maxWait = 15 * time.Minute
maxWait = 30 * time.Minute
timestampFormat = "20060102150405"
)

Expand Down
6 changes: 3 additions & 3 deletions internal/versions/versions.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,11 +170,11 @@ const (
// NodeMaintenanceOperatorImage is the image for the node maintenance operator.
NodeMaintenanceOperatorImage = "quay.io/medik8s/node-maintenance-operator:v0.15.0@sha256:8cb8dad93283268282c30e75c68f4bd76b28def4b68b563d2f9db9c74225d634" // renovate:container
// LogstashImage is the container image of logstash, used for log collection by debugd.
LogstashImage = "ghcr.io/edgelesssys/constellation/filebeat-debugd:v2.11.0-pre.0.20230821060133-60bf770e62bc@sha256:89ea1925345922a5471f26de6bc2344a83a76f2f908a6f048230699f8b810114" // renovate:container
LogstashImage = "ghcr.io/edgelesssys/constellation/logstash-debugd:v2.12.0-pre.0.20230922130827-dbc6d7cec47b@sha256:d2258bd6f02394b33cca26b4565a8e1f44b29d85d0dec76027bac6afb7da2bee" // renovate:container
// FilebeatImage is the container image of filebeat, used for log collection by debugd.
FilebeatImage = "ghcr.io/edgelesssys/constellation/filebeat-debugd:v2.11.0-pre.0.20230821060133-60bf770e62bc@sha256:89ea1925345922a5471f26de6bc2344a83a76f2f908a6f048230699f8b810114" // renovate:container
FilebeatImage = "ghcr.io/edgelesssys/constellation/filebeat-debugd:v2.12.0-pre.0.20230922130827-dbc6d7cec47b@sha256:460a5e754438d97ece2e1672ea469055f2bdfdd99290b6c727c493d030d0c382" // renovate:container
// MetricbeatImage is the container image of metricbeat, used for metrics collection by debugd.
MetricbeatImage = "ghcr.io/edgelesssys/constellation/metricbeat-debugd:v2.12.0-pre.0.20230918092128-9bc15334c100@sha256:5941e91aebdbf5a93c041fab03d020aade0715a9f7aa7723cca631b49065b1e0" // renovate:container
MetricbeatImage = "ghcr.io/edgelesssys/constellation/metricbeat-debugd:v2.12.0-pre.0.20230922130827-dbc6d7cec47b@sha256:63ba8b5aa06b8186e9b6d1f37967363c2807aef05d998a5db70df08ee6734259" // renovate:container

// currently supported versions.
//nolint:revive
Expand Down
Loading