From 5415992df88941e2bc3dfdf53dc288bf70247de9 Mon Sep 17 00:00:00 2001 From: Kulwant Singh Date: Mon, 3 Jun 2024 11:53:23 -0700 Subject: [PATCH] Fix fluent bit terraform tests (#410) * fix fluent bit terraform tests 1. Fix timeout issue in fluent bit tests when destroying cluster 2. Fix CW template yaml with workingDir field * Change fluentbit integ tests to include fluentbit configuration changes * Address mitali's comment about file name * Update CW agent version in fluentbit tests --- ...ws.yaml => cwagent-windows-daemonset.yaml} | 1 + .../fluenbit-windows-configmap.yaml | 24 +- terraform/eks/daemon/fluent/common/main.tf | 2 +- terraform/eks/daemon/fluent/windows/main.tf | 443 ++++++++++++++++-- terraform/eks/daemon/windows/main.tf | 2 +- test/fluent/fluent_test.go | 1 + 6 files changed, 438 insertions(+), 35 deletions(-) rename terraform/eks/daemon/default_resources/{cwagent-windows.yaml => cwagent-windows-daemonset.yaml} (94%) diff --git a/terraform/eks/daemon/default_resources/cwagent-windows.yaml b/terraform/eks/daemon/default_resources/cwagent-windows-daemonset.yaml similarity index 94% rename from terraform/eks/daemon/default_resources/cwagent-windows.yaml rename to terraform/eks/daemon/default_resources/cwagent-windows-daemonset.yaml index 04116b291..d52f83fa5 100644 --- a/terraform/eks/daemon/default_resources/cwagent-windows.yaml +++ b/terraform/eks/daemon/default_resources/cwagent-windows-daemonset.yaml @@ -21,6 +21,7 @@ spec: containers: - name: cloudwatch-agent image: CW_TEST_IMAGE + workingDir: "%CONTAINER_SANDBOX_MOUNT_POINT%\\Program Files\\Amazon\\AmazonCloudWatchAgent" volumeMounts: - name: cwagentconfig mountPath: C:\Program Files\Amazon\AmazonCloudWatchAgent\cwagentconfig diff --git a/terraform/eks/daemon/default_resources/fluenbit-windows-configmap.yaml b/terraform/eks/daemon/default_resources/fluenbit-windows-configmap.yaml index 012da153f..1b5517125 100644 --- a/terraform/eks/daemon/default_resources/fluenbit-windows-configmap.yaml +++ 
b/terraform/eks/daemon/default_resources/fluenbit-windows-configmap.yaml @@ -91,11 +91,22 @@ data: Refresh_Interval 10 Read_from_Head ${READ_FROM_HEAD} + [INPUT] + Name winlog + Channels EKS + DB C:\\var\\fluent-bit\\state\\flb_eks_winlog.db + Interval_Sec 60 + [FILTER] Name aws Match dataplane.* imds_version v2 + [FILTER] + Name aws + Match winlog.* + imds_version v2 + [OUTPUT] Name cloudwatch_logs Match dataplane.* @@ -105,10 +116,19 @@ data: auto_create_group true extra_user_agent container-insights + [OUTPUT] + Name cloudwatch_logs + Match winlog.* + region ${AWS_REGION} + log_group_name /aws/containerinsights/${CLUSTER_NAME}/dataplane + log_stream_name ${HOST_NAME}.windows.kubelet.kubeproxy.service + auto_create_group true + extra_user_agent container-insights + host-log.conf: | [INPUT] Name winlog - Channels EKS, System + Channels System DB C:\\var\\fluent-bit\\state\\flb_system_winlog.db Interval_Sec 60 @@ -122,7 +142,7 @@ data: Match winlog.* region ${AWS_REGION} log_group_name /aws/containerinsights/${CLUSTER_NAME}/host - log_stream_prefix ${HOST_NAME}. 
+ log_stream_name ${HOST_NAME}.windows.system.events auto_create_group true extra_user_agent container-insights diff --git a/terraform/eks/daemon/fluent/common/main.tf b/terraform/eks/daemon/fluent/common/main.tf index 93edbfb9c..678b984b4 100644 --- a/terraform/eks/daemon/fluent/common/main.tf +++ b/terraform/eks/daemon/fluent/common/main.tf @@ -280,7 +280,7 @@ resource "kubernetes_daemonset" "agent_daemon" { spec { container { name = "cloudwatch-agent" - image = "public.ecr.aws/cloudwatch-agent/cloudwatch-agent:1.247359.1b252618" + image = "public.ecr.aws/cloudwatch-agent/cloudwatch-agent:1.300040.0b650" resources { limits = { "cpu" : "200m", diff --git a/terraform/eks/daemon/fluent/windows/main.tf b/terraform/eks/daemon/fluent/windows/main.tf index 9c48c2580..dde0d010c 100644 --- a/terraform/eks/daemon/fluent/windows/main.tf +++ b/terraform/eks/daemon/fluent/windows/main.tf @@ -1,10 +1,10 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: MIT -module "fluent_common" { - source = "../common" - ami_type = var.ami_type - instance_type = var.instance_type +module "common" { + source = "../../../../common" + cwagent_image_repo = var.cwagent_image_repo + cwagent_image_tag = var.cwagent_image_tag } module "basic_components" { @@ -13,29 +13,37 @@ module "basic_components" { region = var.region } -locals { - aws_eks = "aws eks --region ${var.region}" - cluster_name = module.fluent_common.cluster_name +resource "aws_eks_cluster" "cluster" { + name = "cwagent-eks-integ-${module.common.testing_id}" + role_arn = module.basic_components.role_arn + version = var.k8s_version + enabled_cluster_log_types = [ + "api", + "audit", + "authenticator", + "controllerManager", + "scheduler" + ] + vpc_config { + subnet_ids = module.basic_components.public_subnet_ids + security_group_ids = [module.basic_components.security_group] + } } -data "aws_caller_identity" "account_id" {} - -data "aws_eks_cluster" "eks_windows_cluster_ca" { - name = 
module.fluent_common.cluster_name +locals { + aws_eks = "aws eks --region ${var.region}" + cluster_name = aws_eks_cluster.cluster.name } -output "account_id" { - value = data.aws_caller_identity.account_id.account_id +data "aws_eks_cluster_auth" "cluster_auth" { + name = aws_eks_cluster.cluster.name } -data "aws_eks_cluster_auth" "this" { - name = module.fluent_common.cluster_name -} +data "aws_caller_identity" "account_id" {} ## EKS Cluster Addon - resource "aws_eks_addon" "eks_windows_addon" { - cluster_name = module.fluent_common.cluster_name + cluster_name = aws_eks_cluster.cluster.name addon_name = "vpc-cni" } @@ -43,7 +51,7 @@ resource "aws_eks_addon" "eks_windows_addon" { resource "kubernetes_config_map_v1_data" "amazon_vpc_cni_windows" { depends_on = [ - module.fluent_common, + aws_eks_cluster.cluster, aws_eks_addon.eks_windows_addon ] metadata { @@ -58,6 +66,23 @@ resource "kubernetes_config_map_v1_data" "amazon_vpc_cni_windows" { } } +# EKS Node IAM Role +resource "aws_iam_role" "node_role" { + name = "cwagent-eks-Worker-Role-${module.common.testing_id}" + assume_role_policy = jsonencode({ + Version = "2012-10-17", + Statement = [ + { + Effect = "Allow", + Principal = { + Service = "ec2.amazonaws.com" + }, + Action = "sts:AssumeRole" + } + ] + }) +} + ## AWS CONFIGMAP resource "kubernetes_config_map" "configmap" { @@ -66,13 +91,13 @@ resource "kubernetes_config_map" "configmap" { - groups: - system:bootstrappers - system:nodes - rolearn: arn:aws:iam::${data.aws_caller_identity.account_id.account_id}:role/${module.fluent_common.node_role_name} + rolearn: arn:aws:iam::${data.aws_caller_identity.account_id.account_id}:role/${aws_iam_role.node_role.name} username: system:node:{{EC2PrivateDNSName}} - groups: - eks:kube-proxy-windows - system:bootstrappers - system:nodes - rolearn: arn:aws:iam::${data.aws_caller_identity.account_id.account_id}:role/${module.fluent_common.node_role_name} + rolearn: 
arn:aws:iam::${data.aws_caller_identity.account_id.account_id}:role/${aws_iam_role.node_role.name} username: system:node:{{EC2PrivateDNSName}} - groups: - system:masters @@ -87,11 +112,37 @@ EOT } } +# EKS Node Groups +resource "aws_eks_node_group" "node_group" { + cluster_name = aws_eks_cluster.cluster.name + node_group_name = "cwagent-eks-integ-node" + node_role_arn = aws_iam_role.node_role.arn + subnet_ids = module.basic_components.public_subnet_ids + + scaling_config { + desired_size = 1 + max_size = 1 + min_size = 1 + } + + ami_type = var.ami_type + capacity_type = "ON_DEMAND" + disk_size = 20 + instance_types = [var.instance_type] + + depends_on = [ + aws_iam_role_policy_attachment.node_AmazonEC2ContainerRegistryReadOnly, + aws_iam_role_policy_attachment.node_AmazonEKS_CNI_Policy, + aws_iam_role_policy_attachment.node_AmazonEKSWorkerNodePolicy, + aws_iam_role_policy_attachment.node_CloudWatchAgentServerPolicy, + ] +} + # EKS Windows Node Groups resource "aws_eks_node_group" "node_group_windows" { - cluster_name = module.fluent_common.cluster_name + cluster_name = aws_eks_cluster.cluster.name node_group_name = "${local.cluster_name}-windows-node" - node_role_arn = module.fluent_common.node_role_arn + node_role_arn = aws_iam_role.node_role.arn subnet_ids = module.basic_components.public_subnet_ids scaling_config { @@ -106,33 +157,365 @@ resource "aws_eks_node_group" "node_group_windows" { instance_types = ["t3.large"] depends_on = [ - module.fluent_common + aws_iam_role_policy_attachment.node_CloudWatchAgentServerPolicy, + aws_iam_role_policy_attachment.node_AmazonEC2ContainerRegistryReadOnly, + aws_iam_role_policy_attachment.node_AmazonEKS_CNI_Policy, + aws_iam_role_policy_attachment.node_AmazonEKSWorkerNodePolicy ] } +resource "aws_iam_role_policy_attachment" "node_AmazonEKSWorkerNodePolicy" { + policy_arn = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy" + role = aws_iam_role.node_role.name +} + +resource "aws_iam_role_policy_attachment" 
"node_AmazonEKS_CNI_Policy" { + policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy" + role = aws_iam_role.node_role.name +} + +resource "aws_iam_role_policy_attachment" "node_AmazonEC2ContainerRegistryReadOnly" { + policy_arn = "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly" + role = aws_iam_role.node_role.name +} + +resource "aws_iam_role_policy_attachment" "node_CloudWatchAgentServerPolicy" { + policy_arn = "arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy" + role = aws_iam_role.node_role.name +} + +# TODO: these security groups be created once and then reused +# EKS Cluster Security Group +resource "aws_security_group" "eks_cluster_sg" { + name = "cwagent-eks-cluster-sg-${module.common.testing_id}" + description = "Cluster communication with worker nodes" + vpc_id = module.basic_components.vpc_id +} + +resource "aws_security_group_rule" "cluster_inbound" { + description = "Allow worker nodes to communicate with the cluster API Server" + from_port = 443 + protocol = "tcp" + security_group_id = aws_security_group.eks_cluster_sg.id + source_security_group_id = aws_security_group.eks_nodes_sg.id + to_port = 443 + type = "ingress" +} + +resource "aws_security_group_rule" "cluster_outbound" { + description = "Allow cluster API Server to communicate with the worker nodes" + from_port = 1024 + protocol = "tcp" + security_group_id = aws_security_group.eks_cluster_sg.id + source_security_group_id = aws_security_group.eks_nodes_sg.id + to_port = 65535 + type = "egress" +} + + +# EKS Node Security Group +resource "aws_security_group" "eks_nodes_sg" { + name = "cwagent-eks-node-sg-${module.common.testing_id}" + description = "Security group for all nodes in the cluster" + vpc_id = module.basic_components.vpc_id + + egress { + from_port = 0 + to_port = 0 + protocol = "-1" + cidr_blocks = ["0.0.0.0/0"] + } +} + +resource "aws_security_group_rule" "nodes_internal" { + description = "Allow nodes to communicate with each other" + from_port = 0 + protocol = 
"-1" + security_group_id = aws_security_group.eks_nodes_sg.id + source_security_group_id = aws_security_group.eks_nodes_sg.id + to_port = 65535 + type = "ingress" +} + +resource "aws_security_group_rule" "nodes_cluster_inbound" { + description = "Allow worker Kubelets and pods to receive communication from the cluster control plane" + from_port = 1025 + protocol = "tcp" + security_group_id = aws_security_group.eks_nodes_sg.id + source_security_group_id = aws_security_group.eks_cluster_sg.id + to_port = 65535 + type = "ingress" +} + +resource "kubernetes_namespace" "namespace" { + metadata { + name = "amazon-cloudwatch" + } +} + +resource "kubernetes_service_account" "cwagentservice" { + depends_on = [kubernetes_namespace.namespace] + metadata { + name = "cloudwatch-agent" + namespace = "amazon-cloudwatch" + } +} + +resource "kubernetes_cluster_role" "clusterrole" { + depends_on = [kubernetes_namespace.namespace] + metadata { + name = "cloudwatch-agent-role" + } + rule { + verbs = ["list", "watch"] + resources = ["pods", "nodes", "endpoints"] + api_groups = [""] + } + rule { + verbs = ["list", "watch"] + resources = ["replicasets"] + api_groups = ["apps"] + } + rule { + verbs = ["list", "watch"] + resources = ["jobs"] + api_groups = ["batch"] + } + rule { + verbs = ["get"] + resources = ["nodes/proxy"] + api_groups = [""] + } + rule { + verbs = ["create", "get"] + resources = ["nodes/stats", "configmaps", "events"] + api_groups = [""] + } + rule { + verbs = ["get", "update"] + resource_names = ["cwagent-clusterleader"] + resources = ["configmaps"] + api_groups = [""] + } +} + +resource "kubernetes_cluster_role_binding" "rolebinding" { + depends_on = [ + kubernetes_service_account.cwagentservice, + kubernetes_cluster_role.clusterrole + ] + metadata { + name = "cloudwatch-agent-role-binding" + } + role_ref { + api_group = "rbac.authorization.k8s.io" + kind = "ClusterRole" + name = "cloudwatch-agent-role" + } + subject { + kind = "ServiceAccount" + name = 
"cloudwatch-agent" + namespace = "amazon-cloudwatch" + } +} + +resource "kubernetes_config_map" "cwagentconfig" { + metadata { + name = "cwagentconfig" + namespace = "amazon-cloudwatch" + } + data = { + "cwagentconfig.json" = <