From e907beff60abf082fcb47359481d026d2950f957 Mon Sep 17 00:00:00 2001
From: Charles Bushong
Date: Wed, 29 May 2024 12:57:26 -0400
Subject: [PATCH] Configurable workload node selectors and tolerations (Fixes #48)

Each workload (agent, fluentBit, dcgmExporter, neuronMonitor, manager)
can now set its own `tolerations`. When a workload's list is left empty
the chart-wide `tolerations` value is used instead.

`additionalNodeSelector` entries are rendered after the fixed
`kubernetes.io/os` selector of the agent, fluentBit, dcgmExporter and
manager workloads. neuronMonitor accepts `additionalNodeAffinityTerms`,
rendered after its built-in node affinity term.
---
 .../linux/cloudwatch-agent-daemonset.yaml            |  7 +++++--
 .../templates/linux/dcgm-exporter-daemonset.yaml     |  9 ++++++---
 .../templates/linux/fluent-bit-daemonset.yaml        |  7 +++++--
 .../templates/linux/neuron-monitor-daemonset.yaml    |  7 +++++--
 .../templates/operator-deployment.yaml               |  7 +++++--
 .../cloudwatch-agent-windows-daemonset.yaml          |  7 +++++--
 .../windows/fluent-bit-windows-daemonset.yaml        |  7 +++++--
 charts/amazon-cloudwatch-observability/values.yaml   | 14 +++++++++++++-
 8 files changed, 49 insertions(+), 16 deletions(-)

diff --git a/charts/amazon-cloudwatch-observability/templates/linux/cloudwatch-agent-daemonset.yaml b/charts/amazon-cloudwatch-observability/templates/linux/cloudwatch-agent-daemonset.yaml
index 84b26b7..8c3b9b3 100644
--- a/charts/amazon-cloudwatch-observability/templates/linux/cloudwatch-agent-daemonset.yaml
+++ b/charts/amazon-cloudwatch-observability/templates/linux/cloudwatch-agent-daemonset.yaml
@@ -29,6 +29,9 @@ spec:
   mode: daemonset
   nodeSelector:
     kubernetes.io/os: linux
+    {{- with .Values.agent.additionalNodeSelector }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
   serviceAccount: {{ template "cloudwatch-agent.serviceAccountName" . }}
   {{- if .Values.agent.config }}
   config: {{ include "cloudwatch-agent.modify-config" (merge (dict "Config" .Values.agent.config) . ) }}
@@ -111,7 +114,7 @@ spec:
     valueFrom:
       fieldRef:
         fieldPath: metadata.namespace
-  {{- with .Values.tolerations }}
+  {{- with (.Values.agent.tolerations | default .Values.tolerations ) }}
   tolerations: {{- toYaml . | nindent 2}}
   {{- end }}
-{{- end }}
\ No newline at end of file
+{{- end }}
diff --git a/charts/amazon-cloudwatch-observability/templates/linux/dcgm-exporter-daemonset.yaml b/charts/amazon-cloudwatch-observability/templates/linux/dcgm-exporter-daemonset.yaml
index 82ce49c..b7d39a7 100644
--- a/charts/amazon-cloudwatch-observability/templates/linux/dcgm-exporter-daemonset.yaml
+++ b/charts/amazon-cloudwatch-observability/templates/linux/dcgm-exporter-daemonset.yaml
@@ -10,6 +10,12 @@ spec:
   image: {{ template "dcgm-exporter.image" . }}
   nodeSelector:
     kubernetes.io/os: linux
+    {{- with .Values.dcgmExporter.additionalNodeSelector }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+  {{- with (.Values.dcgmExporter.tolerations | default .Values.tolerations ) }}
+  tolerations: {{- toYaml . | nindent 2}}
+  {{- end }}
   serviceAccount: {{ template "dcgm-exporter.serviceAccountName" . }}
   affinity:
     nodeAffinity:
@@ -71,6 +77,3 @@ spec:
     tls_server_config:
       cert_file: /etc/amazon-cloudwatch-observability-dcgm-cert/server.crt
       key_file: /etc/amazon-cloudwatch-observability-dcgm-cert/server.key
-  {{- with .Values.tolerations }}
-  tolerations: {{- toYaml . | nindent 2}}
-  {{- end }}
\ No newline at end of file
diff --git a/charts/amazon-cloudwatch-observability/templates/linux/fluent-bit-daemonset.yaml b/charts/amazon-cloudwatch-observability/templates/linux/fluent-bit-daemonset.yaml
index ed8a8d9..6373631 100644
--- a/charts/amazon-cloudwatch-observability/templates/linux/fluent-bit-daemonset.yaml
+++ b/charts/amazon-cloudwatch-observability/templates/linux/fluent-bit-daemonset.yaml
@@ -97,7 +97,10 @@ spec:
       serviceAccountName: {{ template "cloudwatch-agent.serviceAccountName" . }}
       nodeSelector:
         kubernetes.io/os: linux
-      {{- with .Values.tolerations }}
+        {{- with .Values.fluentBit.additionalNodeSelector }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
+      {{- with (.Values.fluentBit.tolerations | default .Values.tolerations ) }}
       tolerations: {{- toYaml . | nindent 6}}
       {{- end }}
-{{- end }}
\ No newline at end of file
+{{- end }}
diff --git a/charts/amazon-cloudwatch-observability/templates/linux/neuron-monitor-daemonset.yaml b/charts/amazon-cloudwatch-observability/templates/linux/neuron-monitor-daemonset.yaml
index 3213648..43ba78e 100644
--- a/charts/amazon-cloudwatch-observability/templates/linux/neuron-monitor-daemonset.yaml
+++ b/charts/amazon-cloudwatch-observability/templates/linux/neuron-monitor-daemonset.yaml
@@ -21,6 +21,9 @@ spec:
               - key: {{ .Values.nodeLabelKey }}
                 operator: In
                 values: {{ .Values.neuronInstances | toYaml | nindent 20 }}
+          {{- with .Values.neuronMonitor.additionalNodeAffinityTerms }}
+          {{- toYaml . | nindent 10 }}
+          {{- end }}
   resources:
     limits:
       cpu: 500m
@@ -91,6 +94,6 @@ spec:
         }
       ]
     }
-  {{- with .Values.tolerations }}
+  {{- with (.Values.neuronMonitor.tolerations | default .Values.tolerations ) }}
   tolerations: {{- toYaml . | nindent 2}}
-  {{- end }}
\ No newline at end of file
+  {{- end }}
diff --git a/charts/amazon-cloudwatch-observability/templates/operator-deployment.yaml b/charts/amazon-cloudwatch-observability/templates/operator-deployment.yaml
index 9362df6..de330f1 100644
--- a/charts/amazon-cloudwatch-observability/templates/operator-deployment.yaml
+++ b/charts/amazon-cloudwatch-observability/templates/operator-deployment.yaml
@@ -51,6 +51,9 @@ spec:
           secretName: {{ template "amazon-cloudwatch-observability.certificateSecretName" . }}
       nodeSelector:
         kubernetes.io/os: linux
-      {{- with .Values.tolerations }}
+        {{- with .Values.manager.additionalNodeSelector }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
+      {{- with (.Values.manager.tolerations | default .Values.tolerations ) }}
       tolerations: {{- toYaml . | nindent 6}}
-      {{- end }}
\ No newline at end of file
+      {{- end }}
diff --git a/charts/amazon-cloudwatch-observability/templates/windows/cloudwatch-agent-windows-daemonset.yaml b/charts/amazon-cloudwatch-observability/templates/windows/cloudwatch-agent-windows-daemonset.yaml
index f801bbb..154055e 100644
--- a/charts/amazon-cloudwatch-observability/templates/windows/cloudwatch-agent-windows-daemonset.yaml
+++ b/charts/amazon-cloudwatch-observability/templates/windows/cloudwatch-agent-windows-daemonset.yaml
@@ -18,6 +18,9 @@ spec:
   serviceAccount: {{ template "cloudwatch-agent.serviceAccountName" . }}
   nodeSelector:
     kubernetes.io/os: windows
+    {{- with .Values.agent.additionalNodeSelector }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
   config: {{ .Values.agent.windowsDefaultConfig | toJson | quote }}
   resources:
     requests:
@@ -47,7 +50,7 @@ spec:
     value: "True"
   - name: RUN_AS_HOST_PROCESS_CONTAINER
     value: "True"
-  {{- with .Values.tolerations }}
+  {{- with (.Values.agent.tolerations | default .Values.tolerations ) }}
   tolerations: {{- toYaml . | nindent 2}}
   {{- end }}
-{{- end }}
\ No newline at end of file
+{{- end }}
diff --git a/charts/amazon-cloudwatch-observability/templates/windows/fluent-bit-windows-daemonset.yaml b/charts/amazon-cloudwatch-observability/templates/windows/fluent-bit-windows-daemonset.yaml
index 83adc0a..18c5479 100644
--- a/charts/amazon-cloudwatch-observability/templates/windows/fluent-bit-windows-daemonset.yaml
+++ b/charts/amazon-cloudwatch-observability/templates/windows/fluent-bit-windows-daemonset.yaml
@@ -30,6 +30,9 @@ spec:
       hostNetwork: true
       nodeSelector:
         kubernetes.io/os: windows
+        {{- with .Values.fluentBit.additionalNodeSelector }}
+        {{- toYaml . | nindent 8 }}
+        {{- end }}
       containers:
       - name: fluent-bit
         image: {{ template "fluent-bit-windows.image" . }}
@@ -70,7 +73,7 @@ spec:
       terminationGracePeriodSeconds: 10
       dnsPolicy: ClusterFirstWithHostNet
       serviceAccountName: {{ template "cloudwatch-agent.serviceAccountName" . }}
-      {{- with .Values.tolerations }}
+      {{- with (.Values.fluentBit.tolerations | default .Values.tolerations ) }}
       tolerations: {{- toYaml . | nindent 6}}
       {{- end }}
-{{- end }}
\ No newline at end of file
+{{- end }}
diff --git a/charts/amazon-cloudwatch-observability/values.yaml b/charts/amazon-cloudwatch-observability/values.yaml
index f652e1d..6cdbd31 100644
--- a/charts/amazon-cloudwatch-observability/values.yaml
+++ b/charts/amazon-cloudwatch-observability/values.yaml
@@ -28,6 +28,10 @@ neuronInstances: [ trn1.2xlarge, trn1.32xlarge, trn1n.32xlarge, inf1.xlarge, inf
 tolerations:
 - operator: Exists
 
+fluentBit:
+  tolerations: [] # Override the default tolerations
+  additionalNodeSelector: {} # Additional node selector key-value pairs
+
 containerLogs:
   enabled: true
   fluentBit:
@@ -423,6 +427,8 @@ manager:
 
   service:
     name:
+  tolerations: [] # Override the default tolerations
+  additionalNodeSelector: {} # Additional node selector key-value pairs
 
   ## Admission webhooks make sure only requests with correctly formatted rules will get into the Operator.
   admissionWebhooks:
@@ -532,6 +538,8 @@ agent:
         }
       }
     }
+  tolerations: [] # Override the default tolerations
+  additionalNodeSelector: {} # Additional node selector key-value pairs
 
 dcgmExporter:
   name:
@@ -554,6 +562,8 @@ dcgmExporter:
     kubeletPath: "/var/lib/kubelet/pod-resources"
   serviceAccount:
     name: # override exporter service account name
+  tolerations: [] # Override the default tolerations
+  additionalNodeSelector: {} # Additional node selector key-value pairs
 
 neuronMonitor:
   name:
@@ -574,4 +584,6 @@ neuronMonitor:
     capabilities:
       add: ["SYS_ADMIN"]
   serviceAccount:
-    name: # override exporter service account name
\ No newline at end of file
+    name: # override exporter service account name
+  tolerations: [] # Override the default tolerations
+  additionalNodeAffinityTerms: [] # Additional required node affinity selector terms