diff --git a/README.md b/README.md index 15bbffcef2..19baadcf54 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ ![Release Charts](https://github.com/kubernetes-sigs/descheduler/workflows/Release%20Charts/badge.svg)
- ↖️ Click at the [bullet list icon] at the top left corner of the Readme visualization for the github generated table of contents. + ↗️ Click the [bullet list icon] at the top right corner of the rendered README for the GitHub-generated table of contents.
diff --git a/charts/descheduler/templates/NOTES.txt b/charts/descheduler/templates/NOTES.txt
index d0a0f8b2da..5882980483 100644
--- a/charts/descheduler/templates/NOTES.txt
+++ b/charts/descheduler/templates/NOTES.txt
@@ -1,7 +1,7 @@
Descheduler installed as a {{ .Values.kind }}.
{{- if eq .Values.kind "Deployment" }}
-{{- if eq .Values.replicas 1.0}}
+{{- if eq (.Values.replicas | int) 1 }}
WARNING: You set replica count as 1 and workload kind as Deployment however leaderElection is not enabled. Consider enabling Leader Election for HA mode.
{{- end}}
{{- if .Values.leaderElection }}
diff --git a/charts/descheduler/templates/cronjob.yaml b/charts/descheduler/templates/cronjob.yaml
index 73e3714b9f..9d18adf093 100644
--- a/charts/descheduler/templates/cronjob.yaml
+++ b/charts/descheduler/templates/cronjob.yaml
@@ -81,7 +81,11 @@ spec:
args:
- --policy-config-file=/policy-dir/policy.yaml
{{- range $key, $value := .Values.cmdOptions }}
- - {{ printf "--%s" $key }}{{ if $value }}={{ $value }}{{ end }}
+ {{- if ne $value nil }}
+ - {{ printf "--%s=%s" $key (toString $value) }}
+ {{- else }}
+ - {{ printf "--%s" $key }}
+ {{- end }}
{{- end }}
livenessProbe:
{{- toYaml .Values.livenessProbe | nindent 16 }}
diff --git a/charts/descheduler/templates/deployment.yaml b/charts/descheduler/templates/deployment.yaml
index ff90a6602f..53d18cb665 100644
--- a/charts/descheduler/templates/deployment.yaml
+++ b/charts/descheduler/templates/deployment.yaml
@@ -7,7 +7,7 @@ metadata:
labels:
{{- include "descheduler.labels" . | nindent 4 }}
spec:
- {{- if gt .Values.replicas 1.0}}
+ {{- if gt (.Values.replicas | int) 1 }}
{{- if not .Values.leaderElection.enabled }}
{{- fail "You must set leaderElection to use more than 1 replica"}}
{{- end}}
@@ -53,7 +53,11 @@ spec:
- --policy-config-file=/policy-dir/policy.yaml
- --descheduling-interval={{ required "deschedulingInterval required for running as Deployment" .Values.deschedulingInterval }}
{{- range $key, $value := .Values.cmdOptions }}
- - {{ printf "--%s" $key }}{{ if $value }}={{ $value }}{{ end }}
+ {{- if ne $value nil }}
+ - {{ printf "--%s=%s" $key (toString $value) }}
+ {{- else }}
+ - {{ printf "--%s" $key }}
+ {{- end }}
{{- end }}
{{- include "descheduler.leaderElection" . | nindent 12 }}
ports:
diff --git a/charts/descheduler/values.yaml b/charts/descheduler/values.yaml
index 5d4d8ed9de..7013ce52ff 100644
--- a/charts/descheduler/values.yaml
+++ b/charts/descheduler/values.yaml
@@ -111,14 +111,13 @@ deschedulerPolicy:
args:
podRestartThreshold: 100
includingInitContainers: true
- - name: RemovePodsViolatingNodeTaints
+ - name: RemovePodsViolatingNodeAffinity
args:
nodeAffinityType:
- - requiredDuringSchedulingIgnoredDuringExecution
+ - requiredDuringSchedulingIgnoredDuringExecution
+ - name: RemovePodsViolatingNodeTaints
- name: RemovePodsViolatingInterPodAntiAffinity
- name: RemovePodsViolatingTopologySpreadConstraint
- args:
- includeSoftConstraints: false
- name: LowNodeUtilization
args:
thresholds:
@@ -133,7 +132,6 @@ deschedulerPolicy:
balance:
enabled:
- RemoveDuplicates
- - RemovePodsViolatingNodeAffinity
- RemovePodsViolatingTopologySpreadConstraint
- LowNodeUtilization
deschedule:
diff --git a/cmd/descheduler/app/server.go b/cmd/descheduler/app/server.go
index 3056a25fa3..e7ef713460 100644
--- a/cmd/descheduler/app/server.go
+++ b/cmd/descheduler/app/server.go
@@ -77,7 +77,7 @@ func NewDeschedulerCommand(out io.Writer) *cobra.Command {
secureServing.DisableHTTP2 = !s.EnableHTTP2
- ctx, done := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
+ ctx, done := signal.NotifyContext(cmd.Context(), syscall.SIGINT, syscall.SIGTERM)
pathRecorderMux := mux.NewPathRecorderMux("descheduler")
if !s.DisableMetrics {
diff --git a/pkg/descheduler/node/node.go b/pkg/descheduler/node/node.go
index eed0604639..d283280656 100644
--- a/pkg/descheduler/node/node.go
+++ b/pkg/descheduler/node/node.go
@@ -343,9 +343,30 @@ func podMatchesInterPodAntiAffinity(nodeIndexer podutil.GetPodsAssignedToNodeFun
if err != nil {
return false, fmt.Errorf("error listing all pods: %v", err)
}
+ assignedPodsInNamespace := podutil.GroupByNamespace(podsOnNode)
- podsInANamespace := podutil.GroupByNamespace(podsOnNode)
- nodeMap := utils.CreateNodeMap([]*v1.Node{node})
+ for _, term := range utils.GetPodAntiAffinityTerms(pod.Spec.Affinity.PodAntiAffinity) {
+ namespaces := utils.GetNamespacesFromPodAffinityTerm(pod, &term)
+ selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
+ if err != nil {
+ klog.ErrorS(err, "Unable to convert LabelSelector into Selector")
+ return false, err
+ }
+
+ for namespace := range namespaces {
+ for _, assignedPod := range assignedPodsInNamespace[namespace] {
+ if assignedPod.Name == pod.Name || !utils.PodMatchesTermsNamespaceAndSelector(assignedPod, namespaces, selector) {
+ klog.V(4).InfoS("Pod doesn't match inter-pod anti-affinity rule of assigned pod on node", "candidatePod", klog.KObj(pod), "assignedPod", klog.KObj(assignedPod))
+ continue
+ }
+
+ if _, ok := node.Labels[term.TopologyKey]; ok {
+ klog.V(1).InfoS("Pod matches inter-pod anti-affinity rule of assigned pod on node", "candidatePod", klog.KObj(pod), "assignedPod", klog.KObj(assignedPod))
+ return true, nil
+ }
+ }
+ }
+ }
- return utils.CheckPodsWithAntiAffinityExist(pod, podsInANamespace, nodeMap), nil
+ return false, nil
}
diff --git a/pkg/descheduler/node/node_test.go b/pkg/descheduler/node/node_test.go
index dafc0e9205..383e18bc51 100644
--- a/pkg/descheduler/node/node_test.go
+++ b/pkg/descheduler/node/node_test.go
@@ -759,6 +759,9 @@ func TestNodeFit(t *testing.T) {
"region": "main-region",
}
})
+
+ nodeNolabel := test.BuildTestNode("node", 64000, 128*1000*1000*1000, 2, nil)
+
tests := []struct {
description string
pod *v1.Pod
@@ -767,7 +770,7 @@ func TestNodeFit(t *testing.T) {
err error
}{
{
- description: "insufficient cpu",
+ description: "Insufficient cpu",
pod: test.BuildTestPod("p1", 10000, 2*1000*1000*1000, "", nil),
node: node,
podsOnNode: []*v1.Pod{
@@ -776,7 +779,7 @@ func TestNodeFit(t *testing.T) {
err: errors.New("insufficient cpu"),
},
{
- description: "insufficient pod num",
+ description: "Insufficient pod num",
pod: test.BuildTestPod("p1", 1000, 2*1000*1000*1000, "", nil),
node: node,
podsOnNode: []*v1.Pod{
@@ -786,7 +789,7 @@ func TestNodeFit(t *testing.T) {
err: errors.New("insufficient pods"),
},
{
- description: "matches inter-pod anti-affinity rule of pod on node",
+ description: "Pod matches inter-pod anti-affinity rule of other pod on node",
pod: test.PodWithPodAntiAffinity(test.BuildTestPod("p1", 1000, 1000, node.Name, nil), "foo", "bar"),
node: node,
podsOnNode: []*v1.Pod{
@@ -795,11 +798,36 @@ func TestNodeFit(t *testing.T) {
err: errors.New("pod matches inter-pod anti-affinity rule of other pod on node"),
},
{
- description: "pod fits on node",
+ description: "Pod doesn't match inter-pod anti-affinity rule of other pod on node, because pod and other pod is not same namespace",
+ pod: test.PodWithPodAntiAffinity(test.BuildTestPod("p1", 1000, 1000, node.Name, nil), "foo", "bar"),
+ node: node,
+ podsOnNode: []*v1.Pod{
+ test.PodWithPodAntiAffinity(test.BuildTestPod("p2", 1000, 1000, node.Name, func(pod *v1.Pod) {
+ pod.Namespace = "test"
+ }), "foo", "bar"),
+ },
+ },
+ {
+ description: "Pod doesn't match inter-pod anti-affinity rule of other pod on node, because other pod not match labels of pod",
+ pod: test.PodWithPodAntiAffinity(test.BuildTestPod("p1", 1000, 1000, node.Name, nil), "foo", "bar"),
+ node: node,
+ podsOnNode: []*v1.Pod{
+ test.PodWithPodAntiAffinity(test.BuildTestPod("p2", 1000, 1000, node.Name, nil), "foo1", "bar1"),
+ },
+ },
+ {
+ description: "Pod doesn't match inter-pod anti-affinity rule of other pod on node, because node have no topologyKey",
+ pod: test.PodWithPodAntiAffinity(test.BuildTestPod("p1", 1000, 1000, "node1", nil), "foo", "bar"),
+ node: nodeNolabel,
+ podsOnNode: []*v1.Pod{
+ test.PodWithPodAntiAffinity(test.BuildTestPod("p2", 1000, 1000, node.Name, nil), "foo", "bar"),
+ },
+ },
+ {
+ description: "Pod fits on node",
pod: test.BuildTestPod("p1", 1000, 1000, "", func(pod *v1.Pod) {}),
node: node,
podsOnNode: []*v1.Pod{},
- err: nil,
},
}
diff --git a/pkg/utils/predicates.go b/pkg/utils/predicates.go
index 48f25a1378..7153f17926 100644
--- a/pkg/utils/predicates.go
+++ b/pkg/utils/predicates.go
@@ -24,10 +24,37 @@ import (
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
+ "k8s.io/apimachinery/pkg/util/sets"
"k8s.io/component-helpers/scheduling/corev1"
"k8s.io/klog/v2"
)
+// GetNamespacesFromPodAffinityTerm builds the set of namespaces that
+// podAffinityTerm applies to: the namespaces listed on the term, or
+// just the given pod's namespace when the term lists none.
+func GetNamespacesFromPodAffinityTerm(pod *v1.Pod, podAffinityTerm *v1.PodAffinityTerm) sets.Set[string] {
+	// An explicit namespace list on the term takes precedence.
+	if listed := podAffinityTerm.Namespaces; len(listed) > 0 {
+		return sets.New(listed...)
+	}
+
+	// Nothing listed: fall back to the pod's own namespace.
+	return sets.New(pod.Namespace)
+}
+
+// PodMatchesTermsNamespaceAndSelector returns true if the given