From a16d019569a3e1d097f949718a239cac21639a4e Mon Sep 17 00:00:00 2001 From: Parul Singh Date: Thu, 29 Jun 2023 06:43:55 -0400 Subject: [PATCH] Merge following kepler ds changes into the operator: - https://github.com/sustainable-computing-io/kepler/pull/578 - https://github.com/sustainable-computing-io/kepler/pull/683 - https://github.com/sustainable-computing-io/kepler/pull/635 - https://github.com/sustainable-computing-io/kepler/pull/733 Signed-off-by: Parul Singh --- config/manager/kustomization.yaml | 2 +- config/samples/kepler.system_v1alpha1_kepler.yaml | 2 -- controllers/kepler_controller.go | 12 ++++++++++-- controllers/sahandler.go | 2 +- 4 files changed, 12 insertions(+), 6 deletions(-) diff --git a/config/manager/kustomization.yaml b/config/manager/kustomization.yaml index 00a83003..9fb90417 100644 --- a/config/manager/kustomization.yaml +++ b/config/manager/kustomization.yaml @@ -5,4 +5,4 @@ kind: Kustomization images: - name: controller newName: quay.io/sustainable_computing_io/kepler-operator - newTag: 0.4.1 + newTag: "0.5" diff --git a/config/samples/kepler.system_v1alpha1_kepler.yaml b/config/samples/kepler.system_v1alpha1_kepler.yaml index 49d888a0..ef8bac65 100644 --- a/config/samples/kepler.system_v1alpha1_kepler.yaml +++ b/config/samples/kepler.system_v1alpha1_kepler.yaml @@ -13,8 +13,6 @@ spec: collector: image: quay.io/sustainable_computing_io/kepler:latest collectorPort: 9103 - model-server-exporter: - port: 8100 diff --git a/controllers/kepler_controller.go b/controllers/kepler_controller.go index 265af394..ef953c3d 100644 --- a/controllers/kepler_controller.go +++ b/controllers/kepler_controller.go @@ -309,7 +309,7 @@ func (r *collectorReconciler) ensureConfigMap(l klog.Logger) (bool, error) { var data_map = make(map[string]string) data_map["KEPLER_NAMESPACE"] = r.Instance.Namespace - data_map["KEPLER_LOG_LEVEL"] = "5" + data_map["KEPLER_LOG_LEVEL"] = "1" data_map["METRIC_PATH"] = "/metrics" data_map["BIND_ADDRESS"] = bindAddress 
data_map["ENABLE_GPU"] = "true" @@ -318,6 +318,8 @@ func (r *collectorReconciler) ensureConfigMap(l klog.Logger) (bool, error) { data_map["CGROUP_METRICS"] = "*" data_map["MODEL_CONFIG"] = "| CONTAINER_COMPONENTS_ESTIMATOR=false CONTAINER_COMPONENTS_INIT_URL=https://raw.githubusercontent.com/sustainable-computing-io/kepler-model-server/main/tests/test_models/DynComponentModelWeight/CgroupOnly/ScikitMixed/ScikitMixed.json" + data_map["EXPOSE_HW_COUNTER_METRICS"] = "true" + data_map["EXPOSE_CGROUP_METRICS"] = "true" r.configMap.Data = data_map return nil @@ -380,7 +382,7 @@ func (r *collectorReconciler) ensureDaemonSet(l klog.Logger) (bool, error) { Privileged: &scc_value, }, Image: image, - Command: []string{"/usr/bin/kepler", "-address", bindAddress, "-enable-gpu=true", "enable-cgroup-id=true", "v=5"}, + Command: []string{"/usr/bin/kepler", "-address", bindAddress, "-enable-gpu=true", "-enable-cgroup-id=true", "-v=1", "-kernel-source-dir=/usr/share/kepler/kernel_sources"}, Ports: []corev1.ContainerPort{{ ContainerPort: collectorPort, Name: "http", @@ -411,8 +413,14 @@ func (r *collectorReconciler) ensureDaemonSet(l klog.Logger) (bool, error) { FieldPath: "status.hostIP", }, } + envFromSourceNode := corev1.EnvVarSource{ + FieldRef: &corev1.ObjectFieldSelector{ + FieldPath: "spec.nodeName", + }, + } r.daemonSet.Spec.Template.Spec.Containers[0].Env = []corev1.EnvVar{ {Name: "NODE_IP", ValueFrom: &envFromSource}, + {Name: "NODE_NAME", ValueFrom: &envFromSourceNode}, } r.daemonSet.Spec.Template.Spec.Containers[0].VolumeMounts = []corev1.VolumeMount{ diff --git a/controllers/sahandler.go b/controllers/sahandler.go index be2c16c1..a018b2ed 100644 --- a/controllers/sahandler.go +++ b/controllers/sahandler.go @@ -130,7 +130,7 @@ func (d *keplerSADescription) createOrUpdateClusterRole(l klog.Logger) (*rbacv1. 
Rules: []rbacv1.PolicyRule{ { APIGroups: []string{""}, - Resources: []string{"nodes/metrics", "nodes/proxy", "nodes/stats"}, + Resources: []string{"nodes/metrics", "nodes/proxy", "nodes/stats", "pods"}, Verbs: []string{"get", "watch", "list"}, }, },