Yelp · gmdfalk · Oct 13, 2023 · Jun 27, 2023 · Jun 29, 2023 · Jun 29, 2023
diff --git a/acceptance/srv-configs/clusterman-clusters/local-dev/default.kubernetes b/acceptance/srv-configs/clusterman-clusters/local-dev/default.kubernetes
@@ -21,3 +21,4 @@ autoscaling:
   instance_loss_threshold: 3
 
 alert_on_max_capacity: false
+pool_owner: compute_infra
diff --git a/acceptance/srv-configs/clusterman-clusters/local-dev/default.mesos b/acceptance/srv-configs/clusterman-clusters/local-dev/default.mesos
@@ -29,3 +29,4 @@ autoscale_signal:
       minute_range: 10
 
 alert_on_max_capacity: false
+pool_owner: compute_infra
diff --git a/clusterman/autoscaler/autoscaler.py b/clusterman/autoscaler/autoscaler.py
@@ -181,7 +181,7 @@ def run(self, dry_run: bool = False, timestamp: Optional[arrow.Arrow] = None) ->
             self.target_capacity_gauge.set(new_target_capacity, {"dry_run": dry_run})
             self.max_capacity_gauge.set(
                 self.pool_manager.max_capacity,
-                {"dry_run": dry_run, "alert_on_max_capacity": self.pool_manager.alert_on_max_capacity},
+                {"dry_run": dry_run, "alert_on_max_capacity": self.pool_manager.alert_on_max_capacity, "team": self.pool_manager.pool_owner},
             )
             self.setpoint_gauge.set(self.autoscaling_config.setpoint, {"dry_run": dry_run})
             self._emit_requested_resource_metrics(resource_request, dry_run=dry_run)

diff --git a/clusterman/autoscaler/pool_manager.py b/clusterman/autoscaler/pool_manager.py
@@ -86,6 +86,7 @@ def __init__(
             "autoscaling.killable_nodes_prioritizing_v2", default=False
         )
         self.alert_on_max_capacity = self.pool_config.read_bool("alert_on_max_capacity", default=True)
+        self.pool_owner = self.pool_config.read_string("pool_owner", default="compute_infra")
         monitoring_info = {"cluster": cluster, "pool": pool}
         self.killable_nodes_counter = get_monitoring_client().create_counter(SFX_KILLABLE_NODES_COUNT, monitoring_info)
 

diff --git a/clusterman/simulator/simulated_pool_manager.py b/clusterman/simulator/simulated_pool_manager.py
@@ -59,6 +59,7 @@ def __init__(
             MAX_MIN_NODE_SCALEIN_UPTIME_SECONDS,
         )
         self.alert_on_max_capacity = self.pool_config.read_bool("alert_on_max_capacity", default=True)
+        self.pool_owner = self.pool_config.read_string("pool_owner", default="compute_infra")
         self.killable_nodes_prioritizing_v2 = self.pool_config.read_bool(
             "autoscaling.killable_nodes_prioritizing_v2", default=False
         )

diff --git a/examples/schemas/pool.json b/examples/schemas/pool.json
@@ -64,7 +64,8 @@
             "additionalProperties": false
         },
         "sensu_config": {"$ref": "definitions.json#sensu_config"},
-        "alert_on_max_capacity": {"type": "boolean"}
+        "alert_on_max_capacity": {"type": "boolean"},
+        "pool_owner": {"type": "string"}
     },
     "additionalProperties": false
 }
diff --git a/itests/environment.py b/itests/environment.py
@@ -121,6 +121,7 @@ def setup_configurations(context):
             ],
         },
         "alert_on_max_capacity": True,
+        "pool_owner": "compute_infra",
     }
     kube_pool_config = {
         "resource_groups": [
@@ -144,6 +145,7 @@ def setup_configurations(context):
             "period_minutes": 7,
         },
         "alert_on_max_capacity": True,
+        "pool_owner": "compute_infra",
     }
     with staticconf.testing.MockConfiguration(
         boto_config, namespace=CREDENTIALS_NAMESPACE

diff --git a/tests/autoscaler/autoscaler_test.py b/tests/autoscaler/autoscaler_test.py
@@ -49,6 +49,7 @@ def pool_configs():
                 "max_weight_to_remove": 10,
             },
             "alert_on_max_capacity": True,
+            "pool_owner": "compute_infra",
         },
         namespace=POOL_NAMESPACE.format(pool="bar", scheduler="mesos"),
     ):
@@ -91,6 +92,7 @@ def mock_autoscaler():
         "alert_on_max_capacity",
         namespace=POOL_NAMESPACE.format(pool="bar", scheduler="mesos"),
     )
+    mock_autoscaler.pool_manager.pool_owner = "compute_infra"
     mock_autoscaler.pool_manager.non_orphan_fulfilled_capacity = 0
 
     mock_autoscaler.target_capacity_gauge = mock.Mock(spec=GaugeProtocol)
@@ -160,7 +162,7 @@ def test_autoscaler_run(dry_run, mock_autoscaler, run_timestamp):
 
     assert mock_autoscaler.target_capacity_gauge.set.call_args == mock.call(100, {"dry_run": dry_run})
     assert mock_autoscaler.max_capacity_gauge.set.call_args == mock.call(
-        mock_autoscaler.pool_manager.max_capacity, {"dry_run": dry_run, "alert_on_max_capacity": True}
+        mock_autoscaler.pool_manager.max_capacity, {"dry_run": dry_run, "alert_on_max_capacity": True, "team": "compute_infra"}
     )
     assert mock_autoscaler.setpoint_gauge.set.call_args == mock.call(0.7, {"dry_run": dry_run})
     assert mock_autoscaler._compute_target_capacity.call_args == mock.call(resource_request)

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -145,6 +145,7 @@ def clusterman_pool_config():
             ],
         },
         "alert_on_max_capacity": True,
+        "pool_owner": "compute_infra",
     }
     with staticconf.testing.MockConfiguration(config, namespace="bar.mesos_config"):
         yield
@@ -202,6 +203,7 @@ def clusterman_k8s_pool_config():
             "disable_autoscaling": False,
         },
         "alert_on_max_capacity": False,
+        "pool_owner": "foo",
     }
     with staticconf.testing.MockConfiguration(config, namespace="bar.kubernetes_config"):
         yield
Original file line number	Diff line number	Diff line change
Expand Up		@@ -21,3 +21,4 @@ autoscaling:
		instance_loss_threshold: 3

		alert_on_max_capacity: false
		pool_owner: compute_infra
Original file line number	Diff line number	Diff line change
Expand Up		@@ -29,3 +29,4 @@ autoscale_signal:
		minute_range: 10

		alert_on_max_capacity: false
		pool_owner: compute_infra
gmdfalk marked this conversation as resolved.