From e1cf5e5ec2da014a5c0f6396cc7d577092d751e3 Mon Sep 17 00:00:00 2001 From: ffppa Date: Wed, 20 Nov 2024 17:21:11 +0100 Subject: [PATCH] fix: [PAYMCLOUD-171]: Update metric configuration for monitoring pods of nodo-cron (#2567) Update metric configuration for monitoring pods Change metric_namespace to Microsoft.ContainerService/managedClusters and metric_name to kube_pod_status_phase. Add a new dimension for pod phases with values "Failed" and "Pending". This improves the granularity of pod status monitoring. Signed-off-by: Fabio Felici --- src/domains/nodo-app/00_monitor.tf | 17 ++++++++----- src/domains/nodo-app/README.md | 38 +++++++++++++++--------------- 2 files changed, 30 insertions(+), 25 deletions(-) diff --git a/src/domains/nodo-app/00_monitor.tf b/src/domains/nodo-app/00_monitor.tf index 9eb9173311..8a74ffde17 100644 --- a/src/domains/nodo-app/00_monitor.tf +++ b/src/domains/nodo-app/00_monitor.tf @@ -43,15 +43,20 @@ resource "azurerm_monitor_metric_alert" "aks_nodo_moetrics" { criteria { aggregation = "Average" - metric_namespace = "Insights.Container/pods" - metric_name = "podCount" + metric_namespace = "Microsoft.ContainerService/managedClusters" + metric_name = "kube_pod_status_phase" operator = "GreaterThan" threshold = 200 dimension { - name = "kubernetes namespace" + name = "Namespace" operator = "Include" values = ["nodo-cron"] } + dimension { + name = "phase" + operator = "Include" + values = ["Failed", "Pending"] + } } action { action_group_id = data.azurerm_monitor_action_group.slack.id @@ -67,12 +72,12 @@ resource "azurerm_monitor_metric_alert" "aks_nodo_moetrics_error" { criteria { aggregation = "Average" - metric_namespace = "Insights.Container/pods" - metric_name = "podCount" + metric_namespace = "Microsoft.ContainerService/managedClusters" + metric_name = "kube_pod_status_phase" operator = "GreaterThan" threshold = 30 dimension { - name = "kubernetes namespace" + name = "Namespace" operator = "Include" values = ["nodo-cron"] } diff --git 
a/src/domains/nodo-app/README.md b/src/domains/nodo-app/README.md index 48a4595a5d..839b97cbd0 100644 --- a/src/domains/nodo-app/README.md +++ b/src/domains/nodo-app/README.md @@ -1,7 +1,7 @@ # nodo-app - + ## Requirements | Name | Version | @@ -285,7 +285,7 @@ | [apim\_dns\_zone\_prefix](#input\_apim\_dns\_zone\_prefix) | The dns subdomain for apim. | `string` | `null` | no | | [apim\_nodo\_auth\_decoupler\_enable](#input\_apim\_nodo\_auth\_decoupler\_enable) | Apply decoupler to nodo-auth product apim policy | `bool` | `true` | no | | [apim\_nodo\_decoupler\_enable](#input\_apim\_nodo\_decoupler\_enable) | Apply decoupler to nodo product apim policy | `bool` | `true` | no | -| [app\_gateway\_allowed\_paths\_pagopa\_onprem\_only](#input\_app\_gateway\_allowed\_paths\_pagopa\_onprem\_only) | Allowed paths from pagopa onprem only |
object({
paths = list(string)
ips = list(string)
})
| n/a | yes | +| [app\_gateway\_allowed\_paths\_pagopa\_onprem\_only](#input\_app\_gateway\_allowed\_paths\_pagopa\_onprem\_only) | Allowed paths from pagopa onprem only |
object({
paths = list(string)
ips = list(string)
})
| n/a | yes | | [cidr\_subnet\_vmss](#input\_cidr\_subnet\_vmss) | VMSS network address space. | `list(string)` | n/a | yes | | [cname\_record\_name](#input\_cname\_record\_name) | n/a | `string` | `"config"` | no | | [create\_wisp\_converter](#input\_create\_wisp\_converter) | CREATE WISP dismantling system infra | `bool` | `false` | no | @@ -312,39 +312,39 @@ | [nodo\_pagamenti\_auth\_password](#input\_nodo\_pagamenti\_auth\_password) | Default password used for nodo-auth | `string` | `"PLACEHOLDER"` | no | | [nodo\_pagamenti\_subkey\_required](#input\_nodo\_pagamenti\_subkey\_required) | Enabled subkeys for nodo dei pagamenti api | `bool` | `false` | no | | [nodo\_pagamenti\_x\_forwarded\_for](#input\_nodo\_pagamenti\_x\_forwarded\_for) | X-Forwarded-For IP address used for nodo-auth | `string` | n/a | yes | -| [nodo\_re\_to\_datastore\_function](#input\_nodo\_re\_to\_datastore\_function) | Nodo RE to datastore function |
object({
always_on = bool
kind = string
sku_size = string
sku_tier = string
maximum_elastic_worker_count = number
})
|
{
"always_on": true,
"kind": "Linux",
"maximum_elastic_worker_count": 1,
"sku_size": "B1",
"sku_tier": "Basic"
}
| no | +| [nodo\_re\_to\_datastore\_function](#input\_nodo\_re\_to\_datastore\_function) | Nodo RE to datastore function |
object({
always_on = bool
kind = string
sku_size = string
sku_tier = string
maximum_elastic_worker_count = number
})
|
{
"always_on": true,
"kind": "Linux",
"maximum_elastic_worker_count": 1,
"sku_size": "B1",
"sku_tier": "Basic"
}
| no | | [nodo\_re\_to\_datastore\_function\_app\_image\_tag](#input\_nodo\_re\_to\_datastore\_function\_app\_image\_tag) | Nodo RE to Datastore function app docker image tag. Defaults to 'latest' | `string` | `"latest"` | no | -| [nodo\_re\_to\_datastore\_function\_autoscale](#input\_nodo\_re\_to\_datastore\_function\_autoscale) | Nodo RE functions autoscaling parameters |
object({
default = number
minimum = number
maximum = number
})
| n/a | yes | +| [nodo\_re\_to\_datastore\_function\_autoscale](#input\_nodo\_re\_to\_datastore\_function\_autoscale) | Nodo RE functions autoscaling parameters |
object({
default = number
minimum = number
maximum = number
})
| n/a | yes | | [nodo\_re\_to\_datastore\_function\_subnet](#input\_nodo\_re\_to\_datastore\_function\_subnet) | Address prefixes subnet | `list(string)` | `null` | no | | [nodo\_re\_to\_datastore\_network\_policies\_enabled](#input\_nodo\_re\_to\_datastore\_network\_policies\_enabled) | Network policies enabled | `bool` | `false` | no | -| [nodo\_re\_to\_tablestorage\_function](#input\_nodo\_re\_to\_tablestorage\_function) | Nodo RE to datastore function |
object({
always_on = bool
kind = string
sku_size = string
sku_tier = string
maximum_elastic_worker_count = number
})
| n/a | yes | +| [nodo\_re\_to\_tablestorage\_function](#input\_nodo\_re\_to\_tablestorage\_function) | Nodo RE to datastore function |
object({
always_on = bool
kind = string
sku_size = string
sku_tier = string
maximum_elastic_worker_count = number
})
| n/a | yes | | [nodo\_re\_to\_tablestorage\_function\_app\_image\_tag](#input\_nodo\_re\_to\_tablestorage\_function\_app\_image\_tag) | Nodo RE to Table Storage function app docker image tag. Defaults to 'latest' | `string` | `"latest"` | no | -| [nodo\_re\_to\_tablestorage\_function\_autoscale](#input\_nodo\_re\_to\_tablestorage\_function\_autoscale) | Nodo RE functions autoscaling parameters |
object({
default = number
minimum = number
maximum = number
})
| n/a | yes | +| [nodo\_re\_to\_tablestorage\_function\_autoscale](#input\_nodo\_re\_to\_tablestorage\_function\_autoscale) | Nodo RE functions autoscaling parameters |
object({
default = number
minimum = number
maximum = number
})
| n/a | yes | | [nodo\_re\_to\_tablestorage\_function\_subnet](#input\_nodo\_re\_to\_tablestorage\_function\_subnet) | Address prefixes subnet | `list(string)` | `null` | no | | [nodo\_re\_to\_tablestorage\_network\_policies\_enabled](#input\_nodo\_re\_to\_tablestorage\_network\_policies\_enabled) | Network policies enabled | `bool` | `false` | no | -| [nodo\_user\_node\_pool](#input\_nodo\_user\_node\_pool) | AKS node pool user configuration |
object({
enabled = bool,
name = string,
vm_size = string,
os_disk_type = string,
os_disk_size_gb = string,
node_count_min = number,
node_count_max = number,
node_labels = map(any),
node_taints = list(string),
node_tags = map(any),
nodo_pool_max_pods = number,
})
| n/a | yes | -| [nodo\_verifyko\_to\_datastore\_function](#input\_nodo\_verifyko\_to\_datastore\_function) | Nodo Verify KO events to datastore function |
object({
always_on = bool
kind = string
sku_size = string
sku_tier = string
maximum_elastic_worker_count = number
zone_balancing_enabled = bool
})
| n/a | yes | +| [nodo\_user\_node\_pool](#input\_nodo\_user\_node\_pool) | AKS node pool user configuration |
object({
enabled = bool,
name = string,
vm_size = string,
os_disk_type = string,
os_disk_size_gb = string,
node_count_min = number,
node_count_max = number,
node_labels = map(any),
node_taints = list(string),
node_tags = map(any),
nodo_pool_max_pods = number,
})
| n/a | yes | +| [nodo\_verifyko\_to\_datastore\_function](#input\_nodo\_verifyko\_to\_datastore\_function) | Nodo Verify KO events to datastore function |
object({
always_on = bool
kind = string
sku_size = string
sku_tier = string
maximum_elastic_worker_count = number
zone_balancing_enabled = bool
})
| n/a | yes | | [nodo\_verifyko\_to\_datastore\_function\_app\_image\_tag](#input\_nodo\_verifyko\_to\_datastore\_function\_app\_image\_tag) | Nodo Verify KO to Datastore function app docker image tag. Defaults to 'latest' | `string` | `"latest"` | no | -| [nodo\_verifyko\_to\_datastore\_function\_autoscale](#input\_nodo\_verifyko\_to\_datastore\_function\_autoscale) | Nodo Verify KO event functions autoscaling parameters |
object({
default = number
minimum = number
maximum = number
})
| n/a | yes | +| [nodo\_verifyko\_to\_datastore\_function\_autoscale](#input\_nodo\_verifyko\_to\_datastore\_function\_autoscale) | Nodo Verify KO event functions autoscaling parameters |
object({
default = number
minimum = number
maximum = number
})
| n/a | yes | | [nodo\_verifyko\_to\_datastore\_function\_subnet](#input\_nodo\_verifyko\_to\_datastore\_function\_subnet) | Address prefixes subnet | `list(string)` | `null` | no | | [nodo\_verifyko\_to\_datastore\_network\_policies\_enabled](#input\_nodo\_verifyko\_to\_datastore\_network\_policies\_enabled) | Network policies enabled | `bool` | `false` | no | -| [nodo\_verifyko\_to\_tablestorage\_function](#input\_nodo\_verifyko\_to\_tablestorage\_function) | Nodo Verify KO events to table storage function |
object({
always_on = bool
kind = string
sku_size = string
sku_tier = string
maximum_elastic_worker_count = number
zone_balancing_enabled = bool
})
| n/a | yes | +| [nodo\_verifyko\_to\_tablestorage\_function](#input\_nodo\_verifyko\_to\_tablestorage\_function) | Nodo Verify KO events to table storage function |
object({
always_on = bool
kind = string
sku_size = string
sku_tier = string
maximum_elastic_worker_count = number
zone_balancing_enabled = bool
})
| n/a | yes | | [nodo\_verifyko\_to\_tablestorage\_function\_app\_image\_tag](#input\_nodo\_verifyko\_to\_tablestorage\_function\_app\_image\_tag) | Nodo Verify KO events to Table Storage function app docker image tag. Defaults to 'latest' | `string` | `"latest"` | no | -| [nodo\_verifyko\_to\_tablestorage\_function\_autoscale](#input\_nodo\_verifyko\_to\_tablestorage\_function\_autoscale) | Nodo Verify KO events to Table Storage functions autoscaling parameters |
object({
default = number
minimum = number
maximum = number
})
| n/a | yes | +| [nodo\_verifyko\_to\_tablestorage\_function\_autoscale](#input\_nodo\_verifyko\_to\_tablestorage\_function\_autoscale) | Nodo Verify KO events to Table Storage functions autoscaling parameters |
object({
default = number
minimum = number
maximum = number
})
| n/a | yes | | [nodo\_verifyko\_to\_tablestorage\_function\_subnet](#input\_nodo\_verifyko\_to\_tablestorage\_function\_subnet) | Address prefixes subnet | `list(string)` | `null` | no | | [nodo\_verifyko\_to\_tablestorage\_network\_policies\_enabled](#input\_nodo\_verifyko\_to\_tablestorage\_network\_policies\_enabled) | Network policies enabled | `bool` | `false` | no | -| [pod\_disruption\_budgets](#input\_pod\_disruption\_budgets) | Pod disruption budget for domain namespace |
map(object({
name = optional(string, null)
minAvailable = optional(number, null)
matchLabels = optional(map(any), {})
}))
| `{}` | no | +| [pod\_disruption\_budgets](#input\_pod\_disruption\_budgets) | Pod disruption budget for domain namespace |
map(object({
name = optional(string, null)
minAvailable = optional(number, null)
matchLabels = optional(map(any), {})
}))
| `{}` | no | | [prefix](#input\_prefix) | n/a | `string` | n/a | yes | -| [route\_aks](#input\_route\_aks) | AKS routing table |
list(object({
name = string
address_prefix = string
next_hop_type = string
next_hop_in_ip_address = string
}))
| n/a | yes | -| [storage\_account\_info](#input\_storage\_account\_info) | Storage account |
object({
account_kind = string
account_tier = string
account_replication_type = string
access_tier = string
advanced_threat_protection_enable = bool
})
|
{
"access_tier": "Hot",
"account_kind": "StorageV2",
"account_replication_type": "LRS",
"account_tier": "Standard",
"advanced_threat_protection_enable": true
}
| no | -| [tags](#input\_tags) | n/a | `map(any)` |
{
"CreatedBy": "Terraform"
}
| no | -| [tls\_cert\_check\_helm](#input\_tls\_cert\_check\_helm) | tls cert helm chart configuration |
object({
chart_version = string,
image_name = string,
image_tag = string
})
| n/a | yes | +| [route\_aks](#input\_route\_aks) | AKS routing table |
list(object({
name = string
address_prefix = string
next_hop_type = string
next_hop_in_ip_address = string
}))
| n/a | yes | +| [storage\_account\_info](#input\_storage\_account\_info) | Storage account |
object({
account_kind = string
account_tier = string
account_replication_type = string
access_tier = string
advanced_threat_protection_enable = bool
})
|
{
"access_tier": "Hot",
"account_kind": "StorageV2",
"account_replication_type": "LRS",
"account_tier": "Standard",
"advanced_threat_protection_enable": true
}
| no | +| [tags](#input\_tags) | n/a | `map(any)` |
{
"CreatedBy": "Terraform"
}
| no | +| [tls\_cert\_check\_helm](#input\_tls\_cert\_check\_helm) | tls cert helm chart configuration |
object({
chart_version = string,
image_name = string,
image_tag = string
})
| n/a | yes | | [vmss\_instance\_number](#input\_vmss\_instance\_number) | availability zones for vmss | `number` | n/a | yes | | [vmss\_zones](#input\_vmss\_zones) | availability zones for vmss | `list(string)` | n/a | yes | -| [wfesp\_dismantling](#input\_wfesp\_dismantling) | n/a |
object({
channel_list = string
wfesp_fixed_url = string
})
| n/a | yes | -| [wisp\_converter](#input\_wisp\_converter) | n/a |
object({
enable_apim_switch = bool # enable WISP dismantling
brokerPSP_whitelist = string
channel_whitelist = string
nodoinviarpt_paymenttype_whitelist = string
dismantling_primitives = string
dismantling_rt_primitives = string
checkout_predefined_expiration_time = number
wisp_ecommerce_channels = string
})
| n/a | yes | +| [wfesp\_dismantling](#input\_wfesp\_dismantling) | n/a |
object({
channel_list = string
wfesp_fixed_url = string
})
| n/a | yes | +| [wisp\_converter](#input\_wisp\_converter) | n/a |
object({
enable_apim_switch = bool # enable WISP dismantling
brokerPSP_whitelist = string
channel_whitelist = string
nodoinviarpt_paymenttype_whitelist = string
dismantling_primitives = string
dismantling_rt_primitives = string
checkout_predefined_expiration_time = number
wisp_ecommerce_channels = string
})
| n/a | yes | ## Outputs No outputs. - +