[azure_metrics] Duplicated metrics / metrics are not grouped #7621
Case 2 may be a side effect of how the Azure Metrics metricset works, like in #7162 (comment). I'll try to reproduce these issues. Is there a metricset where this always happens?
I could always reproduce it for …
@tetianakravchenko, we are changing the interval and the grouping logic for the metricset. I pushed a custom build of the agent with these changes. Would you mind giving it a try on your local stack with the Compute VM metrics? The agent version is 8.12.0-SNAPSHOT and the Docker image ID is in zmoog/public-notes#35 (comment). I pushed the …
@zmoog thanks! I will give it a try and come back to you today or tomorrow.
We made a few additional changes to the interval and added a … I'm building a new image, updates soon!
Here's the latest custom agent build info:
Changes:
I am currently focusing the testing on …
@tetianakravchenko, did you get a chance to run some tests using the custom agent build?
Here's the latest custom agent build info:
Changes:
@zmoog could you please build and share …
I am getting the warning above. Though I can run the container, for some reason I do not see the Azure metrics.
Which metrics are you testing?
Yep! I'll rebuild it for linux/amd64.
@tetianakravchenko, here's the latest custom agent build info for the
Same changes:
Found some errors in the logs: …
Here's the latest custom agent build info:
Changes:
@tetianakravchenko, I am running the agent build …
Thanks for testing all these metricsets! I believe I found the root cause for the error. I'm building a new custom agent image to fix this error for all metrics.
Here's the latest custom agent build info:
Changes:
Test results:
doc1
doc2
Since … is used: @zmoog, if you think that the last point should be addressed by elastic/beats#36778, I think we are good to open elastic/beats#36823.
529 is an unofficial HTTP status code that Azure sometimes uses. It seems to be a transient error caused by too many requests hitting the servers on the Azure side. The duplication is probably happening because the metricset didn't complete processing, and the event lacks some dimensions.
\o/
Okay, so it looks fine and we only need to add a missing dimension definition here?
Yes, some of the duplicated documents are related to the fact that … The other duplication group, I believe, could be related to the different …
Yeah, I see the metric example you reported has a … time grain. This integration has PT5M and PT1H time grains and a 5m collection period, so the metricset collects the same PT1H data point multiple times. This duplication is not a blocker for TSDB, which improves the situation by dropping the extra documents. However, we can try to integrate the work @tdancheva did to solve this problem for both TSDB and non-TSDB data streams.
Opened #8319; I am testing whether, with this change, I no longer see any overlapping docs (besides the docs with …)
The only overlapping documents are the ones with …
I am testing a build that handles the mixed time grains scenario, like a metricset with a 5 min collection period and metrics with … time grains. I'll ship a custom agent build a little later this morning with this change.
Are you testing the build that includes elastic/beats#36778?
Yep, parts of it. I had to make additional changes because some metrics configurations (for example, the first configuration in the database account) do not specify a time grain value. If we don't have a time grain in the config, we can't calculate the intervals before sending the requests.
Here's the latest custom agent build info:
Changes:
I've tested this image for all data streams: no duplications (except one pair of …)
Hi! We just realized that we haven't looked into this issue in a while. We're sorry! We're labeling this issue as …
Closing this as we have no reports of grouping issues anymore. Feel free to reopen if new data becomes available. |
Case 1. Duplicated documents. Comparing the documents below, the only difference is the document id, `event.duration`, and `event.ingested`.
It seems fine to drop one of the documents (this will happen when TSDB is enabled), since the list of metrics and their values are the same (at least for the example provided below).
document 1
document 2
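One way to see why TSDB can safely drop the Case 1 duplicate: a deterministic id derived from the timestamp plus the dimension values makes both documents collide, regardless of `event.duration` and `event.ingested`. An illustrative Go sketch only; TSDB derives the `_id` from `@timestamp` and the declared time-series dimensions internally, not via this helper:

```go
package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"sort"
	"strings"
)

// fingerprint builds a deterministic id from the timestamp plus the sorted
// dimension key/value pairs, so two documents that differ only in
// event.duration and event.ingested collapse onto the same id.
func fingerprint(timestamp string, dims map[string]string) string {
	keys := make([]string, 0, len(dims))
	for k := range dims {
		keys = append(keys, k)
	}
	sort.Strings(keys) // deterministic ordering of dimensions
	var b strings.Builder
	b.WriteString(timestamp)
	for _, k := range keys {
		b.WriteString("|" + k + "=" + dims[k])
	}
	sum := sha256.Sum256([]byte(b.String()))
	return hex.EncodeToString(sum[:])
}

func main() {
	dims := map[string]string{
		"cloud.instance.id": "vm-1", // hypothetical dimension values
		"azure.namespace":   "Microsoft.Compute/virtualMachines",
	}
	a := fingerprint("2023-08-30T15:31:00.000Z", dims)
	b := fingerprint("2023-08-30T15:31:00.000Z", dims)
	fmt.Println(a == b) // true: identical timestamp + dimensions, same id
}
```

If a dimension is missing from one of the two documents (as in Case 2), the fingerprints no longer match, which is why missing dimension definitions break the dedup.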
Case 2. The list of metrics in the documents is different, and the values for the same metric name differ. This case looks like a bug.
Another strange thing: one document has `"ingested": "2023-08-30T15:36:46Z"` and the other `"ingested": "2023-08-30T15:41:49Z"`, so 5 min apart, but they are published under the same timestamp `"@timestamp": "2023-08-30T15:31:00.000Z"`.
document 1
document 2
Note: I could see this behavior for both `compute_vm` and `compute_vm_scaleset` (`monitor` metricset).
cc @zmoog @tommyers-elastic