From fe78895fdf42c6f81bcded05f89df3ce32f6315c Mon Sep 17 00:00:00 2001
From: Romain Pelisse
Date: Tue, 3 Oct 2023 13:30:49 +0200
Subject: [PATCH] Add support for Prometheus metrics exporter

Add optional Prometheus JMX exporter support to the broker and zookeeper
roles. Each role gets an `*_prometheus_enabled` switch (off by default)
plus port, bind address and exporter config file settings. When enabled,
the common role installs the exporter package, the service environment
file appends the `-javaagent` option to KAFKA_OPTS, and validation waits
for the exporter port to accept connections.

---
Review notes (ignored by `git am`, not part of the commit message):
- zookeeper/tasks/validate.yml: pass the zookeeper bind address and port
  to the common validation through the amq_streams_common_* variables it
  reads (the zookeeper_* variables were being set from the broker ones).
- broker/tasks/validate.yml: close the parenthesis in the task name.
- common/tasks/validation/prometheus.yml: cast the port with `| int`
  before comparing it to 0, since a templated value may be a string.
- common/templates/service.conf.j2: use `server_java_opts | default('')`
  on the Prometheus KAFKA_OPTS line, which is rendered even when
  server_java_opts is not defined.
- Booleans written as true/false, as in the surrounding defaults.
- The JMX ObjectName parts of the exporter patterns were restored from
  the upstream example metrics configuration; verify against upstream.
- The blob ids on the `index` lines predate these review changes.
- The author e-mail address is missing from the From: header in this
  copy; restore it before applying.

 playbooks/amq_streams_all_cluster.yml         |   3 +
 roles/amq_streams_broker/defaults/main.yml    |   6 +
 roles/amq_streams_broker/tasks/main.yml       |  12 ++
 roles/amq_streams_broker/tasks/service.yml    |   4 +
 roles/amq_streams_broker/tasks/validate.yml   |  10 ++
 roles/amq_streams_common/defaults/main.yml    |   8 +
 roles/amq_streams_common/tasks/prometheus.yml |   8 +-
 .../tasks/validation/prometheus.yml           |  12 ++
 .../prometheus/broker-config.yaml.j2          | 144 ++++++++++++++++++
 .../templates/prometheus/zk-config.yaml.j2    |  30 ++++
 .../templates/service.conf.j2                 |   5 +-
 roles/amq_streams_zookeeper/defaults/main.yml |   6 +
 roles/amq_streams_zookeeper/tasks/main.yml    |  12 ++
 roles/amq_streams_zookeeper/tasks/service.yml |   4 +
 .../amq_streams_zookeeper/tasks/validate.yml  |  10 ++
 15 files changed, 270 insertions(+), 4 deletions(-)
 create mode 100644 roles/amq_streams_common/tasks/validation/prometheus.yml
 create mode 100644 roles/amq_streams_common/templates/prometheus/broker-config.yaml.j2
 create mode 100644 roles/amq_streams_common/templates/prometheus/zk-config.yaml.j2

diff --git a/playbooks/amq_streams_all_cluster.yml b/playbooks/amq_streams_all_cluster.yml
index a174584..7c03d52 100644
--- a/playbooks/amq_streams_all_cluster.yml
+++ b/playbooks/amq_streams_all_cluster.yml
@@ -4,6 +4,9 @@
   vars:
     amq_streams_common_download_node: ""
     amq_streams_common_download_dir: "/tmp"
+    amq_streams_broker_prometheus_enabled: true
+    amq_streams_zookeeper_prometheus_enabled: true
+    amq_streams_zookeeper_prometheus_port: 18081
 
     # Topic Management
     amq_streams_broker_topics:
diff --git a/roles/amq_streams_broker/defaults/main.yml b/roles/amq_streams_broker/defaults/main.yml
index b5996cd..3dd0ec6 100644
--- a/roles/amq_streams_broker/defaults/main.yml
+++ b/roles/amq_streams_broker/defaults/main.yml
@@ -109,3 +109,9 @@ amq_streams_broker_admin_cli_config_file: '/tmp/admin-cli.properties'
 amq_streams_broker_admin_mechanism: PLAIN
 amq_streams_broker_admin_username: PLEASE_IDENTIFY_THE_ADMIN_USER
 amq_streams_broker_admin_password: PLEASE_CHANGEME_IAMNOTGOOD_FOR_PRODUCTION
+
+amq_streams_broker_prometheus_enabled: false
+amq_streams_broker_prometheus_port: 18080
+amq_streams_broker_prometheus_bind_addr: '0.0.0.0'
+amq_streams_broker_prometheus_config_file: broker-config.yaml
+amq_streams_broker_prometheus_config_file_template: prometheus/broker-config.yaml.j2
diff --git a/roles/amq_streams_broker/tasks/main.yml b/roles/amq_streams_broker/tasks/main.yml
index 9f11aa9..77df4c8 100644
--- a/roles/amq_streams_broker/tasks/main.yml
+++ b/roles/amq_streams_broker/tasks/main.yml
@@ -2,6 +2,18 @@
 - name: "Ensure requirements for broker are fulfilled."
   ansible.builtin.include_tasks: prereqs.yml
 
+- name: "Ensure Prometheus requirements are met (if enabled: {{ amq_streams_broker_prometheus_enabled }})."
+  ansible.builtin.include_role:
+    name: amq_streams_common
+    tasks_from: prometheus.yml
+  vars:
+    amq_streams_common_prometheus_port: "{{ amq_streams_broker_prometheus_port }}"
+    amq_streams_common_prometheus_bind_addr: "{{ amq_streams_broker_prometheus_bind_addr }}"
+    amq_streams_common_prometheus_config_file: "{{ amq_streams_broker_prometheus_config_file }}"
+    amq_streams_common_prometheus_config_file_template: "{{ amq_streams_broker_prometheus_config_file_template }}"
+  when:
+    - amq_streams_broker_prometheus_enabled is defined and amq_streams_broker_prometheus_enabled
+
 - name: "Ensure OS has been properly tuned (RHEL specific)."
   ansible.builtin.include_role:
     name: amq_streams_common
diff --git a/roles/amq_streams_broker/tasks/service.yml b/roles/amq_streams_broker/tasks/service.yml
index 61724d2..3594d9a 100644
--- a/roles/amq_streams_broker/tasks/service.yml
+++ b/roles/amq_streams_broker/tasks/service.yml
@@ -42,3 +42,7 @@
     server_java_gc_log_opts: "{{ amq_streams_broker_java_java_gc_log_opts | default('') }}"
     server_java_jmx_opts: "{{ amq_streams_broker_java_jmx_opts | default('') }}"
     server_start_sleep: "{{ amq_streams_broker_zookeeper_session_timeout_ms }}"
+    server_prometheus_enabled: "{{ amq_streams_broker_prometheus_enabled }}"
+    server_prometheus_config_file: "{{ amq_streams_broker_prometheus_config_file | default('') }}"
+    amq_streams_common_prometheus_port: "{{ amq_streams_broker_prometheus_port }}"
+    amq_streams_common_prometheus_bind_addr: "{{ amq_streams_broker_prometheus_bind_addr }}"
diff --git a/roles/amq_streams_broker/tasks/validate.yml b/roles/amq_streams_broker/tasks/validate.yml
index 60a2055..0268cd8 100644
--- a/roles/amq_streams_broker/tasks/validate.yml
+++ b/roles/amq_streams_broker/tasks/validate.yml
@@ -13,3 +13,13 @@
   vars:
     server_log_validation_min_size: "{{ amq_streams_broker_server_log_validation_min_size }}"
     server_log_dir: "{{ amq_streams_broker_logs_dir }}/server.log"
+
+- name: "Check that Prometheus port is accessible (if enabled: {{ amq_streams_broker_prometheus_enabled }})"
+  ansible.builtin.include_role:
+    name: amq_streams_common
+    tasks_from: validation/prometheus.yml
+  vars:
+    amq_streams_common_prometheus_bind_addr: "{{ amq_streams_broker_prometheus_bind_addr }}"
+    amq_streams_common_prometheus_port: "{{ amq_streams_broker_prometheus_port }}"
+  when:
+    - amq_streams_broker_prometheus_enabled is defined and amq_streams_broker_prometheus_enabled
diff --git a/roles/amq_streams_common/defaults/main.yml b/roles/amq_streams_common/defaults/main.yml
index 192f7f3..6e49bf1 100644
--- a/roles/amq_streams_common/defaults/main.yml
+++ b/roles/amq_streams_common/defaults/main.yml
@@ -24,3 +24,11 @@ amq_streams_common_firewalld_package_name:
 amq_streams_common_firewalld_enabled: false
 
 amq_streams_common_offline_install: false
+
+amq_streams_common_prometheus_install_rpm: true
+amq_streams_common_prometheus_package_name: 'prometheus-jmx-exporter'
+amq_streams_common_prometheus_path_to_jar: /usr/share/java/prometheus-jmx-exporter
+amq_streams_common_prometheus_jarname: jmx_prometheus_javaagent.jar
+amq_streams_common_prometheus_bind_addr: '0.0.0.0'
+amq_streams_common_prometheus_port: 18080
+amq_streams_common_prometheus_metrics_config_home: /etc/
diff --git a/roles/amq_streams_common/tasks/prometheus.yml b/roles/amq_streams_common/tasks/prometheus.yml
index 27d7e6d..1cb1ce5 100644
--- a/roles/amq_streams_common/tasks/prometheus.yml
+++ b/roles/amq_streams_common/tasks/prometheus.yml
@@ -19,8 +19,10 @@
     group: "{{ amq_streams_common_prometheus_group | default(omit) }}"
     mode: 0644
 
-- name: "Add prometheus dependency (if enabled: {{ amq_streams_common_prometheus_enabled }})."
-  ansible.builtin.set_fact:
-    amq_streams_common_dependencies: "{{ [amq_streams_common_dependencies + ['{{ amq_streams_common_prometheus_package_name }}']] | flatten }}"
+- name: "Ensure Prometheus jar exporter is installed ({{ amq_streams_common_prometheus_package_name }})."
+  ansible.builtin.package:
+    name: "{{ amq_streams_common_prometheus_package_name }}"
+    state: present
   when:
     - amq_streams_common_prometheus_install_rpm is defined and amq_streams_common_prometheus_install_rpm
+    - not amq_streams_common_skip_download is defined or not amq_streams_common_skip_download
diff --git a/roles/amq_streams_common/tasks/validation/prometheus.yml b/roles/amq_streams_common/tasks/validation/prometheus.yml
new file mode 100644
index 0000000..937c2bc
--- /dev/null
+++ b/roles/amq_streams_common/tasks/validation/prometheus.yml
@@ -0,0 +1,12 @@
+---
+- name: "Ensure required parameters are provided."
+  ansible.builtin.assert:
+    that:
+      - amq_streams_common_prometheus_bind_addr is defined and amq_streams_common_prometheus_bind_addr | length > 0
+      - amq_streams_common_prometheus_port is defined and amq_streams_common_prometheus_port | int > 0
+    quiet: true
+
+- name: "Check that Prometheus port is accessible: {{ amq_streams_common_prometheus_bind_addr }}:{{ amq_streams_common_prometheus_port }}"
+  ansible.builtin.wait_for:
+    host: "{{ amq_streams_common_prometheus_bind_addr }}"
+    port: "{{ amq_streams_common_prometheus_port }}"
diff --git a/roles/amq_streams_common/templates/prometheus/broker-config.yaml.j2 b/roles/amq_streams_common/templates/prometheus/broker-config.yaml.j2
new file mode 100644
index 0000000..5506eb5
--- /dev/null
+++ b/roles/amq_streams_common/templates/prometheus/broker-config.yaml.j2
@@ -0,0 +1,144 @@
+# See https://github.com/prometheus/jmx_exporter for more info about JMX Prometheus Exporter metrics
+    lowercaseOutputName: true
+    rules:
+    # Special cases and very specific rules
+    - pattern: kafka.server<type=(.+), name=(.+), clientId=(.+), topic=(.+), partition=(.*)><>Value
+      name: kafka_server_$1_$2
+      type: GAUGE
+      labels:
+        clientId: "$3"
+        topic: "$4"
+        partition: "$5"
+    - pattern: kafka.server<type=(.+), name=(.+), clientId=(.+), brokerHost=(.+), brokerPort=(.+)><>Value
+      name: kafka_server_$1_$2
+      type: GAUGE
+      labels:
+        clientId: "$3"
+        broker: "$4:$5"
+    - pattern: kafka.server<type=(.+), cipher=(.+), protocol=(.+), listener=(.+), networkProcessor=(.+)><>connections
+      name: kafka_server_$1_connections_tls_info
+      type: GAUGE
+      labels:
+        cipher: "$2"
+        protocol: "$3"
+        listener: "$4"
+        networkProcessor: "$5"
+    - pattern: kafka.server<type=(.+), clientSoftwareName=(.+), clientSoftwareVersion=(.+), listener=(.+), networkProcessor=(.+)><>connections
+      name: kafka_server_$1_connections_software
+      type: GAUGE
+      labels:
+        clientSoftwareName: "$2"
+        clientSoftwareVersion: "$3"
+        listener: "$4"
+        networkProcessor: "$5"
+    - pattern: "kafka.server<type=(.+), listener=(.+), networkProcessor=(.+)><>(.+):"
+      name: kafka_server_$1_$4
+      type: GAUGE
+      labels:
+        listener: "$2"
+        networkProcessor: "$3"
+    - pattern: kafka.server<type=(.+), listener=(.+), networkProcessor=(.+)><>(.+)
+      name: kafka_server_$1_$4
+      type: GAUGE
+      labels:
+        listener: "$2"
+        networkProcessor: "$3"
+    # Some percent metrics use MeanRate attribute
+    # Ex) kafka.server<type=(KafkaRequestHandlerPool), name=(RequestHandlerAvgIdlePercent)><>MeanRate
+    - pattern: kafka.(\w+)<type=(.+), name=(.+)Percent\w*><>MeanRate
+      name: kafka_$1_$2_$3_percent
+      type: GAUGE
+    # Generic gauges for percents
+    - pattern: kafka.(\w+)<type=(.+), name=(.+)Percent\w*><>Value
+      name: kafka_$1_$2_$3_percent
+      type: GAUGE
+    - pattern: kafka.(\w+)<type=(.+), name=(.+)Percent\w*, (.+)=(.+)><>Value
+      name: kafka_$1_$2_$3_percent
+      type: GAUGE
+      labels:
+        "$4": "$5"
+    # Generic per-second counters with 0-2 key/value pairs
+    - pattern: kafka.(\w+)<type=(.+), name=(.+)PerSec\w*, (.+)=(.+), (.+)=(.+)><>Count
+      name: kafka_$1_$2_$3_total
+      type: COUNTER
+      labels:
+        "$4": "$5"
+        "$6": "$7"
+    - pattern: kafka.(\w+)<type=(.+), name=(.+)PerSec\w*, (.+)=(.+)><>Count
+      name: kafka_$1_$2_$3_total
+      type: COUNTER
+      labels:
+        "$4": "$5"
+    - pattern: kafka.(\w+)<type=(.+), name=(.+)PerSec\w*><>Count
+      name: kafka_$1_$2_$3_total
+      type: COUNTER
+    # Generic gauges with 0-2 key/value pairs
+    - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+), (.+)=(.+)><>Value
+      name: kafka_$1_$2_$3
+      type: GAUGE
+      labels:
+        "$4": "$5"
+        "$6": "$7"
+    - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+)><>Value
+      name: kafka_$1_$2_$3
+      type: GAUGE
+      labels:
+        "$4": "$5"
+    - pattern: kafka.(\w+)<type=(.+), name=(.+)><>Value
+      name: kafka_$1_$2_$3
+      type: GAUGE
+    # Emulate Prometheus 'Summary' metrics for the exported 'Histogram's.
+    # Note that these are missing the '_sum' metric!
+    - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+), (.+)=(.+)><>Count
+      name: kafka_$1_$2_$3_count
+      type: COUNTER
+      labels:
+        "$4": "$5"
+        "$6": "$7"
+    - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.*), (.+)=(.+)><>(\d+)thPercentile
+      name: kafka_$1_$2_$3
+      type: GAUGE
+      labels:
+        "$4": "$5"
+        "$6": "$7"
+        quantile: "0.$8"
+    - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+)><>Count
+      name: kafka_$1_$2_$3_count
+      type: COUNTER
+      labels:
+        "$4": "$5"
+    - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.*)><>(\d+)thPercentile
+      name: kafka_$1_$2_$3
+      type: GAUGE
+      labels:
+        "$4": "$5"
+        quantile: "0.$6"
+    - pattern: kafka.(\w+)<type=(.+), name=(.+)><>Count
+      name: kafka_$1_$2_$3_count
+      type: COUNTER
+    - pattern: kafka.(\w+)<type=(.+), name=(.+)><>(\d+)thPercentile
+      name: kafka_$1_$2_$3
+      type: GAUGE
+      labels:
+        quantile: "0.$4"
+    # KRaft mode: uncomment the following lines to export KRaft related metrics
+    # KRaft overall related metrics
+    # distinguish between always increasing COUNTER (total and max) and variable GAUGE (all others) metrics
+    #- pattern: "kafka.server<type=raft-metrics><>(.+-total|.+-max):"
+    #  name: kafka_server_raftmetrics_$1
+    #  type: COUNTER
+    #- pattern: "kafka.server<type=raft-metrics><>(.+):"
+    #  name: kafka_server_raftmetrics_$1
+    #  type: GAUGE
+    # KRaft "low level" channels related metrics
+    # distinguish between always increasing COUNTER (total and max) and variable GAUGE (all others) metrics
+    #- pattern: "kafka.server<type=raft-channel-metrics><>(.+-total|.+-max):"
+    #  name: kafka_server_raftchannelmetrics_$1
+    #  type: COUNTER
+    #- pattern: "kafka.server<type=raft-channel-metrics><>(.+):"
+    #  name: kafka_server_raftchannelmetrics_$1
+    #  type: GAUGE
+    # Broker metrics related to fetching metadata topic records in KRaft mode
+    #- pattern: "kafka.server<type=broker-metadata-metrics><>(.+):"
+    #  name: kafka_server_brokermetadatametrics_$1
+    #  type: GAUGE
diff --git a/roles/amq_streams_common/templates/prometheus/zk-config.yaml.j2 b/roles/amq_streams_common/templates/prometheus/zk-config.yaml.j2
new file mode 100644
index 0000000..e2a1f83
--- /dev/null
+++ b/roles/amq_streams_common/templates/prometheus/zk-config.yaml.j2
@@ -0,0 +1,30 @@
+    # See https://github.com/prometheus/jmx_exporter for more info about JMX Prometheus Exporter metrics
+    lowercaseOutputName: true
+    rules:
+    # replicated Zookeeper
+    - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+)><>(\\w+)"
+      name: "zookeeper_$2"
+      type: GAUGE
+    - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+), name1=replica.(\\d+)><>(\\w+)"
+      name: "zookeeper_$3"
+      type: GAUGE
+      labels:
+        replicaId: "$2"
+    - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+), name1=replica.(\\d+), name2=(\\w+)><>(Packets\\w+)"
+      name: "zookeeper_$4"
+      type: COUNTER
+      labels:
+        replicaId: "$2"
+        memberType: "$3"
+    - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+), name1=replica.(\\d+), name2=(\\w+)><>(\\w+)"
+      name: "zookeeper_$4"
+      type: GAUGE
+      labels:
+        replicaId: "$2"
+        memberType: "$3"
+    - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+), name1=replica.(\\d+), name2=(\\w+), name3=(\\w+)><>(\\w+)"
+      name: "zookeeper_$4_$5"
+      type: GAUGE
+      labels:
+        replicaId: "$2"
+        memberType: "$3"
diff --git a/roles/amq_streams_common/templates/service.conf.j2 b/roles/amq_streams_common/templates/service.conf.j2
index 4170c5d..7541032 100644
--- a/roles/amq_streams_common/templates/service.conf.j2
+++ b/roles/amq_streams_common/templates/service.conf.j2
@@ -1,7 +1,10 @@
 # {{ ansible_managed }}
 {% if server_log_dir is defined %}LOG_DIR="{{ server_log_dir }}"
 {% endif %}
-{% if server_java_opts is defined %}KAFKA_OPTS="{{ server_java_opts }}"
+{% if server_java_opts is defined and server_java_opts | length > 0 %}KAFKA_OPTS="{{ server_java_opts }}"
+{% endif %}
+{% if server_prometheus_enabled is defined and server_prometheus_enabled %}
+KAFKA_OPTS="{{ server_java_opts | default('') }} -javaagent:{{ amq_streams_common_prometheus_path_to_jar }}/{{ amq_streams_common_prometheus_jarname }}={{ amq_streams_common_prometheus_bind_addr }}:{{ amq_streams_common_prometheus_port }}:{{ amq_streams_common_prometheus_metrics_config_home }}{{ server_prometheus_config_file }}"
 {% endif %}
 {% if server_log4j_opts is defined %}KAFKA_LOG4J_OPTS="{{ server_log4j_opts }}"
 {% endif %}
diff --git a/roles/amq_streams_zookeeper/defaults/main.yml b/roles/amq_streams_zookeeper/defaults/main.yml
index c1d364c..1618fa9 100644
--- a/roles/amq_streams_zookeeper/defaults/main.yml
+++ b/roles/amq_streams_zookeeper/defaults/main.yml
@@ -42,3 +42,9 @@ amq_streams_zookeeper_auth_user: zookeeper
 amq_streams_zookeeper_auth_pass: PLEASE_CHANGE_ME_I_AM_NO_GOOD_FOR_PRODUCTION
 
 amq_streams_zookeeper_server_log_validation_min_size: 20
+
+amq_streams_zookeeper_prometheus_enabled: false
+amq_streams_zookeeper_prometheus_port: 18080
+amq_streams_zookeeper_prometheus_bind_addr: '0.0.0.0'
+amq_streams_zookeeper_prometheus_config_file: zk-config.yaml
+amq_streams_zookeeper_prometheus_config_file_template: prometheus/zk-config.yaml.j2
diff --git a/roles/amq_streams_zookeeper/tasks/main.yml b/roles/amq_streams_zookeeper/tasks/main.yml
index 6332152..52aad4f 100644
--- a/roles/amq_streams_zookeeper/tasks/main.yml
+++ b/roles/amq_streams_zookeeper/tasks/main.yml
@@ -13,6 +13,18 @@
   when:
     - ansible_distribution == "RedHat"
 
+- name: "Ensure Prometheus requirements are met (if enabled: {{ amq_streams_zookeeper_prometheus_enabled }}) - {{ amq_streams_zookeeper_prometheus_config_file_template }}."
+  ansible.builtin.include_role:
+    name: amq_streams_common
+    tasks_from: prometheus.yml
+  vars:
+    amq_streams_common_prometheus_port: "{{ amq_streams_zookeeper_prometheus_port }}"
+    amq_streams_common_prometheus_bind_addr: "{{ amq_streams_zookeeper_prometheus_bind_addr }}"
+    amq_streams_common_prometheus_config_file: "{{ amq_streams_zookeeper_prometheus_config_file }}"
+    amq_streams_common_prometheus_config_file_template: "{{ amq_streams_zookeeper_prometheus_config_file_template }}"
+  when:
+    - amq_streams_zookeeper_prometheus_enabled is defined and amq_streams_zookeeper_prometheus_enabled
+
 - name: "Ensure Zookeeper data dir exists and belongs to the appropriate user - if user is defined."
   ansible.builtin.file:
     path: "{{ amq_streams_zookeeper_data_dir }}"
diff --git a/roles/amq_streams_zookeeper/tasks/service.yml b/roles/amq_streams_zookeeper/tasks/service.yml
index 62ea40b..cb1e8af 100644
--- a/roles/amq_streams_zookeeper/tasks/service.yml
+++ b/roles/amq_streams_zookeeper/tasks/service.yml
@@ -29,3 +29,7 @@
     server_java_performance_opts: "{{ amq_streams_zookeeper_java_performance_opts | default('') }}"
     server_java_gc_log_opts: "{{ amq_streams_zookeeper_java_java_gc_log_opts | default('') }}"
     server_java_jmx_opts: "{{ amq_streams_zookeeper_java_jmx_opts | default('') }}"
+    server_prometheus_enabled: "{{ amq_streams_zookeeper_prometheus_enabled }}"
+    server_prometheus_config_file: "{{ amq_streams_zookeeper_prometheus_config_file | default('') }}"
+    amq_streams_common_prometheus_port: "{{ amq_streams_zookeeper_prometheus_port }}"
+    amq_streams_common_prometheus_bind_addr: "{{ amq_streams_zookeeper_prometheus_bind_addr }}"
diff --git a/roles/amq_streams_zookeeper/tasks/validate.yml b/roles/amq_streams_zookeeper/tasks/validate.yml
index f4e9228..3c9a1a2 100644
--- a/roles/amq_streams_zookeeper/tasks/validate.yml
+++ b/roles/amq_streams_zookeeper/tasks/validate.yml
@@ -23,3 +23,13 @@
   when:
     - amq_streams_deployment_balance_check_enabled is defined and amq_streams_deployment_balance_check_enabled
     - amq_streams_zookeeper_instance_count is defined
+
+- name: "Check that Prometheus port is accessible (if enabled: {{ amq_streams_zookeeper_prometheus_enabled }})"
+  ansible.builtin.include_role:
+    name: amq_streams_common
+    tasks_from: validation/prometheus.yml
+  vars:
+    amq_streams_common_prometheus_bind_addr: "{{ amq_streams_zookeeper_prometheus_bind_addr }}"
+    amq_streams_common_prometheus_port: "{{ amq_streams_zookeeper_prometheus_port }}"
+  when:
+    - amq_streams_zookeeper_prometheus_enabled is defined and amq_streams_zookeeper_prometheus_enabled