Skip to content

Commit

Permalink
Add support for Prometheus metrics exporter
Browse files Browse the repository at this point in the history
  • Loading branch information
rpelisse committed Oct 10, 2023
1 parent caa50ba commit fe78895
Show file tree
Hide file tree
Showing 15 changed files with 270 additions and 4 deletions.
3 changes: 3 additions & 0 deletions playbooks/amq_streams_all_cluster.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
vars:
amq_streams_common_download_node: ""
amq_streams_common_download_dir: "/tmp"
amq_streams_broker_prometheus_enabled: True
amq_streams_zookeeper_prometheus_enabled: True
amq_streams_zookeeper_prometheus_port: 18081

# Topic Management
amq_streams_broker_topics:
Expand Down
6 changes: 6 additions & 0 deletions roles/amq_streams_broker/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -109,3 +109,9 @@ amq_streams_broker_admin_cli_config_file: '/tmp/admin-cli.properties'
amq_streams_broker_admin_mechanism: PLAIN
amq_streams_broker_admin_username: PLEASE_IDENTIFY_THE_ADMIN_USER
amq_streams_broker_admin_password: PLEASE_CHANGEME_IAMNOTGOOD_FOR_PRODUCTION

# Prometheus JMX exporter settings for the broker (exporter is off by default).
amq_streams_broker_prometheus_enabled: false
# Port and address handed to the exporter java agent.
amq_streams_broker_prometheus_port: 18080
amq_streams_broker_prometheus_bind_addr: '0.0.0.0'
# Name of the exporter rules file and of the template associated with it.
amq_streams_broker_prometheus_config_file: broker-config.yaml
amq_streams_broker_prometheus_config_file_template: prometheus/broker-config.yaml.j2
12 changes: 12 additions & 0 deletions roles/amq_streams_broker/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,18 @@
- name: "Ensure requirements for broker are fulfilled."
ansible.builtin.include_tasks: prereqs.yml

# Runs the shared Prometheus tasks from amq_streams_common, mapping the
# broker-specific settings onto the variable names used by the common role.
# Skipped unless the broker exporter is enabled.
- name: "Ensure Prometheus requirements are met (if enabled: {{ amq_streams_broker_prometheus_enabled }})."
  ansible.builtin.include_role:
    name: amq_streams_common
    tasks_from: prometheus.yml
  vars:
    amq_streams_common_prometheus_bind_addr: "{{ amq_streams_broker_prometheus_bind_addr }}"
    amq_streams_common_prometheus_port: "{{ amq_streams_broker_prometheus_port }}"
    amq_streams_common_prometheus_config_file_template: "{{ amq_streams_broker_prometheus_config_file_template }}"
    amq_streams_common_prometheus_config_file: "{{ amq_streams_broker_prometheus_config_file }}"
  when: amq_streams_broker_prometheus_enabled is defined and amq_streams_broker_prometheus_enabled

- name: "Ensure OS has been properly tuned (RHEL specific)."
ansible.builtin.include_role:
name: amq_streams_common
Expand Down
4 changes: 4 additions & 0 deletions roles/amq_streams_broker/tasks/service.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,7 @@
server_java_gc_log_opts: "{{ amq_streams_broker_java_java_gc_log_opts | default('') }}"
server_java_jmx_opts: "{{ amq_streams_broker_java_jmx_opts | default('') }}"
server_start_sleep: "{{ amq_streams_broker_zookeeper_session_timeout_ms }}"
server_prometheus_enabled: "{{ amq_streams_broker_prometheus_enabled }}"
server_prometheus_config_file: "{{ amq_streams_broker_prometheus_config_file | default('') }}"
amq_streams_common_prometheus_port: "{{ amq_streams_broker_prometheus_port }}"
amq_streams_common_prometheus_bind_addr: "{{ amq_streams_broker_prometheus_bind_addr }}"
10 changes: 10 additions & 0 deletions roles/amq_streams_broker/tasks/validate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,13 @@
vars:
server_log_validation_min_size: "{{ amq_streams_broker_server_log_validation_min_size }}"
server_log_dir: "{{ amq_streams_broker_logs_dir }}/server.log"

# Runs the shared Prometheus port validation with the broker's exporter
# address and port. Skipped unless the broker exporter is enabled.
- name: "Check that Prometheus port is accessible (if enabled: {{ amq_streams_broker_prometheus_enabled }})"
  ansible.builtin.include_role:
    name: amq_streams_common
    tasks_from: validation/prometheus.yml
  vars:
    amq_streams_common_prometheus_bind_addr: "{{ amq_streams_broker_prometheus_bind_addr }}"
    amq_streams_common_prometheus_port: "{{ amq_streams_broker_prometheus_port }}"
  when:
    - amq_streams_broker_prometheus_enabled is defined and amq_streams_broker_prometheus_enabled
8 changes: 8 additions & 0 deletions roles/amq_streams_common/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,11 @@ amq_streams_common_firewalld_package_name:
amq_streams_common_firewalld_enabled: false

amq_streams_common_offline_install: false

# Prometheus JMX exporter: package installation switch and package name.
amq_streams_common_prometheus_install_rpm: true
amq_streams_common_prometheus_package_name: 'prometheus-jmx-exporter'
# Directory and file name of the exporter java agent jar.
amq_streams_common_prometheus_path_to_jar: /usr/share/java/prometheus-jmx-exporter
amq_streams_common_prometheus_jarname: jmx_prometheus_javaagent.jar
# Fallback address and port; the broker and Zookeeper roles pass their own values.
amq_streams_common_prometheus_bind_addr: '0.0.0.0'
amq_streams_common_prometheus_port: 18080
# Keep the trailing slash: service.conf.j2 appends the config file name
# directly to this value without adding a separator.
amq_streams_common_prometheus_metrics_config_home: /etc/
8 changes: 5 additions & 3 deletions roles/amq_streams_common/tasks/prometheus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@
group: "{{ amq_streams_common_prometheus_group | default(omit) }}"
mode: 0644

# Appends the exporter package name to the list of common dependencies.
- name: "Add prometheus dependency (if enabled: {{ amq_streams_common_prometheus_enabled }})."
  ansible.builtin.set_fact:
    # Refer to the variable by name: Jinja delimiters must not be nested
    # inside an expression that is already being templated.
    amq_streams_common_dependencies: "{{ (amq_streams_common_dependencies + [amq_streams_common_prometheus_package_name]) | flatten }}"
# Installs the exporter package through the generic package module.
# Runs only when RPM installation is switched on and downloads are not skipped.
- name: "Ensure Prometheus jar exporter is installed ({{ amq_streams_common_prometheus_package_name }})."
  ansible.builtin.package:
    state: present
    name: "{{ amq_streams_common_prometheus_package_name }}"
  when:
    - amq_streams_common_prometheus_install_rpm is defined and amq_streams_common_prometheus_install_rpm
    - not amq_streams_common_skip_download is defined or not amq_streams_common_skip_download
12 changes: 12 additions & 0 deletions roles/amq_streams_common/tasks/validation/prometheus.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
# Validation of a Prometheus exporter endpoint: first check that an address
# and a port were supplied, then wait for that port to be reachable.
- name: "Ensure required parameters are provided."
  ansible.builtin.assert:
    that:
      - amq_streams_common_prometheus_bind_addr is defined and amq_streams_common_prometheus_bind_addr | length > 0
      # Cast before comparing: the port may arrive as a string (for example
      # from extra vars), and a string cannot be compared to an integer.
      - amq_streams_common_prometheus_port is defined and amq_streams_common_prometheus_port | int > 0
    quiet: true

- name: "Check that Prometheus port is accessible: {{ amq_streams_common_prometheus_bind_addr }}:{{ amq_streams_common_prometheus_port }}"
  ansible.builtin.wait_for:
    host: "{{ amq_streams_common_prometheus_bind_addr }}"
    port: "{{ amq_streams_common_prometheus_port }}"
144 changes: 144 additions & 0 deletions roles/amq_streams_common/templates/prometheus/broker-config.yaml.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
# JMX Prometheus Exporter rules for the Kafka broker.
# Reference: https://github.com/prometheus/jmx_exporter
# NOTE(review): the rules go from most specific to most generic; the exporter
# presumably applies the first matching rule, so confirm before reordering.
lowercaseOutputName: true
rules:
  # Very specific kafka.server cases come first
  - pattern: kafka.server<type=(.+), name=(.+), clientId=(.+), topic=(.+), partition=(.*)><>Value
    name: kafka_server_$1_$2
    type: GAUGE
    labels:
      clientId: "$3"
      topic: "$4"
      partition: "$5"
  - pattern: kafka.server<type=(.+), name=(.+), clientId=(.+), brokerHost=(.+), brokerPort=(.+)><>Value
    name: kafka_server_$1_$2
    type: GAUGE
    labels:
      clientId: "$3"
      broker: "$4:$5"
  - pattern: kafka.server<type=(.+), cipher=(.+), protocol=(.+), listener=(.+), networkProcessor=(.+)><>connections
    name: kafka_server_$1_connections_tls_info
    type: GAUGE
    labels:
      cipher: "$2"
      protocol: "$3"
      listener: "$4"
      networkProcessor: "$5"
  - pattern: kafka.server<type=(.+), clientSoftwareName=(.+), clientSoftwareVersion=(.+), listener=(.+), networkProcessor=(.+)><>connections
    name: kafka_server_$1_connections_software
    type: GAUGE
    labels:
      clientSoftwareName: "$2"
      clientSoftwareVersion: "$3"
      listener: "$4"
      networkProcessor: "$5"
  - pattern: "kafka.server<type=(.+), listener=(.+), networkProcessor=(.+)><>(.+):"
    name: kafka_server_$1_$4
    type: GAUGE
    labels:
      listener: "$2"
      networkProcessor: "$3"
  - pattern: kafka.server<type=(.+), listener=(.+), networkProcessor=(.+)><>(.+)
    name: kafka_server_$1_$4
    type: GAUGE
    labels:
      listener: "$2"
      networkProcessor: "$3"
  # Percent metrics exposed through the MeanRate attribute, for example
  # kafka.server<type=(KafkaRequestHandlerPool), name=(RequestHandlerAvgIdlePercent)><>MeanRate
  - pattern: kafka.(\w+)<type=(.+), name=(.+)Percent\w*><>MeanRate
    name: kafka_$1_$2_$3_percent
    type: GAUGE
  # Generic percent gauges
  - pattern: kafka.(\w+)<type=(.+), name=(.+)Percent\w*><>Value
    name: kafka_$1_$2_$3_percent
    type: GAUGE
  - pattern: kafka.(\w+)<type=(.+), name=(.+)Percent\w*, (.+)=(.+)><>Value
    name: kafka_$1_$2_$3_percent
    type: GAUGE
    labels:
      "$4": "$5"
  # Generic per-second counters carrying zero to two key/value pairs
  - pattern: kafka.(\w+)<type=(.+), name=(.+)PerSec\w*, (.+)=(.+), (.+)=(.+)><>Count
    name: kafka_$1_$2_$3_total
    type: COUNTER
    labels:
      "$4": "$5"
      "$6": "$7"
  - pattern: kafka.(\w+)<type=(.+), name=(.+)PerSec\w*, (.+)=(.+)><>Count
    name: kafka_$1_$2_$3_total
    type: COUNTER
    labels:
      "$4": "$5"
  - pattern: kafka.(\w+)<type=(.+), name=(.+)PerSec\w*><>Count
    name: kafka_$1_$2_$3_total
    type: COUNTER
  # Generic gauges carrying zero to two key/value pairs
  - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+), (.+)=(.+)><>Value
    name: kafka_$1_$2_$3
    type: GAUGE
    labels:
      "$4": "$5"
      "$6": "$7"
  - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+)><>Value
    name: kafka_$1_$2_$3
    type: GAUGE
    labels:
      "$4": "$5"
  - pattern: kafka.(\w+)<type=(.+), name=(.+)><>Value
    name: kafka_$1_$2_$3
    type: GAUGE
  # Emulation of Prometheus 'Summary' metrics for the exported 'Histogram's.
  # Be aware that the '_sum' metric is not produced.
  - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+), (.+)=(.+)><>Count
    name: kafka_$1_$2_$3_count
    type: COUNTER
    labels:
      "$4": "$5"
      "$6": "$7"
  - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.*), (.+)=(.+)><>(\d+)thPercentile
    name: kafka_$1_$2_$3
    type: GAUGE
    labels:
      "$4": "$5"
      "$6": "$7"
      quantile: "0.$8"
  - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+)><>Count
    name: kafka_$1_$2_$3_count
    type: COUNTER
    labels:
      "$4": "$5"
  - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.*)><>(\d+)thPercentile
    name: kafka_$1_$2_$3
    type: GAUGE
    labels:
      "$4": "$5"
      quantile: "0.$6"
  - pattern: kafka.(\w+)<type=(.+), name=(.+)><>Count
    name: kafka_$1_$2_$3_count
    type: COUNTER
  - pattern: kafka.(\w+)<type=(.+), name=(.+)><>(\d+)thPercentile
    name: kafka_$1_$2_$3
    type: GAUGE
    labels:
      quantile: "0.$4"
  # KRaft mode: uncomment the following lines to export KRaft related metrics
  # KRaft overall related metrics
  # distinguish between always increasing COUNTER (total and max) and variable GAUGE (all others) metrics
  #- pattern: "kafka.server<type=raft-metrics><>(.+-total|.+-max):"
  #  name: kafka_server_raftmetrics_$1
  #  type: COUNTER
  #- pattern: "kafka.server<type=raft-metrics><>(.+):"
  #  name: kafka_server_raftmetrics_$1
  #  type: GAUGE
  # KRaft "low level" channels related metrics
  # distinguish between always increasing COUNTER (total and max) and variable GAUGE (all others) metrics
  #- pattern: "kafka.server<type=raft-channel-metrics><>(.+-total|.+-max):"
  #  name: kafka_server_raftchannelmetrics_$1
  #  type: COUNTER
  #- pattern: "kafka.server<type=raft-channel-metrics><>(.+):"
  #  name: kafka_server_raftchannelmetrics_$1
  #  type: GAUGE
  # Broker metrics related to fetching metadata topic records in KRaft mode
  #- pattern: "kafka.server<type=broker-metadata-metrics><>(.+):"
  #  name: kafka_server_brokermetadatametrics_$1
  #  type: GAUGE
30 changes: 30 additions & 0 deletions roles/amq_streams_common/templates/prometheus/zk-config.yaml.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# JMX Prometheus Exporter rules for Zookeeper.
# Reference: https://github.com/prometheus/jmx_exporter
lowercaseOutputName: true
rules:
  # Replicated Zookeeper: one rule per depth of the nameN bean hierarchy
  - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+)><>(\\w+)"
    name: "zookeeper_$2"
    type: GAUGE
  - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+), name1=replica.(\\d+)><>(\\w+)"
    name: "zookeeper_$3"
    type: GAUGE
    labels:
      replicaId: "$2"
  # Packets* attributes are exported as counters, everything else as gauges
  - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+), name1=replica.(\\d+), name2=(\\w+)><>(Packets\\w+)"
    name: "zookeeper_$4"
    type: COUNTER
    labels:
      replicaId: "$2"
      memberType: "$3"
  - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+), name1=replica.(\\d+), name2=(\\w+)><>(\\w+)"
    name: "zookeeper_$4"
    type: GAUGE
    labels:
      replicaId: "$2"
      memberType: "$3"
  - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+), name1=replica.(\\d+), name2=(\\w+), name3=(\\w+)><>(\\w+)"
    name: "zookeeper_$4_$5"
    type: GAUGE
    labels:
      replicaId: "$2"
      memberType: "$3"
5 changes: 4 additions & 1 deletion roles/amq_streams_common/templates/service.conf.j2
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
# {{ ansible_managed }}
{% if server_log_dir is defined %}LOG_DIR="{{ server_log_dir }}"
{% endif %}
{% if server_java_opts is defined %}KAFKA_OPTS="{{ server_java_opts }}"
{% if server_java_opts is defined and server_java_opts | length > 0 %}KAFKA_OPTS="{{ server_java_opts }}"
{% endif %}
{% if server_prometheus_enabled is defined and server_prometheus_enabled %}{# Adds the exporter java agent to KAFKA_OPTS; default('') keeps rendering working when server_java_opts is not supplied. #}
KAFKA_OPTS="{{ server_java_opts | default('') }} -javaagent:{{ amq_streams_common_prometheus_path_to_jar }}/{{ amq_streams_common_prometheus_jarname }}={{ amq_streams_common_prometheus_bind_addr }}:{{ amq_streams_common_prometheus_port }}:{{ amq_streams_common_prometheus_metrics_config_home }}{{ server_prometheus_config_file }}"
{% endif %}
{% if server_log4j_opts is defined %}KAFKA_LOG4J_OPTS="{{ server_log4j_opts }}"
{% endif %}
Expand Down
6 changes: 6 additions & 0 deletions roles/amq_streams_zookeeper/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,9 @@ amq_streams_zookeeper_auth_user: zookeeper
amq_streams_zookeeper_auth_pass: PLEASE_CHANGE_ME_I_AM_NO_GOOD_FOR_PRODUCTION

amq_streams_zookeeper_server_log_validation_min_size: 20

# Prometheus JMX exporter settings for Zookeeper (exporter is off by default).
amq_streams_zookeeper_prometheus_enabled: false
# Same default port as the broker exporter (18080). The all-cluster playbook
# overrides it with 18081; use distinct ports when both run on one host.
amq_streams_zookeeper_prometheus_port: 18080
amq_streams_zookeeper_prometheus_bind_addr: '0.0.0.0'
# Name of the exporter rules file and of the template associated with it.
amq_streams_zookeeper_prometheus_config_file: zk-config.yaml
amq_streams_zookeeper_prometheus_config_file_template: prometheus/zk-config.yaml.j2
12 changes: 12 additions & 0 deletions roles/amq_streams_zookeeper/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,18 @@
when:
- ansible_distribution == "RedHat"

# Runs the shared Prometheus tasks from amq_streams_common, mapping the
# Zookeeper-specific settings onto the variable names used by the common role.
# Skipped unless the Zookeeper exporter is enabled.
- name: "Ensure Prometheus requirements are met (if enabled: {{ amq_streams_zookeeper_prometheus_enabled }}) - {{ amq_streams_zookeeper_prometheus_config_file_template }}."
  ansible.builtin.include_role:
    name: amq_streams_common
    tasks_from: prometheus.yml
  vars:
    amq_streams_common_prometheus_bind_addr: "{{ amq_streams_zookeeper_prometheus_bind_addr }}"
    amq_streams_common_prometheus_port: "{{ amq_streams_zookeeper_prometheus_port }}"
    amq_streams_common_prometheus_config_file_template: "{{ amq_streams_zookeeper_prometheus_config_file_template }}"
    amq_streams_common_prometheus_config_file: "{{ amq_streams_zookeeper_prometheus_config_file }}"
  when: amq_streams_zookeeper_prometheus_enabled is defined and amq_streams_zookeeper_prometheus_enabled

- name: "Ensure Zookeeper data dir exists and belongs to the appropriate user - if user is defined."
ansible.builtin.file:
path: "{{ amq_streams_zookeeper_data_dir }}"
Expand Down
4 changes: 4 additions & 0 deletions roles/amq_streams_zookeeper/tasks/service.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,7 @@
server_java_performance_opts: "{{ amq_streams_zookeeper_java_performance_opts | default('') }}"
server_java_gc_log_opts: "{{ amq_streams_zookeeper_java_java_gc_log_opts | default('') }}"
server_java_jmx_opts: "{{ amq_streams_zookeeper_java_jmx_opts | default('') }}"
server_prometheus_enabled: "{{ amq_streams_zookeeper_prometheus_enabled }}"
server_prometheus_config_file: "{{ amq_streams_zookeeper_prometheus_config_file | default('') }}"
amq_streams_common_prometheus_port: "{{ amq_streams_zookeeper_prometheus_port }}"
amq_streams_common_prometheus_bind_addr: "{{ amq_streams_zookeeper_prometheus_bind_addr }}"
10 changes: 10 additions & 0 deletions roles/amq_streams_zookeeper/tasks/validate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,13 @@
when:
- amq_streams_deployment_balance_check_enabled is defined and amq_streams_deployment_balance_check_enabled
- amq_streams_zookeeper_instance_count is defined

# Runs the shared Prometheus port validation with Zookeeper's exporter
# address and port. Skipped unless the Zookeeper exporter is enabled.
- name: "Check that Prometheus port is accessible (if enabled: {{ amq_streams_zookeeper_prometheus_enabled }})"
  ansible.builtin.include_role:
    name: amq_streams_common
    tasks_from: validation/prometheus.yml
  vars:
    # The shared validation tasks read the amq_streams_common_* names, so the
    # Zookeeper settings have to be mapped onto those names.
    amq_streams_common_prometheus_bind_addr: "{{ amq_streams_zookeeper_prometheus_bind_addr }}"
    amq_streams_common_prometheus_port: "{{ amq_streams_zookeeper_prometheus_port }}"
  when:
    - amq_streams_zookeeper_prometheus_enabled is defined and amq_streams_zookeeper_prometheus_enabled

0 comments on commit fe78895

Please sign in to comment.