Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RHIDP-3294: Deploy postgres-exporter #74

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions ci-scripts/rhdh-setup/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ export ENABLE_PROFILING="${ENABLE_PROFILING:-false}"

# Debug/monitoring toggles and PostgreSQL logging parameters; each keeps a value
# already present in the environment and otherwise falls back to the default.
export PSQL_LOG="${PSQL_LOG:-true}"
export RHDH_METRIC="${RHDH_METRIC:-true}"
# Gates the postgres-exporter setup in psql_debug (statistics settings in
# postgresql.conf, prometheus-postgres-exporter Helm releases, ServiceMonitors).
# The value is executed as a command (`if ${PSQL_EXPORT}; then`), so it must be
# exactly `true` or `false`.
export PSQL_EXPORT="${PSQL_EXPORT:-false}"
# NOTE(review): presumably substituted into log_min_duration_statement in
# postgresql.conf; that use is not visible in this hunk -- confirm.
export LOG_MIN_DURATION_STATEMENT="${LOG_MIN_DURATION_STATEMENT:-65}"
# Substituted by psql_debug into log_min_duration_sample in postgresql.conf.
export LOG_MIN_DURATION_SAMPLE="${LOG_MIN_DURATION_SAMPLE:-50}"
# Substituted by psql_debug into log_statement_sample_rate in postgresql.conf.
export LOG_STATEMENT_SAMPLE_RATE="${LOG_STATEMENT_SAMPLE_RATE:-0.7}"
Expand Down Expand Up @@ -346,13 +347,49 @@ psql_debug() {
$clin exec "${psql_db}" -- sh -c "sed -i "s/^\s*#log_min_duration_sample.*/log_min_duration_sample=${LOG_MIN_DURATION_SAMPLE}/" /var/lib/pgsql/data/userdata/postgresql.conf "
$clin exec "${psql_db}" -- sh -c "sed -i "s/^\s*#log_statement_sample_rate.*/log_statement_sample_rate=${LOG_STATEMENT_SAMPLE_RATE}/" /var/lib/pgsql/data/userdata/postgresql.conf "
fi
if ${PSQL_EXPORT}; then
$clin exec "${psql_db}" -- sh -c 'sed -i "s/^\s*#track_io_timing.*/track_io_timing = on/" /var/lib/pgsql/data/userdata/postgresql.conf'
$clin exec "${psql_db}" -- sh -c 'sed -i "s/^\s*#track_wal_io_timing.*/track_wal_io_timing = on/" /var/lib/pgsql/data/userdata/postgresql.conf'
$clin exec "${psql_db}" -- sh -c 'sed -i "s/^\s*#track_functions.*/track_functions = all/" /var/lib/pgsql/data/userdata/postgresql.conf'
$clin exec "${psql_db}" -- sh -c 'sed -i "s/^\s*#stats_fetch_consistency.*/stats_fetch_consistency = cache/" /var/lib/pgsql/data/userdata/postgresql.conf'
$clin exec "${psql_db}" -- sh -c "echo shared_preload_libraries = \'pgaudit,auto_explain,pg_stat_statements\' >> /var/lib/pgsql/data/userdata/postgresql.conf"
fi
echo "Restarting RHDH DB..."
$clin rollout restart statefulset/"$psql_db_ss"
wait_to_start statefulset "$psql_db_ss" 300 300

if ${PSQL_EXPORT}; then
$clin exec "${psql_db}" -- sh -c 'psql -c "CREATE EXTENSION pg_stat_statements;"'
uid=$(oc get namespace "${RHDH_NAMESPACE}" -o go-template='{{ index .metadata.annotations "openshift.io/sa.scc.supplemental-groups" }}'| cut -d '/' -f 1)
pg_pass=$(${clin} get secret rhdh-postgresql -o jsonpath='{.data.postgres-password}'|base64 -d)
plugins=("backstage_plugin_permission" "backstage_plugin_auth" "backstage_plugin_catalog" "backstage_plugin_scaffolder" "backstage_plugin_search" "backstage_plugin_app")
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
cp template/postgres-exporter/chart-values.yaml "$TMP_DIR/pg-exporter.yaml"
sed -i "s/uid/$uid/g" "$TMP_DIR/pg-exporter.yaml"
sed -i "s/pg_password/'$pg_pass'/g" "$TMP_DIR/pg-exporter.yaml"
helm install pg-exporter prometheus-community/prometheus-postgres-exporter -n "${RHDH_NAMESPACE}" -f "$TMP_DIR/pg-exporter.yaml"
for plugin in "${plugins[@]}"; do
cp template/postgres-exporter/values-template.yaml "${TMP_DIR}/${plugin}.yaml"
sed -i "s/'dbname'/'$plugin'/" "${TMP_DIR}/${plugin}.yaml"
sed -i "s/uid/$uid/g" "${TMP_DIR}/${plugin}.yaml"
sed -i "s/pg_password/'$pg_pass'/g" "${TMP_DIR}/${plugin}.yaml"
helm_name=${plugin//_/-}
helm install "${helm_name}" prometheus-community/prometheus-postgres-exporter -n "${RHDH_NAMESPACE}" -f "${TMP_DIR}/${plugin}.yaml"
done
fi

echo "Restarting RHDH..."
$clin rollout restart deployment/"$rhdh_deployment"
wait_to_start deployment "$rhdh_deployment" 300 300
if ${PSQL_EXPORT}; then
plugins=("pg-exporter" "backstage-plugin-permission" "backstage-plugin-auth" "backstage-plugin-catalog" "backstage-plugin-scaffolder" "backstage-plugin-search" "backstage-plugin-app")
for plugin in "${plugins[@]}"; do
cp template/postgres-exporter/service-monitor-template.yaml "${TMP_DIR}/${plugin}-monitor.yaml"
sed -i "s/pglabel/$plugin/" "${TMP_DIR}/${plugin}-monitor.yaml"
sed -i "s/pgnamespace/$RHDH_NAMESPACE/g" "${TMP_DIR}/${plugin}-monitor.yaml"
$clin create -f "${TMP_DIR}/${plugin}-monitor.yaml"
done
fi
}
setup_monitoring() {
echo "Enabling user workload monitoring"
Expand Down
25 changes: 25 additions & 0 deletions ci-scripts/rhdh-setup/template/postgres-exporter/chart-values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Helm values for the instance-wide prometheus-postgres-exporter release that
# deploy.sh installs under the release name "pg-exporter".
#
# deploy.sh copies this file and rewrites two placeholders with sed before
# running `helm install`: the numeric-ID placeholder used for runAsGroup and
# runAsUser, and the password placeholder under config.datasource. The
# placeholder tokens are deliberately not spelled out in these comments because
# the sed expressions replace every occurrence in the file.
podSecurityContext:
  runAsGroup: uid
  runAsUser: uid
  runAsNonRoot: true
  seccompProfile:
    type: RuntimeDefault
securityContext: {}
config:
  logLevel: 'debug'
  # Collectors switched on in addition to the exporter defaults.
  extraArgs:
    - --collector.long_running_transactions
    - --collector.process_idle
    - --collector.stat_activity_autovacuum
    - --collector.stat_user_tables
    - --collector.statio_user_indexes
    - --collector.statio_user_tables
    - --collector.postmaster
    - --collector.stat_statements
    - --auto-discover-databases
  datasource:
    host: 'rhdh-postgresql-primary'
    user: 'postgres'
    password: pg_password
    port: "5432"
    # Left empty: this release is not bound to a single database.
    database: ''
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# ServiceMonitor template. deploy.sh copies it once per exporter Helm release,
# rewrites the release-label placeholder and the namespace placeholder with sed,
# and creates the resulting object in the cluster. The placeholder tokens are
# deliberately not spelled out in these comments so sed cannot rewrite them here.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    release: pglabel
  name: prometheus-pglabel
spec:
  endpoints:
    - interval: 30s
      port: http
      scheme: http
      path: /metrics
  namespaceSelector:
    matchNames:
      - pgnamespace
  # Selects the Service whose `release` label equals the Helm release name.
  selector:
    matchLabels:
      release: pglabel
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Helm values template for the per-database prometheus-postgres-exporter
# releases. deploy.sh copies it once per backstage plugin database and rewrites
# the numeric-ID, password and database-name placeholders with sed before
# running `helm install`. The placeholder tokens are deliberately not spelled
# out in these comments because sed would rewrite them here as well.
podSecurityContext:
  runAsGroup: uid
  runAsUser: uid
  runAsNonRoot: true
  seccompProfile:
    type: RuntimeDefault
securityContext: {}
config:
  logLevel: 'debug'
  # Every collector listed with --no-collector is disabled; only the three
  # user-table / user-index collectors at the end are enabled for this release.
  extraArgs:
    - --no-collector.database
    - --no-collector.database_wraparound
    - --no-collector.locks
    - --no-collector.long_running_transactions
    - --no-collector.postmaster
    - --no-collector.process_idle
    - --no-collector.replication
    - --no-collector.replication_slot
    - --no-collector.stat_activity_autovacuum
    - --no-collector.stat_bgwriter
    - --no-collector.stat_database
    - --no-collector.stat_statements
    - --no-collector.stat_wal_receiver
    - --no-collector.wal
    - --no-collector.xlog_location
    - --collector.stat_user_tables
    - --collector.statio_user_indexes
    - --collector.statio_user_tables
  datasource:
    host: 'rhdh-postgresql-primary'
    user: 'postgres'
    password: pg_password
    port: "5432"
    database: 'dbname'
223 changes: 223 additions & 0 deletions config/cluster_read_config.populate.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,226 @@ value }}
- name: measurements.nodejs.populate.nodejs_gc_duration_seconds_major_average
monitoring_query: sum(rate(nodejs_gc_duration_seconds_sum{kind="major",job="rhdh-metrics"}[5m]))/sum(rate(nodejs_gc_duration_seconds_count{kind="major",job="rhdh-metrics"}[5m]))
monitoring_step: 15


{# pg_query_sum(alias, query): emit one measurement entry whose query sums the
   metric `query` over all series carrying the service label
   `<alias>-prometheus-postgres-exporter`. `alias` is also used in the
   measurement name. The keys after `- name` are indented so they stay inside
   the same list item. -#}
{% macro pg_query_sum(alias, query) -%}
# Gather monitoring data about the db {{ alias }}
- name: measurements.postgresql.populate.{{ alias }}.{{ query }}
  monitoring_query: sum({{ query }}{service='{{ alias }}-prometheus-postgres-exporter'})
  monitoring_step: 15
{%- endmacro %}

{# One pg_query_sum entry per (user-table / user-index statistic, exporter
   release) pair. The statistic loop is the outer loop, so entries are grouped
   by statistic. -#}
{% for metric in [
  'pg_statio_user_indexes_idx_blks_hit_total', 'pg_statio_user_indexes_idx_blks_read_total',
  'pg_statio_user_tables_heap_blocks_hit', 'pg_statio_user_tables_heap_blocks_read',
  'pg_statio_user_tables_idx_blocks_hit', 'pg_statio_user_tables_idx_blocks_read',
  'pg_statio_user_tables_tidx_blocks_hit', 'pg_statio_user_tables_tidx_blocks_read',
  'pg_statio_user_tables_toast_blocks_hit', 'pg_statio_user_tables_toast_blocks_read',
  'pg_stat_user_tables_vacuum_count', 'pg_stat_user_tables_size_bytes',
  'pg_stat_user_tables_seq_tup_read', 'pg_stat_user_tables_seq_scan',
  'pg_stat_user_tables_n_tup_upd', 'pg_stat_user_tables_n_tup_ins',
  'pg_stat_user_tables_n_tup_hot_upd', 'pg_stat_user_tables_n_tup_del',
  'pg_stat_user_tables_n_mod_since_analyze', 'pg_stat_user_tables_n_live_tup',
  'pg_stat_user_tables_n_dead_tup', 'pg_stat_user_tables_last_vacuum',
  'pg_stat_user_tables_last_autovacuum', 'pg_stat_user_tables_last_autoanalyze',
  'pg_stat_user_tables_last_analyze', 'pg_stat_user_tables_idx_tup_fetch',
  'pg_stat_user_tables_idx_scan', 'pg_stat_user_tables_autovacuum_count',
  'pg_stat_user_tables_autoanalyze_count', 'pg_stat_user_tables_analyze_count'
] %}
{% for release in [
  'backstage-plugin-permission', 'backstage-plugin-auth', 'backstage-plugin-catalog',
  'backstage-plugin-scaffolder', 'backstage-plugin-search', 'backstage-plugin-app'
] %}
{{ pg_query_sum(release, metric) }}
{% endfor %}
{% endfor %}

{# pg_query(alias, query): emit one measurement entry that reads the metric
   `query` filtered by the label datname="<alias>" (no aggregation). `alias` is
   also used in the measurement name. The keys after `- name` are indented so
   they stay inside the same list item. -#}
{% macro pg_query(alias, query) -%}
# Gather monitoring data about the db {{ alias }}
- name: measurements.postgresql.populate.{{ alias }}.{{ query }}
  monitoring_query: {{ query }}{datname="{{ alias }}"}
  monitoring_step: 15
{%- endmacro %}

{# One pg_query entry per (database-level statistic, database name) pair. The
   statistic loop is the outer loop, so entries are grouped by statistic. -#}
{% for metric in [
  'pg_stat_database_blk_read_time', 'pg_stat_database_blk_write_time',
  'pg_stat_database_blks_hit', 'pg_stat_database_blks_read',
  'pg_stat_database_conflicts', 'pg_stat_database_conflicts_confl_bufferpin',
  'pg_stat_database_conflicts_confl_deadlock', 'pg_stat_database_conflicts_confl_lock',
  'pg_stat_database_conflicts_confl_snapshot', 'pg_stat_database_conflicts_confl_tablespace',
  'pg_stat_database_deadlocks', 'pg_stat_database_numbackends',
  'pg_stat_database_temp_bytes', 'pg_stat_database_temp_files',
  'pg_stat_database_tup_deleted', 'pg_stat_database_tup_fetched',
  'pg_stat_database_tup_inserted', 'pg_stat_database_tup_returned',
  'pg_stat_database_tup_updated', 'pg_stat_database_xact_commit',
  'pg_stat_database_xact_rollback', 'pg_database_size_bytes'
] %}
{% for database in [
  'backstage_plugin_permission', 'backstage_plugin_auth', 'backstage_plugin_catalog',
  'backstage_plugin_scaffolder', 'backstage_plugin_search', 'backstage_plugin_app'
] %}
{{ pg_query(database, metric) }}
{% endfor %}
{% endfor %}

{# pg_stat_statements_sum(alias, query): emit one measurement entry whose query
   sums the metric `query` over all series with the label datname='<alias>'.
   `alias` is also used in the measurement name. The keys after `- name` are
   indented so they stay inside the same list item. -#}
{% macro pg_stat_statements_sum(alias, query) -%}
# Gather monitoring data about the db {{ alias }}
- name: measurements.postgresql.populate.{{ alias }}.{{ query }}
  monitoring_query: sum({{ query }}{datname='{{ alias }}'})
  monitoring_step: 15
{%- endmacro %}

{# One pg_stat_statements_sum entry per (statement / lock statistic, database
   name) pair. The statistic loop is the outer loop. -#}
{% for metric in [
  'pg_stat_statements_block_read_seconds_total', 'pg_stat_statements_block_write_seconds_total',
  'pg_stat_statements_calls_total', 'pg_stat_statements_rows_total',
  'pg_stat_statements_seconds_total', 'pg_locks_count'
] %}
{% for database in [
  'backstage_plugin_permission', 'backstage_plugin_auth', 'backstage_plugin_catalog',
  'backstage_plugin_scaffolder', 'backstage_plugin_search', 'backstage_plugin_app'
] %}
{{ pg_stat_statements_sum(database, metric) }}
{% endfor %}
{% endfor %}


{# pg_settings(query): emit one measurement entry that reads the metric `query`
   from the series carrying the service label
   "pg-exporter-prometheus-postgres-exporter". The macro takes no database
   alias, so neither the emitted comment nor the measurement name refers to one.
   The keys after `- name` are indented so they stay inside the same list
   item. -#}
{% macro pg_settings(query) -%}
# Gather instance-wide monitoring data: {{ query }}
- name: measurements.postgresql.populate.{{ query }}
  monitoring_query: {{ query }}{service="pg-exporter-prometheus-postgres-exporter"}
  monitoring_step: 30
{%- endmacro %}

{# One pg_settings entry per instance-wide metric: server settings, idle-process
   histogram parts, bgwriter and archiver counters, long-running transactions,
   WAL size and exporter process statistics. -#}
{% for query in [
  'pg_settings_max_connections',
  'pg_settings_superuser_reserved_connections',
  'pg_settings_shared_buffers_bytes',
  'pg_settings_work_mem_bytes',
  'pg_settings_maintenance_work_mem_bytes',
  'pg_settings_shared_memory_size_in_huge_pages',
  'pg_settings_effective_cache_size_bytes',
  'pg_settings_effective_io_concurrency',
  'pg_settings_random_page_cost',
  'pg_settings_track_io_timing',
  'pg_settings_max_wal_senders',
  'pg_settings_checkpoint_timeout_seconds',
  'pg_settings_checkpoint_completion_target',
  'pg_settings_max_wal_size_bytes',
  'pg_settings_min_wal_size_bytes',
  'pg_settings_wal_buffers_bytes',
  'pg_settings_wal_writer_delay_seconds',
  'pg_settings_wal_writer_flush_after_bytes',
  'pg_settings_bgwriter_delay_seconds',
  'pg_settings_bgwriter_lru_maxpages',
  'pg_settings_bgwriter_lru_multiplier',
  'pg_settings_bgwriter_flush_after_bytes',
  'pg_settings_max_worker_processes',
  'pg_settings_max_parallel_workers_per_gather',
  'pg_settings_max_parallel_maintenance_workers',
  'pg_settings_max_parallel_workers',
  'pg_settings_parallel_leader_participation',
  'pg_settings_enable_partitionwise_join',
  'pg_settings_enable_partitionwise_aggregate',
  'pg_settings_jit',
  'pg_settings_max_slot_wal_keep_size_bytes',
  'pg_settings_track_wal_io_timing',
  'pg_settings_maintenance_io_concurrency',
  'pg_settings_wal_recycle',
  'pg_process_idle_seconds_sum',
  'pg_process_idle_seconds_count',
  'pg_stat_bgwriter_buffers_alloc_total',
  'pg_stat_bgwriter_buffers_backend_fsync_total',
  'pg_stat_bgwriter_buffers_backend_total',
  'pg_stat_bgwriter_buffers_checkpoint_total',
  'pg_stat_bgwriter_buffers_clean_total',
  'pg_stat_bgwriter_checkpoint_sync_time_total',
  'pg_stat_bgwriter_checkpoint_write_time_total',
  'pg_stat_bgwriter_checkpoints_req_total',
  'pg_stat_bgwriter_checkpoints_timed_total',
  'pg_stat_bgwriter_maxwritten_clean_total',
  'pg_stat_archiver_archived_count',
  'pg_stat_archiver_failed_count',
  'pg_long_running_transactions',
  'pg_long_running_transactions_oldest_timestamp_seconds',
  'pg_wal_segments',
  'pg_wal_size_bytes',
  'process_cpu_seconds_total',
  'process_max_fds',
  'process_open_fds',
  'process_resident_memory_bytes',
  'process_virtual_memory_bytes',
  'process_virtual_memory_max_bytes'
] %}
{{ pg_settings( query ) }}
{% endfor %}

{# pg_stat_activity(alias, query, state): emit one measurement entry whose query
   sums the metric `query` over series matching datname='<alias>',
   state='<state>' and the service label
   "pg-exporter-prometheus-postgres-exporter". Both `alias` and `state` are part
   of the measurement name. The keys after `- name` are indented so they stay
   inside the same list item. -#}
{% macro pg_stat_activity(alias, query, state) -%}
# Gather monitoring data about the db {{ alias }}
- name: measurements.postgresql.populate.{{ alias }}.{{ query }}.{{ state }}
  monitoring_query: sum({{ query }}{datname='{{ alias }}',state='{{ state }}',service="pg-exporter-prometheus-postgres-exporter"})
  monitoring_step: 15
{%- endmacro %}

{# One pg_stat_activity entry per (activity metric, database name, backend
   state) triple; loops nest in that order, metric outermost.
   NOTE(review): the state values must equal the exporter's `state` label
   exactly; 'fastpath' may not match what the exporter emits, and the
   "idle in transaction" states are not listed -- confirm against the
   exporter's pg_stat_activity output. -#}
{% for metric in ['pg_stat_activity_count', 'pg_stat_activity_max_tx_duration'] %}
{% for database in [
  'backstage_plugin_permission', 'backstage_plugin_auth', 'backstage_plugin_catalog',
  'backstage_plugin_scaffolder', 'backstage_plugin_search', 'backstage_plugin_app'
] %}
{% for backend_state in ['active', 'disabled', 'fastpath', 'idle'] %}
{{ pg_stat_activity(database, metric, backend_state) }}
{% endfor %}
{% endfor %}
{% endfor %}
Loading