# pgmon/sample-config/pgmon-metrics.yml
# Sample metric definitions for pgmon.

metrics:
##
# Discovery metrics
##
# Discovery: returns one "dbname" per row of pg_database.
# The query key appears to be a minimum server_version_num (0 = any version);
# confirm against the loader.
discover_dbs:
  type: set
  query:
    0: >-
      SELECT datname AS dbname
      FROM pg_database
# Discovery: one row per pg_stat_replication entry.
# "repid" is client_addr, an underscore, then application_name with every
# space or comma replaced by an underscore.
discover_rep:
  type: set
  query:
    0: >-
      SELECT client_addr || '_' ||
      regexp_replace(application_name, '[ ,]', '_', 'g') AS repid,
      client_addr, state
      FROM pg_stat_replication
# Discovery: one row per replication slot.
# The 90400 variant reports "temporary" as a constant false; the 100000
# variant reads the real "temporary" column instead.
discover_slots:
  type: set
  query:
    90400: >-
      SELECT slot_name, plugin, slot_type, database,
      false as temporary, active
      FROM pg_replication_slots
    100000: >-
      SELECT slot_name, plugin, slot_type, database,
      temporary, active
      FROM pg_replication_slots
##
# Cluster-wide metrics
##
# Reports the server's numeric version via SHOW server_version_num.
version:
  type: value
  query:
    0: >-
      SHOW server_version_num
# Largest transaction-ID age (and, from 90600, multixact-ID age) across all
# databases. The fallback variant reports mxid_age as a constant 0.
max_frozen_age:
  type: row
  query:
    0: >-
      SELECT max(age(datfrozenxid)) AS xid_age,
      0 AS mxid_age
      FROM pg_database
    90600: >-
      SELECT max(age(datfrozenxid)) AS xid_age,
      max(mxid_age(datminmxid)) AS mxid_age
      FROM pg_database
# Background-writer and checkpoint counters.
# PostgreSQL 17 moved the checkpoint columns out of pg_stat_bgwriter into
# pg_stat_checkpointer (with new names) and dropped buffers_backend and
# buffers_backend_fsync, so the 170000 variant joins the two single-row views
# and aliases the columns back to the names used by the older variant.
# The two dropped counters are reported as 0, matching how other metrics in
# this file fill columns a server version does not provide.
# NOTE(review): assumes query keys are minimum server_version_num values and
# the loader picks the highest key not above the server version - confirm.
bgwriter:
  type: row
  query:
    0: >-
      SELECT checkpoints_timed, checkpoints_req, checkpoint_write_time,
      checkpoint_sync_time, buffers_checkpoint, buffers_clean,
      maxwritten_clean, buffers_backend, buffers_backend_fsync, buffers_alloc
      FROM pg_stat_bgwriter
    170000: >-
      SELECT cp.num_timed AS checkpoints_timed,
      cp.num_requested AS checkpoints_req,
      cp.write_time AS checkpoint_write_time,
      cp.sync_time AS checkpoint_sync_time,
      cp.buffers_written AS buffers_checkpoint,
      bg.buffers_clean, bg.maxwritten_clean,
      0 AS buffers_backend, 0 AS buffers_backend_fsync,
      bg.buffers_alloc
      FROM pg_stat_bgwriter bg
      CROSS JOIN pg_stat_checkpointer cp
##
# Per-database metrics
##
# Per-database counters from pg_stat_database for the database named by the
# "dbname" parameter. stats_reset is returned as epoch seconds (float).
# The fallback variant reports checksum_failures and the session_* / sessions*
# columns as constant 0; the 140000 variant reads the real columns.
db_stats:
  type: row
  query:
    0: >-
      SELECT numbackends, xact_commit, xact_rollback, blks_read, blks_hit,
      tup_returned, tup_fetched, tup_inserted, tup_updated, tup_deleted,
      conflicts, temp_files, temp_bytes, deadlocks, 0 AS checksum_failures,
      blk_read_time, blk_write_time, 0 AS session_time, 0 AS active_time,
      0 AS idle_in_transaction_time, 0 AS sessions, 0 AS sessions_abandoned,
      0 AS sessions_fatal, 0 AS sessions_killed,
      extract('epoch' from stats_reset)::float AS stats_reset
      FROM pg_stat_database
      WHERE datname = %(dbname)s
    140000: >-
      SELECT numbackends, xact_commit, xact_rollback, blks_read, blks_hit,
      tup_returned, tup_fetched, tup_inserted, tup_updated, tup_deleted,
      conflicts, temp_files, temp_bytes, deadlocks,
      COALESCE(checksum_failures, 0) AS checksum_failures,
      blk_read_time, blk_write_time, session_time, active_time,
      idle_in_transaction_time, sessions, sessions_abandoned, sessions_fatal,
      sessions_killed,
      extract('epoch' from stats_reset)::float AS stats_reset
      FROM pg_stat_database
      WHERE datname = %(dbname)s
  # Sample parameter value; presumably used when the queries are test-run.
  test_args:
    dbname: postgres
# Aggregate cache hit ratios over pg_statio_all_tables for heap, index, TOAST
# and TOAST-index blocks. Each ratio is hits / (reads + hits); NULLIF turns a
# zero denominator into NULL instead of a division-by-zero error.
# The heap ratio previously used heap_blks_read as the numerator, which
# reported the miss ratio; it now uses heap_blks_hit like the other three.
hit_ratios:
  type: row
  query:
    0: >-
      SELECT
      sum(heap_blks_hit)::float / NULLIF(sum(heap_blks_read + heap_blks_hit), 0) AS avg_heap_hit_ratio,
      sum(idx_blks_hit)::float / NULLIF(sum(idx_blks_read + idx_blks_hit), 0) AS avg_idx_hit_ratio,
      sum(toast_blks_hit)::float / NULLIF(sum(toast_blks_read + toast_blks_hit), 0) AS avg_toast_hit_ratio,
      sum(tidx_blks_hit)::float / NULLIF(sum(tidx_blks_read + tidx_blks_hit), 0) AS avg_tidx_hit_ratio
      FROM pg_statio_all_tables
# Backend counts for the database named by the "dbname" parameter, plus the
# longest time (epoch seconds since state_change) any backend has spent in the
# 'active' and 'idle in transaction' states. COALESCE reports 0 when no
# backend is in that state.
activity:
  type: row
  query:
    0: >
      SELECT count(*) as backend_count,
      count(*) FILTER (WHERE state = 'idle') AS idle_count,
      count(*) FILTER (WHERE state = 'idle in transaction') AS idle_tx_count,
      COALESCE(EXTRACT(EPOCH FROM max(now() - state_change) FILTER (WHERE state = 'active'))::float, 0) AS max_active_time,
      COALESCE(EXTRACT(EPOCH FROM max(now() - state_change) FILTER (WHERE state = 'idle in transaction'))::float, 0) AS max_idle_tx_time
      FROM pg_stat_activity
      WHERE datname = %(dbname)s
  # Added for consistency: every other metric in this file that takes a query
  # parameter supplies a sample value here.
  # NOTE(review): presumably consumed when the queries are test-run - confirm.
  test_args:
    dbname: postgres
##
# Per-replication metrics
##
# Per-replica statistics from pg_stat_replication for the replica identified
# by the "repid" parameter (client_addr, an underscore, then application_name
# with spaces and commas replaced by underscores).
# backend_start and the *_lag columns are returned as whole epoch seconds.
#
# A stray timestamp line that had been pasted into the query mapping was
# removed. The *_lsn and *_lag columns only exist from PostgreSQL 10, so the
# query that used them is now keyed 100000. The 90400 variant reads the older
# *_location columns under the same output names and reports the lag values
# as 0, because those servers have no lag columns.
# NOTE(review): assumes query keys are minimum server_version_num values - confirm.
rep_stats:
  type: row
  query:
    90400: >-
      SELECT pid, usename,
      EXTRACT(EPOCH FROM backend_start)::integer AS backend_start,
      state,
      sent_location AS sent_lsn, write_location AS write_lsn,
      flush_location AS flush_lsn, replay_location AS replay_lsn,
      0 AS write_lag, 0 AS flush_lag, 0 AS replay_lag,
      sync_state
      FROM pg_stat_replication
      WHERE client_addr || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') = %(repid)s
    100000: >-
      SELECT pid, usename,
      EXTRACT(EPOCH FROM backend_start)::integer AS backend_start,
      state, sent_lsn, write_lsn, flush_lsn, replay_lsn,
      COALESCE(EXTRACT(EPOCH FROM write_lag), 0)::integer AS write_lag,
      COALESCE(EXTRACT(EPOCH FROM flush_lag), 0)::integer AS flush_lag,
      COALESCE(EXTRACT(EPOCH FROM replay_lag), 0)::integer AS replay_lag,
      sync_state
      FROM pg_stat_replication
      WHERE client_addr || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') = %(repid)s
  # Sample parameter value; presumably used when the queries are test-run.
  test_args:
    repid: '127.0.0.1_test_rep'
##
# Per-slot metrics
##
# Per-slot statistics from pg_replication_slots for the slot named by the
# "slot" parameter. restart_bytes and confirmed_flush_bytes are the distance
# from the current WAL position back to restart_lsn and confirmed_flush_lsn.
#
# active_pid was added in PostgreSQL 9.5 and confirmed_flush_lsn in 9.6, so
# the single pre-10 query failed on 9.4 and 9.5. The older variants now return
# NULL for the columns their server lacks; the 90600 and 100000 queries are
# unchanged.
# NOTE(review): assumes query keys are minimum server_version_num values - confirm.
slot_stats:
  type: row
  query:
    90400: >-
      SELECT NULL::integer AS active_pid, xmin,
      pg_xlog_location_diff(pg_current_xlog_location(), restart_lsn) AS restart_bytes,
      NULL::numeric AS confirmed_flush_bytes
      FROM pg_replication_slots
      WHERE slot_name = %(slot)s
    90500: >-
      SELECT active_pid, xmin,
      pg_xlog_location_diff(pg_current_xlog_location(), restart_lsn) AS restart_bytes,
      NULL::numeric AS confirmed_flush_bytes
      FROM pg_replication_slots
      WHERE slot_name = %(slot)s
    90600: >-
      SELECT active_pid, xmin,
      pg_xlog_location_diff(pg_current_xlog_location(), restart_lsn) AS restart_bytes,
      pg_xlog_location_diff(pg_current_xlog_location(), confirmed_flush_lsn) AS confirmed_flush_bytes
      FROM pg_replication_slots
      WHERE slot_name = %(slot)s
    100000: >-
      SELECT active_pid, xmin,
      pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn) AS restart_bytes,
      pg_wal_lsn_diff(pg_current_wal_lsn(), confirmed_flush_lsn) AS confirmed_flush_bytes
      FROM pg_replication_slots
      WHERE slot_name = %(slot)s
  # Sample parameter value; presumably used when the queries are test-run.
  test_args:
    slot: test_slot
##
# Debugging
##
# Debugging aid: number of rows in pg_stat_user_tables, returned as "ntables".
ntables:
  type: value
  query:
    0: >-
      SELECT count(*) AS ntables
      FROM pg_stat_user_tables