diff --git a/sample-config/pgmon-metrics.yml b/sample-config/pgmon-metrics.yml index 711e2a9..dcf6588 100644 --- a/sample-config/pgmon-metrics.yml +++ b/sample-config/pgmon-metrics.yml @@ -1,5 +1,7 @@ metrics: + ## # Discovery metrics + ## discover_dbs: type: set query: @@ -14,7 +16,9 @@ metrics: 90400: SELECT slot_name, plugin, slot_type, database, false as temporary, active FROM pg_replication_slots 100000: SELECT slot_name, plugin, slot_type, database, temporary, active FROM pg_replication_slots + ## # cluster-wide metrics + ## version: type: value query: @@ -26,12 +30,20 @@ metrics: 0: SELECT max(age(datfrozenxid)) AS xid_age, 0 AS mxid_age FROM pg_database 90600: SELECT max(age(datfrozenxid)) AS xid_age, max(mxid_age(datminmxid)) AS mxid_age FROM pg_database + bgwriter: + type: row + query: + 0: SELECT checkpoints_timed, checkpoints_req, checkpoint_write_time, checkpoint_sync_time, buffers_checkpoint, buffers_clean, maxwritten_clean, buffers_backend, buffers_backend_fsync, buffers_alloc FROM pg_stat_bgwriter; + + + ## # Per-database metrics + ## db_stats: type: row query: - 0: SELECT numbackends, xact_commit, xact_rollback, blks_read, blks_hit, tup_returned, tup_fetched, tup_inserted, tup_updated, tup_deleted, conflicts, temp_files, temp_bytes, deadlocks, blk_read_time, blk_write_time, extract('epoch' from stats_reset)::float FROM pg_stat_database WHERE datname = %(dbname)s - 140000: SELECT numbackends, xact_commit, xact_rollback, blks_read, blks_hit, tup_returned, tup_fetched, tup_inserted, tup_updated, tup_deleted, conflicts, temp_files, temp_bytes, deadlocks, COALESCE(checksum_failures, 0) AS checksum_failures, blk_read_time, blk_write_time, session_time, active_time, idle_in_transaction_time, sessions, sessions_abandoned, sessions_fatal, sessions_killed, extract('epoch' from stats_reset)::float FROM pg_stat_database WHERE datname = %(dbname)s + 0: SELECT numbackends, xact_commit, xact_rollback, blks_read, blks_hit, tup_returned, tup_fetched, tup_inserted, tup_updated, tup_deleted, conflicts, temp_files, temp_bytes, deadlocks, 0 AS checksum_failures, blk_read_time, blk_write_time, 0 AS session_time, 0 AS active_time, 0 AS idle_in_transaction_time, 0 AS sessions, 0 AS sessions_abandoned, 0 AS sessions_fatal, 0 AS sessions_killed, extract('epoch' from stats_reset)::float AS stats_reset FROM pg_stat_database WHERE datname = %(dbname)s + 140000: SELECT numbackends, xact_commit, xact_rollback, blks_read, blks_hit, tup_returned, tup_fetched, tup_inserted, tup_updated, tup_deleted, conflicts, temp_files, temp_bytes, deadlocks, COALESCE(checksum_failures, 0) AS checksum_failures, blk_read_time, blk_write_time, session_time, active_time, idle_in_transaction_time, sessions, sessions_abandoned, sessions_fatal, sessions_killed, extract('epoch' from stats_reset)::float AS stats_reset FROM pg_stat_database WHERE datname = %(dbname)s test_args: dbname: postgres @@ -40,13 +52,21 @@ metrics: query: 0: SELECT sum(heap_blks_read)::float / NULLIF(sum(heap_blks_read + heap_blks_hit), 0) AS avg_heap_hit_ratio, sum(idx_blks_hit)::float / NULLIF(sum(idx_blks_read + idx_blks_hit), 0) AS avg_idx_hit_ratio, sum(toast_blks_hit)::float / NULLIF(sum(toast_blks_read + toast_blks_hit), 0) AS avg_toast_hit_ratio, sum(tidx_blks_hit)::float / NULLIF(sum(tidx_blks_read + tidx_blks_hit), 0) AS avg_tidx_hit_ratio FROM pg_statio_all_tables - # Debugging - ntables: - type: value + activity: + type: row query: - 0: SELECT count(*) AS ntables FROM pg_stat_user_tables + 0: > + SELECT count(*) as backend_count, + count(*) FILTER (WHERE state = 'idle') AS idle_count, + count(*) FILTER (WHERE state = 'idle in transaction') AS idle_tx_count, + COALESCE(EXTRACT(EPOCH FROM max(now() - state_change) FILTER (WHERE state = 'active'))::float, 0) AS max_active_time, + COALESCE(EXTRACT(EPOCH FROM max(now() - state_change) FILTER (WHERE state = 'idle in transaction'))::float, 0) AS max_idle_tx_time + FROM pg_stat_activity + WHERE datname = %(dbname)s + ## # Per-replication metrics + ## rep_stats: type: row query: @@ -54,7 +74,10 @@ metrics: test_args: repid: 127.0.0.1_test_rep + + ## # Per-slot metrics + ## slot_stats: type: row query: @@ -62,3 +85,11 @@ metrics: 100000: SELECT active_pid, xmin, pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn) AS restart_bytes, pg_wal_lsn_diff(pg_current_wal_lsn(), confirmed_flush_lsn) AS confirmed_flush_bytes FROM pg_replication_slots WHERE slot_name = %(slot)s test_args: slot: test_slot + + ## + # Debugging + ## + ntables: + type: value + query: + 0: SELECT count(*) AS ntables FROM pg_stat_user_tables