Simplify the code by switching to http for IPC

This commit is contained in:
James Campbell 2024-10-24 12:23:19 -04:00
parent 9fd03f5027
commit c13951e16e
Signed by: james
GPG Key ID: 2287C33A40DC906A
10 changed files with 559 additions and 1874 deletions

View File

@ -1,2 +1,2 @@
bin_SCRIPTS = pgmon.py
dist_doc_DATA = README ChangeLog pgmon_userparameter.conf pgmon_templates.yaml
dist_doc_DATA = README ChangeLog pgmon_templates.yaml

View File

@ -1,14 +0,0 @@
# Discovery metrics
metric=discover_dbs:set::SELECT datname, %s AS agent, %s AS cluster FROM pg_database
metric=discover_rep:set::SELECT client_addr || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') AS repid, client_addr, state, %s AS agent, %s AS cluster FROM pg_stat_replication
# Cluster-wide metrics
metric=version:value::SHOW server_version_num
metric=max_frozen_age:value::SELECT max(age(datfrozenxid)) FROM pg_database
# Per-database metrics
metric=db_stats:row::SELECT numbackends, xact_commit, xact_rollback, blks_read, blks_hit, tup_returned, tup_fetched, tup_inserted, tup_updated, tup_deleted, conflicts, temp_files, temp_bytes, deadlocks, blk_read_time, blk_write_time, extract('epoch' from stats_reset)::float FROM pg_stat_database WHERE datname = '{datname}'
metric=db_stats:row:140000:SELECT numbackends, xact_commit, xact_rollback, blks_read, blks_hit, tup_returned, tup_fetched, tup_inserted, tup_updated, tup_deleted, conflicts, temp_files, temp_bytes, deadlocks, COALESCE(checksum_failures, 0) AS checksum_failures, blk_read_time, blk_write_time, session_time, active_time, idle_in_transaction_time, sessions, sessions_abandoned, sessions_fatal, sessions_killed, extract('epoch' from stats_reset)::float FROM pg_stat_database WHERE datname = '{datname}'
# Per-replication metrics
metric=rep_stats:row::SELECT * FROM pg_stat_database WHERE client_addr || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') = '{repid}'

39
pgmon-metrics.yml Normal file
View File

@ -0,0 +1,39 @@
metrics:
# Discovery metrics
discover_dbs:
type: set
query:
0: SELECT datname FROM pg_database
discover_rep:
type: set
query:
0: SELECT client_addr || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') AS repid, client_addr, state FROM pg_stat_replication
# cluster-wide metrics
version:
type: value
query:
0: SHOW server_version_num
max_frozen_age:
type: value
query:
0: SELECT max(age(datfrozenxid)) FROM pg_database
# Per-database metrics
db_stats:
type: row
query:
0: SELECT numbackends, xact_commit, xact_rollback, blks_read, blks_hit, tup_returned, tup_fetched, tup_inserted, tup_updated, tup_deleted, conflicts, temp_files, temp_bytes, deadlocks, blk_read_time, blk_write_time, extract('epoch' from stats_reset)::float FROM pg_stat_database WHERE datname = %(datname)s
140000: SELECT numbackends, xact_commit, xact_rollback, blks_read, blks_hit, tup_returned, tup_fetched, tup_inserted, tup_updated, tup_deleted, conflicts, temp_files, temp_bytes, deadlocks, COALESCE(checksum_failures, 0) AS checksum_failures, blk_read_time, blk_write_time, session_time, active_time, idle_in_transaction_time, sessions, sessions_abandoned, sessions_fatal, sessions_killed, extract('epoch' from stats_reset)::float FROM pg_stat_database WHERE datname = %(datname)s
# Per-replication metrics
rep_stats:
type: row
query:
# Per-standby statistics, looked up by the repid value produced by discover_rep.
# client_addr and application_name are columns of pg_stat_replication (not
# pg_stat_database), so that is the view queried here. The repid is supplied
# through a %(repid)s placeholder, the same style db_stats uses for datname
# (NOTE(review): assumes the collector binds parameters by name; confirm in pgmon.py).
0: SELECT * FROM pg_stat_replication WHERE client_addr || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') = %(repid)s
# Debugging
sleep:
type: value
query:
0: SELECT now(), pg_sleep(5);

View File

@ -1,72 +0,0 @@
##
# Misc agent settings
##
# Where to write/find the agent PID
#pid_file=/tmp/pgmon.pid
pid_file=/run/pgmon/pgmon.pid
##
# Agent communication settings
##
# IPC socket
#ipc_socket=/tmp/pgmon.sock
ipc_socket=/run/pgmon/pgmon.sock
# IPC communication timeout (s)
#ipc_timeout=10
# Request processing timeout (s)
#request_timeout=10
# Max size of the request queue before it blocks
#request_queue_size=100
# Max time to wait when queueing a request (s)
#request_queue_timeout=2
##
# Agent resource settings
##
# Number of worker threads
#worker_count=4
##
# Logging settings
##
# Log level for stderr logging (or 'off')
stderr_log_level=debug
# Log level for file logging (or 'off')
file_log_level=off
# Log file
#log_file=pgmon.log
##
# DB connection settings
#
# Each cluster entry is of the form:
# name:address:port:dbname:user:password
#
# Any element other than the name can be left empty to use the defaults
##
#cluster=local:/var/run/postgresql:5432:postgres:zbx_monitor:
cluster=pg15:localhost:54315:postgres:postgres:
cluster=pg96:localhost:54396:postgres:postgres:
# Default database to connect to when none is specified for a metric
#dbname=postgres
##
# Monitoring configuration
##
# Metrics
#metrics={}
include=pgmon-metrics.cfg

1677
pgmon.py

File diff suppressed because it is too large Load Diff

21
pgmon.yml Normal file
View File

@ -0,0 +1,21 @@
# IPC port
port: 5400
# Max PostgreSQL connection pool size
#pool_size: 4
# Log level for stderr logging (or 'off')
log_level: debug
# Connection string (excluding dbname)
# This can be left empty to use the libpq defaults
connstr: "user=postgres"
# Default database to connect to when none is specified for a metric
#dbname: postgres
# Metrics
#metrics: {}
include:
- pgmon-metrics.yml

View File

@ -5,12 +5,10 @@ Description=PostgreSQL Monitoring Bridge
After=network.target
[Service]
ExecStart=/usr/local/bin/pgmon.py --server --config /etc/pgmon/%i.cfg
ExecReload=/user/local/bin/pgmon.py --reload --config /etc/pgmon/%i.cfg
RuntimeDirectory=pgmon
RuntimeDirectoryMode=0755
PIDFile=/run/pgmon/%i.pid
ExecStart=/usr/local/bin/pgmon.py /etc/pgmon/%i.cfg
ExecReload=kill -HUP $MAINPID
Restart=on-failure
Type=exec
[Install]
WantedBy=multi-user.target

File diff suppressed because it is too large Load Diff

View File

@ -1 +0,0 @@
UserParameter=pgmon[*],/usr/local/bin/pgmon.py -c /etc/pgmon/$1.cfg "$1,$2,$3,$4,$5,$6,$7,$8,$9"

View File

@ -1 +1,2 @@
psycopg2
psycopg[binary,pool]
pyyaml