Compare commits: 86d5e8917b ... 5ea007c3f6 (9 commits)

| SHA1 |
|---|
| 5ea007c3f6 |
| 7cb0f7ad40 |
| 83fa12ec54 |
| 98b74d9aed |
| 45953848e2 |
| 6116f4f885 |
| 24d1214855 |
| 3c39d8aa97 |
| ebb084aa9d |
@@ -8,14 +8,21 @@ metrics:
      0: >
        SELECT datname AS dbname
        FROM pg_database

+  # Note: If the user lacks sufficient privileges, these fields will be NULL.
+  # The WHERE clause is intended to prevent Zabbix from discovering a
+  # connection it cannot monitor. Ideally this would generate an error
+  # instead.
  discover_rep:
    type: set
    query:
      0: >
-        SELECT client_addr || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') AS repid,
+        SELECT host(client_addr) || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') AS repid,
               client_addr,
               state
        FROM pg_stat_replication
        WHERE state IS NOT NULL

  discover_slots:
    type: set
    query:
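The discover_rep change swaps `client_addr` for `host(client_addr)` when building the replica identifier: `client_addr` is an inet value, and `host()` renders only the host part, so the repid can never pick up a netmask suffix and stays a clean key matching the `repid: 127.0.0.1_test_rep` test value used later in this diff. A rough client-side mimic of the construction (hypothetical helper, for illustration only):

```python
import ipaddress
import re

def make_repid(client_addr: str, application_name: str) -> str:
    # host(client_addr): keep the address, drop any /prefix the inet carries
    host = str(ipaddress.ip_interface(client_addr).ip)
    # regexp_replace(application_name, '[ ,]', '_', 'g')
    app = re.sub(r"[ ,]", "_", application_name)
    return f"{host}_{app}"

print(make_repid("127.0.0.1/32", "test rep"))  # -> 127.0.0.1_test_rep
```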
@@ -36,6 +43,7 @@ metrics:
               active
        FROM pg_replication_slots


  ##
  # cluster-wide metrics
  ##
@@ -85,29 +93,29 @@ metrics:
        FROM pg_stat_bgwriter bg
        CROSS JOIN pg_stat_checkpointer cp


  io_per_backend:
    type: set
    query:
      160000: >
        SELECT backend_type,
-               COALESCE(SUM(reads * op_bytes), 0)::bigint AS reads,
-               COALESCE(SUM(read_time), 0)::bigint AS read_time,
-               COALESCE(SUM(writes * op_bytes), 0)::bigint AS writes,
-               COALESCE(SUM(write_time), 0)::bigint AS write_time,
-               COALESCE(SUM(writebacks * op_bytes), 0)::bigint AS writebacks,
-               COALESCE(SUM(writeback_time), 0)::bigint AS writeback_time,
-               COALESCE(SUM(extends * op_bytes), 0)::bigint AS extends,
-               COALESCE(SUM(extend_time), 0)::bigint AS extend_time,
-               COALESCE(SUM(op_bytes), 0)::bigint AS op_bytes,
-               COALESCE(SUM(hits), 0)::bigint AS hits,
-               COALESCE(SUM(evictions), 0)::bigint AS evictions,
-               COALESCE(SUM(reuses), 0)::bigint AS reuses,
-               COALESCE(SUM(fsyncs), 0)::bigint AS fsyncs,
-               COALESCE(SUM(fsync_time), 0)::bigint AS fsync_time
+               COALESCE(SUM(reads * op_bytes), 0) AS reads,
+               COALESCE(SUM(read_time), 0) AS read_time,
+               COALESCE(SUM(writes * op_bytes), 0) AS writes,
+               COALESCE(SUM(write_time), 0) AS write_time,
+               COALESCE(SUM(writebacks * op_bytes), 0) AS writebacks,
+               COALESCE(SUM(writeback_time), 0) AS writeback_time,
+               COALESCE(SUM(extends * op_bytes), 0) AS extends,
+               COALESCE(SUM(extend_time), 0) AS extend_time,
+               COALESCE(SUM(op_bytes), 0) AS op_bytes,
+               COALESCE(SUM(hits), 0) AS hits,
+               COALESCE(SUM(evictions), 0) AS evictions,
+               COALESCE(SUM(reuses), 0) AS reuses,
+               COALESCE(SUM(fsyncs), 0) AS fsyncs,
+               COALESCE(SUM(fsync_time), 0) AS fsync_time
        FROM pg_stat_io
        GROUP BY backend_type


  ##
  # Per-database metrics
  ##
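The `160000` key gates the io_per_backend query: pgmon's query maps are keyed by minimum `server_version_num`, and `pg_stat_io` only exists from PostgreSQL 16 onward. A minimal sketch of that selection logic, assuming it mirrors what `get_query()` in src/pgmon.py (further down in this diff) does; `pick_query` is a hypothetical stand-in:

```python
def pick_query(queries: dict, server_version_num: int) -> str:
    # Use the highest version key that the connected server satisfies.
    eligible = [v for v in queries if v <= server_version_num]
    if not eligible:
        raise LookupError(f"no query for PostgreSQL {server_version_num}")
    return queries[max(eligible)]

queries = {160000: "SELECT ... FROM pg_stat_io GROUP BY backend_type"}
print(pick_query(queries, 160002))  # PostgreSQL 16.2 -> matches the 160000 entry
```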
@@ -139,7 +147,7 @@ metrics:
               NULL AS sessions_abandoned,
               NULL AS sessions_fatal,
               NULL AS sessions_killed,
-               extract('epoch' from stats_reset)::float AS stats_reset
+               extract('epoch' from stats_reset) AS stats_reset
        FROM pg_stat_database WHERE datname = %(dbname)s
      140000: >
        SELECT numbackends,
@@ -166,7 +174,7 @@ metrics:
               sessions_abandoned,
               sessions_fatal,
               sessions_killed,
-               extract('epoch' from stats_reset)::float AS stats_reset
+               extract('epoch' from stats_reset) AS stats_reset
        FROM pg_stat_database WHERE datname = %(dbname)s
    test_args:
      dbname: postgres
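Context for the dropped `::float` casts: since PostgreSQL 14, EXTRACT returns numeric rather than double precision, so `stats_reset` now reaches the agent as a Decimal, which the stock json module refuses. An illustrative reproduction (the epoch value is made up):

```python
import json
from decimal import Decimal

try:
    json.dumps({"stats_reset": Decimal("1714503273.581")})
except TypeError as err:
    print(err)  # Object of type Decimal is not JSON serializable
```

The new `json_encode_special` hook in src/pgmon.py, further down in this diff, handles this on the Python side, making the SQL-level casts unnecessary.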
@@ -189,13 +197,40 @@ metrics:
      0: >
        SELECT state,
               count(*) AS backend_count,
-               COALESCE(EXTRACT(EPOCH FROM max(now() - state_change))::float, 0) AS max_state_time
+               COALESCE(EXTRACT(EPOCH FROM max(now() - state_change)), 0) AS max_state_time
        FROM pg_stat_activity
        WHERE datname = %(dbname)s
        GROUP BY state
    test_args:
      dbname: postgres

+  sequence_usage:
+    type: value
+    query:
+      # 9.2 lacks lateral joins, the pg_sequence_last_value function, and the pg_sequences view
+      # 0: >
+      #   SELECT COALESCE(MAX(pg_sequence_last_value(c.oid)::float / (pg_sequence_parameters(oid)).maximum_value), 0) AS max_usage
+      #   FROM pg_class c
+      #   WHERE c.relkind = 'S'
+      # 9.3 - 9.6 lack the pg_sequence_last_value function and the pg_sequences view
+      # 90300: >
+      #   SELECT COALESCE(MAX(pg_sequence_last_value(c.oid)::float / s.maximum_value), 0) AS max_usage
+      #   FROM pg_class c
+      #   CROSS JOIN LATERAL pg_sequence_parameters(c.oid) AS s
+      #   WHERE c.relkind = 'S'
+      100000: SELECT COALESCE(MAX(last_value::float / max_value), 0) AS max_usage FROM pg_sequences;
+    test_args:
+      dbname: postgres
+
+  sequence_visibility:
+    type: row
+    query:
+      100000: >
+        SELECT COUNT(*) FILTER (WHERE has_sequence_privilege(c.oid, 'SELECT,USAGE')) AS visible_sequences,
+               COUNT(*) AS total_sequences
+        FROM pg_class AS c
+        WHERE relkind = 'S';


  ##
  # Per-replication metrics
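The new sequence_usage metric reports the most utilized sequence as a fraction of its configured range. A minimal sketch of the same arithmetic in Python, over made-up (last_value, max_value) rows standing in for pg_sequences (last_value is NULL until a sequence is first used, which both the SQL's MAX over NULLs and the COALESCE tolerate):

```python
def max_sequence_usage(rows):
    # rows of (last_value, max_value); skip sequences never used yet
    usages = [last / maximum for last, maximum in rows if last is not None]
    return max(usages, default=0.0)  # COALESCE(..., 0) equivalent

rows = [(1_073_741_824, 2_147_483_647), (None, 32_767)]
print(max_sequence_usage(rows))  # ~0.5 -> the int4 sequence is half exhausted
```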
@@ -205,7 +240,7 @@ metrics:
    query:
      90400: >
        SELECT pid, usename,
-               EXTRACT(EPOCH FROM backend_start)::integer AS backend_start,
+               EXTRACT(EPOCH FROM backend_start) AS backend_start,
               state,
               pg_xlog_location_diff(pg_current_xlog_location(), sent_location) AS sent_lsn,
               pg_xlog_location_diff(pg_current_xlog_location(), write_location) AS write_lsn,
@@ -216,20 +251,21 @@ metrics:
               NULL AS replay_lag,
               sync_state
        FROM pg_stat_replication
-        WHERE client_addr || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') = %(repid)s
+        WHERE host(client_addr) || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') = %(repid)s
      100000: >
        SELECT pid, usename,
-               EXTRACT(EPOCH FROM backend_start)::integer AS backend_start,
+               EXTRACT(EPOCH FROM backend_start) AS backend_start,
               state,
               pg_wal_lsn_diff(pg_current_wal_lsn(), sent_lsn) AS sent_lsn,
               pg_wal_lsn_diff(pg_current_wal_lsn(), write_lsn) AS write_lsn,
               pg_wal_lsn_diff(pg_current_wal_lsn(), flush_lsn) AS flush_lsn,
               pg_wal_lsn_diff(pg_current_wal_lsn(), replay_lsn) AS replay_lsn,
-               COALESCE(EXTRACT(EPOCH FROM write_lag), 0)::integer AS write_lag,
-               COALESCE(EXTRACT(EPOCH FROM flush_lag), 0)::integer AS flush_lag,
-               COALESCE(EXTRACT(EPOCH FROM replay_lag), 0)::integer AS replay_lag,
+               COALESCE(EXTRACT(EPOCH FROM write_lag), 0) AS write_lag,
+               COALESCE(EXTRACT(EPOCH FROM flush_lag), 0) AS flush_lag,
+               COALESCE(EXTRACT(EPOCH FROM replay_lag), 0) AS replay_lag,
               sync_state
-        FROM pg_stat_replication WHERE client_addr || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') = %(repid)s
+        FROM pg_stat_replication
+        WHERE host(client_addr) || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') = %(repid)s
    test_args:
      repid: 127.0.0.1_test_rep
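Dropping the `::integer` casts above matters for small lags: EXTRACT(EPOCH FROM <interval>) yields a numeric, and the old cast truncated any sub-second lag to 0. A quick illustration of what the agent now sees versus what the cast produced (the lag value is illustrative):

```python
from decimal import Decimal

# What psycopg2 returns for EXTRACT(EPOCH FROM write_lag) on a 1.5 ms lag.
lag = Decimal("0.0015")
print(int(lag))    # 0      -- the old ::integer cast hid sub-second lag
print(float(lag))  # 0.0015 -- preserved now; json_encode_special emits this
```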
@@ -261,6 +297,7 @@ metrics:
    test_args:
      slot: test_slot


  ##
  # Debugging
  ##

src/pgmon.py (18 changed lines)
@@ -27,6 +27,8 @@ from urllib.parse import urlparse, parse_qs
import requests
import re

+from decimal import Decimal
+
VERSION = "1.0.3"

# Configuration
@@ -391,6 +393,16 @@ def get_query(metric, version):
    raise MetricVersionError("Missing metric query for PostgreSQL {}".format(version))


+def json_encode_special(obj):
+    """
+    Encoder function to handle types the standard JSON package doesn't know what
+    to do with
+    """
+    if isinstance(obj, Decimal):
+        return float(obj)
+    raise TypeError(f'Cannot serialize object of {type(obj)}')
+
+
def run_query_no_retry(pool, return_type, query, args):
    """
    Run the query with no explicit retry code
@@ -408,13 +420,13 @@ def run_query_no_retry(pool, return_type, query, args):
        elif return_type == "row":
            if len(res) == 0:
                return "[]"
-            return json.dumps(res[0])
+            return json.dumps(res[0], default=json_encode_special)
        elif return_type == "column":
            if len(res) == 0:
                return "[]"
-            return json.dumps([list(r.values())[0] for r in res])
+            return json.dumps([list(r.values())[0] for r in res], default=json_encode_special)
        elif return_type == "set":
-            return json.dumps(res)
+            return json.dumps(res, default=json_encode_special)
    except:
        dbname = pool.name
        if dbname in unhappy_cooldown:
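Taken together, the new hook plus the `default=` arguments mean any Decimal anywhere in a result set is rendered as a plain JSON number. A minimal usage sketch, assuming src/pgmon.py is importable as `pgmon` (as the test suite below does); the row values are illustrative:

```python
import json
from decimal import Decimal

from pgmon import json_encode_special  # the helper added above

row = {"reads": Decimal("8192"), "read_time": Decimal("0.25")}
print(json.dumps(row, default=json_encode_special))
# -> {"reads": 8192.0, "read_time": 0.25}
```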
@@ -5,6 +5,9 @@ import tempfile

import logging

+from decimal import Decimal
+import json
+
import pgmon

# Silence most logging output
@@ -789,3 +792,20 @@ metrics:
        # Make sure we can pull the RSS file (we assume the 9.6 series won't be getting
        # any more updates)
        self.assertEqual(pgmon.get_latest_version(), 90624)
+
+    def test_json_encode_special(self):
+        # Confirm that we're getting the right type
+        self.assertFalse(isinstance(Decimal('0.5'), float))
+        self.assertTrue(isinstance(pgmon.json_encode_special(Decimal('0.5')), float))
+
+        # Make sure we get sane values
+        self.assertEqual(pgmon.json_encode_special(Decimal('0.5')), 0.5)
+        self.assertEqual(pgmon.json_encode_special(Decimal('12')), 12.0)
+
+        # Make sure we can still fail for other types
+        self.assertRaises(
+            TypeError, pgmon.json_encode_special, object
+        )
+
+        # Make sure we can actually serialize a Decimal
+        self.assertEqual(json.dumps(Decimal('2.5'), default=pgmon.json_encode_special), '2.5')
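A side note on why the exact-equality assertions above are safe: 0.5, 12, and 2.5 are all exactly representable as binary floats, so the Decimal-to-float conversion is lossless for these test values. Non-dyadic values would not compare exactly:

```python
from decimal import Decimal

print(float(Decimal("0.5")) == 0.5)             # True  -- exact in binary
print(float(Decimal("0.1")) == Decimal("0.1"))  # False -- conversion rounds
```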
@@ -785,6 +785,76 @@ zabbix_export:
      value: PostgreSQL
    - tag: Database
      value: '{#DBNAME}'
+- uuid: 5960120dd01c4926b0fc1fbe9c011507
+  name: 'Database max sequence usage in {#DBNAME}'
+  type: HTTP_AGENT
+  key: 'pgmon_db_max_sequence[{#DBNAME}]'
+  delay: 5m
+  value_type: FLOAT
+  units: '%'
+  description: 'The percent of the currently configured value range for the most utilized sequence.'
+  url: 'http://localhost:{$AGENT_PORT}/sequence_usage'
+  query_fields:
+    - name: dbname
+      value: '{#DBNAME}'
+  tags:
+    - tag: Application
+      value: PostgreSQL
+    - tag: Database
+      value: '{#DBNAME}'
+- uuid: 48b9cc80ac4d4aee9e9f3a5d6f7d4a95
+  name: 'Total number of sequences on {#DBNAME}'
+  type: DEPENDENT
+  key: 'pgmon_db_sequences[total,{#DBNAME}]'
+  delay: '0'
+  description: 'Total number of sequences in the database.'
+  preprocessing:
+    - type: JSONPATH
+      parameters:
+        - $.total_sequences
+  master_item:
+    key: 'pgmon_db_sequence_visibility[{#DBNAME}]'
+  tags:
+    - tag: Application
+      value: PostgreSQL
+    - tag: Database
+      value: '{#DBNAME}'
+- uuid: 6521a9bab2ac47bf85429832d289bbac
+  name: 'Visible sequences on {#DBNAME}'
+  type: DEPENDENT
+  key: 'pgmon_db_sequences[visible,{#DBNAME}]'
+  delay: '0'
+  description: 'Number of sequences in the database for which Zabbix can see stats.'
+  preprocessing:
+    - type: JSONPATH
+      parameters:
+        - $.visible_sequences
+  master_item:
+    key: 'pgmon_db_sequence_visibility[{#DBNAME}]'
+  tags:
+    - tag: Application
+      value: PostgreSQL
+    - tag: Database
+      value: '{#DBNAME}'
+- uuid: 00f2da3eb99940839410a6ecd5df153f
+  name: 'Database sequence visibility in {#DBNAME}'
+  type: HTTP_AGENT
+  key: 'pgmon_db_sequence_visibility[{#DBNAME}]'
+  delay: 30m
+  value_type: TEXT
+  trends: '0'
+  description: 'Statistics about the number of sequences that exist and the number Zabbix can actually see stats for.'
+  url: 'http://localhost:{$AGENT_PORT}/sequence_visibility'
+  query_fields:
+    - name: dbname
+      value: '{#DBNAME}'
+  tags:
+    - tag: Application
+      value: PostgreSQL
+    - tag: Database
+      value: '{#DBNAME}'
+    - tag: Type
+      value: Raw
- uuid: 492b3cac15f348c2b85f97b69c114d1b
  name: 'Database Stats for {#DBNAME}'
  type: HTTP_AGENT
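The two pgmon_db_sequences[...] items are DEPENDENT items: a single HTTP_AGENT master item fetches /sequence_visibility once, and each dependent item applies its JSONPath to the cached payload, so the agent is queried once per cycle for what is one SQL query. Roughly, assuming the agent reply is the sequence_visibility row from earlier in this diff (values illustrative):

```python
import json

payload = '{"visible_sequences": 41, "total_sequences": 42}'  # sample agent reply
doc = json.loads(payload)
print(doc["visible_sequences"])  # what $.visible_sequences selects
print(doc["total_sequences"])    # what $.total_sequences selects
```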
@@ -803,6 +873,17 @@ zabbix_export:
      value: '{#DBNAME}'
    - tag: Type
      value: Raw
+trigger_prototypes:
+  - uuid: d29d0fd9d9d34b5ebd649592b0829ce5
+    expression: 'last(/PostgreSQL by pgmon/pgmon_db_sequences[total,{#DBNAME}]) <> last(/PostgreSQL by pgmon/pgmon_db_sequences[visible,{#DBNAME}])'
+    name: 'Sequences not visible to Zabbix on {#DBNAME}'
+    priority: WARNING
+    description: 'There are sequences for which Zabbix cannot see usage statistics'
+    tags:
+      - tag: Application
+        value: PostgreSQL
+      - tag: Component
+        value: Sequence
graph_prototypes:
  - uuid: 1f7de43b77714f819e61c31273712b70
    name: 'DML Totals for {#DBNAME}'
@@ -1500,6 +1581,7 @@ zabbix_export:
    delay: '0'
    history: 90d
    value_type: FLOAT
+    units: s
    description: 'Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written and flushed it (but not yet applied it). This can be used to gauge the delay that synchronous_commit level on incurred while committing if this server was configured as a synchronous standby.'
    preprocessing:
      - type: JSONPATH
@@ -1515,13 +1597,12 @@ zabbix_export:
      - tag: Database
        value: '{#DBNAME}'
  - uuid: 624f8f085a3642c9a10a03361c17763d
-    name: 'Last flush LSN for {#REPID}'
+    name: 'Last flush LSN lag for {#REPID}'
    type: DEPENDENT
    key: 'pgmon_rep[flush_lsn,repid={#REPID}]'
    delay: '0'
    history: 90d
    value_type: TEXT
    trends: '0'
    units: B
    description: 'Last write-ahead log location flushed to disk by this standby server'
    preprocessing:
      - type: JSONPATH
@@ -1543,6 +1624,7 @@ zabbix_export:
    delay: '0'
    history: 90d
    value_type: FLOAT
+    units: s
    description: 'Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written, flushed and applied it. This can be used to gauge the delay that synchronous_commit level remote_apply incurred while committing if this server was configured as a synchronous standby.'
    preprocessing:
      - type: JSONPATH
@@ -1558,13 +1640,12 @@ zabbix_export:
      - tag: Database
        value: '{#DBNAME}'
  - uuid: fe1bed51845d4694bae8f53deed4846d
-    name: 'Last replay LSN for {#REPID}'
+    name: 'Last replay LSN lag for {#REPID}'
    type: DEPENDENT
    key: 'pgmon_rep[replay_lsn,repid={#REPID}]'
    delay: '0'
    history: 90d
    value_type: TEXT
    trends: '0'
    units: B
    description: 'Last write-ahead log location replayed into the database on this standby server'
    preprocessing:
      - type: JSONPATH
@@ -1580,13 +1661,12 @@ zabbix_export:
      - tag: Database
        value: '{#DBNAME}'
  - uuid: 68c179d0e33f45f9bf82d2d4125763f0
-    name: 'Last sent LSN for {#REPID}'
+    name: 'Last sent LSN lag for {#REPID}'
    type: DEPENDENT
    key: 'pgmon_rep[sent_lsn,repid={#REPID}]'
    delay: '0'
    history: 90d
    value_type: TEXT
    trends: '0'
    units: B
    description: 'Last write-ahead log location sent on this connection'
    preprocessing:
      - type: JSONPATH
@@ -1636,6 +1716,7 @@ zabbix_export:
    delay: '0'
    history: 90d
    value_type: FLOAT
+    units: s
    description: 'Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written it (but not yet flushed it or applied it). This can be used to gauge the delay that synchronous_commit level remote_write incurred while committing if this server was configured as a synchronous standby.'
    preprocessing:
      - type: JSONPATH
@@ -1651,13 +1732,12 @@ zabbix_export:
      - tag: Database
        value: '{#DBNAME}'
  - uuid: 57fb03cf63af4b0a91d8e36d6ff64d30
-    name: 'Last write LSN for {#REPID}'
+    name: 'Last write LSN lag for {#REPID}'
    type: DEPENDENT
    key: 'pgmon_rep[write_lsn,repid={#REPID}]'
    delay: '0'
    history: 90d
    value_type: TEXT
    trends: '0'
    units: B
    description: 'Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written it (but not yet flushed it or applied it). This can be used to gauge the delay that synchronous_commit level remote_write incurred while committing if this server was configured as a synchronous standby.'
    preprocessing:
      - type: JSONPATH
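The "lag" renames reflect what the queries earlier in this diff actually emit: pg_wal_lsn_diff(pg_current_wal_lsn(), <lsn>) is a byte offset behind the primary, not a raw LSN, which matches the items' units of B. A small sketch of the same arithmetic on LSN strings (hypothetical helpers, not part of pgmon):

```python
# An LSN prints as two hex words, 'high/low', where low covers 32 bits.
def lsn_to_int(lsn: str) -> int:
    high, low = lsn.split("/")
    return (int(high, 16) << 32) | int(low, 16)

def lsn_diff(a: str, b: str) -> int:
    # pg_wal_lsn_diff(a, b): bytes by which a is ahead of b
    return lsn_to_int(a) - lsn_to_int(b)

print(lsn_diff("0/3000060", "0/3000000"))  # 96 bytes of replication lag
```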