From ebb084aa9dec7c7ad87baa8104e9bb43cf69e3fb Mon Sep 17 00:00:00 2001 From: James Campbell Date: Tue, 1 Jul 2025 02:29:56 -0400 Subject: [PATCH 1/8] Add initial query for sequence usage --- sample-config/pgmon-metrics.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/sample-config/pgmon-metrics.yml b/sample-config/pgmon-metrics.yml index 3c8e465..5bdb6d5 100644 --- a/sample-config/pgmon-metrics.yml +++ b/sample-config/pgmon-metrics.yml @@ -196,6 +196,27 @@ metrics: test_args: dbname: postgres + sequence_usage: + type: value + query: +# 9.2 lacks lateral joins, the pg_sequence_last_value function, and the pg_sequences view +# 0: > +# SELECT COALESCE(MAX(pg_sequence_last_value(c.oid)::float / (pg_sequence_parameters(oid)).maximum_value), 0) AS max_usage +# FROM pg_class c +# WHERE c.relkind = 'S' +# 9.3 - 9.6 lacks the pg_sequence_last_value function, and pg_sequences view +# 90300: > +# SELECT COALESCE(MAX(pg_sequence_last_value(c.oid)::float / s.maximum_value), 0) AS max_usage +# FROM pg_class c +# CROSS JOIN LATERAL pg_sequence_parameters(c.oid) AS s +# WHERE c.relkind = 'S' + 100000: > + SELECT COALESCE(MAX(pg_sequence_last_value(c.oid)::float / sp.maximum_value), 0) AS max_usage + FROM pg_class c + CROSS JOIN LATERAL pg_sequence_parameters(c.oid) AS sp + WHERE c.relkind = 'S' + test_args: + dbname: postgres ## # Per-replication metrics From 3c39d8aa9751bb49382884b19cc904b10fb0c856 Mon Sep 17 00:00:00 2001 From: James Campbell Date: Thu, 3 Jul 2025 01:13:58 -0400 Subject: [PATCH 2/8] Remove CIDR prefix from replication id --- sample-config/pgmon-metrics.yml | 44 ++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/sample-config/pgmon-metrics.yml b/sample-config/pgmon-metrics.yml index 5bdb6d5..ac905be 100644 --- a/sample-config/pgmon-metrics.yml +++ b/sample-config/pgmon-metrics.yml @@ -8,14 +8,21 @@ metrics: 0: > SELECT datname AS dbname FROM pg_database + + # Note: If the user lacks sufficient privileges, these fields will be NULL. + # The WHERE clause is intended to prevent Zabbix from discovering a + # connection it cannot monitor. Ideally this would generate an error + # instead. discover_rep: type: set query: 0: > - SELECT client_addr || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') AS repid, + SELECT host(client_addr) || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') AS repid, client_addr, state FROM pg_stat_replication + WHERE state IS NOT NULL + discover_slots: type: set query: @@ -36,6 +43,7 @@ metrics: active FROM pg_replication_slots + ## # cluster-wide metrics ## @@ -85,7 +93,6 @@ metrics: FROM pg_stat_bgwriter bg CROSS JOIN pg_stat_checkpointer cp - io_per_backend: type: set query: @@ -108,6 +115,7 @@ metrics: FROM pg_stat_io GROUP BY backend_type + ## # Per-database metrics ## @@ -210,14 +218,20 @@ metrics: # FROM pg_class c # CROSS JOIN LATERAL pg_sequence_parameters(c.oid) AS s # WHERE c.relkind = 'S' - 100000: > - SELECT COALESCE(MAX(pg_sequence_last_value(c.oid)::float / sp.maximum_value), 0) AS max_usage - FROM pg_class c - CROSS JOIN LATERAL pg_sequence_parameters(c.oid) AS sp - WHERE c.relkind = 'S' + 100000: SELECT COALESCE(MAX(last_value::float / max_value), 0) AS max_usage FROM pg_sequences; test_args: dbname: postgres + sequence_visibility: + type: row + query: + 100000: > + SELECT COUNT(*) FILTER (WHERE has_sequence_privilege(c.oid, 'SELECT,USAGE'::text)) AS readable_sequences, + COUNT(*) AS total_sequences + FROM pg_class AS c + WHERE relkind = 'S'; + + ## # Per-replication metrics ## @@ -237,7 +251,7 @@ metrics: NULL AS replay_lag, sync_state FROM pg_stat_replication - WHERE client_addr || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') = %(repid)s + WHERE host(client_addr) || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') = %(repid)s 100000: > SELECT pid, usename, EXTRACT(EPOCH FROM backend_start)::integer AS backend_start, @@ -250,7 +264,8 @@ metrics: COALESCE(EXTRACT(EPOCH FROM flush_lag), 0)::integer AS flush_lag, COALESCE(EXTRACT(EPOCH FROM replay_lag), 0)::integer AS replay_lag, sync_state - FROM pg_stat_replication WHERE client_addr || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') = %(repid)s + FROM pg_stat_replication + WHERE host(client_addr) || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') = %(repid)s test_args: repid: 127.0.0.1_test_rep @@ -264,24 +279,25 @@ metrics: 90400: > SELECT NULL as active_pid, xmin, - pg_xlog_location_diff(pg_current_xlog_location(), restart_lsn) AS restart_bytes, + pg_xlog_location_diff(pg_current_xlog_location(), restart_lsn)::bigint AS restart_bytes, NULL AS confirmed_flush_bytes FROM pg_replication_slots WHERE slot_name = %(slot)s 90600: > SELECT active_pid, xmin, - pg_xlog_location_diff(pg_current_xlog_location(), restart_lsn) AS restart_bytes, - pg_xlog_location_diff(pg_current_xlog_location(), confirmed_flush_lsn) AS confirmed_flush_bytes + pg_xlog_location_diff(pg_current_xlog_location(), restart_lsn)::bigint AS restart_bytes, + pg_xlog_location_diff(pg_current_xlog_location(), confirmed_flush_lsn)::bigint AS confirmed_flush_bytes FROM pg_replication_slots WHERE slot_name = %(slot)s 100000: > SELECT active_pid, xmin, - pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn) AS restart_bytes, - pg_wal_lsn_diff(pg_current_wal_lsn(), confirmed_flush_lsn) AS confirmed_flush_bytes + pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)::bigint AS restart_bytes, + pg_wal_lsn_diff(pg_current_wal_lsn(), confirmed_flush_lsn)::bigint AS confirmed_flush_bytes FROM pg_replication_slots WHERE slot_name = %(slot)s test_args: slot: test_slot + ## # Debugging ## From 24d1214855b9467a51002fbf20e87e88488da216 Mon Sep 17 00:00:00 2001 From: James Campbell Date: Thu, 3 Jul 2025 01:47:06 -0400 Subject: [PATCH 3/8] Teach json how to serialize decimals --- src/pgmon.py | 16 ++++++++++++++-- src/test_pgmon.py | 16 ++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/src/pgmon.py b/src/pgmon.py index 71d3491..58a4a9e 100755 --- a/src/pgmon.py +++ b/src/pgmon.py @@ -27,6 +27,8 @@ from urllib.parse import urlparse, parse_qs import requests import re +from decimal import Decimal + VERSION = "1.0.3" # Configuration @@ -391,6 +393,16 @@ def get_query(metric, version): raise MetricVersionError("Missing metric query for PostgreSQL {}".format(version)) +def json_encode_special(obj): + """ + Encoder function to handle types the standard JSON package doesn't know what + to do with + """ + if isinstance(obj, Decimal): + return float(obj) + raise TypeError(f'Cannot serialize object of {type(obj)}') + + def run_query_no_retry(pool, return_type, query, args): """ Run the query with no explicit retry code @@ -408,11 +420,11 @@ def run_query_no_retry(pool, return_type, query, args): elif return_type == "row": if len(res) == 0: return "[]" - return json.dumps(res[0]) + return json.dumps(res[0], default=json_encode_special) elif return_type == "column": if len(res) == 0: return "[]" - return json.dumps([list(r.values())[0] for r in res]) + return json.dumps([list(r.values())[0] for r in res], default=json_encode_special) elif return_type == "set": return json.dumps(res) except: diff --git a/src/test_pgmon.py b/src/test_pgmon.py index d2c0590..7e88c8d 100644 --- a/src/test_pgmon.py +++ b/src/test_pgmon.py @@ -5,6 +5,8 @@ import tempfile import logging +from decimal import Decimal + import pgmon # Silence most logging output @@ -789,3 +791,17 @@ metrics: # Make sure we can pull the RSS file (we assume the 9.6 series won't be getting # any more updates) self.assertEqual(pgmon.get_latest_version(), 90624) + + def test_json_encode_special(self): + # Confirm that we're getting the right type + self.assertFalse(isinstance(Decimal('0.5'), float)) + self.assertTrue(isinstance(pgmon.json_encode_special(Decimal('0.5')), float)) + + # Make sure we get sane values + self.assertEqual(pgmon.json_encode_special(Decimal('0.5')), 0.5) + self.assertEqual(pgmon.json_encode_special(Decimal('12')), 12.0) + + # Make sure we can still fail for other types + self.assertRaises( + TypeError, pgmon.json_encode_special, object + ) From 6116f4f885f9e5999a51acc9fe9b4f7af0b25434 Mon Sep 17 00:00:00 2001 From: James Campbell Date: Thu, 3 Jul 2025 02:08:45 -0400 Subject: [PATCH 4/8] Fix missing json default --- src/pgmon.py | 2 +- src/test_pgmon.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/pgmon.py b/src/pgmon.py index 58a4a9e..d184a67 100755 --- a/src/pgmon.py +++ b/src/pgmon.py @@ -426,7 +426,7 @@ def run_query_no_retry(pool, return_type, query, args): return "[]" return json.dumps([list(r.values())[0] for r in res], default=json_encode_special) elif return_type == "set": - return json.dumps(res) + return json.dumps(res, default=json_encode_special) except: dbname = pool.name if dbname in unhappy_cooldown: diff --git a/src/test_pgmon.py b/src/test_pgmon.py index 7e88c8d..1a86492 100644 --- a/src/test_pgmon.py +++ b/src/test_pgmon.py @@ -6,6 +6,7 @@ import tempfile import logging from decimal import Decimal +import json import pgmon @@ -805,3 +806,6 @@ metrics: self.assertRaises( TypeError, pgmon.json_encode_special, object ) + + # Make sure we can actually serialize a Decimal + self.assertEqual(json.dumps(Decimal('2.5'), default=pgmon.json_encode_special), '2.5') From 45953848e2336d7229093b3b588a22ef7f64e8d3 Mon Sep 17 00:00:00 2001 From: James Campbell Date: Thu, 3 Jul 2025 10:16:20 -0400 Subject: [PATCH 5/8] Remove some type casts from working around Decimal types --- sample-config/pgmon-metrics.yml | 56 ++++++++++++++++----------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/sample-config/pgmon-metrics.yml b/sample-config/pgmon-metrics.yml index ac905be..67251db 100644 --- a/sample-config/pgmon-metrics.yml +++ b/sample-config/pgmon-metrics.yml @@ -98,20 +98,20 @@ metrics: query: 160000: > SELECT backend_type, - COALESCE(SUM(reads * op_bytes), 0)::bigint AS reads, - COALESCE(SUM(read_time), 0)::bigint AS read_time, - COALESCE(SUM(writes * op_bytes), 0)::bigint AS writes, - COALESCE(SUM(write_time), 0)::bigint AS write_time, - COALESCE(SUM(writebacks * op_bytes), 0)::bigint AS writebacks, - COALESCE(SUM(writeback_time), 0)::bigint AS writeback_time, - COALESCE(SUM(extends * op_bytes), 0)::bigint AS extends, - COALESCE(SUM(extend_time), 0)::bigint AS extend_time, - COALESCE(SUM(op_bytes), 0)::bigint AS op_bytes, - COALESCE(SUM(hits), 0)::bigint AS hits, - COALESCE(SUM(evictions), 0)::bigint AS evictions, - COALESCE(SUM(reuses), 0)::bigint AS reuses, - COALESCE(SUM(fsyncs), 0)::bigint AS fsyncs, - COALESCE(SUM(fsync_time), 0)::bigint AS fsync_time + COALESCE(SUM(reads * op_bytes), 0) AS reads, + COALESCE(SUM(read_time), 0) AS read_time, + COALESCE(SUM(writes * op_bytes), 0) AS writes, + COALESCE(SUM(write_time), 0) AS write_time, + COALESCE(SUM(writebacks * op_bytes), 0) AS writebacks, + COALESCE(SUM(writeback_time), 0) AS writeback_time, + COALESCE(SUM(extends * op_bytes), 0) AS extends, + COALESCE(SUM(extend_time), 0) AS extend_time, + COALESCE(SUM(op_bytes), 0) AS op_bytes, + COALESCE(SUM(hits), 0) AS hits, + COALESCE(SUM(evictions), 0) AS evictions, + COALESCE(SUM(reuses), 0) AS reuses, + COALESCE(SUM(fsyncs), 0) AS fsyncs, + COALESCE(SUM(fsync_time), 0) AS fsync_time FROM pg_stat_io GROUP BY backend_type @@ -147,7 +147,7 @@ metrics: NULL AS sessions_abandoned, NULL AS sessions_fatal, NULL AS sessions_killed, - extract('epoch' from stats_reset)::float AS stats_reset + extract('epoch' from stats_reset) AS stats_reset FROM pg_stat_database WHERE datname = %(dbname)s 140000: > SELECT numbackends, @@ -174,7 +174,7 @@ metrics: sessions_abandoned, sessions_fatal, sessions_killed, - extract('epoch' from stats_reset)::float AS stats_reset + extract('epoch' from stats_reset) AS stats_reset FROM pg_stat_database WHERE datname = %(dbname)s test_args: dbname: postgres @@ -197,7 +197,7 @@ metrics: 0: > SELECT state, count(*) AS backend_count, - COALESCE(EXTRACT(EPOCH FROM max(now() - state_change))::float, 0) AS max_state_time + COALESCE(EXTRACT(EPOCH FROM max(now() - state_change)) 0) AS max_state_time FROM pg_stat_activity WHERE datname = %(dbname)s GROUP BY state @@ -226,7 +226,7 @@ metrics: type: row query: 100000: > - SELECT COUNT(*) FILTER (WHERE has_sequence_privilege(c.oid, 'SELECT,USAGE'::text)) AS readable_sequences, + SELECT COUNT(*) FILTER (WHERE has_sequence_privilege(c.oid, 'SELECT,USAGE')) AS readable_sequences, COUNT(*) AS total_sequences FROM pg_class AS c WHERE relkind = 'S'; @@ -240,7 +240,7 @@ metrics: query: 90400: > SELECT pid, usename, - EXTRACT(EPOCH FROM backend_start)::integer AS backend_start, + EXTRACT(EPOCH FROM backend_start) AS backend_start, state, pg_xlog_location_diff(pg_current_xlog_location(), sent_location) AS sent_lsn, pg_xlog_location_diff(pg_current_xlog_location(), write_location) AS write_lsn, @@ -254,15 +254,15 @@ metrics: WHERE host(client_addr) || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') = %(repid)s 100000: > SELECT pid, usename, - EXTRACT(EPOCH FROM backend_start)::integer AS backend_start, + EXTRACT(EPOCH FROM backend_start) AS backend_start, state, pg_wal_lsn_diff(pg_current_wal_lsn(), sent_lsn) AS sent_lsn, pg_wal_lsn_diff(pg_current_wal_lsn(), write_lsn) AS write_lsn, pg_wal_lsn_diff(pg_current_wal_lsn(), flush_lsn) AS flush_lsn, pg_wal_lsn_diff(pg_current_wal_lsn(), replay_lsn) AS replay_lsn, - COALESCE(EXTRACT(EPOCH FROM write_lag), 0)::integer AS write_lag, - COALESCE(EXTRACT(EPOCH FROM flush_lag), 0)::integer AS flush_lag, - COALESCE(EXTRACT(EPOCH FROM replay_lag), 0)::integer AS replay_lag, + COALESCE(EXTRACT(EPOCH FROM write_lag), 0) AS write_lag, + COALESCE(EXTRACT(EPOCH FROM flush_lag), 0) AS flush_lag, + COALESCE(EXTRACT(EPOCH FROM replay_lag), 0) AS replay_lag, sync_state FROM pg_stat_replication WHERE host(client_addr) || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') = %(repid)s @@ -279,20 +279,20 @@ metrics: 90400: > SELECT NULL as active_pid, xmin, - pg_xlog_location_diff(pg_current_xlog_location(), restart_lsn)::bigint AS restart_bytes, + pg_xlog_location_diff(pg_current_xlog_location(), restart_lsn) AS restart_bytes, NULL AS confirmed_flush_bytes FROM pg_replication_slots WHERE slot_name = %(slot)s 90600: > SELECT active_pid, xmin, - pg_xlog_location_diff(pg_current_xlog_location(), restart_lsn)::bigint AS restart_bytes, - pg_xlog_location_diff(pg_current_xlog_location(), confirmed_flush_lsn)::bigint AS confirmed_flush_bytes + pg_xlog_location_diff(pg_current_xlog_location(), restart_lsn) AS restart_bytes, + pg_xlog_location_diff(pg_current_xlog_location(), confirmed_flush_lsn) AS confirmed_flush_bytes FROM pg_replication_slots WHERE slot_name = %(slot)s 100000: > SELECT active_pid, xmin, - pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)::bigint AS restart_bytes, - pg_wal_lsn_diff(pg_current_wal_lsn(), confirmed_flush_lsn)::bigint AS confirmed_flush_bytes + pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn) AS restart_bytes, + pg_wal_lsn_diff(pg_current_wal_lsn(), confirmed_flush_lsn) AS confirmed_flush_bytes FROM pg_replication_slots WHERE slot_name = %(slot)s test_args: slot: test_slot From 98b74d9aed5bdfc1341fb7f32989808e0f7262e6 Mon Sep 17 00:00:00 2001 From: James Campbell Date: Fri, 4 Jul 2025 02:45:43 -0400 Subject: [PATCH 6/8] Add sequence metrics to Zabbix template --- sample-config/pgmon-metrics.yml | 2 +- zabbix_templates/pgmon_templates.yaml | 81 +++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 1 deletion(-) diff --git a/sample-config/pgmon-metrics.yml b/sample-config/pgmon-metrics.yml index 67251db..cb537d2 100644 --- a/sample-config/pgmon-metrics.yml +++ b/sample-config/pgmon-metrics.yml @@ -226,7 +226,7 @@ metrics: type: row query: 100000: > - SELECT COUNT(*) FILTER (WHERE has_sequence_privilege(c.oid, 'SELECT,USAGE')) AS readable_sequences, + SELECT COUNT(*) FILTER (WHERE has_sequence_privilege(c.oid, 'SELECT,USAGE')) AS visible_sequences, COUNT(*) AS total_sequences FROM pg_class AS c WHERE relkind = 'S'; diff --git a/zabbix_templates/pgmon_templates.yaml b/zabbix_templates/pgmon_templates.yaml index 997e1fa..f982e33 100644 --- a/zabbix_templates/pgmon_templates.yaml +++ b/zabbix_templates/pgmon_templates.yaml @@ -785,6 +785,76 @@ zabbix_export: value: PostgreSQL - tag: Database value: '{#DBNAME}' + - uuid: 5960120dd01c4926b0fc1fbe9c011507 + name: 'Database max sequence usage in {#DBNAME}' + type: HTTP_AGENT + key: 'pgmon_db_max_sequence[{#DBNAME}]' + delay: 5m + value_type: FLOAT + units: '%' + description: 'The percent of the currently configured value range for the most utilized sequence.' + url: 'http://localhost:{$AGENT_PORT}/sequence_usage' + query_fields: + - name: dbname + value: '{#DBNAME}' + tags: + - tag: Application + value: PostgreSQL + - tag: Database + value: '{#DBNAME}' + - uuid: 48b9cc80ac4d4aee9e9f3a5d6f7d4a95 + name: 'Total number of sequences on {#DBNAME}' + type: DEPENDENT + key: 'pgmon_db_sequences[total,{#DBNAME}]' + delay: '0' + description: 'Total number of sequences in the database.' + preprocessing: + - type: JSONPATH + parameters: + - $.total_sequences + master_item: + key: 'pgmon_db_sequence_visibility[{#DBNAME}]' + tags: + - tag: Application + value: PostgreSQL + - tag: Database + value: '{#DBNAME}' + - uuid: 6521a9bab2ac47bf85429832d289bbac + name: 'Visible sequences on {#DBNAME}' + type: DEPENDENT + key: 'pgmon_db_sequences[visible,{#DBNAME}]' + delay: '0' + description: 'Number of sequences in the database for which Zabbix can see stats.' + preprocessing: + - type: JSONPATH + parameters: + - $.visible_sequences + master_item: + key: 'pgmon_db_sequence_visibility[{#DBNAME}]' + tags: + - tag: Application + value: PostgreSQL + - tag: Database + value: '{#DBNAME}' + - uuid: 00f2da3eb99940839410a6ecd5df153f + name: 'Database sequence visibility in {#DBNAME}' + type: HTTP_AGENT + key: 'pgmon_db_sequence_visibility[{#DBNAME}]' + delay: 30m + value_type: TEXT + trends: '0' + description: 'Statistics about the number of sequences that exist and the number Zabbix can actually see stats for.' + url: 'http://localhost:{$AGENT_PORT}/sequence_visibility' + query_fields: + - name: dbname + value: '{#DBNAME}' + tags: + - tag: Application + value: PostgreSQL + - tag: Database + value: '{#DBNAME}' + - tag: Type + value: Raw - uuid: 492b3cac15f348c2b85f97b69c114d1b name: 'Database Stats for {#DBNAME}' type: HTTP_AGENT @@ -803,6 +873,17 @@ zabbix_export: value: '{#DBNAME}' - tag: Type value: Raw + trigger_prototypes: + - uuid: d29d0fd9d9d34b5ebd649592b0829ce5 + expression: 'last(/PostgreSQL by pgmon/pgmon_db_sequences[total,{#DBNAME}]) <> last(/PostgreSQL by pgmon/pgmon_db_sequences[visible,{#DBNAME}])' + name: 'Sequences not visible to Zabbix on {#DBNAME}' + priority: WARNING + description: 'There are sequences for which Zabbix cannot see usage statistics' + tags: + - tag: Application + value: PostgreSQL + - tag: Component + value: Sequence graph_prototypes: - uuid: 1f7de43b77714f819e61c31273712b70 name: 'DML Totals for {#DBNAME}' From 83fa12ec54496cde28497cff988de6f762fd528e Mon Sep 17 00:00:00 2001 From: James Campbell Date: Fri, 4 Jul 2025 02:46:25 -0400 Subject: [PATCH 7/8] Correct types for slot LSN lag metrics --- zabbix_templates/pgmon_templates.yaml | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/zabbix_templates/pgmon_templates.yaml b/zabbix_templates/pgmon_templates.yaml index f982e33..b63d1c0 100644 --- a/zabbix_templates/pgmon_templates.yaml +++ b/zabbix_templates/pgmon_templates.yaml @@ -1581,6 +1581,7 @@ zabbix_export: delay: '0' history: 90d value_type: FLOAT + units: s description: 'Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written and flushed it (but not yet applied it). This can be used to gauge the delay that synchronous_commit level on incurred while committing if this server was configured as a synchronous standby.' preprocessing: - type: JSONPATH @@ -1596,13 +1597,12 @@ zabbix_export: - tag: Database value: '{#DBNAME}' - uuid: 624f8f085a3642c9a10a03361c17763d - name: 'Last flush LSN for {#REPID}' + name: 'Last flush LSN lag for {#REPID}' type: DEPENDENT key: 'pgmon_rep[flush_lsn,repid={#REPID}]' delay: '0' history: 90d - value_type: TEXT - trends: '0' + units: B description: 'Last write-ahead log location flushed to disk by this standby server' preprocessing: - type: JSONPATH @@ -1624,6 +1624,7 @@ zabbix_export: delay: '0' history: 90d value_type: FLOAT + units: s description: 'Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written, flushed and applied it. This can be used to gauge the delay that synchronous_commit level remote_apply incurred while committing if this server was configured as a synchronous standby.' preprocessing: - type: JSONPATH @@ -1639,13 +1640,12 @@ zabbix_export: - tag: Database value: '{#DBNAME}' - uuid: fe1bed51845d4694bae8f53deed4846d - name: 'Last replay LSN for {#REPID}' + name: 'Last replay LSN lag for {#REPID}' type: DEPENDENT key: 'pgmon_rep[replay_lsn,repid={#REPID}]' delay: '0' history: 90d - value_type: TEXT - trends: '0' + units: B description: 'Last write-ahead log location replayed into the database on this standby server' preprocessing: - type: JSONPATH @@ -1661,13 +1661,12 @@ zabbix_export: - tag: Database value: '{#DBNAME}' - uuid: 68c179d0e33f45f9bf82d2d4125763f0 - name: 'Last sent LSN for {#REPID}' + name: 'Last sent LSN lag for {#REPID}' type: DEPENDENT key: 'pgmon_rep[sent_lsn,repid={#REPID}]' delay: '0' history: 90d - value_type: TEXT - trends: '0' + units: B description: 'Last write-ahead log location sent on this connection' preprocessing: - type: JSONPATH @@ -1717,6 +1716,7 @@ zabbix_export: delay: '0' history: 90d value_type: FLOAT + units: s description: 'Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written it (but not yet flushed it or applied it). This can be used to gauge the delay that synchronous_commit level remote_write incurred while committing if this server was configured as a synchronous standby.' preprocessing: - type: JSONPATH @@ -1732,13 +1732,12 @@ zabbix_export: - tag: Database value: '{#DBNAME}' - uuid: 57fb03cf63af4b0a91d8e36d6ff64d30 - name: 'Last write LSN for {#REPID}' + name: 'Last write LSN lag for {#REPID}' type: DEPENDENT key: 'pgmon_rep[write_lsn,repid={#REPID}]' delay: '0' history: 90d - value_type: TEXT - trends: '0' + units: B description: 'Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written it (but not yet flushed it or applied it). This can be used to gauge the delay that synchronous_commit level remote_write incurred while committing if this server was configured as a synchronous standby.' preprocessing: - type: JSONPATH From 7cb0f7ad409bc8f5881695d366e18c7b5779068f Mon Sep 17 00:00:00 2001 From: James Campbell Date: Sat, 5 Jul 2025 01:17:35 -0400 Subject: [PATCH 8/8] Fix typo in activity query --- sample-config/pgmon-metrics.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sample-config/pgmon-metrics.yml b/sample-config/pgmon-metrics.yml index cb537d2..0b07909 100644 --- a/sample-config/pgmon-metrics.yml +++ b/sample-config/pgmon-metrics.yml @@ -197,7 +197,7 @@ metrics: 0: > SELECT state, count(*) AS backend_count, - COALESCE(EXTRACT(EPOCH FROM max(now() - state_change)) 0) AS max_state_time + COALESCE(EXTRACT(EPOCH FROM max(now() - state_change)), 0) AS max_state_time FROM pg_stat_activity WHERE datname = %(dbname)s GROUP BY state