From c872fc6b90a5435cc935395e672edcdee095a209 Mon Sep 17 00:00:00 2001 From: James Campbell Date: Thu, 15 May 2025 02:01:20 -0400 Subject: [PATCH 1/6] Start implementing metric tests --- requirements-dev.yml | 4 ++ tests/sql-tests.py | 90 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100644 requirements-dev.yml create mode 100644 tests/sql-tests.py diff --git a/requirements-dev.yml b/requirements-dev.yml new file mode 100644 index 0000000..9680b1e --- /dev/null +++ b/requirements-dev.yml @@ -0,0 +1,4 @@ +-r requirements.txt +testcontainers[postgresql] +pytest +black diff --git a/tests/sql-tests.py b/tests/sql-tests.py new file mode 100644 index 0000000..497f835 --- /dev/null +++ b/tests/sql-tests.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 + +from testcontainers import PostgresContainer +import requests +import yaml +import sys + +pg_versions=[9.2, 9.6, 10, 11, 12, 13, 14, 15, 16 17] + +pgmon_port=93849 + +tests = {} + +container = None + +def std_version(version): + if version[0] == '9': + return int(f"{version[0]}0{version[1]}00") + else + return int(f"{version}0000") + +def run_test(metric, params, status, check): + """ + Validate the return code and restults of a query + + params: + metric: The name of the metric to test + params: A dictionary of query parameters to use when testing + status: The expected status code + check: A regular expression to validate the results (or None) + """ + result = requests.get(f"http://localhost:{pgmon_port}/{metric}", params=params) + + if result.status_code != status: + print(f"FAIL: {metric}[{params}] returned wrong status code: {result.status_code}") + return False + + if re.match(check, result.text): + print(f"SUCCESS: {metric}[{params}]") + return True + else + print(f"FAIL: {metric}[{params}] result is invalid, got:\n {result.text}") + return False + +def run_all_tests(version): + """ + Run all defined tests against the current running instance + + params: + version: The PostgreSQL version currently being tested (server_version_num format) + """ + errors = 0 + + # Convert versions like 12 to 120000 + version_num = std_version(version) + + # Loop through all of the metrics to test. + for metric in tests.keys(): + params = metric.get('params', {}) + status = 200 + check = '' + + # Find the correct version of the status and check parameters (assuming there are any). + # If there are any check conditions, find the highest version that does not exceed the version we're currently testing against. + # To do this, we order the keys (versions) in reverse, so we start with the highest. + for v in reversed(sorted(metric.get('expect', {}).keys())): + # If we've reached a version <= the one we're testing use it. + if int(v) <= version_num: + status = metric['expect'][v]['status'] + check = metric['expect'][v]['check'] + break + + if not run_test(metric, metrics[metric].get(params, {}), status, check): + errors += 1 + + return errors + +def start_test_db(version): + #container = PostgresContainer() + pass + +# Read the test script +try: + with open("metric_tests.yml", 'r') as f: + tests = yaml.safe_load(f) +except yaml.parser.ParserError as e: + sys.exit("Failed to parse metrics_test.yml: {e}") + + +# Run From 8928bba337143911139d963cbca2b2aff3da50ce Mon Sep 17 00:00:00 2001 From: James Campbell Date: Thu, 15 May 2025 02:04:50 -0400 Subject: [PATCH 2/6] Format python using black --- tests/sql-tests.py | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/tests/sql-tests.py b/tests/sql-tests.py index 497f835..f339123 100644 --- a/tests/sql-tests.py +++ b/tests/sql-tests.py @@ -5,20 +5,22 @@ import requests import yaml import sys -pg_versions=[9.2, 9.6, 10, 11, 12, 13, 14, 15, 16 17] +pg_versions = [9.2, 9.6, 10, 11, 12, 13, 14, 15, 16, 17] -pgmon_port=93849 +pgmon_port = 93849 tests = {} container = None + def std_version(version): - if version[0] == '9': + if version[0] == "9": return int(f"{version[0]}0{version[1]}00") - else + else: return int(f"{version}0000") + def run_test(metric, params, status, check): """ Validate the return code and restults of a query @@ -32,16 +34,19 @@ def run_test(metric, params, status, check): result = requests.get(f"http://localhost:{pgmon_port}/{metric}", params=params) if result.status_code != status: - print(f"FAIL: {metric}[{params}] returned wrong status code: {result.status_code}") + print( + f"FAIL: {metric}[{params}] returned wrong status code: {result.status_code}" + ) return False if re.match(check, result.text): print(f"SUCCESS: {metric}[{params}]") return True - else + else: print(f"FAIL: {metric}[{params}] result is invalid, got:\n {result.text}") return False + def run_all_tests(version): """ Run all defined tests against the current running instance @@ -52,39 +57,38 @@ def run_all_tests(version): errors = 0 # Convert versions like 12 to 120000 - version_num = std_version(version) + version_num = std_version(version) # Loop through all of the metrics to test. for metric in tests.keys(): - params = metric.get('params', {}) + params = metric.get("params", {}) status = 200 - check = '' + check = "" # Find the correct version of the status and check parameters (assuming there are any). # If there are any check conditions, find the highest version that does not exceed the version we're currently testing against. # To do this, we order the keys (versions) in reverse, so we start with the highest. - for v in reversed(sorted(metric.get('expect', {}).keys())): + for v in reversed(sorted(metric.get("expect", {}).keys())): # If we've reached a version <= the one we're testing use it. if int(v) <= version_num: - status = metric['expect'][v]['status'] - check = metric['expect'][v]['check'] + status = metric["expect"][v]["status"] + check = metric["expect"][v]["check"] break - + if not run_test(metric, metrics[metric].get(params, {}), status, check): errors += 1 return errors + def start_test_db(version): - #container = PostgresContainer() + # container = PostgresContainer() pass + # Read the test script try: - with open("metric_tests.yml", 'r') as f: + with open("metric_tests.yml", "r") as f: tests = yaml.safe_load(f) except yaml.parser.ParserError as e: sys.exit("Failed to parse metrics_test.yml: {e}") - - -# Run From 529bef96794f293e5d00b8700d6c878c73dc87b1 Mon Sep 17 00:00:00 2001 From: James Campbell Date: Sun, 18 May 2025 12:52:32 -0400 Subject: [PATCH 3/6] Add ability to run query tests --- src/pgmon.py | 128 ++++++++++++++++++++++++++++++++------------------- 1 file changed, 81 insertions(+), 47 deletions(-) diff --git a/src/pgmon.py b/src/pgmon.py index 38c1f8b..24e0c3a 100755 --- a/src/pgmon.py +++ b/src/pgmon.py @@ -4,6 +4,7 @@ import yaml import json import time import os +import sys import argparse import logging @@ -74,6 +75,10 @@ class UnhappyDBError(Exception): pass +class UnknownMetricError(Exception): + pass + + class MetricVersionError(Exception): pass @@ -466,6 +471,54 @@ def get_cluster_version(): return cluster_version +def sample_metric(dbname, metric_name, args, retry=True): + """ + Run the appropriate query for the named metric against the specified database + """ + # Get the metric definition + try: + metric = config["metrics"][metric_name] + except KeyError: + raise UnknownMetricError("Unknown metric: {}".format(metric_name)) + + # Get the connection pool for the database, or create one if it doesn't + # already exist. + pool = get_pool(dbname) + + # Identify the PostgreSQL version + version = get_cluster_version() + + # Get the query version + query = get_query(metric, version) + + # Execute the quert + if retry: + return run_query(pool, metric["type"], query, args) + else: + return run_query_no_retry(pool, metric["type"], query, args) + + +def test_queries(): + """ + Run all of the metric queries against a database and check the results + """ + # We just use the default db for tests + dbname = config["dbname"] + # Loop through all defined metrics. + for metric_name in config["metrics"].keys(): + # Get the actual metric definition + metric = metrics[metric_name] + # If the metric has arguments to use while testing, grab those + args = metric.get("test_args", {}) + # Run the query without the ability to retry. + res = sample_metric(dbname, metric_name, args, retry=False) + # Compare the result to the provided sample results + # TODO + # Return the number of errors + # TODO + return 0 + + class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): """ This is our request handling server. It is responsible for listening for @@ -494,10 +547,10 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): """ # Parse the URL parsed_path = urlparse(self.path) - name = parsed_path.path.strip("/") + metric_name = parsed_path.path.strip("/") parsed_query = parse_qs(parsed_path.query) - if name == "agent_version": + if metric_name == "agent_version": self._reply(200, VERSION) return @@ -505,60 +558,31 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler): # single values, just grab the first from each. args = {key: values[0] for key, values in parsed_query.items()} - # Get the metric definition - try: - metric = config["metrics"][name] - except KeyError: - log.error("Unknown metric: {}".format(name)) - self._reply(404, "Unknown metric") - return - # Get the dbname. If none was provided, use the default from the # config. dbname = args.get("dbname", config["dbname"]) - # Get the connection pool for the database, or create one if it doesn't - # already exist. + # Sample the metric try: - pool = get_pool(dbname) - except UnhappyDBError: + self._reply(200, sample_metric(dbname, metric_name, args)) + return + except UnknownMetricError as e: + log.error("Unknown metric: {}".format(metric_name)) + self._reply(404, "Unknown metric") + return + except MetricVersionError as e: + log.error( + "Failed to find a version of {} for {}".format(metric_name, version) + ) + self._reply(404, "Unsupported version") + return + except UnhappyDBError as e: log.info("Database {} is unhappy, please be patient".format(dbname)) self._reply(503, "Database unavailable") return - - # Identify the PostgreSQL version - try: - version = get_cluster_version() - except UnhappyDBError: - return except Exception as e: - if dbname in unhappy_cooldown: - log.info("Database {} is unhappy, please be patient".format(dbname)) - self._reply(503, "Database unavailable") - else: - log.error("Failed to get PostgreSQL version: {}".format(e)) - self._reply(500, "Error getting DB version") - return - - # Get the query version - try: - query = get_query(metric, version) - except KeyError: - log.error("Failed to find a version of {} for {}".format(name, version)) - self._reply(404, "Unsupported version") - return - - # Execute the quert - try: - self._reply(200, run_query(pool, metric["type"], query, args)) - return - except Exception as e: - if dbname in unhappy_cooldown: - log.info("Database {} is unhappy, please be patient".format(dbname)) - self._reply(503, "Database unavailable") - else: - log.error("Error running query: {}".format(e)) - self._reply(500, "Error running query") + log.error("Error running query: {}".format(e)) + self._reply(500, "Unexpected error: {}".format(e)) return def _reply(self, code, content): @@ -585,6 +609,8 @@ if __name__ == "__main__": help="The config file to read (default: %(default)s)", ) + parser.add_argument("test", action="store_true", help="Run query tests and exit") + args = parser.parse_args() # Set the config file path @@ -593,6 +619,14 @@ if __name__ == "__main__": # Read the config file read_config(config_file) + # Run query tests and exit if test mode is enabled + if args.test: + errors = test_queries() + if errors > 0: + sys.exit(1) + else: + sys.exit(0) + # Set up the http server to receive requests server_address = ("127.0.0.1", config["port"]) httpd = ThreadingHTTPServer(server_address, SimpleHTTPRequestHandler) From c0e153108310070dce4127a2887e88295f1c5d6d Mon Sep 17 00:00:00 2001 From: James Campbell Date: Thu, 22 May 2025 14:53:25 -0400 Subject: [PATCH 4/6] Add query test script and test mode * Add a mode to test all metric queries * Add a script to run query tests against different versions of PostgeSQL * Add Docker elements for query testing * Switch to using a --config flag when specifying the config file * Fix some metric queries * Allow the agent address to be configured * Allow the sslmode connection parameter to be configured --- openrc/pgmon.initd | 2 +- sample-config/pgmon-metrics.yml | 14 +++-- sample-config/pgmon.yml | 6 +++ src/pgmon.py | 31 +++++++---- systemd/pgmon@.service | 2 +- tests/Dockerfile | 22 ++++++++ tests/docker-compose.yml | 32 +++++++++++ tests/pgpass | 1 + tests/run-tests.sh | 62 ++++++++++++++++++++++ tests/sql-tests.py | 94 --------------------------------- tests/test-config.yml | 16 ++++++ 11 files changed, 174 insertions(+), 108 deletions(-) create mode 100644 tests/Dockerfile create mode 100644 tests/docker-compose.yml create mode 100644 tests/pgpass create mode 100755 tests/run-tests.sh delete mode 100644 tests/sql-tests.py create mode 100644 tests/test-config.yml diff --git a/openrc/pgmon.initd b/openrc/pgmon.initd index 8d8c96d..e6e3c81 100644 --- a/openrc/pgmon.initd +++ b/openrc/pgmon.initd @@ -19,6 +19,6 @@ start_pre() { } command="/usr/bin/pgmon" -command_args="'$CONFIG_FILE'" +command_args="-c '$CONFIG_FILE'" command_background="true" command_user="${PGMON_USER}:${PGMON_GROUP}" diff --git a/sample-config/pgmon-metrics.yml b/sample-config/pgmon-metrics.yml index c72c786..6bb7753 100644 --- a/sample-config/pgmon-metrics.yml +++ b/sample-config/pgmon-metrics.yml @@ -11,7 +11,8 @@ metrics: discover_slots: type: set query: - 0: SELECT slot_name, plugin, slot_type, database, temporary, active FROM pg_replication_slots + 0: SELECT slot_name, plugin, slot_type, database, false as temporary, active FROM pg_replication_slots + 100000: SELECT slot_name, plugin, slot_type, database, temporary, active FROM pg_replication_slots # cluster-wide metrics version: @@ -29,6 +30,8 @@ metrics: query: 0: SELECT numbackends, xact_commit, xact_rollback, blks_read, blks_hit, tup_returned, tup_fetched, tup_inserted, tup_updated, tup_deleted, conflicts, temp_files, temp_bytes, deadlocks, blk_read_time, blk_write_time, extract('epoch' from stats_reset)::float FROM pg_stat_database WHERE datname = %(dbname)s 140000: SELECT numbackends, xact_commit, xact_rollback, blks_read, blks_hit, tup_returned, tup_fetched, tup_inserted, tup_updated, tup_deleted, conflicts, temp_files, temp_bytes, deadlocks, COALESCE(checksum_failures, 0) AS checksum_failures, blk_read_time, blk_write_time, session_time, active_time, idle_in_transaction_time, sessions, sessions_abandoned, sessions_fatal, sessions_killed, extract('epoch' from stats_reset)::float FROM pg_stat_database WHERE datname = %(dbname)s + test_args: + dbname: postgres # Debugging ntables: @@ -40,7 +43,9 @@ metrics: rep_stats: type: row query: - 0: SELECT * FROM pg_stat_database WHERE client_addr || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') = '{repid}' + 0: SELECT * FROM pg_stat_replication WHERE client_addr || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') = '{repid}' + test_args: + repid: 127.0.0.1_test_rep # Debugging sleep: @@ -52,4 +57,7 @@ metrics: slot_stats: type: row query: - 0: SELECT active_pid, xmin, pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn) AS restart_bytes, pg_wal_lsn_diff(pg_current_wal_lsn(), confirmed_flush_lsn) AS confirmed_flush_bytes FROM pg_replication_slots WHERE slot_name = '{slot}' + 0: SELECT active_pid, xmin, pg_xlog_location_diff(pg_current_xlog_location(), restart_lsn) AS restart_bytes, pg_xlog_location_diff(pg_current_xlog_location(), confirmed_flush_lsn) AS confirmed_flush_bytes FROM pg_replication_slots WHERE slot_name = '{slot}' + 100000: SELECT active_pid, xmin, pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn) AS restart_bytes, pg_wal_lsn_diff(pg_current_wal_lsn(), confirmed_flush_lsn) AS confirmed_flush_bytes FROM pg_replication_slots WHERE slot_name = '{slot}' + test_args: + slot: test_slot diff --git a/sample-config/pgmon.yml b/sample-config/pgmon.yml index 879005d..bbe49dc 100644 --- a/sample-config/pgmon.yml +++ b/sample-config/pgmon.yml @@ -1,3 +1,6 @@ +# The address the agent binds to +#address: 127.0.0.1 + # The port the agent listens on for requests #port: 5400 @@ -26,6 +29,9 @@ # Default database to connect to when none is specified for a metric #dbname: 'postgres' +# SSL connection mode +#ssl_mode: require + # Timeout for getting a connection slot from a pool #pool_slot_timeout: 5 diff --git a/src/pgmon.py b/src/pgmon.py index 24e0c3a..f9c906c 100755 --- a/src/pgmon.py +++ b/src/pgmon.py @@ -85,6 +85,8 @@ class MetricVersionError(Exception): # Default config settings default_config = { + # The address the agent binds to + "address": "127.0.0.1", # The port the agent listens on for requests "port": 5400, # Min PostgreSQL connection pool size (per database) @@ -103,6 +105,8 @@ default_config = { "dbport": 5432, # Default database to connect to when none is specified for a metric "dbname": "postgres", + # SSL connection mode + "ssl_mode": "require", # Timeout for getting a connection slot from a pool "pool_slot_timeout": 5, # PostgreSQL connection timeout (seconds) @@ -325,6 +329,7 @@ def get_pool(dbname): # lock if dbname not in connections: log.info("Creating connection pool for: {}".format(dbname)) + # Actually create the connection pool connections[dbname] = ConnectionPool( dbname, int(config["min_pool_size"]), @@ -334,7 +339,7 @@ def get_pool(dbname): port=config["dbport"], user=config["dbuser"], connect_timeout=int(config["connect_timeout"]), - sslmode="require", + sslmode=config["ssl_mode"], ) # Clear the unhappy indicator if present unhappy_cooldown.pop(dbname, None) @@ -382,10 +387,16 @@ def run_query_no_retry(pool, return_type, query, args): res = curs.fetchall() if return_type == "value": + if len(res) == 0: + return "" return str(list(res[0].values())[0]) elif return_type == "row": + if len(res) == 0: + return "[]" return json.dumps(res[0]) elif return_type == "column": + if len(res) == 0: + return "[]" return json.dumps([list(r.values())[0] for r in res]) elif return_type == "set": return json.dumps(res) @@ -393,7 +404,7 @@ def run_query_no_retry(pool, return_type, query, args): dbname = pool.name if dbname in unhappy_cooldown: raise UnhappyDBError() - elif conn.broken: + elif conn.closed != 0: raise DisconnectedError() else: raise @@ -505,15 +516,14 @@ def test_queries(): # We just use the default db for tests dbname = config["dbname"] # Loop through all defined metrics. - for metric_name in config["metrics"].keys(): - # Get the actual metric definition - metric = metrics[metric_name] + for name, metric in config["metrics"].items(): # If the metric has arguments to use while testing, grab those args = metric.get("test_args", {}) # Run the query without the ability to retry. - res = sample_metric(dbname, metric_name, args, retry=False) + res = sample_metric(dbname, name, args, retry=False) # Compare the result to the provided sample results # TODO + print("{} -> {}".format(name, res)) # Return the number of errors # TODO return 0 @@ -603,13 +613,16 @@ if __name__ == "__main__": ) parser.add_argument( - "config_file", + "-c", + "--config_file", default="pgmon.yml", nargs="?", help="The config file to read (default: %(default)s)", ) - parser.add_argument("test", action="store_true", help="Run query tests and exit") + parser.add_argument( + "-t", "--test", action="store_true", help="Run query tests and exit" + ) args = parser.parse_args() @@ -628,7 +641,7 @@ if __name__ == "__main__": sys.exit(0) # Set up the http server to receive requests - server_address = ("127.0.0.1", config["port"]) + server_address = (config["address"], config["port"]) httpd = ThreadingHTTPServer(server_address, SimpleHTTPRequestHandler) # Set up the signal handler diff --git a/systemd/pgmon@.service b/systemd/pgmon@.service index 0dec16b..a17860a 100644 --- a/systemd/pgmon@.service +++ b/systemd/pgmon@.service @@ -7,7 +7,7 @@ After=network.target [Service] EnvironmentFile=/etc/pgmon/%i-service.conf User=${SERVICE_USER:-postgres} -ExecStart=/usr/local/bin/pgmon /etc/pgmon/%i.yml +ExecStart=/usr/local/bin/pgmon -c /etc/pgmon/%i.yml ExecReload=kill -HUP $MAINPID Restart=on-failure Type=exec diff --git a/tests/Dockerfile b/tests/Dockerfile new file mode 100644 index 0000000..1913313 --- /dev/null +++ b/tests/Dockerfile @@ -0,0 +1,22 @@ +FROM alpine:3.21 + +RUN apk update && \ + apk add py3-psycopg2 \ + py3-yaml \ + tini + +WORKDIR /app + +COPY src/pgmon.py /app/ + +COPY sample-config/pgmon-metrics.yml /app/ + +COPY tests/test-config.yml /app/ + +COPY --chmod=0600 --chown=postgres:postgres tests/pgpass /root/.pgpass + +ENTRYPOINT ["tini", "--"] + +EXPOSE 5400 + +CMD ["/app/pgmon.py", "-c", "/app/test-config.yml", "--test"] diff --git a/tests/docker-compose.yml b/tests/docker-compose.yml new file mode 100644 index 0000000..deeecd2 --- /dev/null +++ b/tests/docker-compose.yml @@ -0,0 +1,32 @@ +--- + +services: + agent: + image: pgmon + build: + context: .. + dockerfile: tests/Dockerfile + ports: + - :5400 + depends_on: + db: + condition: service_healthy + + db: + image: "postgres:${PGTAG:-17-bookworm}" + ports: + - :5432 + environment: + POSTGRES_PASSWORD: secret + healthcheck: + #test: [ "CMD", "pg_isready", "-U", "postgres" ] + test: [ "CMD-SHELL", "pg_controldata /var/lib/postgresql/data/ | grep -q 'in production'" ] + interval: 5s + timeout: 2s + retries: 10 + command: > + postgres -c ssl=on + -c ssl_cert_file='/etc/ssl/certs/ssl-cert-snakeoil.pem' + -c ssl_key_file='/etc/ssl/private/ssl-cert-snakeoil.key' + -c listen_addresses='*' + diff --git a/tests/pgpass b/tests/pgpass new file mode 100644 index 0000000..1066019 --- /dev/null +++ b/tests/pgpass @@ -0,0 +1 @@ +db:5432:*:postgres:secret diff --git a/tests/run-tests.sh b/tests/run-tests.sh new file mode 100755 index 0000000..9bf472a --- /dev/null +++ b/tests/run-tests.sh @@ -0,0 +1,62 @@ +#!/bin/bash + +# Versions to test +versions=( $@ ) + +# If we weren't given any versions, test them all +if [ ${#versions[@]} -eq 0 ] +then + versions=( 9.2 9.6 10 11 12 13 14 15 16 17 ) +fi + +# Image tags to use +declare -A images=() +images["9.2"]='9.2' +images["9.6"]='9.6-bullseye' +images["10"]='10-bullseye' +images["11"]='11-bookworm' +images["12"]='12-bookworm' +images["13"]='13-bookworm' +images["14"]='14-bookworm' +images["15"]='15-bookworm' +images["16"]='16-bookworm' +images["17"]='17-bookworm' + +declare -A results=() + +# Make sure everything's down to start with +docker compose down + +# Make sure our agent container is up to date +docker compose build agent + +for version in "${versions[@]}" +do + echo + echo "Testing: PostgreSQL ${version}" + + # Specify the version we're testing against + export PGTAG="${images["$version"]}" + + # Start the containers + docker compose up --exit-code-from=agent agent + rc=$? + + results["$version"]=$rc + + # Destroy the containers + docker compose down +done + +echo +echo +for v in "${versions[@]}" +do + case "${results["$v"]}" in + 0) msg="OK" ;; + 1) msg="Query failure detected" ;; + 18) msg="Docker image error: 18" ;; + *) msg="Unexpected error: ${results["$v"]}" ;; + esac + echo "$v -> $msg" +done diff --git a/tests/sql-tests.py b/tests/sql-tests.py deleted file mode 100644 index f339123..0000000 --- a/tests/sql-tests.py +++ /dev/null @@ -1,94 +0,0 @@ -#!/usr/bin/env python3 - -from testcontainers import PostgresContainer -import requests -import yaml -import sys - -pg_versions = [9.2, 9.6, 10, 11, 12, 13, 14, 15, 16, 17] - -pgmon_port = 93849 - -tests = {} - -container = None - - -def std_version(version): - if version[0] == "9": - return int(f"{version[0]}0{version[1]}00") - else: - return int(f"{version}0000") - - -def run_test(metric, params, status, check): - """ - Validate the return code and restults of a query - - params: - metric: The name of the metric to test - params: A dictionary of query parameters to use when testing - status: The expected status code - check: A regular expression to validate the results (or None) - """ - result = requests.get(f"http://localhost:{pgmon_port}/{metric}", params=params) - - if result.status_code != status: - print( - f"FAIL: {metric}[{params}] returned wrong status code: {result.status_code}" - ) - return False - - if re.match(check, result.text): - print(f"SUCCESS: {metric}[{params}]") - return True - else: - print(f"FAIL: {metric}[{params}] result is invalid, got:\n {result.text}") - return False - - -def run_all_tests(version): - """ - Run all defined tests against the current running instance - - params: - version: The PostgreSQL version currently being tested (server_version_num format) - """ - errors = 0 - - # Convert versions like 12 to 120000 - version_num = std_version(version) - - # Loop through all of the metrics to test. - for metric in tests.keys(): - params = metric.get("params", {}) - status = 200 - check = "" - - # Find the correct version of the status and check parameters (assuming there are any). - # If there are any check conditions, find the highest version that does not exceed the version we're currently testing against. - # To do this, we order the keys (versions) in reverse, so we start with the highest. - for v in reversed(sorted(metric.get("expect", {}).keys())): - # If we've reached a version <= the one we're testing use it. - if int(v) <= version_num: - status = metric["expect"][v]["status"] - check = metric["expect"][v]["check"] - break - - if not run_test(metric, metrics[metric].get(params, {}), status, check): - errors += 1 - - return errors - - -def start_test_db(version): - # container = PostgresContainer() - pass - - -# Read the test script -try: - with open("metric_tests.yml", "r") as f: - tests = yaml.safe_load(f) -except yaml.parser.ParserError as e: - sys.exit("Failed to parse metrics_test.yml: {e}") diff --git a/tests/test-config.yml b/tests/test-config.yml new file mode 100644 index 0000000..8bf863b --- /dev/null +++ b/tests/test-config.yml @@ -0,0 +1,16 @@ +--- + +# Bind to all interfaces so we can submit requests from outside the test container +address: 0.0.0.0 + +# We always just connect to the db container +dbhost: db +dbport: 5432 +dbuser: postgres + +# Allow some insecure SSL parameters for the 9.2 test +ssl_ciphers: DEFAULT@SECLEVEL=1 + +# Pull in the standard metrics +include: + - pgmon-metrics.yml From 80304f40d1ee135d454dbc222d4098e09fe0a20f Mon Sep 17 00:00:00 2001 From: James Campbell Date: Sun, 1 Jun 2025 00:11:44 -0400 Subject: [PATCH 5/6] Revise the versions on a few queries, improve query tests * Add ability to specify the sslmode parameter when connecting to PostgreSQL * Fix min versions for replication queries * Add query-tests target to main Makefile --- Makefile | 13 +++++++++++-- sample-config/pgmon-metrics.yml | 10 +++++----- tests/docker-compose.yml | 2 +- tests/test-config.yml | 5 +++-- 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/Makefile b/Makefile index 8f98594..7606715 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,7 @@ SUPPORTED := ubuntu-20.04 \ # These targets are the main ones to use for most things. ## -.PHONY: all clean tgz test install +.PHONY: all clean tgz test query-tests install # Build all packages @@ -64,6 +64,10 @@ clean: test: cd src ; python3 -m unittest +# Run query tests +query-tests: + cd tests ; ./run-tests.sh + # Install the script at the specified base directory install: # Set up directories @@ -92,7 +96,7 @@ install: # Run all of the install tests -.PHONY: install-tests debian-%-install-test rockylinux-%-install-test ubuntu-%-install-test +.PHONY: install-tests debian-%-install-test rockylinux-%-install-test ubuntu-%-install-test gentoo-install-test install-tests: $(foreach distro_release, $(SUPPORTED), $(distro_release)-install-test) @@ -124,6 +128,11 @@ oraclelinux-%-install-test: oraclelinux:7 \ bash -c 'yum makecache && yum install -y /output/$(PACKAGE_NAME)-$(VERSION)-1.el7.noarch.rpm' +# Run a Gentoo install test +gentoo-install-test: + # May impliment this in the future, but would require additional headaches to set up a repo + true + ## # Container targets # diff --git a/sample-config/pgmon-metrics.yml b/sample-config/pgmon-metrics.yml index 6bb7753..60e428e 100644 --- a/sample-config/pgmon-metrics.yml +++ b/sample-config/pgmon-metrics.yml @@ -11,7 +11,7 @@ metrics: discover_slots: type: set query: - 0: SELECT slot_name, plugin, slot_type, database, false as temporary, active FROM pg_replication_slots + 90400: SELECT slot_name, plugin, slot_type, database, false as temporary, active FROM pg_replication_slots 100000: SELECT slot_name, plugin, slot_type, database, temporary, active FROM pg_replication_slots # cluster-wide metrics @@ -20,9 +20,9 @@ metrics: query: 0: SHOW server_version_num max_frozen_age: - type: value + type: row query: - 0: SELECT max(age(datfrozenxid)) FROM pg_database + 0: SELECT max(age(datfrozenxid)), max(mxid_age(datminmxid)) FROM pg_database # Per-database metrics db_stats: @@ -43,7 +43,7 @@ metrics: rep_stats: type: row query: - 0: SELECT * FROM pg_stat_replication WHERE client_addr || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') = '{repid}' + 90400: SELECT * FROM pg_stat_replication WHERE client_addr || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') = '{repid}' test_args: repid: 127.0.0.1_test_rep @@ -57,7 +57,7 @@ metrics: slot_stats: type: row query: - 0: SELECT active_pid, xmin, pg_xlog_location_diff(pg_current_xlog_location(), restart_lsn) AS restart_bytes, pg_xlog_location_diff(pg_current_xlog_location(), confirmed_flush_lsn) AS confirmed_flush_bytes FROM pg_replication_slots WHERE slot_name = '{slot}' + 90400: SELECT active_pid, xmin, pg_xlog_location_diff(pg_current_xlog_location(), restart_lsn) AS restart_bytes, pg_xlog_location_diff(pg_current_xlog_location(), confirmed_flush_lsn) AS confirmed_flush_bytes FROM pg_replication_slots WHERE slot_name = '{slot}' 100000: SELECT active_pid, xmin, pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn) AS restart_bytes, pg_wal_lsn_diff(pg_current_wal_lsn(), confirmed_flush_lsn) AS confirmed_flush_bytes FROM pg_replication_slots WHERE slot_name = '{slot}' test_args: slot: test_slot diff --git a/tests/docker-compose.yml b/tests/docker-compose.yml index deeecd2..ae9b5f6 100644 --- a/tests/docker-compose.yml +++ b/tests/docker-compose.yml @@ -23,7 +23,7 @@ services: test: [ "CMD-SHELL", "pg_controldata /var/lib/postgresql/data/ | grep -q 'in production'" ] interval: 5s timeout: 2s - retries: 10 + retries: 20 command: > postgres -c ssl=on -c ssl_cert_file='/etc/ssl/certs/ssl-cert-snakeoil.pem' diff --git a/tests/test-config.yml b/tests/test-config.yml index 8bf863b..7a17cdb 100644 --- a/tests/test-config.yml +++ b/tests/test-config.yml @@ -8,8 +8,9 @@ dbhost: db dbport: 5432 dbuser: postgres -# Allow some insecure SSL parameters for the 9.2 test -ssl_ciphers: DEFAULT@SECLEVEL=1 +# The SSL cipher parameters are too old in the 9.2 container, so we allow the tests +# to be run without encryption +ssl_mode: prefer # Pull in the standard metrics include: From 1d642d41b2aebd1ce99386bde2f62e6725112b64 Mon Sep 17 00:00:00 2001 From: James Campbell Date: Sun, 1 Jun 2025 00:23:26 -0400 Subject: [PATCH 6/6] Bump version to 1.0.2 * Modify Makefile to extract the version from the mainscript * Bump version to 1.0.2 --- GENTOO/pgmon-1.0.2.ebuild | 73 +++++++++++++++++++++++++++++++++++++++ Makefile | 3 +- src/pgmon.py | 2 +- 3 files changed, 76 insertions(+), 2 deletions(-) create mode 100644 GENTOO/pgmon-1.0.2.ebuild diff --git a/GENTOO/pgmon-1.0.2.ebuild b/GENTOO/pgmon-1.0.2.ebuild new file mode 100644 index 0000000..9c52b5a --- /dev/null +++ b/GENTOO/pgmon-1.0.2.ebuild @@ -0,0 +1,73 @@ +# Copyright 2024 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +EAPI=8 + +PYTHON_COMPAT=( python3_{6..13} ) + +inherit python-r1 + +DESCRIPTION="PostgreSQL monitoring bridge" +HOMEPAGE="None" + +LICENSE="BSD" +SLOT="0" +KEYWORDS="amd64" + +SRC_URI="https://code2.shh-dot-com.org/james/${PN}/archive/v${PV}.tar.bz2 -> ${P}.tar.bz2" + +IUSE="-systemd" + +DEPEND=" + ${PYTHON_DEPS} + dev-python/psycopg:2 + dev-python/pyyaml + app-admin/logrotate + " +RDEPEND="${DEPEND}" +BDEPEND="" + +RESTRICT="fetch" + +#S="${WORKDIR}/${PN}" + +pkg_nofetch() { + einfo "Please download" + einfo " - ${P}.tar.bz2" + einfo "from ${HOMEPAGE} and place it in your DISTDIR directory." + einfo "The file should be owned by portage:portage." +} + +src_compile() { + true +} + +src_install() { + # Install init script + if ! use systemd ; then + newinitd "openrc/pgmon.initd" pgmon + newconfd "openrc/pgmon.confd" pgmon + fi + + # Install systemd unit + if use systemd ; then + systemd_dounit "systemd/pgmon.service" + fi + + # Install script + exeinto /usr/bin + newexe "src/pgmon.py" pgmon + + # Install default config + diropts -o root -g root -m 0755 + insinto /etc/pgmon + doins "sample-config/pgmon.yml" + doins "sample-config/pgmon-metrics.yml" + + # Install logrotate config + insinto /etc/logrotate.d + newins "logrotate/pgmon.logrotate" pgmon + + # Install man page + doman manpages/pgmon.1 +} diff --git a/Makefile b/Makefile index 7606715..1c3de00 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,10 @@ # Package details PACKAGE_NAME := pgmon -VERSION := 1.0.1 SCRIPT := src/$(PACKAGE_NAME).py +VERSION := $(shell grep -m 1 '^VERSION = ' "$(SCRIPT)" | sed -ne 's/.*"\(.*\)".*/\1/p') + # Where packages are built BUILD_DIR := build diff --git a/src/pgmon.py b/src/pgmon.py index f9c906c..6178827 100755 --- a/src/pgmon.py +++ b/src/pgmon.py @@ -24,7 +24,7 @@ from http.server import BaseHTTPRequestHandler, HTTPServer from http.server import ThreadingHTTPServer from urllib.parse import urlparse, parse_qs -VERSION = "1.0.1" +VERSION = "1.0.2" # Configuration config = {}