From c872fc6b90a5435cc935395e672edcdee095a209 Mon Sep 17 00:00:00 2001
From: James Campbell <james@shh-dot-com.org>
Date: Thu, 15 May 2025 02:01:20 -0400
Subject: [PATCH 1/6] Start implementing metric tests

---
 requirements-dev.yml |  4 ++
 tests/sql-tests.py   | 90 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 94 insertions(+)
 create mode 100644 requirements-dev.yml
 create mode 100644 tests/sql-tests.py

diff --git a/requirements-dev.yml b/requirements-dev.yml
new file mode 100644
index 0000000..9680b1e
--- /dev/null
+++ b/requirements-dev.yml
@@ -0,0 +1,4 @@
+-r requirements.txt
+testcontainers[postgresql]
+pytest
+black
diff --git a/tests/sql-tests.py b/tests/sql-tests.py
new file mode 100644
index 0000000..497f835
--- /dev/null
+++ b/tests/sql-tests.py
@@ -0,0 +1,90 @@
+#!/usr/bin/env python3
+
+from testcontainers import PostgresContainer
+import requests
+import yaml
+import sys
+
+pg_versions=[9.2, 9.6, 10, 11, 12, 13, 14, 15, 16 17]
+
+pgmon_port=93849
+
+tests = {}
+
+container = None
+
+def std_version(version):
+    if version[0] == '9':
+        return int(f"{version[0]}0{version[1]}00")
+    else
+        return int(f"{version}0000")
+
+def run_test(metric, params, status, check):
+    """
+    Validate the return code and restults of a query
+
+    params:
+      metric: The name of the metric to test
+      params: A dictionary of query parameters to use when testing
+      status: The expected status code
+      check: A regular expression to validate the results (or None)
+    """
+    result = requests.get(f"http://localhost:{pgmon_port}/{metric}", params=params)
+
+    if result.status_code != status:
+        print(f"FAIL: {metric}[{params}] returned wrong status code: {result.status_code}")
+        return False
+
+    if re.match(check, result.text):
+        print(f"SUCCESS: {metric}[{params}]")
+        return True
+    else
+        print(f"FAIL: {metric}[{params}] result is invalid, got:\n {result.text}")
+        return False
+
+def run_all_tests(version):
+    """
+    Run all defined tests against the current running instance
+
+    params:
+      version: The PostgreSQL version currently being tested (server_version_num format)
+    """
+    errors = 0
+
+    # Convert versions like 12 to 120000
+    version_num  = std_version(version)
+
+    # Loop through all of the metrics to test.
+    for metric in tests.keys():
+        params = metric.get('params', {})
+        status = 200
+        check = ''
+
+        # Find the correct version of the status and check parameters (assuming there are any).
+        # If there are any check conditions, find the highest version that does not exceed the version we're currently testing against.
+        # To do this, we order the keys (versions) in reverse, so we start with the highest.
+        for v in reversed(sorted(metric.get('expect', {}).keys())):
+            # If we've reached a version <= the one we're testing use it.
+            if int(v) <= version_num:
+                status = metric['expect'][v]['status']
+                check = metric['expect'][v]['check']
+                break
+        
+        if not run_test(metric, metrics[metric].get(params, {}), status, check):
+            errors += 1
+
+    return errors
+
+def start_test_db(version):
+    #container = PostgresContainer()
+    pass
+
+# Read the test script
+try:
+    with open("metric_tests.yml", 'r') as f:
+        tests = yaml.safe_load(f)
+except yaml.parser.ParserError as e:
+    sys.exit("Failed to parse metrics_test.yml: {e}")
+
+
+# Run 

From 8928bba337143911139d963cbca2b2aff3da50ce Mon Sep 17 00:00:00 2001
From: James Campbell <james@shh-dot-com.org>
Date: Thu, 15 May 2025 02:04:50 -0400
Subject: [PATCH 2/6] Format python using black

---
 tests/sql-tests.py | 40 ++++++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/tests/sql-tests.py b/tests/sql-tests.py
index 497f835..f339123 100644
--- a/tests/sql-tests.py
+++ b/tests/sql-tests.py
@@ -5,20 +5,22 @@ import requests
 import yaml
 import sys
 
-pg_versions=[9.2, 9.6, 10, 11, 12, 13, 14, 15, 16 17]
+pg_versions = [9.2, 9.6, 10, 11, 12, 13, 14, 15, 16, 17]
 
-pgmon_port=93849
+pgmon_port = 93849
 
 tests = {}
 
 container = None
 
+
 def std_version(version):
-    if version[0] == '9':
+    if version[0] == "9":
         return int(f"{version[0]}0{version[1]}00")
-    else
+    else:
         return int(f"{version}0000")
 
+
 def run_test(metric, params, status, check):
     """
     Validate the return code and restults of a query
@@ -32,16 +34,19 @@ def run_test(metric, params, status, check):
     result = requests.get(f"http://localhost:{pgmon_port}/{metric}", params=params)
 
     if result.status_code != status:
-        print(f"FAIL: {metric}[{params}] returned wrong status code: {result.status_code}")
+        print(
+            f"FAIL: {metric}[{params}] returned wrong status code: {result.status_code}"
+        )
         return False
 
     if re.match(check, result.text):
         print(f"SUCCESS: {metric}[{params}]")
         return True
-    else
+    else:
         print(f"FAIL: {metric}[{params}] result is invalid, got:\n {result.text}")
         return False
 
+
 def run_all_tests(version):
     """
     Run all defined tests against the current running instance
@@ -52,39 +57,38 @@ def run_all_tests(version):
     errors = 0
 
     # Convert versions like 12 to 120000
-    version_num  = std_version(version)
+    version_num = std_version(version)
 
     # Loop through all of the metrics to test.
     for metric in tests.keys():
-        params = metric.get('params', {})
+        params = metric.get("params", {})
         status = 200
-        check = ''
+        check = ""
 
         # Find the correct version of the status and check parameters (assuming there are any).
         # If there are any check conditions, find the highest version that does not exceed the version we're currently testing against.
         # To do this, we order the keys (versions) in reverse, so we start with the highest.
-        for v in reversed(sorted(metric.get('expect', {}).keys())):
+        for v in reversed(sorted(metric.get("expect", {}).keys())):
             # If we've reached a version <= the one we're testing use it.
             if int(v) <= version_num:
-                status = metric['expect'][v]['status']
-                check = metric['expect'][v]['check']
+                status = metric["expect"][v]["status"]
+                check = metric["expect"][v]["check"]
                 break
-        
+
         if not run_test(metric, metrics[metric].get(params, {}), status, check):
             errors += 1
 
     return errors
 
+
 def start_test_db(version):
-    #container = PostgresContainer()
+    # container = PostgresContainer()
     pass
 
+
 # Read the test script
 try:
-    with open("metric_tests.yml", 'r') as f:
+    with open("metric_tests.yml", "r") as f:
         tests = yaml.safe_load(f)
 except yaml.parser.ParserError as e:
     sys.exit("Failed to parse metrics_test.yml: {e}")
-
-
-# Run 

From 529bef96794f293e5d00b8700d6c878c73dc87b1 Mon Sep 17 00:00:00 2001
From: James Campbell <james@shh-dot-com.org>
Date: Sun, 18 May 2025 12:52:32 -0400
Subject: [PATCH 3/6] Add ability to run query tests

---
 src/pgmon.py | 128 ++++++++++++++++++++++++++++++++-------------------
 1 file changed, 81 insertions(+), 47 deletions(-)

diff --git a/src/pgmon.py b/src/pgmon.py
index 38c1f8b..24e0c3a 100755
--- a/src/pgmon.py
+++ b/src/pgmon.py
@@ -4,6 +4,7 @@ import yaml
 import json
 import time
 import os
+import sys
 
 import argparse
 import logging
@@ -74,6 +75,10 @@ class UnhappyDBError(Exception):
     pass
 
 
+class UnknownMetricError(Exception):
+    pass
+
+
 class MetricVersionError(Exception):
     pass
 
@@ -466,6 +471,54 @@ def get_cluster_version():
     return cluster_version
 
 
+def sample_metric(dbname, metric_name, args, retry=True):
+    """
+    Run the appropriate query for the named metric against the specified database
+    """
+    # Get the metric definition
+    try:
+        metric = config["metrics"][metric_name]
+    except KeyError:
+        raise UnknownMetricError("Unknown metric: {}".format(metric_name))
+
+    # Get the connection pool for the database, or create one if it doesn't
+    # already exist.
+    pool = get_pool(dbname)
+
+    # Identify the PostgreSQL version
+    version = get_cluster_version()
+
+    # Get the query version
+    query = get_query(metric, version)
+
+    # Execute the query
+    if retry:
+        return run_query(pool, metric["type"], query, args)
+    else:
+        return run_query_no_retry(pool, metric["type"], query, args)
+
+
+def test_queries():
+    """
+    Run all of the metric queries against a database and check the results
+    """
+    # We just use the default db for tests
+    dbname = config["dbname"]
+    # Loop through all defined metrics.
+    for metric_name in config["metrics"].keys():
+        # Get the actual metric definition
+        metric = metrics[metric_name]
+        # If the metric has arguments to use while testing, grab those
+        args = metric.get("test_args", {})
+        # Run the query without the ability to retry.
+        res = sample_metric(dbname, metric_name, args, retry=False)
+        # Compare the result to the provided sample results
+        # TODO
+    # Return the number of errors
+    # TODO
+    return 0
+
+
 class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
     """
     This is our request handling server.  It is responsible for listening for
@@ -494,10 +547,10 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
         """
         # Parse the URL
         parsed_path = urlparse(self.path)
-        name = parsed_path.path.strip("/")
+        metric_name = parsed_path.path.strip("/")
         parsed_query = parse_qs(parsed_path.query)
 
-        if name == "agent_version":
+        if metric_name == "agent_version":
             self._reply(200, VERSION)
             return
 
@@ -505,60 +558,31 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
         #       single values, just grab the first from each.
         args = {key: values[0] for key, values in parsed_query.items()}
 
-        # Get the metric definition
-        try:
-            metric = config["metrics"][name]
-        except KeyError:
-            log.error("Unknown metric: {}".format(name))
-            self._reply(404, "Unknown metric")
-            return
-
         # Get the dbname.  If none was provided, use the default from the
         # config.
         dbname = args.get("dbname", config["dbname"])
 
-        # Get the connection pool for the database, or create one if it doesn't
-        # already exist.
+        # Sample the metric
         try:
-            pool = get_pool(dbname)
-        except UnhappyDBError:
+            self._reply(200, sample_metric(dbname, metric_name, args))
+            return
+        except UnknownMetricError as e:
+            log.error("Unknown metric: {}".format(metric_name))
+            self._reply(404, "Unknown metric")
+            return
+        except MetricVersionError as e:
+            # The cluster version is resolved inside sample_metric() and is not
+            # in scope here, so report the exception text instead.
+            log.error("Failed to find a version of {}: {}".format(metric_name, e))
+            self._reply(404, "Unsupported version")
+            return
+        except UnhappyDBError as e:
             log.info("Database {} is unhappy, please be patient".format(dbname))
             self._reply(503, "Database unavailable")
             return
-
-        # Identify the PostgreSQL version
-        try:
-            version = get_cluster_version()
-        except UnhappyDBError:
-            return
         except Exception as e:
-            if dbname in unhappy_cooldown:
-                log.info("Database {} is unhappy, please be patient".format(dbname))
-                self._reply(503, "Database unavailable")
-            else:
-                log.error("Failed to get PostgreSQL version: {}".format(e))
-                self._reply(500, "Error getting DB version")
-            return
-
-        # Get the query version
-        try:
-            query = get_query(metric, version)
-        except KeyError:
-            log.error("Failed to find a version of {} for {}".format(name, version))
-            self._reply(404, "Unsupported version")
-            return
-
-        # Execute the quert
-        try:
-            self._reply(200, run_query(pool, metric["type"], query, args))
-            return
-        except Exception as e:
-            if dbname in unhappy_cooldown:
-                log.info("Database {} is unhappy, please be patient".format(dbname))
-                self._reply(503, "Database unavailable")
-            else:
-                log.error("Error running query: {}".format(e))
-                self._reply(500, "Error running query")
+            log.error("Error running query: {}".format(e))
+            self._reply(500, "Unexpected error: {}".format(e))
             return
 
     def _reply(self, code, content):
@@ -585,6 +609,8 @@ if __name__ == "__main__":
         help="The config file to read (default: %(default)s)",
     )
 
+    parser.add_argument("test", action="store_true", help="Run query tests and exit")
+
     args = parser.parse_args()
 
     # Set the config file path
@@ -593,6 +619,14 @@ if __name__ == "__main__":
     # Read the config file
     read_config(config_file)
 
+    # Run query tests and exit if test mode is enabled
+    if args.test:
+        errors = test_queries()
+        if errors > 0:
+            sys.exit(1)
+        else:
+            sys.exit(0)
+
     # Set up the http server to receive requests
     server_address = ("127.0.0.1", config["port"])
     httpd = ThreadingHTTPServer(server_address, SimpleHTTPRequestHandler)

From c0e153108310070dce4127a2887e88295f1c5d6d Mon Sep 17 00:00:00 2001
From: James Campbell <james@shh-dot-com.org>
Date: Thu, 22 May 2025 14:53:25 -0400
Subject: [PATCH 4/6] Add query test script and test mode

* Add a mode to test all metric queries

* Add a script to run query tests against different versions of
  PostgreSQL

* Add Docker elements for query testing

* Switch to using a -c/--config_file flag when specifying the config file

* Fix some metric queries

* Allow the agent address to be configured

* Allow the sslmode connection parameter to be configured
---
 openrc/pgmon.initd              |  2 +-
 sample-config/pgmon-metrics.yml | 14 +++--
 sample-config/pgmon.yml         |  6 +++
 src/pgmon.py                    | 31 +++++++----
 systemd/pgmon@.service          |  2 +-
 tests/Dockerfile                | 22 ++++++++
 tests/docker-compose.yml        | 32 +++++++++++
 tests/pgpass                    |  1 +
 tests/run-tests.sh              | 62 ++++++++++++++++++++++
 tests/sql-tests.py              | 94 ---------------------------------
 tests/test-config.yml           | 16 ++++++
 11 files changed, 174 insertions(+), 108 deletions(-)
 create mode 100644 tests/Dockerfile
 create mode 100644 tests/docker-compose.yml
 create mode 100644 tests/pgpass
 create mode 100755 tests/run-tests.sh
 delete mode 100644 tests/sql-tests.py
 create mode 100644 tests/test-config.yml

diff --git a/openrc/pgmon.initd b/openrc/pgmon.initd
index 8d8c96d..e6e3c81 100644
--- a/openrc/pgmon.initd
+++ b/openrc/pgmon.initd
@@ -19,6 +19,6 @@ start_pre() {
 }
 
 command="/usr/bin/pgmon"
-command_args="'$CONFIG_FILE'"
+command_args="-c '$CONFIG_FILE'"
 command_background="true"
 command_user="${PGMON_USER}:${PGMON_GROUP}"
diff --git a/sample-config/pgmon-metrics.yml b/sample-config/pgmon-metrics.yml
index c72c786..6bb7753 100644
--- a/sample-config/pgmon-metrics.yml
+++ b/sample-config/pgmon-metrics.yml
@@ -11,7 +11,8 @@ metrics:
   discover_slots:
     type: set
     query:
-      0: SELECT slot_name, plugin, slot_type, database, temporary, active FROM pg_replication_slots
+      0: SELECT slot_name, plugin, slot_type, database, false as temporary, active FROM pg_replication_slots
+      100000: SELECT slot_name, plugin, slot_type, database, temporary, active FROM pg_replication_slots
 
   # cluster-wide metrics
   version:
@@ -29,6 +30,8 @@ metrics:
     query:
       0: SELECT numbackends, xact_commit, xact_rollback, blks_read, blks_hit, tup_returned, tup_fetched, tup_inserted, tup_updated, tup_deleted, conflicts, temp_files, temp_bytes, deadlocks, blk_read_time, blk_write_time, extract('epoch' from stats_reset)::float FROM pg_stat_database WHERE datname = %(dbname)s
       140000: SELECT numbackends, xact_commit, xact_rollback, blks_read, blks_hit, tup_returned, tup_fetched, tup_inserted, tup_updated, tup_deleted, conflicts, temp_files, temp_bytes, deadlocks, COALESCE(checksum_failures, 0) AS checksum_failures, blk_read_time, blk_write_time, session_time, active_time, idle_in_transaction_time, sessions, sessions_abandoned, sessions_fatal, sessions_killed, extract('epoch' from stats_reset)::float FROM pg_stat_database WHERE datname = %(dbname)s
+    test_args:
+      dbname: postgres
 
   # Debugging
   ntables:
@@ -40,7 +43,9 @@ metrics:
   rep_stats:
     type: row
     query:
-      0: SELECT * FROM pg_stat_database WHERE client_addr || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') = '{repid}'
+      0: SELECT * FROM pg_stat_replication WHERE client_addr || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') = '{repid}'
+    test_args:
+      repid: 127.0.0.1_test_rep
 
   # Debugging
   sleep:
@@ -52,4 +57,7 @@ metrics:
   slot_stats:
     type: row
     query:
-      0: SELECT active_pid, xmin, pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn) AS restart_bytes, pg_wal_lsn_diff(pg_current_wal_lsn(), confirmed_flush_lsn) AS confirmed_flush_bytes FROM pg_replication_slots WHERE slot_name = '{slot}'
+      0: SELECT active_pid, xmin, pg_xlog_location_diff(pg_current_xlog_location(), restart_lsn) AS restart_bytes, pg_xlog_location_diff(pg_current_xlog_location(), confirmed_flush_lsn) AS confirmed_flush_bytes FROM pg_replication_slots WHERE slot_name = '{slot}'
+      100000: SELECT active_pid, xmin, pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn) AS restart_bytes, pg_wal_lsn_diff(pg_current_wal_lsn(), confirmed_flush_lsn) AS confirmed_flush_bytes FROM pg_replication_slots WHERE slot_name = '{slot}'
+    test_args:
+      slot: test_slot
diff --git a/sample-config/pgmon.yml b/sample-config/pgmon.yml
index 879005d..bbe49dc 100644
--- a/sample-config/pgmon.yml
+++ b/sample-config/pgmon.yml
@@ -1,3 +1,6 @@
+# The address the agent binds to
+#address: 127.0.0.1
+
 # The port the agent listens on for requests
 #port: 5400
 
@@ -26,6 +29,9 @@
 # Default database to connect to when none is specified for a metric
 #dbname: 'postgres'
 
+# SSL connection mode
+#ssl_mode: require
+
 # Timeout for getting a connection slot from a pool
 #pool_slot_timeout: 5
 
diff --git a/src/pgmon.py b/src/pgmon.py
index 24e0c3a..f9c906c 100755
--- a/src/pgmon.py
+++ b/src/pgmon.py
@@ -85,6 +85,8 @@ class MetricVersionError(Exception):
 
 # Default config settings
 default_config = {
+    # The address the agent binds to
+    "address": "127.0.0.1",
     # The port the agent listens on for requests
     "port": 5400,
     # Min PostgreSQL connection pool size (per database)
@@ -103,6 +105,8 @@ default_config = {
     "dbport": 5432,
     # Default database to connect to when none is specified for a metric
     "dbname": "postgres",
+    # SSL connection mode
+    "ssl_mode": "require",
     # Timeout for getting a connection slot from a pool
     "pool_slot_timeout": 5,
     # PostgreSQL connection timeout (seconds)
@@ -325,6 +329,7 @@ def get_pool(dbname):
             # lock
             if dbname not in connections:
                 log.info("Creating connection pool for: {}".format(dbname))
+                # Actually create the connection pool
                 connections[dbname] = ConnectionPool(
                     dbname,
                     int(config["min_pool_size"]),
@@ -334,7 +339,7 @@ def get_pool(dbname):
                     port=config["dbport"],
                     user=config["dbuser"],
                     connect_timeout=int(config["connect_timeout"]),
-                    sslmode="require",
+                    sslmode=config["ssl_mode"],
                 )
             # Clear the unhappy indicator if present
             unhappy_cooldown.pop(dbname, None)
@@ -382,10 +387,16 @@ def run_query_no_retry(pool, return_type, query, args):
                 res = curs.fetchall()
 
                 if return_type == "value":
+                    if len(res) == 0:
+                        return ""
                     return str(list(res[0].values())[0])
                 elif return_type == "row":
+                    if len(res) == 0:
+                        return "[]"
                     return json.dumps(res[0])
                 elif return_type == "column":
+                    if len(res) == 0:
+                        return "[]"
                     return json.dumps([list(r.values())[0] for r in res])
                 elif return_type == "set":
                     return json.dumps(res)
@@ -393,7 +404,7 @@ def run_query_no_retry(pool, return_type, query, args):
             dbname = pool.name
             if dbname in unhappy_cooldown:
                 raise UnhappyDBError()
-            elif conn.broken:
+            elif conn.closed != 0:
                 raise DisconnectedError()
             else:
                 raise
@@ -505,15 +516,14 @@ def test_queries():
     # We just use the default db for tests
     dbname = config["dbname"]
     # Loop through all defined metrics.
-    for metric_name in config["metrics"].keys():
-        # Get the actual metric definition
-        metric = metrics[metric_name]
+    for name, metric in config["metrics"].items():
         # If the metric has arguments to use while testing, grab those
         args = metric.get("test_args", {})
         # Run the query without the ability to retry.
-        res = sample_metric(dbname, metric_name, args, retry=False)
+        res = sample_metric(dbname, name, args, retry=False)
         # Compare the result to the provided sample results
         # TODO
+        print("{} -> {}".format(name, res))
     # Return the number of errors
     # TODO
     return 0
@@ -603,13 +613,16 @@ if __name__ == "__main__":
     )
 
     parser.add_argument(
-        "config_file",
+        "-c",
+        "--config_file",
         default="pgmon.yml",
         nargs="?",
         help="The config file to read (default: %(default)s)",
     )
 
-    parser.add_argument("test", action="store_true", help="Run query tests and exit")
+    parser.add_argument(
+        "-t", "--test", action="store_true", help="Run query tests and exit"
+    )
 
     args = parser.parse_args()
 
@@ -628,7 +641,7 @@ if __name__ == "__main__":
             sys.exit(0)
 
     # Set up the http server to receive requests
-    server_address = ("127.0.0.1", config["port"])
+    server_address = (config["address"], config["port"])
     httpd = ThreadingHTTPServer(server_address, SimpleHTTPRequestHandler)
 
     # Set up the signal handler
diff --git a/systemd/pgmon@.service b/systemd/pgmon@.service
index 0dec16b..a17860a 100644
--- a/systemd/pgmon@.service
+++ b/systemd/pgmon@.service
@@ -7,7 +7,7 @@ After=network.target
 [Service]
 EnvironmentFile=/etc/pgmon/%i-service.conf
 User=${SERVICE_USER:-postgres}
-ExecStart=/usr/local/bin/pgmon /etc/pgmon/%i.yml
+ExecStart=/usr/local/bin/pgmon -c /etc/pgmon/%i.yml
 ExecReload=kill -HUP $MAINPID
 Restart=on-failure
 Type=exec
diff --git a/tests/Dockerfile b/tests/Dockerfile
new file mode 100644
index 0000000..1913313
--- /dev/null
+++ b/tests/Dockerfile
@@ -0,0 +1,22 @@
+FROM alpine:3.21
+
+RUN apk update && \
+    apk add py3-psycopg2 \
+            py3-yaml \
+            tini
+
+WORKDIR /app
+
+COPY src/pgmon.py /app/
+
+COPY sample-config/pgmon-metrics.yml /app/
+
+COPY tests/test-config.yml /app/
+
+COPY --chmod=0600 --chown=postgres:postgres tests/pgpass /root/.pgpass
+
+ENTRYPOINT ["tini", "--"]
+
+EXPOSE 5400
+
+CMD ["/app/pgmon.py", "-c", "/app/test-config.yml", "--test"]
diff --git a/tests/docker-compose.yml b/tests/docker-compose.yml
new file mode 100644
index 0000000..deeecd2
--- /dev/null
+++ b/tests/docker-compose.yml
@@ -0,0 +1,32 @@
+---
+
+services:
+  agent:
+    image: pgmon
+    build:
+      context: ..
+      dockerfile: tests/Dockerfile
+    ports:
+      - :5400
+    depends_on:
+      db:
+        condition: service_healthy
+
+  db:
+    image: "postgres:${PGTAG:-17-bookworm}"
+    ports:
+      - :5432
+    environment:
+      POSTGRES_PASSWORD: secret
+    healthcheck:
+      #test: [ "CMD", "pg_isready", "-U", "postgres" ]
+      test: [ "CMD-SHELL", "pg_controldata /var/lib/postgresql/data/ | grep -q 'in production'" ]
+      interval: 5s
+      timeout: 2s
+      retries: 10
+    command: >
+      postgres -c ssl=on
+               -c ssl_cert_file='/etc/ssl/certs/ssl-cert-snakeoil.pem'
+               -c ssl_key_file='/etc/ssl/private/ssl-cert-snakeoil.key'
+               -c listen_addresses='*'
+
diff --git a/tests/pgpass b/tests/pgpass
new file mode 100644
index 0000000..1066019
--- /dev/null
+++ b/tests/pgpass
@@ -0,0 +1 @@
+db:5432:*:postgres:secret
diff --git a/tests/run-tests.sh b/tests/run-tests.sh
new file mode 100755
index 0000000..9bf472a
--- /dev/null
+++ b/tests/run-tests.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+# Versions to test
+versions=( $@ )
+
+# If we weren't given any versions, test them all
+if [ ${#versions[@]} -eq 0 ]
+then
+    versions=( 9.2 9.6 10 11 12 13 14 15 16 17 )
+fi
+
+# Image tags to use
+declare -A images=()
+images["9.2"]='9.2'
+images["9.6"]='9.6-bullseye'
+images["10"]='10-bullseye'
+images["11"]='11-bookworm'
+images["12"]='12-bookworm'
+images["13"]='13-bookworm'
+images["14"]='14-bookworm'
+images["15"]='15-bookworm'
+images["16"]='16-bookworm'
+images["17"]='17-bookworm'
+
+declare -A results=()
+
+# Make sure everything's down to start with
+docker compose down
+
+# Make sure our agent container is up to date
+docker compose build agent
+
+for version in "${versions[@]}"
+do
+    echo
+    echo "Testing: PostgreSQL ${version}"
+
+    # Specify the version we're testing against
+    export PGTAG="${images["$version"]}"
+
+    # Start the containers
+    docker compose up --exit-code-from=agent agent
+    rc=$?
+
+    results["$version"]=$rc
+
+    # Destroy the containers
+    docker compose down
+done
+
+echo
+echo
+for v in "${versions[@]}"
+do
+    case "${results["$v"]}" in
+        0) msg="OK" ;;
+        1) msg="Query failure detected" ;;
+        18) msg="Docker image error: 18" ;;
+        *) msg="Unexpected error: ${results["$v"]}" ;;
+    esac
+    echo "$v -> $msg"
+done
diff --git a/tests/sql-tests.py b/tests/sql-tests.py
deleted file mode 100644
index f339123..0000000
--- a/tests/sql-tests.py
+++ /dev/null
@@ -1,94 +0,0 @@
-#!/usr/bin/env python3
-
-from testcontainers import PostgresContainer
-import requests
-import yaml
-import sys
-
-pg_versions = [9.2, 9.6, 10, 11, 12, 13, 14, 15, 16, 17]
-
-pgmon_port = 93849
-
-tests = {}
-
-container = None
-
-
-def std_version(version):
-    if version[0] == "9":
-        return int(f"{version[0]}0{version[1]}00")
-    else:
-        return int(f"{version}0000")
-
-
-def run_test(metric, params, status, check):
-    """
-    Validate the return code and restults of a query
-
-    params:
-      metric: The name of the metric to test
-      params: A dictionary of query parameters to use when testing
-      status: The expected status code
-      check: A regular expression to validate the results (or None)
-    """
-    result = requests.get(f"http://localhost:{pgmon_port}/{metric}", params=params)
-
-    if result.status_code != status:
-        print(
-            f"FAIL: {metric}[{params}] returned wrong status code: {result.status_code}"
-        )
-        return False
-
-    if re.match(check, result.text):
-        print(f"SUCCESS: {metric}[{params}]")
-        return True
-    else:
-        print(f"FAIL: {metric}[{params}] result is invalid, got:\n {result.text}")
-        return False
-
-
-def run_all_tests(version):
-    """
-    Run all defined tests against the current running instance
-
-    params:
-      version: The PostgreSQL version currently being tested (server_version_num format)
-    """
-    errors = 0
-
-    # Convert versions like 12 to 120000
-    version_num = std_version(version)
-
-    # Loop through all of the metrics to test.
-    for metric in tests.keys():
-        params = metric.get("params", {})
-        status = 200
-        check = ""
-
-        # Find the correct version of the status and check parameters (assuming there are any).
-        # If there are any check conditions, find the highest version that does not exceed the version we're currently testing against.
-        # To do this, we order the keys (versions) in reverse, so we start with the highest.
-        for v in reversed(sorted(metric.get("expect", {}).keys())):
-            # If we've reached a version <= the one we're testing use it.
-            if int(v) <= version_num:
-                status = metric["expect"][v]["status"]
-                check = metric["expect"][v]["check"]
-                break
-
-        if not run_test(metric, metrics[metric].get(params, {}), status, check):
-            errors += 1
-
-    return errors
-
-
-def start_test_db(version):
-    # container = PostgresContainer()
-    pass
-
-
-# Read the test script
-try:
-    with open("metric_tests.yml", "r") as f:
-        tests = yaml.safe_load(f)
-except yaml.parser.ParserError as e:
-    sys.exit("Failed to parse metrics_test.yml: {e}")
diff --git a/tests/test-config.yml b/tests/test-config.yml
new file mode 100644
index 0000000..8bf863b
--- /dev/null
+++ b/tests/test-config.yml
@@ -0,0 +1,16 @@
+---
+
+# Bind to all interfaces so we can submit requests from outside the test container
+address: 0.0.0.0
+
+# We always just connect to the db container
+dbhost: db
+dbport: 5432
+dbuser: postgres
+
+# Allow some insecure SSL parameters for the 9.2 test
+ssl_ciphers: DEFAULT@SECLEVEL=1
+
+# Pull in the standard metrics
+include:
+  - pgmon-metrics.yml

From 80304f40d1ee135d454dbc222d4098e09fe0a20f Mon Sep 17 00:00:00 2001
From: James Campbell <james@shh-dot-com.org>
Date: Sun, 1 Jun 2025 00:11:44 -0400
Subject: [PATCH 5/6] Revise the versions on a few queries, improve query tests

* Add ability to specify the sslmode parameter when connecting to
  PostgreSQL

* Fix min versions for replication queries

* Add query-tests target to main Makefile
---
 Makefile                        | 13 +++++++++++--
 sample-config/pgmon-metrics.yml | 10 +++++-----
 tests/docker-compose.yml        |  2 +-
 tests/test-config.yml           |  5 +++--
 4 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/Makefile b/Makefile
index 8f98594..7606715 100644
--- a/Makefile
+++ b/Makefile
@@ -22,7 +22,7 @@ SUPPORTED := ubuntu-20.04 \
 # These targets are the main ones to use for most things.
 ##
 
-.PHONY: all clean tgz test install
+.PHONY: all clean tgz test query-tests install
 
 
 # Build all packages
@@ -64,6 +64,10 @@ clean:
 test:
 	cd src ; python3 -m unittest
 
+# Run query tests
+query-tests:
+	cd tests ; ./run-tests.sh
+
 # Install the script at the specified base directory
 install:
 	# Set up directories
@@ -92,7 +96,7 @@ install:
 
 
 # Run all of the install tests
-.PHONY: install-tests debian-%-install-test rockylinux-%-install-test ubuntu-%-install-test
+.PHONY: install-tests debian-%-install-test rockylinux-%-install-test ubuntu-%-install-test gentoo-install-test
 install-tests: $(foreach distro_release, $(SUPPORTED), $(distro_release)-install-test)
 
 
@@ -124,6 +128,11 @@ oraclelinux-%-install-test:
 	     oraclelinux:7 \
 	     bash -c 'yum makecache && yum install -y /output/$(PACKAGE_NAME)-$(VERSION)-1.el7.noarch.rpm'
 
+# Run a Gentoo install test
+gentoo-install-test:
+	# May implement this in the future, but would require additional headaches to set up a repo
+	true
+
 ##
 # Container targets
 #
diff --git a/sample-config/pgmon-metrics.yml b/sample-config/pgmon-metrics.yml
index 6bb7753..60e428e 100644
--- a/sample-config/pgmon-metrics.yml
+++ b/sample-config/pgmon-metrics.yml
@@ -11,7 +11,7 @@ metrics:
   discover_slots:
     type: set
     query:
-      0: SELECT slot_name, plugin, slot_type, database, false as temporary, active FROM pg_replication_slots
+      90400: SELECT slot_name, plugin, slot_type, database, false as temporary, active FROM pg_replication_slots
       100000: SELECT slot_name, plugin, slot_type, database, temporary, active FROM pg_replication_slots
 
   # cluster-wide metrics
@@ -20,9 +20,9 @@ metrics:
     query:
       0: SHOW server_version_num
   max_frozen_age:
-    type: value
+    type: row
     query:
-      0: SELECT max(age(datfrozenxid)) FROM pg_database
+      0: SELECT max(age(datfrozenxid)), max(mxid_age(datminmxid)) FROM pg_database
 
   # Per-database metrics
   db_stats:
@@ -43,7 +43,7 @@ metrics:
   rep_stats:
     type: row
     query:
-      0: SELECT * FROM pg_stat_replication WHERE client_addr || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') = '{repid}'
+      90400: SELECT * FROM pg_stat_replication WHERE client_addr || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') = '{repid}'
     test_args:
       repid: 127.0.0.1_test_rep
 
@@ -57,7 +57,7 @@ metrics:
   slot_stats:
     type: row
     query:
-      0: SELECT active_pid, xmin, pg_xlog_location_diff(pg_current_xlog_location(), restart_lsn) AS restart_bytes, pg_xlog_location_diff(pg_current_xlog_location(), confirmed_flush_lsn) AS confirmed_flush_bytes FROM pg_replication_slots WHERE slot_name = '{slot}'
+      90400: SELECT active_pid, xmin, pg_xlog_location_diff(pg_current_xlog_location(), restart_lsn) AS restart_bytes, pg_xlog_location_diff(pg_current_xlog_location(), confirmed_flush_lsn) AS confirmed_flush_bytes FROM pg_replication_slots WHERE slot_name = '{slot}'
       100000: SELECT active_pid, xmin, pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn) AS restart_bytes, pg_wal_lsn_diff(pg_current_wal_lsn(), confirmed_flush_lsn) AS confirmed_flush_bytes FROM pg_replication_slots WHERE slot_name = '{slot}'
     test_args:
       slot: test_slot
diff --git a/tests/docker-compose.yml b/tests/docker-compose.yml
index deeecd2..ae9b5f6 100644
--- a/tests/docker-compose.yml
+++ b/tests/docker-compose.yml
@@ -23,7 +23,7 @@ services:
       test: [ "CMD-SHELL", "pg_controldata /var/lib/postgresql/data/ | grep -q 'in production'" ]
       interval: 5s
       timeout: 2s
-      retries: 10
+      retries: 20
     command: >
       postgres -c ssl=on
                -c ssl_cert_file='/etc/ssl/certs/ssl-cert-snakeoil.pem'
diff --git a/tests/test-config.yml b/tests/test-config.yml
index 8bf863b..7a17cdb 100644
--- a/tests/test-config.yml
+++ b/tests/test-config.yml
@@ -8,8 +8,9 @@ dbhost: db
 dbport: 5432
 dbuser: postgres
 
-# Allow some insecure SSL parameters for the 9.2 test
-ssl_ciphers: DEFAULT@SECLEVEL=1
+# The SSL cipher parameters are too old in the 9.2 container, so we allow the tests
+# to be run without encryption
+ssl_mode: prefer
 
 # Pull in the standard metrics
 include:

From 1d642d41b2aebd1ce99386bde2f62e6725112b64 Mon Sep 17 00:00:00 2001
From: James Campbell <james@shh-dot-com.org>
Date: Sun, 1 Jun 2025 00:23:26 -0400
Subject: [PATCH 6/6] Bump version to 1.0.2

* Modify Makefile to extract the version from the main script

* Bump version to 1.0.2
---
 GENTOO/pgmon-1.0.2.ebuild | 73 +++++++++++++++++++++++++++++++++++++++
 Makefile                  |  3 +-
 src/pgmon.py              |  2 +-
 3 files changed, 76 insertions(+), 2 deletions(-)
 create mode 100644 GENTOO/pgmon-1.0.2.ebuild

diff --git a/GENTOO/pgmon-1.0.2.ebuild b/GENTOO/pgmon-1.0.2.ebuild
new file mode 100644
index 0000000..9c52b5a
--- /dev/null
+++ b/GENTOO/pgmon-1.0.2.ebuild
@@ -0,0 +1,73 @@
+# Copyright 2024 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=8
+
+PYTHON_COMPAT=( python3_{6..13} )
+
+inherit python-r1
+
+DESCRIPTION="PostgreSQL monitoring bridge"
+HOMEPAGE="None"
+
+LICENSE="BSD"
+SLOT="0"
+KEYWORDS="amd64"
+
+SRC_URI="https://code2.shh-dot-com.org/james/${PN}/archive/v${PV}.tar.bz2 -> ${P}.tar.bz2"
+
+IUSE="-systemd"
+
+DEPEND="
+	${PYTHON_DEPS}
+	dev-python/psycopg:2
+	dev-python/pyyaml
+	app-admin/logrotate
+	"
+RDEPEND="${DEPEND}"
+BDEPEND=""
+
+RESTRICT="fetch"
+
+#S="${WORKDIR}/${PN}"
+
+pkg_nofetch() {
+	einfo "Please download"
+	einfo "  - ${P}.tar.bz2"
+	einfo "from ${HOMEPAGE} and place it in your DISTDIR directory."
+	einfo "The file should be owned by portage:portage."
+}
+
+src_compile() {
+	true
+}
+
+src_install() {
+	# Install init script
+	if ! use systemd ; then
+		newinitd "openrc/pgmon.initd" pgmon
+		newconfd "openrc/pgmon.confd" pgmon
+	fi
+
+	# Install systemd unit
+	if use systemd ; then
+		systemd_dounit "systemd/pgmon.service"
+	fi
+
+	# Install script
+	exeinto /usr/bin
+	newexe "src/pgmon.py" pgmon
+
+	# Install default config
+	diropts -o root -g root -m 0755
+	insinto /etc/pgmon
+	doins "sample-config/pgmon.yml"
+	doins "sample-config/pgmon-metrics.yml"
+
+	# Install logrotate config
+	insinto /etc/logrotate.d
+	newins "logrotate/pgmon.logrotate" pgmon
+
+	# Install man page
+	doman manpages/pgmon.1
+}
diff --git a/Makefile b/Makefile
index 7606715..1c3de00 100644
--- a/Makefile
+++ b/Makefile
@@ -1,9 +1,10 @@
 # Package details
 PACKAGE_NAME := pgmon
-VERSION := 1.0.1
 
 SCRIPT := src/$(PACKAGE_NAME).py
 
+VERSION := $(shell grep -m 1 '^VERSION = ' "$(SCRIPT)" | sed -ne 's/.*"\(.*\)".*/\1/p')
+
 # Where packages are built
 BUILD_DIR := build
 
diff --git a/src/pgmon.py b/src/pgmon.py
index f9c906c..6178827 100755
--- a/src/pgmon.py
+++ b/src/pgmon.py
@@ -24,7 +24,7 @@ from http.server import BaseHTTPRequestHandler, HTTPServer
 from http.server import ThreadingHTTPServer
 from urllib.parse import urlparse, parse_qs
 
-VERSION = "1.0.1"
+VERSION = "1.0.2"
 
 # Configuration
 config = {}