Compare commits
No commits in common. "15097dcba41702683bc1158c85bd664bcec31047" and "030afafc207cd6b5e8187359b2086dde2ef38d74" have entirely different histories.
15097dcba4 ... 030afafc20
@@ -1,73 +0,0 @@
-# Copyright 2024 Gentoo Authors
-# Distributed under the terms of the GNU General Public License v2
-
-EAPI=8
-
-PYTHON_COMPAT=( python3_{6..13} )
-
-inherit python-r1
-
-DESCRIPTION="PostgreSQL monitoring bridge"
-HOMEPAGE="None"
-
-LICENSE="BSD"
-SLOT="0"
-KEYWORDS="amd64"
-
-SRC_URI="https://code2.shh-dot-com.org/james/${PN}/archive/v${PV}.tar.bz2 -> ${P}.tar.bz2"
-
-IUSE="-systemd"
-
-DEPEND="
-	${PYTHON_DEPS}
-	dev-python/psycopg:2
-	dev-python/pyyaml
-	app-admin/logrotate
-"
-RDEPEND="${DEPEND}"
-BDEPEND=""
-
-RESTRICT="fetch"
-
-#S="${WORKDIR}/${PN}"
-
-pkg_nofetch() {
-	einfo "Please download"
-	einfo " - ${P}.tar.bz2"
-	einfo "from ${HOMEPAGE} and place it in your DISTDIR directory."
-	einfo "The file should be owned by portage:portage."
-}
-
-src_compile() {
-	true
-}
-
-src_install() {
-	# Install init script
-	if ! use systemd ; then
-		newinitd "openrc/pgmon.initd" pgmon
-		newconfd "openrc/pgmon.confd" pgmon
-	fi
-
-	# Install systemd unit
-	if use systemd ; then
-		systemd_dounit "systemd/pgmon.service"
-	fi
-
-	# Install script
-	exeinto /usr/bin
-	newexe "src/pgmon.py" pgmon
-
-	# Install default config
-	diropts -o root -g root -m 0755
-	insinto /etc/pgmon
-	doins "sample-config/pgmon.yml"
-	doins "sample-config/pgmon-metrics.yml"
-
-	# Install logrotate config
-	insinto /etc/logrotate.d
-	newins "logrotate/pgmon.logrotate" pgmon
-
-	# Install man page
-	doman manpages/pgmon.1
-}
Makefile (16 lines changed)
@@ -1,10 +1,9 @@
 # Package details
 PACKAGE_NAME := pgmon
+VERSION := 1.0.1
 
 SCRIPT := src/$(PACKAGE_NAME).py
 
-VERSION := $(shell grep -m 1 '^VERSION = ' "$(SCRIPT)" | sed -ne 's/.*"\(.*\)".*/\1/p')
-
 # Where packages are built
 BUILD_DIR := build
 
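Note: the dropped $(shell ...) rule derived the package version from the VERSION = "..." line in src/pgmon.py, which the right-hand side replaces with a hardcoded value. A rough Python sketch of the same extraction, assuming it is run from the repository root (illustrative only, not code from this repository):

import re

# Find the first VERSION = "..." line and pull out the quoted value,
# the same thing the grep | sed pipeline did.
with open("src/pgmon.py") as f:
    for line in f:
        if line.startswith("VERSION = "):
            print(re.search(r'"(.*)"', line).group(1))  # e.g. 1.0.2
            break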
@@ -23,7 +22,7 @@ SUPPORTED := ubuntu-20.04 \
 # These targets are the main ones to use for most things.
 ##
 
-.PHONY: all clean tgz test query-tests install
+.PHONY: all clean tgz test install
 
 
 # Build all packages
@@ -65,10 +64,6 @@ clean:
 test:
 	cd src ; python3 -m unittest
 
-# Run query tests
-query-tests:
-	cd tests ; ./run-tests.sh
-
 # Install the script at the specified base directory
 install:
 	# Set up directories
@@ -97,7 +92,7 @@ install:
 
 
 # Run all of the install tests
-.PHONY: install-tests debian-%-install-test rockylinux-%-install-test ubuntu-%-install-test gentoo-install-test
+.PHONY: install-tests debian-%-install-test rockylinux-%-install-test ubuntu-%-install-test
 install-tests: $(foreach distro_release, $(SUPPORTED), $(distro_release)-install-test)
 
 
@@ -129,11 +124,6 @@ oraclelinux-%-install-test:
 		oraclelinux:7 \
 		bash -c 'yum makecache && yum install -y /output/$(PACKAGE_NAME)-$(VERSION)-1.el7.noarch.rpm'
 
-# Run a Gentoo install test
-gentoo-install-test:
-	# May impliment this in the future, but would require additional headaches to set up a repo
-	true
-
 ##
 # Container targets
 #
@@ -19,6 +19,6 @@ start_pre() {
 }
 
 command="/usr/bin/pgmon"
-command_args="-c '$CONFIG_FILE'"
+command_args="'$CONFIG_FILE'"
 command_background="true"
 command_user="${PGMON_USER}:${PGMON_GROUP}"
@@ -1,4 +0,0 @@
--r requirements.txt
-testcontainers[postgresql]
-pytest
-black
@@ -11,8 +11,7 @@ metrics:
   discover_slots:
     type: set
     query:
-      90400: SELECT slot_name, plugin, slot_type, database, false as temporary, active FROM pg_replication_slots
-      100000: SELECT slot_name, plugin, slot_type, database, temporary, active FROM pg_replication_slots
+      0: SELECT slot_name, plugin, slot_type, database, temporary, active FROM pg_replication_slots
 
   # cluster-wide metrics
   version:
@@ -20,9 +19,9 @@ metrics:
     query:
       0: SHOW server_version_num
   max_frozen_age:
-    type: row
+    type: value
     query:
-      0: SELECT max(age(datfrozenxid)), max(mxid_age(datminmxid)) FROM pg_database
+      0: SELECT max(age(datfrozenxid)) FROM pg_database
 
   # Per-database metrics
   db_stats:
@@ -30,8 +29,6 @@ metrics:
     query:
       0: SELECT numbackends, xact_commit, xact_rollback, blks_read, blks_hit, tup_returned, tup_fetched, tup_inserted, tup_updated, tup_deleted, conflicts, temp_files, temp_bytes, deadlocks, blk_read_time, blk_write_time, extract('epoch' from stats_reset)::float FROM pg_stat_database WHERE datname = %(dbname)s
       140000: SELECT numbackends, xact_commit, xact_rollback, blks_read, blks_hit, tup_returned, tup_fetched, tup_inserted, tup_updated, tup_deleted, conflicts, temp_files, temp_bytes, deadlocks, COALESCE(checksum_failures, 0) AS checksum_failures, blk_read_time, blk_write_time, session_time, active_time, idle_in_transaction_time, sessions, sessions_abandoned, sessions_fatal, sessions_killed, extract('epoch' from stats_reset)::float FROM pg_stat_database WHERE datname = %(dbname)s
-    test_args:
-      dbname: postgres
 
   # Debugging
   ntables:
@@ -43,9 +40,7 @@ metrics:
   rep_stats:
     type: row
     query:
-      90400: SELECT * FROM pg_stat_replication WHERE client_addr || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') = '{repid}'
-    test_args:
-      repid: 127.0.0.1_test_rep
+      0: SELECT * FROM pg_stat_database WHERE client_addr || '_' || regexp_replace(application_name, '[ ,]', '_', 'g') = '{repid}'
 
   # Debugging
   sleep:
@@ -57,7 +52,4 @@ metrics:
   slot_stats:
     type: row
     query:
-      90400: SELECT active_pid, xmin, pg_xlog_location_diff(pg_current_xlog_location(), restart_lsn) AS restart_bytes, pg_xlog_location_diff(pg_current_xlog_location(), confirmed_flush_lsn) AS confirmed_flush_bytes FROM pg_replication_slots WHERE slot_name = '{slot}'
-      100000: SELECT active_pid, xmin, pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn) AS restart_bytes, pg_wal_lsn_diff(pg_current_wal_lsn(), confirmed_flush_lsn) AS confirmed_flush_bytes FROM pg_replication_slots WHERE slot_name = '{slot}'
-    test_args:
-      slot: test_slot
+      0: SELECT active_pid, xmin, pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn) AS restart_bytes, pg_wal_lsn_diff(pg_current_wal_lsn(), confirmed_flush_lsn) AS confirmed_flush_bytes FROM pg_replication_slots WHERE slot_name = '{slot}'
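Note: each metric's query map is keyed by a minimum server_version_num (90400 is 9.4, 100000 is 10, 140000 is 14), with 0 matching any version; pgmon.py resolves the query through get_query(metric, version) after get_cluster_version(). The body of get_query is not part of this diff, so the following is only a sketch of the likely lookup, assuming it picks the newest entry whose key the server satisfies and raises KeyError otherwise (matching the handler's "Unsupported version" path):

def pick_query(metric, server_version_num):
    # Keys are minimum server versions; take the newest one the server satisfies.
    candidates = [v for v in metric["query"] if int(v) <= server_version_num]
    if not candidates:
        raise KeyError("no query for version {}".format(server_version_num))
    return metric["query"][max(candidates, key=int)]

metric = {
    "type": "value",
    "query": {
        0: "SELECT max(age(datfrozenxid)) FROM pg_database",
        140000: "SELECT 'hypothetical 14+ variant'",
    },
}
print(pick_query(metric, 130000))  # falls back to the 0 entry
print(pick_query(metric, 150000))  # uses the 140000 entry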
@@ -1,6 +1,3 @@
-# The address the agent binds to
-#address: 127.0.0.1
-
 # The port the agent listens on for requests
 #port: 5400
 
@@ -29,9 +26,6 @@
 # Default database to connect to when none is specified for a metric
 #dbname: 'postgres'
 
-# SSL connection mode
-#ssl_mode: require
-
 # Timeout for getting a connection slot from a pool
 #pool_slot_timeout: 5
 
src/pgmon.py (149 lines changed)
@@ -4,7 +4,6 @@ import yaml
 import json
 import time
 import os
-import sys
 
 import argparse
 import logging
@@ -24,7 +23,7 @@ from http.server import BaseHTTPRequestHandler, HTTPServer
 from http.server import ThreadingHTTPServer
 from urllib.parse import urlparse, parse_qs
 
-VERSION = "1.0.2"
+VERSION = "1.0.1"
 
 # Configuration
 config = {}
@@ -75,18 +74,12 @@ class UnhappyDBError(Exception):
     pass
 
 
-class UnknownMetricError(Exception):
-    pass
-
-
 class MetricVersionError(Exception):
     pass
 
 
 # Default config settings
 default_config = {
-    # The address the agent binds to
-    "address": "127.0.0.1",
     # The port the agent listens on for requests
     "port": 5400,
     # Min PostgreSQL connection pool size (per database)
@@ -105,8 +98,6 @@ default_config = {
     "dbport": 5432,
     # Default database to connect to when none is specified for a metric
     "dbname": "postgres",
-    # SSL connection mode
-    "ssl_mode": "require",
    # Timeout for getting a connection slot from a pool
     "pool_slot_timeout": 5,
     # PostgreSQL connection timeout (seconds)
@@ -329,7 +320,6 @@ def get_pool(dbname):
     # lock
     if dbname not in connections:
         log.info("Creating connection pool for: {}".format(dbname))
-        # Actually create the connection pool
         connections[dbname] = ConnectionPool(
             dbname,
             int(config["min_pool_size"]),
@@ -339,7 +329,7 @@ def get_pool(dbname):
             port=config["dbport"],
             user=config["dbuser"],
             connect_timeout=int(config["connect_timeout"]),
-            sslmode=config["ssl_mode"],
+            sslmode="require",
         )
         # Clear the unhappy indicator if present
         unhappy_cooldown.pop(dbname, None)
@@ -387,16 +377,10 @@ def run_query_no_retry(pool, return_type, query, args):
             res = curs.fetchall()
 
             if return_type == "value":
-                if len(res) == 0:
-                    return ""
                 return str(list(res[0].values())[0])
             elif return_type == "row":
-                if len(res) == 0:
-                    return "[]"
                 return json.dumps(res[0])
             elif return_type == "column":
-                if len(res) == 0:
-                    return "[]"
                 return json.dumps([list(r.values())[0] for r in res])
             elif return_type == "set":
                 return json.dumps(res)
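Note: the len(res) == 0 guards exist only on the left-hand side. Without them, a metric whose query returns no rows (for example slot_stats for a slot that does not exist) makes res[0] raise IndexError instead of returning an empty value. A minimal illustration, not code from the repository:

res = []           # what fetchall() gives back when the query matches nothing
try:
    print(res[0])  # the unguarded code path raises IndexError here
except IndexError:
    print("[]")    # the guarded version short-circuits to "" or "[]" instead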
@@ -404,7 +388,7 @@ def run_query_no_retry(pool, return_type, query, args):
         dbname = pool.name
         if dbname in unhappy_cooldown:
             raise UnhappyDBError()
-        elif conn.closed != 0:
+        elif conn.broken:
             raise DisconnectedError()
         else:
             raise
@@ -482,53 +466,6 @@ def get_cluster_version():
     return cluster_version
 
 
-def sample_metric(dbname, metric_name, args, retry=True):
-    """
-    Run the appropriate query for the named metric against the specified database
-    """
-    # Get the metric definition
-    try:
-        metric = config["metrics"][metric_name]
-    except KeyError:
-        raise UnknownMetricError("Unknown metric: {}".format(metric_name))
-
-    # Get the connection pool for the database, or create one if it doesn't
-    # already exist.
-    pool = get_pool(dbname)
-
-    # Identify the PostgreSQL version
-    version = get_cluster_version()
-
-    # Get the query version
-    query = get_query(metric, version)
-
-    # Execute the quert
-    if retry:
-        return run_query(pool, metric["type"], query, args)
-    else:
-        return run_query_no_retry(pool, metric["type"], query, args)
-
-
-def test_queries():
-    """
-    Run all of the metric queries against a database and check the results
-    """
-    # We just use the default db for tests
-    dbname = config["dbname"]
-    # Loop through all defined metrics.
-    for name, metric in config["metrics"].items():
-        # If the metric has arguments to use while testing, grab those
-        args = metric.get("test_args", {})
-        # Run the query without the ability to retry.
-        res = sample_metric(dbname, name, args, retry=False)
-        # Compare the result to the provided sample results
-        # TODO
-        print("{} -> {}".format(name, res))
-    # Return the number of errors
-    # TODO
-    return 0
-
-
 class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
     """
     This is our request handling server. It is responsible for listening for
@@ -557,10 +494,10 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
         """
         # Parse the URL
         parsed_path = urlparse(self.path)
-        metric_name = parsed_path.path.strip("/")
+        name = parsed_path.path.strip("/")
         parsed_query = parse_qs(parsed_path.query)
 
-        if metric_name == "agent_version":
+        if name == "agent_version":
             self._reply(200, VERSION)
             return
 
@@ -568,31 +505,60 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
         # single values, just grab the first from each.
         args = {key: values[0] for key, values in parsed_query.items()}
 
+        # Get the metric definition
+        try:
+            metric = config["metrics"][name]
+        except KeyError:
+            log.error("Unknown metric: {}".format(name))
+            self._reply(404, "Unknown metric")
+            return
+
         # Get the dbname. If none was provided, use the default from the
         # config.
         dbname = args.get("dbname", config["dbname"])
 
-        # Sample the metric
+        # Get the connection pool for the database, or create one if it doesn't
+        # already exist.
         try:
-            self._reply(200, sample_metric(dbname, metric_name, args))
-            return
-        except UnknownMetricError as e:
-            log.error("Unknown metric: {}".format(metric_name))
-            self._reply(404, "Unknown metric")
-            return
-        except MetricVersionError as e:
-            log.error(
-                "Failed to find a version of {} for {}".format(metric_name, version)
-            )
-            self._reply(404, "Unsupported version")
-            return
-        except UnhappyDBError as e:
+            pool = get_pool(dbname)
+        except UnhappyDBError:
             log.info("Database {} is unhappy, please be patient".format(dbname))
             self._reply(503, "Database unavailable")
             return
 
-        except Exception as e:
-            log.error("Error running query: {}".format(e))
-            self._reply(500, "Unexpected error: {}".format(e))
+        # Identify the PostgreSQL version
+        try:
+            version = get_cluster_version()
+        except UnhappyDBError:
+            return
+        except Exception as e:
+            if dbname in unhappy_cooldown:
+                log.info("Database {} is unhappy, please be patient".format(dbname))
+                self._reply(503, "Database unavailable")
+            else:
+                log.error("Failed to get PostgreSQL version: {}".format(e))
+                self._reply(500, "Error getting DB version")
+            return
+
+        # Get the query version
+        try:
+            query = get_query(metric, version)
+        except KeyError:
+            log.error("Failed to find a version of {} for {}".format(name, version))
+            self._reply(404, "Unsupported version")
+            return
+
+        # Execute the quert
+        try:
+            self._reply(200, run_query(pool, metric["type"], query, args))
+            return
+        except Exception as e:
+            if dbname in unhappy_cooldown:
+                log.info("Database {} is unhappy, please be patient".format(dbname))
+                self._reply(503, "Database unavailable")
+            else:
+                log.error("Error running query: {}".format(e))
+                self._reply(500, "Error running query")
             return
 
     def _reply(self, code, content):
@@ -613,17 +579,12 @@ if __name__ == "__main__":
     )
 
     parser.add_argument(
-        "-c",
-        "--config_file",
+        "config_file",
         default="pgmon.yml",
         nargs="?",
         help="The config file to read (default: %(default)s)",
     )
 
-    parser.add_argument(
-        "-t", "--test", action="store_true", help="Run query tests and exit"
-    )
-
     args = parser.parse_args()
 
     # Set the config file path
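Note: on the left the config file is a -c/--config_file option, on the right an optional positional argument, which is why the systemd ExecStart and OpenRC command_args elsewhere in this comparison lose the -c flag. A small standalone sketch of the two parser shapes (illustrative only):

import argparse

# Right-hand form: optional positional argument with a default.
positional = argparse.ArgumentParser(prog="pgmon")
positional.add_argument("config_file", default="pgmon.yml", nargs="?")
print(positional.parse_args([]).config_file)                         # pgmon.yml
print(positional.parse_args(["/etc/pgmon/pgmon.yml"]).config_file)   # /etc/pgmon/pgmon.yml

# Left-hand form: the same value is passed as a flag instead.
flagged = argparse.ArgumentParser(prog="pgmon")
flagged.add_argument("-c", "--config_file", default="pgmon.yml")
print(flagged.parse_args(["-c", "/etc/pgmon/pgmon.yml"]).config_file)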
@@ -632,16 +593,8 @@ if __name__ == "__main__":
     # Read the config file
     read_config(config_file)
 
-    # Run query tests and exit if test mode is enabled
-    if args.test:
-        errors = test_queries()
-        if errors > 0:
-            sys.exit(1)
-        else:
-            sys.exit(0)
-
     # Set up the http server to receive requests
-    server_address = (config["address"], config["port"])
+    server_address = ("127.0.0.1", config["port"])
     httpd = ThreadingHTTPServer(server_address, SimpleHTTPRequestHandler)
 
     # Set up the signal handler
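Note: either way, the agent runs as a small HTTP server (port 5400 by default) whose URL path names a metric and whose query string supplies arguments such as dbname, with the reserved path agent_version returning the agent's own version. A hedged usage sketch, assuming an agent is already running on localhost with the sample metrics loaded:

from urllib.request import urlopen

base = "http://127.0.0.1:5400"  # default port from the config above

# The agent's own version (handled before any metric lookup).
print(urlopen(base + "/agent_version").read().decode())

# A metric defined in pgmon-metrics.yml; query-string arguments become the
# query parameters, e.g. dbname for the db_stats query.
print(urlopen(base + "/db_stats?dbname=postgres").read().decode())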
@@ -7,7 +7,7 @@ After=network.target
 [Service]
 EnvironmentFile=/etc/pgmon/%i-service.conf
 User=${SERVICE_USER:-postgres}
-ExecStart=/usr/local/bin/pgmon -c /etc/pgmon/%i.yml
+ExecStart=/usr/local/bin/pgmon /etc/pgmon/%i.yml
 ExecReload=kill -HUP $MAINPID
 Restart=on-failure
 Type=exec
@@ -1,22 +0,0 @@
-FROM alpine:3.21
-
-RUN apk update && \
-    apk add py3-psycopg2 \
-        py3-yaml \
-        tini
-
-WORKDIR /app
-
-COPY src/pgmon.py /app/
-
-COPY sample-config/pgmon-metrics.yml /app/
-
-COPY tests/test-config.yml /app/
-
-COPY --chmod=0600 --chown=postgres:postgres tests/pgpass /root/.pgpass
-
-ENTRYPOINT ["tini", "--"]
-
-EXPOSE 5400
-
-CMD ["/app/pgmon.py", "-c", "/app/test-config.yml", "--test"]
@@ -1,32 +0,0 @@
----
-
-services:
-  agent:
-    image: pgmon
-    build:
-      context: ..
-      dockerfile: tests/Dockerfile
-    ports:
-      - :5400
-    depends_on:
-      db:
-        condition: service_healthy
-
-  db:
-    image: "postgres:${PGTAG:-17-bookworm}"
-    ports:
-      - :5432
-    environment:
-      POSTGRES_PASSWORD: secret
-    healthcheck:
-      #test: [ "CMD", "pg_isready", "-U", "postgres" ]
-      test: [ "CMD-SHELL", "pg_controldata /var/lib/postgresql/data/ | grep -q 'in production'" ]
-      interval: 5s
-      timeout: 2s
-      retries: 20
-    command: >
-      postgres -c ssl=on
-      -c ssl_cert_file='/etc/ssl/certs/ssl-cert-snakeoil.pem'
-      -c ssl_key_file='/etc/ssl/private/ssl-cert-snakeoil.key'
-      -c listen_addresses='*'
-
@@ -1 +0,0 @@
-db:5432:*:postgres:secret
@@ -1,62 +0,0 @@
-#!/bin/bash
-
-# Versions to test
-versions=( $@ )
-
-# If we weren't given any versions, test them all
-if [ ${#versions[@]} -eq 0 ]
-then
-    versions=( 9.2 9.6 10 11 12 13 14 15 16 17 )
-fi
-
-# Image tags to use
-declare -A images=()
-images["9.2"]='9.2'
-images["9.6"]='9.6-bullseye'
-images["10"]='10-bullseye'
-images["11"]='11-bookworm'
-images["12"]='12-bookworm'
-images["13"]='13-bookworm'
-images["14"]='14-bookworm'
-images["15"]='15-bookworm'
-images["16"]='16-bookworm'
-images["17"]='17-bookworm'
-
-declare -A results=()
-
-# Make sure everything's down to start with
-docker compose down
-
-# Make sure our agent container is up to date
-docker compose build agent
-
-for version in "${versions[@]}"
-do
-    echo
-    echo "Testing: PostgreSQL ${version}"
-
-    # Specify the version we're testing against
-    export PGTAG="${images["$version"]}"
-
-    # Start the containers
-    docker compose up --exit-code-from=agent agent
-    rc=$?
-
-    results["$version"]=$rc
-
-    # Destroy the containers
-    docker compose down
-done
-
-echo
-echo
-for v in "${versions[@]}"
-do
-    case "${results["$v"]}" in
-        0) msg="OK" ;;
-        1) msg="Query failure detected" ;;
-        18) msg="Docker image error: 18" ;;
-        *) msg="Unexpected error: ${results["$v"]}" ;;
-    esac
-    echo "$v -> $msg"
-done
@@ -1,17 +0,0 @@
----
-
-# Bind to all interfaces so we can submit requests from outside the test container
-address: 0.0.0.0
-
-# We always just connect to the db container
-dbhost: db
-dbport: 5432
-dbuser: postgres
-
-# The SSL cipher parameters are too old in the 9.2 container, so we allow the tests
-# to be run without encryption
-ssl_mode: prefer
-
-# Pull in the standard metrics
-include:
-  - pgmon-metrics.yml