Switch back to psycopg2

This commit is contained in:
James Campbell 2025-01-08 02:39:20 -05:00
parent 40200c5372
commit 0c62ee19ce
Signed by: james
GPG Key ID: 2287C33A40DC906A
3 changed files with 207 additions and 37 deletions

107
pgmon.py
View File

@ -9,8 +9,11 @@ import logging
from datetime import datetime, timedelta from datetime import datetime, timedelta
import psycopg import psycopg2
from psycopg_pool import ConnectionPool from psycopg2.extras import DictCursor
from psycopg2.pool import ThreadedConnectionPool
from contextlib import contextmanager
import signal import signal
from threading import Thread, Lock, Semaphore from threading import Thread, Lock, Semaphore
@ -31,7 +34,7 @@ connections = {}
# Dictionary of unhappy databases. Keys are database names, value is the time # Dictionary of unhappy databases. Keys are database names, value is the time
# the database was determined to be unhappy plus the cooldown setting. So, # the database was determined to be unhappy plus the cooldown setting. So,
# basically it's the time when we should try to connect to the database again. # basically it's the time when we should try to connect to the database again.
unhappy = {} unhappy_cooldown = {}
# Version information # Version information
cluster_version = None cluster_version = None
@ -64,28 +67,41 @@ class UnhappyDBError(Exception):
# Default config settings # Default config settings
default_config = { default_config = {
# IPC port # Min PostgreSQL connection pool size (per database)
'port': 5400, 'min_pool_size': 0,
# Max PostgreSQL connection pool size # Max PostgreSQL connection pool size (per database)
'pool_size': 4, 'max_pool_size': 4,
# How long a connection can sit idle in the pool before it's removed (seconds)
'max_idle_time': 30,
# Log level for stderr logging (or 'off') # Log level for stderr logging (or 'off')
'log_level': 'debug', 'log_level': 'debug',
# Connection details # Database user to connect as
'connstr': '', 'dbuser': 'postgres',
# Database host
'dbhost': '/var/run/postgresql',
# Database port
'dbport': 5432,
# Default database to connect to when none is specified for a metric # Default database to connect to when none is specified for a metric
'dbname': 'postgres', 'dbname': 'postgres',
# Timeout for getting a connection slot from a pool
'pool_slot_timeout': 5,
# PostgreSQL connection timeout (seconds) # PostgreSQL connection timeout (seconds)
# Note: It can actually be double this because of retries
'connect_timeout': 5, 'connect_timeout': 5,
# Time to wait before trying to reconnect again after a reconnect failure # Time to wait before trying to reconnect again after a reconnect failure (seconds)
'reconnect_cooldown': 30, 'reconnect_cooldown': 30,
# How often to check the version of PostgreSQL # How often to check the version of PostgreSQL (seconds)
'version_check_period': 300, 'version_check_period': 300,
# Metrics # Metrics
@ -189,16 +205,43 @@ def signal_handler(sig, frame):
log.warning("Received config reload signal") log.warning("Received config reload signal")
read_config(config_file) read_config(config_file)
class ConnectionPool(ThreadedConnectionPool):
    """
    Thread-safe psycopg2 connection pool for a single database that hands
    out connections through a context manager, waiting for a free slot
    when the pool is exhausted.
    """

    def __init__(self, dbname, minconn, maxconn, *args, **kwargs):
        """
        Create a pool for one database.

        dbname: database to connect to; also used as the pool's name
        minconn: minimum number of pooled connections
        maxconn: maximum number of pooled connections
        Remaining arguments are passed to ThreadedConnectionPool.
        """
        # Make sure dbname isn't different in the kwargs
        kwargs['dbname'] = dbname
        super().__init__(minconn, maxconn, *args, **kwargs)
        self.name = dbname

    @contextmanager
    def connection(self, timeout=None):
        """
        Yield a connection from the pool, waiting up to timeout seconds
        for an available slot.

        timeout: seconds to wait for a slot (None or 0 means a single
                 immediate attempt with no retries)

        Raises TimeoutError if no slot becomes available in time.
        """
        # BUG FIX: timedelta(timeout) treated the timeout as *days* --
        # timedelta's first positional argument is days, but callers pass
        # seconds (config['connect_timeout']). Also guard against
        # timeout=None, which timedelta() would reject.
        wait_seconds = 0.0 if timeout is None else float(timeout)
        deadline = datetime.now() + timedelta(seconds=wait_seconds)
        # Always make at least one attempt, even with a zero timeout
        while True:
            # See if we can get a connection slot
            try:
                conn = self.getconn()
            except psycopg2.pool.PoolError:
                # Pool exhausted: wait a bit and retry until the deadline
                if datetime.now() >= deadline:
                    break
                time.sleep(0.1)
                continue
            try:
                yield conn
            finally:
                # Return the connection to the pool even if the caller raised
                self.putconn(conn)
            return
        raise TimeoutError(f"Timed out waiting for an available connection to {self.name}")
def get_pool(dbname): def get_pool(dbname):
""" """
Get a database connection pool. Get a database connection pool.
""" """
# Check if the db is unhappy and wants to be left alone # Check if the db is unhappy and wants to be left alone
if dbname in unhappy: if dbname in unhappy_cooldown:
if unhappy[dbname] > datetime.now(): if unhappy_cooldown[dbname] > datetime.now():
raise UnhappyDBError() raise UnhappyDBError()
# Create a connection pool if it doesn't already exist
if dbname not in connections: if dbname not in connections:
with connections_lock: with connections_lock:
# Make sure nobody created the pool while we were waiting on the # Make sure nobody created the pool while we were waiting on the
@ -206,13 +249,17 @@ def get_pool(dbname):
if dbname not in connections: if dbname not in connections:
log.info(f"Creating connection pool for: {dbname}") log.info(f"Creating connection pool for: {dbname}")
connections[dbname] = ConnectionPool( connections[dbname] = ConnectionPool(
name=dbname, dbname,
min_size=0, max_size=int(config['pool_size']), int(config['min_pool_size']),
conninfo=f"dbname={dbname} application_name=pgmon {config['connstr']}", int(config['max_pool_size']),
reconnect_timeout=float(config['connect_timeout']), application_name='pgmon',
reconnect_failed=handle_connect_failure) host=config['dbhost'],
port=config['dbport'],
user=config['dbuser'],
connect_timeout=float(config['connect_timeout']),
sslmode='require')
# Clear the unhappy indicator if present # Clear the unhappy indicator if present
unhappy.pop(dbname, None) unhappy_cooldown.pop(dbname, None)
return connections[dbname] return connections[dbname]
def handle_connect_failure(pool): def handle_connect_failure(pool):
@ -220,8 +267,7 @@ def handle_connect_failure(pool):
Mark the database as being unhappy so we can leave it alone for a while Mark the database as being unhappy so we can leave it alone for a while
""" """
dbname = pool.name dbname = pool.name
unhappy[dbname] = datetime.now() + timedelta(seconds=int(config['reconnect_cooldown'])) unhappy_cooldown[dbname] = datetime.now() + timedelta(seconds=int(config['reconnect_cooldown']))
def get_query(metric, version): def get_query(metric, version):
""" """
@ -243,9 +289,9 @@ def run_query_no_retry(pool, return_type, query, args):
""" """
Run the query with no explicit retry code Run the query with no explicit retry code
""" """
with pool.connection(timeout=float(config['connect_timeout'])) as conn: with pool.connection(float(config['connect_timeout'])) as conn:
try: try:
with conn.cursor(row_factory=psycopg.rows.dict_row) as curs: with conn.cursor(cursor_factory=DictCursor) as curs:
curs.execute(query, args) curs.execute(query, args)
res = curs.fetchall() res = curs.fetchall()
@ -259,7 +305,7 @@ def run_query_no_retry(pool, return_type, query, args):
return json.dumps(res) return json.dumps(res)
except: except:
dbname = pool.name dbname = pool.name
if dbname in unhappy: if dbname in unhappy_cooldown:
raise UnhappyDBError() raise UnhappyDBError()
elif conn.broken: elif conn.broken:
raise DisconnectedError() raise DisconnectedError()
@ -285,11 +331,16 @@ def run_query(pool, return_type, query, args):
return run_query_no_retry(pool, return_type, query, args) return run_query_no_retry(pool, return_type, query, args)
except DisconnectedError: except DisconnectedError:
log.warning("Stale PostgreSQL connection found ... trying again") log.warning("Stale PostgreSQL connection found ... trying again")
# This sleep is an annoyinh hack to give the pool workers time to # This sleep is an annoying hack to give the pool workers time to
# actually mark the connection, otherwise it can be given back in the # actually mark the connection, otherwise it can be given back in the
# next connection() call # next connection() call
# TODO: verify this is the case with psycopg2
time.sleep(1) time.sleep(1)
return run_query_no_retry(pool, return_type, query, args) try:
return run_query_no_retry(pool, return_type, query, args)
except:
handle_connect_failure(pool)
raise UnhappyDBError()
def get_cluster_version(): def get_cluster_version():
""" """
@ -381,7 +432,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
except UnhappyDBError: except UnhappyDBError:
return return
except Exception as e: except Exception as e:
if dbname in unhappy: if dbname in unhappy_cooldown:
log.info(f"Database {dbname} is unhappy, please be patient") log.info(f"Database {dbname} is unhappy, please be patient")
self._reply(503, 'Database unavailable') self._reply(503, 'Database unavailable')
else: else:
@ -402,7 +453,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
self._reply(200, run_query(pool, metric['type'], query, args)) self._reply(200, run_query(pool, metric['type'], query, args))
return return
except Exception as e: except Exception as e:
if dbname in unhappy: if dbname in unhappy_cooldown:
log.info(f"Database {dbname} is unhappy, please be patient") log.info(f"Database {dbname} is unhappy, please be patient")
self._reply(503, 'Database unavailable') self._reply(503, 'Database unavailable')
else: else:

View File

@ -1,18 +1,39 @@
# IPC port # Min PostgreSQL connection pool size (per database)
port: 5400 #min_pool_size: 0,
# Max PostgreSQL connection pool size # Max PostgreSQL connection pool size (per database)
#pool_size: 4 #max_pool_size: 4,
# How long a connection can sit idle in the pool before it's removed (seconds)
#max_idle_time: 30,
# Log level for stderr logging (or 'off') # Log level for stderr logging (or 'off')
log_level: debug #log_level: 'debug',
# Connection string (excluding dbname) # Database user to connect as
# This can be left empty to use the libpq defaults #dbuser: 'postgres',
connstr: "user=postgres"
# Database host
#dbhost: '/var/run/postgresql',
# Database port
#dbport: 5432,
# Default database to connect to when none is specified for a metric # Default database to connect to when none is specified for a metric
#dbname: postgres #dbname: 'postgres',
# Timeout for getting a connection slot from a pool
#pool_slot_timeout: 5,
# PostgreSQL connection timeout (seconds)
# Note: It can actually be double this because of retries
#connect_timeout: 5,
# Time to wait before trying to reconnect again after a reconnect failure (seconds)
#reconnect_cooldown: 30,
# How often to check the version of PostgreSQL (seconds)
#version_check_period: 300,
# Metrics # Metrics
#metrics: {} #metrics: {}

View File

@ -1,5 +1,7 @@
import unittest import unittest
from datetime import datetime, timedelta
import pgmon import pgmon
class TestPgmonMethods(unittest.TestCase): class TestPgmonMethods(unittest.TestCase):
@ -110,3 +112,99 @@ class TestPgmonMethods(unittest.TestCase):
d2 = {'foo': {'a': 7}} d2 = {'foo': {'a': 7}}
self.assertRaises(TypeError, pgmon.update_deep, d1, d2) self.assertRaises(TypeError, pgmon.update_deep, d1, d2)
##
# get_pool
##
def test_get_pool__simple(self):
    """A pool can be created for a database under the default config."""
    pgmon.config.update(pgmon.default_config)
    self.assertIsNotNone(pgmon.get_pool('postgres'))
def test_get_pool__unhappy(self):
    """Unhappy databases raise UnhappyDBError; other databases still work."""
    pgmon.config.update(pgmon.default_config)
    # Mark 'postgres' as unhappy for the next 60 seconds.
    # BUG FIX: timedelta(60) is 60 *days* (timedelta's first positional
    # argument); the cooldown machinery is seconds-based, so use seconds=60.
    pgmon.unhappy_cooldown['postgres'] = datetime.now() + timedelta(seconds=60)
    self.assertRaises(pgmon.UnhappyDBError, pgmon.get_pool, 'postgres')
    # Test getting a different database when there's an unhappy one
    pool = pgmon.get_pool('template0')
    self.assertIsNotNone(pool)
##
# handle_connect_failure
##
def test_handle_connect_failure__simple(self):
    """Connection failures put each database on the unhappy list."""
    pgmon.config.update(pgmon.default_config)
    pgmon.unhappy_cooldown = {}
    # First failure: the database lands on the (previously empty) unhappy list
    pgmon.handle_connect_failure(pgmon.get_pool('postgres'))
    self.assertGreater(pgmon.unhappy_cooldown['postgres'], datetime.now())
    # Second failure for another database: both are tracked independently
    pgmon.handle_connect_failure(pgmon.get_pool('template0'))
    for dbname in ('postgres', 'template0'):
        self.assertGreater(pgmon.unhappy_cooldown[dbname], datetime.now())
    self.assertEqual(len(pgmon.unhappy_cooldown), 2)
##
# get_query
##
def test_get_query__basic(self):
    """A metric with a single query entry always returns that query."""
    single_version_metric = {
        'type': 'value',
        'query': {0: 'DEFAULT'},
    }
    self.assertEqual(pgmon.get_query(single_version_metric, 100000), 'DEFAULT')
def test_get_query__versions(self):
    """Version lookup picks the newest query at or below the server version."""
    metric = {
        'type': 'value',
        'query': {0: 'DEFAULT', 110000: 'NEW'},
    }
    # Below the newer entry: fall back to the unbounded default
    self.assertEqual(pgmon.get_query(metric, 100000), 'DEFAULT')
    # Exactly at the newer entry's version: the newer query wins
    self.assertEqual(pgmon.get_query(metric, 110000), 'NEW')
    # Well past the newer entry: still the newest matching query
    self.assertEqual(pgmon.get_query(metric, 160000), 'NEW')
    # A version that falls between two bounded entries picks the lower one
    metric = {
        'type': 'value',
        'query': {0: 'DEFAULT', 96000: 'OLD', 110000: 'NEW'},
    }
    self.assertEqual(pgmon.get_query(metric, 100000), 'OLD')
def test_get_query__missing_metric(self):
    # TODO(review): not yet implemented -- stub only
    # Test getting a metric that doesn't exist
    pass
def test_get_query__missing_version(self):
    # TODO(review): not yet implemented -- stubs only
    # Test getting a metric that only exists for newer versions
    pass
    # Test getting a metric that only exists for older versions
    pass