Switch back to psycopg2
parent 40200c5372
commit 0c62ee19ce

pgmon.py | 105
@@ -9,8 +9,11 @@ import logging

from datetime import datetime, timedelta

import psycopg
from psycopg_pool import ConnectionPool
import psycopg2
from psycopg2.extras import DictCursor
from psycopg2.pool import ThreadedConnectionPool

from contextlib import contextmanager

import signal
from threading import Thread, Lock, Semaphore
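For readers who have not used psycopg2's built-in pooling, here is a minimal sketch (not taken from this commit; the connection parameters are placeholders) of how the newly imported pieces fit together:

```python
# Minimal sketch of the psycopg2 building blocks imported above.
# Connection parameters are illustrative placeholders.
from psycopg2.extras import DictCursor
from psycopg2.pool import ThreadedConnectionPool

pool = ThreadedConnectionPool(0, 4, dbname='postgres', user='postgres',
                              host='/var/run/postgresql', port=5432)
conn = pool.getconn()               # borrow a connection from the pool
try:
    with conn.cursor(cursor_factory=DictCursor) as curs:
        curs.execute('SELECT 1 AS one')
        print(curs.fetchone()['one'])   # DictCursor rows can be indexed by column name
finally:
    pool.putconn(conn)              # always hand the connection back
```

Unlike psycopg 3's pool, `ThreadedConnectionPool` only hands connections out and takes them back; timeout handling has to be layered on top, which is what the `ConnectionPool` wrapper added later in this diff does.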
@@ -31,7 +34,7 @@ connections = {}
# Dictionary of unhappy databases. Keys are database names, value is the time
# the database was determined to be unhappy plus the cooldown setting. So,
# basically it's the time when we should try to connect to the database again.
unhappy = {}
unhappy_cooldown = {}

# Version information
cluster_version = None
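The rename from `unhappy` to `unhappy_cooldown` makes the stored value explicit: it is the time after which reconnection may be attempted again. A condensed sketch of the pattern (helper names here are illustrative, not pgmon's):

```python
# Illustrative sketch of the cooldown gate used throughout this diff.
from datetime import datetime, timedelta

unhappy_cooldown = {}

def mark_unhappy(dbname, cooldown_seconds):
    # Record when it is OK to try this database again
    unhappy_cooldown[dbname] = datetime.now() + timedelta(seconds=cooldown_seconds)

def may_connect(dbname):
    # Allowed once the recorded retry time has passed (or was never set)
    return unhappy_cooldown.get(dbname, datetime.min) <= datetime.now()
```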
@@ -64,28 +67,41 @@ class UnhappyDBError(Exception):

# Default config settings
default_config = {
    # IPC port
    'port': 5400,
    # Min PostgreSQL connection pool size (per database)
    'min_pool_size': 0,

    # Max PostgreSQL connection pool size
    'pool_size': 4,
    # Max PostgreSQL connection pool size (per database)
    'max_pool_size': 4,

    # How long a connection can sit idle in the pool before it's removed (seconds)
    'max_idle_time': 30,

    # Log level for stderr logging (or 'off')
    'log_level': 'debug',

    # Connection details
    'connstr': '',
    # Database user to connect as
    'dbuser': 'postgres',

    # Database host
    'dbhost': '/var/run/postgresql',

    # Database port
    'dbport': 5432,

    # Default database to connect to when none is specified for a metric
    'dbname': 'postgres',

    # Timeout for getting a connection slot from a pool
    'pool_slot_timeout': 5,

    # PostgreSQL connection timeout (seconds)
    # Note: It can actually be double this because of retries
    'connect_timeout': 5,

    # Time to wait before trying to reconnect again after a reconnect failure
    # Time to wait before trying to reconnect again after a reconnect failure (seconds)
    'reconnect_cooldown': 30,

    # How often to check the version of PostgreSQL
    # How often to check the version of PostgreSQL (seconds)
    'version_check_period': 300,

    # Metrics
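The new settings replace the single libpq `connstr` with discrete keys (`dbuser`, `dbhost`, `dbport`) that map naturally onto psycopg2 connection keyword arguments. A sketch of that mapping (the helper is illustrative, not code from pgmon):

```python
# Sketch only: turning the discrete settings above into psycopg2 connection kwargs.
config = {
    'dbuser': 'postgres',
    'dbhost': '/var/run/postgresql',
    'dbport': 5432,
    'connect_timeout': 5,
}

def connection_kwargs(dbname):
    # Keys mirror the defaults above; application_name matches what get_pool() sets
    return {
        'dbname': dbname,
        'user': config['dbuser'],
        'host': config['dbhost'],
        'port': config['dbport'],
        'connect_timeout': config['connect_timeout'],
        'application_name': 'pgmon',
    }

# psycopg2.connect(**connection_kwargs('postgres')) would open a single connection;
# get_pool() below passes the same keywords to ThreadedConnectionPool instead.
```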
@@ -189,16 +205,43 @@ def signal_handler(sig, frame):
    log.warning("Received config reload signal")
    read_config(config_file)

class ConnectionPool(ThreadedConnectionPool):
    def __init__(self, dbname, minconn, maxconn, *args, **kwargs):
        # Make sure dbname isn't different in the kwargs
        kwargs['dbname'] = dbname

        super().__init__(minconn, maxconn, *args, **kwargs)
        self.name = dbname

    @contextmanager
    def connection(self, timeout=None):
        conn = None
        timeout_time = datetime.now() + timedelta(seconds=timeout)
        # We will continue to try to get a connection slot until we time out
        while datetime.now() < timeout_time:
            # See if we can get a connection slot
            try:
                conn = self.getconn()
                try:
                    yield conn
                finally:
                    self.putconn(conn)
                return
            except psycopg2.pool.PoolError:
                # If we failed to get the connection slot, wait a bit and try again
                time.sleep(0.1)
        raise TimeoutError(f"Timed out waiting for an available connection to {self.name}")

def get_pool(dbname):
    """
    Get a database connection pool.
    """
    # Check if the db is unhappy and wants to be left alone
    if dbname in unhappy:
        if unhappy[dbname] > datetime.now():
    if dbname in unhappy_cooldown:
        if unhappy_cooldown[dbname] > datetime.now():
            raise UnhappyDBError()

    # Create a connection pool if it doesn't already exist
    if dbname not in connections:
        with connections_lock:
            # Make sure nobody created the pool while we were waiting on the
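Assuming the wrapper class above, pool usage would look roughly like this (values are placeholders, not from the commit):

```python
# Hypothetical usage of the ConnectionPool wrapper defined above.
from psycopg2.extras import DictCursor

pool = ConnectionPool('postgres', 0, 4, user='postgres', host='/var/run/postgresql')

with pool.connection(timeout=5) as conn:       # waits up to ~5s for a free slot
    with conn.cursor(cursor_factory=DictCursor) as curs:
        curs.execute('SELECT version()')
        print(curs.fetchone()[0])
# The context manager returns the connection to the pool on exit.
```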
@@ -206,13 +249,17 @@ def get_pool(dbname):
            if dbname not in connections:
                log.info(f"Creating connection pool for: {dbname}")
                connections[dbname] = ConnectionPool(
                    name=dbname,
                    min_size=0, max_size=int(config['pool_size']),
                    conninfo=f"dbname={dbname} application_name=pgmon {config['connstr']}",
                    reconnect_timeout=float(config['connect_timeout']),
                    reconnect_failed=handle_connect_failure)
                    dbname,
                    int(config['min_pool_size']),
                    int(config['max_pool_size']),
                    application_name='pgmon',
                    host=config['dbhost'],
                    port=config['dbport'],
                    user=config['dbuser'],
                    connect_timeout=float(config['connect_timeout']),
                    sslmode='require')
    # Clear the unhappy indicator if present
    unhappy.pop(dbname, None)
    unhappy_cooldown.pop(dbname, None)
    return connections[dbname]

def handle_connect_failure(pool):
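`get_pool()` builds pools lazily and re-checks inside `connections_lock` so that two threads racing on a new database name do not both create a pool. The pattern, condensed:

```python
# Condensed sketch of the check/lock/re-check pattern used in get_pool() above.
from threading import Lock

connections = {}
connections_lock = Lock()

def get_or_create(dbname, factory):
    if dbname not in connections:          # fast path, no lock taken
        with connections_lock:
            if dbname not in connections:  # re-check: another thread may have won the race
                connections[dbname] = factory(dbname)
    return connections[dbname]
```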
@@ -220,8 +267,7 @@ def handle_connect_failure(pool):
    Mark the database as being unhappy so we can leave it alone for a while
    """
    dbname = pool.name
    unhappy[dbname] = datetime.now() + timedelta(seconds=int(config['reconnect_cooldown']))

    unhappy_cooldown[dbname] = datetime.now() + timedelta(seconds=int(config['reconnect_cooldown']))

def get_query(metric, version):
    """
@@ -243,9 +289,9 @@ def run_query_no_retry(pool, return_type, query, args):
    """
    Run the query with no explicit retry code
    """
    with pool.connection(timeout=float(config['connect_timeout'])) as conn:
    with pool.connection(float(config['connect_timeout'])) as conn:
        try:
            with conn.cursor(row_factory=psycopg.rows.dict_row) as curs:
            with conn.cursor(cursor_factory=DictCursor) as curs:
                curs.execute(query, args)
                res = curs.fetchall()
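`curs.execute(query, args)` relies on psycopg2's parameter binding, and `DictCursor` gives rows that can be indexed by column name, standing in for psycopg 3's `dict_row` row factory. For reference, the two placeholder styles psycopg2 accepts (queries here are illustrative, not pgmon metrics):

```python
# Sketch: psycopg2 parameter passing, as used by curs.execute(query, args) above.
# Values are bound separately so psycopg2 handles quoting and escaping.
query_positional = "SELECT datname FROM pg_stat_database WHERE datname = %s"
args_positional = ("postgres",)

query_named = "SELECT datname FROM pg_stat_database WHERE datname = %(db)s"
args_named = {"db": "postgres"}

# curs.execute(query_positional, args_positional)
# curs.execute(query_named, args_named)
```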
@@ -259,7 +305,7 @@ def run_query_no_retry(pool, return_type, query, args):
                return json.dumps(res)
        except:
            dbname = pool.name
            if dbname in unhappy:
            if dbname in unhappy_cooldown:
                raise UnhappyDBError()
            elif conn.broken:
                raise DisconnectedError()
@@ -285,11 +331,16 @@ def run_query(pool, return_type, query, args):
        return run_query_no_retry(pool, return_type, query, args)
    except DisconnectedError:
        log.warning("Stale PostgreSQL connection found ... trying again")
        # This sleep is an annoyinh hack to give the pool workers time to
        # This sleep is an annoying hack to give the pool workers time to
        # actually mark the connection, otherwise it can be given back in the
        # next connection() call
        # TODO: verify this is the case with psycopg2
        time.sleep(1)
        try:
            return run_query_no_retry(pool, return_type, query, args)
        except:
            handle_connect_failure(pool)
            raise UnhappyDBError()

def get_cluster_version():
    """
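The TODO above asks whether psycopg2's pool needs the same grace period that psycopg 3's did. One option, sketched here under the assumption that the wrapper keeps the inherited `putconn()`, is to discard a connection explicitly when it looks broken rather than waiting for the pool to notice:

```python
# Sketch only (relates to the TODO above): explicitly dropping a connection that
# appears broken when handing it back to a psycopg2 pool.
def return_connection(pool, conn):
    if conn.closed:                      # psycopg2 sets a nonzero .closed on dead connections
        pool.putconn(conn, close=True)   # discard it so it is never handed out again
    else:
        pool.putconn(conn)
```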
@@ -381,7 +432,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
        except UnhappyDBError:
            return
        except Exception as e:
            if dbname in unhappy:
            if dbname in unhappy_cooldown:
                log.info(f"Database {dbname} is unhappy, please be patient")
                self._reply(503, 'Database unavailable')
            else:
@@ -402,7 +453,7 @@ class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):
            self._reply(200, run_query(pool, metric['type'], query, args))
            return
        except Exception as e:
            if dbname in unhappy:
            if dbname in unhappy_cooldown:
                log.info(f"Database {dbname} is unhappy, please be patient")
                self._reply(503, 'Database unavailable')
            else:

pgmon.yml | 39
@@ -1,18 +1,39 @@
# IPC port
port: 5400
# Min PostgreSQL connection pool size (per database)
#min_pool_size: 0,

# Max PostgreSQL connection pool size
#pool_size: 4
# Max PostgreSQL connection pool size (per database)
#max_pool_size: 4,

# How long a connection can sit idle in the pool before it's removed (seconds)
#max_idle_time: 30,

# Log level for stderr logging (or 'off')
log_level: debug
#log_level: 'debug',

# Connection string (excluding dbname)
# This can be left empty to use the libpq defaults
connstr: "user=postgres"
# Database user to connect as
#dbuser: 'postgres',

# Database host
#dbhost: '/var/run/postgresql',

# Database port
#dbport: 5432,

# Default database to connect to when none is specified for a metric
#dbname: postgres
#dbname: 'postgres',

# Timeout for getting a connection slot from a pool
#pool_slot_timeout: 5,

# PostgreSQL connection timeout (seconds)
# Note: It can actually be double this because of retries
#connect_timeout: 5,

# Time to wait before trying to reconnect again after a reconnect failure (seconds)
#reconnect_cooldown: 30,

# How often to check the version of PostgreSQL (seconds)
#version_check_period: 300,

# Metrics
#metrics: {}
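How this file is consumed is not part of the diff (read_config() lives in pgmon.py), but the usual pattern is to merge the YAML over the defaults shown earlier. A sketch under that assumption, with illustrative names:

```python
# Sketch, assuming a standard "YAML over defaults" merge; the real read_config()
# is not shown in this diff, so the helper and defaults here are illustrative.
import yaml

defaults = {
    'port': 5400,
    'min_pool_size': 0,
    'max_pool_size': 4,
    'dbuser': 'postgres',
    'dbhost': '/var/run/postgresql',
    'dbport': 5432,
}

def load_config(path):
    config = dict(defaults)
    with open(path) as f:
        # Shallow merge for illustration; pgmon's update_deep merges nested keys
        config.update(yaml.safe_load(f) or {})
    return config

# config = load_config('pgmon.yml')
```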
@@ -1,5 +1,7 @@
import unittest

from datetime import datetime, timedelta

import pgmon

class TestPgmonMethods(unittest.TestCase):
@@ -110,3 +112,99 @@ class TestPgmonMethods(unittest.TestCase):
        d2 = {'foo': {'a': 7}}
        self.assertRaises(TypeError, pgmon.update_deep, d1, d2)


    ##
    # get_pool
    ##

    def test_get_pool__simple(self):
        # Just get a pool in a normal case
        pgmon.config.update(pgmon.default_config)
        pool = pgmon.get_pool('postgres')
        self.assertIsNotNone(pool)

    def test_get_pool__unhappy(self):
        # Test getting an unhappy database pool
        pgmon.config.update(pgmon.default_config)
        pgmon.unhappy_cooldown['postgres'] = datetime.now() + timedelta(60)
        self.assertRaises(pgmon.UnhappyDBError, pgmon.get_pool, 'postgres')

        # Test getting a different database when there's an unhappy one
        pool = pgmon.get_pool('template0')
        self.assertIsNotNone(pool)


    ##
    # handle_connect_failure
    ##

    def test_handle_connect_failure__simple(self):
        # Test adding to an empty unhappy list
        pgmon.config.update(pgmon.default_config)
        pgmon.unhappy_cooldown = {}
        pool = pgmon.get_pool('postgres')
        pgmon.handle_connect_failure(pool)
        self.assertGreater(pgmon.unhappy_cooldown['postgres'], datetime.now())

        # Test adding another database
        pool = pgmon.get_pool('template0')
        pgmon.handle_connect_failure(pool)
        self.assertGreater(pgmon.unhappy_cooldown['postgres'], datetime.now())
        self.assertGreater(pgmon.unhappy_cooldown['template0'], datetime.now())
        self.assertEqual(len(pgmon.unhappy_cooldown), 2)


    ##
    # get_query
    ##

    def test_get_query__basic(self):
        # Test getting a query with one version
        metric = {
            'type': 'value',
            'query': {
                0: 'DEFAULT'
            }
        }
        self.assertEqual(pgmon.get_query(metric, 100000), 'DEFAULT')

    def test_get_query__versions(self):
        metric = {
            'type': 'value',
            'query': {
                0: 'DEFAULT',
                110000: 'NEW'
            }
        }

        # Test getting the default version of a query with no lower bound and a newer version
        self.assertEqual(pgmon.get_query(metric, 100000), 'DEFAULT')

        # Test getting the newer version of a query with no lower bound and a newer version for the newer version
        self.assertEqual(pgmon.get_query(metric, 110000), 'NEW')

        # Test getting the newer version of a query with no lower bound and a newer version for an even newer version
        self.assertEqual(pgmon.get_query(metric, 160000), 'NEW')

        # Test getting a version in between two other versions
        metric = {
            'type': 'value',
            'query': {
                0: 'DEFAULT',
                96000: 'OLD',
                110000: 'NEW'
            }
        }
        self.assertEqual(pgmon.get_query(metric, 100000), 'OLD')

    def test_get_query__missing_metric(self):
        # Test getting a metric that doesn't exist
        pass

    def test_get_query__missing_version(self):
        # Test getting a metric that only exists for newer versions
        pass

        # Test getting a metric that only exists for older versions
        pass
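These tests pin down the lookup rule for version-keyed queries: use the query registered under the largest version key that does not exceed the server version. A standalone sketch of that rule (not pgmon's actual get_query):

```python
# Sketch of the lookup behaviour the tests above describe.
def resolve_query(metric, version):
    # Keep only version keys at or below the server version, then take the newest
    eligible = [v for v in metric['query'] if v <= version]
    return metric['query'][max(eligible)] if eligible else None

metric = {'type': 'value', 'query': {0: 'DEFAULT', 96000: 'OLD', 110000: 'NEW'}}
assert resolve_query(metric, 100000) == 'OLD'
assert resolve_query(metric, 160000) == 'NEW'
```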