/* pg_lock_hash_info.c - PostgreSQL extension to expose lock and proclock hash bucket statistics */

#include "postgres.h"

#include "fmgr.h"
#include "access/htup_details.h"
#include "miscadmin.h"			// Access to MaxBackends and MaxConnections
#include "access/twophase.h"	// Access to max_prepared_xacts
#include "funcapi.h"
#include "storage/lock.h"
#include "storage/lwlock.h"
#include "storage/shmem.h"
#include "storage/spin.h"
#include "utils/builtins.h"
#include "utils/hsearch.h"
#include "utils/dynahash.h"

PG_MODULE_MAGIC;

HTAB	   *lock_hash = NULL;

void		get_lock_hash(void);

// BEGIN Copied from dynahash.c (NLOCKENTS comes from lock.c)
#define NLOCKENTS() \
	mul_size(max_locks_per_xact, add_size(MaxBackends, max_prepared_xacts))

#define DEF_SEGSIZE 256
#define DEF_SEGSIZE_SHIFT 8		/* must be log2(DEF_SEGSIZE) */
#define DEF_DIRSIZE 256

#define NUM_FREELISTS 32

typedef HASHELEMENT *HASHBUCKET;
typedef HASHBUCKET *HASHSEGMENT;

typedef struct
{
	slock_t		mutex;			/* spinlock for this freelist */
	long		nentries;		/* number of entries in associated buckets */
	HASHELEMENT *freeList;		/* chain of free elements */
} FreeListData;

struct HASHHDR
{
	FreeListData freeList[NUM_FREELISTS];

	long		dsize;			/* directory size */
	long		nsegs;			/* number of allocated segments (<= dsize) */
	uint32		max_bucket;		/* ID of maximum bucket in use */
	uint32		high_mask;		/* mask to modulo into entire table */
	uint32		low_mask;		/* mask to modulo into lower half of table */
	Size		keysize;		/* hash key length in bytes */
	Size		entrysize;		/* total user element size in bytes */
	long		num_partitions; /* # partitions (must be power of 2), or 0 */
	long		max_dsize;		/* 'dsize' limit if directory is fixed size */
	long		ssize;			/* segment size --- must be power of 2 */
	int			sshift;			/* segment shift = log2(ssize) */
	int			nelem_alloc;	/* number of entries to allocate at once */

#ifdef HASH_STATISTICS
	long		accesses;
	long		collisions;
#endif
};

struct HTAB
{
	HASHHDR    *hctl;			/* => shared control information */
	HASHSEGMENT *dir;			/* directory of segment starts */
	HashValueFunc hash;			/* hash function */
	HashCompareFunc match;		/* key comparison function */
	HashCopyFunc keycopy;		/* key copying function */
	HashAllocFunc alloc;		/* memory allocator */
	MemoryContext hcxt;			/* memory context if default allocator used */
	char	   *tabname;		/* table name (for error messages) */
	bool		isshared;		/* true if table is in shared memory */
	bool		isfixed;		/* if true, don't enlarge */
	bool		frozen;			/* true = no more inserts allowed */
	Size		keysize;		/* hash key length in bytes */
	long		ssize;			/* segment size --- must be power of 2 */
	int			sshift;			/* segment shift = log2(ssize) */
};
// END Copied from dynahash.c

/* Attach to the shared "LOCK hash" table using the same parameters the server used to create it */
void
get_lock_hash(void)
{
	HASHCTL		info;
	long		init_table_size,
				max_table_size;

	max_table_size = NLOCKENTS();
	init_table_size = max_table_size / 2;

	info.keysize = sizeof(LOCKTAG);
	info.entrysize = sizeof(LOCK);
	info.num_partitions = NUM_LOCK_PARTITIONS;

	lock_hash = ShmemInitHash("LOCK hash", init_table_size, max_table_size,
							  &info,
							  HASH_ELEM | HASH_BLOBS | HASH_PARTITION);
	if (!lock_hash)
	{
		elog(ERROR, "Failed to find lock hash");
	}
}

/* Walk one freelist under its spinlock and count the free elements on it */
int
get_free_count(FreeListData *lst)
{
	HASHELEMENT *elem;
	int			count = 0;

	SpinLockAcquire(&(lst->mutex));
	elem = lst->freeList;
	while (elem != NULL)
	{
		count++;
		elem = elem->link;
	}
	SpinLockRelease(&(lst->mutex));

	return count;
}

/* Sum the free-element counts over all freelists */
int
get_total_free_count(HASHHDR *hctl)
{
	int			i;
	int			count = 0;

	for (i = 0; i < NUM_FREELISTS; i++)
	{
		count += get_free_count(&(hctl->freeList[i]));
	}

	return count;
}
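/*
 * Sketch added for illustration (not in the original module): the natural
 * counterpart of get_total_free_count() above.  Summing nentries over the
 * freelists yields the number of hash entries currently in use, roughly the
 * figure hash_get_num_entries() reports.  The reads are done without the
 * freelist spinlocks, so the total is only approximate while the table is
 * being modified.
 */
long
get_total_nentries(HASHHDR *hctl)
{
	int			i;
	long		count = 0;

	for (i = 0; i < NUM_FREELISTS; i++)
	{
		count += hctl->freeList[i].nentries;
	}

	return count;
}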
PG_FUNCTION_INFO_V1(pg_lock_hash_info);

Datum
pg_lock_hash_info(PG_FUNCTION_ARGS)
{
	TupleDesc	tupdesc;
	Datum		values[14];
	bool		nulls[14];
	HeapTuple	tuple;

	// Get a pointer to the lock hash
	get_lock_hash();

	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("function returning record called in context that cannot accept type record")));

	values[0] = Int32GetDatum(lock_hash->hctl->dsize);
	//values[0] = Int32GetDatum(MaxBackends);
	nulls[0] = false;
	values[1] = Int32GetDatum(lock_hash->hctl->nsegs);
	//values[1] = Int32GetDatum(MaxConnections);
	nulls[1] = false;
	values[2] = UInt32GetDatum(lock_hash->hctl->max_bucket);
	nulls[2] = false;
	values[3] = UInt32GetDatum(lock_hash->hctl->keysize);
	nulls[3] = false;
	values[4] = UInt32GetDatum(lock_hash->hctl->entrysize);
	nulls[4] = false;
	values[5] = Int32GetDatum(lock_hash->hctl->num_partitions);
	nulls[5] = false;
	values[6] = Int32GetDatum(lock_hash->hctl->max_dsize);
	nulls[6] = false;
	values[7] = Int32GetDatum(lock_hash->hctl->ssize);
	nulls[7] = false;
	values[8] = Int32GetDatum(lock_hash->hctl->sshift);
	nulls[8] = false;
	values[9] = Int32GetDatum(lock_hash->hctl->nelem_alloc);
	nulls[9] = false;
	values[10] = Int32GetDatum(get_total_free_count(lock_hash->hctl));
	nulls[10] = false;
	values[11] = Int32GetDatum(NLOCKENTS());
	nulls[11] = false;
	values[12] = Int32GetDatum(lock_hash->hctl->low_mask);
	nulls[12] = false;
	values[13] = Int32GetDatum(lock_hash->hctl->high_mask);
	nulls[13] = false;

	tuple = heap_form_tuple(tupdesc, values, nulls);

	PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
}

PG_FUNCTION_INFO_V1(pg_lock_hash_freelist_info);

Datum
pg_lock_hash_freelist_info(PG_FUNCTION_ARGS)
{
	FuncCallContext *funcctx;
	int		   *list_id;

	if (SRF_IS_FIRSTCALL())
	{
		TupleDesc	tupdesc;
		MemoryContext oldcontext;

		funcctx = SRF_FIRSTCALL_INIT();
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

		// Get a pointer to the lock hash
		get_lock_hash();

		tupdesc = CreateTemplateTupleDesc(3);
		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "freelist", INT4OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "nentries", INT4OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "count", INT4OID, -1, 0);
		funcctx->tuple_desc = BlessTupleDesc(tupdesc);

		/* Allocate and initialize the position */
		funcctx->user_fctx = palloc(sizeof(int));
		list_id = (int *) funcctx->user_fctx;
		*list_id = 0;

		MemoryContextSwitchTo(oldcontext);
	}

	funcctx = SRF_PERCALL_SETUP();
	list_id = (int *) funcctx->user_fctx;

	if (*list_id >= NUM_FREELISTS)
	{
		SRF_RETURN_DONE(funcctx);
	}
	else
	{
		Datum		values[3];
		bool		nulls[3];
		HeapTuple	tuple;
		FreeListData *freelist = &(lock_hash->hctl->freeList[*list_id]);

		values[0] = Int32GetDatum(*list_id);
		nulls[0] = false;
		values[1] = Int32GetDatum(freelist->nentries);
		nulls[1] = false;
		values[2] = Int32GetDatum(get_free_count(freelist));
		nulls[2] = false;

		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);

		// Advance to the next freelist
		(*list_id)++;

		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
	}
}

/* Count the entries on one bucket chain (caller should hold the partition lock) */
int
count_list(HASHBUCKET root)
{
	int			count = 0;
	HASHELEMENT *elem = root;

	while (elem != NULL)
	{
		count++;
		elem = elem->link;
	}

	return count;
}
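/*
 * Sketch added for reference (mirrors calc_bucket() in dynahash.c, not part
 * of the original module): dynahash picks a bucket by masking the hash value
 * with high_mask and falls back to low_mask when the result lies beyond
 * max_bucket.  The bucket_hash_low and bucket_hash_high columns produced
 * below expose both masked values so the chosen bucket can be checked
 * against this rule.
 */
static inline uint32
bucket_for_hash(HASHHDR *hctl, uint32 hash_val)
{
	uint32		bucket = hash_val & hctl->high_mask;

	if (bucket > hctl->max_bucket)
		bucket = bucket & hctl->low_mask;

	return bucket;
}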
PG_FUNCTION_INFO_V1(pg_lock_hash_bucket_info);

typedef struct
{
	int			segment_id;
	int			bucket_id;
} LockHashBucketInfoPosition;

Datum
pg_lock_hash_bucket_info(PG_FUNCTION_ARGS)
{
	FuncCallContext *funcctx;
	LockHashBucketInfoPosition *pos;
	LWLock	   *part_lock;

	if (SRF_IS_FIRSTCALL())
	{
		TupleDesc	tupdesc;
		MemoryContext oldcontext;

		funcctx = SRF_FIRSTCALL_INIT();
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

		// Get a pointer to the lock hash
		get_lock_hash();

		tupdesc = CreateTemplateTupleDesc(6);
		TupleDescInitEntry(tupdesc, (AttrNumber) 1, "segment", INT4OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 2, "bucket", INT4OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 3, "bucket_hash", INT4OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 4, "bucket_hash_low", INT4OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 5, "bucket_hash_high", INT4OID, -1, 0);
		TupleDescInitEntry(tupdesc, (AttrNumber) 6, "entries", INT4OID, -1, 0);
		funcctx->tuple_desc = BlessTupleDesc(tupdesc);

		/* Allocate and initialize the position */
		funcctx->user_fctx = palloc(sizeof(LockHashBucketInfoPosition));
		pos = (LockHashBucketInfoPosition *) funcctx->user_fctx;
		pos->segment_id = 0;
		pos->bucket_id = 0;

		MemoryContextSwitchTo(oldcontext);
	}

	funcctx = SRF_PERCALL_SETUP();
	pos = (LockHashBucketInfoPosition *) funcctx->user_fctx;

	if (pos->segment_id >= lock_hash->hctl->nsegs)
	{
		SRF_RETURN_DONE(funcctx);
	}
	else
	{
		Datum		values[6];
		bool		nulls[6];
		HeapTuple	tuple;
		HASHBUCKET	bucketp = lock_hash->dir[pos->segment_id][pos->bucket_id];

		values[0] = Int32GetDatum(pos->segment_id);
		nulls[0] = false;
		values[1] = Int32GetDatum(pos->bucket_id);
		nulls[1] = false;

		if (bucketp)
		{
			values[2] = Int32GetDatum(bucketp->hashvalue);
			nulls[2] = false;
			values[3] = Int32GetDatum(bucketp->hashvalue & lock_hash->hctl->low_mask);
			nulls[3] = false;
			values[4] = Int32GetDatum(bucketp->hashvalue & lock_hash->hctl->high_mask);
			nulls[4] = false;

			part_lock = LockHashPartitionLock(bucketp->hashvalue);
			LWLockAcquire(part_lock, LW_SHARED);
			values[5] = Int32GetDatum(count_list(bucketp));
			nulls[5] = false;
			LWLockRelease(part_lock);
		}
		else
		{
			values[2] = Int32GetDatum(0);
			nulls[2] = true;
			values[3] = Int32GetDatum(0);
			nulls[3] = true;
			values[4] = Int32GetDatum(0);
			nulls[4] = true;
			values[5] = Int32GetDatum(0);
			nulls[5] = false;
		}

		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);

		// Increment the position, handling segment wraparound
		pos->bucket_id++;
		while (pos->bucket_id >= lock_hash->hctl->ssize)
		{
			pos->bucket_id -= lock_hash->hctl->ssize;
			pos->segment_id++;
		}

		SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
	}
}
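/*
 * Companion SQL sketch (an illustration, not part of this file).  The OUT
 * parameter names for pg_lock_hash_info are hypothetical -- only their order
 * and int4 types must match the values[] array filled in above -- while the
 * set-returning functions reuse the column names from their tuple
 * descriptors.  MODULE_PATHNAME assumes the library name is set in an
 * extension control file; a literal '$libdir/...' path works as well.
 *
 * CREATE FUNCTION pg_lock_hash_info(
 *     OUT dsize int4, OUT nsegs int4, OUT max_bucket int4, OUT keysize int4,
 *     OUT entrysize int4, OUT num_partitions int4, OUT max_dsize int4,
 *     OUT ssize int4, OUT sshift int4, OUT nelem_alloc int4,
 *     OUT free_entries int4, OUT max_entries int4,
 *     OUT low_mask int4, OUT high_mask int4)
 * RETURNS record AS 'MODULE_PATHNAME' LANGUAGE C;
 *
 * CREATE FUNCTION pg_lock_hash_freelist_info(
 *     OUT freelist int4, OUT nentries int4, OUT count int4)
 * RETURNS SETOF record AS 'MODULE_PATHNAME' LANGUAGE C;
 *
 * CREATE FUNCTION pg_lock_hash_bucket_info(
 *     OUT segment int4, OUT bucket int4, OUT bucket_hash int4,
 *     OUT bucket_hash_low int4, OUT bucket_hash_high int4, OUT entries int4)
 * RETURNS SETOF record AS 'MODULE_PATHNAME' LANGUAGE C;
 */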