这篇文章主要介绍“PostgreSQL的simplehash.h文件中的内容是什么”。在日常操作中,相信很多人对“PostgreSQL的simplehash.h文件中的内容是什么”这个问题存在疑惑。小编查阅了各种资料,整理出简单好用的分析方法,希望能帮助大家解答这一疑惑。接下来,请跟着小编一起来学习吧!
一、数据结构
TupleHashTable
哈希表定义
/* Pointer alias for a tuple hash table; the struct itself follows. */
typedef struct TupleHashTableData *TupleHashTable;
/*
 * A tuple hash table: the underlying simplehash table together with the
 * description of the lookup key, the hash/equality support, the memory
 * contexts in use, and scratch fields that are set for each lookup.
 */
typedef struct TupleHashTableData
{
/* underlying hash table */
tuplehash_hash *hashtab;
/* number of columns in the lookup key */
int numCols;
/* attribute numbers of the key columns */
AttrNumber *keyColIdx;
/* hash functions for the table's data type(s) */
FmgrInfo *tab_hash_funcs;
/* comparator for the table's data type(s) */
ExprState *tab_eq_func;
/* memory context that contains the table */
MemoryContext tablecxt;
/* context used for function evaluation */
MemoryContext tempcxt;
/* actual size to make each hash entry */
Size entrysize;
/* slot used to reference table entries */
TupleTableSlot *tableslot;
/* The following fields are set transiently for each table search: */
/* slot of the current input tuple */
TupleTableSlot *inputslot;
/* hash functions for the input data type(s) */
FmgrInfo *in_hash_funcs;
/* comparator for input vs. table */
ExprState *cur_eq_func;
/* hash function IV (initialization value) */
uint32 hash_iv;
/* expression context */
ExprContext *exprcontext;
} TupleHashTableData;
/* Iterator type for tuple hash tables (alias of tuplehash_iterator). */
typedef tuplehash_iterator TupleHashIterator;
/*
 * Hash table type definition.  With SH_PREFIX set to tuplehash, SH_TYPE
 * expands to tuplehash_hash.
 */
typedef struct SH_TYPE //tuplehash_hash
{
/* number of buckets in data[]; 64 bits wide */
uint64 size;
/* how many elements have valid contents */
uint32 members;
/* mask for bucket and size calculations, derived from size */
uint32 sizemask;
/* member count at which the hash table is grown */
uint32 grow_threshold;
/* hash buckets */
SH_ELEMENT_TYPE *data;
/* memory context used for allocations */
MemoryContext ctx;
/* user-defined data, typically used by callbacks */
void *private_data;
} SH_TYPE;/* actually tuplehash_hash */
TupleHashEntryData
哈希表条目
/* Pointer aliases for a hash entry and for the owning hash table. */
typedef struct TupleHashEntryData *TupleHashEntry;
typedef struct TupleHashTableData *TupleHashTable;
/*
 * One entry (bucket) of the tuple hash table.  This is the SH_ELEMENT_TYPE
 * of the tuplehash instantiation; firstTuple serves as its key (SH_KEY).
 */
typedef struct TupleHashEntryData
{
/* copy of the first tuple in this group */
MinimalTuple firstTuple;
/* user data */
void *additional;
/* bucket status, see SH_STATUS */
uint32 status;
/* hash value (cached) */
uint32 hash;
} TupleHashEntryData;
/*
 * Bucket states.  EMPTY is 0, so a zero-filled bucket array starts out
 * entirely empty.
 */
typedef enum SH_STATUS
{
SH_STATUS_EMPTY = 0x00,
SH_STATUS_IN_USE = 0x01
} SH_STATUS;
MinimalTuple
最小化的元组定义
/*
 * MINIMAL_TUPLE_OFFSET: (offset of t_infomask2 in HeapTupleHeaderData minus
 * sizeof(uint32)), rounded down to a multiple of MAXIMUM_ALIGNOF.
 */
#define MINIMAL_TUPLE_OFFSET \
((offsetof(HeapTupleHeaderData, t_infomask2) - sizeof(uint32)) / MAXIMUM_ALIGNOF * MAXIMUM_ALIGNOF)
/*
 * MINIMAL_TUPLE_PADDING: the remainder of the same quantity modulo
 * MAXIMUM_ALIGNOF; used as the size of mt_padding below.
 */
#define MINIMAL_TUPLE_PADDING \
((offsetof(HeapTupleHeaderData, t_infomask2) - sizeof(uint32)) % MAXIMUM_ALIGNOF)
/* Offset of t_infomask2 within MinimalTupleData. */
#define MINIMAL_TUPLE_DATA_OFFSET \
offsetof(MinimalTupleData, t_infomask2)
/*
 * Minimal tuple header.
 * NOTE(review): the fields from t_infomask2 onward presumably mirror the
 * same-named fields of HeapTupleHeaderData -- confirm against
 * htup_details.h.
 */
struct MinimalTupleData
{
/* length field -- presumably the total length of the minimal tuple */
uint32 t_len;
/* padding whose size is MINIMAL_TUPLE_PADDING */
char mt_padding[MINIMAL_TUPLE_PADDING];
uint16 t_infomask2;
uint16 t_infomask;
uint8 t_hoff;
/* variable-length trailing member; the fixed header ends where it starts */
bits8 t_bits[FLEXIBLE_ARRAY_MEMBER];
};
/* Size of the fixed part of the header, i.e. everything before t_bits. */
#define SizeofMinimalTupleHeader offsetof(MinimalTupleData, t_bits)
typedef struct MinimalTupleData MinimalTupleData;
typedef MinimalTupleData *MinimalTuple;
二、源码解读
simplehash.h定义了一系列的宏,比如SH_MAKE_PREFIX/SH_TYPE等等,在聚合函数实现(文件:src/backend/executor/execGrouping.c)中,具体定义了SH_PREFIX这些宏在聚合函数实现场景下的实际值.
如:
#define SH_PREFIX tuplehash
在聚合函数实现中,均以tuplehash打头,最终的实现函数为tuplehash_insert等.
//-----------------------------------------------------------------------------------
//src/backend/executor/execGrouping.c
/*
 * Parameters for the tuplehash instantiation of simplehash.h.  SH_DEFINE
 * requests the function implementations.
 */
#define SH_PREFIX tuplehash /* generated names start with tuplehash_, e.g. tuplehash_insert */
#define SH_ELEMENT_TYPE TupleHashEntryData /* entry (bucket) type */
#define SH_KEY_TYPE MinimalTuple /* key type */
#define SH_KEY firstTuple /* member of the entry that holds the key */
#define SH_HASH_KEY(tb, key) TupleHashTableHash(tb, key) /* SH_HASH_KEY --> TupleHashTableHash */
#define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0 /* SH_EQUAL --> TupleHashTableMatch */
#define SH_SCOPE extern /* generated functions get external linkage */
#define SH_STORE_HASH /* entries cache their hash value ... */
#define SH_GET_HASH(tb, a) a->hash /* ... in the hash member */
#define SH_DEFINE
#include "lib/simplehash.h"
//-----------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------
//src/include/nodes/execnodes.h
/*
 * Declaration-only instantiation for tuplehash: SH_DECLARE requests the
 * type definitions and function prototypes, not the implementations.
 */
#define SH_PREFIX tuplehash
#define SH_ELEMENT_TYPE TupleHashEntryData
#define SH_KEY_TYPE MinimalTuple
#define SH_SCOPE extern
#define SH_DECLARE
#include "lib/simplehash.h"
//-----------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------
//src/backend/nodes/tidbitmap.c
/*
 * Parameters for the pagetable instantiation.  Both SH_DECLARE and
 * SH_DEFINE are set, so declarations and implementations are generated
 * together, as static inline functions.  SH_USE_NONDEFAULT_ALLOCATOR
 * suppresses the default SH_ALLOCATE/SH_FREE, so this file must supply
 * its own pagetable_allocate/pagetable_free.
 */
#define SH_USE_NONDEFAULT_ALLOCATOR
#define SH_PREFIX pagetable
#define SH_ELEMENT_TYPE PagetableEntry
#define SH_KEY_TYPE BlockNumber
#define SH_KEY blockno
#define SH_HASH_KEY(tb, key) murmurhash42(key)
#define SH_EQUAL(tb, a, b) a == b
#define SH_SCOPE static inline
#define SH_DEFINE
#define SH_DECLARE
#include "lib/simplehash.h"
//-----------------------------------------------------------------------------------
/*
 * Helper macros that assemble identifiers: SH_MAKE_NAME(x) pastes
 * SH_PREFIX, an underscore and x together, e.g. tuplehash_insert when
 * SH_PREFIX is tuplehash.
 */
#define SH_MAKE_PREFIX(a) CppConcat(a,_)
#define SH_MAKE_NAME(name) SH_MAKE_NAME_(SH_MAKE_PREFIX(SH_PREFIX),name)
#define SH_MAKE_NAME_(a,b) CppConcat(a,b)
/* names of the generated types */
#define SH_TYPE SH_MAKE_NAME(hash)
#define SH_STATUS SH_MAKE_NAME(status)
#define SH_STATUS_EMPTY SH_MAKE_NAME(EMPTY)
#define SH_STATUS_IN_USE SH_MAKE_NAME(IN_USE)
#define SH_ITERATOR SH_MAKE_NAME(iterator)
/* names of the generated functions */
#define SH_CREATE SH_MAKE_NAME(create)
#define SH_DESTROY SH_MAKE_NAME(destroy)
#define SH_RESET SH_MAKE_NAME(reset)
#define SH_INSERT SH_MAKE_NAME(insert)
#define SH_DELETE SH_MAKE_NAME(delete)
#define SH_LOOKUP SH_MAKE_NAME(lookup)
#define SH_GROW SH_MAKE_NAME(grow)
#define SH_START_ITERATE SH_MAKE_NAME(start_iterate)
#define SH_START_ITERATE_AT SH_MAKE_NAME(start_iterate_at)
#define SH_ITERATE SH_MAKE_NAME(iterate)
#define SH_ALLOCATE SH_MAKE_NAME(allocate)
#define SH_FREE SH_MAKE_NAME(free)
#define SH_STAT SH_MAKE_NAME(stat)
/* names of internal helper functions (no externally visible prototypes) */
#define SH_COMPUTE_PARAMETERS SH_MAKE_NAME(compute_parameters)
#define SH_NEXT SH_MAKE_NAME(next)
#define SH_PREV SH_MAKE_NAME(prev)
#define SH_DISTANCE_FROM_OPTIMAL SH_MAKE_NAME(distance)
#define SH_INITIAL_BUCKET SH_MAKE_NAME(initial_bucket)
#define SH_ENTRY_HASH SH_MAKE_NAME(entry_hash)
//如定义了SH_DECLARE,则生成使用哈希表所需的声明
#ifdef SH_DECLARE
typedef struct SH_TYPE
{
uint64 size;
uint32 members;
uint32 sizemask;
uint32 grow_threshold;
SH_ELEMENT_TYPE *data;
MemoryContext ctx;
void *private_data;
} SH_TYPE;//实际是tuplehash_hash
typedef enum SH_STATUS
{
SH_STATUS_EMPTY = 0x00,
SH_STATUS_IN_USE = 0x01
} SH_STATUS;
typedef struct SH_ITERATOR
{
uint32 cur;
uint32 end;
bool done;
} SH_ITERATOR;
SH_SCOPE SH_TYPE *SH_CREATE(MemoryContext ctx, uint32 nelements,
void *private_data);
SH_SCOPE void SH_DESTROY(SH_TYPE * tb);
SH_SCOPE void SH_RESET(SH_TYPE * tb);
SH_SCOPE void SH_GROW(SH_TYPE * tb, uint32 newsize);
SH_SCOPE SH_ELEMENT_TYPE *SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found);
SH_SCOPE SH_ELEMENT_TYPE *SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key);
SH_SCOPE bool SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key);
SH_SCOPE void SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
SH_SCOPE void SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at);
SH_SCOPE SH_ELEMENT_TYPE *SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter);
SH_SCOPE void SH_STAT(SH_TYPE * tb);
#endif
/* If SH_DEFINE is defined, emit the implementation of the hash table. */
#ifdef SH_DEFINE
#include "utils/memutils.h"
/* maximum bucket-array size: PG_UINT32_MAX + 1 buckets */
#define SH_MAX_SIZE (((uint64) PG_UINT32_MAX) + 1)
/* fill factor used for grow_threshold below the maximum size (overridable) */
#ifndef SH_FILLFACTOR
#define SH_FILLFACTOR (0.9)
#endif
/* fill factor used for grow_threshold once the table is at SH_MAX_SIZE */
#define SH_MAX_FILLFACTOR (0.98)
/* SH_INSERT forces a grow when the probe distance exceeds this (overridable) */
#ifndef SH_GROW_MAX_DIB
#define SH_GROW_MAX_DIB 25
#endif
/* SH_INSERT forces a grow when more entries than this would be moved (overridable) */
#ifndef SH_GROW_MAX_MOVE
#define SH_GROW_MAX_MOVE 150
#endif
/* ... but neither forced grow happens while members/size is below this (overridable) */
#ifndef SH_GROW_MIN_FILLFACTOR
#define SH_GROW_MIN_FILLFACTOR 0.1
#endif
/*
 * Key comparison.  When hashes are stored in the entries the cached hash is
 * compared first, so SH_EQUAL only runs for entries whose hash matches.
 */
#ifdef SH_STORE_HASH
#define SH_COMPARE_KEYS(tb, ahash, akey, b) (ahash == SH_GET_HASH(tb, b) && SH_EQUAL(tb, b->SH_KEY, akey))
#else
#define SH_COMPARE_KEYS(tb, ahash, akey, b) (SH_EQUAL(tb, b->SH_KEY, akey))
#endif
/*
 * Return ceil(log2(num)): the smallest exponent e such that 2^e >= num.
 * Both 0 and 1 yield 0.
 *
 * The exponent is derived from the bit length of (num - 1) rather than by
 * doubling a limit until it reaches num: the doubling form wraps to zero
 * once num exceeds 2^63 and then never terminates.  For such inputs this
 * returns 64, which callers must not use directly as a shift count.
 */
static inline uint64
sh_log2(uint64 num)
{
	uint64		exponent = 0;
	uint64		remaining;

	if (num <= 1)
		return 0;

	/* count the significant bits of num - 1 */
	for (remaining = num - 1; remaining != 0; remaining >>= 1)
		exponent++;

	return exponent;
}
/* Return the first power of two that is >= num. */
static inline uint64
sh_pow2(uint64 num)
{
	uint64		one = 1;
	uint64		exponent = sh_log2(num);

	return one << exponent;
}
/*
 * Compute the sizing parameters of the hash table (size, sizemask and
 * grow_threshold) for a requested number of buckets.
 *
 * tb:      table whose parameters are overwritten
 * newsize: requested bucket count; raised to at least 2 and rounded up to
 *          the next power of two
 *
 * Raises ERROR "hash table too large" if the bucket array for that size
 * would reach MaxAllocHugeSize.
 */
static inline void
SH_COMPUTE_PARAMETERS(SH_TYPE * tb, uint32 newsize)
{
	uint64		size;

	/* never use fewer than two buckets */
	size = Max(newsize, 2);

	/* buckets are selected by masking, so the size must be a power of two */
	size = sh_pow2(size);
	Assert(size <= SH_MAX_SIZE);

	/* make sure the bucket array can actually be allocated */
	if ((((uint64) sizeof(SH_ELEMENT_TYPE)) * size) >= MaxAllocHugeSize)
		elog(ERROR, "hash table too large");

	tb->size = size;

	/*
	 * size is a power of two no larger than 2^32, so size - 1 always fits in
	 * 32 bits; for SH_MAX_SIZE it is 0xFFFFFFFF.  A mask of 0 at the maximum
	 * size would send every hash to bucket 0.
	 */
	tb->sizemask = (uint32) (size - 1);

	/* member count at which the table has to grow again */
	if (tb->size == SH_MAX_SIZE)
		tb->grow_threshold = ((double) tb->size) * SH_MAX_FILLFACTOR;
	else
		tb->grow_threshold = ((double) tb->size) * SH_FILLFACTOR;
}
/* Return the optimal bucket for a hash value: the hash masked by sizemask. */
static inline uint32
SH_INITIAL_BUCKET(SH_TYPE * tb, uint32 hash)
{
	const uint32 mask = tb->sizemask;

	return mask & hash;
}
/*
 * Return the bucket after curelem, wrapping around via sizemask.  Asserts
 * that the probe has not come all the way back to startelem.
 */
static inline uint32
SH_NEXT(SH_TYPE * tb, uint32 curelem, uint32 startelem)
{
	const uint32 following = (curelem + 1) & tb->sizemask;

	Assert(following != startelem);
	return following;
}
/*
 * Return the bucket before curelem, wrapping around via sizemask.  Asserts
 * that the result is not startelem.
 */
static inline uint32
SH_PREV(SH_TYPE * tb, uint32 curelem, uint32 startelem)
{
	const uint32 preceding = (curelem - 1) & tb->sizemask;

	Assert(preceding != startelem);
	return preceding;
}
/*
 * Return how many buckets lie between an entry's optimal bucket and the
 * bucket it actually occupies, taking wraparound into account.
 */
static inline uint32
SH_DISTANCE_FROM_OPTIMAL(SH_TYPE * tb, uint32 optimal, uint32 bucket)
{
	/* the entry wrapped past the end of the bucket array */
	if (bucket < optimal)
		return (tb->size + bucket) - optimal;

	return bucket - optimal;
}
/*
 * Return the hash of an entry: the value cached in the entry when
 * SH_STORE_HASH is defined, otherwise recomputed from the entry's key.
 */
static inline uint32
SH_ENTRY_HASH(SH_TYPE * tb, SH_ELEMENT_TYPE * entry)
{
	uint32		entryhash;

#ifdef SH_STORE_HASH
	entryhash = SH_GET_HASH(tb, entry);
#else
	entryhash = SH_HASH_KEY(tb, entry->SH_KEY);
#endif
	return entryhash;
}
/*
 * Allocator hooks used for the bucket array.  The prototypes are always
 * emitted; the default bodies below are compiled only when
 * SH_USE_NONDEFAULT_ALLOCATOR is not defined.
 */
static inline void *SH_ALLOCATE(SH_TYPE * type, Size size);
static inline void SH_FREE(SH_TYPE * type, void *pointer);

#ifndef SH_USE_NONDEFAULT_ALLOCATOR

/* Default allocator: zero-filled, huge-capable allocation in the table's context. */
static inline void *
SH_ALLOCATE(SH_TYPE * type, Size size)
{
	const int	alloc_flags = MCXT_ALLOC_HUGE | MCXT_ALLOC_ZERO;

	return MemoryContextAllocExtended(type->ctx, size, alloc_flags);
}

/* Default deallocator: plain pfree; the table argument is not needed. */
static inline void
SH_FREE(SH_TYPE * type, void *pointer)
{
	(void) type;
	pfree(pointer);
}

#endif
/*
 * Create a hash table sized to hold nelements entries.
 *
 * ctx:          memory context for the table header and, with the default
 *               allocator, the bucket array
 * nelements:    expected number of entries; divided by SH_FILLFACTOR to get
 *               the requested bucket count
 * private_data: opaque pointer stored in the table for the caller's use
 *
 * Returns the new, empty table.
 */
SH_SCOPE SH_TYPE *
SH_CREATE(MemoryContext ctx, uint32 nelements, void *private_data)
{
	SH_TYPE    *table = MemoryContextAllocZero(ctx, sizeof(SH_TYPE));
	double		wanted = ((double) nelements) / SH_FILLFACTOR;
	uint64		size;

	table->private_data = private_data;
	table->ctx = ctx;

	/* cap the request at the maximum table size */
	size = Min((double) SH_MAX_SIZE, wanted);

	/*
	 * NOTE(review): size is uint64 but SH_COMPUTE_PARAMETERS takes uint32, so
	 * a request of exactly SH_MAX_SIZE is truncated on the way in -- confirm
	 * whether that is intended.
	 */
	SH_COMPUTE_PARAMETERS(table, size);

	table->data = SH_ALLOCATE(table, sizeof(SH_ELEMENT_TYPE) * table->size);

	return table;
}
/* Destroy a hash table: release the bucket array, then the table header. */
SH_SCOPE void
SH_DESTROY(SH_TYPE * tb)
{
	SH_ELEMENT_TYPE *buckets = tb->data;

	SH_FREE(tb, buckets);
	pfree(tb);
}
/*
 * Empty a hash table without changing its size: zero every bucket (which
 * marks it SH_STATUS_EMPTY) and reset the member count.
 */
SH_SCOPE void
SH_RESET(SH_TYPE * tb)
{
	tb->members = 0;
	memset(tb->data, 0, sizeof(SH_ELEMENT_TYPE) * tb->size);
}
/*
 * Grow the hash table to (at least) newsize buckets and move all entries
 * from the old bucket array into the new one.
 *
 * The old array is walked starting at a bucket that is either unused or
 * holds an entry sitting at the bucket SH_INITIAL_BUCKET reports for it,
 * and entries are re-inserted by linear probing for an empty slot.
 */
SH_SCOPE void
SH_GROW(SH_TYPE * tb, uint32 newsize)
{
	const uint64 oldsize = tb->size;
	SH_ELEMENT_TYPE *const olddata = tb->data;
	SH_ELEMENT_TYPE *newdata;
	uint32		firstelem = 0;
	uint32		srcelem;
	uint32		scanned;

	Assert(oldsize == sh_pow2(oldsize));
	Assert(oldsize != SH_MAX_SIZE);
	Assert(oldsize < newsize);

	/* switch the table over to the new size and a fresh bucket array */
	SH_COMPUTE_PARAMETERS(tb, newsize);
	newdata = SH_ALLOCATE(tb, sizeof(SH_ELEMENT_TYPE) * tb->size);
	tb->data = newdata;

	/* find the bucket of the old array at which copying starts */
	for (scanned = 0; scanned < oldsize; scanned++)
	{
		SH_ELEMENT_TYPE *candidate = &olddata[scanned];

		if (candidate->status != SH_STATUS_IN_USE ||
			SH_INITIAL_BUCKET(tb, SH_ENTRY_HASH(tb, candidate)) == scanned)
		{
			firstelem = scanned;
			break;
		}
	}

	/* visit every old bucket exactly once, beginning at firstelem */
	srcelem = firstelem;
	for (scanned = 0; scanned < oldsize; scanned++)
	{
		SH_ELEMENT_TYPE *oldentry = &olddata[srcelem];

		if (oldentry->status == SH_STATUS_IN_USE)
		{
			const uint32 home = SH_INITIAL_BUCKET(tb, SH_ENTRY_HASH(tb, oldentry));
			uint32		probe = home;

			/* probe forward to the first empty slot of the new array */
			while (newdata[probe].status != SH_STATUS_EMPTY)
				probe = SH_NEXT(tb, probe, home);

			memcpy(&newdata[probe], oldentry, sizeof(SH_ELEMENT_TYPE));
		}

		/* advance within the OLD array; SH_NEXT would wrap at the new size */
		if (++srcelem >= oldsize)
			srcelem = 0;
	}

	SH_FREE(tb, olddata);
}
/*
 * Insert key into the hash table, or find the existing entry for it.
 *
 * Returns the entry for key and sets *found to true if the key was already
 * present, false if a new entry was created.  For a new entry only the key,
 * the status and (with SH_STORE_HASH) the hash are filled in here.
 *
 * The table is grown first when members has reached grow_threshold; the
 * insertion is also restarted with a forced grow when a probe sequence or
 * the number of entries to shift becomes too long.  Raises ERROR
 * "hash table size exceeded" if the table is already at SH_MAX_SIZE.
 */
SH_SCOPE SH_ELEMENT_TYPE *
SH_INSERT(SH_TYPE * tb, SH_KEY_TYPE key, bool *found)
{
uint32 hash = SH_HASH_KEY(tb, key);/* TupleHashTableHash for tuplehash; key type is MinimalTuple */
uint32 startelem;
uint32 curelem;
SH_ELEMENT_TYPE *data;
uint32 insertdist;
restart:
/* distance of the new key from its optimal bucket, reset on every restart */
insertdist = 0;
/* grow before searching, so the check is not needed inside the loop */
if (unlikely(tb->members >= tb->grow_threshold))
{
if (tb->size == SH_MAX_SIZE)
{
elog(ERROR, "hash table size exceeded");
}
SH_GROW(tb, tb->size * 2);
}
/* perform the insert, starting the bucket search at the optimal location */
data = tb->data;
startelem = SH_INITIAL_BUCKET(tb, hash);/* optimal (starting) bucket */
curelem = startelem;/* bucket currently being examined */
while (true)
{
uint32 curdist;
uint32 curhash;
uint32 curoptimal;
SH_ELEMENT_TYPE *entry = &data[curelem];/* SH_ELEMENT_TYPE --> TupleHashEntryData */
/* an empty bucket can be used directly */
if (entry->status == SH_STATUS_EMPTY)
{
/* --------- bucket is empty */
/* one more member */
tb->members++;
/* store the key */
entry->SH_KEY = key;
#ifdef SH_STORE_HASH
/* cache the hash value in the entry */
SH_GET_HASH(tb, entry) = hash;
#endif
/* mark the bucket as used */
entry->status = SH_STATUS_IN_USE;
/* tell the caller that this is a new entry */
*found = false;
/* return the entry */
return entry;
}
if (SH_COMPARE_KEYS(tb, hash, key, entry))/* TupleHashTableMatch for tuplehash */
{
/* found the matching key */
Assert(entry->status == SH_STATUS_IN_USE);
*found = true;
/* return the existing entry */
return entry;
}
/* hash of the entry occupying this bucket */
curhash = SH_ENTRY_HASH(tb, entry);
/* optimal bucket of that entry */
curoptimal = SH_INITIAL_BUCKET(tb, curhash);
/* how far that entry is from its optimal bucket */
curdist = SH_DISTANCE_FROM_OPTIMAL(tb, curoptimal, curelem);
/*
 * The new key is already further from its optimal bucket than the
 * occupant is from its own: take over this bucket and shift the
 * occupant and the entries after it forward by one.
 */
if (insertdist > curdist)
{
SH_ELEMENT_TYPE *lastentry = entry;
uint32 emptyelem = curelem;
uint32 moveelem;
int32 emptydist = 0;
/* find the next empty bucket */
while (true)
{
SH_ELEMENT_TYPE *emptyentry;
emptyelem = SH_NEXT(tb, emptyelem, startelem);
emptyentry = &data[emptyelem];
if (emptyentry->status == SH_STATUS_EMPTY)
{
lastentry = emptyentry;
break;
}
/*
 * Too many entries would have to be moved: force a grow and start
 * over, unless the table is below SH_GROW_MIN_FILLFACTOR.
 */
if (unlikely(++emptydist > SH_GROW_MAX_MOVE) &&
((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
{
tb->grow_threshold = 0;
goto restart;
}
}
/* shift entries forward by one, working backwards from the empty bucket */
moveelem = emptyelem;
while (moveelem != curelem)
{
SH_ELEMENT_TYPE *moveentry;
moveelem = SH_PREV(tb, moveelem, startelem);
moveentry = &data[moveelem];
memcpy(lastentry, moveentry, sizeof(SH_ELEMENT_TYPE));
lastentry = moveentry;
}
/* and fill the bucket that has been vacated */
tb->members++;
entry->SH_KEY = key;
#ifdef SH_STORE_HASH
SH_GET_HASH(tb, entry) = hash;
#endif
entry->status = SH_STATUS_IN_USE;
*found = false;
return entry;
}
curelem = SH_NEXT(tb, curelem, startelem);
insertdist++;
/*
 * The probe sequence has become too long: force a grow and start over,
 * unless the table is below SH_GROW_MIN_FILLFACTOR.
 */
if (unlikely(insertdist > SH_GROW_MAX_DIB) &&
((double) tb->members / tb->size) >= SH_GROW_MIN_FILLFACTOR)
{
tb->grow_threshold = 0;
goto restart;
}
}
}
/*
 * Look up key in the hash table.  Returns the matching entry, or NULL if
 * the probe sequence reaches an empty bucket without finding it.
 */
SH_SCOPE SH_ELEMENT_TYPE *
SH_LOOKUP(SH_TYPE * tb, SH_KEY_TYPE key)
{
	const uint32 keyhash = SH_HASH_KEY(tb, key);
	const uint32 home = SH_INITIAL_BUCKET(tb, keyhash);
	uint32		probe;

	/* linear probing, starting at the key's optimal bucket */
	for (probe = home;; probe = SH_NEXT(tb, probe, home))
	{
		SH_ELEMENT_TYPE *candidate = &tb->data[probe];

		/* an empty bucket ends the probe sequence: the key is absent */
		if (candidate->status == SH_STATUS_EMPTY)
			return NULL;

		Assert(candidate->status == SH_STATUS_IN_USE);

		if (SH_COMPARE_KEYS(tb, keyhash, key, candidate))
			return candidate;
	}
}
/*
 * Delete the entry for key.  Returns true if an entry was removed, false
 * if the key is not in the table.
 *
 * After removal the following entries are shifted back by one bucket until
 * an unused bucket, or an entry already at its optimal bucket, is reached;
 * the bucket left over at the end of that run is marked empty.
 */
SH_SCOPE bool
SH_DELETE(SH_TYPE * tb, SH_KEY_TYPE key)
{
	const uint32 keyhash = SH_HASH_KEY(tb, key);
	const uint32 home = SH_INITIAL_BUCKET(tb, keyhash);
	uint32		probe;

	for (probe = home;; probe = SH_NEXT(tb, probe, home))
	{
		SH_ELEMENT_TYPE *candidate = &tb->data[probe];
		SH_ELEMENT_TYPE *hole;

		/* an empty bucket ends the probe sequence: nothing to delete */
		if (candidate->status == SH_STATUS_EMPTY)
			return false;

		if (candidate->status != SH_STATUS_IN_USE ||
			!SH_COMPARE_KEYS(tb, keyhash, key, candidate))
			continue;

		/* found it: candidate's bucket becomes the hole to fill */
		hole = candidate;
		tb->members--;

		for (;;)
		{
			SH_ELEMENT_TYPE *follower;

			probe = SH_NEXT(tb, probe, home);
			follower = &tb->data[probe];

			/* stop at an unused bucket or an entry that must not move */
			if (follower->status != SH_STATUS_IN_USE ||
				SH_INITIAL_BUCKET(tb, SH_ENTRY_HASH(tb, follower)) == probe)
				break;

			/* shift the follower back into the hole */
			memcpy(hole, follower, sizeof(SH_ELEMENT_TYPE));
			hole = follower;
		}

		hole->status = SH_STATUS_EMPTY;
		return true;
	}
}
/*
 * Initialize iter for a full scan of the table.
 *
 * The scan starts, and later ends, at the first bucket that is not in use.
 * SH_ITERATE then walks the bucket array backwards from there.
 */
SH_SCOPE void
SH_START_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
{
	uint64		i;
	uint64		startelem = PG_UINT64_MAX;

	/*
	 * Search for the first unused bucket.  The index has the same unsigned
	 * 64-bit type as tb->size, so the comparison involves no signed
	 * conversion and the index cannot overflow on tables larger than 2^31
	 * buckets.
	 */
	for (i = 0; i < tb->size; i++)
	{
		SH_ELEMENT_TYPE *entry = &tb->data[i];

		if (entry->status != SH_STATUS_IN_USE)
		{
			startelem = i;
			break;
		}
	}

	/* the table is never completely full, so a start bucket must exist */
	Assert(startelem < SH_MAX_SIZE);

	iter->cur = startelem;
	iter->end = iter->cur;
	iter->done = false;
}
/*
 * Initialize iter for a scan that starts (and ends) at bucket "at", reduced
 * to a valid bucket number with sizemask.
 */
SH_SCOPE void
SH_START_ITERATE_AT(SH_TYPE * tb, SH_ITERATOR * iter, uint32 at)
{
	const uint32 first = at & tb->sizemask;

	iter->done = false;
	iter->cur = first;
	iter->end = first;
}
/*
 * Return the next in-use entry of the scan described by iter, or NULL when
 * the scan is finished.  Buckets are visited in descending order, wrapping
 * around, until the position comes back to iter->end.
 */
SH_SCOPE SH_ELEMENT_TYPE *
SH_ITERATE(SH_TYPE * tb, SH_ITERATOR * iter)
{
	const uint32 mask = tb->sizemask;

	while (!iter->done)
	{
		SH_ELEMENT_TYPE *visited = &tb->data[iter->cur];

		/* step backwards, wrapping at the start of the bucket array */
		iter->cur = (iter->cur - 1) & mask;

		/* back at the end position: this is the last bucket to report */
		iter->done = ((iter->cur & mask) == (iter->end & mask));

		if (visited->status == SH_STATUS_IN_USE)
			return visited;
	}

	return NULL;
}
/*
 * Log statistics about the hash table at LOG level: size, member count,
 * fill factor, chain lengths (distance of entries from their optimal
 * bucket) and collisions (entries sharing an optimal bucket beyond the
 * first).
 *
 * Uses a temporary per-bucket counter array, which is released before
 * returning.
 */
SH_SCOPE void
SH_STAT(SH_TYPE * tb)
{
	uint32		max_chain_length = 0;
	uint32		total_chain_length = 0;
	double		avg_chain_length;
	double		fillfactor;
	uint32		i;

	uint32	   *collisions = palloc0(tb->size * sizeof(uint32));
	uint32		total_collisions = 0;
	uint32		max_collisions = 0;
	double		avg_collisions;

	/* measure each entry's distance and count entries per optimal bucket */
	for (i = 0; i < tb->size; i++)
	{
		uint32		hash;
		uint32		optimal;
		uint32		dist;
		SH_ELEMENT_TYPE *elem;

		elem = &tb->data[i];

		if (elem->status != SH_STATUS_IN_USE)
			continue;

		hash = SH_ENTRY_HASH(tb, elem);
		optimal = SH_INITIAL_BUCKET(tb, hash);
		dist = SH_DISTANCE_FROM_OPTIMAL(tb, optimal, i);

		if (dist > max_chain_length)
			max_chain_length = dist;
		total_chain_length += dist;

		collisions[optimal]++;
	}

	/* the first entry of a bucket is not a collision, only the extra ones */
	for (i = 0; i < tb->size; i++)
	{
		uint32		curcoll = collisions[i];

		if (curcoll == 0)
			continue;

		curcoll--;
		total_collisions += curcoll;
		if (curcoll > max_collisions)
			max_collisions = curcoll;
	}

	if (tb->members > 0)
	{
		fillfactor = tb->members / ((double) tb->size);
		avg_chain_length = ((double) total_chain_length) / tb->members;
		avg_collisions = ((double) total_collisions) / tb->members;
	}
	else
	{
		fillfactor = 0;
		avg_chain_length = 0;
		avg_collisions = 0;
	}

	/* max_collisions is unsigned, so it is printed with %u like the others */
	elog(LOG, "size: " UINT64_FORMAT ", members: %u, filled: %f, total chain: %u, max chain: %u, avg chain: %f, total_collisions: %u, max_collisions: %u, avg_collisions: %f",
		 tb->size, tb->members, fillfactor, total_chain_length, max_chain_length, avg_chain_length,
		 total_collisions, max_collisions, avg_collisions);

	/* release the temporary counter array instead of leaking it */
	pfree(collisions);
}
#endif
/*
 * Undefine the caller-supplied parameters so that another hash table can be
 * instantiated later in the same translation unit.  SH_EQUAL is included:
 * it is a required parameter like the others, and leaving it defined makes
 * the next instantiation's #define of it a macro redefinition.
 */
#undef SH_PREFIX
#undef SH_KEY_TYPE
#undef SH_KEY
#undef SH_ELEMENT_TYPE
#undef SH_HASH_KEY
#undef SH_EQUAL
#undef SH_SCOPE
#undef SH_DECLARE
#undef SH_DEFINE
#undef SH_GET_HASH
#undef SH_STORE_HASH
#undef SH_USE_NONDEFAULT_ALLOCATOR
/* undefine the macros defined by this header itself */
#undef SH_MAKE_PREFIX
#undef SH_MAKE_NAME
#undef SH_MAKE_NAME_
#undef SH_FILLFACTOR
#undef SH_MAX_FILLFACTOR
#undef SH_GROW_MAX_DIB
#undef SH_GROW_MAX_MOVE
#undef SH_GROW_MIN_FILLFACTOR
#undef SH_MAX_SIZE
/* type names */
#undef SH_TYPE
#undef SH_STATUS
#undef SH_STATUS_EMPTY
#undef SH_STATUS_IN_USE
#undef SH_ITERATOR
/* externally visible function names */
#undef SH_CREATE
#undef SH_DESTROY
#undef SH_RESET
#undef SH_INSERT
#undef SH_DELETE
#undef SH_LOOKUP
#undef SH_GROW
#undef SH_START_ITERATE
#undef SH_START_ITERATE_AT
#undef SH_ITERATE
#undef SH_ALLOCATE
#undef SH_FREE
#undef SH_STAT
/* internal function and helper macro names */
#undef SH_COMPUTE_PARAMETERS
#undef SH_COMPARE_KEYS
#undef SH_INITIAL_BUCKET
#undef SH_NEXT
#undef SH_PREV
#undef SH_DISTANCE_FROM_OPTIMAL
#undef SH_ENTRY_HASH
三、跟踪分析
下面以tuplehash_insert为例,分析simplehash插入哈希表的实现.
测试脚本
-- 禁用并行
set max_parallel_workers_per_gather=0;
select bh,avg(c1),min(c1),max(c2) from t_agg_simple group by bh;
跟踪分析
(gdb) b tuplehash_insert
Breakpoint 1 at 0x6d2a27: file ../../../src/include/lib/simplehash.h, line 490.
(gdb)
输入参数
(gdb) p *tb
$1 = {size = 256, members = 0, sizemask = 255, grow_threshold = 230, data = 0x1cc2a10, ctx = 0x1c9b320,
private_data = 0x1cb88a0}
(gdb)
判断是否需要增长
(gdb) n
497 insertdist = 0;
(gdb)
507 if (unlikely(tb->members >= tb->grow_threshold))
(gdb) p tb->members
$2 = 0
(gdb) p tb->grow_threshold
$3 = 230
执行插入,从最优(optimal)位置开始bucket搜索
获取条目数组(TupleHashEntryData *指针),初始化开始元素和当前元素
(gdb) n
523 data = tb->data;
(gdb)
524 startelem = SH_INITIAL_BUCKET(tb, hash);
(gdb) p *data
$4 = {firstTuple = 0x0, additional = 0x0, status = 0, hash = 0}
(gdb) n
525 curelem = startelem;
(gdb) p startelem
$5 = 114
(gdb) p hash
$6 = 443809650
(gdb)
进入循环,寻找空闲的bucket执行插入
(gdb) n
531 SH_ELEMENT_TYPE *entry = &data[curelem];
(gdb) n
534 if (entry->status == SH_STATUS_EMPTY)
(gdb) p *entry
$7 = {firstTuple = 0x0, additional = 0x0, status = 0, hash = 0}
(gdb) p *data
$8 = {firstTuple = 0x0, additional = 0x0, status = 0, hash = 0}
(gdb) p data[255]
$9 = {firstTuple = 0x0, additional = 0x0, status = 0, hash = 0}
(gdb) n
536 tb->members++;
(gdb)
537 entry->SH_KEY = key;
(gdb) p *tb
$10 = {size = 256, members = 1, sizemask = 255, grow_threshold = 230, data = 0x1cc2a10, ctx = 0x1c9b320,
private_data = 0x1cb88a0}
(gdb) n
539 SH_GET_HASH(tb, entry) = hash;
(gdb)
541 entry->status = SH_STATUS_IN_USE;
(gdb) p *entry
$11 = {firstTuple = 0x0, additional = 0x0, status = 0, hash = 443809650}
(gdb) n
542 *found = false;
(gdb)
543 return entry;
(gdb) p *entry
$12 = {firstTuple = 0x0, additional = 0x0, status = 1, hash = 443809650}
(gdb)
完成函数调用,返回entry
(gdb) n
652 }
(gdb)
LookupTupleHashEntry (hashtable=0x1cb88a0, slot=0x1c9d248, isnew=0x7ffd1348e797) at execGrouping.c:303
303 if (found)
(gdb)
回到LookupTupleHashEntry
(gdb)
LookupTupleHashEntry (hashtable=0x1cb88a0, slot=0x1c9d248, isnew=0x7ffd1348e797) at execGrouping.c:303
303 if (found)
(gdb) n
311 *isnew = true;
(gdb)
313 entry->additional = NULL;
(gdb)
314 MemoryContextSwitchTo(hashtable->tablecxt);
(gdb)
316 entry->firstTuple = ExecCopySlotMinimalTuple(slot);
(gdb)
324 MemoryContextSwitchTo(oldContext);
查看tuple数据
(gdb) p *entry
$13 = {firstTuple = 0x1cb2498, additional = 0x0, status = 1, hash = 443809650}
(gdb) x/7x entry->firstTuple->t_bits
0x1cb24a7: 0x00 0x0b 0x47 0x5a 0x30 0x31 0x7e
(gdb) x/7c entry->firstTuple->t_bits
0x1cb24a7: 0 '\000' 11 '\v' 71 'G' 90 'Z' 48 '0' 49 '1' 126 '~'
下一次调用,这次出现了碰撞
(gdb) c
Continuing.
Breakpoint 1, tuplehash_insert (tb=0x1cb8730, key=0x0, found=0x7ffd1348e757) at ../../../src/include/lib/simplehash.h:490
490 uint32 hash = SH_HASH_KEY(tb, key);
(gdb) n
497 insertdist = 0;
(gdb) p hash
$15 = 4237773170
(gdb) n
507 if (unlikely(tb->members >= tb->grow_threshold))
(gdb)
523 data = tb->data;
(gdb)
524 startelem = SH_INITIAL_BUCKET(tb, hash);
(gdb) p data[0]
$16 = {firstTuple = 0x0, additional = 0x0, status = 0, hash = 0}
(gdb) n
525 curelem = startelem;
(gdb)
531 SH_ELEMENT_TYPE *entry = &data[curelem];
(gdb) p startelem
$17 = 114
(gdb) p curelem
$18 = 114
(gdb) p data[curelem]
$19 = {firstTuple = 0x1cb2498, additional = 0x1cb24d0, status = 1, hash = 443809650}
(gdb) n
534 if (entry->status == SH_STATUS_EMPTY)
(gdb)
554 if (SH_COMPARE_KEYS(tb, hash, key, entry))
(gdb)
561 curhash = SH_ENTRY_HASH(tb, entry);
(gdb)
562 curoptimal = SH_INITIAL_BUCKET(tb, curhash);
(gdb) p curhash
$20 = 443809650
(gdb) n
563 curdist = SH_DISTANCE_FROM_OPTIMAL(tb, curoptimal, curelem);
(gdb)
565 if (insertdist > curdist)
(gdb) p curoptimal
$21 = 114
(gdb) p curdist
$22 = 0
(gdb) n
634 curelem = SH_NEXT(tb, curelem, startelem);
(gdb) p insertdist
$23 = 0
(gdb) n
635 insertdist++;
(gdb) p curelem
$24 = 115
(gdb) n
645 if (unlikely(insertdist > SH_GROW_MAX_DIB) &&
(gdb)
651 }
(gdb)
531 SH_ELEMENT_TYPE *entry = &data[curelem];
(gdb)
534 if (entry->status == SH_STATUS_EMPTY)
(gdb)
536 tb->members++;
(gdb)
537 entry->SH_KEY = key;
(gdb)
539 SH_GET_HASH(tb, entry) = hash;
(gdb)
541 entry->status = SH_STATUS_IN_USE;
(gdb)
542 *found = false;
(gdb)
543 return entry;
(gdb) p *entry
$25 = {firstTuple = 0x0, additional = 0x0, status = 1, hash = 4237773170}
(gdb)
回到LookupTupleHashEntry,查看tuple
(gdb)
LookupTupleHashEntry (hashtable=0x1cb88a0, slot=0x1c9d248, isnew=0x7ffd1348e797) at execGrouping.c:303
303 if (found)
(gdb)
311 *isnew = true;
(gdb)
313 entry->additional = NULL;
(gdb)
314 MemoryContextSwitchTo(hashtable->tablecxt);
(gdb)
316 entry->firstTuple = ExecCopySlotMinimalTuple(slot);
(gdb)
324 MemoryContextSwitchTo(oldContext);
(gdb) p *entry
$26 = {firstTuple = 0x1cb2580, additional = 0x0, status = 1, hash = 4237773170}
(gdb) p *entry->firstTuple
$27 = {t_len = 21, mt_padding = "\000\000\000\000\000", t_infomask2 = 1, t_infomask = 2, t_hoff = 24 '\030',
t_bits = 0x1cb258f ""}
(gdb) x/7x entry->firstTuple->t_bits
0x1cb258f: 0x00 0x0b 0x47 0x5a 0x30 0x32 0x7e
(gdb) x/7c entry->firstTuple->t_bits
0x1cb258f: 0 '\000' 11 '\v' 71 'G' 90 'Z' 48 '0' 50 '2' 126 '~'
(gdb)
到此,关于“PostgreSQL的simplehash.h文件中的内容是什么”的学习就结束了,希望能够解决大家的疑惑。理论与实践的搭配能更好的帮助大家学习,快去试试吧!若想继续学习更多相关知识,请继续关注亿速云网站,小编会继续努力为大家带来更多实用的文章!