本文介绍PostgreSQL中ExecHashJoin所依赖的其他函数(以ExecScanHashBucket为主)的实现逻辑:先说明相关的数据结构,再解读源码,最后通过gdb跟踪加以验证。希望大家仔细阅读,能够学有所成!
一、数据结构
JoinState
Hash/NestLoop/Merge Join的基类
/*
 * JoinState - base structure for the run-time state of the Hash, NestLoop
 * and Merge join nodes.
 */
typedef struct JoinState
{
PlanState ps;// base PlanState
JoinType jointype;// type of join
// true when the executor should move on to the next outer tuple as soon as
// one matching inner tuple has been found
bool single_match;
// join condition expression (in addition to ps.qual)
ExprState *joinqual;
} JoinState;
HashJoinState
Hash Join运行期状态结构体
// Pointer aliases for a tuple stored in the hash table and for the hash
// table itself.
typedef struct HashJoinTupleData *HashJoinTuple;
typedef struct HashJoinTableData *HashJoinTable;
/*
 * HashJoinState - run-time state of a hash join node.
 */
typedef struct HashJoinState
{
JoinState js;// common join state (JoinState)
ExprState *hashclauses;// hash join condition
List *hj_OuterHashKeys;
List *hj_InnerHashKeys;
List *hj_HashOperators;
HashJoinTable hj_HashTable;// the hash table
uint32 hj_CurHashValue;// current hash value
int hj_CurBucketNo;// current bucket number
int hj_CurSkewBucketNo;// current skew bucket number (ExecScanHashBucket tests it against INVALID_SKEW_BUCKET_NO)
HashJoinTuple hj_CurTuple;// current tuple; ExecScanHashBucket resumes after it, or starts at the bucket head when NULL
TupleTableSlot *hj_OuterTupleSlot;// slot for the outer relation tuple
TupleTableSlot *hj_HashTupleSlot;// slot for a tuple taken from the hash table
TupleTableSlot *hj_NullOuterTupleSlot;// dummy outer slot used for outer joins
TupleTableSlot *hj_NullInnerTupleSlot;// dummy inner slot used for outer joins
TupleTableSlot *hj_FirstOuterTupleSlot;// NOTE(review): not described in this excerpt; presumably the first outer tuple fetched - confirm
int hj_JoinState;// current state of the join - presumably the phase (e.g. HJ_SCAN_BUCKET); verify
bool hj_MatchedOuter;// whether a match has been found
bool hj_OuterNotEmpty;// NOTE(review): by its name, true when the outer relation is known to be non-empty - confirm
} HashJoinState;
HashJoinTable
Hash表数据结构
/*
 * HashJoinTableData - the hash table used by a hash join.
 *
 * Only the fields that carry a comment below are described by this excerpt;
 * consult the upstream header before relying on the meaning of the others.
 */
typedef struct HashJoinTableData
{
int nbuckets;
int log2_nbuckets;
int nbuckets_original;
int nbuckets_optimal;
int log2_nbuckets_optimal;
// buckets[i] is the head item of the in-memory tuple list of bucket i
union
{
// the unshared array is per-batch storage, as are all the tuples
struct HashJoinTupleData **unshared;
// the shared array lives in the per-query DSA area, as do all the tuples
dsa_pointer_atomic *shared;
} buckets;
bool keepNulls;
bool skewEnabled;
HashSkewBucket **skewBucket;
int skewBucketLen;
int nSkewBuckets;
int *skewBucketNums;
int nbatch;
int curbatch;
int nbatch_original;
int nbatch_outstart;
bool growEnabled;
double totalTuples;
double partialTuples;
double skewTuples;
BufFile **innerBatchFile;
BufFile **outerBatchFile;
FmgrInfo *outer_hashfunctions;
FmgrInfo *inner_hashfunctions;
bool *hashStrict;
Size spaceUsed;
Size spaceAllowed;
Size spacePeak;
Size spaceUsedSkew;
Size spaceAllowedSkew;
MemoryContext hashCxt;
MemoryContext batchCxt;
// used for dense allocation of tuples (into linked chunks)
HashMemoryChunk chunks;
// shared and private state used by parallel hash
HashMemoryChunk current_chunk;
dsa_area *area;
ParallelHashJoinState *parallel_state;// parallel execution state
ParallelHashJoinBatchAccessor *batches;// accessors used for parallel execution
dsa_pointer current_chunk_shared;// start pointer of the current chunk
} HashJoinTableData;
// Same pointer alias as the one declared next to HashJoinState; repeated here.
typedef struct HashJoinTableData *HashJoinTable;
HashJoinTupleData
Hash连接元组数据
/*
 * HashJoinTupleData - header of a tuple stored in the hash table.
 *
 * The tuple body itself is not a member: HJTUPLE_MINTUPLE locates it
 * HJTUPLE_OVERHEAD bytes past the start of this header.
 */
typedef struct HashJoinTupleData
{
// link to the next tuple in the same bucket
union
{
struct HashJoinTupleData *unshared;
dsa_pointer shared;
} next;
uint32 hashvalue;
} HashJoinTupleData;
// Size of the header, rounded up with MAXALIGN.
#define HJTUPLE_OVERHEAD MAXALIGN(sizeof(HashJoinTupleData))
// Address of the MinimalTuple that follows the header of hjtup.
#define HJTUPLE_MINTUPLE(hjtup) \
((MinimalTuple) ((char *) (hjtup) + HJTUPLE_OVERHEAD))
二、源码解读
ExecScanHashBucket
在当前outer relation元组对应的hash桶中搜索,寻找与之匹配的inner relation元组;找到则把该元组记录在hjstate->hj_CurTuple中并返回true,扫描完整个桶仍未找到则返回false。
/*
 * ExecScanHashBucket
 *		Scan a hash bucket for an inner tuple that matches the current
 *		outer tuple.
 *
 * hjstate->hj_CurTuple says where to resume. If it is non-NULL the scan
 * continues with the tuple that follows it. If it is NULL the scan starts at
 * the head of skew bucket hj_CurSkewBucketNo when that is not
 * INVALID_SKEW_BUCKET_NO, otherwise at the head of regular bucket
 * hj_CurBucketNo.
 *
 * A candidate is accepted when its stored hash value equals hj_CurHashValue
 * and the hash clauses evaluate to true with the candidate installed as
 * econtext->ecxt_innertuple.
 *
 * Returns true after saving the matching tuple in hjstate->hj_CurTuple.
 * Returns false once the chain is exhausted; hj_CurTuple is not modified on
 * that path. Only the "unshared" bucket and next pointers are followed.
 */
bool
ExecScanHashBucket(HashJoinState *hjstate,
ExprContext *econtext)
{
ExprState *hjclauses = hjstate->hashclauses;// hash join condition expression
HashJoinTable hashtable = hjstate->hj_HashTable;// the hash table
HashJoinTuple hashTuple = hjstate->hj_CurTuple;// tuple the previous call stopped at, if any
uint32 hashvalue = hjstate->hj_CurHashValue;// hash value to look for
if (hashTuple != NULL)
hashTuple = hashTuple->next.unshared;// resume with the tuple after the previous one
else if (hjstate->hj_CurSkewBucketNo != INVALID_SKEW_BUCKET_NO)
// no previous tuple and a skew bucket is selected: start at its head
hashTuple = hashtable->skewBucket[hjstate->hj_CurSkewBucketNo]->tuples;
else
// no previous tuple and no skew bucket: start at the head of the regular bucket
hashTuple = hashtable->buckets.unshared[hjstate->hj_CurBucketNo];
while (hashTuple != NULL)// walk the bucket's tuple chain
{
if (hashTuple->hashvalue == hashvalue)// evaluate the join clauses only when the hash values agree
{
TupleTableSlot *inntuple;// slot holding the candidate inner tuple
// put the hash-table tuple into an executor slot so the qual evaluation can read it
inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(hashTuple),
hjstate->hj_HashTupleSlot,
false);
econtext->ecxt_innertuple = inntuple;// expose the candidate as the inner tuple
if (ExecQualAndReset(hjclauses, econtext))// does the join condition hold?
{
hjstate->hj_CurTuple = hashTuple;// remember the match so the next call resumes after it
return true;
}
}
hashTuple = hashTuple->next.unshared;// advance to the next tuple in the chain
}
return false;
}
/*
 * ExecStoreMinimalTuple
 *		Store a minimal tuple into a tuple table slot and return the slot.
 *
 * mtup:       the minimal tuple to store; must not be NULL.
 * slot:       the destination slot; must not be NULL, must have a tuple
 *             descriptor, and must satisfy TTS_IS_MINIMALTUPLE.
 * shouldFree: passed through unchanged to tts_minimal_store_tuple.
 *
 * Reports elog(ERROR) when the slot fails the TTS_IS_MINIMALTUPLE check.
 */
TupleTableSlot *
ExecStoreMinimalTuple(MinimalTuple mtup,
TupleTableSlot *slot,
bool shouldFree)
{
Assert(mtup != NULL);
Assert(slot != NULL);
Assert(slot->tts_tupleDescriptor != NULL);
if (unlikely(!TTS_IS_MINIMALTUPLE(slot)))// slot type check
elog(ERROR, "trying to store a minimal tuple into wrong type of slot");
tts_minimal_store_tuple(slot, mtup, shouldFree);// do the actual store
return slot;// hand the same slot back to the caller
}
/*
 * tts_minimal_store_tuple
 *		Make a minimal-tuple slot refer to the given minimal tuple.
 *
 * The slot is cleared first, then marked non-empty with no attributes
 * extracted yet. When shouldFree is true the slot is additionally flagged
 * with TTS_FLAG_SHOULDFREE.
 */
static void
tts_minimal_store_tuple(TupleTableSlot *slot, MinimalTuple mtup, bool shouldFree)
{
    MinimalTupleTableSlot *minslot = (MinimalTupleTableSlot *) slot;

    /* Drop whatever the slot held before. */
    tts_minimal_clear(slot);

    /* Sanity checks: after clearing, the slot owns nothing and is empty. */
    Assert(!TTS_SHOULDFREE(slot));
    Assert(TTS_EMPTY(slot));

    /* Mark the slot as populated, with no attributes extracted so far. */
    slot->tts_flags &= ~TTS_FLAG_EMPTY;
    slot->tts_nvalid = 0;
    minslot->off = 0;

    /* Record the tuple and point the embedded header at it. */
    minslot->mintuple = mtup;
    Assert(minslot->tuple == &minslot->minhdr);
    minslot->minhdr.t_len = mtup->t_len + MINIMAL_TUPLE_OFFSET;
    minslot->minhdr.t_data = (HeapTupleHeader) ((char *) mtup - MINIMAL_TUPLE_OFFSET);
    /* t_self and t_tableOid are left unset because access to them is not allowed. */

    if (!shouldFree)
    {
        Assert(!TTS_SHOULDFREE(slot));
        return;
    }

    slot->tts_flags |= TTS_FLAG_SHOULDFREE;
}
#ifndef FRONTEND
/*
 * ExecQualAndReset
 *		Evaluate a qual with ExecQual, then reset the expression context's
 *		per-tuple memory context. Returns the result of ExecQual.
 */
static inline bool
ExecQualAndReset(ExprState *state, ExprContext *econtext)
{
    bool qual_passed;

    qual_passed = ExecQual(state, econtext);

    /*
     * Open-coded ResetExprContext, to avoid a declaration-ordering issue in
     * this file.
     */
    MemoryContextReset(econtext->ecxt_per_tuple_memory);

    return qual_passed;
}
#endif
// Set the HEAP_TUPLE_HAS_MATCH bit in t_infomask2 of the tuple header tup.
#define HeapTupleHeaderSetMatch(tup) \
( \
(tup)->t_infomask2 |= HEAP_TUPLE_HAS_MATCH \
)
三、跟踪分析
测试脚本如下
testdb=# set enable_nestloop=false;
SET
testdb=# set enable_mergejoin=false;
SET
testdb=# explain verbose select dw.*,grjf.grbh,grjf.xm,grjf.ny,grjf.je
testdb-# from t_dwxx dw,lateral (select gr.grbh,gr.xm,jf.ny,jf.je
testdb(# from t_grxx gr inner join t_jfxx jf
testdb(# on gr.dwbh = dw.dwbh
testdb(# and gr.grbh = jf.grbh) grjf
testdb-# order by dw.dwbh;
QUERY PLAN
-----------------------------------------------------------------------------------------------
Sort (cost=14828.83..15078.46 rows=99850 width=47)
Output: dw.dwmc, dw.dwbh, dw.dwdz, gr.grbh, gr.xm, jf.ny, jf.je
Sort Key: dw.dwbh
-> Hash Join (cost=3176.00..6537.55 rows=99850 width=47)
Output: dw.dwmc, dw.dwbh, dw.dwdz, gr.grbh, gr.xm, jf.ny, jf.je
Hash Cond: ((gr.grbh)::text = (jf.grbh)::text)
-> Hash Join (cost=289.00..2277.61 rows=99850 width=32)
Output: dw.dwmc, dw.dwbh, dw.dwdz, gr.grbh, gr.xm
Inner Unique: true
Hash Cond: ((gr.dwbh)::text = (dw.dwbh)::text)
-> Seq Scan on public.t_grxx gr (cost=0.00..1726.00 rows=100000 width=16)
Output: gr.dwbh, gr.grbh, gr.xm, gr.xb, gr.nl
-> Hash (cost=164.00..164.00 rows=10000 width=20)
Output: dw.dwmc, dw.dwbh, dw.dwdz
-> Seq Scan on public.t_dwxx dw (cost=0.00..164.00 rows=10000 width=20)
Output: dw.dwmc, dw.dwbh, dw.dwdz
-> Hash (cost=1637.00..1637.00 rows=100000 width=20)
Output: jf.ny, jf.je, jf.grbh
-> Seq Scan on public.t_jfxx jf (cost=0.00..1637.00 rows=100000 width=20)
Output: jf.ny, jf.je, jf.grbh
(20 rows)
启动gdb,设置断点
(gdb) b ExecScanHashBucket
Breakpoint 1 at 0x6ff25b: file nodeHash.c, line 1910.
(gdb) c
Continuing.
Breakpoint 1, ExecScanHashBucket (hjstate=0x2bb8738, econtext=0x2bb8950) at nodeHash.c:1910
1910 ExprState *hjclauses = hjstate->hashclauses;
设置相关变量
1910 ExprState *hjclauses = hjstate->hashclauses;
(gdb) n
1911 HashJoinTable hashtable = hjstate->hj_HashTable;
(gdb)
1912 HashJoinTuple hashTuple = hjstate->hj_CurTuple;
(gdb)
1913 uint32 hashvalue = hjstate->hj_CurHashValue;
(gdb)
1922 if (hashTuple != NULL)
hash join连接条件
(gdb) p *hjclauses
$1 = {tag = {type = T_ExprState}, flags = 7 '\a', resnull = false, resvalue = 0, resultslot = 0x0, steps = 0x2bc4bc8,
evalfunc = 0x6d1a6e <ExecInterpExprStillValid>, expr = 0x2bb60c0, evalfunc_private = 0x6cf625 <ExecInterpExpr>,
steps_len = 7, steps_alloc = 16, parent = 0x2bb8738, ext_params = 0x0, innermost_caseval = 0x0, innermost_casenull = 0x0,
innermost_domainval = 0x0, innermost_domainnull = 0x0}
hash表
(gdb) p hashtable
$2 = (HashJoinTable) 0x2bc9de8
(gdb) p *hashtable
$3 = {nbuckets = 16384, log2_nbuckets = 14, nbuckets_original = 16384, nbuckets_optimal = 16384,
log2_nbuckets_optimal = 14, buckets = {unshared = 0x7f0fc1345050, shared = 0x7f0fc1345050}, keepNulls = false,
skewEnabled = false, skewBucket = 0x0, skewBucketLen = 0, nSkewBuckets = 0, skewBucketNums = 0x0, nbatch = 1,
curbatch = 0, nbatch_original = 1, nbatch_outstart = 1, growEnabled = true, totalTuples = 10000, partialTuples = 10000,
skewTuples = 0, innerBatchFile = 0x0, outerBatchFile = 0x0, outer_hashfunctions = 0x2bdc228,
inner_hashfunctions = 0x2bdc280, hashStrict = 0x2bdc2d8, spaceUsed = 677754, spaceAllowed = 16777216, spacePeak = 677754,
spaceUsedSkew = 0, spaceAllowedSkew = 335544, hashCxt = 0x2bdc110, batchCxt = 0x2bde120, chunks = 0x2c708f0,
current_chunk = 0x0, area = 0x0, parallel_state = 0x0, batches = 0x0, current_chunk_shared = 0}
当前元组hashTuple(取自hj_CurTuple,此时为NULL,表示将从桶的头部开始扫描)&hash值
(gdb) p *hashTuple
Cannot access memory at address 0x0
(gdb) p hashvalue
$4 = 2324234220
(gdb)
从常规hash桶中获取hash元组
(gdb) n
1924 else if (hjstate->hj_CurSkewBucketNo != INVALID_SKEW_BUCKET_NO)
(gdb) p hjstate->hj_CurSkewBucketNo
$5 = -1
(gdb) n
1927 hashTuple = hashtable->buckets.unshared[hjstate->hj_CurBucketNo];
(gdb)
1929 while (hashTuple != NULL)
(gdb) p hjstate->hj_CurBucketNo
$7 = 16364
(gdb) p *hashTuple
$6 = {next = {unshared = 0x0, shared = 0}, hashvalue = 1822113772}
判断hash值是否一致
(gdb) n
1931 if (hashTuple->hashvalue == hashvalue)
(gdb) p hashTuple->hashvalue
$8 = 1822113772
(gdb) p hashvalue
$9 = 2324234220
(gdb)
不一致,继续下一个元组
(gdb) n
1948 hashTuple = hashTuple->next.unshared;
(gdb)
1929 while (hashTuple != NULL)
下一个元组为NULL,返回F,说明没有匹配的元组
(gdb) p *hashTuple
Cannot access memory at address 0x0
(gdb) n
1954 return false;
在ExecStoreMinimalTuple上设置断点(这时候Hash值是一致的)
(gdb) b ExecStoreMinimalTuple
Breakpoint 2 at 0x6e8cbf: file execTuples.c, line 427.
(gdb) c
Continuing.
Breakpoint 1, ExecScanHashBucket (hjstate=0x2bb8738, econtext=0x2bb8950) at nodeHash.c:1910
1910 ExprState *hjclauses = hjstate->hashclauses;
(gdb) del 1
(gdb) c
Continuing.
Breakpoint 2, ExecStoreMinimalTuple (mtup=0x2be81b0, slot=0x2bb9c18, shouldFree=false) at execTuples.c:427
427 Assert(mtup != NULL);
(gdb) finish
Run till exit from #0 ExecStoreMinimalTuple (mtup=0x2be81b0, slot=0x2bb9c18, shouldFree=false) at execTuples.c:427
0x00000000006ff335 in ExecScanHashBucket (hjstate=0x2bb8738, econtext=0x2bb8950) at nodeHash.c:1936
1936 inntuple = ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(hashTuple),
Value returned is $10 = (TupleTableSlot *) 0x2bb9c18
(gdb) n
1939 econtext->ecxt_innertuple = inntuple;
匹配成功,返回T
(gdb) n
1941 if (ExecQualAndReset(hjclauses, econtext))
(gdb)
1943 hjstate->hj_CurTuple = hashTuple;
(gdb)
1944 return true;
(gdb)
1955 }
(gdb)
在HJ_SCAN_BUCKET阶段,ExecScanHashBucket扫描Hash桶,寻找inner relation中与当前outer relation元组匹配的元组:先比较hash值,hash值一致时再用hashclauses校验连接条件;如匹配,则把匹配的元组存储在hjstate->hj_CurTuple中并返回true,否则返回false。
“PostgreSQL的ExecHashJoin依赖其他函数的实现逻辑是什么”的内容就介绍到这里了,感谢大家的阅读。如果想了解更多行业相关的知识可以关注亿速云网站,小编将为大家输出更多高质量的实用文章!