这篇文章主要讲解了“PostgreSQL的set_base_rel_sizes函数分析”,文中的讲解内容简单清晰,易于学习与理解,下面请大家跟着小编的思路慢慢深入,一起来研究和学习“PostgreSQL的set_base_rel_sizes函数分析”吧!
在set_base_rel_sizes函数调用过程中,如RTE为子查询,则生成子查询的访问路径,通过调用函数set_subquery_pathlist实现。
make_one_rel源代码:
RelOptInfo *
make_one_rel(PlannerInfo *root, List *joinlist)
{
//...
set_base_rel_sizes(root);//估算Relation的Size并且设置consider_parallel标记
//...
}
一、数据结构
RelOptInfo
如前所述,RelOptInfo数据结构贯穿整个优化过程.
typedef struct RelOptInfo
{
NodeTag type;//节点标识
RelOptKind reloptkind;//RelOpt类型
Relids relids;
double rows;
bool consider_startup;
bool consider_param_startup;
bool consider_parallel;
struct PathTarget *reltarget;
List *pathlist;
List *ppilist;
List *partial_pathlist;
struct Path *cheapest_startup_path;//代价最低的启动路径
struct Path *cheapest_total_path;//代价最低的整体路径
struct Path *cheapest_unique_path;//代价最低的获取唯一值的路径
List *cheapest_parameterized_paths;//代价最低的参数化路径链表
Relids direct_lateral_relids;
Relids lateral_relids;
//reloptkind=RELOPT_BASEREL时使用的数据结构
Index relid;
Oid reltablespace;
RTEKind rtekind;
AttrNumber min_attr;
AttrNumber max_attr;
Relids *attr_needed;
int32 *attr_widths;
List *lateral_vars;
Relids lateral_referencers;
List *indexlist;
List *statlist;
BlockNumber pages;
double tuples;
double allvisfrac;
PlannerInfo *subroot;
List *subplan_params;
int rel_parallel_workers;
//FDW相关信息
Oid serverid;
Oid userid;
bool useridiscurrent;
struct FdwRoutine *fdwroutine;
void *fdw_private;
//已知的,可保证唯一元组返回的Relids链表
List *unique_for_rels;
List *non_unique_for_rels;
List *baserestrictinfo;
QualCost baserestrictcost;
Index baserestrict_min_security;
List *joininfo;
bool has_eclass_joins;
//是否尝试partitionwise连接,这是PG 11的一个新特性.
bool consider_partitionwise_join;
Relids top_parent_relids;
//分区表使用
PartitionScheme part_scheme;
int nparts;
struct PartitionBoundInfoData *boundinfo;
List *partition_qual;
struct RelOptInfo **part_rels;
List **partexprs;
List **nullable_partexprs;
List *partitioned_child_rels;
} RelOptInfo;
二、源码解读
set_rel_size
如前所述,set_rel_size函数估算关系的大小,如RTE为子查询,则调用set_subquery_pathlist方法生成子查询访问路径,相关代码如下:
static void
set_rel_size(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte)
{
if (rel->reloptkind == RELOPT_BASEREL &&
relation_excluded_by_constraints(root, rel, rte))
{
//...
}
else
{
switch (rel->rtekind)
{
case //...
//...
case RTE_SUBQUERY://子查询
set_subquery_pathlist(root, rel, rti, rte);//生成子查询访问路径
break;
case ...
}
//...
}
//...
}
set_subquery_pathlist
生成子查询的扫描路径,在生成过程中尝试下推外层的限制条件(减少参与运算的元组数量),并调用subquery_planner生成执行计划.
static void
set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
Index rti, RangeTblEntry *rte)
{
Query *parse = root->parse;
Query *subquery = rte->subquery;
Relids required_outer;
pushdown_safety_info safetyInfo;
double tuple_fraction;
RelOptInfo *sub_final_rel;
ListCell *lc;
subquery = copyObject(subquery);//拷贝
required_outer = rel->lateral_relids;//外层的Relids
memset(&safetyInfo, 0, sizeof(safetyInfo));
safetyInfo.unsafeColumns = (bool *)
palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
safetyInfo.unsafeLeaky = rte->security_barrier;
if (rel->baserestrictinfo != NIL &&
subquery_is_pushdown_safe(subquery, subquery, &safetyInfo))
{
//可以下推限制条件
List *upperrestrictlist = NIL;
ListCell *l;
foreach(l, rel->baserestrictinfo)//遍历子查询上的限制条件
{
RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
Node *clause = (Node *) rinfo->clause;
if (!rinfo->pseudoconstant &&
qual_is_pushdown_safe(subquery, rti, clause, &safetyInfo))
{
subquery_push_qual(subquery, rte, rti, clause);//下推限制条件
}
else
{
upperrestrictlist = lappend(upperrestrictlist, rinfo);//保留在上层中
}
}
rel->baserestrictinfo = upperrestrictlist;
}
pfree(safetyInfo.unsafeColumns);
remove_unused_subquery_outputs(subquery, rel);
if (parse->hasAggs ||
parse->groupClause ||
parse->groupingSets ||
parse->havingQual ||
parse->distinctClause ||
parse->sortClause ||
has_multiple_baserels(root))
tuple_fraction = 0.0;
else
tuple_fraction = root->tuple_fraction;
Assert(root->plan_params == NIL);
rel->subroot = subquery_planner(root->glob, subquery,
root,
false, tuple_fraction);//调用subquery_planner获取子查询的执行计划
rel->subplan_params = root->plan_params;
root->plan_params = NIL;
sub_final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);//子查询返回的最终关系
if (IS_DUMMY_REL(sub_final_rel))
{
set_dummy_rel_pathlist(rel);
return;
}
set_subquery_size_estimates(root, rel);//设置子查询的估算信息
foreach(lc, sub_final_rel->pathlist)//遍历最终关系的访问路径
{
Path *subpath = (Path *) lfirst(lc);
List *pathkeys;
//转换pathkeys为外层的表示法
pathkeys = convert_subquery_pathkeys(root,
rel,
subpath->pathkeys,
make_tlist_from_pathtarget(subpath->pathtarget));
add_path(rel, (Path *)
create_subqueryscan_path(root, rel, subpath,
pathkeys, required_outer));//通过子查询路径生成外层访问路径
}
if (rel->consider_parallel && bms_is_empty(required_outer))//是否可以并行处理
{
Assert(sub_final_rel->consider_parallel ||
sub_final_rel->partial_pathlist == NIL);
foreach(lc, sub_final_rel->partial_pathlist)
{
Path *subpath = (Path *) lfirst(lc);
List *pathkeys;
pathkeys = convert_subquery_pathkeys(root,
rel,
subpath->pathkeys,
make_tlist_from_pathtarget(subpath->pathtarget));
add_partial_path(rel, (Path *)
create_subqueryscan_path(root, rel, subpath,
pathkeys,
required_outer));
}
}
}
//-------------------------------------------------------- create_subqueryscan_path
SubqueryScanPath *
create_subqueryscan_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
List *pathkeys, Relids required_outer)
{
SubqueryScanPath *pathnode = makeNode(SubqueryScanPath);
pathnode->path.pathtype = T_SubqueryScan;//路径类型:子查询扫描
pathnode->path.parent = rel;//父RelOptInfo
pathnode->path.pathtarget = rel->reltarget;//投影列
pathnode->path.param_info = get_baserel_parampathinfo(root, rel,
required_outer);//参数化信息
pathnode->path.parallel_aware = false;//并行相关参数
pathnode->path.parallel_safe = rel->consider_parallel &&
subpath->parallel_safe;
pathnode->path.parallel_workers = subpath->parallel_workers;
pathnode->path.pathkeys = pathkeys;//排序键
pathnode->subpath = subpath;//子访问路径
cost_subqueryscan(pathnode, root, rel, pathnode->path.param_info);//子查询的成本
return pathnode;
}
//-------------------------------------------------------- cost_subqueryscan
void
cost_subqueryscan(SubqueryScanPath *path, PlannerInfo *root,
RelOptInfo *baserel, ParamPathInfo *param_info)
{
Cost startup_cost;
Cost run_cost;
QualCost qpqual_cost;
Cost cpu_per_tuple;
Assert(baserel->relid > 0);
Assert(baserel->rtekind == RTE_SUBQUERY);
if (param_info)
path->path.rows = param_info->ppi_rows;
else
path->path.rows = baserel->rows;
path->path.startup_cost = path->subpath->startup_cost;
path->path.total_cost = path->subpath->total_cost;
get_restriction_qual_cost(root, baserel, param_info, &qpqual_cost);
startup_cost = qpqual_cost.startup;
cpu_per_tuple = cpu_tuple_cost + qpqual_cost.per_tuple;
run_cost = cpu_per_tuple * baserel->tuples;
startup_cost += path->path.pathtarget->cost.startup;
run_cost += path->path.pathtarget->cost.per_tuple * path->path.rows;
path->path.startup_cost += startup_cost;
path->path.total_cost += startup_cost + run_cost;
}
三、跟踪分析
测试脚本如下:
select t1.*,t2.dwbh,t2.counter
from t_dwxx t1,
(select dwbh,count(*) as counter from t_grxx group by dwbh) t2
where t1.dwbh = t2.dwbh and t1.dwbh = '1001';
启动gdb:
(gdb) c
Continuing.
Breakpoint 1, set_subquery_pathlist (root=0x2d749b0, rel=0x2d34dd0, rti=2, rte=0x2d341a0) at allpaths.c:2082
2082 Query *parse = root->parse;
进入函数set_subquery_pathlist,输入参数中的root->simple_rel_array[2],rtekind为RTE_SUBQUERY子查询
(gdb) p *root->simple_rel_array[2]
$13 = {type = T_RelOptInfo, reloptkind = RELOPT_BASEREL, relids = 0x2d6a428, rows = 0, consider_startup = false,
consider_param_startup = false, consider_parallel = true, reltarget = 0x2d6a440, pathlist = 0x0, ppilist = 0x0,
partial_pathlist = 0x0, cheapest_startup_path = 0x0, cheapest_total_path = 0x0, cheapest_unique_path = 0x0,
cheapest_parameterized_paths = 0x0, direct_lateral_relids = 0x0, lateral_relids = 0x0, relid = 2, reltablespace = 0,
rtekind = RTE_SUBQUERY, min_attr = 0, max_attr = 2, attr_needed = 0x2d69b00, attr_widths = 0x2d69b50, lateral_vars = 0x0,
lateral_referencers = 0x0, indexlist = 0x0, statlist = 0x0, pages = 0, tuples = 0, allvisfrac = 0, subroot = 0x0,
subplan_params = 0x0, rel_parallel_workers = -1, serverid = 0, userid = 0, useridiscurrent = false, fdwroutine = 0x0,
fdw_private = 0x0, unique_for_rels = 0x0, non_unique_for_rels = 0x0, baserestrictinfo = 0x2d6b648, baserestrictcost = {
startup = 0, per_tuple = 0}, baserestrict_min_security = 0, joininfo = 0x0, has_eclass_joins = true,
top_parent_relids = 0x0, part_scheme = 0x0, nparts = 0, boundinfo = 0x0, partition_qual = 0x0, part_rels = 0x0,
partexprs = 0x0, nullable_partexprs = 0x0, partitioned_child_rels = 0x0}
(gdb) p *rte
$10 = {type = T_RangeTblEntry, rtekind = RTE_SUBQUERY, relid = 0, relkind = 0 '\000', tablesample = 0x0,
subquery = 0x2d342b0, security_barrier = false, jointype = JOIN_INNER, joinaliasvars = 0x0, functions = 0x0,
funcordinality = false, tablefunc = 0x0, values_lists = 0x0, ctename = 0x0, ctelevelsup = 0, self_reference = false,
coltypes = 0x0, coltypmods = 0x0, colcollations = 0x0, enrname = 0x0, enrtuples = 0, alias = 0x2c82728, eref = 0x2d35328,
lateral = false, inh = false, inFromCl = true, requiredPerms = 0, checkAsUser = 0, selectedCols = 0x0,
insertedCols = 0x0, updatedCols = 0x0, securityQuals = 0x0}
下推限制条件:
...
(gdb) n
2157 qual_is_pushdown_safe(subquery, rti, clause, &safetyInfo))
(gdb)
2156 if (!rinfo->pseudoconstant &&
(gdb)
2160 subquery_push_qual(subquery, rte, rti, clause);
tuple_fraction设置为0.0
...
(gdb)
2193 tuple_fraction = 0.0;
调用subquery_planner获取执行计划:
(gdb) n
2201 rel->subroot = subquery_planner(root->glob, subquery,
获取子查询生成的最终关系,reloptkind为RELOPT_UPPER_REL
...
(gdb)
2214 sub_final_rel = fetch_upper_rel(rel->subroot, UPPERREL_FINAL, NULL);
(gdb)
2216 if (IS_DUMMY_REL(sub_final_rel))
(gdb) p *sub_final_rel
$16 = {type = T_RelOptInfo, reloptkind = RELOPT_UPPER_REL, relids = 0x0, rows = 0, consider_startup = false,
consider_param_startup = false, consider_parallel = true, reltarget = 0x2d7bd50, pathlist = 0x2d7be10, ppilist = 0x0,
partial_pathlist = 0x0, cheapest_startup_path = 0x2d7aaa8, cheapest_total_path = 0x2d7aaa8, cheapest_unique_path = 0x0,
cheapest_parameterized_paths = 0x2d7be60, direct_lateral_relids = 0x0, lateral_relids = 0x0, relid = 0,
reltablespace = 0, rtekind = RTE_RELATION, min_attr = 0, max_attr = 0, attr_needed = 0x0, attr_widths = 0x0,
lateral_vars = 0x0, lateral_referencers = 0x0, indexlist = 0x0, statlist = 0x0, pages = 0, tuples = 0, allvisfrac = 0,
subroot = 0x0, subplan_params = 0x0, rel_parallel_workers = 0, serverid = 0, userid = 0, useridiscurrent = false,
fdwroutine = 0x0, fdw_private = 0x0, unique_for_rels = 0x0, non_unique_for_rels = 0x0, baserestrictinfo = 0x0,
baserestrictcost = {startup = 0, per_tuple = 0}, baserestrict_min_security = 0, joininfo = 0x0, has_eclass_joins = false,
top_parent_relids = 0x0, part_scheme = 0x0, nparts = 0, boundinfo = 0x0, partition_qual = 0x0, part_rels = 0x0,
partexprs = 0x0, nullable_partexprs = 0x0, partitioned_child_rels = 0x0}
成本最低的路径
(gdb) p *sub_final_rel->cheapest_total_path
$17 = {type = T_AggPath, pathtype = T_Agg, parent = 0x2d7b6d0, pathtarget = 0x2d7adc8, param_info = 0x0,
parallel_aware = false, parallel_safe = true, parallel_workers = 0, rows = 10, startup_cost = 0.29249999999999998,
total_cost = 20.143376803383145, pathkeys = 0x0}
通过子查询访问路径生成外层的访问路径
(gdb) n
2227 set_subquery_size_estimates(root, rel);
(gdb)
2233 foreach(lc, sub_final_rel->pathlist)
(gdb)
2235 Path *subpath = (Path *) lfirst(lc);
(gdb)
2239 pathkeys = convert_subquery_pathkeys(root,
(gdb)
2246 create_subqueryscan_path(root, rel, subpath,
(gdb)
2245 add_path(rel, (Path *)
(gdb)
2233 foreach(lc, sub_final_rel->pathlist)
完成函数调用,结束处理
(gdb)
set_rel_size (root=0x2d749b0, rel=0x2d34dd0, rti=2, rte=0x2d341a0) at allpaths.c:380
380 break;
执行计划如下:
testdb=# explain verbose select t1.*,t2.dwbh,t2.counter from t_dwxx t1,(select dwbh,count(*) as counter from t_grxx group by dwbh) t2
where t1.dwbh = t2.dwbh and t1.dwbh = '1001';
QUERY PLAN
--------------------------------------------------------------------------------------------------------
Nested Loop (cost=0.58..28.65 rows=10 width=32)
Output: t1.dwmc, t1.dwbh, t1.dwdz, t_grxx.dwbh, (count(*))
-> Index Scan using t_dwxx_pkey on public.t_dwxx t1 (cost=0.29..8.30 rows=1 width=20)
Output: t1.dwmc, t1.dwbh, t1.dwdz
Index Cond: ((t1.dwbh)::text = '1001'::text)
-> GroupAggregate (cost=0.29..20.14 rows=10 width=12)
Output: t_grxx.dwbh, count(*)
Group Key: t_grxx.dwbh
-> Index Only Scan using idx_t_dwxx_grbh on public.t_grxx (cost=0.29..19.99 rows=10 width=4)
Output: t_grxx.dwbh
Index Cond: (t_grxx.dwbh = '1001'::text)
(11 rows)
感谢各位的阅读,以上就是“PostgreSQL的set_base_rel_sizes函数分析”的内容了,经过本文的学习后,相信大家对PostgreSQL的set_base_rel_sizes函数分析这一问题有了更深刻的体会,具体使用情况还需要大家实践验证。这里是亿速云,小编将为大家推送更多相关知识点的文章,欢迎关注!