这篇文章主要介绍“PostgreSQL中query_planner函数的处理逻辑分析”,在日常操作中,相信很多人在PostgreSQL中query_planner函数的处理逻辑分析问题上存在疑惑,小编查阅了各式资料,整理出简单好用的操作方法,希望对大家解答”PostgreSQL中query_planner函数的处理逻辑分析”的疑惑有所帮助!接下来,请跟着小编一起来学习吧!
一、重要的数据结构
RelOptInfo
查询语句经过查询重写/表达式简化/外连接消除等处理后,查询树Query已完成规范化,查询树中的RangeTblEntry(RTE)数据结构已完成其历史使命,由此可进入逻辑优化处理,在此阶段使用RelOptInfo数据结构.
typedef enum RelOptKind
{
RELOPT_BASEREL,//基本关系(如基表/子查询等)
RELOPT_JOINREL,//连接产生的关系,要注意的是通过连接等方式产生的结果亦可以视为关系
RELOPT_OTHER_MEMBER_REL,
RELOPT_OTHER_JOINREL,
RELOPT_UPPER_REL,//上层的关系
RELOPT_OTHER_UPPER_REL,
RELOPT_DEADREL
} RelOptKind;
#define IS_SIMPLE_REL(rel) \
((rel)->reloptkind == RELOPT_BASEREL || \
(rel)->reloptkind == RELOPT_OTHER_MEMBER_REL)
#define IS_JOIN_REL(rel) \
((rel)->reloptkind == RELOPT_JOINREL || \
(rel)->reloptkind == RELOPT_OTHER_JOINREL)
#define IS_UPPER_REL(rel) \
((rel)->reloptkind == RELOPT_UPPER_REL || \
(rel)->reloptkind == RELOPT_OTHER_UPPER_REL)
#define IS_OTHER_REL(rel) \
((rel)->reloptkind == RELOPT_OTHER_MEMBER_REL || \
(rel)->reloptkind == RELOPT_OTHER_JOINREL || \
(rel)->reloptkind == RELOPT_OTHER_UPPER_REL)
typedef struct RelOptInfo
{
NodeTag type;//节点标识
RelOptKind reloptkind;//RelOpt类型
Relids relids;
double rows;
bool consider_startup;
bool consider_param_startup;
bool consider_parallel;
struct PathTarget *reltarget;
List *pathlist;
List *ppilist;
List *partial_pathlist;
struct Path *cheapest_startup_path;//代价最低的启动路径
struct Path *cheapest_total_path;//代价最低的整体路径
struct Path *cheapest_unique_path;//代价最低的获取唯一值的路径
List *cheapest_parameterized_paths;//代价最低的参数化?路径链表
Relids direct_lateral_relids;
Relids lateral_relids;
//reloptkind=RELOPT_BASEREL时使用的数据结构
Index relid;
Oid reltablespace;
RTEKind rtekind;
AttrNumber min_attr;
AttrNumber max_attr;
Relids *attr_needed;
int32 *attr_widths;
List *lateral_vars;
Relids lateral_referencers;
List *indexlist;
List *statlist;
BlockNumber pages;
double tuples;
double allvisfrac;
PlannerInfo *subroot;
List *subplan_params;
int rel_parallel_workers;
//FWD相关信息
Oid serverid;
Oid userid;
bool useridiscurrent;
struct FdwRoutine *fdwroutine;
void *fdw_private;
//已知的,可保证唯一的Relids链表
List *unique_for_rels;
List *non_unique_for_rels;
List *baserestrictinfo;
QualCost baserestrictcost;
Index baserestrict_min_security;
List *joininfo;
bool has_eclass_joins;
bool consider_partitionwise_join;
Relids top_parent_relids;
//分区表使用
PartitionScheme part_scheme;
int nparts;
struct PartitionBoundInfoData *boundinfo;
List *partition_qual;
struct RelOptInfo **part_rels;
List **partexprs;
List **nullable_partexprs;
List *partitioned_child_rels;
} RelOptInfo;
PathCostComparison
typedef enum
{
COSTS_EQUAL,
COSTS_BETTER1,
COSTS_BETTER2,
COSTS_DIFFERENT
} PathCostComparison;
ResultPath
typedef struct ResultPath //表示无基础表的结果计划节点
{
Path path;//扫描路径
List *quals;//where语句表达式,bare clauses, not RestrictInfos
} ResultPath;
二、源码解读
RelOptInfo *
query_planner(PlannerInfo *root, List *tlist,
query_pathkeys_callback qp_callback, void *qp_extra)
{
Query *parse = root->parse;//查询树
List *joinlist;
RelOptInfo *final_rel;//结果
Index rti;//RTE的index
double total_pages;//总pages数
if (parse->jointree->fromlist == NIL)//简单SQL,无FROM/WHERE语句
{
final_rel = build_empty_join_rel(root);//创建返回结果
if (root->glob->parallelModeOK)//并行模式?
final_rel->consider_parallel =
is_parallel_safe(root, parse->jointree->quals);
add_path(final_rel, (Path *)
create_result_path(root, final_rel,
final_rel->reltarget,
(List *) parse->jointree->quals));//添加访问路径
set_cheapest(final_rel);//选择最优的访问路径
root->canon_pathkeys = NIL;
(*qp_callback) (root, qp_extra);//回调函数
return final_rel;//返回
}
//其他代码
...
}
add_path/set_cheapest
后续再行介绍
create_result_path
ResultPath *
create_result_path(PlannerInfo *root, RelOptInfo *rel,
PathTarget *target, List *resconstantqual)
{
ResultPath *pathnode = makeNode(ResultPath);//结果
pathnode->path.pathtype = T_Result;//扫描路径类型
pathnode->path.parent = rel;//路径的partentbuild_empty_join_rel
pathnode->path.pathtarget = target;//目标列
pathnode->path.param_info = NULL;
pathnode->path.parallel_aware = false;
pathnode->path.parallel_safe = rel->consider_parallel;
pathnode->path.parallel_workers = 0;//并行workers数目,设置为0
pathnode->path.pathkeys = NIL;//
pathnode->quals = resconstantqual;//表达式
pathnode->path.rows = 1;//行数为1
pathnode->path.startup_cost = target->cost.startup;
pathnode->path.total_cost = target->cost.startup +
cpu_tuple_cost + target->cost.per_tuple;
if (resconstantqual)
{
QualCost qual_cost;
cost_qual_eval(&qual_cost, resconstantqual, root);
pathnode->path.startup_cost += qual_cost.startup + qual_cost.per_tuple;
pathnode->path.total_cost += qual_cost.startup + qual_cost.per_tuple;
}
return pathnode;
}
build_empty_join_rel
RelOptInfo *
build_empty_join_rel(PlannerInfo *root)
{
RelOptInfo *joinrel;
Assert(root->join_rel_list == NIL);
joinrel = makeNode(RelOptInfo);
joinrel->reloptkind = RELOPT_JOINREL;
joinrel->relids = NULL;
joinrel->rows = 1;
joinrel->rtekind = RTE_JOIN;
joinrel->reltarget = create_empty_pathtarget();
root->join_rel_list = lappend(root->join_rel_list, joinrel);
return joinrel;
}
三、跟踪分析
(gdb) b query_planner
Breakpoint 1 at 0x76942c: file planmain.c, line 57.
(gdb) c
Continuing.
Breakpoint 1, query_planner (root=0x275c878, tlist=0x277fd78, qp_callback=0x76e97d <standard_qp_callback>,
qp_extra=0x7ffdd435d490) at planmain.c:57
57 Query *parse = root->parse;
(gdb) n
67 if (parse->jointree->fromlist == NIL)
(gdb)
70 final_rel = build_empty_join_rel(root);
(gdb)
78 if (root->glob->parallelModeOK)
#创建的空RELOPT_JOINREL
(gdb) p *final_rel
$4 = {type = T_RelOptInfo, reloptkind = RELOPT_JOINREL, relids = 0x0, rows = 1, consider_startup = false,
consider_param_startup = false, consider_parallel = false, reltarget = 0x277fda8, pathlist = 0x0, ppilist = 0x0,
partial_pathlist = 0x0, cheapest_startup_path = 0x0, cheapest_total_path = 0x0, cheapest_unique_path = 0x0,
cheapest_parameterized_paths = 0x0, direct_lateral_relids = 0x0, lateral_relids = 0x0, relid = 0, reltablespace = 0,
rtekind = RTE_JOIN, min_attr = 0, max_attr = 0, attr_needed = 0x0, attr_widths = 0x0, lateral_vars = 0x0,
lateral_referencers = 0x0, indexlist = 0x0, statlist = 0x0, pages = 0, tuples = 0, allvisfrac = 0, subroot = 0x0,
subplan_params = 0x0, rel_parallel_workers = 0, serverid = 0, userid = 0, useridiscurrent = false, fdwroutine = 0x0,
fdw_private = 0x0, unique_for_rels = 0x0, non_unique_for_rels = 0x0, baserestrictinfo = 0x0, baserestrictcost = {
startup = 0, per_tuple = 0}, baserestrict_min_security = 0, joininfo = 0x0, has_eclass_joins = false,
top_parent_relids = 0x0, part_scheme = 0x0, nparts = 0, boundinfo = 0x0, partition_qual = 0x0, part_rels = 0x0,
partexprs = 0x0, nullable_partexprs = 0x0, partitioned_child_rels = 0x0}
...
(gdb) step
add_path (parent_rel=0x275cc88, new_path=0x275c498) at pathnode.c:424
424 bool accept_new = true;
#创建的path(ResultPath)
(gdb) p *new_path
$6 = {type = T_ResultPath, pathtype = T_Result, parent = 0x275cc88, pathtarget = 0x277fda8, param_info = 0x0,
parallel_aware = false, parallel_safe = true, parallel_workers = 0, rows = 1, startup_cost = 0, total_cost = 0.01,
pathkeys = 0x0}
(gdb) finish
Run till exit from #0 add_path (parent_rel=0x275cc88, new_path=0x275c498) at pathnode.c:425
query_planner (root=0x275c878, tlist=0x277fd78, qp_callback=0x76e97d <standard_qp_callback>, qp_extra=0x7ffdd435d490)
at planmain.c:89
89 set_cheapest(final_rel);
...
98 return final_rel;
(gdb)
267 }
#返回值
(gdb) p *final_rel
$8 = {type = T_RelOptInfo, reloptkind = RELOPT_JOINREL, relids = 0x0, rows = 1, consider_startup = false,
consider_param_startup = false, consider_parallel = true, reltarget = 0x277fda8, pathlist = 0x277fe68, ppilist = 0x0,
partial_pathlist = 0x0, cheapest_startup_path = 0x275c498, cheapest_total_path = 0x275c498, cheapest_unique_path = 0x0,
cheapest_parameterized_paths = 0x277feb8, direct_lateral_relids = 0x0, lateral_relids = 0x0, relid = 0,
reltablespace = 0, rtekind = RTE_JOIN, min_attr = 0, max_attr = 0, attr_needed = 0x0, attr_widths = 0x0,
lateral_vars = 0x0, lateral_referencers = 0x0, indexlist = 0x0, statlist = 0x0, pages = 0, tuples = 0, allvisfrac = 0,
subroot = 0x0, subplan_params = 0x0, rel_parallel_workers = 0, serverid = 0, userid = 0, useridiscurrent = false,
fdwroutine = 0x0, fdw_private = 0x0, unique_for_rels = 0x0, non_unique_for_rels = 0x0, baserestrictinfo = 0x0,
baserestrictcost = {startup = 0, per_tuple = 0}, baserestrict_min_security = 0, joininfo = 0x0, has_eclass_joins = false,
top_parent_relids = 0x0, part_scheme = 0x0, nparts = 0, boundinfo = 0x0, partition_qual = 0x0, part_rels = 0x0,
partexprs = 0x0, nullable_partexprs = 0x0, partitioned_child_rels = 0x0}
(gdb) p *final_rel->cheapest_total_path
$9 = {type = T_ResultPath, pathtype = T_Result, parent = 0x275cc88, pathtarget = 0x277fda8, param_info = 0x0,
parallel_aware = false, parallel_safe = true, parallel_workers = 0, rows = 1, startup_cost = 0, total_cost = 0.01,
pathkeys = 0x0}
(gdb)
#DONE!
到此,关于“PostgreSQL中query_planner函数的处理逻辑分析”的学习就结束了,希望能够解决大家的疑惑。理论与实践的搭配能更好的帮助大家学习,快去试试吧!若想继续学习更多相关知识,请继续关注亿速云网站,小编会继续努力为大家带来更多实用的文章!