文章详情

短信预约-IT技能 免费直播动态提醒

请输入下面的图形验证码

提交验证

短信预约提醒成功

PostgreSQL 源码解读(46)- 查询语句#31(query_planner函数#7)

2024-04-02 19:55

关注

先前的章节已介绍了函数query_planner中子函数reconsider_outer_join_clauses和generate_base_implied_equalities的主要实现逻辑,本节继续介绍query_planner中qp_callback(回调函数)、fix_placeholder_input_needed_levels函数的实现逻辑。

query_planner代码片段:

     //...
     
     (*qp_callback) (root, qp_extra);//调用回调函数,处理PathKeys
 
     
     fix_placeholder_input_needed_levels(root);//检查在子查询上拉时生成的PH表达式,确保Vars是OK的
 

     //...

一、数据结构

PlannerInfo与RelOptInfo结构体贯彻逻辑优化和物理优化过程的始终.
PlannerInfo

 
 struct AppendRelInfo;
 
 typedef struct PlannerInfo
 {
     NodeTag     type;//Node标识
 
     Query      *parse;          
 
     PlannerGlobal *glob;        
 
     Index       query_level;    
 
     struct PlannerInfo *parent_root;    
 
     
     List       *plan_params;    
     Bitmapset  *outer_params;
 
     
     
     struct RelOptInfo **simple_rel_array;   
     int         simple_rel_array_size;  
 
     
     RangeTblEntry **simple_rte_array;   
 
     
     struct AppendRelInfo **append_rel_array;//先前已介绍,在处理集合操作如UNION ALL时使用
 
     
     Relids      all_baserels;//"base rels"
 
     
     Relids      nullable_baserels;//Nullable-side端的"base rels"
 
     
     List       *join_rel_list;  
     struct HTAB *join_rel_hash; 
 
     
     List      **join_rel_level; 
     int         join_cur_level; 
 
     List       *init_plans;     
 
     List       *cte_plan_ids;   
 
     List       *multiexpr_params;   
 
     List       *eq_classes;     
 
     List       *canon_pathkeys; 
 
     List       *left_join_clauses;  
 
     List       *right_join_clauses; 
 
     List       *full_join_clauses;  
 
     List       *join_info_list; 
 
     List       *append_rel_list;    
 
     List       *rowMarks;       
 
     List       *placeholder_list;   
 
     List       *fkey_list;      
 
     List       *query_pathkeys; 
 
     List       *group_pathkeys; 
     List       *window_pathkeys;    
     List       *distinct_pathkeys;  
     List       *sort_pathkeys;  
 
     List       *part_schemes;   
 
     List       *initial_rels;   
 
     
     List       *upper_rels[UPPERREL_FINAL + 1]; 
 
     
     struct PathTarget *upper_targets[UPPERREL_FINAL + 1];//
 
     
     List       *processed_tlist;//最后需处理的投影列
 
     
     AttrNumber *grouping_map;   
     List       *minmax_aggs;    
 
     MemoryContext planner_cxt;  
 
     double      total_table_pages;  
 
     double      tuple_fraction; 
     double      limit_tuples;   
 
     Index       qual_security_level;    
     
 
     InheritanceKind inhTargetKind;  
     bool        hasJoinRTEs;    
     bool        hasLateralRTEs; 
     bool        hasDeletedRTEs; 
     bool        hasHavingQual;  
     bool        hasPseudoConstantQuals; 
     bool        hasRecursion;   
 
     
     int         wt_param_id;    
     struct Path *non_recursive_path;    
 
     
     Relids      curOuterRels;   
     List       *curOuterParams; 
 
     
     void       *join_search_private;
 
     
     bool        partColsUpdated;
 } PlannerInfo;
 

RelOptInfo

 typedef struct RelOptInfo
 {
     NodeTag     type;//节点标识
 
     RelOptKind  reloptkind;//RelOpt类型
 
     
     Relids      relids;         
 
     
     double      rows;           
 
     
     bool        consider_startup;   
     bool        consider_param_startup; 
     bool        consider_parallel;  
 
     
     struct PathTarget *reltarget;   
 
     
     List       *pathlist;       
     List       *ppilist;        
     List       *partial_pathlist;   
     struct Path *cheapest_startup_path;//代价最低的启动路径
     struct Path *cheapest_total_path;//代价最低的整体路径
     struct Path *cheapest_unique_path;//代价最低的获取唯一值的路径
     List       *cheapest_parameterized_paths;//代价最低的参数化?路径链表
 
     
     
     Relids      direct_lateral_relids;  
     Relids      lateral_relids; 
 
     
     //reloptkind=RELOPT_BASEREL时使用的数据结构
     Index       relid;          
     Oid         reltablespace;  
     RTEKind     rtekind;        
     AttrNumber  min_attr;       
     AttrNumber  max_attr;       
     Relids     *attr_needed;    
     int32      *attr_widths;    
     List       *lateral_vars;   
     Relids      lateral_referencers;    
     List       *indexlist;      
     List       *statlist;       
     BlockNumber pages;          
     double      tuples;         
     double      allvisfrac;     
     PlannerInfo *subroot;       
     List       *subplan_params; 
     int         rel_parallel_workers;   
 
     
     //FWD相关信息
     Oid         serverid;       
     Oid         userid;         
     bool        useridiscurrent;    
     
     struct FdwRoutine *fdwroutine;
     void       *fdw_private;
 
     
     //已知的,可保证唯一的Relids链表
     List       *unique_for_rels;    
     List       *non_unique_for_rels;    
 
     
     List       *baserestrictinfo;   
     QualCost    baserestrictcost;   
     Index       baserestrict_min_security;  
     List       *joininfo;       
     bool        has_eclass_joins;   
 
     
     bool        consider_partitionwise_join;    
     Relids      top_parent_relids;  
 
     
     //分区表使用
     PartitionScheme part_scheme;    
     int         nparts;         
     struct PartitionBoundInfoData *boundinfo;   
     List       *partition_qual; 
     struct RelOptInfo **part_rels;  
     List      **partexprs;      
     List      **nullable_partexprs; 
     List       *partitioned_child_rels; 
 } RelOptInfo;

PlaceHolderInfo


 
 typedef struct PlaceHolderInfo
 {
     NodeTag     type;
 
     Index       phid;           
     PlaceHolderVar *ph_var;     
     Relids      ph_eval_at;     
     Relids      ph_lateral;     
     Relids      ph_needed;      
     int32       ph_width;       
 } PlaceHolderInfo;

二、源码解读

standard_qp_callback
标准的query_planner回调函数,在生成计划的期间处理query_pathkeys和其他pathkeys

 
 static void
 standard_qp_callback(PlannerInfo *root, void *extra)
 {
     Query      *parse = root->parse;//查询树
     standard_qp_extra *qp_extra = (standard_qp_extra *) extra;//参数
     List       *tlist = qp_extra->tlist;
     List       *activeWindows = qp_extra->activeWindows;
 
     
     if (qp_extra->groupClause &&
         grouping_is_sortable(qp_extra->groupClause))//group语句&要求排序
         root->group_pathkeys =
             make_pathkeys_for_sortclauses(root,
                                           qp_extra->groupClause,
                                           tlist);//构建pathkeys
     else
         root->group_pathkeys = NIL;
 
     
     if (activeWindows != NIL)//窗口函数
     {
         WindowClause *wc = linitial_node(WindowClause, activeWindows);
 
         root->window_pathkeys = make_pathkeys_for_window(root,
                                                          wc,
                                                          tlist);
     }
     else
         root->window_pathkeys = NIL;
 
     if (parse->distinctClause &&
         grouping_is_sortable(parse->distinctClause))//存在distinct语句&按相关字段排序
         root->distinct_pathkeys =
             make_pathkeys_for_sortclauses(root,
                                           parse->distinctClause,
                                           tlist);//构建pathkeys
     else
         root->distinct_pathkeys = NIL;
 
     root->sort_pathkeys =
         make_pathkeys_for_sortclauses(root,
                                       parse->sortClause,
                                       tlist);//构建常规的排序pathkeys
 
     
     if (root->groupremove_useless_joins_pathkeys)
         root->query_pathkeys = root->group_pathkeys;
     else if (root->window_pathkeys)
         root->query_pathkeys = root->window_pathkeys;
     else if (list_length(root->distinct_pathkeys) >
              list_length(root->sort_pathkeys))
         root->query_pathkeys = root->distinct_pathkeys;
     else if (root->sort_pathkeys)
         root->query_pathkeys = root->sort_pathkeys;
     else
         root->query_pathkeys = NIL;
 }


 List *
 make_pathkeys_for_sortclauses(PlannerInfo *root,
                               List *sortclauses,
                               List *tlist)
 {
     List       *pathkeys = NIL;
     ListCell   *l;
 
     foreach(l, sortclauses)
     {
         SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
         Expr       *sortkey;
         PathKey    *pathkey;
 
         sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist);
         Assert(OidIsValid(sortcl->sortop));
         pathkey = make_pathkey_from_sortop(root,
                                            sortkey,
                                            root->nullable_baserels,
                                            sortcl->sortop,
                                            sortcl->nulls_first,
                                            sortcl->tleSortGroupRef,
                                            true);
 
         
         if (!pathkey_is_redundant(pathkey, pathkeys))//不是多余的Key的情况下,才保留
             pathkeys = lappend(pathkeys, pathkey);
     }
     return pathkeys;
 }

测试脚本:

testdb=# select t1.dwbh,t2.grbh
from t_dwxx t1 left join t_grxx t2 on t1.dwbh = t2.dwbh
where t1.dwbh = '1001'
order by t1.dwbh;

跟踪分析,进入make_pathkeys_for_sortclauses函数:

...
(gdb) step
make_pathkeys_for_sortclauses (root=0x1702958, sortclauses=0x170d068, tlist=0x1746758) at pathkeys.c:878
878   List     *pathkeys = NIL;
(gdb) p *(SortGroupClause *)sortclauses->head->data.ptr_value
$5 = {type = T_SortGroupClause, tleSortGroupRef = 1, eqop = 98, sortop = 664, nulls_first = false, hashable = true}

...
(gdb) n
889     pathkey = make_pathkey_from_sortop(root,
(gdb) 
898     if (!pathkey_is_redundant(pathkey, pathkeys))
(gdb) p *pathkey
$11 = {type = T_PathKey, pk_eclass = 0x17486c0, pk_opfamily = 1994, pk_strategy = 1, pk_nulls_first = false}

函数pathkey_is_redundant通过等价类判断排序是否多余(redundant),在本例中,已存在限制条件dwbh='1001',因此该排序是多余的,返回NULL.

901   return pathkeys;
(gdb) p *pathkeys
Cannot access memory at address 0x0

fix_placeholder_input_needed_levels

 
 void
 fix_placeholder_input_needed_levels(PlannerInfo *root)
 {
     ListCell   *lc;
 
     foreach(lc, root->placeholder_list)//遍历链表
     {
         PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
         List       *vars = pull_var_clause((Node *) phinfo->ph_var->phexpr,
                                            PVC_RECURSE_AGGREGATES |
                                            PVC_RECURSE_WINDOWFUNCS |
                                            PVC_INCLUDE_PLACEHOLDERS);//获取Vars
 
         add_vars_to_targetlist(root, vars, phinfo->ph_eval_at, false);//添加到投影列中
         list_free(vars);
     }
 }

考察下面的SQL语句:

testdb=# explain verbose select t1.dwbh,t2.grbh,t2.constant_field
from t_dwxx t1 left join (select a.dwbh,a.grbh,'TEST' as constant_field from t_grxx a) t2 on t1.dwbh = t2.dwbh  
where t1.dwbh = '1001'
order by t1.dwbh;
                               QUERY PLAN                               
------------------------------------------------------------------------
 Nested Loop Left Join  (cost=0.00..16.06 rows=2 width=108)
   Output: t1.dwbh, a.grbh, ('TEST'::text) -- PlaceHolderVar
   Join Filter: ((t1.dwbh)::text = (a.dwbh)::text)
   ->  Seq Scan on public.t_dwxx t1  (cost=0.00..1.04 rows=1 width=38)
         Output: t1.dwmc, t1.dwbh, t1.dwdz
         Filter: ((t1.dwbh)::text = '1001'::text)
   ->  Seq Scan on public.t_grxx a  (cost=0.00..15.00 rows=2 width=108)
         Output: a.grbh, a.dwbh, 'TEST'::text
         Filter: ((a.dwbh)::text = '1001'::text)
(9 rows)

子查询上拉与t_dwxx进行连接,上拉过程中,不能简单的把子查询中的"'TEST' as constant_field"作为上层查询的Var来看待(如果不作特殊处理,跟外连接就不等价了,因为该值有可能是NULL),PG因此引入了PlaceHolderVar这么一个Var来对这种变量进行特殊处理.

跟踪分析:

(gdb) b planmain.c:161
Breakpoint 1 at 0x769602: file planmain.c, line 161.
(gdb) c
Continuing.

Breakpoint 1, query_planner (root=0x1702b08, tlist=0x1749c20, qp_callback=0x76e97d <standard_qp_callback>, 
    qp_extra=0x7ffd35e059c0) at planmain.c:163
163   reconsider_outer_join_clauses(root);

注意root中的placeholder_list

(gdb) p *root
$1 = {type = T_PlannerInfo, ..., placeholder_list = 0x174bf00, ...}

查看其内存结构:

#1个PHV
(gdb) p *root->placeholder_list
$2 = {type = T_List, length = 1, head = 0x174bee0, tail = 0x174bee0}
(gdb) p *(Node *)root->placeholder_list->head->data.ptr_value
$3 = {type = T_PlaceHolderInfo}
(gdb) p *(PlaceHolderInfo *)root->placeholder_list->head->data.ptr_value
$4 = {type = T_PlaceHolderInfo, phid = 1, ph_var = 0x174be18, ph_eval_at = 0x174bec8, ph_lateral = 0x0, 
  ph_needed = 0x174bf30, ph_width = 32}
(gdb) set $phi=(PlaceHolderInfo *)root->placeholder_list->head->data.ptr_value
(gdb) p *$phi->ph_var
$5 = {xpr = {type = T_PlaceHolderVar}, phexpr = 0x174be48, phrels = 0x174beb0, phid = 1, phlevelsup = 0}
#该PHV位于编号为4的RTE中
(gdb) p *$phi->ph_eval_at
$6 = {nwords = 1, words = 0x174becc}
(gdb) p *$phi->ph_eval_at->words
$7 = 16
(gdb) p *$phi->ph_needed
$8 = {nwords = 1, words = 0x174bf34}
#该PHV在编号为1的RTE中需要用到
(gdb) p *$phi->ph_needed->words
$9 = 1

三、参考资料

planmain.c
what exactly is a PlaceHolderVAr

免责声明:

① 本站未注明“稿件来源”的信息均来自网络整理。其文字、图片和音视频稿件的所属权归原作者所有。本站收集整理出于非商业性的教育和科研之目的,并不意味着本站赞同其观点或证实其内容的真实性。仅作为临时的测试数据,供内部测试之用。本站并未授权任何人以任何方式主动获取本站任何信息。

② 本站未注明“稿件来源”的临时测试数据将在测试完成后最终做删除处理。有问题或投稿请发送至: 邮箱/279061341@qq.com QQ/279061341

软考中级精品资料免费领

  • 历年真题答案解析
  • 备考技巧名师总结
  • 高频考点精准押题
  • 资料下载
  • 历年真题
  • 2024年上半年信息系统项目管理师第二批次真题及答案解析(完整版)

    难度     813人已做
    查看
  • 【考后总结】2024年5月26日信息系统项目管理师第2批次考情分析

    难度     354人已做
    查看
  • 【考后总结】2024年5月25日信息系统项目管理师第1批次考情分析

    难度     318人已做
    查看
  • 2024年上半年软考高项第一、二批次真题考点汇总(完整版)

    难度     435人已做
    查看
  • 2024年上半年系统架构设计师考试综合知识真题

    难度     224人已做
    查看

相关文章

发现更多好内容

猜你喜欢

AI推送时光机
咦!没有更多了?去看看其它编程学习网 内容吧
首页课程
资料下载
问答资讯