文章详情

短信预约-IT技能 免费直播动态提醒

请输入下面的图形验证码

提交验证

短信预约提醒成功

PostgreSQL中sort_inner_and_outer函数分析

2024-04-02 19:55

关注

这篇文章主要讲解了“PostgreSQL中sort_inner_and_outer函数分析”,文中的讲解内容简单清晰,易于学习与理解,下面请大家跟着小编的思路慢慢深入,一起来研究和学习“PostgreSQL中sort_inner_and_outer函数分析”吧!


merge join的算法实现伪代码如下:
READ data_set_1 SORT BY JOIN KEY TO temp_ds1
READ data_set_2 SORT BY JOIN KEY TO temp_ds2
READ ds1_row FROM temp_ds1
READ ds2_row FROM temp_ds2
WHILE NOT eof ON temp_ds1,temp_ds2 LOOP
IF ( temp_ds1.key = temp_ds2.key ) OUTPUT JOIN ds1_row,ds2_row
ELSIF ( temp_ds1.key <= temp_ds2.key ) READ ds1_row FROM temp_ds1
ELSIF ( temp_ds1.key => temp_ds2.key ) READ ds2_row FROM temp_ds2
END LOOP

一、数据结构

Cost相关
注意:实际使用的参数值通过系统配置文件定义,而不是这里的常量定义!

 typedef double Cost; 

 
 
 
 
 #define DEFAULT_SEQ_PAGE_COST  1.0       //顺序扫描page的成本
 #define DEFAULT_RANDOM_PAGE_COST  4.0      //随机扫描page的成本
 #define DEFAULT_CPU_TUPLE_COST  0.01     //处理一个元组的CPU成本
 #define DEFAULT_CPU_INDEX_TUPLE_COST 0.005   //处理一个索引元组的CPU成本
 #define DEFAULT_CPU_OPERATOR_COST  0.0025    //执行一次操作或函数的CPU成本
 #define DEFAULT_PARALLEL_TUPLE_COST 0.1    //并行执行,从一个worker传输一个元组到另一个worker的成本
 #define DEFAULT_PARALLEL_SETUP_COST  1000.0  //构建并行执行环境的成本
 
 #define DEFAULT_EFFECTIVE_CACHE_SIZE  524288    

 double      seq_page_cost = DEFAULT_SEQ_PAGE_COST;
 double      random_page_cost = DEFAULT_RANDOM_PAGE_COST;
 double      cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST;
 double      cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST;
 double      cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST;
 double      parallel_tuple_cost = DEFAULT_PARALLEL_TUPLE_COST;
 double      parallel_setup_cost = DEFAULT_PARALLEL_SETUP_COST;
 
 int         effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE;
 Cost        disable_cost = 1.0e10;//1后面10个0,通过设置一个巨大的成本,让优化器自动放弃此路径
 
 int         max_parallel_workers_per_gather = 2;//每次gather使用的worker数

二、源码解读

sort_inner_and_outer函数尝试构造merge join访问路径.
构造过程中的成本估算实现函数initial_cost_mergejoin和final_cost_mergejoin在下一节介绍.

//------------------------------------------------ sort_inner_and_outer


static void
sort_inner_and_outer(PlannerInfo *root,
                     RelOptInfo *joinrel,
                     RelOptInfo *outerrel,
                     RelOptInfo *innerrel,
                     JoinType jointype,
                     JoinPathExtraData *extra)
{
    JoinType    save_jointype = jointype;
    Path       *outer_path;
    Path       *inner_path;
    Path       *cheapest_partial_outer = NULL;
    Path       *cheapest_safe_inner = NULL;
    List       *all_pathkeys;
    ListCell   *l;

    
    outer_path = outerrel->cheapest_total_path;
    inner_path = innerrel->cheapest_total_path;

    
    if (PATH_PARAM_BY_REL(outer_path, innerrel) ||
        PATH_PARAM_BY_REL(inner_path, outerrel))
        return;

    
    if (jointype == JOIN_UNIQUE_OUTER)
    {
        outer_path = (Path *) create_unique_path(root, outerrel,
                                                 outer_path, extra->sjinfo);
        Assert(outer_path);
        jointype = JOIN_INNER;
    }
    else if (jointype == JOIN_UNIQUE_INNER)
    {
        inner_path = (Path *) create_unique_path(root, innerrel,
                                                 inner_path, extra->sjinfo);
        Assert(inner_path);
        jointype = JOIN_INNER;
    }

    
    if (joinrel->consider_parallel &&
        save_jointype != JOIN_UNIQUE_OUTER &&
        save_jointype != JOIN_FULL &&
        save_jointype != JOIN_RIGHT &&
        outerrel->partial_pathlist != NIL &&
        bms_is_empty(joinrel->lateral_relids))
    {
        cheapest_partial_outer = (Path *) linitial(outerrel->partial_pathlist);

        if (inner_path->parallel_safe)
            cheapest_safe_inner = inner_path;
        else if (save_jointype != JOIN_UNIQUE_INNER)
            cheapest_safe_inner =
                get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
    }

    
    all_pathkeys = select_outer_pathkeys_for_merge(root,
                                                   extra->mergeclause_list,
                                                   joinrel);

    foreach(l, all_pathkeys)//遍历所有可用的排序键
    {
        List       *front_pathkey = (List *) lfirst(l);
        List       *cur_mergeclauses;
        List       *outerkeys;
        List       *innerkeys;
        List       *merge_pathkeys;

        
        if (l != list_head(all_pathkeys))
            outerkeys = lcons(front_pathkey,
                              list_delete_ptr(list_copy(all_pathkeys),
                                              front_pathkey));
        else
            outerkeys = all_pathkeys;   

        
        cur_mergeclauses =
            find_mergeclauses_for_outer_pathkeys(root,
                                                 outerkeys,
                                                 extra->mergeclause_list);

        
        Assert(list_length(cur_mergeclauses) == list_length(extra->mergeclause_list));

        
        innerkeys = make_inner_pathkeys_for_merge(root,
                                                  cur_mergeclauses,
                                                  outerkeys);

        
        merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
                                             outerkeys);

        
        try_mergejoin_path(root,
                           joinrel,
                           outer_path,
                           inner_path,
                           merge_pathkeys,
                           cur_mergeclauses,
                           outerkeys,
                           innerkeys,
                           jointype,
                           extra,
                           false);

        
        if (cheapest_partial_outer && cheapest_safe_inner)
            try_partial_mergejoin_path(root,
                                       joinrel,
                                       cheapest_partial_outer,
                                       cheapest_safe_inner,
                                       merge_pathkeys,
                                       cur_mergeclauses,
                                       outerkeys,
                                       innerkeys,
                                       jointype,
                                       extra);
    }
}


//----------------------------------- try_mergejoin_path

static void
try_mergejoin_path(PlannerInfo *root,
                   RelOptInfo *joinrel,
                   Path *outer_path,
                   Path *inner_path,
                   List *pathkeys,
                   List *mergeclauses,
                   List *outersortkeys,
                   List *innersortkeys,
                   JoinType jointype,
                   JoinPathExtraData *extra,
                   bool is_partial)
{
    Relids      required_outer;
    JoinCostWorkspace workspace;

    if (is_partial)
    {
        try_partial_mergejoin_path(root,
                                   joinrel,
                                   outer_path,
                                   inner_path,
                                   pathkeys,
                                   mergeclauses,
                                   outersortkeys,
                                   innersortkeys,
                                   jointype,
                                   extra);//并行执行
        return;
    }

    
    required_outer = calc_non_nestloop_required_outer(outer_path,
                                                      inner_path);
    if (required_outer &&
        !bms_overlap(required_outer, extra->param_source_rels))
    {
        
        bms_free(required_outer);
        return;
    }

    
    if (outersortkeys &&
        pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
        outersortkeys = NIL;
    if (innersortkeys &&
        pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
        innersortkeys = NIL;

    
    initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
                           outer_path, inner_path,
                           outersortkeys, innersortkeys,
                           extra);//初始化mergejoin

    if (add_path_precheck(joinrel,
                          workspace.startup_cost, workspace.total_cost,
                          pathkeys, required_outer))//执行前置检查
    {
        add_path(joinrel, (Path *)
                 create_mergejoin_path(root,
                                       joinrel,
                                       jointype,
                                       &workspace,
                                       extra,
                                       outer_path,
                                       inner_path,
                                       extra->restrictlist,
                                       pathkeys,
                                       required_outer,
                                       mergeclauses,
                                       outersortkeys,
                                       innersortkeys));//创建并添加路径
    }
    else
    {
        
        bms_free(required_outer);
    }
}


//----------------------- create_mergejoin_path
 
 
 MergePath *
 create_mergejoin_path(PlannerInfo *root,
                       RelOptInfo *joinrel,
                       JoinType jointype,
                       JoinCostWorkspace *workspace,
                       JoinPathExtraData *extra,
                       Path *outer_path,
                       Path *inner_path,
                       List *restrict_clauses,
                       List *pathkeys,
                       Relids required_outer,
                       List *mergeclauses,
                       List *outersortkeys,
                       List *innersortkeys)
 {
     MergePath  *pathnode = makeNode(MergePath);
 
     pathnode->jpath.path.pathtype = T_MergeJoin;
     pathnode->jpath.path.parent = joinrel;
     pathnode->jpath.path.pathtarget = joinrel->reltarget;
     pathnode->jpath.path.param_info =
         get_joinrel_parampathinfo(root,
                                   joinrel,
                                   outer_path,
                                   inner_path,
                                   extra->sjinfo,
                                   required_outer,
                                   &restrict_clauses);
     pathnode->jpath.path.parallel_aware = false;
     pathnode->jpath.path.parallel_safe = joinrel->consider_parallel &&
         outer_path->parallel_safe && inner_path->parallel_safe;
     
     pathnode->jpath.path.parallel_workers = outer_path->parallel_workers;
     pathnode->jpath.path.pathkeys = pathkeys;
     pathnode->jpath.jointype = jointype;
     pathnode->jpath.inner_unique = extra->inner_unique;
     pathnode->jpath.outerjoinpath = outer_path;
     pathnode->jpath.innerjoinpath = inner_path;
     pathnode->jpath.joinrestrictinfo = restrict_clauses;
     pathnode->path_mergeclauses = mergeclauses;
     pathnode->outersortkeys = outersortkeys;
     pathnode->innersortkeys = innersortkeys;
     
     
 
     final_cost_mergejoin(root, pathnode, workspace, extra);//估算成本
 
     return pathnode;
 }

三、跟踪分析

测试脚本如下

select a.*,b.grbh,b.je 
from t_dwxx a,
    lateral (select t1.dwbh,t1.grbh,t2.je 
     from t_grxx t1 
          inner join t_jfxx t2 on t1.dwbh = a.dwbh and t1.grbh = t2.grbh) b
order by b.dwbh;

启动gdb,设置断点

(gdb) b sort_inner_and_outer
Breakpoint 1 at 0x7af63a: file joinpath.c, line 888.
(gdb) c
Continuing.

Breakpoint 1, sort_inner_and_outer (root=0x1a4a278, joinrel=0x1aa7180, outerrel=0x1a55700, innerrel=0x1a56c30, 
    jointype=JOIN_INNER, extra=0x7ffca933f880) at joinpath.c:888
888     JoinType    save_jointype = jointype;
(gdb)

新生成的joinrel是1号和3号RTE的连接,类型为JOIN_INNER

(gdb) p *joinrel->relids->words
$1 = 10
(gdb) p jointype
$2 = JOIN_INNER

获取排序键,PathKey中的等价类EC,成员为t_grxx.dwbh和t_dwxx.dwbh

...
(gdb) 
993     all_pathkeys = select_outer_pathkeys_for_merge(root,
(gdb) n
997     foreach(l, all_pathkeys)
(gdb) p *all_pathkeys
$3 = {type = T_List, length = 1, head = 0x1a69490, tail = 0x1a69490}
(gdb) p *(PathKey *)all_pathkeys->head->data.ptr_value
$5 = {type = T_PathKey, pk_eclass = 0x1a60e08, pk_opfamily = 1994, pk_strategy = 1, pk_nulls_first = false}
...
(gdb) set $rt=(RelabelType *)((EquivalenceMember *)$ec->ec_members->head->data.ptr_value)->em_expr
(gdb) p *$rt->arg
$14 = {type = T_Var}
(gdb) p *(Var *)$rt->arg
$15 = {xpr = {type = T_Var}, varno = 3, varattno = 1, vartype = 1043, vartypmod = 14, varcollid = 100, varlevelsup = 0, 
  varnoold = 3, varoattno = 1, location = 208}
(gdb) set $rt2=(RelabelType *)((EquivalenceMember *)$ec->ec_members->head->next->data.ptr_value)->em_expr
(gdb) p *(Var *)$rt2->arg
$16 = {xpr = {type = T_Var}, varno = 1, varattno = 2, vartype = 1043, vartypmod = 24, varcollid = 100, varlevelsup = 0, 
  varnoold = 1, varoattno = 2, location = 218}

开始遍历all_pathkeys

(gdb) n
999         List       *front_pathkey = (List *) lfirst(l);

获取连接条件子句,t_dwxx.dwbh=t_grxx.dwbh

(gdb) p *cur_mergeclauses
$17 = {type = T_List, length = 1, head = 0x1a694f0, tail = 0x1a694f0}

构建outer和inner relation的排序键

(gdb) p *(PathKey *)innerkeys->head->data.ptr_value
$22 = {type = T_PathKey, pk_eclass = 0x1a60e08, pk_opfamily = 1994, pk_strategy = 1, pk_nulls_first = false}
(gdb) p *(PathKey *)merge_pathkeys->head->data.ptr_value
$25 = {type = T_PathKey, pk_eclass = 0x1a60e08, pk_opfamily = 1994, pk_strategy = 1, pk_nulls_first = false}

尝试merge join,进入函数try_mergejoin_path

(gdb) 
1038            try_mergejoin_path(root,
(gdb) step
try_mergejoin_path (root=0x1a4dcc0, joinrel=0x1a68e20, outer_path=0x1a62288, inner_path=0x1a62320, pathkeys=0x1a694b8, 
    mergeclauses=0x1a69518, outersortkeys=0x1a694b8, innersortkeys=0x1a69578, jointype=JOIN_INNER, extra=0x7ffca933f880, 
    is_partial=false) at joinpath.c:572
572     if (is_partial)

初始merge join成本

...
(gdb) 
615     initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
(gdb) p workspace
$26 = {startup_cost = 10861.483356195882, total_cost = 11134.203356195881, run_cost = 24.997499999999999, 
  inner_run_cost = 247.72250000000003, inner_rescan_run_cost = 1.3627136827435593e-316, outer_rows = 9999, 
  inner_rows = 100000, outer_skip_rows = 0, inner_skip_rows = 911, numbuckets = 27665584, numbatches = 0, 
  inner_rows_total = 1.3681950446447804e-316}

构造merge join

...
(gdb) n
625                  create_mergejoin_path(root,
(gdb) 
624         add_path(joinrel, (Path *)
(gdb) 
644 }
(gdb) p *joinrel->pathlist
$28 = {type = T_List, length = 1, head = 0x1a6a180, tail = 0x1a6a180}
(gdb) p *(Node *)joinrel->pathlist->head->data.ptr_value
$29 = {type = T_MergePath}
(gdb) p *(MergePath *)joinrel->pathlist->head->data.ptr_value
$30 = {jpath = {path = {type = T_MergePath, pathtype = T_MergeJoin, parent = 0x1a68e20, pathtarget = 0x1a69058, 
      param_info = 0x0, parallel_aware = false, parallel_safe = true, parallel_workers = 0, rows = 100000, 
      startup_cost = 10863.760856195882, total_cost = 12409.200856195883, pathkeys = 0x1a694b8}, jointype = JOIN_INNER, 
    inner_unique = false, outerjoinpath = 0x1a62288, innerjoinpath = 0x1a62320, joinrestrictinfo = 0x1a692f8}, 
  path_mergeclauses = 0x1a69518, outersortkeys = 0x1a694b8, innersortkeys = 0x1a69578, skip_mark_restore = false, 
  materialize_inner = false}

完成调用

(gdb) n
sort_inner_and_outer (root=0x1a4dcc0, joinrel=0x1a68e20, outerrel=0x1a4d700, innerrel=0x1a4d918, jointype=JOIN_INNER, 
    extra=0x7ffca933f880) at joinpath.c:1054
1054            if (cheapest_partial_outer && cheapest_safe_inner)
(gdb) 
997     foreach(l, all_pathkeys)
(gdb) 
1066    }
(gdb) n
add_paths_to_joinrel (root=0x1a4dcc0, joinrel=0x1a68e20, outerrel=0x1a4d700, innerrel=0x1a4d918, jointype=JOIN_INNER, 
    sjinfo=0x7ffca933f970, restrictlist=0x1a692f8) at joinpath.c:279
279     if (mergejoin_allowed)
(gdb) 
280         match_unsorted_outer(root, joinrel, outerrel, innerrel,
...

感谢各位的阅读,以上就是“PostgreSQL中sort_inner_and_outer函数分析”的内容了,经过本文的学习后,相信大家对PostgreSQL中sort_inner_and_outer函数分析这一问题有了更深刻的体会,具体使用情况还需要大家实践验证。这里是亿速云,小编将为大家推送更多相关知识点的文章,欢迎关注!

阅读原文内容投诉

免责声明:

① 本站未注明“稿件来源”的信息均来自网络整理。其文字、图片和音视频稿件的所属权归原作者所有。本站收集整理出于非商业性的教育和科研之目的,并不意味着本站赞同其观点或证实其内容的真实性。仅作为临时的测试数据,供内部测试之用。本站并未授权任何人以任何方式主动获取本站任何信息。

② 本站未注明“稿件来源”的临时测试数据将在测试完成后最终做删除处理。有问题或投稿请发送至: 邮箱/279061341@qq.com QQ/279061341

软考中级精品资料免费领

  • 历年真题答案解析
  • 备考技巧名师总结
  • 高频考点精准押题
  • 2024年上半年信息系统项目管理师第二批次真题及答案解析(完整版)

    难度     813人已做
    查看
  • 【考后总结】2024年5月26日信息系统项目管理师第2批次考情分析

    难度     354人已做
    查看
  • 【考后总结】2024年5月25日信息系统项目管理师第1批次考情分析

    难度     318人已做
    查看
  • 2024年上半年软考高项第一、二批次真题考点汇总(完整版)

    难度     435人已做
    查看
  • 2024年上半年系统架构设计师考试综合知识真题

    难度     224人已做
    查看

相关文章

发现更多好内容

猜你喜欢

AI推送时光机
位置:首页-资讯-数据库
咦!没有更多了?去看看其它编程学习网 内容吧
首页课程
资料下载
问答资讯