这篇文章主要介绍“PostgreSQL查询优化中对Having和Group By子句的简化处理分析”,在日常操作中,相信很多人在PostgreSQL查询优化中对Having和Group By子句的简化处理分析问题上存在疑惑,小编查阅了各式资料,整理出简单好用的操作方法,希望对大家解答”PostgreSQL查询优化中对Having和Group By子句的简化处理分析”的疑惑有所帮助!接下来,请跟着小编一起来学习吧!
一、基本概念
简化Having语句
把Having中的约束条件,如满足可以提升到Where条件中的,则移动到Where子句中,否则仍保留在Having语句中.这样做的目的是因为Having过滤在Group by之后执行,如能把Having中的过滤提升到Where中,则可以提前执行"选择"运算,减少Group by的开销.
以下语句,条件dwbh='1002'提升到Where中执行:
testdb=# explain verbose select a.dwbh,a.xb,count(*)
testdb-# from t_grxx a
testdb-# group by a.dwbh,a.xb
testdb-# having count(*) >= 1 and dwbh = '1002';
QUERY PLAN
-----------------------------------------------------------------------------
GroupAggregate (cost=15.01..15.06 rows=1 width=84)
Output: dwbh, xb, count(*)
Group Key: a.dwbh, a.xb
Filter: (count(*) >= 1) -- count(*) >= 1 仍保留在Having中
-> Sort (cost=15.01..15.02 rows=2 width=76)
Output: dwbh, xb
Sort Key: a.xb
-> Seq Scan on public.t_grxx a (cost=0.00..15.00 rows=2 width=76)
Output: dwbh, xb
Filter: ((a.dwbh)::text = '1002'::text) -- 提升到Where中,扫描时过滤Tuple
(10 rows)
如存在Group by & Grouping sets则不作处理:
testdb=# explain verbose
testdb-# select a.dwbh,a.xb,count(*)
testdb-# from t_grxx a
testdb-# group by
testdb-# grouping sets ((a.dwbh),(a.xb),())
testdb-# having count(*) >= 1 and dwbh = '1002'
testdb-# order by a.dwbh,a.xb;
QUERY PLAN
-------------------------------------------------------------------------------
Sort (cost=28.04..28.05 rows=3 width=84)
Output: dwbh, xb, (count(*))
Sort Key: a.dwbh, a.xb
-> MixedAggregate (cost=0.00..28.02 rows=3 width=84)
Output: dwbh, xb, count(*)
Hash Key: a.dwbh
Hash Key: a.xb
Group Key: ()
Filter: ((count(*) >= 1) AND ((a.dwbh)::text = '1002'::text)) -- 扫描数据表后再过滤
-> Seq Scan on public.t_grxx a (cost=0.00..14.00 rows=400 width=76)
Output: dwbh, grbh, xm, xb, nl
(11 rows)
简化Group by语句
如Group by中的字段列表已包含某个表主键的所有列,则该表在Group by语句中的其他列可以删除,这样的做法有利于提升在Group by过程中排序或Hash的性能,减少不必要的开销.
testdb=# explain verbose select a.dwbh,a.dwmc,count(*)
testdb-# from t_dwxx a
testdb-# group by a.dwbh,a.dwmc
testdb-# having count(*) >= 1;
QUERY PLAN
--------------------------------------------------------------------------
HashAggregate (cost=13.20..15.20 rows=53 width=264)
Output: dwbh, dwmc, count(*)
Group Key: a.dwbh, a.dwmc -- 分组键为dwbh & dwmc
Filter: (count(*) >= 1)
-> Seq Scan on public.t_dwxx a (cost=0.00..11.60 rows=160 width=256)
Output: dwmc, dwbh, dwdz
(6 rows)
testdb=# alter table t_dwxx add primary key(dwbh); -- 添加主键
ALTER TABLE
testdb=# explain verbose select a.dwbh,a.dwmc,count(*)
from t_dwxx a
group by a.dwbh,a.dwmc
having count(*) >= 1;
QUERY PLAN
-----------------------------------------------------------------------
HashAggregate (cost=1.05..1.09 rows=1 width=264)
Output: dwbh, dwmc, count(*)
Group Key: a.dwbh -- 分组键只保留dwbh
Filter: (count(*) >= 1)
-> Seq Scan on public.t_dwxx a (cost=0.00..1.03 rows=3 width=256)
Output: dwmc, dwbh, dwdz
(6 rows)
二、源码解读
相关处理的源码位于文件subquery_planner.c中,主函数为subquery_planner,代码片段如下:
newHaving = NIL;
foreach(l, (List *) parse->havingQual)//存在Having条件语句
{
Node *havingclause = (Node *) lfirst(l);//获取谓词
if ((parse->groupClause && parse->groupingSets) ||
contain_agg_clause(havingclause) ||
contain_volatile_functions(havingclause) ||
contain_subplans(havingclause))
{
//如果有Group&&Group Sets语句
//保持不变
newHaving = lappend(newHaving, havingclause);
}
else if (parse->groupClause && !parse->groupingSets)
{
//只有group语句,可以加入到jointree的条件中
parse->jointree->quals = (Node *)
lappend((List *) parse->jointree->quals, havingclause);
}
else//既没有group也没有grouping set,拷贝一份到jointree的条件中
{
parse->jointree->quals = (Node *)
lappend((List *) parse->jointree->quals,
copyObject(havingclause));
newHaving = lappend(newHaving, havingclause);
}
}
parse->havingQual = (Node *) newHaving;//调整having子句
remove_useless_groupby_columns(root);//去掉group by中无用的数据列
remove_useless_groupby_columns
static void
remove_useless_groupby_columns(PlannerInfo *root)
{
Query *parse = root->parse;//查询树
Bitmapset **groupbyattnos;//位图集合
Bitmapset **surplusvars;//位图集合
ListCell *lc;
int relid;
if (list_length(parse->groupClause) < 2)//如果只有1个ITEMS,无需处理
return;
if (parse->groupingSets)//存在Grouping sets,不作处理
return;
//用于分组的属性
groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
(list_length(parse->rtable) + 1));
foreach(lc, parse->groupClause)
{
SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
Var *var = (Var *) tle->expr;
if (!IsA(var, Var) ||
var->varlevelsup > 0)
continue;
relid = var->varno;
Assert(relid <= list_length(parse->rtable));
groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
var->varattno - FirstLowInvalidHeapAttributeNumber);
}
surplusvars = NULL;
relid = 0;
//如某个Relation的分组键中已含主键列,去掉其他列
foreach(lc, parse->rtable)
{
RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);
Bitmapset *relattnos;
Bitmapset *pkattnos;
Oid constraintOid;
relid++;
if (rte->rtekind != RTE_RELATION)
continue;
relattnos = groupbyattnos[relid];
if (bms_membership(relattnos) != BMS_MULTIPLE)
continue;
pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid);
if (pkattnos == NULL)
continue;
if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
{
if (surplusvars == NULL)
surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
(list_length(parse->rtable) + 1));
surplusvars[relid] = bms_difference(relattnos, pkattnos);
parse->constraintDeps = lappend_oid(parse->constraintDeps,
constraintOid);
}
}
if (surplusvars != NULL)
{
List *new_groupby = NIL;
foreach(lc, parse->groupClause)
{
SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
Var *var = (Var *) tle->expr;
if (!IsA(var, Var) ||
var->varlevelsup > 0 ||
!bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
surplusvars[var->varno]))
new_groupby = lappend(new_groupby, sgc);
}
parse->groupClause = new_groupby;
}
}
到此,关于“PostgreSQL查询优化中对Having和Group By子句的简化处理分析”的学习就结束了,希望能够解决大家的疑惑。理论与实践的搭配能更好的帮助大家学习,快去试试吧!若想继续学习更多相关知识,请继续关注亿速云网站,小编会继续努力为大家带来更多实用的文章!