- 开窗函数
2.1开窗函数的定义及语法
开窗函数(又名:分析函数,窗口函数,OLAP函数)
聚合函数:将数据按照一定的规则分组,统一分析各组的某项情况,每个分组返回一行结果
开窗函数:将数据按照一定的规则分组,统一分析各组的某项情况,每行数据返回一行结果
( OLTP:事务处理 OLAP:数据分析)
开窗函数的语法形式:分析函数名()OVER(分析子句) --OVER()是开窗函数的一个标志
分析函数名:
1.聚合类:SUM() AVG() MAX() MIN() COUNT() --功能与聚合函数上相同
2.排序类:ROW_NUMBER() RANK() DENSE_RANK()
3.偏移类:LAG() LEAD()
分析子句:分组(PARTITION BY) 排序(ORDER BY) 窗口(ROWS) --窗口还有个极少用的RANGE
如何理解是三个分析子句:
1)例如现有多个班级的学生
2)所有学生按照班级分组--PARTITION BY CLASS
3)每个班级的学生按照成绩排座位--ORDER BY SCORE
4)从教室的后窗观察到部分学生的上课状态--ROWS
分析子句并不一定要写,也不一定全部都写,分析子句的使用依照需求和函数类型而定
2.2各类开窗函数举例
--建BUSINESS表,方便接下来的学习
CREATE TABLE BUSINESS(DATE_DT VARCHAR2(20),DAY VARCHAR2(20),WEEK VARCHAR2(20),AMT NUMBER);
--表中插入数据
INSERT INTO BUSINESS(DATE_DT,DAY,WEEK,AMT) VALUES('2020-05-04','星期一','第一周','3000');
INSERT INTO BUSINESS(DATE_DT,DAY,WEEK,AMT) VALUES('2020-05-05','星期二','第一周','2000');
INSERT INTO BUSINESS(DATE_DT,DAY,WEEK,AMT) VALUES('2020-05-06','星期三','第一周','1000');
INSERT INTO BUSINESS(DATE_DT,DAY,WEEK,AMT) VALUES('2020-05-07','星期四','第一周','4000');
INSERT INTO BUSINESS(DATE_DT,DAY,WEEK,AMT) VALUES('2020-05-08','星期五','第一周','6000');
INSERT INTO BUSINESS(DATE_DT,DAY,WEEK,AMT) VALUES('2020-05-09','星期六','第一周','2000');
INSERT INTO BUSINESS(DATE_DT,DAY,WEEK,AMT) VALUES('2020-05-10','星期日','第一周','3000');
INSERT INTO BUSINESS(DATE_DT,DAY,WEEK,AMT) VALUES('2020-05-11','星期一','第二周','1000');
INSERT INTO BUSINESS(DATE_DT,DAY,WEEK,AMT) VALUES('2020-05-12','星期二','第二周','4000');
INSERT INTO BUSINESS(DATE_DT,DAY,WEEK,AMT) VALUES('2020-05-13','星期三','第二周','8000');
INSERT INTO BUSINESS(DATE_DT,DAY,WEEK,AMT) VALUES('2020-05-14','星期四','第二周','2000');
INSERT INTO BUSINESS(DATE_DT,DAY,WEEK,AMT) VALUES('2020-05-15','星期五','第二周','5000');
INSERT INTO BUSINESS(DATE_DT,DAY,WEEK,AMT) VALUES('2020-05-16','星期六','第二周','3000');
INSERT INTO BUSINESS(DATE_DT,DAY,WEEK,AMT) VALUES('2020-05-17','星期日','第二周','7000');
COMMIT;
SELECT * FROM BUSINESS; --扫描全表
测试表:BUSINESS
聚合类举例:SUM() AVG() MAX() MIN() COUNT()
用法(以SUM为例):SUM(COL_NAME)OVER([PARTITION] [ORDER] [ROWS])
SELECT A.*,SUM(AMT) OVER() FROM BUSINESS A;
SELECT WEEK,SUM(AMT) FROM BUSINESS GROUP BY WEEK;
SELECT A.*,SUM(AMT) OVER(PARTITION BY WEEK) FROM BUSINESS A;
SELECT A.*,SUM(AMT) OVER(ORDER BY DAY) FROM BUSINESS A;
SELECT * FROM BUSINESS;
--查询每天的营业额及整个月的营业额总额
SELECT DATE_DT,AMT,(SELECT SUM(AMT) FROM BUSINESS) FROM BUSINESS;
--查询每天的营业额及每个周的营业额总额
SELECT DATE_DT,AMT,WEEK,SUM(AMT) OVER(PARTITION BY WEEK) A FROM BUSINESS;
--查询每天的营业额及月每日累计营业额
SELECT A.*,SUM(AMT) OVER(ORDER BY DATE_DT) FROM BUSINESS A; --所有数据从第一条到当前数据的和
--查询每天的营业额及周每日累计营业额
SELECT A.*,SUM(AMT) OVER(PARTITION BY WEEK ORDER BY DATE_DT) FROM BUSINESS A;--在组内,从第一条到当前数据的和
聚合类开窗函数注意点:
1)分析函数名内必须包含需要分析的内容
2)分析子句没有硬性要求 --出现ROWS时,必须跟随ORDER BY
3)采用默认窗口范围时,下一个相同值(排序的值)会被一并算入
SELECT ENAME,SAL,SUM(SAL) OVER(ORDER BY SAL) FROM EMP; --相同的值会一并计算入内
排序类举例:ROW_NUMBER() RANK() DENSE_RANK()
用法(以ROW_NUMBER为例):ROW_NUMBER()OVER([PARTITION] ORDER)
SELECT DATE_DT,AMT,AMT-SUM(AMT) OVER(PARTITION BY WEEK) FROM BUSINESS;--当天营业额与周营业额之差
--查询每天的营业额并在整月范围内升序排列
SELECT DATE_DT,AMT,ROW_NUMBER() OVER(ORDER BY AMT) FROM BUSINESS; --如果有相同的,也会按序号往下排(不并列,不跳跃)
SELECT DATE_DT,AMT,RANK() OVER(ORDER BY AMT) FROM BUSINESS; --如果有相同的,会把相同的变成同一个序号,按相同的数量的总数往下一位排(并列跳跃)
SELECT DATE_DT,AMT,DENSE_RANK() OVER(ORDER BY AMT) FROM BUSINESS; --如果有相同的,会把相同的变成同一个序号,下一个不相同的,按这个序号加1往下排(并列不跳跃)
--查询每天的营业额并在每周范围内降序排列
SELECT * FROM BUSINESS;
SELECT DATE_DT,AMT,WEEK,ROW_NUMBER() OVER(PARTITION BY WEEK ORDER BY AMT DESC) FROM BUSINESS;
--排序类开窗函数用于去重
SELECT * FROM BIAO;
SELECT ENAME,BNO,BSEX,ROW_NUMBER() OVER(PARTITION BY ENAME ORDER BY BNO) FROM BIAO;
SELECT DISTINCT ENAME,BNO,BSEX,TT FROM BIAO; --完全重复去重
SELECT ENAME, BNO, BSEX,TT
FROM (SELECT ENAME,
BNO,
BSEX,TT,
ROW_NUMBER() OVER(PARTITION BY ENAME ORDER BY TT DESC) BR --将名字相同的分为一组,再在这些组里根据日期排序,取出每个组里排第一的的数据
FROM BIAO)
WHERE BR = 1; --ROW_NUMBER的另一种用法,当多条数据属于某一个人,但数据都不尽相同时(在某些字段上去重,(取最新数据))
--RANK()
SELECT ENAME, BNO, BSEX,TT
FROM (SELECT ENAME,
BNO,
BSEX,TT,
RANK() OVER(PARTITION BY ENAME ORDER BY TT DESC) BR
FROM BIAO)
WHERE BR = 1;
--DENSE_RANK()
SELECT ENAME, BNO, BSEX,TT
FROM (SELECT ENAME,
BNO,
BSEX,TT,
DENSE_RANK() OVER(PARTITION BY ENAME ORDER BY TT DESC) BR --将名字相同的分为一组,再在这些组里根据日期排序,取出每个组里排第一的的数据
FROM BIAO)
WHERE BR = 1;
SELECT * FROM BIAO;
SELECT * FROM BIAO FOR UPDATE;
--注意:用在去重时,一般用ROW_NUMBER(),因为如果用RANK()和DENSE_RANK()的话
--碰到两条一样的数据时排序会一样,而在取出来时也会一并取出来,达不到去重的效果
--写在开窗函数里的ORDER BY 和 写在开窗函数外面的ORDER BY 的区别
写在开窗函数里的ORDER BY是对开窗函数里的数据进行排序
写在开窗函数外面的ORDER BY是对最终的结果进行一个排序
排序类开窗函数注意点:
1)分子函数名内不能包含任何内容 --ROW_NUMBER() 括号内不能包含任何东西
2)分析子句内必须添加ORDER BY,且不能指定窗口 --排序类字句中必须加ORDER BY ,而且不能加ROWS()
偏移类举例:LAG() LEAD()
用法(以LAG为例):LAG(COL_NAME,OFFSET,DEFVAL)OVER():向前偏移N行取数
COL_NAME:要分析的字段
OFFSET:偏移量 --默认偏移一行
DEFVAL:默认返回值 --默认返回空null
--查询每天的营业额以及前一天的营业额
方法1:
SELECT DATE_DT, AMT, LAG(AMT, 1, 0) OVER(ORDER BY DATE_DT) FROM BUSINESS; --更简单
方法2:
SELECT DATE_DT,
AMT,
SUM(AMT) OVER(ORDER BY DATE_DT ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING)
FROM BUSINESS;
--两天营业额之差
SELECT DATE_DT,AMT,AMT-LAG(AMT,1,0) OVER(ORDER BY DATE_DT) FROM BUSINESS; --偏移量不能为负值
--查询五月连续登录五天的用户
--建表检测
CREATE TABLE EXAM(ID VARCHAR2(10),TS VARCHAR2(15));
INSERT INTO EXAM VALUES('A0001','2021/01/04');
INSERT INTO EXAM VALUES('A0002','2021/01/04');
INSERT INTO EXAM VALUES('A0001','2021/01/05');
INSERT INTO EXAM VALUES('A0003','2021/01/05');
INSERT INTO EXAM VALUES('A0001','2021/01/06');
INSERT INTO EXAM VALUES('A0001','2021/01/07');
INSERT INTO EXAM VALUES('A0001','2021/01/08');
INSERT INTO EXAM VALUES('A0002','2021/01/09');
INSERT INTO EXAM VALUES('A0002','2021/01/10');
INSERT INTO EXAM VALUES('A0003','2021/01/10');
INSERT INTO EXAM VALUES('A0002','2021/01/11');
INSERT INTO EXAM VALUES('A0002','2021/01/12');
INSERT INTO EXAM VALUES('A0002','2021/01/13');
INSERT INTO EXAM VALUES('A0005','2021/01/13');
INSERT INTO EXAM VALUES('A0003','2021/01/14');
INSERT INTO EXAM VALUES('A0004','2021/01/15');
INSERT INTO EXAM VALUES('A0004','2021/01/16');
INSERT INTO EXAM VALUES('A0007','2021/01/17');
INSERT INTO EXAM VALUES('A0008','2021/01/18');
SELECT * FROM EXAM;
SELECT ID,
TS,
TO_CHAR(TO_DATE(TS, 'YYYY/MM/DD') - 4, 'YYYY/MM/DD') A,
LAG(TS, 4) OVER(PARTITION BY ID ORDER BY TS) B
FROM EXAM;--查询其向上偏移4天的登录时间
SELECT DISTINCT ID
FROM (SELECT ID,
TS, --本次(当天)登录日期
TO_CHAR(TO_DATE(TS, 'YYYY/MM/DD') - 4, 'YYYY/MM/DD') A, --当前数四天的日期
LAG(TS, 4) OVER(PARTITION BY ID ORDER BY TS) B --上四次的登录日期
FROM EXAM) WHERE B IS NOT NULL; --ERROR(不能用非空来算)
SELECT DISTINCT ID
FROM (SELECT ID,
TS, --本次(当天)登录日期
TO_CHAR(TO_DATE(TS, 'YYYY/MM/DD') - 4, 'YYYY/MM/DD') A, --当前数四天的日期
LAG(TS, 4) OVER(PARTITION BY ID ORDER BY TS) B --上四次的登录日期
FROM EXAM) WHERE A=B;
偏移类开窗函数注意点:
1)分析函数名内必须包含要分析的内容,其他两项参数可以默认
2)分析子句内必须添加ORDER BY,且不能指定窗口
3)若不再有可供偏移的行,则返回默认值
4)偏移量不允许写负数
5)分析的字段与默认返回值数据类型要保持一致
2.3开窗函数相关总结
一、各种窗口范围:
PRECEDING:之前的 FOLLOWING:之后的 CURRENT:当前的 UNBOUNDED:不受限的 ROW:行
1.--ROWS BETWEEN N PRECEDING AND N FOLLOWING 前N位到后N位
SELECT DATE_DT,
AMT,
SUM(AMT) OVER(PARTITION BY WEEK ORDER BY DATE_DT ROWS BETWEEN 3 PRECEDING AND 2 FOLLOWING) B
FROM BUSINESS;
2.--ROWS BETWEEN CURRENT ROW AND N FOLLOWING 当前位和到后N位
SELECT DATE_DT,
AMT,
SUM(AMT) OVER(PARTITION BY WEEK ORDER BY DATE_DT ROWS BETWEEN CURRENT ROW AND 2 FOLLOWING) B
FROM BUSINESS;
3.--ROWS BETWEEN N PRECEDING AND CURRENT ROW 前N位到当前位
SELECT DATE_DT,AMT,SUM(AMT) OVER(PARTITION BY WEEK ORDER BY DATE_DT ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) FROM BUSINESS;
4.--ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW 从前面所有行到当前行
SELECT DATE_DT,AMT,SUM(AMT) OVER(PARTITION BY WEEK ORDER BY DATE_DT ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM BUSINESS;
5.--ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING 从当前行到后面所有行
SELECT DATE_DT,AMT,SUM(AMT) OVER(PARTITION BY WEEK ORDER BY DATE_DT ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) FROM BUSINESS;
6.--ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING 全部,
SELECT DATE_DT,
AMT,
SUM(AMT) OVER(PARTITION BY WEEK ORDER BY DATE_DT ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
FROM BUSINESS;
7.--ROWS BETWEEN UNBOUNDED PRECEDING AND N FOLLOWING 从前面所有到当前行
SELECT DATE_DT,
AMT,
SUM(AMT) OVER(PARTITION BY WEEK ORDER BY DATE_DT ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
FROM BUSINESS;
8.--ROWS BETWEEN N PRECEDING AND UNBOUNDED FOLLOWING 从前面N行到后面所有
SELECT DATE_DT,
AMT,
SUM(AMT) OVER(PARTITION BY WEEK ORDER BY DATE_DT ROWS BETWEEN 2 PRECEDING AND UNBOUNDED FOLLOWING)
FROM BUSINESS;
SELECT DATE_DT,
AMT,
SUM(AMT) OVER(PARTITION BY WEEK ORDER BY DATE_DT ROWS BETWEEN 3 PRECEDING AND 2 FOLLOWING) B
FROM BUSINESS; --求的是当前一行前面三个,加上它后面两个的和; 当前的和为六个数相加所得
SELECT DATE_DT,
AMT,
SUM(AMT) OVER(PARTITION BY WEEK ORDER BY DATE_DT ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) B
FROM BUSINESS;--B的值为B所对应的值加上它前面1位的值和后面1位的值
SELECT DATE_DT,
AMT,
SUM(AMT) OVER(PARTITION BY WEEK ORDER BY DATE_DT ROWS BETWEEN CURRENT ROW AND 1 FOLLOWING) B
FROM BUSINESS;
二、不同分析子句组合:--ROWS的出现,必须要伴随ORDER BY
1.SELECT ENAME,SAL,DEPTNO,SUM(SAL)OVER() FROM EMP ;
2.SELECT ENAME,SAL,DEPTNO,SUM(SAL)OVER(PARTITION BY DEPTNO) FROM EMP ;
3.SELECT ENAME,SAL,DEPTNO,SUM(SAL)OVER(ORDER BY SAL ) FROM EMP ;
4.SELECT ENAME,SAL,DEPTNO,SUM(SAL)OVER(ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) FROM EMP ; --ERROR(窗口字句不能单独出现)
5.SELECT ENAME,SAL,DEPTNO,SUM(SAL)OVER(PARTITION BY DEPTNO ORDER BY SAL) FROM EMP ;
6.SELECT ENAME,SAL,DEPTNO,SUM(SAL)OVER(PARTITION BY DEPTNO ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) FROM EMP ; --ERROR (出现ROWS,必须跟随ORDER BY)
7.SELECT ENAME,SAL,DEPTNO,SUM(SAL)OVER(ORDER BY SAL ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) FROM EMP ;
8.SELECT ENAME,SAL,DEPTNO,SUM(SAL)OVER(PARTITION BY DEPTNO ORDER BY SAL ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) FROM EMP ;
--总结
1.窗口子句不能单独出现,必须要有排序子句出现的情况下才能指定窗口范围
2.若出现排序子句,同时未指定窗口范围,默认的窗口范围是第一行到当前行;若未出现排序子句,
同时未指定窗口范围,默认的窗口范围是第一行到最后一行
3.PARTITION BY 分组的范围
ROWS 统计分析的范围
分析范围不会超过分组范围
三、聚合函数与开窗函数的差异:
1.聚合函数每组数据返回一行值;开窗函数每条数据返回一行值
2.开窗函数后会跟一个OVER(),聚合函数后没有
3.开窗函数通过PARTITION BY 分组 ,聚合函数通过GROUP BY 分组o
4.开窗函数做分析时,并不一定是拿整个分组的数据进行分析,而是通过窗口指定;
聚合函数做分析时,一定是拿整个分组的数据进行分析