Oracle分析函数汇总(超全)

前半部分数据源为Oracle自带HR用户下的员工信息表:EMPLOYEES,若创建库的时候未勾选创建示例,可以在这下载sql文件,创建相关表及数据源。(百度网盘链接:https://pan.baidu.com/s/1axlyRjfEGi0pOi8xmRewrA   密码:t3yy)

中间部分的数据源为Oracle自带SCOTT用户下的员工表&部门表:EMP、DEPT

后半部分属于转载,暂无数据源,但是方便归纳总结:https://blog.csdn.net/cc_0101/article/details/80884076

--1、查询各个部门的平均工资,以及该部门的员工信息

-- Average salary per MANAGER_ID shown next to every employee row, written with
-- a plain aggregate plus a join (the non-analytic way of getting this result).
-- Note: rows are grouped by MANAGER_ID (direct manager), not by DEPARTMENT_ID.
WITH manager_avg AS (
    -- One row per MANAGER_ID (including the NULL group) with its average salary.
    SELECT
        MANAGER_ID,
        AVG(SALARY) AS AVG_SALARY
    FROM EMPLOYEES
    GROUP BY MANAGER_ID
)
SELECT
    E.MANAGER_ID,
    E.FIRST_NAME || ' ' || E.LAST_NAME AS EMPLOYEE_NAME,
    E.HIRE_DATE,
    E.SALARY,
    M.AVG_SALARY
FROM EMPLOYEES E
INNER JOIN manager_avg M
    -- NULL = NULL is never true, so a plain equality join silently drops
    -- employees that have no manager. Match the NULL group explicitly so the
    -- result has the same rows as AVG(SALARY) OVER (PARTITION BY MANAGER_ID).
    ON E.MANAGER_ID = M.MANAGER_ID
    OR (E.MANAGER_ID IS NULL AND M.MANAGER_ID IS NULL)
ORDER BY E.MANAGER_ID;

-- Analytic form: every employee row carries the average salary of all rows
-- sharing its MANAGER_ID. The window has no ORDER BY, so the whole partition
-- is aggregated.
SELECT
    MANAGER_ID,
    FIRST_NAME || ' ' || LAST_NAME AS EMPLOYEE_NAME,
    HIRE_DATE,
    SALARY,
    AVG(SALARY) OVER (PARTITION BY MANAGER_ID) AS AVG_SALARY
FROM EMPLOYEES;

-- 2. Running average salary over the whole table in hire-date order.
--    With ORDER BY and no explicit frame the window defaults to
--    RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, so employees hired on
--    the same date are averaged together.
SELECT
    MANAGER_ID,
    FIRST_NAME || ' ' || LAST_NAME AS EMPLOYEE_NAME,
    HIRE_DATE,
    SALARY,
    AVG(SALARY) OVER (ORDER BY HIRE_DATE)
FROM EMPLOYEES;

-- 3. Running average salary per MANAGER_ID in hire-date order: from the first
--    hire under that manager up to the current employee. Employees hired on
--    the same date are averaged together (default RANGE frame).
SELECT
    MANAGER_ID,
    FIRST_NAME || ' ' || LAST_NAME AS EMPLOYEE_NAME,
    HIRE_DATE,
    SALARY,
    AVG(SALARY) OVER (
        PARTITION BY MANAGER_ID
        ORDER BY HIRE_DATE
    )
FROM EMPLOYEES;

-- ROWS frames count physical rows.
-- 4. Per MANAGER_ID, in hire-date order: average salary over the previous row,
--    the current row and the next two rows (up to four employees). Rows with
--    equal hire dates are taken in whatever order the sort happens to produce.
SELECT
    MANAGER_ID,
    FIRST_NAME || ' ' || LAST_NAME AS EMPLOYEE_NAME,
    HIRE_DATE,
    SALARY,
    AVG(SALARY) OVER (
        PARTITION BY MANAGER_ID
        ORDER BY HIRE_DATE
        ROWS BETWEEN 1 PRECEDING AND 2 FOLLOWING
    )
FROM EMPLOYEES;

-- RANGE frames are defined on the ORDER BY value (numeric or date).
-- 5. Per MANAGER_ID, in hire-date order: average salary of employees hired from
--    50 days before to 150 days after the current employee's hire date.
--    Numeric RANGE offsets on a DATE ordering are measured in days, and both
--    bounds are inclusive.
SELECT
    MANAGER_ID,
    FIRST_NAME || ' ' || LAST_NAME AS EMPLOYEE_NAME,
    HIRE_DATE,
    SALARY,
    AVG(SALARY) OVER (
        PARTITION BY MANAGER_ID
        ORDER BY HIRE_DATE
        RANGE BETWEEN 50 PRECEDING AND 150 FOLLOWING
    )
FROM EMPLOYEES;

/*窗口子句中我们经常用到指定第一行,当前行,最后一行这样的三个属性:
第一行是 unbounded preceding,
当前行是 current row,
最后一行是 unbounded following*/

--求平均做一个总结,并展示第一个到最后一个的取值方法

--(网页抓取残留,非SQL内容)扫描二维码关注公众号,回复: 9718198 查看本文章
-- Summary of the AVG windows above, plus two ways of spelling
-- "first row to last row of the partition".
SELECT
    MANAGER_ID,
    FIRST_NAME || ' ' || LAST_NAME AS EMPLOYEE_NAME,
    HIRE_DATE,
    SALARY,
    -- Running average within the manager group (same as query 3).
    AVG(SALARY) OVER (PARTITION BY MANAGER_ID ORDER BY HIRE_DATE) AS AVG_SALARY_PART_ORDER,
    -- Average of the whole manager group (same as query 1). Alias renamed from
    -- AVG_SALARY_ORDER because this window has no ORDER BY.
    AVG(SALARY) OVER (PARTITION BY MANAGER_ID) AS AVG_SALARY_PART,
    -- Whole-group average again: a value RANGE from the first to the last row.
    AVG(SALARY) OVER (
        PARTITION BY MANAGER_ID
        ORDER BY HIRE_DATE
        RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS AVG_SALARY_UNBOUND1,
    -- Whole-group average again: a ROWS frame from the first to the last row.
    AVG(SALARY) OVER (
        PARTITION BY MANAGER_ID
        ORDER BY HIRE_DATE
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS AVG_SALARY_UNBOUND2
FROM EMPLOYEES;
-- Per DEPTNO, ordered by ENAME: sum of SAL from the current row through the
-- last row of the partition.
-- The alias was MAX_SAL although the column is a SUM; renamed to SUM_SAL.
SELECT
    EMPNO,
    ENAME,
    DEPTNO,
    SAL,
    SUM(SAL) OVER (
        PARTITION BY DEPTNO
        ORDER BY ENAME
        ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
    ) AS SUM_SAL
FROM EMP;


-- Per DEPTNO, ordered by ENAME: sum of SAL over the previous row and the
-- current row.
-- The alias was MAX_SAL although the column is a SUM; renamed to SUM_SAL.
SELECT
    EMPNO,
    ENAME,
    DEPTNO,
    SAL,
    SUM(SAL) OVER (
        PARTITION BY DEPTNO
        ORDER BY ENAME
        ROWS BETWEEN 1 PRECEDING AND CURRENT ROW
    ) AS SUM_SAL
FROM EMP;


-- Per DEPTNO, ordered by ENAME: sum of SAL from the previous row through the
-- two rows after the current row.
-- The alias was MAX_SAL although the column is a SUM; renamed to SUM_SAL.
SELECT
    EMPNO,
    ENAME,
    DEPTNO,
    SAL,
    SUM(SAL) OVER (
        PARTITION BY DEPTNO
        ORDER BY ENAME
        ROWS BETWEEN 1 PRECEDING AND 2 FOLLOWING
    ) AS SUM_SAL
FROM EMP;
-- Note: ROWS BETWEEN 1 PRECEDING AND 2 FOLLOWING spans the row before the
-- current row through the second row after it (at most four rows).

-- MIN, MAX, AVG, SUM, COUNT used as analytic functions.
-- The DEPT_* columns are cumulative per DEPARTMENT_ID in hire-date order;
-- employees hired on the same date are aggregated together.
SELECT
    DEPARTMENT_ID,
    FIRST_NAME || ' ' || LAST_NAME AS EMPLOYEE_NAME,
    HIRE_DATE,
    SALARY,
    -- Lowest salary among employees hired up to and including this hire date.
    MIN(SALARY) OVER (PARTITION BY DEPARTMENT_ID ORDER BY HIRE_DATE) AS DEPT_MIN,
    -- Highest salary among employees hired up to and including this hire date.
    MAX(SALARY) OVER (PARTITION BY DEPARTMENT_ID ORDER BY HIRE_DATE) AS DEPT_MAX,
    -- Average salary from the department's first hire to the current employee.
    AVG(SALARY) OVER (PARTITION BY DEPARTMENT_ID ORDER BY HIRE_DATE) AS DEPT_AVG,
    -- Salary total from the department's first hire to the current employee.
    SUM(SALARY) OVER (PARTITION BY DEPARTMENT_ID ORDER BY HIRE_DATE) AS DEPT_SUM,
    -- Number of rows, over the whole table, with a salary up to the current
    -- one (equal salaries are counted together).
    COUNT(1) OVER (ORDER BY SALARY) AS COUNT_BY_SALARY,
    -- Number of rows whose salary lies between current - 50 and current + 150.
    COUNT(1) OVER (
        ORDER BY SALARY
        RANGE BETWEEN 50 PRECEDING AND 150 FOLLOWING
    ) AS COUNT_BY_SALARY_RANGE
FROM EMPLOYEES;

--RANK、DENSE_RANK
/*row_number的用途非常广泛,排序最好用它,它会为查询出来的每一行记录生成一个序号,依次排序且不会重复,注意使用row_number函数时必须要用over子句选择对某一列进行排序才能生成序号。

rank函数用于返回结果集的分区内每行的排名,行的排名是相关行之前的排名数加一。简单来说rank函数就是对查询出来的记录进行排名,与row_number函数不同的是,rank函数考虑到了over子句中排序字段值相同的情况,如果使用rank函数来生成序号,over子句中排序字段值相同的序号是一样的,后面字段值不相同的序号将跳过相同的排名号排下一个,也就是相关行之前的排名数加一,可以理解为根据当前的记录数生成序号,后面的记录依此类推。

dense_rank函数的功能与rank函数类似,dense_rank函数在生成序号时是连续的,而rank函数生成的序号有可能不连续。dense_rank函数出现相同排名时,将不跳过相同排名号,rank值紧接上一次的rank值。在各个分组内,rank()是跳跃排序,有两个第一名时接下来就是第三名,dense_rank()是连续排序,有两个第一名时仍然跟着第二名。*/

-- Three ranking functions over the whole table, ordered by salary.
SELECT
    DEPARTMENT_ID,
    FIRST_NAME || ' ' || LAST_NAME AS EMPLOYEE_NAME,
    SALARY,
    -- Ties share a rank; the ranks after a tie skip ahead (1, 1, 3).
    RANK() OVER (ORDER BY SALARY) AS RANK_ORDER,
    -- Ties share a rank; the next rank follows immediately (1, 1, 2).
    DENSE_RANK() OVER (ORDER BY SALARY) AS DENSE_RANK_ORDER,
    -- Consecutive numbers with no repeats, even for ties.
    ROW_NUMBER() OVER (ORDER BY SALARY) AS ROW_NUMBER_ORDER
FROM EMPLOYEES;


-- The same three ranking functions, restarted for every DEPARTMENT_ID and
-- ordered by salary inside each department.
SELECT
    DEPARTMENT_ID,
    FIRST_NAME || ' ' || LAST_NAME AS EMPLOYEE_NAME,
    SALARY,
    -- Ties share a rank; the ranks after a tie skip ahead.
    RANK() OVER (PARTITION BY DEPARTMENT_ID ORDER BY SALARY) AS RANK_PART_ORDER,
    -- Ties share a rank; the next rank follows immediately.
    DENSE_RANK() OVER (PARTITION BY DEPARTMENT_ID ORDER BY SALARY) AS DENSE_RANK_PART_ORDER,
    -- Consecutive numbers with no repeats.
    ROW_NUMBER() OVER (PARTITION BY DEPARTMENT_ID ORDER BY SALARY) AS ROW_NUMBER_PART_ORDER
FROM EMPLOYEES;

/*lag(exp_str,offset,defval) over()
exp_str 指的是要做对比的字段。
offset 是 exp_str 字段的偏移量,即 offset 为N ,指的是在表中从当前行位置向前数N行就是我们所要找的那一行了。
比如说,
在表中,假设当前我们说的当前行在表中排在第四行,则 offset 为3时表示的是我们所要找的数据行就是表中的第一行(即4-3=1)。
offset 的默认值为1
lag()函数的返回值为在表中从当前行位置向前数N行的那一行上 exp_str 字段的值。
当在表中从当前行位置向前数N行已经超出了表的范围时,lag()函数将 defval 这个参数值作为函数的返回值。
比如说,
在表中,假设当前我们说的当前行在表中排在第四行,则 offset 为6时表示的是我们所要找的数据行就是表中的第-2行(即4-6=-2),这就表示我们所要找的数据行不在表中已经超出表的范围了,所以lag()函数将 defval 这个参数值作为函数的返回值。
defval 参数的默认值为空值 null,即如果在lag()函数中没有显式设置 defval 参数,当偏移超出范围时 lag() 函数的返回值为空值 null。
Lead函数的用法类似。*/

-- LAG: value from a preceding row in salary order (offset defaults to 1,
-- result is NULL when there is no such row).
SELECT
    ENAME,
    JOB,
    SAL,
    LAG(SAL) OVER (ORDER BY SAL) AS LAST_SAL
FROM EMP;

-- LAG with an explicit default: 500 is returned when there is no previous row.
SELECT
    ENAME,
    JOB,
    SAL,
    LAG(SAL, 1, 500) OVER (ORDER BY SAL) AS LAST_SAL
FROM EMP;

-- LAG looking two rows back.
SELECT
    ENAME,
    JOB,
    SAL,
    LAG(SAL, 2) OVER (ORDER BY SAL) AS LAST_SAL
FROM EMP;

-- LEAD: value from a following row in salary order. These columns hold the
-- NEXT salary, so the alias is NEXT_SAL rather than the original LAST_SAL.
SELECT
    ENAME,
    JOB,
    SAL,
    LEAD(SAL) OVER (ORDER BY SAL) AS NEXT_SAL
FROM EMP;

-- LEAD with the offset written out (same as the default of 1).
SELECT
    ENAME,
    JOB,
    SAL,
    LEAD(SAL, 1) OVER (ORDER BY SAL) AS NEXT_SAL
FROM EMP;

-- LEAD looking two rows ahead.
SELECT
    ENAME,
    JOB,
    SAL,
    LEAD(SAL, 2) OVER (ORDER BY SAL) AS NEXT_SAL
FROM EMP;

-- LEAD restarted for every JOB.
SELECT
    ENAME,
    JOB,
    SAL,
    LEAD(SAL, 1) OVER (PARTITION BY JOB ORDER BY SAL) AS NEXT_SAL
FROM EMP;

-- LAG restarted for every JOB.
SELECT
    ENAME,
    JOB,
    SAL,
    LAG(SAL, 1) OVER (PARTITION BY JOB ORDER BY SAL) AS LAST_SAL
FROM EMP;

-- The two highest-paid employees of every department.
-- ROW_NUMBER breaks salary ties arbitrarily, so exactly two rows per
-- department are returned (fewer if the department has fewer employees).
SELECT
    DEPTNO,
    ENAME,
    SAL,
    RN
FROM (
    SELECT
        DEPTNO,
        ENAME,
        SAL,
        ROW_NUMBER() OVER (PARTITION BY DEPTNO ORDER BY SAL DESC) AS RN
    FROM EMP
)
WHERE RN <= 2;

-- Employees ranked 6th through 10th by salary across the whole company.
SELECT
    ENAME,
    SAL,
    RN
FROM (
    SELECT
        ENAME,
        SAL,
        ROW_NUMBER() OVER (ORDER BY SAL DESC) AS RN
    FROM EMP
)
WHERE RN BETWEEN 6 AND 10;

-- Employees earning more than the average salary of their own department.
-- Written with an analytic AVG instead of joining EMP to its own aggregate.
SELECT
    DEPTNO,
    ENAME,
    SAL
FROM (
    SELECT
        DEPTNO,
        ENAME,
        SAL,
        AVG(SAL) OVER (PARTITION BY DEPTNO) AS DEPT_AVG_SAL
    FROM EMP
)
WHERE SAL > DEPT_AVG_SAL
  -- The join form never matched rows with a NULL DEPTNO; keep excluding them.
  AND DEPTNO IS NOT NULL;

-- How ordering interacts with LAST_VALUE.
-- (a) No query-level ORDER BY and no ORDER BY inside OVER(): the window is the
--     whole DEPTNO partition, but the row order inside it is unspecified, so
--     which SAL counts as "last" is not deterministic.
SELECT
    DEPTNO,
    EMPNO,
    ENAME,
    SAL,
    LAST_VALUE(SAL) OVER (PARTITION BY DEPTNO)
FROM EMP;

-- (b) ORDER BY inside OVER() but no explicit frame: the frame defaults to
--     RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, so LAST_VALUE only
--     sees rows up to the current row (and its ties).
SELECT
    DEPTNO,
    EMPNO,
    ENAME,
    SAL,
    LAST_VALUE(SAL) OVER (
        PARTITION BY DEPTNO
        ORDER BY SAL DESC
    )
FROM EMP;

-- (c) ORDER BY plus an explicit whole-partition frame: with ascending SAL the
--     last value of the partition is the department's highest salary.
SELECT
    DEPTNO,
    EMPNO,
    ENAME,
    SAL,
    LAST_VALUE(SAL) OVER (
        PARTITION BY DEPTNO
        ORDER BY SAL
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS MAX_SAL
FROM EMP;

-- Query-level ORDER BY (ascending) with no ORDER BY inside OVER().
-- Oracle evaluates analytic functions before the final ORDER BY, so the outer
-- sort only arranges the output rows; it does not define the row order inside
-- the window. LAST_VALUE over an unordered partition is therefore not
-- guaranteed to be the last row of the sorted output.
SELECT
    DEPTNO,
    MGR,
    ENAME,
    SAL,
    HIREDATE,
    LAST_VALUE(SAL) OVER (PARTITION BY DEPTNO) LAST_VALUE
FROM EMP
ORDER BY
    DEPTNO,
    SAL;

-- Same query with the outer sort descending on SAL; the same caveat applies.
SELECT
    DEPTNO,
    MGR,
    ENAME,
    SAL,
    HIREDATE,
    LAST_VALUE(SAL) OVER (PARTITION BY DEPTNO) LAST_VALUE
FROM EMP
ORDER BY
    DEPTNO,
    SAL DESC;

-- Query-level ORDER BY (descending) together with ORDER BY inside OVER() and
-- no explicit frame. The rows are selected, the window sorts its partition and
-- computes LAST_VALUE over the default frame (first row through current row),
-- and only then is the outer ORDER BY applied to the result.
SELECT
    DEPTNO,
    MGR,
    ENAME,
    SAL,
    HIREDATE,
    LAST_VALUE(SAL) OVER (
        PARTITION BY DEPTNO
        ORDER BY SAL ASC
    ) LAST_VALUE
FROM EMP
ORDER BY
    DEPTNO,
    SAL DESC;

-- Same shape, with the window ordered descending like the outer sort.
SELECT
    DEPTNO,
    MGR,
    ENAME,
    SAL,
    HIREDATE,
    LAST_VALUE(SAL) OVER (
        PARTITION BY DEPTNO
        ORDER BY SAL DESC
    ) LAST_VALUE
FROM EMP
ORDER BY
    DEPTNO,
    SAL DESC;

--以下转载暂无数据源

-- COUNT() OVER(): row count of the window. PARTITION BY and ORDER BY are both
-- optional.

-- Total row count.
SELECT
    ENAME,
    ESEX,
    EAGE,
    COUNT(*) OVER ()
FROM EMP;

-- Running count in EAGE order.
SELECT
    ENAME,
    ESEX,
    EAGE,
    COUNT(*) OVER (ORDER BY EAGE)
FROM EMP;

-- Count per ESEX group.
SELECT
    ENAME,
    ESEX,
    EAGE,
    COUNT(*) OVER (PARTITION BY ESEX)
FROM EMP;

-- Running count per ESEX group in EAGE order.
SELECT
    ENAME,
    ESEX,
    EAGE,
    COUNT(*) OVER (PARTITION BY ESEX ORDER BY EAGE)
FROM EMP;

-- SUM() OVER(): total of the window. PARTITION BY and ORDER BY are both
-- optional.

-- Grand total.
SELECT
    ENAME,
    ESEX,
    EAGE,
    SUM(SALARY) OVER ()
FROM EMP;

-- Running total in EAGE order.
SELECT
    ENAME,
    ESEX,
    EAGE,
    SUM(SALARY) OVER (ORDER BY EAGE)
FROM EMP;

-- Total per ESEX group.
SELECT
    ENAME,
    ESEX,
    EAGE,
    SUM(SALARY) OVER (PARTITION BY ESEX)
FROM EMP;

-- Running total per ESEX group in EAGE order.
SELECT
    ENAME,
    ESEX,
    EAGE,
    SUM(SALARY) OVER (PARTITION BY ESEX ORDER BY EAGE)
FROM EMP;

-- AVG() OVER(): average of the window. PARTITION BY and ORDER BY are both
-- optional.

-- Overall average.
SELECT
    ENAME,
    ESEX,
    EAGE,
    AVG(SALARY) OVER ()
FROM EMP;

-- Running average in EAGE order.
SELECT
    ENAME,
    ESEX,
    EAGE,
    AVG(SALARY) OVER (ORDER BY EAGE)
FROM EMP;

-- Average per ESEX group.
SELECT
    ENAME,
    ESEX,
    EAGE,
    AVG(SALARY) OVER (PARTITION BY ESEX)
FROM EMP;

-- Running average per ESEX group in EAGE order.
SELECT
    ENAME,
    ESEX,
    EAGE,
    AVG(SALARY) OVER (PARTITION BY ESEX ORDER BY EAGE)
FROM EMP;

-- MIN() OVER() / MAX() OVER(): smallest / largest value of the window.
-- PARTITION BY and ORDER BY are both optional.

-- Overall minimum.
SELECT
    ENAME,
    ESEX,
    EAGE,
    SALARY,
    MIN(SALARY) OVER ()
FROM EMP;

-- Running minimum in EAGE order.
SELECT
    ENAME,
    ESEX,
    EAGE,
    SALARY,
    MIN(SALARY) OVER (ORDER BY EAGE)
FROM EMP;

-- Minimum per ESEX group.
SELECT
    ENAME,
    ESEX,
    EAGE,
    SALARY,
    MIN(SALARY) OVER (PARTITION BY ESEX)
FROM EMP;

-- Running minimum per ESEX group in EAGE order.
SELECT
    ENAME,
    ESEX,
    EAGE,
    SALARY,
    MIN(SALARY) OVER (PARTITION BY ESEX ORDER BY EAGE)
FROM EMP;

-- Overall maximum.
SELECT
    ENAME,
    ESEX,
    EAGE,
    SALARY,
    MAX(SALARY) OVER ()
FROM EMP;

-- Running maximum in EAGE order.
SELECT
    ENAME,
    ESEX,
    EAGE,
    SALARY,
    MAX(SALARY) OVER (ORDER BY EAGE)
FROM EMP;

-- Maximum per ESEX group.
SELECT
    ENAME,
    ESEX,
    EAGE,
    SALARY,
    MAX(SALARY) OVER (PARTITION BY ESEX)
FROM EMP;

-- Running maximum per ESEX group in EAGE order.
SELECT
    ENAME,
    ESEX,
    EAGE,
    SALARY,
    MAX(SALARY) OVER (PARTITION BY ESEX ORDER BY EAGE)
FROM EMP;

-- RANK() OVER(): ranking with gaps after ties. PARTITION BY is optional,
-- ORDER BY is required.
SELECT
    ENAME,
    EAGE,
    RANK() OVER (PARTITION BY JOB ORDER BY EAGE)
FROM EMP;

SELECT
    ENAME,
    EAGE,
    RANK() OVER (ORDER BY EAGE)
FROM EMP;

-- DENSE_RANK() OVER(): ranking without gaps. PARTITION BY is optional,
-- ORDER BY is required.
SELECT
    ENAME,
    EAGE,
    DENSE_RANK() OVER (PARTITION BY JOB ORDER BY EAGE)
FROM EMP;

SELECT
    ENAME,
    EAGE,
    DENSE_RANK() OVER (ORDER BY EAGE)
FROM EMP;

-- ROW_NUMBER() OVER(): consecutive numbering with no repeated values.
-- PARTITION BY is optional, ORDER BY is required.
SELECT
    ENAME,
    EAGE,
    ROW_NUMBER() OVER (PARTITION BY JOB ORDER BY EAGE)
FROM EMP;

SELECT
    ENAME,
    EAGE,
    ROW_NUMBER() OVER (ORDER BY EAGE)
FROM EMP;

-- NTILE(n) OVER(): PARTITION BY is optional, ORDER BY is required.
-- Splits the ordered rows of the window into n buckets of equal size; leftover
-- rows are handed out one each to the first buckets, in order.
SELECT
    ENAME,
    SALARY,
    NTILE(3) OVER (ORDER BY SALARY DESC)
FROM EMP;

SELECT
    ENAME,
    SALARY,
    NTILE(3) OVER (PARTITION BY JOB ORDER BY SALARY DESC)
FROM EMP;

-- FIRST_VALUE() OVER(): value from the first row of the window frame.
-- LAST_VALUE() OVER():  value from the last row of the window frame.
-- PARTITION BY and ORDER BY are both optional. Without ORDER BY the row order
-- inside the window is unspecified. With ORDER BY and no explicit frame the
-- frame ends at the current row (and its ties), so LAST_VALUE does not reach
-- the end of the partition.
SELECT
    ENAME,
    FIRST_VALUE(SALARY) OVER ()
FROM EMP;

SELECT
    ENAME,
    FIRST_VALUE(SALARY) OVER (ORDER BY SALARY DESC)
FROM EMP;

SELECT
    ENAME,
    FIRST_VALUE(SALARY) OVER (PARTITION BY JOB)
FROM EMP;

SELECT
    ENAME,
    FIRST_VALUE(SALARY) OVER (PARTITION BY JOB ORDER BY SALARY DESC)
FROM EMP;

SELECT
    ENAME,
    LAST_VALUE(ENAME) OVER ()
FROM EMP;

SELECT
    ENAME,
    LAST_VALUE(ENAME) OVER (ORDER BY SALARY DESC)
FROM EMP;

SELECT
    ENAME,
    LAST_VALUE(ENAME) OVER (PARTITION BY JOB)
FROM EMP;

SELECT
    ENAME,
    LAST_VALUE(ENAME) OVER (PARTITION BY JOB ORDER BY SALARY DESC)
FROM EMP;

-- KEEP (DENSE_RANK FIRST ...): aggregate only the rows that rank first under
--                              the given ordering.
-- KEEP (DENSE_RANK LAST ...):  aggregate only the rows that rank last.
-- Ordered by SALARY DESC, FIRST keeps the highest-paid rows of each job and
-- LAST keeps the lowest-paid rows.
SELECT
    JOB,
    MAX(SALARY) KEEP (DENSE_RANK FIRST ORDER BY SALARY DESC),
    MAX(SALARY) KEEP (DENSE_RANK LAST ORDER BY SALARY DESC)
FROM EMP
GROUP BY JOB;

-- LAG() OVER():  value from the row n positions before the current row.
-- LEAD() OVER(): value from the row n positions after the current row.
-- PARTITION BY is optional, ORDER BY is required.

-- Previous and next EAGE in salary order; 0 when there is no such row.
SELECT
    ENAME,
    EAGE,
    LAG(EAGE, 1, 0) OVER (ORDER BY SALARY),
    LEAD(EAGE, 1, 0) OVER (ORDER BY SALARY)
FROM EMP;

-- Previous and next EAGE within each ESEX group; NULL when there is no such
-- row.
SELECT
    ENAME,
    EAGE,
    LAG(EAGE, 1) OVER (PARTITION BY ESEX ORDER BY SALARY),
    LEAD(EAGE, 1) OVER (PARTITION BY ESEX ORDER BY SALARY)
FROM EMP;

-- RATIO_TO_REPORT(a) OVER (PARTITION BY b): share of a in the total of a over
-- the window. a must be numeric. PARTITION BY is optional; ORDER BY is not
-- allowed.

-- Every row contributes 1, so each row's share is 1 / (total row count),
-- e.g. 0.1 for a 10-row table.
SELECT
    ENAME,
    JOB,
    SALARY,
    RATIO_TO_REPORT(1) OVER ()
FROM EMP;

-- Share of this row's salary in the total of all salaries.
SELECT
    ENAME,
    JOB,
    SALARY,
    RATIO_TO_REPORT(SALARY) OVER ()
FROM EMP;

-- Every row contributes 1: share is 1 / (row count of the JOB group).
SELECT
    ENAME,
    JOB,
    SALARY,
    RATIO_TO_REPORT(1) OVER (PARTITION BY JOB)
FROM EMP;

-- Share of this row's salary in the salary total of its JOB group.
SELECT
    ENAME,
    JOB,
    SALARY,
    RATIO_TO_REPORT(SALARY) OVER (PARTITION BY JOB)
FROM EMP;

-- PERCENT_RANK() OVER(): PARTITION BY is optional, ORDER BY is required.
-- (rank within the window - 1) / (rows in the window - 1), using RANK-style
-- ranks (gaps after ties).
SELECT
    ENAME,
    JOB,
    SALARY,
    PERCENT_RANK() OVER (ORDER BY SALARY)
FROM EMP;

SELECT
    ENAME,
    JOB,
    SALARY,
    PERCENT_RANK() OVER (PARTITION BY JOB ORDER BY SALARY)
FROM EMP;

-- CUME_DIST() OVER(): PARTITION BY is optional, ORDER BY is required.
-- (position within the window) / (rows in the window); tied rows all use the
-- position of the last row of the tie.
SELECT
    ENAME,
    JOB,
    SALARY,
    CUME_DIST() OVER (ORDER BY SALARY)
FROM EMP;

SELECT
    ENAME,
    JOB,
    SALARY,
    CUME_DIST() OVER (PARTITION BY JOB ORDER BY SALARY)
FROM EMP;

/*
PERCENTILE_CONT(x) WITHIN GROUP (ORDER BY ...) OVER():
the OVER() clause may contain PARTITION BY but not ORDER BY.
x is a fraction between 0 and 1. The result is the value at that percentile
position, linearly interpolated when the position falls between two rows:
  rn  = 1 + x * (N - 1)     N = number of rows in the window with a non-null
                            ordering value
  crn = CEIL(rn)            row just after the position
  frn = FLOOR(rn)           row just before the position
  if crn = frn: result = value at row rn
  otherwise:    result = (crn - rn) * (value at row frn)
                       + (rn - frn) * (value at row crn)
*/

-- Median salary over all rows.
SELECT
    ENAME,
    JOB,
    SALARY,
    PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY SALARY) OVER ()
FROM EMP;

-- Median salary per JOB.
SELECT
    ENAME,
    JOB,
    SALARY,
    PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY SALARY) OVER (PARTITION BY JOB)
FROM EMP;

-- PERCENTILE_DISC(x) WITHIN GROUP (ORDER BY ...) OVER():
-- the OVER() clause may contain PARTITION BY but not ORDER BY.
-- x is a fraction between 0 and 1. The result is always an actual value from
-- the data: the value at that percentile position, or the next larger value
-- when no row sits exactly at the position.
SELECT
    ENAME,
    JOB,
    SALARY,
    PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY SALARY) OVER ()
FROM EMP;

SELECT
    ENAME,
    JOB,
    SALARY,
    PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY SALARY) OVER (PARTITION BY JOB)
FROM EMP;

/*
STDDEV() OVER():      sample standard deviation; returns 0 for a single row.
STDDEV_SAMP() OVER(): sample standard deviation; returns NULL for a single row.
STDDEV_POP() OVER():  population standard deviation.
PARTITION BY and ORDER BY are optional for all three.
*/

-- STDDEV: all rows / running / per major / running per major.
SELECT STDDEV(STU_AGE) OVER ()
FROM STUDENT;

SELECT STDDEV(STU_AGE) OVER (ORDER BY STU_AGE)
FROM STUDENT;

SELECT STDDEV(STU_AGE) OVER (PARTITION BY STU_MAJOR)
FROM STUDENT;

SELECT STDDEV(STU_AGE) OVER (PARTITION BY STU_MAJOR ORDER BY STU_AGE)
FROM STUDENT;

-- STDDEV_SAMP: all rows / running / per major / running per major.
SELECT STDDEV_SAMP(STU_AGE) OVER ()
FROM STUDENT;

SELECT STDDEV_SAMP(STU_AGE) OVER (ORDER BY STU_AGE)
FROM STUDENT;

SELECT STDDEV_SAMP(STU_AGE) OVER (PARTITION BY STU_MAJOR)
FROM STUDENT;

SELECT STDDEV_SAMP(STU_AGE) OVER (PARTITION BY STU_MAJOR ORDER BY STU_AGE)
FROM STUDENT;

-- STDDEV_POP: all rows / running / per major / running per major.
SELECT STDDEV_POP(STU_AGE) OVER ()
FROM STUDENT;

SELECT STDDEV_POP(STU_AGE) OVER (ORDER BY STU_AGE)
FROM STUDENT;

SELECT STDDEV_POP(STU_AGE) OVER (PARTITION BY STU_MAJOR)
FROM STUDENT;

SELECT STDDEV_POP(STU_AGE) OVER (PARTITION BY STU_MAJOR ORDER BY STU_AGE)
FROM STUDENT;

/*
VARIANCE() OVER(): sample variance; returns 0 for a single row.
VAR_SAMP() OVER(): sample variance; returns NULL for a single row.
VAR_POP() OVER():  population variance.
PARTITION BY and ORDER BY are optional for all three.
*/

-- VARIANCE: all rows / running / per major / running per major.
SELECT VARIANCE(STU_AGE) OVER ()
FROM STUDENT;

SELECT VARIANCE(STU_AGE) OVER (ORDER BY STU_AGE)
FROM STUDENT;

SELECT VARIANCE(STU_AGE) OVER (PARTITION BY STU_MAJOR)
FROM STUDENT;

SELECT VARIANCE(STU_AGE) OVER (PARTITION BY STU_MAJOR ORDER BY STU_AGE)
FROM STUDENT;

-- VAR_SAMP: all rows / running / per major / running per major.
SELECT VAR_SAMP(STU_AGE) OVER ()
FROM STUDENT;

SELECT VAR_SAMP(STU_AGE) OVER (ORDER BY STU_AGE)
FROM STUDENT;

SELECT VAR_SAMP(STU_AGE) OVER (PARTITION BY STU_MAJOR)
FROM STUDENT;

SELECT VAR_SAMP(STU_AGE) OVER (PARTITION BY STU_MAJOR ORDER BY STU_AGE)
FROM STUDENT;

-- VAR_POP (population variance): all rows / running / per major / running
-- per major.
SELECT VAR_POP(STU_AGE) OVER ()
FROM STUDENT;

SELECT VAR_POP(STU_AGE) OVER (ORDER BY STU_AGE)
FROM STUDENT;

SELECT VAR_POP(STU_AGE) OVER (PARTITION BY STU_MAJOR)
FROM STUDENT;

SELECT VAR_POP(STU_AGE) OVER (PARTITION BY STU_MAJOR ORDER BY STU_AGE)
FROM STUDENT;

/*
stddev()=sqrt( variance() )     sqrt()--求开方
stddev_samp()=sqrt( var_samp() )
stddev_pop()=sqrt( var_pop() )
*/

-- COVAR_SAMP() OVER(): sample covariance of a pair of expressions.
-- COVAR_POP() OVER():  population covariance of a pair of expressions.
-- PARTITION BY and ORDER BY are optional for both.

-- COVAR_SAMP: all rows / running / per major / running per major.
SELECT COVAR_SAMP(STU_AGE, LINE) OVER ()
FROM STUDENT;

SELECT COVAR_SAMP(STU_AGE, LINE) OVER (ORDER BY STU_AGE)
FROM STUDENT;

SELECT COVAR_SAMP(STU_AGE, LINE) OVER (PARTITION BY STU_MAJOR)
FROM STUDENT;

SELECT COVAR_SAMP(STU_AGE, LINE) OVER (PARTITION BY STU_MAJOR ORDER BY STU_AGE)
FROM STUDENT;

-- COVAR_POP: all rows / running / per major / running per major.
SELECT COVAR_POP(STU_AGE, LINE) OVER ()
FROM STUDENT;

SELECT COVAR_POP(STU_AGE, LINE) OVER (ORDER BY STU_AGE)
FROM STUDENT;

SELECT COVAR_POP(STU_AGE, LINE) OVER (PARTITION BY STU_MAJOR)
FROM STUDENT;

SELECT COVAR_POP(STU_AGE, LINE) OVER (PARTITION BY STU_MAJOR ORDER BY STU_AGE)
FROM STUDENT;

-- CORR() OVER(): correlation coefficient of a pair of expressions.
-- PARTITION BY and ORDER BY are both optional.

-- All rows / running / per major / running per major.
SELECT CORR(STU_AGE, LINE) OVER ()
FROM STUDENT;

SELECT CORR(STU_AGE, LINE) OVER (ORDER BY STU_AGE)
FROM STUDENT;

SELECT CORR(STU_AGE, LINE) OVER (PARTITION BY STU_MAJOR)
FROM STUDENT;

SELECT CORR(STU_AGE, LINE) OVER (PARTITION BY STU_MAJOR ORDER BY STU_AGE)
FROM STUDENT;

--实际案例

-- Demo table for the worked examples: one row per billing month, area and
-- network type, holding the local call charge.
CREATE TABLE T (
    BILL_MONTH VARCHAR2(12),  -- billing month as text; the sample data uses 'YYYYMM'
    AREA_CODE  NUMBER,        -- area identifier
    -- VARCHAR2 rather than VARCHAR: Oracle recommends VARCHAR2 and reserves
    -- VARCHAR for possible future changes in comparison semantics.
    NET_TYPE   VARCHAR2(2),   -- network type code; the sample data uses 'G' and 'J'
    LOCAL_FARE NUMBER         -- local call charge
);

-- Sample data: four months (200405-200408) x five areas (5761-5765) x two
-- network types (G, J). Every INSERT names its target columns so the rows
-- stay correct if the column order of T ever changes.
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200405', 5761, 'G', 7393344.04);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200405', 5761, 'J', 5667089.85);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200405', 5762, 'G', 6315075.96);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200405', 5762, 'J', 6328716.15);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200405', 5763, 'G', 8861742.59);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200405', 5763, 'J', 7788036.32);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200405', 5764, 'G', 6028670.45);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200405', 5764, 'J', 6459121.49);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200405', 5765, 'G', 13156065.77);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200405', 5765, 'J', 11901671.70);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200406', 5761, 'G', 7614587.96);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200406', 5761, 'J', 5704343.05);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200406', 5762, 'G', 6556992.60);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200406', 5762, 'J', 6238068.05);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200406', 5763, 'G', 9130055.46);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200406', 5763, 'J', 7990460.25);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200406', 5764, 'G', 6387706.01);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200406', 5764, 'J', 6907481.66);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200406', 5765, 'G', 13562968.81);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200406', 5765, 'J', 12495492.50);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200407', 5761, 'G', 7987050.65);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200407', 5761, 'J', 5723215.28);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200407', 5762, 'G', 6833096.68);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200407', 5762, 'J', 6391201.44);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200407', 5763, 'G', 9410815.91);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200407', 5763, 'J', 8076677.41);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200407', 5764, 'G', 6456433.23);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200407', 5764, 'J', 6987660.53);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200407', 5765, 'G', 14000101.20);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200407', 5765, 'J', 12301780.20);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200408', 5761, 'G', 8085170.84);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200408', 5761, 'J', 6050611.37);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200408', 5762, 'G', 6854584.22);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200408', 5762, 'J', 6521884.50);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200408', 5763, 'G', 9468707.65);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200408', 5763, 'J', 8460049.43);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200408', 5764, 'G', 6587559.23);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200408', 5764, 'J', 7342135.86);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200408', 5765, 'G', 14450586.63);
INSERT INTO T (BILL_MONTH, AREA_CODE, NET_TYPE, LOCAL_FARE) VALUES ('200408', 5765, 'J', 12680052.38);
COMMIT;

-- For every month, the areas with the highest and the lowest total call
-- charges, repeated on each (month, area) row.
WITH monthly_area_fare AS (
    -- Total charge per month and area (network types added together).
    SELECT
        BILL_MONTH,
        AREA_CODE,
        SUM(LOCAL_FARE) AS LOCAL_FARE
    FROM T
    GROUP BY
        BILL_MONTH,
        AREA_CODE
)
SELECT
    BILL_MONTH,
    AREA_CODE,
    LOCAL_FARE,
    -- The frame covers the whole month, so FIRST_VALUE / LAST_VALUE really are
    -- the top and bottom rows of the descending sort.
    FIRST_VALUE(AREA_CODE) OVER (
        PARTITION BY BILL_MONTH
        ORDER BY LOCAL_FARE DESC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS FIRSTVAL,
    LAST_VALUE(AREA_CODE) OVER (
        PARTITION BY BILL_MONTH
        ORDER BY LOCAL_FARE DESC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) AS LASTVAL
FROM monthly_area_fare
ORDER BY BILL_MONTH;

-- Three-month moving statistics of the total call charge per area: for every
-- (area, month) the window covers the previous, the current and the next
-- calendar month. The first and last month of an area only have two months
-- in their window.
--
-- The window is ordered by MONTH_SEQ (year * 12 + month) rather than by
-- TO_NUMBER(BILL_MONTH): as plain numbers 200412 and 200501 are 89 apart, so
-- a RANGE of 1 PRECEDING / 1 FOLLOWING would miss the neighbouring month at
-- every year boundary. MONTH_SEQ is consecutive across years.
-- Assumes BILL_MONTH is always a 'YYYYMM' string, as in the sample data.
SELECT
    AREA_CODE,
    BILL_MONTH,
    LOCAL_FARE,
    SUM(LOCAL_FARE) OVER (
        PARTITION BY AREA_CODE
        ORDER BY MONTH_SEQ
        RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING
    ) AS "3month_sum",
    AVG(LOCAL_FARE) OVER (
        PARTITION BY AREA_CODE
        ORDER BY MONTH_SEQ
        RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING
    ) AS "3month_avg",
    MAX(LOCAL_FARE) OVER (
        PARTITION BY AREA_CODE
        ORDER BY MONTH_SEQ
        RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING
    ) AS "3month_max",
    MIN(LOCAL_FARE) OVER (
        PARTITION BY AREA_CODE
        ORDER BY MONTH_SEQ
        RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING
    ) AS "3month_min"
FROM (
    -- Total charge per area and month, plus a consecutive month index.
    SELECT
        T.AREA_CODE,
        T.BILL_MONTH,
        SUM(T.LOCAL_FARE) AS LOCAL_FARE,
        TO_NUMBER(SUBSTR(T.BILL_MONTH, 1, 4)) * 12
            + TO_NUMBER(SUBSTR(T.BILL_MONTH, 5, 2)) AS MONTH_SEQ
    FROM T
    GROUP BY
        T.AREA_CODE,
        T.BILL_MONTH
);

-- Running total of call charges per area, accumulated month by month.
WITH area_month_fare AS (
    -- Total charge per area and month (network types added together).
    SELECT
        T.AREA_CODE,
        T.BILL_MONTH,
        SUM(T.LOCAL_FARE) AS LOCAL_FARE
    FROM T
    GROUP BY
        T.AREA_CODE,
        T.BILL_MONTH
)
SELECT
    AREA_CODE,
    BILL_MONTH,
    LOCAL_FARE,
    SUM(LOCAL_FARE) OVER (
        PARTITION BY AREA_CODE
        ORDER BY BILL_MONTH ASC
    ) AS "LAST_SUM_VALUE"
FROM area_month_fare
ORDER BY
    AREA_CODE,
    BILL_MONTH;
发布了35 篇原创文章 · 获赞 7 · 访问量 3315

猜你喜欢

转载自blog.csdn.net/Hjchidaozhe/article/details/100799294
今日推荐