Hive Analytical Functions
Hive Analytical Functions
Hive Analytical Functions
5.ROW_NUMBER-->is used to assign unique value to each row within group based on
column values user in over clause.
6.RANK -->is used to get rank of the rows in column or within group(NOTE:rows with
equal values recive the same rank with next rank value skipped)
7.DENSE_RANK-->it returns the rank of value in group,rows with equal values for
ranking recives the same rank and assign the rank in a sequential
order i.e. no rank rank values are skipped.
10.LEAD and LAG-->these are used tocompare diffrent rows of table by specifing an
offset from the current row.You can use these functions to
analyze the change and variation in the data.
*******************************************PRACTICALS******************************
******************************
set hive.cli.print.header=true;
***********************************************************************************
*********************
***********************************************************************************
********************
select ename,deptno,RANK() over(order by DEPTNO) as raknum from emp_bk;
OK
ename deptno raknum
KING 10 1
MILLER 10 1
CLARK 10 1
SMITH 20 4
FORD 20 4
ADAMS 20 4
JONES 20 4
SCOTT 20 4
ALLEN 30 9
TURNER 30 9
WARD 30 9
BLAKE 30 9
JAMES 30 9
MARTIN 30 9
Time taken: 21.904 seconds, Fetched: 14 row(s)
***********************************************************************************
********************
select ename,deptno,RANK() over(order by empno) as raknum from emp_bk;
OK
ename deptno raknum
SMITH 20 1
ALLEN 30 2
WARD 30 3
JONES 20 4
MARTIN 30 5
BLAKE 30 6
CLARK 10 7
SCOTT 20 8
KING 10 9
TURNER 30 10
ADAMS 20 11
JAMES 30 12
FORD 20 13
MILLER 10 14
Time taken: 22.97 seconds, Fetched: 14 row(s)
***********************************************************************************
********************
select ename,deptno,sal,DENSE_RANK() over(order by sal) as raknum from emp_bk;
OK
ename deptno sal raknum
SMITH 20 800.0 1
JAMES 30 950.0 2
ADAMS 20 1100.0 3
MARTIN 30 1250.0 4
WARD 30 1250.0 4
MILLER 10 1300.0 5
TURNER 30 1500.0 6
ALLEN 30 1600.0 7
CLARK 10 2450.0 8
BLAKE 30 2850.0 9
JONES 20 2975.0 10
FORD 20 3000.0 11
SCOTT 20 3000.0 11
KING 10 5000.0 12
Time taken: 21.664 seconds, Fetched: 14 row(s)
***********************************************************************************
********************
select empno,ename,deptno,FIRST_VALUE(empno) over(order by deptno) as
first_eid,LAST_VALUE(empno) over( order by deptno) as last_eid from emp_bk;
OK
empno ename deptno first_eid last_eid
7839 KING 10 7839 7782
7934 MILLER 10 7839 7782
7782 CLARK 10 7839 7782
7369 SMITH 20 7839 7788
7902 FORD 20 7839 7788
7876 ADAMS 20 7839 7788
7566 JONES 20 7839 7788
7788 SCOTT 20 7839 7788
7499 ALLEN 30 7839 7654
7844 TURNER 30 7839 7654
7521 WARD 30 7839 7654
7698 BLAKE 30 7839 7654
7900 JAMES 30 7839 7654
7654 MARTIN 30 7839 7654
Time taken: 21.916 seconds, Fetched: 14 row(s)
***********************************************************************************
********************
select ename,deptno,sal,lead(sal,1)over(order by deptno) as leadval,lag(sal,1)over
(order by deptno) from emp_bk;
OK
ename deptno sal leadval _wcol1
KING 10 5000.0 1300.0 NULL
MILLER 10 1300.0 2450.0 5000.0
CLARK 10 2450.0 800.0 1300.0
SMITH 20 800.0 3000.0 2450.0
FORD 20 3000.0 1100.0 800.0
ADAMS 20 1100.0 2975.0 3000.0
JONES 20 2975.0 3000.0 1100.0
SCOTT 20 3000.0 1600.0 2975.0
ALLEN 30 1600.0 1500.0 3000.0
TURNER 30 1500.0 1250.0 1600.0
WARD 30 1250.0 2850.0 1500.0
BLAKE 30 2850.0 950.0 1250.0
JAMES 30 950.0 1250.0 2850.0
MARTIN 30 1250.0 NULL 950.0
Time taken: 21.76 seconds, Fetched: 14 row(s)
***********************************************************************************
********************