Search This Blog

Tuesday, August 31, 2010

PARTITION BY  vs GROUP BY  
CREATE TABLE EMP
(
  EMPNO     NUMBER(15),
  ENAME     VARCHAR2(10 BYTE),
  JOB       VARCHAR2(9 BYTE),
  MGR       NUMBER(4),
  HIREDATE  DATE,
  SAL       NUMBER(7,2),
  COMM      NUMBER(7,2),
  DEPTNO    NUMBER(2)
);



Insert into EMP
   (EMPNO, ENAME, JOB, MGR, HIREDATE, SAL, DEPTNO)
 Values
   (7369, 'SMITH', 'CLERK', 7902, TO_DATE('12/17/2004 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 800, 20);
Insert into EMP
   (EMPNO, ENAME, JOB, MGR, HIREDATE, SAL, COMM, DEPTNO)
 Values
   (7499, 'ALLEN', 'SALES', 7698, TO_DATE('02/20/2005 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 1600, 300, 30);
Insert into NEMA.EMP
   (EMPNO, ENAME, JOB, MGR, HIREDATE, SAL, COMM, DEPTNO)
 Values
   (7521, 'WARD', 'SALES', 7698, TO_DATE('02/22/2003 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 1250, 500, 30);
Insert into EMP
   (EMPNO, ENAME, JOB, MGR, HIREDATE, SAL, DEPTNO)
 Values
   (7566, 'JONES', 'MANAGER', 7839, TO_DATE('04/02/2004 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 2975, 20);
Insert into EMP
   (EMPNO, ENAME, JOB, MGR, HIREDATE, SAL, COMM, DEPTNO)
 Values
   (7654, 'MARTIN', 'SALES', 7698, TO_DATE('09/28/2000 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 1250, 1400, 30);
Insert into EMP
   (EMPNO, ENAME, JOB, MGR, HIREDATE, SAL, DEPTNO)
 Values
   (7698, 'BLAKE', 'MANAGER', 7839, TO_DATE('05/01/2001 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 2850, 30);
Insert into EMP
   (EMPNO, ENAME, JOB, MGR, HIREDATE, SAL, DEPTNO)
 Values
   (7782, 'CLARK', 'MANAGER', 7839, TO_DATE('06/09/2002 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 2450, 10);
Insert into EMP
   (EMPNO, ENAME, JOB, MGR, HIREDATE, SAL, DEPTNO)
 Values
   (7788, 'HESS', 'ANALYST', 7566, TO_DATE('04/19/2003 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 3000, 20);
Insert into EMP
   (EMPNO, ENAME, JOB, HIREDATE, SAL, COMM, DEPTNO)
 Values
   (7839, 'KING', 'PRESIDENT', TO_DATE('11/17/1999 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 5000, 0, 10);
Insert into EMP
   (EMPNO, ENAME, JOB, MGR, HIREDATE, SAL, COMM, DEPTNO)
 Values
   (7844, 'TURNER', 'SALES', 7698, TO_DATE('09/08/2001 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 1500, 0, 30);
Insert into EMP
   (EMPNO, ENAME, JOB, MGR, HIREDATE, SAL, DEPTNO)
 Values
   (7876, 'ADAMS', 'CLERK', 7788, TO_DATE('05/23/2002 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 1100, 20);
Insert into EMP
   (EMPNO, ENAME, JOB, MGR, HIREDATE, SAL, DEPTNO)
 Values
   (7900, 'JAMES', 'CLERK', 7698, TO_DATE('12/03/2001 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 950, 30);
Insert into EMP
   (EMPNO, ENAME, JOB, MGR, HIREDATE, SAL, DEPTNO)
 Values
   (7902, 'FORD', 'ANALYST', 7566, TO_DATE('12/03/2003 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 3000, 20);
Insert into EMP
   (EMPNO, ENAME, JOB, MGR, HIREDATE, SAL, DEPTNO)
 Values
   (7934, 'MILLER', 'CLERK', 7782, TO_DATE('01/23/2005 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 1300, 10);


COMMIT;

- 14 rows
SELECT   * FROM emp;
-- 3 aggregated rows ( 1 per deptno : 10,20,30)
  SELECT   deptno, MAX (sal)
    FROM   emp
GROUP BY   deptno;
-- 14 rows with additional aggregated value from the above query (based on deptno )
SELECT   empno,
         ename,
         job,
         mgr,
         hiredate,
         sal,
         comm,
         deptno,
         MAX (sal) OVER (PARTITION BY deptno) deptno_max_sal
  FROM   emp;
-- The same result as above query ( without partition by )
SELECT   x.empno,
         x.ename,
         x.job,
         x.mgr,
         x.hiredate,
         x.sal,
         x.comm,
         y.deptno,
         y.deptno_max_sal
  FROM   (SELECT   * FROM emp) x, (  SELECT   deptno, MAX (sal) deptno_max_sal
                                       FROM   emp
                                   GROUP BY   deptno) y
 WHERE   x.deptno = y.deptno;
-- Cross check
SELECT   empno,
         ename,
         job,
         mgr,
         hiredate,
         sal,
         comm,
         deptno,
         MAX (sal) OVER (PARTITION BY deptno) deptno_max_sal
  FROM   emp
MINUS
SELECT   x.empno,
         x.ename,
         x.job,
         x.mgr,
         x.hiredate,
         x.sal,
         x.comm,
         y.deptno,
         y.deptno_max_sal
  FROM   (SELECT   * FROM emp) x, (  SELECT   deptno, MAX (sal) deptno_max_sal
                                       FROM   emp
                                   GROUP BY   deptno) y
 WHERE   x.deptno = y.deptno;
-- you get duplicate rows if you the same column as partition ( 14 rows with deptno, max sal)
SELECT   deptno, MAX (sal) OVER (PARTITION BY deptno) FROM emp;
-- on top of this you are doing distinct
SELECT   DISTINCT deptno, MAX (sal) OVER (PARTITION BY deptno) FROM emp;
-- which is equivalent to getting the aggregated result
  SELECT   deptno, MAX (sal)
    FROM   emp
GROUP BY   deptno;
-- when partition is done : Aggregated values are computed and those are appended to data rows based on the value of the partition column
-- means all the data rows with the same value of the partition column will get the same aggregated value.
-- Use 1:
-- Need data from other columns along with the aggregated values.
-- 14 rows with additional aggregated value from the above query (based on deptno )
SELECT   empno,
         ename,
         job,
         mgr,
         hiredate,
         sal,
         comm,
         deptno,
         MAX (sal) OVER (PARTITION BY deptno) deptno_max_sal
  FROM   emp;
-- The same result as above query ( without partition by )
SELECT   x.empno,
         x.ename,
         x.job,
         x.mgr,
         x.hiredate,
         x.sal,
         x.comm,
         y.deptno,
         y.deptno_max_sal
  FROM   (SELECT   * FROM emp) x, (  SELECT   deptno, MAX (sal) deptno_max_sal
                                       FROM   emp
                                   GROUP BY   deptno) y
 WHERE   x.deptno = y.deptno;
-- Use 2:
-- Need various aggregated values in one go.
SELECT   empno,
         ename,
         job,
         mgr,
         hiredate,
         sal,
         comm,
         deptno
  FROM   emp;

SELECT   empno,
         ename,
         job,
         mgr,
         hiredate,
         comm,
         deptno,
         sal,
         MAX (sal) OVER (PARTITION BY deptno) dept_max_sal,
         RANK () OVER (PARTITION BY deptno ORDER BY hiredate ASC)
            seniority_dept,
         ROUND ( (sal / (MAX (sal) OVER (PARTITION BY deptno))) * 100)
            sal_to_max_sal_percentage
  FROM   emp;

-- This is equivalent to joining the data with individual aggregated values.

-- 1  Sql query with deptno, max(sal)

-- 2  Sql query with ranking as per hiredate per deptno

-- 3  sql query with all the data

-- 4  Join the above 4 data sets using deptno column

-- 5  In project columns get the data columns from (3) , aggregated values from (1) and (2)