5시간코딩

[분석함수] department_id 별로 salary의 누적합을 구하여라 본문

문제풀기/테이블 조작 관련 문제

[분석함수] department_id 별로 salary의 누적합을 구하여라

5시간코딩 2019. 4. 11. 02:44

1. oracle

select employee_id, salary a,
first_value(salary) over() b,
last_value(salary) over() c,
min(salary) over() d,
max(salary) over() e,
sum(salary) over() f,
rank() over(order by salary desc) g,
dense_rank() over(order by salary desc) h
from employees;

select employee_id, department_id,
salary a,
sum(salary) over() b,
sum(salary) over(order by employee_id) c,
sum(salary) over(partition by department_id) d,
sum(salary) over(partition by department_id order by employee_id) e
from employees;

 

 

 

 

 

2. r

1)

ddply(emp[,c('EMPLOYEE_ID','DEPARTMENT_ID','SALARY')],.(),transform,sal=sum(SALARY))
ddply(emp[,c('EMPLOYEE_ID','DEPARTMENT_ID','SALARY')],.(),transform,sal=mean(SALARY))
ddply(emp[,c('EMPLOYEE_ID','DEPARTMENT_ID','SALARY')],.(),transform,sal=max(SALARY))
ddply(emp[,c('EMPLOYEE_ID','DEPARTMENT_ID','SALARY')],.(),transform,sal=min(SALARY))

ddply(emp[,c('EMPLOYEE_ID','DEPARTMENT_ID','SALARY')],.(EMPLOYEE_ID),transform,sal=sum(SALARY))
ddply(emp[,c('EMPLOYEE_ID','DEPARTMENT_ID','SALARY')],.(DEPARTMENT_ID),transform,sal=sum(SALARY))
ddply(emp[,c('JOB_ID','DEPARTMENT_ID','SALARY')],.(DEPARTMENT_ID,JOB_ID),transform,sal=sum(SALARY))

df<-ddply(emp[,c('EMPLOYEE_ID','DEPARTMENT_ID','SALARY')],.(DEPARTMENT_ID),transform,sal=sum(SALARY))
library(doBy)
orderBy(~sal,df)

 

2)

sqldf는 분석함수 지원을 안해줌. 추후 업데이트 될 수도 있음(작성일 : 2019-04-15)

 

 

 

 

 

 

3. python

1)

emp=pd.read_csv("c:/data/employees.csv")

emp1=emp.loc[:,['EMPLOYEE_ID','SALARY','DEPARTMENT_ID']]

emp1['SALARY'].cummin()

emp1['SALARY'].cummax()

emp1['SALARY'].cumsum()

emp1['sal']=emp.groupby(emp['DEPARTMENT_ID'])['SALARY'].apply(lambda x:x.cumsum())

 

2)sqlite

emp = pd.read_csv("c:/data/employees.csv")

import sqlite3
conn = sqlite3.connect(":memory:")
c = conn.cursor()
emp.to_sql('emp1',conn,index=False)
c.execute('select sum(salary) over() from emp1')
c.execute('select employee_id, salary, sum(salary) over(partition by department_id order by employee_id) from emp1')
c.fetchall()

emp_new = pd.read_sql_query("select employee_id, salary, sum(salary) over(partition by department_id order by employee_id) sal from emp1",conn)