5시간코딩
data export, import 본문
ORACLE
1. export 하기
1-1. sqldeveloper > 도구 > 데이터베이스 익스포트
1-2. spool
2. import 하기
cmd창과 메모장 사용
direct = true 기능
R
getwd()
setwd("c:/data")
.libPaths() #라이브러리 위치임. 그곳에 집적 패키지를 넣을 수 있음
1. text
1-1.읽기
a <- readline("c:/data/talk.txt") #1개컬럼만 가져옴
a <- readLines("c:/data/talk.txt",encoding='UTF-8')
1-2.쓰기
writeLines(a, "c:/data/emp_writeLines.txt")
class(a)#character
2. csv
2-1.읽기
df <- read.csv("c:/data/employees.csv",header=TRUE,stringsAsFactors = FALSE)
2-2.쓰기
write.csv(df,"c:/data/emp_write.csv",row.names=TRUE)
class(df)#data.frame
3.table(data.frame)
3-1.
df1<-read.table("c:/data/employees.csv",header=T,stringsAsFactors = F,sep=',') #sep=',' 해줘야 스페이스로 구분하지 않음
class(df1)#data.frame
3-2.
write.table(df1,"c:/data/emp_write.table",row.names = T, col.names = F) #col.names 됨
4.excel
4-1.
install.packages("readxl")
library(readxl)
install.packages("xlsx")
library(xlsx)
data <- read_excel("c:/data/test.xlsx",sheet=1,col_names=T,col_types="guess")
data<-read_excel("c:/data/test.xlsx",sheet=1,col_names=T,
col_types=c("guess","guess","text")) #급여 컬럼을 text로 안가져 오면 type guessing을 이상하게 함
4-2.
복잡함
5.sql 직접 접속
1)
install.packages("RJDBC")
library(RJDBC)
- java가 깔려 있어야함(확인법 : cmd창에서 java -version 치면 됨)
install.packages('installr')
library(installr)
install.java()
2)경로/파일이름(기니까 변수에 넣음)
jdbcDriver <- JDBC(driverClass="oracle.jdbc.OracleDriver",
classPath = "C:/Users/eogns/Desktop/이런저런공부/oracle/sqldeveloper/jdbc/lib/ojdbc6.jar") #ojdbc6.jar 위치 넣기
3)접속환경 생성(기니까 일단 변수에 넣음.
conn<-dbConnect(jdbcDriver,"jdbc:oracle:thin:@localhost:1521/xe","hr","hr")
emp<-dbGetQuery(conn, 'select * from employees')
str(emp)
emp
PYTHON
1. txt
1-1. 읽기
file = open("c:/data/movie.txt","r")
result = file.readlines() #read(), readline()
file.close()
print(result)
1-2. 쓰기
1)
with open("c:/data/movie1.txt","w") as file: #"a"
file.write(result)
2)
df=pd.Series(result)
df.to_csv("c:/data/movie2.txt",index=False,header=False)
2. csv
2-1. 읽기
1)
import csv
file = open("c:/data/emp_new.csv","r")
emp_csv = csv.reader(file)
title = next(emp_csv)
result = []
for i in emp_csv:
result.append(i)
file.close()
2)
import pandas as pd
df=pd.read_csv("c:/data/emp.csv")
df=pd.read_csv("c:/data/emp_new.csv",header=None,names=['c1','c2','c3','c4','c5','c6','c7','c8'])
2-2. 쓰기
1)
with open("c:/data/my_emp.csv","w") as file:
for i in result:
file.write(str(i)+"\n") #리스트형은 못씀
2)
df.to_csv("c:/data/my_emp1.csv",index=False,header=False)
3. excel
3-1. 읽기
import pandas as pd
data = pd.read_excel("c:/data/test.xlsx")
3-2. 쓰기
data.to_excel("c:/data/test1.xlsx")
data.to_excel("c:/data/test1.xlsx",header=None,index=None)
4. sqlite3
4-1.
import sqlite3
접속
#conn = sqlite3.connect(":memory:")
conn = sqlite3.connect("c:/data/insa.db")
c=conn.cursor()
테이블생성
c.execute("select name from sqlite_master where type='table'")
c.fetchall()
c.execute("drop table if exists emp")
c.execute("create table if not exists emp(id integer,name char,sal integer)")
c.execute("PRAGMA table_info(emp)")
c.fetchall()
#c.fetchone()
#c.fetchmany()#100개 default
데이터삽입1
c.execute("insert into emp(id,name,sal) values(100,'king',27000)")
c.execute("insert into emp(id,name,sal) values(101,'won',20000)")
c.execute("select * from emp")
c.fetchall()
conn.rollback()
#conn.commit()
데이터삽입2
temp = "insert into emp(id,name,sal) values(?,?,?)"
c.execute(temp,(101,'king',22000))
c.execute(temp,(102,'won',20000))
c.execute(temp,(103,'hong',30000))
c.execute("select * from emp")
c.fetchall()
접속종료
c.close()
conn.close()
4-2.
1) pandas -> sql
기존 데이터 삽입(ctas같은) : to_sql
import pandas as pd
df = pd.read_csv("c:/data/employees.csv")
df.to_sql('emp_copy',conn,index=False)
c.execute("select * from emp_copy")
c.fetchall()
2) sql -> pandas
emp_copy = pd.read_sql_query("select * from emp_copy",conn)
emp_copy
5. 오라클 서버 직접 접속
import cx_Oracle
#안될 경우 : Anaconda Prompt 실행 > pip install cx_Oracle --upgrade
#혹은 cmd창 -> python -m pip install cx_Oracle --upgrade
dsn=cx_Oracle.makedsn('localhost',1521,'XE') # 접속환경 : localhost는 oracle서버의 ip 주소, 1521는 포트, 'XE'는 인스턴스 이름
db=cx_Oracle.connect('hr','hr',dsn) #접속 계정
cursor=db.cursor()
cursor.execute("select * from employees") #여기선 oracle 문법을 쓰면 됨(파이썬문법아님)
#출력방법 3가지 : 1. cursor.fetchall() 2. cursor.fetchone() 3. cursor.fetchmany(100)
row = cursor.fetchall()
type(row) #list 안에 튜플 모양으로 저장됨
import pandas as pd
df=pd.DataFrame(row, columns=['EMPLOYEE_ID', 'FIRST_NAME', 'LAST_NAME', 'EMAIL', 'PHONE_NUMBER', 'HIRE_DATE', 'JOB_ID', 'SALARY', 'COMMISSION_PCT', 'MANAGER_ID', 'DEPARTMENT_ID'])
df.head()
df.info()
df.dtypes
6. 함수/클래스 -> 모듈화
6-1. 모듈명.py , incoding : UTF-8 로 저장
6-2. path 추가
import sys
sys.path
sys.path.remove('c:\\모듈명')
sys.path.append('c:\\모듈명')
6-3.
import myEmployee
emp2=myEmployee.Employee('원대','서울',1000) #클래스를 인스턴스화 하는 작업
emp2.myPrint()
###내용 더 있지만 생략###