자바전문가그룹


글번호	902
작성자	허진경
작성일	2018-09-11 12:19:11
제목	데이터 테이블 다루기
내용	데이터 테이블 다루기
# Working with data.table: setup, reading data, and the basic DT[i, j, by] form.
# Each statement is meant to be run interactively so its printed result can be
# inspected.

# Setup ----
# Install only when the package is missing, so re-running the script does not
# reinstall it every time.
if (!requireNamespace("data.table", quietly = TRUE)) {
  install.packages("data.table")
}
library(data.table)
?fread

# The curl package is installed here before fread() is pointed at an https URL.
if (!requireNamespace("curl", quietly = TRUE)) {
  install.packages("curl")
}
flights <- fread("https://raw.githubusercontent.com/wiki/arunsrinivasan/flights/NYCflights14/flights14.csv")
head(flights)
class(flights)

# Build a data.table from the built-in iris data frame ----
iris.table <- data.table(iris)
class(iris.table)

# Row selection and ordering (the i argument) ----
iris.table[Species == "setosa"]
iris.table[1:50]
iris.table[order(Sepal.Length)]

# Column selection (the j argument) ----
# A bare column name yields a vector; wrapping it in list() or .() yields a
# data.table. The class() calls show the difference.
iris.table[, Sepal.Length]
class(iris.table[, Sepal.Length])
class(iris.table[, list(Sepal.Length)])
iris.table[, list(Sepal.Length, Sepal.Width)]
iris.table[, .(Sepal.Length, Sepal.Width)]
iris.table[, .(SL = Sepal.Length, SW = Sepal.Width)]

# Filtering rows and counting the matches ----
iris.table[Sepal.Length >= 6, ]
iris.table[, sum(Sepal.Length >= 6)]
iris.table[Sepal.Length >= 6, length(Species)]

# Selecting several columns: data.frame syntax next to data.table syntax ----
iris[, c("Sepal.Length", "Sepal.Width", "Species")]
iris.table[, c("Sepal.Length", "Sepal.Width", "Species")]
iris.table[, .(Sepal.Length, Sepal.Width, Species)]
head(iris)
iris.table[, 1:4]
iris.table[, Sepal.Length:Petal.Width]
iris.table[, -(Sepal.Length:Petal.Width)]
iris.table[, .(Sepal.Length, Petal.Width)]

# Counting rows with .N ----
iris.table[, length(Species)]
iris.table[, .(.N)] # data.table
iris.table[, .N] # vector

# Print the number of rows per species.
iris.table[, .N, by = .(Species)]
iris.table[, .(.N), by = .(Species)]
iris.table[, .N, by = "Species"]
iris.table[, .N, by = Species]

# Print, per species, the number of rows whose Sepal.Length is greater than 5.
iris.table[Sepal.Length > 5, .N, by = Species]

# For the rows whose Sepal.Length is greater than 5, print the mean of
# Petal.Length and Petal.Width together with the row count, per species.
# Grouped aggregation on the rows with Sepal.Length > 5: mean petal length and
# width plus the row count for every species.
iris.table[
  Sepal.Length > 5,
  .(PL = mean(Petal.Length), PW = mean(Petal.Width), .N),
  by = Species
]
iris.table[
  Sepal.Length > 5,
  .(PL = mean(Petal.Length), PW = mean(Petal.Width), .N),
  keyby = .(Species)
]

# by versus keyby on deliberately reordered rows ----
# The rows are shuffled to versicolor, virginica, setosa first, so the group
# order of the two results can be compared.
iris.table[c(51:100, 101:150, 1:50), ][, .N, by = Species]
iris.table[c(51:100, 101:150, 1:50), ][, .N, keyby = Species]

# Per-species means (petal length and width, sepal length and width) ----
iris.table[, .SD]
iris.table[, .SD, by = Species]
iris.table[, print(.SD), by = Species]
iris.table[
  ,
  .(
    mean(Petal.Length), mean(Petal.Width),
    mean(Sepal.Length), mean(Sepal.Width)
  ),
  by = Species
]
iris.table[, lapply(.SD, mean), by = Species]

# Compute the mean petal length and width only.
iris.table[
  ,
  lapply(.SD, mean),
  by = Species,
  .SDcols = c("Petal.Length", "Petal.Width")
]
iris.table[, head(.SD, 2), by = Species]

# Print the first five rows of each species.
iris.table[, head(.SD, 5), by = Species]

# Print the first five values of each species in the following layout:
# setosa 5.1 4.9 4.7 4.6 ...
iris.table[, .(val = list(.SD)), by = Species]
iris.table[, .(val = list(c(.SD))), by = Species]
iris.table[, .(val = list(c(Sepal.Length, Sepal.Width))), by = Species]

# Print the first five sepal-length values of each species in this layout
# (the values shown are Sepal.Length, which is what the code below selects):
#       Species                 val
# 1:     setosa 5.1,4.9,4.7,4.6,5.0
# 2: versicolor 7.0,6.4,6.9,5.5,6.5
# 3:  virginica 6.3,5.8,7.1,6.3,6.5
iris.table[, .(val = list(head(Sepal.Length, 5))), by = Species]
iris.table[, list(head(Sepal.Length, 5)), by = Species]
iris.table[, head(Sepal.Length, 5), by = Species]

# Row names and conversion to data.table ----
# Install only when the package is missing, so re-running the script does not
# reinstall it every time.
if (!requireNamespace("random", quietly = TRUE)) {
  install.packages("random")
}
library(random)

# Give the rows of the iris data frame names: 150 unique two-letter
# upper-case strings.
# NOTE(review): randomStrings() presumably needs network access -- confirm.
rownames(iris) <- randomStrings(
  n = 150, len = 2, digits = FALSE,
  upperalpha = TRUE, loweralpha = FALSE, unique = TRUE
)
head(iris)

# Convert the data frame to a data.table.
iris.table <- as.data.table(iris)
head(iris.table)

# To keep the data frame's row names during the conversion ...
# Convert with keep.rownames = TRUE and inspect the result with head().
iris.table <- as.data.table(iris, keep.rownames = TRUE)
head(iris.table)

# Convert again, this time without keeping the row names.
iris.table <- as.data.table(iris)
head(iris.table)

# Setting a key ----
key(iris.table)
setkey(iris.table, Sepal.Length)
key(iris.table)
iris.table[Sepal.Length == 5, ]
system.time(iris.table[Sepal.Length == 5, ])

# A key made of two columns; setkeyv() takes the column names as a character
# vector instead of bare names.
setkey(iris.table, Sepal.Length, Sepal.Width)
key(iris.table)
setkeyv(iris.table, c("Sepal.Length", "Sepal.Width"))
key(iris.table)

# Update by reference: set Sepal.Width to 3 on the rows with Sepal.Length == 5.
# NOTE(review): Sepal.Width is one of the key columns at this point -- check
# key(iris.table) after this assignment to confirm what happens to the key.
iris.table[Sepal.Length == 5, Sepal.Width := 3]
iris.table[Sepal.Length == 5, ]

# Keyed lookups with .() ----
# Rebuild the table from iris so the update above is discarded, then key it
# again.
iris.table <- as.data.table(iris)
setkey(iris.table, Sepal.Length, Sepal.Width)
key(iris.table)
iris.table[.(5)]
iris.table[.(5, 3)]

# Print every row whose sepal length (the first key column) is 5 or 6.
iris.table[.(c(5, 6))]
iris.table[.(c(5, 6), 3)]

내용