티스토리 뷰

R

(macOS)[R] sqldf, plyr

jinozpersona 2022. 3. 30. 11:02

INTRO

sqldf를 이용한 DB 조회

Rstudio Console> install.packages("sqldf")

plyr을 이용한 데이터 처리

 내장함수이며 데이터 분리, 결합 등 데이터 처리기능 제공

# 참고 : dplyr은 c++로 구현되어 속도가 빠르고 chaining(var >> calc1 >> calc2, 중간변수 없이 최종값 계산)을 사용할 수 있음. 설치 필요

Rstudio Console> install.packages("dplyr")

 

sqldf

test_sqldf.R
rm(list=ls())
setwd = "~/Rcoding"

## sqldf
library(sqldf)
data(iris)
sqldf("select * from iris")
sqldf("select * from iris limit 10")
sqldf("select count(*) from iris where Species like 'se%' ")

출력결과

> source("~/Rcoding/test_sqldf.R", echo=TRUE)

> rm(list=ls())

> setwd = "~/Rcoding"

> ## sqldf
> library(sqldf)

> data(iris)

> sqldf("select * from iris")
    Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
1            5.1         3.5          1.4         0.2     setosa
2            4.9         3.0          1.4         0.2     setosa
3            4.7         3.2          1.3         0.2     setosa
4            4.6         3.1          1.5         0.2     setosa
5            5.0         3.6          1.4         0.2     setosa
6            5.4         3.9          1.7         0.4     setosa
7            4.6         3.4          1.4         0.3     setosa
8            5.0         3.4          1.5         0.2     setosa
9            4.4         2.9          1.4         0.2     setosa
10           4.9         3.1          1.5         0.1     setosa
11           5.4         3.7          1.5         0.2     setosa
12           4.8         3.4          1.6         0.2     setosa
13           4.8         3.0          1.4         0.1     setosa
14           4.3         3.0          1.1         0.1     setosa
15           5.8         4.0          1.2         0.2     setosa
16           5.7         4.4          1.5         0.4     setosa
17           5.4         3.9          1.3         0.4     setosa
18           5.1         3.5          1.4         0.3     setosa
19           5.7         3.8          1.7         0.3     setosa
20           5.1         3.8          1.5         0.3     setosa
21           5.4         3.4          1.7         0.2     setosa
22           5.1         3.7          1.5         0.4     setosa
23           4.6         3.6          1.0         0.2     setosa
24           5.1         3.3          1.7         0.5     setosa
25           4.8         3.4          1.9         0.2     setosa
26           5.0         3.0          1.6         0.2     setosa
27           5.0         3.4          1.6         0.4     setosa
28           5.2         3.5          1.5         0.2     setosa
29           5.2         3.4          1.4         0.2     setosa
30           4.7         3.2          1.6         0.2     setosa
31           4.8         3.1          1.6         0.2     setosa
32           5.4         3.4          1.5         0.4     setosa
33           5.2         4.1          1.5         0.1     setosa
....
144          6.8         3.2          5.9         2.3  virginica
145          6.7         3.3          5.7         2.5  virginica
146          6.7         3.0          5.2         2.3  virginica
147          6.3         2.5          5.0         1.9  virginica
148          6.5         3.0          5.2         2.0  virginica
149          6.2         3.4          5.4         2.3  virginica
150          5.9         3.0          5.1         1.8  virginica

> sqldf("select * from iris limit 10")
   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
1           5.1         3.5          1.4         0.2  setosa
2           4.9         3.0          1.4         0.2  setosa
3           4.7         3.2          1.3         0.2  setosa
4           4.6         3.1          1.5         0.2  setosa
5           5.0         3.6          1.4         0.2  setosa
6           5.4         3.9          1.7         0.4  setosa
7           4.6         3.4          1.4         0.3  setosa
8           5.0         3.4          1.5         0.2  setosa
9           4.4         2.9          1.4         0.2  setosa
10          4.9         3.1          1.5         0.1  setosa

> sqldf("select count(*) from iris where Species like 'se%' ")
  count(*)
1       50

 

 

plyr

test_plyr.R
rm(list=ls())
setwd = "~/Rcoding"

## plyr
library(plyr)

set.seed(1)
d = data.frame(year = rep(2018:2020, each=6), count = round(runif(9,0,20)))
print(d)

ddply(d,'year', summarise, mean.count=mean(count))
ddply(d,'year',transform, total.count=sum(count))

ddply(d,'year',function(x) {
  mean.count = mean(x$count)
  sd.count = sd(x$count)
  cv = sd.count/mean.count
  data.frame(mean.count,sd.count,cv.count=cv)
})

출력결과 : dplyr 설치 후 library 선언하고 실행하면 결과는 동일

> source("~/Rcoding/test_plyr.R", echo=TRUE)

> rm(list=ls())

> setwd = "~/Rcoding"

> set.seed(1)

> d = data.frame(year = rep(2018:2020, each=6), count = round(runif(9,0,20)))

> print(d)
   year count
1  2018     5
2  2018     7
3  2018    11
4  2018    18
5  2018     4
6  2018    18
7  2019    19
8  2019    13
9  2019    13
10 2019     5
11 2019     7
12 2019    11
13 2020    18
14 2020     4
15 2020    18
16 2020    19
17 2020    13
18 2020    13

> ddply(d,'year', summarise, mean.count=mean(count))
  year mean.count
1 2018   10.50000
2 2019   11.33333
3 2020   14.16667

> ddply(d,'year',transform, total.count=sum(count))
   year count total.count
1  2018     5          63
2  2018     7          63
3  2018    11          63
4  2018    18          63
5  2018     4          63
6  2018    18          63
7  2019    19          68
8  2019    13          68
9  2019    13          68
10 2019     5          68
11 2019     7          68
12 2019    11          68
13 2020    18          85
14 2020     4          85
15 2020    18          85
16 2020    19          85
17 2020    13          85
18 2020    13          85

> ddply(d,'year',function(x) {
+   mean.count = mean(x$count)
+   sd.count = sd(x$count)
+   cv = sd.count/mean.count
+   .... [TRUNCATED] 
  year mean.count sd.count  cv.count
1 2018   10.50000 6.284903 0.5985621
2 2019   11.33333 4.966555 0.4382254
3 2020   14.16667 5.636193 0.3978489

 

 

반응형

'R' 카테고리의 다른 글

(macOS)[R] 결측값 처리 / 이상치 탐색  (0) 2022.04.04
(macOS)[R] data.table  (0) 2022.03.30
(macOS)[R] reshape  (0) 2022.03.29
(macOS)[R] plot : scatter plot, histogram, box plot  (0) 2022.03.29
(macOS)[R] paste, substr, as., date, format  (0) 2022.03.28
댓글
공지사항
최근에 올라온 글
최근에 달린 댓글
Total
Today
Yesterday
링크
«   2025/02   »
1
2 3 4 5 6 7 8
9 10 11 12 13 14 15
16 17 18 19 20 21 22
23 24 25 26 27 28
글 보관함