티스토리 뷰

R

(macOS)[R] reshape

jinozpersona 2022. 3. 29. 17:32

INTRO

reshape를 이용한 데이터 재정렬

Rstudio Console> install.packages("reshape")

#참고 : reshape2는 reshape의 확장판 개념, 설치 오래 걸림

Rstudio Console> install.packages("reshape2")

 

reshape

reshape 패키지는 melt, cast를 사용하여 데이터를 재구성하거나 밀집화된 데이터를 유연하게 생성해줌

 melt 녹이다, cast 전 데이터 형태 추출

 syntax : var = melt(data, id=)

 

 cast 굳히다, 원하는 형태로 데이터를 변형

 syntax : var = cast(data, 행 ~ 열, 집계함수)

 a :  엑셀 피벗팅과 유사하게 자료 변환, 피벗테이블과는 다르게 구별된 순서가 부여됨

 b : y축(행, row) 대상을 첫 칼럼에 놓고 x축(열, column) 대상별로 평균(mean)값을 table 형태로 변환

 c : "|" 기호를 이용하여 산출물을 분리해서 표시

 d : "margins=" 옵션을 통해 행과 열에 대한 총계(grand total)를 산출하는 기능

 e : "subset=" 옵션을 통해 특정 변수를 지정하여 나열

 f : "range" 함수를 통해 min(_X1), Max(_X2)를 구하여 table 형태로 반환

 

test_reshape.R
# test_reshape.R
# Demonstrates melt() and cast() from the reshape package on the built-in
# airquality data set. Results are stored in a..f, one per cast() variant.
#
# No rm(list = ls()) or setwd() here: the script reads and writes no files,
# so it does not need a particular working directory, and clearing the
# workspace would destroy the objects of whoever sources this file.

library(reshape)

## Load the data set
data(airquality)

## First 10 rows and the original column names
head(airquality, 10)
names(airquality)

## Convert the column names to lower case
names(airquality) <- tolower(names(airquality))
names(airquality)


#### melt: keep month and day as id columns, drop rows whose value is NA
## `id.vars` is spelled out in full; `id =` only works via partial matching.
agm <- melt(airquality, id.vars = c("month", "day"), na.rm = TRUE)
print(agm)


#### cast
## a: one day x month table per measured variable (3-D result)
## cast(data, y-dimension ~ x-dimension ~ target-id)
a <- cast(agm, day ~ month ~ variable)
print(a)

## b: one aggregated value per (row, column) cell
## cast(data, y-dimension ~ x-dimension, statistic)
b <- cast(agm, month ~ variable, mean)
print(b)

## c: the same statistic, split into one table per variable with "|"
## cast(data, y-dimension ~ . | separating-id, statistic)
## The name `c` follows the a..f labelling; calls such as c("x", "y") still
## reach base::c because R skips non-function objects when looking up a call.
c <- cast(agm, month ~ . | variable, mean)
print(c)

## d: add "(all)" margins for the rows and the columns
## cast(data, y-dimension ~ x-dimension, statistic, margins = )
d <- cast(agm, month ~ variable, mean, margins = c("grand_row", "grand_col"))
print(d)

## e: restrict the table to a single variable with subset =
## cast(data, y-dimension ~ x-dimension, statistic, subset = id == "target")
e <- cast(agm, day ~ month, mean, subset = variable == "ozone")
print(e)

## f: range() yields two values per cell, shown as <variable>_X1 (min) and
## <variable>_X2 (max)
## cast(data, y-dimension ~ x-dimension, range)
f <- cast(agm, month ~ variable, range)
print(f)

출력결과 : reshape2를 사용해도 결과는 유사함 (단, reshape2에는 cast 함수가 없으므로 dcast/acast로 바꿔 써야 함)

> source("~/Rcoding/test_reshape.R", echo=TRUE)

> rm(list=ls())

> setwd = "~/Rcoding"

> library(reshape)

> ## data load
> data(airquality)

> ## data top 10
> head(airquality,10)
   Ozone Solar.R Wind Temp Month Day
1     41     190  7.4   67     5   1
2     36     118  8.0   72     5   2
3     12     149 12.6   74     5   3
4     18     313 11.5   62     5   4
5     NA      NA 14.3   56     5   5
6     28      NA 14.9   66     5   6
7     23     299  8.6   65     5   7
8     19      99 13.8   59     5   8
9      8      19 20.1   61     5   9
10    NA     194  8.6   69     5  10

> names(airquality)
[1] "Ozone"   "Solar.R" "Wind"    "Temp"    "Month"   "Day"    

> ## convert col-name to lower case
> names(airquality) = tolower(names(airquality))

> names(airquality)
[1] "ozone"   "solar.r" "wind"    "temp"    "month"   "day"    

> #### melt : select "id=month,day" and remove missing datas
> agm = melt(airquality, id=c('month','day'), na.rm=TRUE)

> print(agm)
    month day variable value
1       5   1    ozone    41
2       5   2    ozone    36
3       5   3    ozone    12
4       5   4    ozone    18
6       5   6    ozone    28
7       5   7    ozone    23
....
151     9  28    ozone    14
152     9  29    ozone    18
153     9  30    ozone    20
154     5   1  solar.r   190
155     5   2  solar.r   118
156     5   3  solar.r   149
157     5   4  solar.r   313
160     5   7  solar.r   299
161     5   8  solar.r    99
162     5   9  solar.r    19
163     5  10  solar.r   194
....
....
292     9  16  solar.r   237
293     9  17  solar.r   224
294     9  18  solar.r    27
 [ reached 'max' / getOption("max.print") -- omitted 318 rows ]

> #### cast
> ## sorted values by each id(distinct count)
> ## cast(data, y-dimension ~ x-dimension ~ target-id)
> a = cast(agm, day ~ month ~ variabl .... [TRUNCATED] 

> print(a)
, , variable = ozone

    month
day    5  6   7   8  9
  1   41 NA 135  39 96
  2   36 NA  49   9 78
  3   12 NA  32  16 73
  4   18 NA  NA  78 91
  5   NA NA  64  35 47
  6   28 NA  40  66 32
  7   23 29  77 122 20
  8   19 NA  97  89 23
  9    8 71  97 110 21
  10  NA 39  85  NA 24
  11   7 NA  NA  NA 44
  12  16 NA  10  44 21
  13  11 23  27  28 28
  14  14 NA  NA  65  9
  15  18 NA   7  NA 13
  16  14 21  48  22 46
  17  34 37  35  59 18
  18   6 20  61  23 13
  19  30 12  79  31 24
  20  11 13  63  44 16
  21   1 NA  16  21 13
  22  11 NA  NA   9 23
  23   4 NA  NA  NA 36
  24  32 NA  80  45  7
  25  NA NA 108 168 14
  26  NA NA  20  73 30
  27  NA NA  52  NA NA
  28  23 NA  82  76 14
  29  45 NA  50 118 18
  30 115 NA  64  84 20
  31  37 NA  59  85 NA

, , variable = solar.r

    month
day    5   6   7   8   9
  1  190 286 269  83 167
  2  118 287 248  24 197
  3  149 242 236  77 183
  4  313 186 101  NA 189
  5   NA 220 175  NA  95
  6   NA 264 314  NA  92
  7  299 127 276 255 252
  8   99 273 267 229 220
  9   19 291 272 207 230
  10 194 323 175 222 259
  11  NA 259 139 137 236
  12 256 250 264 192 259
  13 290 148 175 273 238
  14 274 332 291 157  24
  15  65 322  48  64 112
  16 334 191 260  71 237
  17 307 284 274  51 224
  18  78  37 285 115  27
  19 322 120 187 244 238
  20  44 137 220 190 201
  21   8 150   7 259 238
  22 320  59 258  36  14
  23  25  91 295 255 139
  24  92 250 294 212  49
  25  66 135 223 238  20
  26 266 127  81 215 193
  27  NA  47  82 153 145
  28  13  98 213 203 191
  29 252  31 275 225 131
  30 223 138 253 237 223
  31 279  NA 254 188  NA

, , variable = wind

    month
day     5    6    7    8    9
  1   7.4  8.6  4.1  6.9  6.9
  2   8.0  9.7  9.2 13.8  5.1
  3  12.6 16.1  9.2  7.4  2.8
  4  11.5  9.2 10.9  6.9  4.6
  5  14.3  8.6  4.6  7.4  7.4
  6  14.9 14.3 10.9  4.6 15.5
  7   8.6  9.7  5.1  4.0 10.9
  8  13.8  6.9  6.3 10.3 10.3
  9  20.1 13.8  5.7  8.0 10.9
  10  8.6 11.5  7.4  8.6  9.7
  11  6.9 10.9  8.6 11.5 14.9
  12  9.7  9.2 14.3 11.5 15.5
  13  9.2  8.0 14.9 11.5  6.3
  14 10.9 13.8 14.9  9.7 10.9
  15 13.2 11.5 14.3 11.5 11.5
  16 11.5 14.9  6.9 10.3  6.9
  17 12.0 20.7 10.3  6.3 13.8
  18 18.4  9.2  6.3  7.4 10.3
  19 11.5 11.5  5.1 10.9 10.3
  20  9.7 10.3 11.5 10.3  8.0
  21  9.7  6.3  6.9 15.5 12.6
  22 16.6  1.7  9.7 14.3  9.2
  23  9.7  4.6 11.5 12.6 10.3
  24 12.0  6.3  8.6  9.7 10.3
  25 16.6  8.0  8.0  3.4 16.6
  26 14.9  8.0  8.6  8.0  6.9
  27  8.0 10.3 12.0  5.7 13.2
  28 12.0 11.5  7.4  9.7 14.3
  29 14.9 14.9  7.4  2.3  8.0
  30  5.7  8.0  7.4  6.3 11.5
  31  7.4   NA  9.2  6.3   NA

, , variable = temp

    month
day   5  6  7  8  9
  1  67 78 84 81 91
  2  72 74 85 81 92
  3  74 67 81 82 93
  4  62 84 84 86 93
  5  56 85 83 85 87
  6  66 79 83 87 84
  7  65 82 88 89 80
  8  59 87 92 90 78
  9  61 90 92 90 75
  10 69 87 89 92 73
  11 74 93 82 86 81
  12 69 92 73 86 76
  13 66 82 81 82 77
  14 68 80 91 80 71
  15 58 79 80 79 71
  16 64 77 81 77 78
  17 66 72 82 79 67
  18 57 65 84 76 76
  19 68 73 87 78 68
  20 62 76 85 78 82
  21 59 77 74 77 64
  22 73 76 81 72 71
  23 61 76 82 75 81
  24 61 76 86 79 69
  25 57 75 85 81 63
  26 58 78 82 86 70
  27 57 73 86 88 77
  28 67 80 88 97 75
  29 81 77 86 94 76
  30 79 83 83 96 68
  31 76 NA 81 94 NA


> ## selected statistic values by each id 
> ## cast(data, y-dimension ~ x-dimension, statistics)
> b = cast(agm, month ~ variable, mean)

> print(b)
  month    ozone  solar.r      wind     temp
1     5 23.61538 181.2963 11.622581 65.54839
2     6 29.44444 190.1667 10.266667 79.10000
3     7 59.11538 216.4839  8.941935 83.90323
4     8 59.96154 171.8571  8.793548 83.96774
5     9 31.44828 167.4333 10.180000 76.90000

> ## selected statistic values separated id 
> ## cast(data, y-dimension ~ . | separate ids, statistics)
> c = cast(agm, month ~. | variable, mean)

> print(c)
$ozone
  month    (all)
1     5 23.61538
2     6 29.44444
3     7 59.11538
4     8 59.96154
5     9 31.44828

$solar.r
  month    (all)
1     5 181.2963
2     6 190.1667
3     7 216.4839
4     8 171.8571
5     9 167.4333

$wind
  month     (all)
1     5 11.622581
2     6 10.266667
3     7  8.941935
4     8  8.793548
5     9 10.180000

$temp
  month    (all)
1     5 65.54839
2     6 79.10000
3     7 83.90323
4     8 83.96774
5     9 76.90000


> ## statistics grand total value 
> ## cast(data, y-dimension, statistics, margins=)
> d = cast(agm, month ~ variable, mean, margins=c('grand_row', ' .... [TRUNCATED] 

> print(d)
  month    ozone  solar.r      wind     temp    (all)
1     5 23.61538 181.2963 11.622581 65.54839 68.70696
2     6 29.44444 190.1667 10.266667 79.10000 87.38384
3     7 59.11538 216.4839  8.941935 83.90323 93.49748
4     8 59.96154 171.8571  8.793548 83.96774 79.71207
5     9 31.44828 167.4333 10.180000 76.90000 71.82689
6 (all) 42.12931 185.9315  9.957516 77.88235 80.05722

> ## detail target statistics values
> ## cast(data, y-dimension ~ y-dimension, statistics, subset=id=="target")
> e = cast(agm, day ~ month, mean, su .... [TRUNCATED] 

> print(e)
   day   5   6   7   8   9
1    1  41 NaN 135  39  96
2    2  36 NaN  49   9  78
3    3  12 NaN  32  16  73
4    4  18 NaN NaN  78  91
5    5 NaN NaN  64  35  47
6    6  28 NaN  40  66  32
7    7  23  29  77 122  20
8    8  19 NaN  97  89  23
9    9   8  71  97 110  21
10  10 NaN  39  85 NaN  24
11  11   7 NaN NaN NaN  44
12  12  16 NaN  10  44  21
13  13  11  23  27  28  28
14  14  14 NaN NaN  65   9
15  15  18 NaN   7 NaN  13
16  16  14  21  48  22  46
17  17  34  37  35  59  18
18  18   6  20  61  23  13
19  19  30  12  79  31  24
20  20  11  13  63  44  16
21  21   1 NaN  16  21  13
22  22  11 NaN NaN   9  23
23  23   4 NaN NaN NaN  36
24  24  32 NaN  80  45   7
25  25 NaN NaN 108 168  14
26  26 NaN NaN  20  73  30
27  27 NaN NaN  52 NaN NaN
28  28  23 NaN  82  76  14
29  29  45 NaN  50 118  18
30  30 115 NaN  64  84  20
31  31  37 NaN  59  85 NaN

> ## statistics values of min(_X1), Max(_X2) for target id
> ## cast(data, y-dimension ~ y-dimension, statistics, range)
> f = cast(agm, month ~ varia .... [TRUNCATED] 

> print(f)
  month ozone_X1 ozone_X2 solar.r_X1 solar.r_X2 wind_X1 wind_X2 temp_X1 temp_X2
1     5        1      115          8        334     5.7    20.1      56      81
2     6       12       71         31        332     1.7    20.7      65      93
3     7        7      135          7        314     4.1    14.9      73      92
4     8        9      168         24        273     2.3    15.5      72      97
5     9        7       96         14        259     2.8    16.6      63      93

 

반응형
댓글
공지사항
최근에 올라온 글
최근에 달린 댓글
Total
Today
Yesterday
링크
«   2024/10   »
1 2 3 4 5
6 7 8 9 10 11 12
13 14 15 16 17 18 19
20 21 22 23 24 25 26
27 28 29 30 31
글 보관함