##################################
# Loading R libraries
##################################
library(AppliedPredictiveModeling)
library(caret)
library(rpart)
library(lattice)
library(dplyr)
library(tidyr)
library(moments)
library(skimr)
library(RANN)
library(mlbench)
library(pls)
library(corrplot)
library(tidyverse)
library(lares)
library(DMwR2)
library(gridExtra)
library(rattle)
library(rpart.plot)
library(RColorBrewer)
library(stats)
library(nnet)
library(elasticnet)
library(earth)
library(party)
library(kernlab)
library(randomForest)
library(Cubist)
library(pROC)
library(mda)
library(klaR)
library(pamr)
library(MLmetrics)
library(C50)
##################################
# Loading source and
# formulating the train set
##################################
data(Sonar)
Sonar.Original <- Sonar

Sonar.M <- Sonar[Sonar$Class=="M",]
Sonar.R <- Sonar[Sonar$Class=="R",]

set.seed(12345678)
Sonar.R.Reduced <- Sonar.R[sample(1:nrow(Sonar.R),25),]

Sonar <- as.data.frame(rbind(Sonar.M,Sonar.R.Reduced))

set.seed(12345678)
Sonar_Partition <- createDataPartition(Sonar$Class, p = .70, list = FALSE)
Sonar_Train <- Sonar[Sonar_Partition,]
Sonar_Test  <- Sonar[-Sonar_Partition,]
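The class distribution of the resulting partitions can be spot-checked with a few base R calls. This is an illustrative addition not shown in the original output, but the counts it would report (M:78, R:18 for the train set and M:33, R:7 for the test set) match the summaries further below.
##################################
# Verifying the stratified split (illustrative check)
##################################
table(Sonar_Train$Class)
prop.table(table(Sonar_Train$Class))
table(Sonar_Test$Class)
prop.table(table(Sonar_Test$Class))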
##################################
# Performing a general exploration of the train set
##################################
dim(Sonar_Train)
## [1] 96 61
str(Sonar_Train)
## 'data.frame': 96 obs. of 61 variables:
## $ V1 : num 0.0629 0.0587 0.0428 0.0599 0.0264 0.0454 0.0283 0.0114 0.0414 0.0228 ...
## $ V2 : num 0.1065 0.121 0.0555 0.0474 0.0071 ...
## $ V3 : num 0.1526 0.1268 0.0708 0.0498 0.0342 ...
## $ V4 : num 0.1229 0.1498 0.0618 0.0387 0.0793 ...
## $ V5 : num 0.144 0.144 0.121 0.103 0.104 ...
## $ V6 : num 0.119 0.0561 0.1524 0.0773 0.0783 ...
## $ V7 : num 0.0884 0.0832 0.1543 0.0853 0.1417 ...
## $ V8 : num 0.0907 0.0672 0.0391 0.0447 0.1176 ...
## $ V9 : num 0.2107 0.1372 0.061 0.1094 0.0453 ...
## $ V10 : num 0.3597 0.2352 0.0113 0.0351 0.0945 ...
## $ V11 : num 0.547 0.321 0.126 0.158 0.113 ...
## $ V12 : num 0.52 0.426 0.247 0.202 0.084 ...
## $ V13 : num 0.5127 0.5201 0.3011 0.2268 0.0717 ...
## $ V14 : num 0.539 0.491 0.375 0.283 0.197 ...
## $ V15 : num 0.656 0.595 0.452 0.382 0.263 ...
## $ V16 : num 0.871 0.722 0.539 0.467 0.419 ...
## $ V17 : num 0.979 0.904 0.659 0.669 0.505 ...
## $ V18 : num 0.933 0.911 0.711 0.865 0.671 ...
## $ V19 : num 0.792 0.872 0.76 0.936 0.792 ...
## $ V20 : num 0.738 0.769 0.867 0.937 0.838 ...
## $ V21 : num 0.691 0.733 0.842 0.914 0.876 ...
## $ V22 : num 0.385 0.522 0.797 0.916 0.942 ...
## $ V23 : num 0.0671 0.3097 0.8385 0.9311 1 ...
## $ V24 : num 0.0502 0.3172 0.9317 0.8604 0.9931 ...
## $ V25 : num 0.272 0.227 0.856 0.733 0.958 ...
## $ V26 : num 0.284 0.164 0.616 0.576 0.865 ...
## $ V27 : num 0.223 0.175 0.414 0.416 0.722 ...
## $ V28 : num 0.191 0.183 0.327 0.411 0.58 ...
## $ V29 : num 0.0408 0.2048 0.3108 0.4146 0.4964 ...
## $ V30 : num 0.253 0.167 0.255 0.315 0.489 ...
## $ V31 : num 0.198 0.277 0.337 0.294 0.408 ...
## $ V32 : num 0.189 0.31 0.447 0.317 0.244 ...
## $ V33 : num 0.243 0.34 0.5 0.315 0.177 ...
## $ V34 : num 0.196 0.444 0.511 0.413 0.247 ...
## $ V35 : num 0.267 0.505 0.519 0.399 0.352 ...
## $ V36 : num 0.134 0.281 0.462 0.419 0.376 ...
## $ V37 : num 0.107 0.168 0.423 0.453 0.291 ...
## $ V38 : num 0.202 0.263 0.437 0.442 0.231 ...
## $ V39 : num 0.179 0.32 0.428 0.474 0.317 ...
## $ V40 : num 0.0227 0.1933 0.4433 0.3431 0.3554 ...
## $ V41 : num 0.1313 0.0934 0.37 0.3194 0.3741 ...
## $ V42 : num 0.1775 0.0443 0.3324 0.337 0.4443 ...
## $ V43 : num 0.155 0.078 0.256 0.249 0.326 ...
## $ V44 : num 0.1626 0.0722 0.2527 0.265 0.1963 ...
## $ V45 : num 0.0708 0.0405 0.2137 0.1748 0.0864 ...
## $ V46 : num 0.0129 0.0553 0.1789 0.0932 0.1688 ...
## $ V47 : num 0.0795 0.1081 0.101 0.053 0.1991 ...
## $ V48 : num 0.0762 0.1139 0.0528 0.0081 0.1217 ...
## $ V49 : num 0.0117 0.0767 0.0453 0.0342 0.0628 0.038 0.0244 0.0728 0.0177 0.0649 ...
## $ V50 : num 0.0061 0.0265 0.0118 0.0137 0.0323 0.0142 0.0179 0.0174 0.0065 0.0313 ...
## $ V51 : num 0.0257 0.0215 0.0009 0.0028 0.0253 0.0137 0.0109 0.0213 0.0222 0.0185 ...
## $ V52 : num 0.0089 0.0331 0.0142 0.0013 0.0214 0.012 0.0147 0.0269 0.0045 0.0098 ...
## $ V53 : num 0.0262 0.0111 0.0179 0.0005 0.0262 0.0042 0.017 0.0152 0.0136 0.0178 ...
## $ V54 : num 0.0108 0.0088 0.0079 0.0227 0.0177 0.0238 0.0158 0.0257 0.0113 0.0077 ...
## $ V55 : num 0.0138 0.0158 0.006 0.0209 0.0037 0.0129 0.0046 0.0097 0.0053 0.0074 ...
## $ V56 : num 0.0187 0.0122 0.0131 0.0081 0.0068 0.0084 0.0073 0.0041 0.0165 0.0095 ...
## $ V57 : num 0.023 0.0038 0.0089 0.0117 0.0121 0.0218 0.0054 0.005 0.0141 0.0055 ...
## $ V58 : num 0.0057 0.0101 0.0084 0.0114 0.0077 0.0321 0.0033 0.0145 0.0077 0.0045 ...
## $ V59 : num 0.0113 0.0228 0.0113 0.0112 0.0078 0.0154 0.0045 0.0103 0.0246 0.0063 ...
## $ V60 : num 0.0131 0.0124 0.0049 0.01 0.0066 0.0053 0.0079 0.0025 0.0198 0.0039 ...
## $ Class: Factor w/ 2 levels "M","R": 1 1 1 1 1 1 1 1 1 1 ...
summary(Sonar_Train)
## V1 V2 V3 V4
## Min. :0.00150 Min. :0.00060 Min. :0.00150 Min. :0.00580
## 1st Qu.:0.01362 1st Qu.:0.01897 1st Qu.:0.02448 1st Qu.:0.02960
## Median :0.02320 Median :0.03385 Median :0.03880 Median :0.04905
## Mean :0.03171 Mean :0.04387 Mean :0.04718 Mean :0.05631
## 3rd Qu.:0.03982 3rd Qu.:0.05892 3rd Qu.:0.06212 3rd Qu.:0.07220
## Max. :0.13710 Max. :0.15740 Max. :0.16650 Max. :0.16440
## V5 V6 V7 V8
## Min. :0.00670 Min. :0.0102 Min. :0.0182 Min. :0.0124
## 1st Qu.:0.04530 1st Qu.:0.0782 1st Qu.:0.0937 1st Qu.:0.0950
## Median :0.07430 Median :0.1135 Median :0.1298 Median :0.1356
## Mean :0.08196 Mean :0.1178 Mean :0.1332 Mean :0.1511
## 3rd Qu.:0.10855 3rd Qu.:0.1496 3rd Qu.:0.1683 3rd Qu.:0.1906
## Max. :0.24820 Max. :0.3823 Max. :0.3729 Max. :0.4566
## V9 V10 V11 V12
## Min. :0.0075 Min. :0.0113 Min. :0.0526 Min. :0.0236
## 1st Qu.:0.1299 1st Qu.:0.1424 1st Qu.:0.1926 1st Qu.:0.1837
## Median :0.1815 Median :0.2124 Median :0.2515 Median :0.2781
## Mean :0.2039 Mean :0.2334 Mean :0.2662 Mean :0.2796
## 3rd Qu.:0.2596 3rd Qu.:0.2940 3rd Qu.:0.3335 3rd Qu.:0.3501
## Max. :0.6828 Max. :0.5965 Max. :0.6675 Max. :0.5679
## V13 V14 V15 V16
## Min. :0.0616 Min. :0.0273 Min. :0.0092 Min. :0.0422
## 1st Qu.:0.2122 1st Qu.:0.1855 1st Qu.:0.1673 1st Qu.:0.1911
## Median :0.2930 Median :0.2904 Median :0.2751 Median :0.3203
## Mean :0.3021 Mean :0.3139 Mean :0.3194 Mean :0.3753
## 3rd Qu.:0.3730 3rd Qu.:0.4051 3rd Qu.:0.4403 3rd Qu.:0.5332
## Max. :0.7131 Max. :0.9970 Max. :0.9137 Max. :0.9751
## V17 V18 V19 V20
## Min. :0.0367 Min. :0.0375 Min. :0.1316 Min. :0.0656
## 1st Qu.:0.2087 1st Qu.:0.2427 1st Qu.:0.2964 1st Qu.:0.3972
## Median :0.3160 Median :0.3730 Median :0.4462 Median :0.6223
## Mean :0.4137 Mean :0.4475 Mean :0.5134 Mean :0.5861
## 3rd Qu.:0.6466 3rd Qu.:0.6731 3rd Qu.:0.7310 3rd Qu.:0.7978
## Max. :1.0000 Max. :0.9335 Max. :0.9828 Max. :1.0000
## V21 V22 V23 V24
## Min. :0.0512 Min. :0.0219 Min. :0.0610 Min. :0.0502
## 1st Qu.:0.4412 1st Qu.:0.3991 1st Qu.:0.4533 1st Qu.:0.5795
## Median :0.6939 Median :0.7021 Median :0.7139 Median :0.6985
## Mean :0.6393 Mean :0.6364 Mean :0.6500 Mean :0.6795
## 3rd Qu.:0.8449 3rd Qu.:0.8498 3rd Qu.:0.8690 3rd Qu.:0.8968
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## V25 V26 V27 V28
## Min. :0.0240 Min. :0.1640 Min. :0.1036 Min. :0.0598
## 1st Qu.:0.5690 1st Qu.:0.5637 1st Qu.:0.4955 1st Qu.:0.5582
## Median :0.7211 Median :0.7560 Median :0.7930 Median :0.7762
## Mean :0.6807 Mean :0.7079 Mean :0.7074 Mean :0.7076
## 3rd Qu.:0.8749 3rd Qu.:0.8766 3rd Qu.:0.9109 3rd Qu.:0.9116
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## V29 V30 V31 V32
## Min. :0.0144 Min. :0.0613 Min. :0.1000 Min. :0.0877
## 1st Qu.:0.4667 1st Qu.:0.4140 1st Qu.:0.3268 1st Qu.:0.2771
## Median :0.7096 Median :0.6028 Median :0.4416 Median :0.4078
## Mean :0.6518 Mean :0.5869 Mean :0.4970 Mean :0.4364
## 3rd Qu.:0.8672 3rd Qu.:0.7189 3rd Qu.:0.6461 3rd Qu.:0.5816
## Max. :1.0000 Max. :1.0000 Max. :0.9657 Max. :0.9306
## V33 V34 V35 V36
## Min. :0.0477 Min. :0.0588 Min. :0.0223 Min. :0.0080
## 1st Qu.:0.2364 1st Qu.:0.2164 1st Qu.:0.1746 1st Qu.:0.1381
## Median :0.3875 Median :0.3644 Median :0.2930 Median :0.2808
## Mean :0.4079 Mean :0.3940 Mean :0.3772 Mean :0.3649
## 3rd Qu.:0.5409 3rd Qu.:0.5421 3rd Qu.:0.5775 3rd Qu.:0.5348
## Max. :1.0000 Max. :0.9536 Max. :0.9518 Max. :1.0000
## V37 V38 V39 V40
## Min. :0.0351 Min. :0.0618 Min. :0.0436 Min. :0.0227
## 1st Qu.:0.1447 1st Qu.:0.1747 1st Qu.:0.1827 1st Qu.:0.1962
## Median :0.2594 Median :0.3245 Median :0.3058 Median :0.2812
## Mean :0.3525 Mean :0.3458 Mean :0.3464 Mean :0.3173
## 3rd Qu.:0.4884 3rd Qu.:0.4405 3rd Qu.:0.4801 3rd Qu.:0.4269
## Max. :0.9123 Max. :0.9480 Max. :0.9709 Max. :0.9297
## V41 V42 V43 V44
## Min. :0.0438 Min. :0.0443 Min. :0.0308 Min. :0.0255
## 1st Qu.:0.1696 1st Qu.:0.1688 1st Qu.:0.1611 1st Qu.:0.1386
## Median :0.2658 Median :0.2808 Median :0.2580 Median :0.1916
## Mean :0.3043 Mean :0.3053 Mean :0.2722 Mean :0.2370
## 3rd Qu.:0.4094 3rd Qu.:0.3973 3rd Qu.:0.3471 3rd Qu.:0.3081
## Max. :0.8995 Max. :0.8246 Max. :0.7517 Max. :0.5772
## V45 V46 V47 V48
## Min. :0.0352 Min. :0.0080 Min. :0.01790 Min. :0.0081
## 1st Qu.:0.1105 1st Qu.:0.0846 1st Qu.:0.07727 1st Qu.:0.0531
## Median :0.1736 Median :0.1445 Median :0.10900 Median :0.0935
## Mean :0.2362 Mean :0.1930 Mean :0.14301 Mean :0.1088
## 3rd Qu.:0.3626 3rd Qu.:0.2283 3rd Qu.:0.18247 3rd Qu.:0.1351
## Max. :0.7034 Max. :0.7292 Max. :0.55220 Max. :0.3339
## V49 V50 V51 V52
## Min. :0.00730 Min. :0.00440 Min. :0.00090 Min. :0.00130
## 1st Qu.:0.03322 1st Qu.:0.01310 1st Qu.:0.01040 1st Qu.:0.00875
## Median :0.05445 Median :0.01920 Median :0.01565 Median :0.01215
## Mean :0.06444 Mean :0.02375 Mean :0.01878 Mean :0.01516
## 3rd Qu.:0.09137 3rd Qu.:0.02902 3rd Qu.:0.02363 3rd Qu.:0.01830
## Max. :0.19810 Max. :0.08250 Max. :0.10040 Max. :0.07090
## V53 V54 V55 V56
## Min. :0.000500 Min. :0.001000 Min. :0.001100 Min. :0.000400
## 1st Qu.:0.004975 1st Qu.:0.005375 1st Qu.:0.003700 1st Qu.:0.004350
## Median :0.007900 Median :0.009700 Median :0.007700 Median :0.007050
## Mean :0.010800 Mean :0.011600 Mean :0.009373 Mean :0.008372
## 3rd Qu.:0.015375 3rd Qu.:0.015050 3rd Qu.:0.012625 3rd Qu.:0.011625
## Max. :0.036100 Max. :0.035200 Max. :0.044700 Max. :0.039400
## V57 V58 V59 V60
## Min. :0.001100 Min. :0.000900 Min. :0.000100 Min. :0.000600
## 1st Qu.:0.003700 1st Qu.:0.003600 1st Qu.:0.003550 1st Qu.:0.003100
## Median :0.005750 Median :0.006300 Median :0.007000 Median :0.005100
## Mean :0.007678 Mean :0.008472 Mean :0.008259 Mean :0.006066
## 3rd Qu.:0.010725 3rd Qu.:0.010275 3rd Qu.:0.010750 3rd Qu.:0.008125
## Max. :0.035500 Max. :0.044000 Max. :0.029400 Max. :0.021800
## Class
## M:78
## R:18
##
##
##
##
##################################
# Performing a general exploration of the test set
##################################
dim(Sonar_Test)
## [1] 40 61
str(Sonar_Test)
## 'data.frame': 40 obs. of 61 variables:
## $ V1 : num 0.0491 0.1313 0.0201 0.0335 0.0162 ...
## $ V2 : num 0.0279 0.2339 0.0423 0.0134 0.0253 ...
## $ V3 : num 0.0592 0.3059 0.0554 0.0696 0.0262 ...
## $ V4 : num 0.127 0.4264 0.0783 0.118 0.0386 ...
## $ V5 : num 0.1772 0.401 0.062 0.0348 0.0645 ...
## $ V6 : num 0.1908 0.1791 0.0871 0.118 0.0472 ...
## $ V7 : num 0.222 0.185 0.12 0.195 0.106 ...
## $ V8 : num 0.0768 0.0055 0.2707 0.1607 0.1388 ...
## $ V9 : num 0.1246 0.1929 0.1206 0.3036 0.0598 ...
## $ V10 : num 0.2028 0.2231 0.0279 0.4372 0.1334 ...
## $ V11 : num 0.0947 0.2907 0.2251 0.5533 0.2969 ...
## $ V12 : num 0.25 0.226 0.262 0.577 0.475 ...
## $ V13 : num 0.221 0.314 0.177 0.702 0.568 ...
## $ V14 : num 0.32 0.33 0.371 0.707 0.569 ...
## $ V15 : num 0.334 0.366 0.453 0.737 0.642 ...
## $ V16 : num 0.332 0.396 0.555 0.739 0.749 ...
## $ V17 : num 0.278 0.439 0.462 0.862 0.9 ...
## $ V18 : num 0.297 0.467 0.38 0.946 1 ...
## $ V19 : num 0.295 0.525 0.345 0.878 0.969 ...
## $ V20 : num 0.173 0.373 0.267 0.791 0.903 ...
## $ V21 : num 0.326 0.224 0.239 0.576 0.768 ...
## $ V22 : num 0.383 0.197 0.113 0.306 0.7 ...
## $ V23 : num 0.3523 0.4337 0.2556 0.0563 0.6644 ...
## $ V24 : num 0.541 0.6532 0.5169 0.0239 0.5964 ...
## $ V25 : num 0.523 0.507 0.378 0.255 0.371 ...
## $ V26 : num 0.4475 0.2796 0.4082 0.4862 0.0921 ...
## $ V27 : num 0.534 0.4163 0.5353 0.5027 0.0481 ...
## $ V28 : num 0.5323 0.595 0.5116 0.4402 0.0876 ...
## $ V29 : num 0.391 0.524 0.454 0.285 0.104 ...
## $ V30 : num 0.346 0.418 0.426 0.18 0.171 ...
## $ V31 : num 0.409 0.371 0.387 0.356 0.326 ...
## $ V32 : num 0.464 0.237 0.394 0.352 0.461 ...
## $ V33 : num 0.558 0.0863 0.4661 0.3321 0.3939 ...
## $ V34 : num 0.573 0.144 0.397 0.311 0.505 ...
## $ V35 : num 0.635 0.29 0.219 0.364 0.483 ...
## $ V36 : num 0.7563 0.4577 0.1816 0.0754 0.3511 ...
## $ V37 : num 0.69 0.372 0.102 0.183 0.232 ...
## $ V38 : num 0.618 0.337 0.211 0.182 0.403 ...
## $ V39 : num 0.538 0.38 0.325 0.181 0.368 ...
## $ V40 : num 0.562 0.418 0.37 0.159 0.151 ...
## $ V41 : num 0.6508 0.3603 0.2912 0.0576 0.0745 ...
## $ V42 : num 0.4797 0.2711 0.301 0.0954 0.1395 ...
## $ V43 : num 0.374 0.165 0.256 0.109 0.155 ...
## $ V44 : num 0.2804 0.1951 0.1927 0.0812 0.0377 ...
## $ V45 : num 0.1982 0.2811 0.2062 0.0784 0.0636 ...
## $ V46 : num 0.2438 0.2246 0.1751 0.0487 0.0443 ...
## $ V47 : num 0.1789 0.1921 0.0841 0.0439 0.0264 ...
## $ V48 : num 0.1706 0.15 0.1035 0.0586 0.0223 ...
## $ V49 : num 0.0762 0.0665 0.0641 0.037 0.0187 0.0245 0.0102 0.0436 0.0293 0.0469 ...
## $ V50 : num 0.0238 0.0193 0.0153 0.0185 0.0077 0.019 0.0057 0.0224 0.0183 0.0114 ...
## $ V51 : num 0.0268 0.0156 0.0081 0.0302 0.0137 0.0063 0.0031 0.0133 0.0104 0.0299 ...
## $ V52 : num 0.0081 0.0362 0.0191 0.0244 0.0071 0.0321 0.0163 0.0078 0.0117 0.0244 ...
## $ V53 : num 0.0129 0.021 0.0182 0.0232 0.0082 0.0189 0.0099 0.0174 0.0101 0.0199 ...
## $ V54 : num 0.0161 0.0154 0.016 0.0093 0.0232 0.0137 0.0084 0.0176 0.0061 0.0257 ...
## $ V55 : num 0.0063 0.018 0.029 0.0159 0.0198 0.0277 0.027 0.0038 0.0031 0.0082 ...
## $ V56 : num 0.0119 0.0013 0.009 0.0193 0.0074 0.0152 0.0277 0.0129 0.0099 0.0151 ...
## $ V57 : num 0.0194 0.0106 0.0242 0.0032 0.0035 0.0052 0.0097 0.0066 0.008 0.0171 ...
## $ V58 : num 0.014 0.0127 0.0224 0.0377 0.01 0.0121 0.0054 0.0044 0.0107 0.0146 ...
## $ V59 : num 0.0332 0.0178 0.019 0.0126 0.0048 0.0124 0.0148 0.0134 0.0161 0.0134 ...
## $ V60 : num 0.0439 0.0231 0.0096 0.0156 0.0019 0.0055 0.0092 0.0092 0.0133 0.0056 ...
## $ Class: Factor w/ 2 levels "M","R": 1 1 1 1 1 1 1 1 1 1 ...
summary(Sonar_Test)
## V1 V2 V3 V4
## Min. :0.00470 Min. :0.00220 Min. :0.00450 Min. :0.00760
## 1st Qu.:0.01620 1st Qu.:0.01392 1st Qu.:0.01770 1st Qu.:0.02615
## Median :0.02495 Median :0.03190 Median :0.03660 Median :0.04465
## Mean :0.03229 Mean :0.03954 Mean :0.04819 Mean :0.07107
## 3rd Qu.:0.03665 3rd Qu.:0.04850 3rd Qu.:0.05635 3rd Qu.:0.08830
## Max. :0.13130 Max. :0.23390 Max. :0.30590 Max. :0.42640
## V5 V6 V7 V8
## Min. :0.00970 Min. :0.02260 Min. :0.00330 Min. :0.00550
## 1st Qu.:0.03470 1st Qu.:0.05325 1st Qu.:0.06792 1st Qu.:0.08903
## Median :0.06155 Median :0.07610 Median :0.09480 Median :0.11180
## Mean :0.08154 Mean :0.08995 Mean :0.11237 Mean :0.12967
## 3rd Qu.:0.08470 3rd Qu.:0.11365 3rd Qu.:0.14510 3rd Qu.:0.15188
## Max. :0.40100 Max. :0.22470 Max. :0.33220 Max. :0.45900
## V9 V10 V11 V12
## Min. :0.0494 Min. :0.0193 Min. :0.0523 Min. :0.0259
## 1st Qu.:0.1000 1st Qu.:0.1261 1st Qu.:0.1572 1st Qu.:0.2245
## Median :0.1439 Median :0.1813 Median :0.2363 Median :0.2599
## Mean :0.1795 Mean :0.2212 Mean :0.2595 Mean :0.2809
## 3rd Qu.:0.2196 3rd Qu.:0.2596 3rd Qu.:0.2991 3rd Qu.:0.3141
## Max. :0.5664 Max. :0.7106 Max. :0.7342 Max. :0.5771
## V13 V14 V15 V16
## Min. :0.1184 Min. :0.0336 Min. :0.0166 Min. :0.0572
## 1st Qu.:0.2081 1st Qu.:0.2122 1st Qu.:0.1990 1st Qu.:0.2072
## Median :0.2581 Median :0.2959 Median :0.3125 Median :0.3199
## Mean :0.2880 Mean :0.3048 Mean :0.3301 Mean :0.3778
## 3rd Qu.:0.3155 3rd Qu.:0.3464 3rd Qu.:0.4298 3rd Qu.:0.5161
## Max. :0.7022 Max. :0.7067 Max. :0.7367 Max. :0.8278
## V17 V18 V19 V20
## Min. :0.1162 Min. :0.0837 Min. :0.1151 Min. :0.0902
## 1st Qu.:0.2159 1st Qu.:0.2492 1st Qu.:0.3366 1st Qu.:0.3652
## Median :0.3154 Median :0.3607 Median :0.5134 Median :0.6252
## Mean :0.4086 Mean :0.4693 Mean :0.5419 Mean :0.5995
## 3rd Qu.:0.6000 3rd Qu.:0.6776 3rd Qu.:0.8178 3rd Qu.:0.8684
## Max. :0.8999 Max. :1.0000 Max. :0.9975 Max. :0.9911
## V21 V22 V23 V24
## Min. :0.1354 Min. :0.1127 Min. :0.0563 Min. :0.0239
## 1st Qu.:0.4244 1st Qu.:0.4482 1st Qu.:0.5467 1st Qu.:0.5782
## Median :0.7064 Median :0.7190 Median :0.7579 Median :0.7542
## Mean :0.6382 Mean :0.6577 Mean :0.6836 Mean :0.7058
## 3rd Qu.:0.8115 3rd Qu.:0.8320 3rd Qu.:0.8524 3rd Qu.:0.8771
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## V25 V26 V27 V28
## Min. :0.1934 Min. :0.0921 Min. :0.0481 Min. :0.0284
## 1st Qu.:0.5189 1st Qu.:0.4807 1st Qu.:0.4598 1st Qu.:0.5109
## Median :0.7201 Median :0.7925 Median :0.7719 Median :0.7435
## Mean :0.6937 Mean :0.6907 Mean :0.6910 Mean :0.6893
## 3rd Qu.:0.9090 3rd Qu.:0.9534 3rd Qu.:0.9674 3rd Qu.:0.9476
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## V29 V30 V31 V32
## Min. :0.1008 Min. :0.1714 Min. :0.0482 Min. :0.0404
## 1st Qu.:0.4543 1st Qu.:0.4012 1st Qu.:0.3486 1st Qu.:0.3132
## Median :0.6583 Median :0.6019 Median :0.4360 Median :0.4179
## Mean :0.6327 Mean :0.5438 Mean :0.4626 Mean :0.4103
## 3rd Qu.:0.8402 3rd Qu.:0.6990 3rd Qu.:0.5930 3rd Qu.:0.4918
## Max. :1.0000 Max. :0.9151 Max. :0.8828 Max. :0.9108
## V33 V34 V35 V36
## Min. :0.0637 Min. :0.0212 Min. :0.0619 Min. :0.0271
## 1st Qu.:0.2634 1st Qu.:0.2005 1st Qu.:0.1475 1st Qu.:0.1501
## Median :0.3797 Median :0.3052 Median :0.2669 Median :0.2350
## Mean :0.3832 Mean :0.3476 Mean :0.3285 Mean :0.3095
## 3rd Qu.:0.5090 3rd Qu.:0.4620 3rd Qu.:0.4560 3rd Qu.:0.4424
## Max. :0.7927 Max. :0.8703 Max. :1.0000 Max. :0.9212
## V37 V38 V39 V40
## Min. :0.0476 Min. :0.0411 Min. :0.0712 Min. :0.0325
## 1st Qu.:0.1535 1st Qu.:0.1741 1st Qu.:0.1754 1st Qu.:0.1572
## Median :0.2416 Median :0.3095 Median :0.3251 Median :0.2807
## Mean :0.2919 Mean :0.3190 Mean :0.3071 Mean :0.2859
## 3rd Qu.:0.4083 3rd Qu.:0.4115 3rd Qu.:0.3901 3rd Qu.:0.4062
## Max. :0.9386 Max. :0.9303 Max. :0.7601 Max. :0.6034
## V41 V42 V43 V44
## Min. :0.0360 Min. :0.0300 Min. :0.0537 Min. :0.0255
## 1st Qu.:0.1157 1st Qu.:0.1473 1st Qu.:0.1704 1st Qu.:0.1412
## Median :0.2497 Median :0.2228 Median :0.2265 Median :0.1953
## Mean :0.2644 Mean :0.2729 Mean :0.2534 Mean :0.2204
## 3rd Qu.:0.3752 3rd Qu.:0.4326 3rd Qu.:0.3649 3rd Qu.:0.2792
## Max. :0.6508 Max. :0.6443 Max. :0.4478 Max. :0.5245
## V45 V46 V47 V48
## Min. :0.0298 Min. :0.01380 Min. :0.0237 Min. :0.00410
## 1st Qu.:0.0908 1st Qu.:0.07405 1st Qu.:0.0744 1st Qu.:0.04977
## Median :0.1463 Median :0.12550 Median :0.1134 Median :0.08030
## Mean :0.1969 Mean :0.15892 Mean :0.1220 Mean :0.08778
## 3rd Qu.:0.2072 3rd Qu.:0.20820 3rd Qu.:0.1572 3rd Qu.:0.12095
## Max. :0.6149 Max. :0.52930 Max. :0.3385 Max. :0.20520
## V49 V50 V51 V52
## Min. :0.01020 Min. :0.00500 Min. :0.00260 Min. :0.00400
## 1st Qu.:0.02652 1st Qu.:0.01155 1st Qu.:0.01093 1st Qu.:0.00945
## Median :0.04525 Median :0.01875 Median :0.01550 Median :0.01340
## Mean :0.04845 Mean :0.01904 Mean :0.01648 Mean :0.01561
## 3rd Qu.:0.06732 3rd Qu.:0.02312 3rd Qu.:0.02050 3rd Qu.:0.01770
## Max. :0.10690 Max. :0.06370 Max. :0.03800 Max. :0.04590
## V53 V54 V55 V56
## Min. :0.001500 Min. :0.00180 Min. :0.001300 Min. :0.00130
## 1st Qu.:0.009125 1st Qu.:0.00605 1st Qu.:0.003875 1st Qu.:0.00450
## Median :0.012000 Median :0.00905 Median :0.006250 Median :0.00700
## Mean :0.012740 Mean :0.01205 Mean :0.010433 Mean :0.00858
## 3rd Qu.:0.015675 3rd Qu.:0.01638 3rd Qu.:0.014550 3rd Qu.:0.01063
## Max. :0.039000 Max. :0.03350 Max. :0.037600 Max. :0.02770
## V57 V58 V59 V60
## Min. :0.000900 Min. :0.000600 Min. :0.000200 Min. :0.00150
## 1st Qu.:0.003425 1st Qu.:0.003600 1st Qu.:0.003575 1st Qu.:0.00310
## Median :0.005800 Median :0.005800 Median :0.006000 Median :0.00570
## Mean :0.007403 Mean :0.008155 Mean :0.009057 Mean :0.00817
## 3rd Qu.:0.009025 3rd Qu.:0.011650 3rd Qu.:0.012450 3rd Qu.:0.01020
## Max. :0.024200 Max. :0.037700 Max. :0.036400 Max. :0.04390
## Class
## M:33
## R: 7
##
##
##
##
##################################
# Formulating a data type assessment summary
##################################
PDA <- Sonar_Train

(PDA.Summary <- data.frame(
  Column.Index=c(1:length(names(PDA))),
  Column.Name= names(PDA),
  Column.Type=sapply(PDA, function(x) class(x)),
  row.names=NULL)
)
## Column.Index Column.Name Column.Type
## 1 1 V1 numeric
## 2 2 V2 numeric
## 3 3 V3 numeric
## 4 4 V4 numeric
## 5 5 V5 numeric
## 6 6 V6 numeric
## 7 7 V7 numeric
## 8 8 V8 numeric
## 9 9 V9 numeric
## 10 10 V10 numeric
## 11 11 V11 numeric
## 12 12 V12 numeric
## 13 13 V13 numeric
## 14 14 V14 numeric
## 15 15 V15 numeric
## 16 16 V16 numeric
## 17 17 V17 numeric
## 18 18 V18 numeric
## 19 19 V19 numeric
## 20 20 V20 numeric
## 21 21 V21 numeric
## 22 22 V22 numeric
## 23 23 V23 numeric
## 24 24 V24 numeric
## 25 25 V25 numeric
## 26 26 V26 numeric
## 27 27 V27 numeric
## 28 28 V28 numeric
## 29 29 V29 numeric
## 30 30 V30 numeric
## 31 31 V31 numeric
## 32 32 V32 numeric
## 33 33 V33 numeric
## 34 34 V34 numeric
## 35 35 V35 numeric
## 36 36 V36 numeric
## 37 37 V37 numeric
## 38 38 V38 numeric
## 39 39 V39 numeric
## 40 40 V40 numeric
## 41 41 V41 numeric
## 42 42 V42 numeric
## 43 43 V43 numeric
## 44 44 V44 numeric
## 45 45 V45 numeric
## 46 46 V46 numeric
## 47 47 V47 numeric
## 48 48 V48 numeric
## 49 49 V49 numeric
## 50 50 V50 numeric
## 51 51 V51 numeric
## 52 52 V52 numeric
## 53 53 V53 numeric
## 54 54 V54 numeric
## 55 55 V55 numeric
## 56 56 V56 numeric
## 57 57 V57 numeric
## 58 58 V58 numeric
## 59 59 V59 numeric
## 60 60 V60 numeric
## 61 61 Class factor
##################################
# Loading dataset
##################################
DQA <- Sonar_Train
##################################
# Formulating an overall data quality assessment summary
##################################
(DQA.Summary <- data.frame(
  Column.Index=c(1:length(names(DQA))),
  Column.Name= names(DQA),
  Column.Type=sapply(DQA, function(x) class(x)),
  Row.Count=sapply(DQA, function(x) nrow(DQA)),
  NA.Count=sapply(DQA,function(x)sum(is.na(x))),
  Fill.Rate=sapply(DQA,function(x)format(round((sum(!is.na(x))/nrow(DQA)),3),nsmall=3)),
  row.names=NULL)
)
## Column.Index Column.Name Column.Type Row.Count NA.Count Fill.Rate
## 1 1 V1 numeric 96 0 1.000
## 2 2 V2 numeric 96 0 1.000
## 3 3 V3 numeric 96 0 1.000
## 4 4 V4 numeric 96 0 1.000
## 5 5 V5 numeric 96 0 1.000
## 6 6 V6 numeric 96 0 1.000
## 7 7 V7 numeric 96 0 1.000
## 8 8 V8 numeric 96 0 1.000
## 9 9 V9 numeric 96 0 1.000
## 10 10 V10 numeric 96 0 1.000
## 11 11 V11 numeric 96 0 1.000
## 12 12 V12 numeric 96 0 1.000
## 13 13 V13 numeric 96 0 1.000
## 14 14 V14 numeric 96 0 1.000
## 15 15 V15 numeric 96 0 1.000
## 16 16 V16 numeric 96 0 1.000
## 17 17 V17 numeric 96 0 1.000
## 18 18 V18 numeric 96 0 1.000
## 19 19 V19 numeric 96 0 1.000
## 20 20 V20 numeric 96 0 1.000
## 21 21 V21 numeric 96 0 1.000
## 22 22 V22 numeric 96 0 1.000
## 23 23 V23 numeric 96 0 1.000
## 24 24 V24 numeric 96 0 1.000
## 25 25 V25 numeric 96 0 1.000
## 26 26 V26 numeric 96 0 1.000
## 27 27 V27 numeric 96 0 1.000
## 28 28 V28 numeric 96 0 1.000
## 29 29 V29 numeric 96 0 1.000
## 30 30 V30 numeric 96 0 1.000
## 31 31 V31 numeric 96 0 1.000
## 32 32 V32 numeric 96 0 1.000
## 33 33 V33 numeric 96 0 1.000
## 34 34 V34 numeric 96 0 1.000
## 35 35 V35 numeric 96 0 1.000
## 36 36 V36 numeric 96 0 1.000
## 37 37 V37 numeric 96 0 1.000
## 38 38 V38 numeric 96 0 1.000
## 39 39 V39 numeric 96 0 1.000
## 40 40 V40 numeric 96 0 1.000
## 41 41 V41 numeric 96 0 1.000
## 42 42 V42 numeric 96 0 1.000
## 43 43 V43 numeric 96 0 1.000
## 44 44 V44 numeric 96 0 1.000
## 45 45 V45 numeric 96 0 1.000
## 46 46 V46 numeric 96 0 1.000
## 47 47 V47 numeric 96 0 1.000
## 48 48 V48 numeric 96 0 1.000
## 49 49 V49 numeric 96 0 1.000
## 50 50 V50 numeric 96 0 1.000
## 51 51 V51 numeric 96 0 1.000
## 52 52 V52 numeric 96 0 1.000
## 53 53 V53 numeric 96 0 1.000
## 54 54 V54 numeric 96 0 1.000
## 55 55 V55 numeric 96 0 1.000
## 56 56 V56 numeric 96 0 1.000
## 57 57 V57 numeric 96 0 1.000
## 58 58 V58 numeric 96 0 1.000
## 59 59 V59 numeric 96 0 1.000
## 60 60 V60 numeric 96 0 1.000
## 61 61 Class factor 96 0 1.000
##################################
# Listing all predictors
##################################
DQA.Predictors <- DQA[,!names(DQA) %in% c("Class")]
##################################
# Listing all numeric predictors
##################################
DQA.Predictors.Numeric <- DQA.Predictors[,sapply(DQA.Predictors, is.numeric)]

if (length(names(DQA.Predictors.Numeric))>0) {
  print(paste0("There are ",
               (length(names(DQA.Predictors.Numeric))),
               " numeric predictor variable(s)."))
} else {
  print("There are no numeric predictor variables.")
}
## [1] "There are 60 numeric predictor variable(s)."
##################################
# Listing all factor predictors
##################################
DQA.Predictors.Factor <- DQA.Predictors[,sapply(DQA.Predictors, is.factor)]

if (length(names(DQA.Predictors.Factor))>0) {
  print(paste0("There are ",
               (length(names(DQA.Predictors.Factor))),
               " factor predictor variable(s)."))
} else {
  print("There are no factor predictor variables.")
}
## [1] "There are no factor predictor variables."
##################################
# Formulating a data quality assessment summary for factor predictors
##################################
if (length(names(DQA.Predictors.Factor))>0) {

  ##################################
  # Formulating a function to determine the first mode
  ##################################
  FirstModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    ux[tab == max(tab)]
  }

  ##################################
  # Formulating a function to determine the second mode
  ##################################
  SecondModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    fm = ux[tab == max(tab)]
    sm = x[!(x %in% fm)]
    usm <- unique(sm)
    tabsm <- tabulate(match(sm, usm))
    ifelse(is.na(usm[tabsm == max(tabsm)])==TRUE,
           return("x"),
           return(usm[tabsm == max(tabsm)]))
  }

  (DQA.Predictors.Factor.Summary <- data.frame(
    Column.Name= names(DQA.Predictors.Factor),
    Column.Type=sapply(DQA.Predictors.Factor, function(x) class(x)),
    Unique.Count=sapply(DQA.Predictors.Factor, function(x) length(unique(x))),
    First.Mode.Value=sapply(DQA.Predictors.Factor, function(x) as.character(FirstModes(x)[1])),
    Second.Mode.Value=sapply(DQA.Predictors.Factor, function(x) as.character(SecondModes(x)[1])),
    First.Mode.Count=sapply(DQA.Predictors.Factor, function(x) sum(na.omit(x) == FirstModes(x)[1])),
    Second.Mode.Count=sapply(DQA.Predictors.Factor, function(x) sum(na.omit(x) == SecondModes(x)[1])),
    Unique.Count.Ratio=sapply(DQA.Predictors.Factor, function(x) format(round((length(unique(x))/nrow(DQA.Predictors.Factor)),3), nsmall=3)),
    First.Second.Mode.Ratio=sapply(DQA.Predictors.Factor, function(x) format(round((sum(na.omit(x) == FirstModes(x)[1])/sum(na.omit(x) == SecondModes(x)[1])),3), nsmall=3)),
    row.names=NULL)
  )
}
##################################
# Formulating a data quality assessment summary for numeric predictors
##################################
if (length(names(DQA.Predictors.Numeric))>0) {

  ##################################
  # Formulating a function to determine the first mode
  ##################################
  FirstModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    ux[tab == max(tab)]
  }

  ##################################
  # Formulating a function to determine the second mode
  ##################################
  SecondModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    fm = ux[tab == max(tab)]
    sm = na.omit(x)[!(na.omit(x) %in% fm)]
    usm <- unique(sm)
    tabsm <- tabulate(match(sm, usm))
    ifelse(is.na(usm[tabsm == max(tabsm)])==TRUE,
           return(0.00001),
           return(usm[tabsm == max(tabsm)]))
  }

  (DQA.Predictors.Numeric.Summary <- data.frame(
    Column.Name= names(DQA.Predictors.Numeric),
    Column.Type=sapply(DQA.Predictors.Numeric, function(x) class(x)),
    Unique.Count=sapply(DQA.Predictors.Numeric, function(x) length(unique(x))),
    Unique.Count.Ratio=sapply(DQA.Predictors.Numeric, function(x) format(round((length(unique(x))/nrow(DQA.Predictors.Numeric)),3), nsmall=3)),
    First.Mode.Value=sapply(DQA.Predictors.Numeric, function(x) format(round((FirstModes(x)[1]),3),nsmall=3)),
    Second.Mode.Value=sapply(DQA.Predictors.Numeric, function(x) format(round((SecondModes(x)[1]),3),nsmall=3)),
    First.Mode.Count=sapply(DQA.Predictors.Numeric, function(x) sum(na.omit(x) == FirstModes(x)[1])),
    Second.Mode.Count=sapply(DQA.Predictors.Numeric, function(x) sum(na.omit(x) == SecondModes(x)[1])),
    First.Second.Mode.Ratio=sapply(DQA.Predictors.Numeric, function(x) format(round((sum(na.omit(x) == FirstModes(x)[1])/sum(na.omit(x) == SecondModes(x)[1])),3), nsmall=3)),
    Minimum=sapply(DQA.Predictors.Numeric, function(x) format(round(min(x,na.rm = TRUE),3), nsmall=3)),
    Mean=sapply(DQA.Predictors.Numeric, function(x) format(round(mean(x,na.rm = TRUE),3), nsmall=3)),
    Median=sapply(DQA.Predictors.Numeric, function(x) format(round(median(x,na.rm = TRUE),3), nsmall=3)),
    Maximum=sapply(DQA.Predictors.Numeric, function(x) format(round(max(x,na.rm = TRUE),3), nsmall=3)),
    Skewness=sapply(DQA.Predictors.Numeric, function(x) format(round(skewness(x,na.rm = TRUE),3), nsmall=3)),
    Kurtosis=sapply(DQA.Predictors.Numeric, function(x) format(round(kurtosis(x,na.rm = TRUE),3), nsmall=3)),
    Percentile25th=sapply(DQA.Predictors.Numeric, function(x) format(round(quantile(x,probs=0.25,na.rm = TRUE),3), nsmall=3)),
    Percentile75th=sapply(DQA.Predictors.Numeric, function(x) format(round(quantile(x,probs=0.75,na.rm = TRUE),3), nsmall=3)),
    row.names=NULL)
  )
}
## Column.Name Column.Type Unique.Count Unique.Count.Ratio First.Mode.Value
## 1 V1 numeric 91 0.948 0.021
## 2 V2 numeric 93 0.969 0.019
## 3 V3 numeric 92 0.958 0.030
## 4 V4 numeric 90 0.938 0.061
## 5 V5 numeric 93 0.969 0.112
## 6 V6 numeric 94 0.979 0.152
## 7 V7 numeric 94 0.979 0.149
## 8 V8 numeric 95 0.990 0.168
## 9 V9 numeric 96 1.000 0.211
## 10 V10 numeric 96 1.000 0.360
## 11 V11 numeric 94 0.979 0.213
## 12 V12 numeric 96 1.000 0.520
## 13 V13 numeric 95 0.990 0.286
## 14 V14 numeric 93 0.969 0.290
## 15 V15 numeric 93 0.969 0.377
## 16 V16 numeric 96 1.000 0.871
## 17 V17 numeric 94 0.979 1.000
## 18 V18 numeric 95 0.990 0.243
## 19 V19 numeric 96 1.000 0.792
## 20 V20 numeric 93 0.969 0.769
## 21 V21 numeric 93 0.969 1.000
## 22 V22 numeric 94 0.979 1.000
## 23 V23 numeric 94 0.979 1.000
## 24 V24 numeric 94 0.979 1.000
## 25 V25 numeric 93 0.969 1.000
## 26 V26 numeric 91 0.948 1.000
## 27 V27 numeric 90 0.938 1.000
## 28 V28 numeric 88 0.917 1.000
## 29 V29 numeric 93 0.969 1.000
## 30 V30 numeric 93 0.969 1.000
## 31 V31 numeric 95 0.990 0.386
## 32 V32 numeric 96 1.000 0.189
## 33 V33 numeric 94 0.979 0.525
## 34 V34 numeric 96 1.000 0.196
## 35 V35 numeric 96 1.000 0.267
## 36 V36 numeric 95 0.990 0.233
## 37 V37 numeric 96 1.000 0.107
## 38 V38 numeric 96 1.000 0.202
## 39 V39 numeric 95 0.990 0.089
## 40 V40 numeric 95 0.990 0.443
## 41 V41 numeric 96 1.000 0.131
## 42 V42 numeric 96 1.000 0.178
## 43 V43 numeric 96 1.000 0.155
## 44 V44 numeric 95 0.990 0.192
## 45 V45 numeric 96 1.000 0.071
## 46 V46 numeric 95 0.990 0.096
## 47 V47 numeric 94 0.979 0.080
## 48 V48 numeric 96 1.000 0.076
## 49 V49 numeric 95 0.990 0.108
## 50 V50 numeric 83 0.865 0.018
## 51 V51 numeric 83 0.865 0.014
## 52 V52 numeric 83 0.865 0.009
## 53 V53 numeric 78 0.812 0.018
## 54 V54 numeric 79 0.823 0.011
## 55 V55 numeric 75 0.781 0.008
## 56 V56 numeric 79 0.823 0.003
## 57 V57 numeric 72 0.750 0.005
## 58 V58 numeric 71 0.740 0.010
## 59 V59 numeric 70 0.729 0.008
## 60 V60 numeric 70 0.729 0.003
## Second.Mode.Value First.Mode.Count Second.Mode.Count First.Second.Mode.Ratio
## 1 0.023 3 2 1.500
## 2 0.106 2 1 2.000
## 3 0.153 2 1 2.000
## 4 0.039 3 2 1.500
## 5 0.144 2 1 2.000
## 6 0.119 2 1 2.000
## 7 0.088 2 1 2.000
## 8 0.091 2 1 2.000
## 9 0.000 1 0 Inf
## 10 0.000 1 0 Inf
## 11 0.547 2 1 2.000
## 12 0.000 1 0 Inf
## 13 0.513 2 1 2.000
## 14 0.539 2 1 2.000
## 15 0.656 2 1 2.000
## 16 0.000 1 0 Inf
## 17 0.979 2 1 2.000
## 18 0.933 2 1 2.000
## 19 0.000 1 0 Inf
## 20 0.738 2 1 2.000
## 21 0.691 4 1 4.000
## 22 0.385 3 1 3.000
## 23 0.067 3 1 3.000
## 24 0.050 3 1 3.000
## 25 0.272 4 1 4.000
## 26 0.284 6 1 6.000
## 27 0.892 5 2 2.500
## 28 0.191 9 1 9.000
## 29 0.904 3 2 1.500
## 30 0.253 4 1 4.000
## 31 0.198 2 1 2.000
## 32 0.000 1 0 Inf
## 33 0.243 2 1 2.000
## 34 0.000 1 0 Inf
## 35 0.000 1 0 Inf
## 36 0.134 2 1 2.000
## 37 0.000 1 0 Inf
## 38 0.000 1 0 Inf
## 39 0.179 2 1 2.000
## 40 0.023 2 1 2.000
## 41 0.000 1 0 Inf
## 42 0.000 1 0 Inf
## 43 0.000 1 0 Inf
## 44 0.163 2 1 2.000
## 45 0.000 1 0 Inf
## 46 0.013 2 1 2.000
## 47 0.080 2 1 2.000
## 48 0.000 1 0 Inf
## 49 0.012 2 1 2.000
## 50 0.026 3 2 1.500
## 51 0.025 3 2 1.500
## 52 0.009 3 2 1.500
## 53 0.026 3 2 1.500
## 54 0.008 3 2 1.500
## 55 0.004 4 3 1.333
## 56 0.008 3 2 1.500
## 57 0.004 4 3 1.333
## 58 0.006 3 2 1.500
## 59 0.007 4 3 1.333
## 60 0.002 4 3 1.333
## Minimum Mean Median Maximum Skewness Kurtosis Percentile25th Percentile75th
## 1 0.002 0.032 0.023 0.137 1.792 6.412 0.014 0.040
## 2 0.001 0.044 0.034 0.157 1.257 4.025 0.019 0.059
## 3 0.002 0.047 0.039 0.166 1.468 5.010 0.024 0.062
## 4 0.006 0.056 0.049 0.164 1.121 3.944 0.030 0.072
## 5 0.007 0.082 0.074 0.248 0.841 3.808 0.045 0.109
## 6 0.010 0.118 0.113 0.382 1.173 5.889 0.078 0.150
## 7 0.018 0.133 0.130 0.373 0.754 4.295 0.094 0.168
## 8 0.012 0.151 0.136 0.457 1.233 4.917 0.095 0.191
## 9 0.007 0.204 0.182 0.683 1.542 6.658 0.130 0.260
## 10 0.011 0.233 0.212 0.597 0.912 3.670 0.142 0.294
## 11 0.053 0.266 0.252 0.667 0.727 3.824 0.193 0.334
## 12 0.024 0.280 0.278 0.568 0.325 2.699 0.184 0.350
## 13 0.062 0.302 0.293 0.713 0.544 3.116 0.212 0.373
## 14 0.027 0.314 0.290 0.997 1.098 4.726 0.185 0.405
## 15 0.009 0.319 0.275 0.914 0.891 3.188 0.167 0.440
## 16 0.042 0.375 0.320 0.975 0.810 2.804 0.191 0.533
## 17 0.037 0.414 0.316 1.000 0.678 2.281 0.209 0.647
## 18 0.038 0.447 0.373 0.933 0.480 1.909 0.243 0.673
## 19 0.132 0.513 0.446 0.983 0.265 1.756 0.296 0.731
## 20 0.066 0.586 0.622 1.000 -0.329 1.943 0.397 0.798
## 21 0.051 0.639 0.694 1.000 -0.553 2.254 0.441 0.845
## 22 0.022 0.636 0.702 1.000 -0.522 2.078 0.399 0.850
## 23 0.061 0.650 0.714 1.000 -0.642 2.271 0.453 0.869
## 24 0.050 0.680 0.699 1.000 -0.741 2.742 0.580 0.897
## 25 0.024 0.681 0.721 1.000 -0.824 3.019 0.569 0.875
## 26 0.164 0.708 0.756 1.000 -0.686 2.618 0.564 0.877
## 27 0.104 0.707 0.793 1.000 -0.682 2.297 0.496 0.911
## 28 0.060 0.708 0.776 1.000 -0.681 2.421 0.558 0.912
## 29 0.014 0.652 0.710 1.000 -0.601 2.411 0.467 0.867
## 30 0.061 0.587 0.603 1.000 -0.025 2.431 0.414 0.719
## 31 0.100 0.497 0.442 0.966 0.465 2.307 0.327 0.646
## 32 0.088 0.436 0.408 0.931 0.460 2.307 0.277 0.582
## 33 0.048 0.408 0.387 1.000 0.470 2.647 0.236 0.541
## 34 0.059 0.394 0.364 0.954 0.588 2.552 0.216 0.542
## 35 0.022 0.377 0.293 0.952 0.570 2.141 0.175 0.578
## 36 0.008 0.365 0.281 1.000 0.794 2.447 0.138 0.535
## 37 0.035 0.352 0.259 0.912 0.783 2.269 0.145 0.488
## 38 0.062 0.346 0.324 0.948 0.995 3.280 0.175 0.441
## 39 0.044 0.346 0.306 0.971 0.796 3.067 0.183 0.480
## 40 0.023 0.317 0.281 0.930 0.806 3.719 0.196 0.427
## 41 0.044 0.304 0.266 0.899 0.879 3.612 0.170 0.409
## 42 0.044 0.305 0.281 0.825 0.869 3.538 0.169 0.397
## 43 0.031 0.272 0.258 0.752 0.658 3.105 0.161 0.347
## 44 0.025 0.237 0.192 0.577 0.861 2.674 0.139 0.308
## 45 0.035 0.236 0.174 0.703 0.958 2.730 0.111 0.363
## 46 0.008 0.193 0.145 0.729 1.458 4.505 0.085 0.228
## 47 0.018 0.143 0.109 0.552 1.636 5.675 0.077 0.182
## 48 0.008 0.109 0.094 0.334 1.102 3.696 0.053 0.135
## 49 0.007 0.064 0.054 0.198 0.974 3.513 0.033 0.091
## 50 0.004 0.024 0.019 0.082 1.591 5.793 0.013 0.029
## 51 0.001 0.019 0.016 0.100 2.728 14.443 0.010 0.024
## 52 0.001 0.015 0.012 0.071 2.229 10.249 0.009 0.018
## 53 0.000 0.011 0.008 0.036 1.024 3.674 0.005 0.015
## 54 0.001 0.012 0.010 0.035 0.991 3.390 0.005 0.015
## 55 0.001 0.009 0.008 0.045 1.958 8.464 0.004 0.013
## 56 0.000 0.008 0.007 0.039 2.124 10.327 0.004 0.012
## 57 0.001 0.008 0.006 0.035 1.859 8.338 0.004 0.011
## 58 0.001 0.008 0.006 0.044 2.152 9.133 0.004 0.010
## 59 0.000 0.008 0.007 0.029 1.280 4.224 0.004 0.011
## 60 0.001 0.006 0.005 0.022 1.381 5.177 0.003 0.008
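As a small illustration (not part of the original output), the FirstModes() and SecondModes() helpers defined above return the most frequent and second most frequent values of a vector; the toy vector below is hypothetical and used only to show the behavior.
##################################
# Illustrative use of the mode helper functions (hypothetical toy vector)
##################################
toy <- c(0.1, 0.1, 0.1, 0.2, 0.2, 0.3)
FirstModes(toy)[1]   # most frequent value: 0.1
SecondModes(toy)[1]  # second most frequent value: 0.2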
##################################
# Identifying potential data quality issues
##################################
##################################
# Checking for missing observations
##################################
if ((nrow(DQA.Summary[DQA.Summary$NA.Count>0,]))>0){
  print(paste0("Missing observations noted for ",
               (nrow(DQA.Summary[DQA.Summary$NA.Count>0,])),
               " variable(s) with NA.Count>0 and Fill.Rate<1.0."))
  DQA.Summary[DQA.Summary$NA.Count>0,]
} else {
  print("No missing observations noted.")
}
## [1] "No missing observations noted."
##################################
# Checking for zero or near-zero variance predictors
##################################
if (length(names(DQA.Predictors.Factor))==0) {
  print("No factor predictors noted.")
} else if (nrow(DQA.Predictors.Factor.Summary[as.numeric(as.character(DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio))>5,])>0){
  print(paste0("Low variance observed for ",
               (nrow(DQA.Predictors.Factor.Summary[as.numeric(as.character(DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio))>5,])),
               " factor variable(s) with First.Second.Mode.Ratio>5."))
  DQA.Predictors.Factor.Summary[as.numeric(as.character(DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio))>5,]
} else {
  print("No low variance factor predictors due to high first-second mode ratio noted.")
}
## [1] "No factor predictors noted."
if (length(names(DQA.Predictors.Numeric))==0) {
  print("No numeric predictors noted.")
} else if (nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$First.Second.Mode.Ratio))>5,])>0){
  print(paste0("Low variance observed for ",
               (nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$First.Second.Mode.Ratio))>5,])),
               " numeric variable(s) with First.Second.Mode.Ratio>5."))
  DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$First.Second.Mode.Ratio))>5,]
} else {
  print("No low variance numeric predictors due to high first-second mode ratio noted.")
}
## [1] "Low variance observed for 17 numeric variable(s) with First.Second.Mode.Ratio>5."
## Column.Name Column.Type Unique.Count Unique.Count.Ratio First.Mode.Value
## 9 V9 numeric 96 1.000 0.211
## 10 V10 numeric 96 1.000 0.360
## 12 V12 numeric 96 1.000 0.520
## 16 V16 numeric 96 1.000 0.871
## 19 V19 numeric 96 1.000 0.792
## 26 V26 numeric 91 0.948 1.000
## 28 V28 numeric 88 0.917 1.000
## 32 V32 numeric 96 1.000 0.189
## 34 V34 numeric 96 1.000 0.196
## 35 V35 numeric 96 1.000 0.267
## 37 V37 numeric 96 1.000 0.107
## 38 V38 numeric 96 1.000 0.202
## 41 V41 numeric 96 1.000 0.131
## 42 V42 numeric 96 1.000 0.178
## 43 V43 numeric 96 1.000 0.155
## 45 V45 numeric 96 1.000 0.071
## 48 V48 numeric 96 1.000 0.076
## Second.Mode.Value First.Mode.Count Second.Mode.Count First.Second.Mode.Ratio
## 9 0.000 1 0 Inf
## 10 0.000 1 0 Inf
## 12 0.000 1 0 Inf
## 16 0.000 1 0 Inf
## 19 0.000 1 0 Inf
## 26 0.284 6 1 6.000
## 28 0.191 9 1 9.000
## 32 0.000 1 0 Inf
## 34 0.000 1 0 Inf
## 35 0.000 1 0 Inf
## 37 0.000 1 0 Inf
## 38 0.000 1 0 Inf
## 41 0.000 1 0 Inf
## 42 0.000 1 0 Inf
## 43 0.000 1 0 Inf
## 45 0.000 1 0 Inf
## 48 0.000 1 0 Inf
## Minimum Mean Median Maximum Skewness Kurtosis Percentile25th Percentile75th
## 9 0.007 0.204 0.182 0.683 1.542 6.658 0.130 0.260
## 10 0.011 0.233 0.212 0.597 0.912 3.670 0.142 0.294
## 12 0.024 0.280 0.278 0.568 0.325 2.699 0.184 0.350
## 16 0.042 0.375 0.320 0.975 0.810 2.804 0.191 0.533
## 19 0.132 0.513 0.446 0.983 0.265 1.756 0.296 0.731
## 26 0.164 0.708 0.756 1.000 -0.686 2.618 0.564 0.877
## 28 0.060 0.708 0.776 1.000 -0.681 2.421 0.558 0.912
## 32 0.088 0.436 0.408 0.931 0.460 2.307 0.277 0.582
## 34 0.059 0.394 0.364 0.954 0.588 2.552 0.216 0.542
## 35 0.022 0.377 0.293 0.952 0.570 2.141 0.175 0.578
## 37 0.035 0.352 0.259 0.912 0.783 2.269 0.145 0.488
## 38 0.062 0.346 0.324 0.948 0.995 3.280 0.175 0.441
## 41 0.044 0.304 0.266 0.899 0.879 3.612 0.170 0.409
## 42 0.044 0.305 0.281 0.825 0.869 3.538 0.169 0.397
## 43 0.031 0.272 0.258 0.752 0.658 3.105 0.161 0.347
## 45 0.035 0.236 0.174 0.703 0.958 2.730 0.111 0.363
## 48 0.008 0.109 0.094 0.334 1.102 3.696 0.053 0.135
if (length(names(DQA.Predictors.Numeric))==0) {
  print("No numeric predictors noted.")
} else if (nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Unique.Count.Ratio))<0.01,])>0){
  print(paste0("Low variance observed for ",
               (nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Unique.Count.Ratio))<0.01,])),
               " numeric variable(s) with Unique.Count.Ratio<0.01."))
  DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Unique.Count.Ratio))<0.01,]
} else {
  print("No low variance numeric predictors due to low unique count ratio noted.")
}
## [1] "No low variance numeric predictors due to low unique count ratio noted."
##################################
# Checking for skewed predictors
##################################
if (length(names(DQA.Predictors.Numeric))==0) {
  print("No numeric predictors noted.")
} else if (nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))>3 |
                as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))<(-3),])>0){
  print(paste0("High skewness observed for ",
               (nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))>3 |
                     as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))<(-3),])),
               " numeric variable(s) with Skewness>3 or Skewness<(-3)."))
  DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))>3 |
                                 as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))<(-3),]
} else {
  print("No skewed numeric predictors noted.")
}
## [1] "No skewed numeric predictors noted."
##################################
# Loading dataset
##################################
DPA <- Sonar_Train
##################################
# Listing all predictors
##################################
DPA.Predictors <- DPA[,!names(DPA) %in% c("Class")]
##################################
# Listing all numeric predictors
##################################
DPA.Predictors.Numeric <- DPA.Predictors[,sapply(DPA.Predictors, is.numeric)]
##################################
# Identifying outliers for the numeric predictors
##################################
OutlierCountList <- c()

for (i in 1:ncol(DPA.Predictors.Numeric)) {
  Outliers <- boxplot.stats(DPA.Predictors.Numeric[,i])$out
  OutlierCount <- length(Outliers)
  OutlierCountList <- append(OutlierCountList,OutlierCount)
  OutlierIndices <- which(DPA.Predictors.Numeric[,i] %in% c(Outliers))
  boxplot(DPA.Predictors.Numeric[,i],
          ylab = names(DPA.Predictors.Numeric)[i],
          main = names(DPA.Predictors.Numeric)[i],
          horizontal=TRUE)
  mtext(paste0(OutlierCount, " Outlier(s) Detected"))
}

OutlierCountSummary <- as.data.frame(cbind(names(DPA.Predictors.Numeric),(OutlierCountList)))
names(OutlierCountSummary) <- c("NumericPredictors","OutlierCount")
OutlierCountSummary$OutlierCount <- as.numeric(as.character(OutlierCountSummary$OutlierCount))
NumericPredictorWithOutlierCount <- nrow(OutlierCountSummary[OutlierCountSummary$OutlierCount>0,])
print(paste0(NumericPredictorWithOutlierCount, " numeric variable(s) were noted with outlier(s)." ))
## [1] "38 numeric variable(s) were noted with outlier(s)."
##################################
# Gathering descriptive statistics
##################################
(DPA_Skimmed <- skim(DPA.Predictors.Numeric))
Name | DPA.Predictors.Numeric |
Number of rows | 96 |
Number of columns | 60 |
_______________________ | |
Column type frequency: | |
numeric | 60 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
V1 | 0 | 1 | 0.03 | 0.03 | 0.00 | 0.01 | 0.02 | 0.04 | 0.14 | ▇▃▂▁▁ |
V2 | 0 | 1 | 0.04 | 0.03 | 0.00 | 0.02 | 0.03 | 0.06 | 0.16 | ▇▆▂▁▁ |
V3 | 0 | 1 | 0.05 | 0.03 | 0.00 | 0.02 | 0.04 | 0.06 | 0.17 | ▇▆▂▁▁ |
V4 | 0 | 1 | 0.06 | 0.04 | 0.01 | 0.03 | 0.05 | 0.07 | 0.16 | ▇▇▃▁▁ |
V5 | 0 | 1 | 0.08 | 0.05 | 0.01 | 0.05 | 0.07 | 0.11 | 0.25 | ▇▇▅▁▁ |
V6 | 0 | 1 | 0.12 | 0.06 | 0.01 | 0.08 | 0.11 | 0.15 | 0.38 | ▅▇▂▁▁ |
V7 | 0 | 1 | 0.13 | 0.06 | 0.02 | 0.09 | 0.13 | 0.17 | 0.37 | ▃▇▃▁▁ |
V8 | 0 | 1 | 0.15 | 0.09 | 0.01 | 0.10 | 0.14 | 0.19 | 0.46 | ▆▇▃▁▁ |
V9 | 0 | 1 | 0.20 | 0.12 | 0.01 | 0.13 | 0.18 | 0.26 | 0.68 | ▅▇▃▁▁ |
V10 | 0 | 1 | 0.23 | 0.13 | 0.01 | 0.14 | 0.21 | 0.29 | 0.60 | ▃▇▅▂▁ |
V11 | 0 | 1 | 0.27 | 0.12 | 0.05 | 0.19 | 0.25 | 0.33 | 0.67 | ▅▇▅▂▁ |
V12 | 0 | 1 | 0.28 | 0.13 | 0.02 | 0.18 | 0.28 | 0.35 | 0.57 | ▃▅▇▃▂ |
V13 | 0 | 1 | 0.30 | 0.13 | 0.06 | 0.21 | 0.29 | 0.37 | 0.71 | ▃▇▅▂▁ |
V14 | 0 | 1 | 0.31 | 0.17 | 0.03 | 0.19 | 0.29 | 0.41 | 1.00 | ▇▇▃▁▁ |
V15 | 0 | 1 | 0.32 | 0.21 | 0.01 | 0.17 | 0.28 | 0.44 | 0.91 | ▇▅▆▁▁ |
V16 | 0 | 1 | 0.38 | 0.23 | 0.04 | 0.19 | 0.32 | 0.53 | 0.98 | ▇▅▅▂▂ |
V17 | 0 | 1 | 0.41 | 0.25 | 0.04 | 0.21 | 0.32 | 0.65 | 1.00 | ▇▇▃▅▂ |
V18 | 0 | 1 | 0.45 | 0.25 | 0.04 | 0.24 | 0.37 | 0.67 | 0.93 | ▃▇▂▃▃ |
V19 | 0 | 1 | 0.51 | 0.25 | 0.13 | 0.30 | 0.45 | 0.73 | 0.98 | ▇▇▃▅▅ |
V20 | 0 | 1 | 0.59 | 0.26 | 0.07 | 0.40 | 0.62 | 0.80 | 1.00 | ▅▆▅▇▆ |
V21 | 0 | 1 | 0.64 | 0.27 | 0.05 | 0.44 | 0.69 | 0.84 | 1.00 | ▂▃▅▆▇ |
V22 | 0 | 1 | 0.64 | 0.27 | 0.02 | 0.40 | 0.70 | 0.85 | 1.00 | ▂▃▂▆▇ |
V23 | 0 | 1 | 0.65 | 0.26 | 0.06 | 0.45 | 0.71 | 0.87 | 1.00 | ▂▂▃▅▇ |
V24 | 0 | 1 | 0.68 | 0.24 | 0.05 | 0.58 | 0.70 | 0.90 | 1.00 | ▂▂▃▇▇ |
V25 | 0 | 1 | 0.68 | 0.25 | 0.02 | 0.57 | 0.72 | 0.87 | 1.00 | ▂▂▃▇▇ |
V26 | 0 | 1 | 0.71 | 0.22 | 0.16 | 0.56 | 0.76 | 0.88 | 1.00 | ▂▂▅▇▇ |
V27 | 0 | 1 | 0.71 | 0.25 | 0.10 | 0.50 | 0.79 | 0.91 | 1.00 | ▁▂▂▃▇ |
V28 | 0 | 1 | 0.71 | 0.25 | 0.06 | 0.56 | 0.78 | 0.91 | 1.00 | ▁▂▃▅▇ |
V29 | 0 | 1 | 0.65 | 0.25 | 0.01 | 0.47 | 0.71 | 0.87 | 1.00 | ▂▂▅▅▇ |
V30 | 0 | 1 | 0.59 | 0.22 | 0.06 | 0.41 | 0.60 | 0.72 | 1.00 | ▁▆▇▇▅ |
V31 | 0 | 1 | 0.50 | 0.23 | 0.10 | 0.33 | 0.44 | 0.65 | 0.97 | ▃▇▅▃▃ |
V32 | 0 | 1 | 0.44 | 0.22 | 0.09 | 0.28 | 0.41 | 0.58 | 0.93 | ▆▇▆▃▂ |
V33 | 0 | 1 | 0.41 | 0.21 | 0.05 | 0.24 | 0.39 | 0.54 | 1.00 | ▆▇▇▃▁ |
V34 | 0 | 1 | 0.39 | 0.22 | 0.06 | 0.22 | 0.36 | 0.54 | 0.95 | ▇▇▃▅▂ |
V35 | 0 | 1 | 0.38 | 0.26 | 0.02 | 0.17 | 0.29 | 0.58 | 0.95 | ▇▆▃▃▂ |
V36 | 0 | 1 | 0.36 | 0.28 | 0.01 | 0.14 | 0.28 | 0.53 | 1.00 | ▇▅▂▂▂ |
V37 | 0 | 1 | 0.35 | 0.26 | 0.04 | 0.14 | 0.26 | 0.49 | 0.91 | ▇▃▂▁▃ |
V38 | 0 | 1 | 0.35 | 0.22 | 0.06 | 0.17 | 0.32 | 0.44 | 0.95 | ▇▆▂▂▁ |
V39 | 0 | 1 | 0.35 | 0.20 | 0.04 | 0.18 | 0.31 | 0.48 | 0.97 | ▇▇▅▂▁ |
V40 | 0 | 1 | 0.32 | 0.18 | 0.02 | 0.20 | 0.28 | 0.43 | 0.93 | ▅▇▃▁▁ |
V41 | 0 | 1 | 0.30 | 0.17 | 0.04 | 0.17 | 0.27 | 0.41 | 0.90 | ▇▇▃▂▁ |
V42 | 0 | 1 | 0.31 | 0.18 | 0.04 | 0.17 | 0.28 | 0.40 | 0.82 | ▇▇▆▁▁ |
V43 | 0 | 1 | 0.27 | 0.15 | 0.03 | 0.16 | 0.26 | 0.35 | 0.75 | ▆▇▅▂▁ |
V44 | 0 | 1 | 0.24 | 0.14 | 0.03 | 0.14 | 0.19 | 0.31 | 0.58 | ▅▇▂▂▂ |
V45 | 0 | 1 | 0.24 | 0.17 | 0.04 | 0.11 | 0.17 | 0.36 | 0.70 | ▇▃▂▂▁ |
V46 | 0 | 1 | 0.19 | 0.16 | 0.01 | 0.08 | 0.14 | 0.23 | 0.73 | ▇▅▂▁▁ |
V47 | 0 | 1 | 0.14 | 0.10 | 0.02 | 0.08 | 0.11 | 0.18 | 0.55 | ▇▃▁▁▁ |
V48 | 0 | 1 | 0.11 | 0.07 | 0.01 | 0.05 | 0.09 | 0.14 | 0.33 | ▆▇▂▁▁ |
V49 | 0 | 1 | 0.06 | 0.04 | 0.01 | 0.03 | 0.05 | 0.09 | 0.20 | ▇▇▃▂▁ |
V50 | 0 | 1 | 0.02 | 0.02 | 0.00 | 0.01 | 0.02 | 0.03 | 0.08 | ▇▅▂▁▁ |
V51 | 0 | 1 | 0.02 | 0.01 | 0.00 | 0.01 | 0.02 | 0.02 | 0.10 | ▇▃▁▁▁ |
V52 | 0 | 1 | 0.02 | 0.01 | 0.00 | 0.01 | 0.01 | 0.02 | 0.07 | ▇▃▁▁▁ |
V53 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.00 | 0.01 | 0.02 | 0.04 | ▇▃▃▁▁ |
V54 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.01 | 0.01 | 0.02 | 0.04 | ▆▇▂▂▁ |
V55 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.00 | 0.01 | 0.01 | 0.04 | ▇▃▁▁▁ |
V56 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.00 | 0.01 | 0.01 | 0.04 | ▇▅▁▁▁ |
V57 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.00 | 0.01 | 0.01 | 0.04 | ▇▅▁▁▁ |
V58 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.00 | 0.01 | 0.01 | 0.04 | ▇▂▁▁▁ |
V59 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.00 | 0.01 | 0.01 | 0.03 | ▇▇▂▁▁ |
V60 | 0 | 1 | 0.01 | 0.00 | 0.00 | 0.00 | 0.01 | 0.01 | 0.02 | ▇▆▂▁▁ |
###################################
# Verifying the data dimensions
###################################
dim(DPA.Predictors.Numeric)
## [1] 96 60
##################################
# Loading dataset
##################################
DPA <- Sonar_Train
##################################
# Gathering descriptive statistics
##################################
(DPA_Skimmed <- skim(DPA))
Name | DPA |
Number of rows | 96 |
Number of columns | 61 |
_______________________ | |
Column type frequency: | |
factor | 1 |
numeric | 60 |
________________________ | |
Group variables | None |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
Class | 0 | 1 | FALSE | 2 | M: 78, R: 18 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
V1 | 0 | 1 | 0.03 | 0.03 | 0.00 | 0.01 | 0.02 | 0.04 | 0.14 | ▇▃▂▁▁ |
V2 | 0 | 1 | 0.04 | 0.03 | 0.00 | 0.02 | 0.03 | 0.06 | 0.16 | ▇▆▂▁▁ |
V3 | 0 | 1 | 0.05 | 0.03 | 0.00 | 0.02 | 0.04 | 0.06 | 0.17 | ▇▆▂▁▁ |
V4 | 0 | 1 | 0.06 | 0.04 | 0.01 | 0.03 | 0.05 | 0.07 | 0.16 | ▇▇▃▁▁ |
V5 | 0 | 1 | 0.08 | 0.05 | 0.01 | 0.05 | 0.07 | 0.11 | 0.25 | ▇▇▅▁▁ |
V6 | 0 | 1 | 0.12 | 0.06 | 0.01 | 0.08 | 0.11 | 0.15 | 0.38 | ▅▇▂▁▁ |
V7 | 0 | 1 | 0.13 | 0.06 | 0.02 | 0.09 | 0.13 | 0.17 | 0.37 | ▃▇▃▁▁ |
V8 | 0 | 1 | 0.15 | 0.09 | 0.01 | 0.10 | 0.14 | 0.19 | 0.46 | ▆▇▃▁▁ |
V9 | 0 | 1 | 0.20 | 0.12 | 0.01 | 0.13 | 0.18 | 0.26 | 0.68 | ▅▇▃▁▁ |
V10 | 0 | 1 | 0.23 | 0.13 | 0.01 | 0.14 | 0.21 | 0.29 | 0.60 | ▃▇▅▂▁ |
V11 | 0 | 1 | 0.27 | 0.12 | 0.05 | 0.19 | 0.25 | 0.33 | 0.67 | ▅▇▅▂▁ |
V12 | 0 | 1 | 0.28 | 0.13 | 0.02 | 0.18 | 0.28 | 0.35 | 0.57 | ▃▅▇▃▂ |
V13 | 0 | 1 | 0.30 | 0.13 | 0.06 | 0.21 | 0.29 | 0.37 | 0.71 | ▃▇▅▂▁ |
V14 | 0 | 1 | 0.31 | 0.17 | 0.03 | 0.19 | 0.29 | 0.41 | 1.00 | ▇▇▃▁▁ |
V15 | 0 | 1 | 0.32 | 0.21 | 0.01 | 0.17 | 0.28 | 0.44 | 0.91 | ▇▅▆▁▁ |
V16 | 0 | 1 | 0.38 | 0.23 | 0.04 | 0.19 | 0.32 | 0.53 | 0.98 | ▇▅▅▂▂ |
V17 | 0 | 1 | 0.41 | 0.25 | 0.04 | 0.21 | 0.32 | 0.65 | 1.00 | ▇▇▃▅▂ |
V18 | 0 | 1 | 0.45 | 0.25 | 0.04 | 0.24 | 0.37 | 0.67 | 0.93 | ▃▇▂▃▃ |
V19 | 0 | 1 | 0.51 | 0.25 | 0.13 | 0.30 | 0.45 | 0.73 | 0.98 | ▇▇▃▅▅ |
V20 | 0 | 1 | 0.59 | 0.26 | 0.07 | 0.40 | 0.62 | 0.80 | 1.00 | ▅▆▅▇▆ |
V21 | 0 | 1 | 0.64 | 0.27 | 0.05 | 0.44 | 0.69 | 0.84 | 1.00 | ▂▃▅▆▇ |
V22 | 0 | 1 | 0.64 | 0.27 | 0.02 | 0.40 | 0.70 | 0.85 | 1.00 | ▂▃▂▆▇ |
V23 | 0 | 1 | 0.65 | 0.26 | 0.06 | 0.45 | 0.71 | 0.87 | 1.00 | ▂▂▃▅▇ |
V24 | 0 | 1 | 0.68 | 0.24 | 0.05 | 0.58 | 0.70 | 0.90 | 1.00 | ▂▂▃▇▇ |
V25 | 0 | 1 | 0.68 | 0.25 | 0.02 | 0.57 | 0.72 | 0.87 | 1.00 | ▂▂▃▇▇ |
V26 | 0 | 1 | 0.71 | 0.22 | 0.16 | 0.56 | 0.76 | 0.88 | 1.00 | ▂▂▅▇▇ |
V27 | 0 | 1 | 0.71 | 0.25 | 0.10 | 0.50 | 0.79 | 0.91 | 1.00 | ▁▂▂▃▇ |
V28 | 0 | 1 | 0.71 | 0.25 | 0.06 | 0.56 | 0.78 | 0.91 | 1.00 | ▁▂▃▅▇ |
V29 | 0 | 1 | 0.65 | 0.25 | 0.01 | 0.47 | 0.71 | 0.87 | 1.00 | ▂▂▅▅▇ |
V30 | 0 | 1 | 0.59 | 0.22 | 0.06 | 0.41 | 0.60 | 0.72 | 1.00 | ▁▆▇▇▅ |
V31 | 0 | 1 | 0.50 | 0.23 | 0.10 | 0.33 | 0.44 | 0.65 | 0.97 | ▃▇▅▃▃ |
V32 | 0 | 1 | 0.44 | 0.22 | 0.09 | 0.28 | 0.41 | 0.58 | 0.93 | ▆▇▆▃▂ |
V33 | 0 | 1 | 0.41 | 0.21 | 0.05 | 0.24 | 0.39 | 0.54 | 1.00 | ▆▇▇▃▁ |
V34 | 0 | 1 | 0.39 | 0.22 | 0.06 | 0.22 | 0.36 | 0.54 | 0.95 | ▇▇▃▅▂ |
V35 | 0 | 1 | 0.38 | 0.26 | 0.02 | 0.17 | 0.29 | 0.58 | 0.95 | ▇▆▃▃▂ |
V36 | 0 | 1 | 0.36 | 0.28 | 0.01 | 0.14 | 0.28 | 0.53 | 1.00 | ▇▅▂▂▂ |
V37 | 0 | 1 | 0.35 | 0.26 | 0.04 | 0.14 | 0.26 | 0.49 | 0.91 | ▇▃▂▁▃ |
V38 | 0 | 1 | 0.35 | 0.22 | 0.06 | 0.17 | 0.32 | 0.44 | 0.95 | ▇▆▂▂▁ |
V39 | 0 | 1 | 0.35 | 0.20 | 0.04 | 0.18 | 0.31 | 0.48 | 0.97 | ▇▇▅▂▁ |
V40 | 0 | 1 | 0.32 | 0.18 | 0.02 | 0.20 | 0.28 | 0.43 | 0.93 | ▅▇▃▁▁ |
V41 | 0 | 1 | 0.30 | 0.17 | 0.04 | 0.17 | 0.27 | 0.41 | 0.90 | ▇▇▃▂▁ |
V42 | 0 | 1 | 0.31 | 0.18 | 0.04 | 0.17 | 0.28 | 0.40 | 0.82 | ▇▇▆▁▁ |
V43 | 0 | 1 | 0.27 | 0.15 | 0.03 | 0.16 | 0.26 | 0.35 | 0.75 | ▆▇▅▂▁ |
V44 | 0 | 1 | 0.24 | 0.14 | 0.03 | 0.14 | 0.19 | 0.31 | 0.58 | ▅▇▂▂▂ |
V45 | 0 | 1 | 0.24 | 0.17 | 0.04 | 0.11 | 0.17 | 0.36 | 0.70 | ▇▃▂▂▁ |
V46 | 0 | 1 | 0.19 | 0.16 | 0.01 | 0.08 | 0.14 | 0.23 | 0.73 | ▇▅▂▁▁ |
V47 | 0 | 1 | 0.14 | 0.10 | 0.02 | 0.08 | 0.11 | 0.18 | 0.55 | ▇▃▁▁▁ |
V48 | 0 | 1 | 0.11 | 0.07 | 0.01 | 0.05 | 0.09 | 0.14 | 0.33 | ▆▇▂▁▁ |
V49 | 0 | 1 | 0.06 | 0.04 | 0.01 | 0.03 | 0.05 | 0.09 | 0.20 | ▇▇▃▂▁ |
V50 | 0 | 1 | 0.02 | 0.02 | 0.00 | 0.01 | 0.02 | 0.03 | 0.08 | ▇▅▂▁▁ |
V51 | 0 | 1 | 0.02 | 0.01 | 0.00 | 0.01 | 0.02 | 0.02 | 0.10 | ▇▃▁▁▁ |
V52 | 0 | 1 | 0.02 | 0.01 | 0.00 | 0.01 | 0.01 | 0.02 | 0.07 | ▇▃▁▁▁ |
V53 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.00 | 0.01 | 0.02 | 0.04 | ▇▃▃▁▁ |
V54 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.01 | 0.01 | 0.02 | 0.04 | ▆▇▂▂▁ |
V55 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.00 | 0.01 | 0.01 | 0.04 | ▇▃▁▁▁ |
V56 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.00 | 0.01 | 0.01 | 0.04 | ▇▅▁▁▁ |
V57 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.00 | 0.01 | 0.01 | 0.04 | ▇▅▁▁▁ |
V58 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.00 | 0.01 | 0.01 | 0.04 | ▇▂▁▁▁ |
V59 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.00 | 0.01 | 0.01 | 0.03 | ▇▇▂▁▁ |
V60 | 0 | 1 | 0.01 | 0.00 | 0.00 | 0.00 | 0.01 | 0.01 | 0.02 | ▇▆▂▁▁ |
##################################
# Identifying columns with low variance
###################################
DPA_LowVariance <- nearZeroVar(DPA,
                               freqCut = 95/5,
                               uniqueCut = 10,
                               saveMetrics= TRUE)

(DPA_LowVariance[DPA_LowVariance$nzv,])
## [1] freqRatio percentUnique zeroVar nzv
## <0 rows> (or 0-length row.names)
if ((nrow(DPA_LowVariance[DPA_LowVariance$nzv,]))==0){
  print("No low variance predictors noted.")
} else {
  print(paste0("Low variance observed for ",
               (nrow(DPA_LowVariance[DPA_LowVariance$nzv,])),
               " numeric variable(s) with First.Second.Mode.Ratio>4 and Unique.Count.Ratio<0.10."))

  DPA_LowVarianceForRemoval <- (nrow(DPA_LowVariance[DPA_LowVariance$nzv,]))

  print(paste0("Low variance can be resolved by removing ",
               (nrow(DPA_LowVariance[DPA_LowVariance$nzv,])),
               " numeric variable(s)."))

  for (j in 1:DPA_LowVarianceForRemoval) {
    DPA_LowVarianceRemovedVariable <- rownames(DPA_LowVariance[DPA_LowVariance$nzv,])[j]
    print(paste0("Variable ",
                 j," for removal: ",
                 DPA_LowVarianceRemovedVariable))
  }

  DPA %>%
    skim() %>%
    dplyr::filter(skim_variable %in% rownames(DPA_LowVariance[DPA_LowVariance$nzv,]))

  ##################################
  # Filtering out columns with low variance
  ##################################
  DPA_ExcludedLowVariance <- DPA[,!names(DPA) %in% rownames(DPA_LowVariance[DPA_LowVariance$nzv,])]

  ##################################
  # Gathering descriptive statistics
  ##################################
  (DPA_ExcludedLowVariance_Skimmed <- skim(DPA_ExcludedLowVariance))

  ###################################
  # Verifying the data dimensions
  ###################################
  dim(DPA_ExcludedLowVariance)
}
## [1] "No low variance predictors noted."
##################################
# Loading dataset
##################################
DPA <- Sonar_Train
##################################
# Listing all predictors
##################################
DPA.Predictors <- DPA[,!names(DPA) %in% c("Class")]
##################################
# Listing all numeric predictors
##################################
DPA.Predictors.Numeric <- DPA.Predictors[,sapply(DPA.Predictors, is.numeric)]
##################################
# Visualizing pairwise correlation between predictors
##################################
DPA_CorrelationTest <- cor.mtest(DPA.Predictors.Numeric,
                                 method = "pearson",
                                 conf.level = .95)
corrplot(cor(DPA.Predictors.Numeric,
method = "pearson",
use="pairwise.complete.obs"),
method = "circle",
type = "upper",
order = "original",
tl.col = "black",
tl.cex = 0.75,
tl.srt = 90,
sig.level = 0.05,
p.mat = DPA_CorrelationTest$p,
insig = "blank")
##################################
# Identifying the highly correlated variables
##################################
DPA_Correlation <- cor(DPA.Predictors.Numeric,
                       method = "pearson",
                       use="pairwise.complete.obs")

(DPA_HighlyCorrelatedCount <- sum(abs(DPA_Correlation[upper.tri(DPA_Correlation)]) > 0.95))
## [1] 0
if (DPA_HighlyCorrelatedCount == 0) {
  print("No highly correlated predictors noted.")
} else {
  print(paste0("High correlation observed for ",
               (DPA_HighlyCorrelatedCount)," pairs of numeric variable(s) with Correlation.Coefficient>0.95."))

  (DPA_HighlyCorrelatedPairs <- corr_cross(DPA.Predictors.Numeric,
                                           max_pvalue = 0.05,
                                           top = DPA_HighlyCorrelatedCount,
                                           rm.na = TRUE,
                                           grid = FALSE))
}
## [1] "No highly correlated predictors noted."
if (DPA_HighlyCorrelatedCount > 0) {

  DPA_HighlyCorrelated <- findCorrelation(DPA_Correlation, cutoff = 0.95)

  (DPA_HighlyCorrelatedForRemoval <- length(DPA_HighlyCorrelated))

  print(paste0("High correlation can be resolved by removing ",
               (DPA_HighlyCorrelatedForRemoval)," numeric variable(s)."))

  for (j in 1:DPA_HighlyCorrelatedForRemoval) {
    DPA_HighlyCorrelatedRemovedVariable <- colnames(DPA.Predictors.Numeric)[DPA_HighlyCorrelated[j]]
    print(paste0("Variable ",
                 j," for removal: ",
                 DPA_HighlyCorrelatedRemovedVariable))
  }

  ##################################
  # Filtering out columns with high correlation
  ##################################
  DPA_ExcludedHighCorrelation <- DPA[,-DPA_HighlyCorrelated]

  ##################################
  # Gathering descriptive statistics
  ##################################
  (DPA_ExcludedHighCorrelation_Skimmed <- skim(DPA_ExcludedHighCorrelation))

  ##################################
  # Verifying the data dimensions
  ##################################
  dim(DPA_ExcludedHighCorrelation)

}
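# Illustrative sketch only (simulated data, not part of the original run): although no
# predictor pair crossed the 0.95 threshold here, this is how findCorrelation() would
# choose which member of a highly correlated pair to drop.
set.seed(12345678)
x1 <- rnorm(100)
cor_demo <- data.frame(x1 = x1,
                       x2 = x1 + rnorm(100, sd = 0.01),  # near-duplicate of x1
                       x3 = rnorm(100))                   # unrelated predictor
findCorrelation(cor(cor_demo), cutoff = 0.95, names = TRUE)
# Expected: one of "x1"/"x2" is nominated for removal; "x3" is retained.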
##################################
# Loading dataset
##################################
DPA <- Sonar_Train

##################################
# Listing all predictors
##################################
DPA.Predictors <- DPA[,!names(DPA) %in% c("Class")]

##################################
# Listing all numeric predictors
##################################
DPA.Predictors.Numeric <- DPA.Predictors[,sapply(DPA.Predictors, is.numeric)]
##################################
# Identifying the linearly dependent variables
##################################
DPA_LinearlyDependent <- findLinearCombos(DPA.Predictors.Numeric)

(DPA_LinearlyDependentCount <- length(DPA_LinearlyDependent$linearCombos))

## [1] 0

if (DPA_LinearlyDependentCount == 0) {

  print("No linearly dependent predictors noted.")

} else {

  print(paste0("Linear dependency observed for ",
               (DPA_LinearlyDependentCount)," subset(s) of numeric variable(s)."))

  for (i in 1:DPA_LinearlyDependentCount) {
    DPA_LinearlyDependentSubset <- colnames(DPA.Predictors.Numeric)[DPA_LinearlyDependent$linearCombos[[i]]]
    print(paste0("Linear dependent variable(s) for subset ",
                 i," include: ",
                 DPA_LinearlyDependentSubset))
  }

}
## [1] "No linearly dependent predictors noted."
##################################
# Identifying the linearly dependent variables for removal
##################################
if (DPA_LinearlyDependentCount > 0) {

  DPA_LinearlyDependent <- findLinearCombos(DPA.Predictors.Numeric)

  DPA_LinearlyDependentForRemoval <- length(DPA_LinearlyDependent$remove)

  print(paste0("Linear dependency can be resolved by removing ",
               (DPA_LinearlyDependentForRemoval)," numeric variable(s)."))

  for (j in 1:DPA_LinearlyDependentForRemoval) {
    DPA_LinearlyDependentRemovedVariable <- colnames(DPA.Predictors.Numeric)[DPA_LinearlyDependent$remove[j]]
    print(paste0("Variable ",
                 j," for removal: ",
                 DPA_LinearlyDependentRemovedVariable))
  }

  ##################################
  # Filtering out columns with linear dependency
  ##################################
  DPA_ExcludedLinearlyDependent <- DPA[,-DPA_LinearlyDependent$remove]

  ##################################
  # Gathering descriptive statistics
  ##################################
  (DPA_ExcludedLinearlyDependent_Skimmed <- skim(DPA_ExcludedLinearlyDependent))

  ##################################
  # Verifying the data dimensions
  ##################################
  dim(DPA_ExcludedLinearlyDependent)

} else {

  ##################################
  # Verifying the data dimensions
  ##################################
  dim(DPA)

}
## [1] 96 61
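# Illustrative sketch only (simulated data, not part of the original run): the kind of
# exact linear dependency that findLinearCombos() detects and resolves.
set.seed(12345678)
lc_demo <- data.frame(x1 = rnorm(50), x2 = rnorm(50))
lc_demo$x3 <- 2*lc_demo$x1 - lc_demo$x2   # exact linear combination of x1 and x2
findLinearCombos(lc_demo)
# Expected: $linearCombos lists the dependent subset and $remove suggests dropping x3.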
##################################
# Loading dataset
##################################
DPA <- Sonar_Train

##################################
# Listing all predictors
##################################
DPA.Predictors <- DPA[,!names(DPA) %in% c("Class")]

##################################
# Listing all numeric predictors
##################################
DPA.Predictors.Numeric <- DPA.Predictors[,sapply(DPA.Predictors, is.numeric)]
##################################
# Applying a Box-Cox transformation
##################################
DPA_BoxCox <- preProcess(DPA.Predictors.Numeric, method = c("BoxCox"))
DPA_BoxCoxTransformed <- predict(DPA_BoxCox, DPA.Predictors.Numeric)

##################################
# Gathering descriptive statistics
##################################
(DPA_BoxCoxTransformedSkimmed <- skim(DPA_BoxCoxTransformed))
Name | DPA_BoxCoxTransformed |
Number of rows | 96 |
Number of columns | 60 |
_______________________ | |
Column type frequency: | |
numeric | 60 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
V1 | 0 | 1 | -2.61 | 0.38 | -3.64 | -2.88 | -2.64 | -2.38 | -1.64 | ▁▅▇▅▁ |
V2 | 0 | 1 | -2.11 | 0.31 | -2.97 | -2.32 | -2.13 | -1.91 | -1.42 | ▁▃▇▆▂ |
V3 | 0 | 1 | -2.07 | 0.28 | -2.86 | -2.24 | -2.08 | -1.89 | -1.39 | ▁▃▇▅▂ |
V4 | 0 | 1 | -1.98 | 0.27 | -2.62 | -2.17 | -1.98 | -1.82 | -1.39 | ▁▆▇▅▂ |
V5 | 0 | 1 | -1.62 | 0.22 | -2.16 | -1.77 | -1.62 | -1.47 | -1.07 | ▂▆▇▆▁ |
V6 | 0 | 1 | -1.34 | 0.18 | -1.80 | -1.44 | -1.33 | -1.23 | -0.76 | ▂▅▇▂▁ |
V7 | 0 | 1 | -1.18 | 0.14 | -1.52 | -1.26 | -1.18 | -1.09 | -0.74 | ▂▆▇▃▁ |
V8 | 0 | 1 | -1.37 | 0.27 | -2.07 | -1.52 | -1.38 | -1.21 | -0.67 | ▁▃▇▃▁ |
V9 | 0 | 1 | -1.23 | 0.31 | -2.15 | -1.40 | -1.24 | -1.04 | -0.35 | ▁▃▇▃▁ |
V10 | 0 | 1 | -1.07 | 0.26 | -1.79 | -1.25 | -1.08 | -0.92 | -0.46 | ▁▃▇▅▂ |
V11 | 0 | 1 | -1.00 | 0.24 | -1.54 | -1.12 | -1.00 | -0.85 | -0.37 | ▂▃▇▃▁ |
V12 | 0 | 1 | -0.86 | 0.19 | -1.32 | -0.99 | -0.85 | -0.74 | -0.47 | ▂▅▇▇▃ |
V13 | 0 | 1 | -0.93 | 0.25 | -1.50 | -1.08 | -0.92 | -0.78 | -0.31 | ▂▅▇▅▂ |
V14 | 0 | 1 | -1.05 | 0.39 | -2.20 | -1.32 | -1.03 | -0.79 | 0.00 | ▁▃▇▆▁ |
V15 | 0 | 1 | -1.08 | 0.48 | -2.52 | -1.38 | -1.07 | -0.73 | -0.09 | ▁▃▇▇▃ |
V16 | 0 | 1 | -1.01 | 0.51 | -2.35 | -1.41 | -1.02 | -0.59 | -0.03 | ▁▆▇▇▅ |
V17 | 0 | 1 | -0.94 | 0.53 | -2.42 | -1.34 | -1.03 | -0.42 | 0.00 | ▁▅▇▅▇ |
V18 | 0 | 1 | -0.76 | 0.42 | -1.83 | -1.08 | -0.81 | -0.37 | -0.07 | ▁▅▇▅▆ |
V19 | 0 | 1 | -0.64 | 0.39 | -1.39 | -0.96 | -0.69 | -0.29 | -0.02 | ▃▇▅▇▇ |
V20 | 0 | 1 | 0.59 | 0.26 | 0.07 | 0.40 | 0.62 | 0.80 | 1.00 | ▅▆▅▇▆ |
V21 | 0 | 1 | -0.33 | 0.23 | -0.81 | -0.52 | -0.30 | -0.15 | 0.00 | ▃▃▅▇▇ |
V22 | 0 | 1 | -0.34 | 0.24 | -0.82 | -0.56 | -0.29 | -0.15 | 0.00 | ▃▅▅▇▇ |
V23 | 0 | 1 | -0.30 | 0.20 | -0.70 | -0.48 | -0.27 | -0.13 | 0.00 | ▃▅▃▇▇ |
V24 | 0 | 1 | -0.27 | 0.18 | -0.66 | -0.37 | -0.28 | -0.10 | 0.00 | ▅▂▇▆▇ |
V25 | 0 | 1 | -0.27 | 0.18 | -0.66 | -0.38 | -0.26 | -0.12 | 0.00 | ▃▃▇▇▇ |
V26 | 0 | 1 | -0.24 | 0.16 | -0.56 | -0.37 | -0.22 | -0.12 | 0.00 | ▃▅▆▇▇ |
V27 | 0 | 1 | -0.24 | 0.19 | -0.61 | -0.42 | -0.19 | -0.09 | 0.00 | ▃▃▃▃▇ |
V28 | 0 | 1 | -0.24 | 0.19 | -0.62 | -0.38 | -0.21 | -0.09 | 0.00 | ▃▂▅▆▇ |
V29 | 0 | 1 | -0.31 | 0.21 | -0.77 | -0.48 | -0.28 | -0.13 | 0.00 | ▂▆▃▆▇ |
V30 | 0 | 1 | 0.59 | 0.22 | 0.06 | 0.41 | 0.60 | 0.72 | 1.00 | ▁▆▇▇▅ |
V31 | 0 | 1 | -0.66 | 0.35 | -1.50 | -0.90 | -0.70 | -0.40 | -0.03 | ▁▅▇▆▆ |
V32 | 0 | 1 | -0.76 | 0.37 | -1.56 | -1.00 | -0.75 | -0.49 | -0.07 | ▃▇▇▇▆ |
V33 | 0 | 1 | -0.77 | 0.33 | -1.56 | -1.03 | -0.76 | -0.53 | 0.00 | ▁▇▇▇▂ |
V34 | 0 | 1 | -0.85 | 0.40 | -1.70 | -1.14 | -0.83 | -0.54 | -0.05 | ▂▆▇▆▃ |
V35 | 0 | 1 | -0.92 | 0.50 | -1.95 | -1.26 | -0.97 | -0.49 | -0.05 | ▃▇▇▆▇ |
V36 | 0 | 1 | -1.03 | 0.59 | -2.55 | -1.49 | -1.06 | -0.57 | 0.00 | ▁▅▇▆▆ |
V37 | 0 | 1 | -1.34 | 0.81 | -3.35 | -1.93 | -1.35 | -0.72 | -0.09 | ▁▅▇▇▇ |
V38 | 0 | 1 | -1.27 | 0.67 | -2.78 | -1.74 | -1.13 | -0.82 | -0.05 | ▂▅▅▇▅ |
V39 | 0 | 1 | -0.99 | 0.42 | -2.03 | -1.33 | -1.00 | -0.66 | -0.03 | ▁▇▇▆▂ |
V40 | 0 | 1 | -0.92 | 0.32 | -1.70 | -1.11 | -0.94 | -0.69 | -0.07 | ▁▆▇▅▁ |
V41 | 0 | 1 | -1.08 | 0.40 | -2.03 | -1.38 | -1.09 | -0.78 | -0.10 | ▂▆▇▆▂ |
V42 | 0 | 1 | -1.01 | 0.37 | -1.78 | -1.27 | -1.00 | -0.77 | -0.19 | ▃▅▇▅▂ |
V43 | 0 | 1 | -1.00 | 0.29 | -1.65 | -1.20 | -0.98 | -0.82 | -0.27 | ▃▆▇▅▂ |
V44 | 0 | 1 | -1.36 | 0.47 | -2.60 | -1.63 | -1.41 | -1.05 | -0.52 | ▁▃▇▅▅ |
V45 | 0 | 1 | -1.70 | 0.74 | -3.35 | -2.20 | -1.75 | -1.02 | -0.35 | ▂▅▇▃▆ |
V46 | 0 | 1 | -1.57 | 0.56 | -3.10 | -1.95 | -1.60 | -1.28 | -0.31 | ▁▅▇▅▃ |
V47 | 0 | 1 | -2.16 | 0.66 | -4.02 | -2.56 | -2.22 | -1.70 | -0.59 | ▁▅▇▅▂ |
V48 | 0 | 1 | -1.70 | 0.35 | -2.55 | -1.95 | -1.70 | -1.50 | -0.93 | ▂▅▇▅▂ |
V49 | 0 | 1 | -1.93 | 0.29 | -2.57 | -2.13 | -1.94 | -1.71 | -1.28 | ▃▆▇▆▂ |
V50 | 0 | 1 | -3.94 | 0.64 | -5.43 | -4.34 | -3.95 | -3.54 | -2.49 | ▂▆▇▆▃ |
V51 | 0 | 1 | -2.37 | 0.21 | -2.93 | -2.49 | -2.38 | -2.25 | -1.66 | ▂▅▇▂▁ |
V52 | 0 | 1 | -4.40 | 0.66 | -6.65 | -4.74 | -4.41 | -4.00 | -2.65 | ▁▂▇▆▁ |
V53 | 0 | 1 | -2.52 | 0.18 | -2.99 | -2.65 | -2.55 | -2.38 | -2.10 | ▁▆▇▇▂ |
V54 | 0 | 1 | -2.51 | 0.19 | -2.91 | -2.64 | -2.50 | -2.39 | -2.11 | ▂▃▇▃▃ |
V55 | 0 | 1 | -4.95 | 0.78 | -6.81 | -5.60 | -4.87 | -4.37 | -3.11 | ▂▇▇▇▁ |
V56 | 0 | 1 | -2.58 | 0.17 | -3.01 | -2.68 | -2.58 | -2.46 | -2.07 | ▁▆▇▃▁ |
V57 | 0 | 1 | -5.11 | 0.71 | -6.81 | -5.60 | -5.16 | -4.54 | -3.34 | ▂▇▇▇▂ |
V58 | 0 | 1 | -5.07 | 0.79 | -7.01 | -5.63 | -5.07 | -4.58 | -3.12 | ▁▅▇▅▂ |
V59 | 0 | 1 | -2.59 | 0.18 | -3.12 | -2.72 | -2.58 | -2.48 | -2.18 | ▁▃▇▅▂ |
V60 | 0 | 1 | -3.27 | 0.25 | -3.87 | -3.43 | -3.26 | -3.09 | -2.67 | ▂▆▇▆▂ |
###################################
# Verifying the data dimensions
###################################
dim(DPA_BoxCoxTransformed)
## [1] 96 60
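# Illustrative sketch only (not part of the original run): the per-predictor view of the
# same transformation, estimating a Box-Cox lambda for the strictly positive V1 with
# caret's BoxCoxTrans() and applying it.
V1_BoxCox <- BoxCoxTrans(DPA.Predictors.Numeric$V1)
V1_BoxCox                                             # prints the estimated lambda for V1
head(predict(V1_BoxCox, DPA.Predictors.Numeric$V1))   # first few transformed values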
##################################
# Loading dataset
##################################
DPA <- Sonar_Train

##################################
# Listing all predictors
##################################
DPA.Predictors <- DPA[,!names(DPA) %in% c("Class")]

##################################
# Listing all numeric predictors
##################################
DPA.Predictors.Numeric <- DPA.Predictors[,sapply(DPA.Predictors, is.numeric)]
##################################
# Applying a Box-Cox transformation
##################################
DPA_BoxCox <- preProcess(DPA.Predictors.Numeric, method = c("BoxCox"))
DPA_BoxCoxTransformed <- predict(DPA_BoxCox, DPA.Predictors.Numeric)

##################################
# Applying a center and scale data transformation
##################################
DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaled <- preProcess(DPA_BoxCoxTransformed, method = c("center","scale"))
DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed <- predict(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaled, DPA_BoxCoxTransformed)

##################################
# Gathering descriptive statistics
##################################
(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformedSkimmed <- skim(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed))
Name | DPA.Predictors.Numeric_Bo… |
Number of rows | 96 |
Number of columns | 60 |
_______________________ | |
Column type frequency: | |
numeric | 60 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
V1 | 0 | 1 | 0 | 1 | -2.69 | -0.72 | -0.09 | 0.61 | 2.54 | ▁▅▇▅▁ |
V2 | 0 | 1 | 0 | 1 | -2.81 | -0.68 | -0.05 | 0.66 | 2.26 | ▁▃▇▆▂ |
V3 | 0 | 1 | 0 | 1 | -2.79 | -0.60 | -0.02 | 0.65 | 2.41 | ▁▃▇▅▂ |
V4 | 0 | 1 | 0 | 1 | -2.40 | -0.71 | 0.00 | 0.63 | 2.23 | ▁▆▇▅▂ |
V5 | 0 | 1 | 0 | 1 | -2.44 | -0.69 | 0.02 | 0.67 | 2.49 | ▂▆▇▆▁ |
V6 | 0 | 1 | 0 | 1 | -2.57 | -0.58 | 0.06 | 0.62 | 3.20 | ▂▅▇▂▁ |
V7 | 0 | 1 | 0 | 1 | -2.33 | -0.57 | 0.04 | 0.62 | 3.08 | ▂▆▇▃▁ |
V8 | 0 | 1 | 0 | 1 | -2.54 | -0.55 | -0.01 | 0.59 | 2.56 | ▁▃▇▃▁ |
V9 | 0 | 1 | 0 | 1 | -2.90 | -0.52 | -0.01 | 0.60 | 2.79 | ▁▃▇▃▁ |
V10 | 0 | 1 | 0 | 1 | -2.73 | -0.67 | -0.03 | 0.59 | 2.34 | ▁▃▇▅▂ |
V11 | 0 | 1 | 0 | 1 | -2.29 | -0.53 | -0.01 | 0.63 | 2.64 | ▂▃▇▃▁ |
V12 | 0 | 1 | 0 | 1 | -2.42 | -0.70 | 0.06 | 0.59 | 2.02 | ▂▅▇▇▃ |
V13 | 0 | 1 | 0 | 1 | -2.30 | -0.60 | 0.05 | 0.60 | 2.47 | ▂▅▇▅▂ |
V14 | 0 | 1 | 0 | 1 | -2.92 | -0.68 | 0.05 | 0.67 | 2.67 | ▁▃▇▆▁ |
V15 | 0 | 1 | 0 | 1 | -3.02 | -0.65 | 0.01 | 0.73 | 2.07 | ▁▃▇▇▃ |
V16 | 0 | 1 | 0 | 1 | -2.63 | -0.78 | -0.01 | 0.83 | 1.95 | ▁▆▇▇▅ |
V17 | 0 | 1 | 0 | 1 | -2.82 | -0.77 | -0.17 | 0.99 | 1.78 | ▁▅▇▅▇ |
V18 | 0 | 1 | 0 | 1 | -2.53 | -0.76 | -0.13 | 0.94 | 1.64 | ▁▅▇▅▆ |
V19 | 0 | 1 | 0 | 1 | -1.93 | -0.82 | -0.12 | 0.90 | 1.62 | ▃▇▅▇▇ |
V20 | 0 | 1 | 0 | 1 | -2.01 | -0.73 | 0.14 | 0.82 | 1.60 | ▅▆▅▇▆ |
V21 | 0 | 1 | 0 | 1 | -2.03 | -0.80 | 0.17 | 0.78 | 1.43 | ▃▃▅▇▇ |
V22 | 0 | 1 | 0 | 1 | -2.06 | -0.93 | 0.21 | 0.80 | 1.42 | ▃▅▅▇▇ |
V23 | 0 | 1 | 0 | 1 | -1.95 | -0.86 | 0.18 | 0.87 | 1.50 | ▃▅▃▇▇ |
V24 | 0 | 1 | 0 | 1 | -2.09 | -0.53 | -0.02 | 0.94 | 1.49 | ▅▂▇▆▇ |
V25 | 0 | 1 | 0 | 1 | -2.13 | -0.59 | 0.08 | 0.82 | 1.48 | ▃▃▇▇▇ |
V26 | 0 | 1 | 0 | 1 | -1.99 | -0.77 | 0.12 | 0.77 | 1.51 | ▃▅▆▇▇ |
V27 | 0 | 1 | 0 | 1 | -1.94 | -0.95 | 0.26 | 0.83 | 1.29 | ▃▃▃▃▇ |
V28 | 0 | 1 | 0 | 1 | -2.01 | -0.73 | 0.19 | 0.84 | 1.30 | ▃▂▅▆▇ |
V29 | 0 | 1 | 0 | 1 | -2.16 | -0.81 | 0.18 | 0.88 | 1.50 | ▂▆▃▆▇ |
V30 | 0 | 1 | 0 | 1 | -2.42 | -0.80 | 0.07 | 0.61 | 1.90 | ▁▆▇▇▅ |
V31 | 0 | 1 | 0 | 1 | -2.40 | -0.69 | -0.11 | 0.73 | 1.77 | ▁▅▇▆▆ |
V32 | 0 | 1 | 0 | 1 | -2.15 | -0.65 | 0.02 | 0.74 | 1.87 | ▃▇▇▇▆ |
V33 | 0 | 1 | 0 | 1 | -2.40 | -0.79 | 0.03 | 0.71 | 2.31 | ▁▇▇▇▂ |
V34 | 0 | 1 | 0 | 1 | -2.14 | -0.75 | 0.04 | 0.76 | 2.02 | ▂▆▇▆▃ |
V35 | 0 | 1 | 0 | 1 | -2.08 | -0.68 | -0.11 | 0.85 | 1.74 | ▃▇▇▆▇ |
V36 | 0 | 1 | 0 | 1 | -2.57 | -0.78 | -0.04 | 0.78 | 1.75 | ▁▅▇▆▆ |
V37 | 0 | 1 | 0 | 1 | -2.48 | -0.73 | -0.01 | 0.77 | 1.54 | ▁▅▇▇▇ |
V38 | 0 | 1 | 0 | 1 | -2.25 | -0.70 | 0.22 | 0.67 | 1.81 | ▂▅▅▇▅ |
V39 | 0 | 1 | 0 | 1 | -2.48 | -0.81 | -0.01 | 0.79 | 2.29 | ▁▇▇▆▂ |
V40 | 0 | 1 | 0 | 1 | -2.46 | -0.62 | -0.07 | 0.71 | 2.67 | ▁▆▇▅▁ |
V41 | 0 | 1 | 0 | 1 | -2.35 | -0.73 | -0.03 | 0.74 | 2.42 | ▂▆▇▆▂ |
V42 | 0 | 1 | 0 | 1 | -2.09 | -0.71 | 0.04 | 0.65 | 2.23 | ▃▅▇▅▂ |
V43 | 0 | 1 | 0 | 1 | -2.23 | -0.68 | 0.05 | 0.60 | 2.51 | ▃▆▇▅▂ |
V44 | 0 | 1 | 0 | 1 | -2.66 | -0.58 | -0.10 | 0.67 | 1.81 | ▁▃▇▅▅ |
V45 | 0 | 1 | 0 | 1 | -2.21 | -0.67 | -0.06 | 0.93 | 1.82 | ▂▅▇▃▆ |
V46 | 0 | 1 | 0 | 1 | -2.72 | -0.67 | -0.05 | 0.53 | 2.27 | ▁▅▇▅▃ |
V47 | 0 | 1 | 0 | 1 | -2.81 | -0.60 | -0.08 | 0.70 | 2.37 | ▁▅▇▅▂ |
V48 | 0 | 1 | 0 | 1 | -2.44 | -0.72 | 0.01 | 0.57 | 2.21 | ▂▅▇▅▂ |
V49 | 0 | 1 | 0 | 1 | -2.21 | -0.69 | -0.02 | 0.79 | 2.26 | ▃▆▇▆▂ |
V50 | 0 | 1 | 0 | 1 | -2.31 | -0.61 | -0.02 | 0.62 | 2.25 | ▂▆▇▆▃ |
V51 | 0 | 1 | 0 | 1 | -2.60 | -0.53 | -0.01 | 0.59 | 3.36 | ▂▅▇▂▁ |
V52 | 0 | 1 | 0 | 1 | -3.42 | -0.52 | -0.02 | 0.60 | 2.65 | ▁▂▇▆▁ |
V53 | 0 | 1 | 0 | 1 | -2.66 | -0.76 | -0.19 | 0.78 | 2.34 | ▁▆▇▇▂ |
V54 | 0 | 1 | 0 | 1 | -2.12 | -0.69 | 0.01 | 0.62 | 2.05 | ▂▃▇▃▃ |
V55 | 0 | 1 | 0 | 1 | -2.39 | -0.83 | 0.11 | 0.75 | 2.37 | ▂▇▇▇▁ |
V56 | 0 | 1 | 0 | 1 | -2.63 | -0.61 | 0.00 | 0.74 | 3.08 | ▁▆▇▃▁ |
V57 | 0 | 1 | 0 | 1 | -2.40 | -0.69 | -0.07 | 0.81 | 2.49 | ▂▇▇▇▂ |
V58 | 0 | 1 | 0 | 1 | -2.44 | -0.70 | 0.01 | 0.62 | 2.45 | ▁▅▇▅▂ |
V59 | 0 | 1 | 0 | 1 | -2.88 | -0.69 | 0.06 | 0.62 | 2.25 | ▁▃▇▅▂ |
V60 | 0 | 1 | 0 | 1 | -2.43 | -0.65 | 0.02 | 0.71 | 2.39 | ▂▆▇▆▂ |
###################################
# Verifying the data dimensions
###################################
dim(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed)
## [1] 96 60
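# Quick sanity check (illustrative only): after centering and scaling, every column should
# have a mean of roughly zero and a standard deviation of one, matching the skimmed
# summary above.
summary(round(colMeans(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed), 10))
summary(apply(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed, 2, sd))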
##################################
# Creating the pre-modelling
# train set
##################################
Class <- DPA$Class
PMA.Predictors.Numeric <- DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed
PMA_BoxCoxTransformed_CenteredScaledTransformed <- cbind(Class,PMA.Predictors.Numeric)
PMA_PreModelling_Train <- PMA_BoxCoxTransformed_CenteredScaledTransformed

##################################
# Gathering descriptive statistics
##################################
(PMA_PreModelling_Train_Skimmed <- skim(PMA_PreModelling_Train))
Name | PMA_PreModelling_Train |
Number of rows | 96 |
Number of columns | 61 |
_______________________ | |
Column type frequency: | |
factor | 1 |
numeric | 60 |
________________________ | |
Group variables | None |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
Class | 0 | 1 | FALSE | 2 | M: 78, R: 18 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
V1 | 0 | 1 | 0 | 1 | -2.69 | -0.72 | -0.09 | 0.61 | 2.54 | ▁▅▇▅▁ |
V2 | 0 | 1 | 0 | 1 | -2.81 | -0.68 | -0.05 | 0.66 | 2.26 | ▁▃▇▆▂ |
V3 | 0 | 1 | 0 | 1 | -2.79 | -0.60 | -0.02 | 0.65 | 2.41 | ▁▃▇▅▂ |
V4 | 0 | 1 | 0 | 1 | -2.40 | -0.71 | 0.00 | 0.63 | 2.23 | ▁▆▇▅▂ |
V5 | 0 | 1 | 0 | 1 | -2.44 | -0.69 | 0.02 | 0.67 | 2.49 | ▂▆▇▆▁ |
V6 | 0 | 1 | 0 | 1 | -2.57 | -0.58 | 0.06 | 0.62 | 3.20 | ▂▅▇▂▁ |
V7 | 0 | 1 | 0 | 1 | -2.33 | -0.57 | 0.04 | 0.62 | 3.08 | ▂▆▇▃▁ |
V8 | 0 | 1 | 0 | 1 | -2.54 | -0.55 | -0.01 | 0.59 | 2.56 | ▁▃▇▃▁ |
V9 | 0 | 1 | 0 | 1 | -2.90 | -0.52 | -0.01 | 0.60 | 2.79 | ▁▃▇▃▁ |
V10 | 0 | 1 | 0 | 1 | -2.73 | -0.67 | -0.03 | 0.59 | 2.34 | ▁▃▇▅▂ |
V11 | 0 | 1 | 0 | 1 | -2.29 | -0.53 | -0.01 | 0.63 | 2.64 | ▂▃▇▃▁ |
V12 | 0 | 1 | 0 | 1 | -2.42 | -0.70 | 0.06 | 0.59 | 2.02 | ▂▅▇▇▃ |
V13 | 0 | 1 | 0 | 1 | -2.30 | -0.60 | 0.05 | 0.60 | 2.47 | ▂▅▇▅▂ |
V14 | 0 | 1 | 0 | 1 | -2.92 | -0.68 | 0.05 | 0.67 | 2.67 | ▁▃▇▆▁ |
V15 | 0 | 1 | 0 | 1 | -3.02 | -0.65 | 0.01 | 0.73 | 2.07 | ▁▃▇▇▃ |
V16 | 0 | 1 | 0 | 1 | -2.63 | -0.78 | -0.01 | 0.83 | 1.95 | ▁▆▇▇▅ |
V17 | 0 | 1 | 0 | 1 | -2.82 | -0.77 | -0.17 | 0.99 | 1.78 | ▁▅▇▅▇ |
V18 | 0 | 1 | 0 | 1 | -2.53 | -0.76 | -0.13 | 0.94 | 1.64 | ▁▅▇▅▆ |
V19 | 0 | 1 | 0 | 1 | -1.93 | -0.82 | -0.12 | 0.90 | 1.62 | ▃▇▅▇▇ |
V20 | 0 | 1 | 0 | 1 | -2.01 | -0.73 | 0.14 | 0.82 | 1.60 | ▅▆▅▇▆ |
V21 | 0 | 1 | 0 | 1 | -2.03 | -0.80 | 0.17 | 0.78 | 1.43 | ▃▃▅▇▇ |
V22 | 0 | 1 | 0 | 1 | -2.06 | -0.93 | 0.21 | 0.80 | 1.42 | ▃▅▅▇▇ |
V23 | 0 | 1 | 0 | 1 | -1.95 | -0.86 | 0.18 | 0.87 | 1.50 | ▃▅▃▇▇ |
V24 | 0 | 1 | 0 | 1 | -2.09 | -0.53 | -0.02 | 0.94 | 1.49 | ▅▂▇▆▇ |
V25 | 0 | 1 | 0 | 1 | -2.13 | -0.59 | 0.08 | 0.82 | 1.48 | ▃▃▇▇▇ |
V26 | 0 | 1 | 0 | 1 | -1.99 | -0.77 | 0.12 | 0.77 | 1.51 | ▃▅▆▇▇ |
V27 | 0 | 1 | 0 | 1 | -1.94 | -0.95 | 0.26 | 0.83 | 1.29 | ▃▃▃▃▇ |
V28 | 0 | 1 | 0 | 1 | -2.01 | -0.73 | 0.19 | 0.84 | 1.30 | ▃▂▅▆▇ |
V29 | 0 | 1 | 0 | 1 | -2.16 | -0.81 | 0.18 | 0.88 | 1.50 | ▂▆▃▆▇ |
V30 | 0 | 1 | 0 | 1 | -2.42 | -0.80 | 0.07 | 0.61 | 1.90 | ▁▆▇▇▅ |
V31 | 0 | 1 | 0 | 1 | -2.40 | -0.69 | -0.11 | 0.73 | 1.77 | ▁▅▇▆▆ |
V32 | 0 | 1 | 0 | 1 | -2.15 | -0.65 | 0.02 | 0.74 | 1.87 | ▃▇▇▇▆ |
V33 | 0 | 1 | 0 | 1 | -2.40 | -0.79 | 0.03 | 0.71 | 2.31 | ▁▇▇▇▂ |
V34 | 0 | 1 | 0 | 1 | -2.14 | -0.75 | 0.04 | 0.76 | 2.02 | ▂▆▇▆▃ |
V35 | 0 | 1 | 0 | 1 | -2.08 | -0.68 | -0.11 | 0.85 | 1.74 | ▃▇▇▆▇ |
V36 | 0 | 1 | 0 | 1 | -2.57 | -0.78 | -0.04 | 0.78 | 1.75 | ▁▅▇▆▆ |
V37 | 0 | 1 | 0 | 1 | -2.48 | -0.73 | -0.01 | 0.77 | 1.54 | ▁▅▇▇▇ |
V38 | 0 | 1 | 0 | 1 | -2.25 | -0.70 | 0.22 | 0.67 | 1.81 | ▂▅▅▇▅ |
V39 | 0 | 1 | 0 | 1 | -2.48 | -0.81 | -0.01 | 0.79 | 2.29 | ▁▇▇▆▂ |
V40 | 0 | 1 | 0 | 1 | -2.46 | -0.62 | -0.07 | 0.71 | 2.67 | ▁▆▇▅▁ |
V41 | 0 | 1 | 0 | 1 | -2.35 | -0.73 | -0.03 | 0.74 | 2.42 | ▂▆▇▆▂ |
V42 | 0 | 1 | 0 | 1 | -2.09 | -0.71 | 0.04 | 0.65 | 2.23 | ▃▅▇▅▂ |
V43 | 0 | 1 | 0 | 1 | -2.23 | -0.68 | 0.05 | 0.60 | 2.51 | ▃▆▇▅▂ |
V44 | 0 | 1 | 0 | 1 | -2.66 | -0.58 | -0.10 | 0.67 | 1.81 | ▁▃▇▅▅ |
V45 | 0 | 1 | 0 | 1 | -2.21 | -0.67 | -0.06 | 0.93 | 1.82 | ▂▅▇▃▆ |
V46 | 0 | 1 | 0 | 1 | -2.72 | -0.67 | -0.05 | 0.53 | 2.27 | ▁▅▇▅▃ |
V47 | 0 | 1 | 0 | 1 | -2.81 | -0.60 | -0.08 | 0.70 | 2.37 | ▁▅▇▅▂ |
V48 | 0 | 1 | 0 | 1 | -2.44 | -0.72 | 0.01 | 0.57 | 2.21 | ▂▅▇▅▂ |
V49 | 0 | 1 | 0 | 1 | -2.21 | -0.69 | -0.02 | 0.79 | 2.26 | ▃▆▇▆▂ |
V50 | 0 | 1 | 0 | 1 | -2.31 | -0.61 | -0.02 | 0.62 | 2.25 | ▂▆▇▆▃ |
V51 | 0 | 1 | 0 | 1 | -2.60 | -0.53 | -0.01 | 0.59 | 3.36 | ▂▅▇▂▁ |
V52 | 0 | 1 | 0 | 1 | -3.42 | -0.52 | -0.02 | 0.60 | 2.65 | ▁▂▇▆▁ |
V53 | 0 | 1 | 0 | 1 | -2.66 | -0.76 | -0.19 | 0.78 | 2.34 | ▁▆▇▇▂ |
V54 | 0 | 1 | 0 | 1 | -2.12 | -0.69 | 0.01 | 0.62 | 2.05 | ▂▃▇▃▃ |
V55 | 0 | 1 | 0 | 1 | -2.39 | -0.83 | 0.11 | 0.75 | 2.37 | ▂▇▇▇▁ |
V56 | 0 | 1 | 0 | 1 | -2.63 | -0.61 | 0.00 | 0.74 | 3.08 | ▁▆▇▃▁ |
V57 | 0 | 1 | 0 | 1 | -2.40 | -0.69 | -0.07 | 0.81 | 2.49 | ▂▇▇▇▂ |
V58 | 0 | 1 | 0 | 1 | -2.44 | -0.70 | 0.01 | 0.62 | 2.45 | ▁▅▇▅▂ |
V59 | 0 | 1 | 0 | 1 | -2.88 | -0.69 | 0.06 | 0.62 | 2.25 | ▁▃▇▅▂ |
V60 | 0 | 1 | 0 | 1 | -2.43 | -0.65 | 0.02 | 0.71 | 2.39 | ▂▆▇▆▂ |
###################################
# Verifying the data dimensions
# for the train set
###################################
dim(PMA_PreModelling_Train)
## [1] 96 61
##################################
# Formulating the test set
##################################
DPA_Test <- Sonar_Test
DPA_Test.Predictors <- DPA_Test[,!names(DPA_Test) %in% c("Class")]
DPA_Test.Predictors.Numeric <- DPA_Test.Predictors[,sapply(DPA_Test.Predictors, is.numeric)]
DPA_Test_BoxCox <- preProcess(DPA_Test.Predictors.Numeric, method = c("BoxCox"))
DPA_Test_BoxCoxTransformed <- predict(DPA_Test_BoxCox, DPA_Test.Predictors.Numeric)
DPA_Test.Predictors.Numeric_BoxCoxTransformed_CenteredScaled <- preProcess(DPA_Test_BoxCoxTransformed, method = c("center","scale"))
DPA_Test.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed <- predict(DPA_Test.Predictors.Numeric_BoxCoxTransformed_CenteredScaled, DPA_Test_BoxCoxTransformed)

##################################
# Creating the pre-modelling
# test set
##################################
Class <- DPA_Test$Class
PMA_Test.Predictors.Numeric <- DPA_Test.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed
PMA_Test_BoxCoxTransformed_CenteredScaledTransformed <- cbind(Class,PMA_Test.Predictors.Numeric)
PMA_PreModelling_Test <- PMA_Test_BoxCoxTransformed_CenteredScaledTransformed

##################################
# Gathering descriptive statistics
##################################
(PMA_PreModelling_Test_Skimmed <- skim(PMA_PreModelling_Test))
Name | PMA_PreModelling_Test |
Number of rows | 40 |
Number of columns | 61 |
_______________________ | |
Column type frequency: | |
factor | 1 |
numeric | 60 |
________________________ | |
Group variables | None |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
Class | 0 | 1 | FALSE | 2 | M: 33, R: 7 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
V1 | 0 | 1 | 0 | 1 | -2.49 | -0.67 | -0.04 | 0.53 | 2.40 | ▁▃▇▃▁ |
V2 | 0 | 1 | 0 | 1 | -2.15 | -0.72 | 0.11 | 0.59 | 2.77 | ▂▆▇▃▁ |
V3 | 0 | 1 | 0 | 1 | -2.02 | -0.59 | 0.17 | 0.62 | 2.39 | ▃▃▇▃▁ |
V4 | 0 | 1 | 0 | 1 | -2.20 | -0.74 | -0.11 | 0.69 | 2.55 | ▂▇▇▅▁ |
V5 | 0 | 1 | 0 | 1 | -2.24 | -0.66 | 0.06 | 0.46 | 2.40 | ▂▅▇▃▂ |
V6 | 0 | 1 | 0 | 1 | -2.15 | -0.65 | -0.03 | 0.67 | 1.87 | ▂▅▇▆▅ |
V7 | 0 | 1 | 0 | 1 | -2.66 | -0.61 | -0.13 | 0.61 | 2.59 | ▁▅▇▅▁ |
V8 | 0 | 1 | 0 | 1 | -2.93 | -0.43 | -0.08 | 0.45 | 3.01 | ▁▂▇▂▁ |
V9 | 0 | 1 | 0 | 1 | -1.95 | -0.73 | -0.09 | 0.64 | 2.29 | ▃▆▇▅▂ |
V10 | 0 | 1 | 0 | 1 | -2.23 | -0.50 | -0.04 | 0.47 | 2.23 | ▂▃▇▃▁ |
V11 | 0 | 1 | 0 | 1 | -2.20 | -0.62 | 0.06 | 0.47 | 2.26 | ▂▃▇▂▂ |
V12 | 0 | 1 | 0 | 1 | -2.47 | -0.39 | -0.10 | 0.33 | 2.13 | ▁▂▇▃▂ |
V13 | 0 | 1 | 0 | 1 | -2.21 | -0.57 | -0.01 | 0.48 | 2.17 | ▂▃▇▃▂ |
V14 | 0 | 1 | 0 | 1 | -2.48 | -0.66 | 0.00 | 0.37 | 2.67 | ▁▃▇▃▁ |
V15 | 0 | 1 | 0 | 1 | -2.49 | -0.71 | 0.01 | 0.64 | 2.04 | ▁▅▇▅▃ |
V16 | 0 | 1 | 0 | 1 | -2.52 | -0.85 | -0.12 | 0.79 | 1.84 | ▁▆▆▇▃ |
V17 | 0 | 1 | 0 | 1 | -1.87 | -0.81 | -0.16 | 0.94 | 1.63 | ▂▇▅▃▇ |
V18 | 0 | 1 | 0 | 1 | -2.20 | -0.76 | -0.20 | 0.86 | 1.59 | ▁▆▇▃▇ |
V19 | 0 | 1 | 0 | 1 | -1.82 | -0.69 | 0.00 | 1.00 | 1.52 | ▃▇▆▂▇ |
V20 | 0 | 1 | 0 | 1 | -1.85 | -0.85 | 0.09 | 0.98 | 1.43 | ▃▃▅▅▇ |
V21 | 0 | 1 | 0 | 1 | -1.83 | -0.91 | 0.22 | 0.68 | 1.56 | ▅▂▅▇▆ |
V22 | 0 | 1 | 0 | 1 | -1.98 | -0.90 | 0.20 | 0.70 | 1.49 | ▃▃▃▇▅ |
V23 | 0 | 1 | 0 | 1 | -2.05 | -0.68 | 0.22 | 0.67 | 1.42 | ▃▃▅▇▇ |
V24 | 0 | 1 | 0 | 1 | -2.24 | -0.71 | 0.11 | 0.75 | 1.45 | ▂▅▆▇▇ |
V25 | 0 | 1 | 0 | 1 | -1.79 | -0.83 | 0.00 | 0.92 | 1.41 | ▃▂▆▃▇ |
V26 | 0 | 1 | 0 | 1 | -1.98 | -0.80 | 0.34 | 0.96 | 1.15 | ▂▃▂▂▇ |
V27 | 0 | 1 | 0 | 1 | -1.91 | -0.86 | 0.22 | 0.97 | 1.10 | ▂▃▂▂▇ |
V28 | 0 | 1 | 0 | 1 | -2.12 | -0.74 | 0.14 | 1.00 | 1.23 | ▂▃▅▃▇ |
V29 | 0 | 1 | 0 | 1 | -2.01 | -0.81 | 0.05 | 0.90 | 1.69 | ▃▇▅▇▆ |
V30 | 0 | 1 | 0 | 1 | -1.85 | -0.71 | 0.29 | 0.77 | 1.84 | ▃▇▃▇▃ |
V31 | 0 | 1 | 0 | 1 | -2.08 | -0.57 | -0.13 | 0.65 | 2.11 | ▂▅▇▃▃ |
V32 | 0 | 1 | 0 | 1 | -2.19 | -0.41 | 0.12 | 0.46 | 2.19 | ▂▂▇▂▂ |
V33 | 0 | 1 | 0 | 1 | -2.15 | -0.59 | 0.08 | 0.73 | 1.96 | ▂▃▇▆▂ |
V34 | 0 | 1 | 0 | 1 | -2.44 | -0.68 | -0.07 | 0.67 | 2.14 | ▁▅▇▆▃ |
V35 | 0 | 1 | 0 | 1 | -1.85 | -0.72 | 0.07 | 0.77 | 1.80 | ▅▅▅▇▃ |
V36 | 0 | 1 | 0 | 1 | -2.08 | -0.63 | -0.11 | 0.76 | 1.98 | ▃▃▇▇▂ |
V37 | 0 | 1 | 0 | 1 | -2.28 | -0.61 | 0.05 | 0.80 | 1.99 | ▂▃▇▆▂ |
V38 | 0 | 1 | 0 | 1 | -2.53 | -0.79 | 0.14 | 0.66 | 2.44 | ▁▇▇▇▂ |
V39 | 0 | 1 | 0 | 1 | -1.77 | -0.71 | 0.27 | 0.61 | 2.08 | ▆▅▇▇▂ |
V40 | 0 | 1 | 0 | 1 | -2.18 | -0.84 | 0.07 | 0.84 | 1.89 | ▁▇▇▇▂ |
V41 | 0 | 1 | 0 | 1 | -1.77 | -0.81 | 0.12 | 0.74 | 1.76 | ▆▃▅▇▃ |
V42 | 0 | 1 | 0 | 1 | -2.08 | -0.74 | -0.17 | 1.01 | 1.94 | ▂▇▇▅▃ |
V43 | 0 | 1 | 0 | 1 | -2.14 | -0.69 | -0.15 | 0.99 | 1.59 | ▂▅▇▂▇ |
V44 | 0 | 1 | 0 | 1 | -2.40 | -0.56 | -0.04 | 0.61 | 2.01 | ▁▃▇▅▃ |
V45 | 0 | 1 | 0 | 1 | -2.22 | -0.70 | -0.04 | 0.43 | 1.92 | ▁▇▇▅▅ |
V46 | 0 | 1 | 0 | 1 | -2.34 | -0.68 | -0.03 | 0.66 | 2.13 | ▁▇▇▇▃ |
V47 | 0 | 1 | 0 | 1 | -2.06 | -0.61 | 0.07 | 0.66 | 2.28 | ▂▆▇▆▁ |
V48 | 0 | 1 | 0 | 1 | -2.05 | -0.70 | -0.05 | 0.70 | 2.07 | ▃▆▇▆▂ |
V49 | 0 | 1 | 0 | 1 | -1.83 | -0.75 | 0.05 | 0.78 | 1.77 | ▅▅▅▇▃ |
V50 | 0 | 1 | 0 | 1 | -1.81 | -0.60 | 0.20 | 0.57 | 2.59 | ▃▃▇▂▁ |
V51 | 0 | 1 | 0 | 1 | -2.15 | -0.57 | 0.02 | 0.57 | 2.09 | ▂▃▇▃▂ |
V52 | 0 | 1 | 0 | 1 | -2.12 | -0.61 | 0.01 | 0.50 | 2.17 | ▃▆▇▅▃ |
V53 | 0 | 1 | 0 | 1 | -2.04 | -0.36 | 0.05 | 0.51 | 2.65 | ▃▆▇▃▁ |
V54 | 0 | 1 | 0 | 1 | -2.06 | -0.68 | -0.15 | 0.72 | 1.92 | ▃▆▇▇▅ |
V55 | 0 | 1 | 0 | 1 | -1.93 | -0.71 | -0.17 | 0.77 | 1.84 | ▃▆▇▇▅ |
V56 | 0 | 1 | 0 | 1 | -2.19 | -0.53 | 0.06 | 0.62 | 1.91 | ▂▃▇▅▃ |
V57 | 0 | 1 | 0 | 1 | -2.34 | -0.64 | 0.04 | 0.60 | 1.86 | ▂▃▇▇▅ |
V58 | 0 | 1 | 0 | 1 | -2.16 | -0.60 | -0.08 | 0.77 | 2.49 | ▃▇▇▇▁ |
V59 | 0 | 1 | 0 | 1 | -2.56 | -0.74 | -0.21 | 0.70 | 2.43 | ▁▅▇▆▁ |
V60 | 0 | 1 | 0 | 1 | -1.76 | -0.83 | -0.06 | 0.69 | 2.55 | ▅▇▇▅▁ |
###################################
# Verifying the data dimensions
# for the test set
###################################
dim(PMA_PreModelling_Test)
## [1] 40 61
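# Note (illustrative alternative only, not the pipeline used for the results below): the
# test predictors above were transformed with Box-Cox and center-scale parameters
# re-estimated on the test split itself. A minimal sketch, assuming one instead wanted to
# reuse the parameters estimated on the train split, would apply the previously fitted
# preProcess objects directly:
Test_BoxCoxTransformed_TrainParams <- predict(DPA_BoxCox, DPA_Test.Predictors.Numeric)
Test_CenteredScaled_TrainParams <- predict(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaled,
                                           Test_BoxCoxTransformed_TrainParams)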
##################################
# Loading dataset
##################################
EDA <- PMA_PreModelling_Train

##################################
# Listing all predictors
##################################
EDA.Predictors <- EDA[,!names(EDA) %in% c("Class")]

##################################
# Listing all numeric predictors
##################################
EDA.Predictors.Numeric <- EDA.Predictors[,sapply(EDA.Predictors, is.numeric)]
ncol(EDA.Predictors.Numeric)
## [1] 60
names(EDA.Predictors.Numeric)
## [1] "V1" "V2" "V3" "V4" "V5" "V6" "V7" "V8" "V9" "V10" "V11" "V12"
## [13] "V13" "V14" "V15" "V16" "V17" "V18" "V19" "V20" "V21" "V22" "V23" "V24"
## [25] "V25" "V26" "V27" "V28" "V29" "V30" "V31" "V32" "V33" "V34" "V35" "V36"
## [37] "V37" "V38" "V39" "V40" "V41" "V42" "V43" "V44" "V45" "V46" "V47" "V48"
## [49] "V49" "V50" "V51" "V52" "V53" "V54" "V55" "V56" "V57" "V58" "V59" "V60"
##################################
# Formulating the box plots
##################################
featurePlot(x = EDA.Predictors.Numeric,
y = EDA$Class,
plot = "box",
scales = list(x = list(relation="free", rot = 90),
y = list(relation="free")),
adjust = 1.5,
pch = "|")
##################################
# Verifying the class distribution
# for the original data
##################################
table(PMA_PreModelling_Train$Class)
##
## M R
## 78 18
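# Quick check (illustrative only): the majority-to-minority ratio in the train set, which
# is what the class-weighted and cost-sensitive variants fitted below try to compensate for.
ClassCounts <- table(PMA_PreModelling_Train$Class)
ClassCounts["M"] / ClassCounts["R"]   # 78/18, roughly 4.3 majority cases per minority case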
##################################
# Creating a function for
# a customized summary metrics
##################################
fourMetricSummary <- function (data, lev = levels(data$obs), model = NULL)
{
  accKapp <- postResample(data[, "pred"], data[, "obs"])
  out <- c(accKapp,
           sensitivity(data[, "pred"], data[, "obs"], lev[1]),
           specificity(data[, "pred"], data[, "obs"], lev[2]))
  names(out)[3:4] <- c("Sensitivity", "Specificity")
  out
}
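# Illustrative call only (not part of the original run): a toy resample with the same
# pred/obs column names that caret passes to the summary function internally.
fourMetricSummary_Demo <- data.frame(obs  = factor(c("M","M","M","R","R"), levels = c("M","R")),
                                     pred = factor(c("M","M","R","R","M"), levels = c("M","R")))
fourMetricSummary(fourMetricSummary_Demo, lev = levels(fourMetricSummary_Demo$obs))
# Expected: Accuracy = 0.6, Kappa ~ 0.17, Sensitivity (M) ~ 0.67, Specificity (R) = 0.5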
##################################
# Creating consistent fold assignments
# for the cross-validation process
##################################
set.seed(12345678)
KFold_Control <- trainControl(method = "cv",
                              classProbs = FALSE,
                              summaryFunction = fourMetricSummary)
##################################
# Setting the conditions
# for hyperparameter tuning
##################################
##################################
# Pre-computing the sigma parameter
##################################
set.seed(12345678)
sigma <- sigest(Class~.,
                data=PMA_PreModelling_Train,
                frac=0.75)
names(sigma) <- NULL

SVM_R_Grid <- data.frame(sigma=sigma[2], C=2^seq(-6,1,length=15))
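# Note (illustrative check only): kernlab::sigest() returns three candidate sigma values
# (roughly the 0.1, 0.5 and 0.9 quantiles of its distance-based estimate), so sigma[2]
# above keeps the middle estimate and only the cost parameter C is tuned over the grid.
length(sigma)    # expected: 3
dim(SVM_R_Grid)  # expected: 15 candidate values of C by 2 columns (sigma, C)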
##################################
# Running the support vector machine (radial basis function kernel) model
# by setting the caret method to 'svmRadial'
##################################
set.seed(12345678)
SVM_R_Tune <- train(x = PMA_PreModelling_Train[,!names(PMA_PreModelling_Train) %in% c("Class")],
                    y = PMA_PreModelling_Train$Class,
                    method = "svmRadial",
                    tuneGrid = SVM_R_Grid,
                    metric = "Specificity",
                    preProc = c("center", "scale"),
                    trControl = KFold_Control,
                    class.weights = c(M=1,R=1))
##################################
# Reporting the cross-validation results
# for the train set
##################################
SVM_R_Tune
## Support Vector Machines with Radial Basis Function Kernel
##
## 96 samples
## 60 predictors
## 2 classes: 'M', 'R'
##
## Pre-processing: centered (60), scaled (60)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 86, 87, 86, 87, 87, 86, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa Sensitivity Specificity
## 0.01562500 0.8133333 0.0000000 1 0.0
## 0.02209709 0.8133333 0.0000000 1 0.0
## 0.03125000 0.8133333 0.0000000 1 0.0
## 0.04419417 0.8133333 0.0000000 1 0.0
## 0.06250000 0.8133333 0.0000000 1 0.0
## 0.08838835 0.8133333 0.0000000 1 0.0
## 0.12500000 0.8133333 0.0000000 1 0.0
## 0.17677670 0.8133333 0.0000000 1 0.0
## 0.25000000 0.8133333 0.0000000 1 0.0
## 0.35355339 0.8133333 0.0000000 1 0.0
## 0.50000000 0.8133333 0.0000000 1 0.0
## 0.70710678 0.8133333 0.0000000 1 0.0
## 1.00000000 0.8655556 0.3224080 1 0.3
## 1.41421356 0.9066667 0.5678930 1 0.5
## 2.00000000 0.9277778 0.6454849 1 0.6
##
## Tuning parameter 'sigma' was held constant at a value of 0.008732089
## Specificity was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.008732089 and C = 2.
SVM_R_Tune$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 2
##
## Gaussian Radial Basis kernel function.
## Hyperparameter : sigma = 0.00873208894584695
##
## Number of Support Vectors : 57
##
## Objective Function Value : -33.2466
## Training error : 0.020833
SVM_R_Tune$results
## sigma C Accuracy Kappa Sensitivity Specificity
## 1 0.008732089 0.01562500 0.8133333 0.0000000 1 0.0
## 2 0.008732089 0.02209709 0.8133333 0.0000000 1 0.0
## 3 0.008732089 0.03125000 0.8133333 0.0000000 1 0.0
## 4 0.008732089 0.04419417 0.8133333 0.0000000 1 0.0
## 5 0.008732089 0.06250000 0.8133333 0.0000000 1 0.0
## 6 0.008732089 0.08838835 0.8133333 0.0000000 1 0.0
## 7 0.008732089 0.12500000 0.8133333 0.0000000 1 0.0
## 8 0.008732089 0.17677670 0.8133333 0.0000000 1 0.0
## 9 0.008732089 0.25000000 0.8133333 0.0000000 1 0.0
## 10 0.008732089 0.35355339 0.8133333 0.0000000 1 0.0
## 11 0.008732089 0.50000000 0.8133333 0.0000000 1 0.0
## 12 0.008732089 0.70710678 0.8133333 0.0000000 1 0.0
## 13 0.008732089 1.00000000 0.8655556 0.3224080 1 0.3
## 14 0.008732089 1.41421356 0.9066667 0.5678930 1 0.5
## 15 0.008732089 2.00000000 0.9277778 0.6454849 1 0.6
## AccuracySD KappaSD SensitivitySD SpecificitySD
## 1 0.04084163 0.0000000 0 0.0000000
## 2 0.04084163 0.0000000 0 0.0000000
## 3 0.04084163 0.0000000 0 0.0000000
## 4 0.04084163 0.0000000 0 0.0000000
## 5 0.04084163 0.0000000 0 0.0000000
## 6 0.04084163 0.0000000 0 0.0000000
## 7 0.04084163 0.0000000 0 0.0000000
## 8 0.04084163 0.0000000 0 0.0000000
## 9 0.04084163 0.0000000 0 0.0000000
## 10 0.04084163 0.0000000 0 0.0000000
## 11 0.04084163 0.0000000 0 0.0000000
## 12 0.04084163 0.0000000 0 0.0000000
## 13 0.08387887 0.4358567 0 0.4216370
## 14 0.05766371 0.3384260 0 0.3333333
## 15 0.06874337 0.3858752 0 0.3944053
(SVM_R_Train_Specificity <- SVM_R_Tune$results[SVM_R_Tune$results$C==SVM_R_Tune$bestTune$C,
                                               c("Specificity")])

## [1] 0.6

SVM_R_Train <- data.frame(SVM_R_Observed = PMA_PreModelling_Train$Class,
                          SVM_R_Predicted = predict(SVM_R_Tune,
                                                    PMA_PreModelling_Train[,!names(PMA_PreModelling_Train) %in% c("Class")],
                                                    type = "raw"))

(SVM_R_Train_ConfusionMatrix <- confusionMatrix(data = SVM_R_Train$SVM_R_Predicted,
                                                reference = SVM_R_Train$SVM_R_Observed))
## Confusion Matrix and Statistics
##
## Reference
## Prediction M R
## M 78 2
## R 0 16
##
## Accuracy : 0.9792
## 95% CI : (0.9268, 0.9975)
## No Information Rate : 0.8125
## P-Value [Acc > NIR] : 5.86e-07
##
## Kappa : 0.9286
##
## Mcnemar's Test P-Value : 0.4795
##
## Sensitivity : 1.0000
## Specificity : 0.8889
## Pos Pred Value : 0.9750
## Neg Pred Value : 1.0000
## Prevalence : 0.8125
## Detection Rate : 0.8125
## Detection Prevalence : 0.8333
## Balanced Accuracy : 0.9444
##
## 'Positive' Class : M
##
##################################
# Identifying and plotting the
# best model predictors
##################################
# model does not support variable importance measurement
##################################
# Independently evaluating the model
# on the test set
##################################
SVM_R_Test <- data.frame(SVM_R_Observed = PMA_PreModelling_Test$Class,
                         SVM_R_Predicted = predict(SVM_R_Tune,
                                                   PMA_PreModelling_Test[,!names(PMA_PreModelling_Test) %in% c("Class")],
                                                   type = "raw"))
SVM_R_Test
## SVM_R_Observed SVM_R_Predicted
## 1 M M
## 2 M M
## 3 M M
## 4 M M
## 5 M M
## 6 M M
## 7 M M
## 8 M M
## 9 M M
## 10 M M
## 11 M M
## 12 M M
## 13 M M
## 14 M M
## 15 M M
## 16 M M
## 17 M M
## 18 M M
## 19 M M
## 20 M M
## 21 M M
## 22 M M
## 23 M M
## 24 M R
## 25 M M
## 26 M M
## 27 M M
## 28 M M
## 29 M M
## 30 M M
## 31 M M
## 32 M M
## 33 M M
## 34 R M
## 35 R M
## 36 R M
## 37 R M
## 38 R M
## 39 R R
## 40 R M
##################################
# Reporting the independent evaluation results
# for the test set
##################################
(SVM_R_Test_Specificity <- Specificity(y_pred = SVM_R_Test$SVM_R_Predicted,
                                       y_true = SVM_R_Test$SVM_R_Observed))

## [1] 0.1428571

(SVM_R_Test_ConfusionMatrix <- confusionMatrix(data = SVM_R_Test$SVM_R_Predicted,
                                               reference = SVM_R_Test$SVM_R_Observed))
## Confusion Matrix and Statistics
##
## Reference
## Prediction M R
## M 32 6
## R 1 1
##
## Accuracy : 0.825
## 95% CI : (0.6722, 0.9266)
## No Information Rate : 0.825
## P-Value [Acc > NIR] : 0.5992
##
## Kappa : 0.1566
##
## Mcnemar's Test P-Value : 0.1306
##
## Sensitivity : 0.9697
## Specificity : 0.1429
## Pos Pred Value : 0.8421
## Neg Pred Value : 0.5000
## Prevalence : 0.8250
## Detection Rate : 0.8000
## Detection Prevalence : 0.9500
## Balanced Accuracy : 0.5563
##
## 'Positive' Class : M
##
##################################
# Verifying the class distribution
# for the original data
##################################
table(PMA_PreModelling_Train$Class)
##
## M R
## 78 18
##################################
# Creating a function for
# a customized summary metrics
##################################
fourMetricSummary <- function (data, lev = levels(data$obs), model = NULL)
{
  accKapp <- postResample(data[, "pred"], data[, "obs"])
  out <- c(accKapp,
           sensitivity(data[, "pred"], data[, "obs"], lev[1]),
           specificity(data[, "pred"], data[, "obs"], lev[2]))
  names(out)[3:4] <- c("Sensitivity", "Specificity")
  out
}

##################################
# Creating consistent fold assignments
# for the cross-validation process
##################################
set.seed(12345678)
KFold_Control <- trainControl(method = "cv",
                              classProbs = FALSE,
                              summaryFunction = fourMetricSummary)
##################################
# Setting the conditions
# for hyperparameter tuning
##################################
##################################
# Pre-computing the sigma parameter
##################################
set.seed(12345678)
sigma <- sigest(Class~.,
                data=PMA_PreModelling_Train,
                frac=0.75)
names(sigma) <- NULL

CW_SVM_R_Grid <- data.frame(sigma=sigma[2], C=2^seq(-6,1,length=15))

##################################
# Running the support vector machine (radial basis function kernel) model
# by setting the caret method to 'svmRadial'
##################################
set.seed(12345678)
CW_SVM_R_Tune <- train(x = PMA_PreModelling_Train[,!names(PMA_PreModelling_Train) %in% c("Class")],
                       y = PMA_PreModelling_Train$Class,
                       method = "svmRadial",
                       tuneGrid = CW_SVM_R_Grid,
                       metric = "Specificity",
                       preProc = c("center", "scale"),
                       trControl = KFold_Control,
                       class.weights = c(M=1,R=4))
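# Note (illustrative sketch only, not part of the original run): class.weights = c(M=1, R=4)
# asks the underlying kernlab fit to penalize errors on the minority class R roughly four
# times as heavily, which mirrors the ~4.3:1 class imbalance in the train set. A direct
# kernlab call with the same weighting and the tuned hyperparameters would look like this:
CW_SVM_R_Direct <- ksvm(Class ~ .,
                        data = PMA_PreModelling_Train,
                        kernel = "rbfdot",
                        kpar = list(sigma = CW_SVM_R_Tune$bestTune$sigma),
                        C = CW_SVM_R_Tune$bestTune$C,
                        class.weights = c(M = 1, R = 4))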
##################################
# Reporting the cross-validation results
# for the train set
##################################
CW_SVM_R_Tune
## Support Vector Machines with Radial Basis Function Kernel
##
## 96 samples
## 60 predictors
## 2 classes: 'M', 'R'
##
## Pre-processing: centered (60), scaled (60)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 86, 87, 86, 87, 87, 86, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa Sensitivity Specificity
## 0.01562500 0.8133333 0.0000000 1.0000000 0.00
## 0.02209709 0.8133333 0.0000000 1.0000000 0.00
## 0.03125000 0.8133333 0.0000000 1.0000000 0.00
## 0.04419417 0.8133333 0.0000000 1.0000000 0.00
## 0.06250000 0.8133333 0.0000000 1.0000000 0.00
## 0.08838835 0.8444444 0.2230769 1.0000000 0.20
## 0.12500000 0.8633333 0.4848567 0.9357143 0.55
## 0.17677670 0.8422222 0.4554315 0.8964286 0.60
## 0.25000000 0.8322222 0.4389841 0.8839286 0.60
## 0.35355339 0.8633333 0.5094884 0.9089286 0.65
## 0.50000000 0.8844444 0.5566738 0.9339286 0.65
## 0.70710678 0.8944444 0.5758125 0.9464286 0.65
## 1.00000000 0.9166667 0.6324754 0.9732143 0.65
## 1.41421356 0.9066667 0.5940139 0.9732143 0.60
## 2.00000000 0.9066667 0.5940139 0.9732143 0.60
##
## Tuning parameter 'sigma' was held constant at a value of 0.008732089
## Specificity was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.008732089 and C = 0.3535534.
CW_SVM_R_Tune$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 0.353553390593274
##
## Gaussian Radial Basis kernel function.
## Hyperparameter : sigma = 0.00873208894584695
##
## Number of Support Vectors : 78
##
## Objective Function Value : -25.2282
## Training error : 0.020833
CW_SVM_R_Tune$results
## sigma C Accuracy Kappa Sensitivity Specificity
## 1 0.008732089 0.01562500 0.8133333 0.0000000 1.0000000 0.00
## 2 0.008732089 0.02209709 0.8133333 0.0000000 1.0000000 0.00
## 3 0.008732089 0.03125000 0.8133333 0.0000000 1.0000000 0.00
## 4 0.008732089 0.04419417 0.8133333 0.0000000 1.0000000 0.00
## 5 0.008732089 0.06250000 0.8133333 0.0000000 1.0000000 0.00
## 6 0.008732089 0.08838835 0.8444444 0.2230769 1.0000000 0.20
## 7 0.008732089 0.12500000 0.8633333 0.4848567 0.9357143 0.55
## 8 0.008732089 0.17677670 0.8422222 0.4554315 0.8964286 0.60
## 9 0.008732089 0.25000000 0.8322222 0.4389841 0.8839286 0.60
## 10 0.008732089 0.35355339 0.8633333 0.5094884 0.9089286 0.65
## 11 0.008732089 0.50000000 0.8844444 0.5566738 0.9339286 0.65
## 12 0.008732089 0.70710678 0.8944444 0.5758125 0.9464286 0.65
## 13 0.008732089 1.00000000 0.9166667 0.6324754 0.9732143 0.65
## 14 0.008732089 1.41421356 0.9066667 0.5940139 0.9732143 0.60
## 15 0.008732089 2.00000000 0.9066667 0.5940139 0.9732143 0.60
## AccuracySD KappaSD SensitivitySD SpecificitySD
## 1 0.04084163 0.0000000 0.00000000 0.0000000
## 2 0.04084163 0.0000000 0.00000000 0.0000000
## 3 0.04084163 0.0000000 0.00000000 0.0000000
## 4 0.04084163 0.0000000 0.00000000 0.0000000
## 5 0.04084163 0.0000000 0.00000000 0.0000000
## 6 0.07388866 0.3741306 0.00000000 0.3496029
## 7 0.10331276 0.3537822 0.12262483 0.3689324
## 8 0.11814757 0.3704066 0.13410702 0.3944053
## 9 0.12608378 0.3779497 0.14192404 0.3944053
## 10 0.10331276 0.3644062 0.12441758 0.4116363
## 11 0.10659977 0.3886770 0.11357979 0.4116363
## 12 0.10240590 0.3927539 0.09671474 0.4116363
## 13 0.08318918 0.3982785 0.05662589 0.4116363
## 14 0.07790114 0.3768373 0.05662589 0.3944053
## 15 0.07790114 0.3768373 0.05662589 0.3944053
(CW_SVM_R_Train_Specificity <- CW_SVM_R_Tune$results[CW_SVM_R_Tune$results$C==CW_SVM_R_Tune$bestTune$C,
                                                     c("Specificity")])

## [1] 0.65

CW_SVM_R_Train <- data.frame(CW_SVM_R_Observed = PMA_PreModelling_Train$Class,
                             CW_SVM_R_Predicted = predict(CW_SVM_R_Tune,
                                                          PMA_PreModelling_Train[,!names(PMA_PreModelling_Train) %in% c("Class")],
                                                          type = "raw"))

(CW_SVM_R_Train_ConfusionMatrix <- confusionMatrix(data = CW_SVM_R_Train$CW_SVM_R_Predicted,
                                                   reference = CW_SVM_R_Train$CW_SVM_R_Observed))
## Confusion Matrix and Statistics
##
## Reference
## Prediction M R
## M 77 1
## R 1 17
##
## Accuracy : 0.9792
## 95% CI : (0.9268, 0.9975)
## No Information Rate : 0.8125
## P-Value [Acc > NIR] : 5.86e-07
##
## Kappa : 0.9316
##
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.9872
## Specificity : 0.9444
## Pos Pred Value : 0.9872
## Neg Pred Value : 0.9444
## Prevalence : 0.8125
## Detection Rate : 0.8021
## Detection Prevalence : 0.8125
## Balanced Accuracy : 0.9658
##
## 'Positive' Class : M
##
##################################
# Identifying and plotting the
# best model predictors
##################################
# model does not support variable importance measurement
##################################
# Independently evaluating the model
# on the test set
##################################
CW_SVM_R_Test <- data.frame(CW_SVM_R_Observed = PMA_PreModelling_Test$Class,
                            CW_SVM_R_Predicted = predict(CW_SVM_R_Tune,
                                                         PMA_PreModelling_Test[,!names(PMA_PreModelling_Test) %in% c("Class")],
                                                         type = "raw"))
CW_SVM_R_Test
## CW_SVM_R_Observed CW_SVM_R_Predicted
## 1 M M
## 2 M M
## 3 M R
## 4 M M
## 5 M R
## 6 M M
## 7 M M
## 8 M M
## 9 M M
## 10 M M
## 11 M M
## 12 M M
## 13 M M
## 14 M M
## 15 M M
## 16 M M
## 17 M M
## 18 M M
## 19 M M
## 20 M M
## 21 M M
## 22 M M
## 23 M M
## 24 M R
## 25 M M
## 26 M M
## 27 M M
## 28 M M
## 29 M M
## 30 M M
## 31 M M
## 32 M M
## 33 M M
## 34 R M
## 35 R M
## 36 R R
## 37 R M
## 38 R M
## 39 R R
## 40 R M
##################################
# Reporting the independent evaluation results
# for the test set
##################################
(CW_SVM_R_Test_Specificity <- Specificity(y_pred = CW_SVM_R_Test$CW_SVM_R_Predicted,
                                          y_true = CW_SVM_R_Test$CW_SVM_R_Observed))

## [1] 0.2857143

(CW_SVM_R_Test_ConfusionMatrix <- confusionMatrix(data = CW_SVM_R_Test$CW_SVM_R_Predicted,
                                                  reference = CW_SVM_R_Test$CW_SVM_R_Observed))
## Confusion Matrix and Statistics
##
## Reference
## Prediction M R
## M 30 5
## R 3 2
##
## Accuracy : 0.8
## 95% CI : (0.6435, 0.9095)
## No Information Rate : 0.825
## P-Value [Acc > NIR] : 0.7427
##
## Kappa : 0.2195
##
## Mcnemar's Test P-Value : 0.7237
##
## Sensitivity : 0.9091
## Specificity : 0.2857
## Pos Pred Value : 0.8571
## Neg Pred Value : 0.4000
## Prevalence : 0.8250
## Detection Rate : 0.7500
## Detection Prevalence : 0.8750
## Balanced Accuracy : 0.5974
##
## 'Positive' Class : M
##
##################################
# Verifying the class distribution
# for the original data
##################################
table(PMA_PreModelling_Train$Class)
##
## M R
## 78 18
##################################
# Creating a function for
# a customized summary metrics
##################################
fourMetricSummary <- function (data, lev = levels(data$obs), model = NULL)
{
  accKapp <- postResample(data[, "pred"], data[, "obs"])
  out <- c(accKapp,
           sensitivity(data[, "pred"], data[, "obs"], lev[1]),
           specificity(data[, "pred"], data[, "obs"], lev[2]))
  names(out)[3:4] <- c("Sensitivity", "Specificity")
  out
}

##################################
# Creating consistent fold assignments
# for the cross-validation process
##################################
set.seed(12345678)
KFold_Control <- trainControl(method = "cv",
                              classProbs = FALSE,
                              summaryFunction = fourMetricSummary)
##################################
# Setting the conditions
# for hyperparameter tuning
##################################
CART_Grid <- data.frame(cp = c(0.0001, 0.0005, 0.001, 0.005, 0.010, 0.015, 0.020))

##################################
# Formulating the cost matrix
##################################
CART_CostMatrix <- matrix(c(0,1,1,0), ncol=2)
rownames(CART_CostMatrix) <- levels(PMA_PreModelling_Train$Class)
colnames(CART_CostMatrix) <- levels(PMA_PreModelling_Train$Class)
CART_CostMatrix
## M R
## M 0 1
## R 1 0
##################################
# Running the classification and regression trees model
# by setting the caret method to 'rpart'
##################################
set.seed(12345678)
CART_Tune <- train(x = PMA_PreModelling_Train[,!names(PMA_PreModelling_Train) %in% c("Class")],
                   y = PMA_PreModelling_Train$Class,
                   method = "rpart",
                   tuneGrid = CART_Grid,
                   metric = "Specificity",
                   trControl = KFold_Control,
                   parms = list(loss=CART_CostMatrix))
##################################
# Reporting the cross-validation results
# for the train set
##################################
CART_Tune
## CART
##
## 96 samples
## 60 predictors
## 2 classes: 'M', 'R'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 86, 87, 86, 87, 87, 86, ...
## Resampling results across tuning parameters:
##
## cp Accuracy Kappa Sensitivity Specificity
## 0.0001 0.7922222 0.2829196 0.8714286 0.45
## 0.0005 0.7922222 0.2829196 0.8714286 0.45
## 0.0010 0.7922222 0.2829196 0.8714286 0.45
## 0.0050 0.7922222 0.2829196 0.8714286 0.45
## 0.0100 0.7922222 0.2829196 0.8714286 0.45
## 0.0150 0.7922222 0.2829196 0.8714286 0.45
## 0.0200 0.7922222 0.2829196 0.8714286 0.45
##
## Specificity was used to select the optimal model using the largest value.
## The final value used for the model was cp = 0.02.
CART_Tune$finalModel
## n= 96
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 96 18 M (0.81250000 0.18750000)
## 2) V11>=-1.453302 87 10 M (0.88505747 0.11494253)
## 4) V49>=-1.421354 79 5 M (0.93670886 0.06329114) *
## 5) V49< -1.421354 8 3 R (0.37500000 0.62500000) *
## 3) V11< -1.453302 9 1 R (0.11111111 0.88888889) *
CART_Tune$results
## cp Accuracy Kappa Sensitivity Specificity AccuracySD KappaSD
## 1 0.0001 0.7922222 0.2829196 0.8714286 0.45 0.1193317 0.4149074
## 2 0.0005 0.7922222 0.2829196 0.8714286 0.45 0.1193317 0.4149074
## 3 0.0010 0.7922222 0.2829196 0.8714286 0.45 0.1193317 0.4149074
## 4 0.0050 0.7922222 0.2829196 0.8714286 0.45 0.1193317 0.4149074
## 5 0.0100 0.7922222 0.2829196 0.8714286 0.45 0.1193317 0.4149074
## 6 0.0150 0.7922222 0.2829196 0.8714286 0.45 0.1193317 0.4149074
## 7 0.0200 0.7922222 0.2829196 0.8714286 0.45 0.1193317 0.4149074
## SensitivitySD SpecificitySD
## 1 0.1225092 0.4377975
## 2 0.1225092 0.4377975
## 3 0.1225092 0.4377975
## 4 0.1225092 0.4377975
## 5 0.1225092 0.4377975
## 6 0.1225092 0.4377975
## 7 0.1225092 0.4377975
(CART_Train_Specificity <- CART_Tune$results[CART_Tune$results$cp==CART_Tune$bestTune$cp,
                                             c("Specificity")])

## [1] 0.45

CART_Train <- data.frame(CART_Observed = PMA_PreModelling_Train$Class,
                         CART_Predicted = predict(CART_Tune,
                                                  PMA_PreModelling_Train[,!names(PMA_PreModelling_Train) %in% c("Class")],
                                                  type = "raw"))

(CART_Train_ConfusionMatrix <- confusionMatrix(data = CART_Train$CART_Predicted,
                                               reference = CART_Train$CART_Observed))
## Confusion Matrix and Statistics
##
## Reference
## Prediction M R
## M 74 5
## R 4 13
##
## Accuracy : 0.9062
## 95% CI : (0.8295, 0.9562)
## No Information Rate : 0.8125
## P-Value [Acc > NIR] : 0.008989
##
## Kappa : 0.6856
##
## Mcnemar's Test P-Value : 1.000000
##
## Sensitivity : 0.9487
## Specificity : 0.7222
## Pos Pred Value : 0.9367
## Neg Pred Value : 0.7647
## Prevalence : 0.8125
## Detection Rate : 0.7708
## Detection Prevalence : 0.8229
## Balanced Accuracy : 0.8355
##
## 'Positive' Class : M
##
##################################
# Identifying and plotting the
# best model predictors
##################################
CART_VarImp <- varImp(CART_Tune, scale = TRUE)
plot(CART_VarImp,
     top=25,
     scales=list(y=list(cex = .95)),
     main="Ranked Variable Importance : Classification and Regression Trees",
     xlab="Scaled Variable Importance Metrics",
     ylab="Predictors",
     cex=2,
     origin=0,
     alpha=0.45)
##################################
# Independently evaluating the model
# on the test set
##################################
CART_Test <- data.frame(CART_Observed = PMA_PreModelling_Test$Class,
                        CART_Predicted = predict(CART_Tune,
                                                 PMA_PreModelling_Test[,!names(PMA_PreModelling_Test) %in% c("Class")],
                                                 type = "raw"))
CART_Test
## CART_Observed CART_Predicted
## 1 M M
## 2 M M
## 3 M M
## 4 M M
## 5 M M
## 6 M M
## 7 M R
## 8 M R
## 9 M M
## 10 M M
## 11 M M
## 12 M M
## 13 M M
## 14 M M
## 15 M M
## 16 M M
## 17 M M
## 18 M R
## 19 M M
## 20 M M
## 21 M R
## 22 M R
## 23 M M
## 24 M M
## 25 M M
## 26 M M
## 27 M M
## 28 M M
## 29 M M
## 30 M M
## 31 M M
## 32 M M
## 33 M M
## 34 R M
## 35 R R
## 36 R R
## 37 R M
## 38 R M
## 39 R M
## 40 R M
##################################
# Reporting the independent evaluation results
# for the test set
##################################
(CART_Test_Specificity <- Specificity(y_pred = CART_Test$CART_Predicted,
                                      y_true = CART_Test$CART_Observed))

## [1] 0.2857143

(CART_Test_ConfusionMatrix <- confusionMatrix(data = CART_Test$CART_Predicted,
                                              reference = CART_Test$CART_Observed))
## Confusion Matrix and Statistics
##
## Reference
## Prediction M R
## M 28 5
## R 5 2
##
## Accuracy : 0.75
## 95% CI : (0.588, 0.8731)
## No Information Rate : 0.825
## P-Value [Acc > NIR] : 0.922
##
## Kappa : 0.1342
##
## Mcnemar's Test P-Value : 1.000
##
## Sensitivity : 0.8485
## Specificity : 0.2857
## Pos Pred Value : 0.8485
## Neg Pred Value : 0.2857
## Prevalence : 0.8250
## Detection Rate : 0.7000
## Detection Prevalence : 0.8250
## Balanced Accuracy : 0.5671
##
## 'Positive' Class : M
##
##################################
# Verifying the class distribution
# for the original data
##################################
table(PMA_PreModelling_Train$Class)
##
## M R
## 78 18
##################################
# Creating a function for
# a customized summary metrics
##################################
fourMetricSummary <- function (data, lev = levels(data$obs), model = NULL)
{
  accKapp <- postResample(data[, "pred"], data[, "obs"])
  out <- c(accKapp,
           sensitivity(data[, "pred"], data[, "obs"], lev[1]),
           specificity(data[, "pred"], data[, "obs"], lev[2]))
  names(out)[3:4] <- c("Sensitivity", "Specificity")
  out
}

##################################
# Creating consistent fold assignments
# for the cross-validation process
##################################
set.seed(12345678)
KFold_Control <- trainControl(method = "cv",
                              classProbs = FALSE,
                              summaryFunction = fourMetricSummary)
##################################
# Setting the conditions
# for hyperparameter tuning
##################################
CS_CART_Grid <- data.frame(cp = c(0.0001, 0.0005, 0.0010, 0.0050, 0.0100, 0.0150, 0.0200))

##################################
# Formulating the cost matrix
##################################
CS_CART_CostMatrix <- matrix(c(0,4,1,0), ncol=2)
rownames(CS_CART_CostMatrix) <- levels(PMA_PreModelling_Train$Class)
colnames(CS_CART_CostMatrix) <- levels(PMA_PreModelling_Train$Class)
CS_CART_CostMatrix
## M R
## M 0 1
## R 4 0
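# Note (illustrative restatement only): matrix(c(0,4,1,0), ncol=2) fills column-wise, which
# is why the printed matrix shows the 4 in the R row / M column. As I read rpart's
# convention for parms$loss, rows index the observed class and columns the predicted class,
# so this assigns a cost of 4 to predicting M when the true class is R (a missed minority
# case) and a cost of 1 to the reverse error.
CS_CART_CostMatrix_Check <- matrix(c(0, 4, 1, 0), ncol = 2,
                                   dimnames = list(Observed  = c("M", "R"),
                                                   Predicted = c("M", "R")))
CS_CART_CostMatrix_Check["R", "M"]   # cost of calling a true R an M: 4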
##################################
# Running the classification and regression trees model
# by setting the caret method to 'rpart'
##################################
set.seed(12345678)
CS_CART_Tune <- train(x = PMA_PreModelling_Train[,!names(PMA_PreModelling_Train) %in% c("Class")],
                      y = PMA_PreModelling_Train$Class,
                      method = "rpart",
                      tuneGrid = CS_CART_Grid,
                      metric = "Specificity",
                      trControl = KFold_Control,
                      parms = list(loss=CS_CART_CostMatrix))
##################################
# Reporting the cross-validation results
# for the train set
##################################
CS_CART_Tune
## CART
##
## 96 samples
## 60 predictors
## 2 classes: 'M', 'R'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 86, 87, 86, 87, 87, 86, ...
## Resampling results across tuning parameters:
##
## cp Accuracy Kappa Sensitivity Specificity
## 0.0001 0.76 0.169608 0.8535714 0.3
## 0.0005 0.76 0.169608 0.8535714 0.3
## 0.0010 0.76 0.169608 0.8535714 0.3
## 0.0050 0.76 0.169608 0.8535714 0.3
## 0.0100 0.76 0.169608 0.8535714 0.3
## 0.0150 0.76 0.169608 0.8535714 0.3
## 0.0200 0.76 0.169608 0.8535714 0.3
##
## Specificity was used to select the optimal model using the largest value.
## The final value used for the model was cp = 0.02.
CS_CART_Tune$finalModel
## n= 96
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 96 72 M (0.81250000 0.18750000)
## 2) V11>=-0.92108 78 28 M (0.91025641 0.08974359)
## 4) V49>=-1.421354 71 12 M (0.95774648 0.04225352) *
## 5) V49< -1.421354 7 3 R (0.42857143 0.57142857) *
## 3) V11< -0.92108 18 7 R (0.38888889 0.61111111) *
CS_CART_Tune$results
## cp Accuracy Kappa Sensitivity Specificity AccuracySD KappaSD
## 1 0.0001 0.76 0.169608 0.8535714 0.3 0.1607766 0.3396056
## 2 0.0005 0.76 0.169608 0.8535714 0.3 0.1607766 0.3396056
## 3 0.0010 0.76 0.169608 0.8535714 0.3 0.1607766 0.3396056
## 4 0.0050 0.76 0.169608 0.8535714 0.3 0.1607766 0.3396056
## 5 0.0100 0.76 0.169608 0.8535714 0.3 0.1607766 0.3396056
## 6 0.0150 0.76 0.169608 0.8535714 0.3 0.1607766 0.3396056
## 7 0.0200 0.76 0.169608 0.8535714 0.3 0.1607766 0.3396056
## SensitivitySD SpecificitySD
## 1 0.1921061 0.3496029
## 2 0.1921061 0.3496029
## 3 0.1921061 0.3496029
## 4 0.1921061 0.3496029
## 5 0.1921061 0.3496029
## 6 0.1921061 0.3496029
## 7 0.1921061 0.3496029
(CS_CART_Train_Specificity <- CS_CART_Tune$results[CS_CART_Tune$results$cp==CS_CART_Tune$bestTune$cp,
                                                   c("Specificity")])

## [1] 0.3

CS_CART_Train <- data.frame(CS_CART_Observed = PMA_PreModelling_Train$Class,
                            CS_CART_Predicted = predict(CS_CART_Tune,
                                                        PMA_PreModelling_Train[,!names(PMA_PreModelling_Train) %in% c("Class")],
                                                        type = "raw"))

(CS_CART_Train_ConfusionMatrix <- confusionMatrix(data = CS_CART_Train$CS_CART_Predicted,
                                                  reference = CS_CART_Train$CS_CART_Observed))
## Confusion Matrix and Statistics
##
## Reference
## Prediction M R
## M 68 3
## R 10 15
##
## Accuracy : 0.8646
## 95% CI : (0.7796, 0.9259)
## No Information Rate : 0.8125
## P-Value [Acc > NIR] : 0.11704
##
## Kappa : 0.6134
##
## Mcnemar's Test P-Value : 0.09609
##
## Sensitivity : 0.8718
## Specificity : 0.8333
## Pos Pred Value : 0.9577
## Neg Pred Value : 0.6000
## Prevalence : 0.8125
## Detection Rate : 0.7083
## Detection Prevalence : 0.7396
## Balanced Accuracy : 0.8526
##
## 'Positive' Class : M
##
##################################
# Identifying and plotting the
# best model predictors
##################################
CS_CART_VarImp <- varImp(CS_CART_Tune, scale = TRUE)
plot(CS_CART_VarImp,
     top=25,
     scales=list(y=list(cex = .95)),
     main="Ranked Variable Importance : Cost-Sensitive Classification and Regression Trees",
     xlab="Scaled Variable Importance Metrics",
     ylab="Predictors",
     cex=2,
     origin=0,
     alpha=0.45)
##################################
# Independently evaluating the model
# on the test set
##################################
CS_CART_Test <- data.frame(CS_CART_Observed = PMA_PreModelling_Test$Class,
                           CS_CART_Predicted = predict(CS_CART_Tune,
                                                       PMA_PreModelling_Test[,!names(PMA_PreModelling_Test) %in% c("Class")],
                                                       type = "raw"))
CS_CART_Test
## CS_CART_Observed CS_CART_Predicted
## 1 M R
## 2 M M
## 3 M M
## 4 M M
## 5 M M
## 6 M M
## 7 M R
## 8 M R
## 9 M R
## 10 M M
## 11 M M
## 12 M M
## 13 M M
## 14 M M
## 15 M M
## 16 M M
## 17 M M
## 18 M R
## 19 M M
## 20 M M
## 21 M R
## 22 M R
## 23 M M
## 24 M M
## 25 M M
## 26 M M
## 27 M M
## 28 M M
## 29 M M
## 30 M M
## 31 M M
## 32 M M
## 33 M M
## 34 R M
## 35 R R
## 36 R R
## 37 R R
## 38 R M
## 39 R R
## 40 R R
##################################
# Reporting the independent evaluation results
# for the test set
##################################
(CS_CART_Test_Specificity <- Specificity(y_pred = CS_CART_Test$CS_CART_Predicted,
                                         y_true = CS_CART_Test$CS_CART_Observed))
## [1] 0.7142857
(CS_CART_Test_ConfusionMatrix <- confusionMatrix(data = CS_CART_Test$CS_CART_Predicted,
                                                 reference = CS_CART_Test$CS_CART_Observed))
## Confusion Matrix and Statistics
##
## Reference
## Prediction M R
## M 26 2
## R 7 5
##
## Accuracy : 0.775
## 95% CI : (0.6155, 0.8916)
## No Information Rate : 0.825
## P-Value [Acc > NIR] : 0.8509
##
## Kappa : 0.3919
##
## Mcnemar's Test P-Value : 0.1824
##
## Sensitivity : 0.7879
## Specificity : 0.7143
## Pos Pred Value : 0.9286
## Neg Pred Value : 0.4167
## Prevalence : 0.8250
## Detection Rate : 0.6500
## Detection Prevalence : 0.7000
## Balanced Accuracy : 0.7511
##
## 'Positive' Class : M
##
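##################################
# Optional check: reproducing the reported
# test specificity from the confusion
# matrix counts (R treated as the
# negative class)
##################################
# From the matrix above: 5 R cases predicted as R (true negatives)
# and 2 R cases predicted as M (false positives)
5 / (5 + 2)   # = 0.7142857, matching the Specificity() value reported above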
##################################
# Verifying the class distribution
# for the original data
##################################
table(PMA_PreModelling_Train$Class)
##
## M R
## 78 18
##################################
# Creating a function for
# customized summary metrics
##################################
fourMetricSummary <- function (data, lev = levels(data$obs), model = NULL)
{
  accKapp <- postResample(data[, "pred"], data[, "obs"])
  out <- c(accKapp,
           sensitivity(data[, "pred"], data[, "obs"], lev[1]),
           specificity(data[, "pred"], data[, "obs"], lev[2]))
  names(out)[3:4] <- c("Sensitivity", "Specificity")
  out
}
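##################################
# Optional sketch: behaviour of the custom
# summary function on a toy fold
# (hypothetical data, for illustration only)
##################################
# A made-up fold of 4 observations; lev[1] = "M" drives Sensitivity
# and lev[2] = "R" drives Specificity in the function above.
toy_fold <- data.frame(obs  = factor(c("M","M","R","R"), levels = c("M","R")),
                       pred = factor(c("M","R","R","R"), levels = c("M","R")))
fourMetricSummary(toy_fold)
# Returns a named vector with Accuracy, Kappa, Sensitivity and Specificity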
##################################
# Creating consistent fold assignments
# for the Cross Validation process
##################################
set.seed(12345678)
KFold_Control <- trainControl(method = "cv",
                              classProbs = FALSE,
                              summaryFunction = fourMetricSummary)
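##################################
# Optional sketch: a probability-based
# alternative control object (assumes
# caret's twoClassSummary; not used in
# the workflow below)
##################################
# With classProbs = TRUE, ROC-style summaries become possible;
# the models below instead rely on raw class predictions.
KFold_Control_ROC <- trainControl(method = "cv",
                                  classProbs = TRUE,
                                  summaryFunction = twoClassSummary)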
##################################
# Setting the conditions
# for hyperparameter tuning
##################################
C50_Grid = expand.grid(trials = c(1:9, (1:10)*10),
                       model = c("tree", "rules"),
                       winnow = c(TRUE, FALSE))
##################################
# Formulating the cost matrix
##################################
C50_CostMatrix <- matrix(c(0,1,1,0), ncol=2)
rownames(C50_CostMatrix) <- levels(PMA_PreModelling_Train$Class)
colnames(C50_CostMatrix) <- levels(PMA_PreModelling_Train$Class)
C50_CostMatrix
## M R
## M 0 1
## R 1 0
##################################
# Running the C5.0 decision trees model
# by setting the caret method to 'C5.0'
##################################
set.seed(12345678)
C50_Tune <- train(x = PMA_PreModelling_Train[,!names(PMA_PreModelling_Train) %in% c("Class")],
                  y = PMA_PreModelling_Train$Class,
                  method = "C5.0",
                  tuneGrid = C50_Grid,
                  metric = "Specificity",
                  trControl = KFold_Control,
                  parms = list(loss=C50_CostMatrix))
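##################################
# Optional check: inspecting the selected
# hyperparameter combination
##################################
# The combination chosen by the largest cross-validated Specificity
C50_Tune$bestTune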
##################################
# Reporting the cross-validation results
# for the train set
##################################
C50_Tune
## C5.0
##
## 96 samples
## 60 predictors
## 2 classes: 'M', 'R'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 86, 87, 86, 87, 87, 86, ...
## Resampling results across tuning parameters:
##
## model winnow trials Accuracy Kappa Sensitivity Specificity
## rules FALSE 1 0.8444444 0.3945908 0.9232143 0.50
## rules FALSE 2 0.8333333 0.2971608 0.9482143 0.35
## rules FALSE 3 0.8533333 0.4043357 0.9464286 0.45
## rules FALSE 4 0.8333333 0.1839465 0.9875000 0.20
## rules FALSE 5 0.8655556 0.3811992 0.9732143 0.40
## rules FALSE 6 0.8655556 0.3811992 0.9732143 0.40
## rules FALSE 7 0.8655556 0.3811992 0.9732143 0.40
## rules FALSE 8 0.8655556 0.3811992 0.9732143 0.40
## rules FALSE 9 0.8544444 0.3603297 0.9607143 0.40
## rules FALSE 10 0.8655556 0.3811992 0.9732143 0.40
## rules FALSE 20 0.8877778 0.4454849 1.0000000 0.40
## rules FALSE 30 0.8877778 0.4454849 1.0000000 0.40
## rules FALSE 40 0.8766667 0.4063545 0.9875000 0.40
## rules FALSE 50 0.8655556 0.3672241 0.9875000 0.35
## rules FALSE 60 0.8655556 0.3672241 0.9875000 0.35
## rules FALSE 70 0.8655556 0.3672241 0.9875000 0.35
## rules FALSE 80 0.8655556 0.3672241 0.9875000 0.35
## rules FALSE 90 0.8655556 0.3672241 0.9875000 0.35
## rules FALSE 100 0.8766667 0.4063545 0.9875000 0.40
## rules TRUE 1 0.8155556 0.1465909 0.9500000 0.20
## rules TRUE 2 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 3 0.8044444 0.1865909 0.9250000 0.30
## rules TRUE 4 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 5 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 6 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 7 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 8 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 9 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 10 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 20 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 30 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 40 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 50 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 60 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 70 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 80 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 90 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 100 0.8144444 0.2106294 0.9375000 0.30
## tree FALSE 1 0.8322222 0.4030199 0.8964286 0.55
## tree FALSE 2 0.8233333 0.2482776 0.9500000 0.30
## tree FALSE 3 0.8222222 0.3149126 0.9214286 0.40
## tree FALSE 4 0.8322222 0.2764632 0.9607143 0.30
## tree FALSE 5 0.8322222 0.3495363 0.9339286 0.40
## tree FALSE 6 0.8333333 0.2323161 0.9750000 0.25
## tree FALSE 7 0.8544444 0.3686992 0.9607143 0.40
## tree FALSE 8 0.8444444 0.3071608 0.9607143 0.35
## tree FALSE 9 0.8433333 0.3633125 0.9339286 0.45
## tree FALSE 10 0.8233333 0.2439919 0.9482143 0.30
## tree FALSE 20 0.8655556 0.3721154 0.9875000 0.35
## tree FALSE 30 0.8544444 0.3329849 0.9750000 0.35
## tree FALSE 40 0.8433333 0.2938545 0.9750000 0.30
## tree FALSE 50 0.8544444 0.3547241 0.9750000 0.35
## tree FALSE 60 0.8544444 0.3547241 0.9750000 0.35
## tree FALSE 70 0.8433333 0.3295688 0.9607143 0.35
## tree FALSE 80 0.8544444 0.3547241 0.9750000 0.35
## tree FALSE 90 0.8433333 0.3295688 0.9607143 0.35
## tree FALSE 100 0.8433333 0.3295688 0.9607143 0.35
## tree TRUE 1 0.7955556 0.1147589 0.9250000 0.20
## tree TRUE 2 0.8155556 0.1552448 0.9500000 0.20
## tree TRUE 3 0.7844444 0.1547589 0.9000000 0.30
## tree TRUE 4 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 5 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 6 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 7 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 8 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 9 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 10 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 20 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 30 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 40 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 50 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 60 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 70 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 80 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 90 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 100 0.8044444 0.1952448 0.9250000 0.30
##
## Specificity was used to select the optimal model using the largest value.
## The final values used for the model were trials = 1, model = tree and winnow
## = FALSE.
C50_Tune$finalModel
##
## Call:
## (function (x, y, trials = 1, rules = FALSE, weights = NULL, control
## = 3942L), parms = list(loss = structure(c(0, 1, 1, 0), dim = c(2L,
## 2L), dimnames = list(c("M", "R"), c("M", "R")))))
##
## Classification Tree
## Number of samples: 96
## Number of predictors: 60
##
## Tree size: 6
##
## Non-standard options: attempt to group attributes
C50_Tune$results
## model winnow trials Accuracy Kappa Sensitivity Specificity AccuracySD
## 39 rules FALSE 1 0.8444444 0.3945908 0.9232143 0.50 0.08764563
## 58 rules TRUE 1 0.8155556 0.1465909 0.9500000 0.20 0.10213093
## 1 tree FALSE 1 0.8322222 0.4030199 0.8964286 0.55 0.11468930
## 20 tree TRUE 1 0.7955556 0.1147589 0.9250000 0.20 0.11357756
## 40 rules FALSE 2 0.8333333 0.2971608 0.9482143 0.35 0.05341557
## 59 rules TRUE 2 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 2 tree FALSE 2 0.8233333 0.2482776 0.9500000 0.30 0.04870054
## 21 tree TRUE 2 0.8155556 0.1552448 0.9500000 0.20 0.11248533
## 41 rules FALSE 3 0.8533333 0.4043357 0.9464286 0.45 0.09293271
## 60 rules TRUE 3 0.8044444 0.1865909 0.9250000 0.30 0.09927031
## 3 tree FALSE 3 0.8222222 0.3149126 0.9214286 0.40 0.10210406
## 22 tree TRUE 3 0.7844444 0.1547589 0.9000000 0.30 0.10876536
## 42 rules FALSE 4 0.8333333 0.1839465 0.9875000 0.20 0.05341557
## 61 rules TRUE 4 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 4 tree FALSE 4 0.8322222 0.2764632 0.9607143 0.30 0.07723799
## 23 tree TRUE 4 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 43 rules FALSE 5 0.8655556 0.3811992 0.9732143 0.40 0.06937898
## 62 rules TRUE 5 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 5 tree FALSE 5 0.8322222 0.3495363 0.9339286 0.40 0.07723799
## 24 tree TRUE 5 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 44 rules FALSE 6 0.8655556 0.3811992 0.9732143 0.40 0.06937898
## 63 rules TRUE 6 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 6 tree FALSE 6 0.8333333 0.2323161 0.9750000 0.25 0.05341557
## 25 tree TRUE 6 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 45 rules FALSE 7 0.8655556 0.3811992 0.9732143 0.40 0.06937898
## 64 rules TRUE 7 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 7 tree FALSE 7 0.8544444 0.3686992 0.9607143 0.40 0.07397215
## 26 tree TRUE 7 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 46 rules FALSE 8 0.8655556 0.3811992 0.9732143 0.40 0.06937898
## 65 rules TRUE 8 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 8 tree FALSE 8 0.8444444 0.3071608 0.9607143 0.35 0.07388866
## 27 tree TRUE 8 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 47 rules FALSE 9 0.8544444 0.3603297 0.9607143 0.40 0.07397215
## 66 rules TRUE 9 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 9 tree FALSE 9 0.8433333 0.3633125 0.9339286 0.45 0.09273323
## 28 tree TRUE 9 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 48 rules FALSE 10 0.8655556 0.3811992 0.9732143 0.40 0.06937898
## 67 rules TRUE 10 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 10 tree FALSE 10 0.8233333 0.2439919 0.9482143 0.30 0.04870054
## 29 tree TRUE 10 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 49 rules FALSE 20 0.8877778 0.4454849 1.0000000 0.40 0.07360034
## 68 rules TRUE 20 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 11 tree FALSE 20 0.8655556 0.3721154 0.9875000 0.35 0.08693059
## 30 tree TRUE 20 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 50 rules FALSE 30 0.8877778 0.4454849 1.0000000 0.40 0.07360034
## 69 rules TRUE 30 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 12 tree FALSE 30 0.8544444 0.3329849 0.9750000 0.35 0.07397215
## 31 tree TRUE 30 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 51 rules FALSE 40 0.8766667 0.4063545 0.9875000 0.40 0.06229493
## 70 rules TRUE 40 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 13 tree FALSE 40 0.8433333 0.2938545 0.9750000 0.30 0.05578963
## 32 tree TRUE 40 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 52 rules FALSE 50 0.8655556 0.3672241 0.9875000 0.35 0.04549680
## 71 rules TRUE 50 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 14 tree FALSE 50 0.8544444 0.3547241 0.9750000 0.35 0.05223404
## 33 tree TRUE 50 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 53 rules FALSE 60 0.8655556 0.3672241 0.9875000 0.35 0.04549680
## 72 rules TRUE 60 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 15 tree FALSE 60 0.8544444 0.3547241 0.9750000 0.35 0.05223404
## 34 tree TRUE 60 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 54 rules FALSE 70 0.8655556 0.3672241 0.9875000 0.35 0.04549680
## 73 rules TRUE 70 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 16 tree FALSE 70 0.8433333 0.3295688 0.9607143 0.35 0.05578963
## 35 tree TRUE 70 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 55 rules FALSE 80 0.8655556 0.3672241 0.9875000 0.35 0.04549680
## 74 rules TRUE 80 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 17 tree FALSE 80 0.8544444 0.3547241 0.9750000 0.35 0.05223404
## 36 tree TRUE 80 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 56 rules FALSE 90 0.8655556 0.3672241 0.9875000 0.35 0.04549680
## 75 rules TRUE 90 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 18 tree FALSE 90 0.8433333 0.3295688 0.9607143 0.35 0.05578963
## 37 tree TRUE 90 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 57 rules FALSE 100 0.8766667 0.4063545 0.9875000 0.40 0.06229493
## 76 rules TRUE 100 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 19 tree FALSE 100 0.8433333 0.3295688 0.9607143 0.35 0.05578963
## 38 tree TRUE 100 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## KappaSD SensitivitySD SpecificitySD
## 39 0.3644110 0.08870845 0.4082483
## 58 0.3221457 0.12076147 0.3496029
## 1 0.4308081 0.10507259 0.4377975
## 20 0.3239613 0.13437096 0.3496029
## 40 0.2734629 0.08926587 0.3374743
## 59 0.3501294 0.13501543 0.4216370
## 2 0.3017636 0.08740074 0.3496029
## 21 0.3604969 0.12076147 0.3496029
## 41 0.3452821 0.11325177 0.3689324
## 60 0.3267224 0.13437096 0.4216370
## 3 0.3911162 0.11237238 0.3944053
## 22 0.3327914 0.14191155 0.4216370
## 42 0.2961876 0.03952847 0.3496029
## 61 0.3501294 0.13501543 0.4216370
## 4 0.3588047 0.06344244 0.3496029
## 23 0.3635361 0.13437096 0.4216370
## 43 0.3622503 0.05662589 0.3944053
## 62 0.3501294 0.13501543 0.4216370
## 5 0.3063525 0.09709864 0.3162278
## 24 0.3635361 0.13437096 0.4216370
## 44 0.3622503 0.05662589 0.3944053
## 63 0.3501294 0.13501543 0.4216370
## 6 0.3290280 0.05270463 0.3535534
## 25 0.3635361 0.13437096 0.4216370
## 45 0.3622503 0.05662589 0.3944053
## 64 0.3501294 0.13501543 0.4216370
## 7 0.3786511 0.06344244 0.3944053
## 26 0.3635361 0.13437096 0.4216370
## 46 0.3622503 0.05662589 0.3944053
## 65 0.3501294 0.13501543 0.4216370
## 8 0.3840725 0.06344244 0.4116363
## 27 0.3635361 0.13437096 0.4216370
## 47 0.3535960 0.08658617 0.3944053
## 66 0.3501294 0.13501543 0.4216370
## 9 0.3943442 0.09709864 0.4377975
## 28 0.3635361 0.13437096 0.4216370
## 48 0.3622503 0.05662589 0.3944053
## 67 0.3501294 0.13501543 0.4216370
## 10 0.2996664 0.06705351 0.3496029
## 29 0.3635361 0.13437096 0.4216370
## 49 0.4112362 0.00000000 0.3944053
## 68 0.3501294 0.13501543 0.4216370
## 11 0.4430978 0.03952847 0.4116363
## 30 0.3635361 0.13437096 0.4216370
## 50 0.4112362 0.00000000 0.3944053
## 69 0.3501294 0.13501543 0.4216370
## 12 0.3962940 0.05270463 0.4116363
## 31 0.3635361 0.13437096 0.4216370
## 51 0.3690642 0.03952847 0.3944053
## 70 0.3501294 0.13501543 0.4216370
## 13 0.3381708 0.05270463 0.3496029
## 32 0.3635361 0.13437096 0.4216370
## 52 0.3160680 0.03952847 0.3374743
## 71 0.3501294 0.13501543 0.4216370
## 14 0.3341588 0.05270463 0.3374743
## 33 0.3635361 0.13437096 0.4216370
## 53 0.3160680 0.03952847 0.3374743
## 72 0.3501294 0.13501543 0.4216370
## 15 0.3341588 0.05270463 0.3374743
## 34 0.3635361 0.13437096 0.4216370
## 54 0.3160680 0.03952847 0.3374743
## 73 0.3501294 0.13501543 0.4216370
## 16 0.3221690 0.06344244 0.3374743
## 35 0.3635361 0.13437096 0.4216370
## 55 0.3160680 0.03952847 0.3374743
## 74 0.3501294 0.13501543 0.4216370
## 17 0.3341588 0.05270463 0.3374743
## 36 0.3635361 0.13437096 0.4216370
## 56 0.3160680 0.03952847 0.3374743
## 75 0.3501294 0.13501543 0.4216370
## 18 0.3221690 0.06344244 0.3374743
## 37 0.3635361 0.13437096 0.4216370
## 57 0.3690642 0.03952847 0.3944053
## 76 0.3501294 0.13501543 0.4216370
## 19 0.3221690 0.06344244 0.3374743
## 38 0.3635361 0.13437096 0.4216370
(C50_Train_Specificity <- C50_Tune$results[C50_Tune$results$trials==C50_Tune$bestTune$trials &
                                           C50_Tune$results$model==C50_Tune$bestTune$model &
                                           C50_Tune$results$winnow==C50_Tune$bestTune$winnow,
                                           c("Specificity")])
## [1] 0.55
C50_Train <- data.frame(C50_Observed = PMA_PreModelling_Train$Class,
                        C50_Predicted = predict(C50_Tune,
                                                PMA_PreModelling_Train[,!names(PMA_PreModelling_Train) %in% c("Class")],
                                                type = "raw"))
(C50_Train_ConfusionMatrix <- confusionMatrix(data = C50_Train$C50_Predicted,
                                              reference = C50_Train$C50_Observed))
## Confusion Matrix and Statistics
##
## Reference
## Prediction M R
## M 77 1
## R 1 17
##
## Accuracy : 0.9792
## 95% CI : (0.9268, 0.9975)
## No Information Rate : 0.8125
## P-Value [Acc > NIR] : 5.86e-07
##
## Kappa : 0.9316
##
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.9872
## Specificity : 0.9444
## Pos Pred Value : 0.9872
## Neg Pred Value : 0.9444
## Prevalence : 0.8125
## Detection Rate : 0.8021
## Detection Prevalence : 0.8125
## Balanced Accuracy : 0.9658
##
## 'Positive' Class : M
##
##################################
# Identifying and plotting the
# best model predictors
##################################
C50_VarImp <- varImp(C50_Tune, scale = TRUE)
plot(C50_VarImp,
     top=25,
     scales=list(y=list(cex = .95)),
     main="Ranked Variable Importance : C5.0 Decision Trees",
     xlab="Scaled Variable Importance Metrics",
     ylab="Predictors",
     cex=2,
     origin=0,
     alpha=0.45)
##################################
# Independently evaluating the model
# on the test set
##################################
C50_Test <- data.frame(C50_Observed = PMA_PreModelling_Test$Class,
                       C50_Predicted = predict(C50_Tune,
                                               PMA_PreModelling_Test[,!names(PMA_PreModelling_Test) %in% c("Class")],
                                               type = "raw"))
C50_Test
## C50_Observed C50_Predicted
## 1 M M
## 2 M M
## 3 M M
## 4 M M
## 5 M M
## 6 M R
## 7 M M
## 8 M R
## 9 M M
## 10 M R
## 11 M M
## 12 M M
## 13 M M
## 14 M M
## 15 M M
## 16 M M
## 17 M M
## 18 M R
## 19 M M
## 20 M M
## 21 M R
## 22 M M
## 23 M M
## 24 M M
## 25 M M
## 26 M M
## 27 M M
## 28 M M
## 29 M M
## 30 M M
## 31 M M
## 32 M M
## 33 M M
## 34 R M
## 35 R R
## 36 R M
## 37 R M
## 38 R M
## 39 R M
## 40 R M
##################################
# Reporting the independent evaluation results
# for the test set
##################################
(C50_Test_Specificity <- Specificity(y_pred = C50_Test$C50_Predicted,
                                     y_true = C50_Test$C50_Observed))
## [1] 0.1428571
(C50_Test_ConfusionMatrix <- confusionMatrix(data = C50_Test$C50_Predicted,
                                             reference = C50_Test$C50_Observed))
## Confusion Matrix and Statistics
##
## Reference
## Prediction M R
## M 28 6
## R 5 1
##
## Accuracy : 0.725
## 95% CI : (0.5611, 0.854)
## No Information Rate : 0.825
## P-Value [Acc > NIR] : 0.9632
##
## Kappa : -0.0092
##
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.8485
## Specificity : 0.1429
## Pos Pred Value : 0.8235
## Neg Pred Value : 0.1667
## Prevalence : 0.8250
## Detection Rate : 0.7000
## Detection Prevalence : 0.8500
## Balanced Accuracy : 0.4957
##
## 'Positive' Class : M
##
##################################
# Verifying the class distribution
# for the original data
##################################
table(PMA_PreModelling_Train$Class)
##
## M R
## 78 18
##################################
# Creating a function for
# customized summary metrics
##################################
fourMetricSummary <- function (data, lev = levels(data$obs), model = NULL)
{
  accKapp <- postResample(data[, "pred"], data[, "obs"])
  out <- c(accKapp,
           sensitivity(data[, "pred"], data[, "obs"], lev[1]),
           specificity(data[, "pred"], data[, "obs"], lev[2]))
  names(out)[3:4] <- c("Sensitivity", "Specificity")
  out
}
##################################
# Creating consistent fold assignments
# for the Cross Validation process
##################################
set.seed(12345678)
KFold_Control <- trainControl(method = "cv",
                              classProbs = FALSE,
                              summaryFunction = fourMetricSummary)
##################################
# Setting the conditions
# for hyperparameter tuning
##################################
CS_C50_Grid = expand.grid(trials = c(1:9, (1:10)*10),
                          model = c("tree", "rules"),
                          winnow = c(TRUE, FALSE))
##################################
# Formulating the cost matrix
##################################
CS_C50_CostMatrix <- matrix(c(0,1,4,0), ncol=2)
rownames(CS_C50_CostMatrix) <- levels(PMA_PreModelling_Train$Class)
colnames(CS_C50_CostMatrix) <- levels(PMA_PreModelling_Train$Class)
CS_C50_CostMatrix
## M R
## M 0 4
## R 1 0
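##################################
# Optional sketch: supplying the same
# asymmetric costs to C5.0 directly
# (assumes the C50 package's 'costs'
# argument; not the caret call used below)
##################################
# Illustrative only; check ?C50::C5.0 for the expected
# row/column orientation of the cost matrix.
CS_C50_Direct <- C5.0(x = PMA_PreModelling_Train[,!names(PMA_PreModelling_Train) %in% c("Class")],
                      y = PMA_PreModelling_Train$Class,
                      costs = CS_C50_CostMatrix)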
##################################
# Running the C5.0 decision trees model
# by setting the caret method to 'C5.0'
##################################
set.seed(12345678)
CS_C50_Tune <- train(x = PMA_PreModelling_Train[,!names(PMA_PreModelling_Train) %in% c("Class")],
                     y = PMA_PreModelling_Train$Class,
                     method = "C5.0",
                     tuneGrid = CS_C50_Grid,
                     metric = "Specificity",
                     trControl = KFold_Control,
                     parms = list(loss=CS_C50_CostMatrix))
##################################
# Reporting the cross-validation results
# for the train set
##################################
CS_C50_Tune
## C5.0
##
## 96 samples
## 60 predictors
## 2 classes: 'M', 'R'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 86, 87, 86, 87, 87, 86, ...
## Resampling results across tuning parameters:
##
## model winnow trials Accuracy Kappa Sensitivity Specificity
## rules FALSE 1 0.8444444 0.3945908 0.9232143 0.50
## rules FALSE 2 0.8333333 0.2971608 0.9482143 0.35
## rules FALSE 3 0.8533333 0.4043357 0.9464286 0.45
## rules FALSE 4 0.8333333 0.1839465 0.9875000 0.20
## rules FALSE 5 0.8655556 0.3811992 0.9732143 0.40
## rules FALSE 6 0.8655556 0.3811992 0.9732143 0.40
## rules FALSE 7 0.8655556 0.3811992 0.9732143 0.40
## rules FALSE 8 0.8655556 0.3811992 0.9732143 0.40
## rules FALSE 9 0.8544444 0.3603297 0.9607143 0.40
## rules FALSE 10 0.8655556 0.3811992 0.9732143 0.40
## rules FALSE 20 0.8877778 0.4454849 1.0000000 0.40
## rules FALSE 30 0.8877778 0.4454849 1.0000000 0.40
## rules FALSE 40 0.8766667 0.4063545 0.9875000 0.40
## rules FALSE 50 0.8655556 0.3672241 0.9875000 0.35
## rules FALSE 60 0.8655556 0.3672241 0.9875000 0.35
## rules FALSE 70 0.8655556 0.3672241 0.9875000 0.35
## rules FALSE 80 0.8655556 0.3672241 0.9875000 0.35
## rules FALSE 90 0.8655556 0.3672241 0.9875000 0.35
## rules FALSE 100 0.8766667 0.4063545 0.9875000 0.40
## rules TRUE 1 0.8155556 0.1465909 0.9500000 0.20
## rules TRUE 2 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 3 0.8044444 0.1865909 0.9250000 0.30
## rules TRUE 4 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 5 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 6 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 7 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 8 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 9 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 10 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 20 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 30 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 40 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 50 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 60 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 70 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 80 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 90 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 100 0.8144444 0.2106294 0.9375000 0.30
## tree FALSE 1 0.8322222 0.4030199 0.8964286 0.55
## tree FALSE 2 0.8233333 0.2482776 0.9500000 0.30
## tree FALSE 3 0.8222222 0.3149126 0.9214286 0.40
## tree FALSE 4 0.8322222 0.2764632 0.9607143 0.30
## tree FALSE 5 0.8322222 0.3495363 0.9339286 0.40
## tree FALSE 6 0.8333333 0.2323161 0.9750000 0.25
## tree FALSE 7 0.8544444 0.3686992 0.9607143 0.40
## tree FALSE 8 0.8444444 0.3071608 0.9607143 0.35
## tree FALSE 9 0.8433333 0.3633125 0.9339286 0.45
## tree FALSE 10 0.8233333 0.2439919 0.9482143 0.30
## tree FALSE 20 0.8655556 0.3721154 0.9875000 0.35
## tree FALSE 30 0.8544444 0.3329849 0.9750000 0.35
## tree FALSE 40 0.8433333 0.2938545 0.9750000 0.30
## tree FALSE 50 0.8544444 0.3547241 0.9750000 0.35
## tree FALSE 60 0.8544444 0.3547241 0.9750000 0.35
## tree FALSE 70 0.8433333 0.3295688 0.9607143 0.35
## tree FALSE 80 0.8544444 0.3547241 0.9750000 0.35
## tree FALSE 90 0.8433333 0.3295688 0.9607143 0.35
## tree FALSE 100 0.8433333 0.3295688 0.9607143 0.35
## tree TRUE 1 0.7955556 0.1147589 0.9250000 0.20
## tree TRUE 2 0.8155556 0.1552448 0.9500000 0.20
## tree TRUE 3 0.7844444 0.1547589 0.9000000 0.30
## tree TRUE 4 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 5 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 6 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 7 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 8 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 9 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 10 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 20 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 30 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 40 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 50 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 60 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 70 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 80 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 90 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 100 0.8044444 0.1952448 0.9250000 0.30
##
## Specificity was used to select the optimal model using the largest value.
## The final values used for the model were trials = 1, model = tree and winnow
## = FALSE.
CS_C50_Tune$finalModel
##
## Call:
## (function (x, y, trials = 1, rules = FALSE, weights = NULL, control
## = 3942L), parms = list(loss = structure(c(0, 1, 4, 0), dim = c(2L,
## 2L), dimnames = list(c("M", "R"), c("M", "R")))))
##
## Classification Tree
## Number of samples: 96
## Number of predictors: 60
##
## Tree size: 6
##
## Non-standard options: attempt to group attributes
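##################################
# Optional sketch: printing the full rule
# and attribute-usage listing of the final
# C5.0 fit (assumes summary() on a C5.0 object)
##################################
summary(CS_C50_Tune$finalModel)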
CS_C50_Tune$results
## model winnow trials Accuracy Kappa Sensitivity Specificity AccuracySD
## 39 rules FALSE 1 0.8444444 0.3945908 0.9232143 0.50 0.08764563
## 58 rules TRUE 1 0.8155556 0.1465909 0.9500000 0.20 0.10213093
## 1 tree FALSE 1 0.8322222 0.4030199 0.8964286 0.55 0.11468930
## 20 tree TRUE 1 0.7955556 0.1147589 0.9250000 0.20 0.11357756
## 40 rules FALSE 2 0.8333333 0.2971608 0.9482143 0.35 0.05341557
## 59 rules TRUE 2 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 2 tree FALSE 2 0.8233333 0.2482776 0.9500000 0.30 0.04870054
## 21 tree TRUE 2 0.8155556 0.1552448 0.9500000 0.20 0.11248533
## 41 rules FALSE 3 0.8533333 0.4043357 0.9464286 0.45 0.09293271
## 60 rules TRUE 3 0.8044444 0.1865909 0.9250000 0.30 0.09927031
## 3 tree FALSE 3 0.8222222 0.3149126 0.9214286 0.40 0.10210406
## 22 tree TRUE 3 0.7844444 0.1547589 0.9000000 0.30 0.10876536
## 42 rules FALSE 4 0.8333333 0.1839465 0.9875000 0.20 0.05341557
## 61 rules TRUE 4 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 4 tree FALSE 4 0.8322222 0.2764632 0.9607143 0.30 0.07723799
## 23 tree TRUE 4 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 43 rules FALSE 5 0.8655556 0.3811992 0.9732143 0.40 0.06937898
## 62 rules TRUE 5 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 5 tree FALSE 5 0.8322222 0.3495363 0.9339286 0.40 0.07723799
## 24 tree TRUE 5 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 44 rules FALSE 6 0.8655556 0.3811992 0.9732143 0.40 0.06937898
## 63 rules TRUE 6 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 6 tree FALSE 6 0.8333333 0.2323161 0.9750000 0.25 0.05341557
## 25 tree TRUE 6 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 45 rules FALSE 7 0.8655556 0.3811992 0.9732143 0.40 0.06937898
## 64 rules TRUE 7 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 7 tree FALSE 7 0.8544444 0.3686992 0.9607143 0.40 0.07397215
## 26 tree TRUE 7 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 46 rules FALSE 8 0.8655556 0.3811992 0.9732143 0.40 0.06937898
## 65 rules TRUE 8 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 8 tree FALSE 8 0.8444444 0.3071608 0.9607143 0.35 0.07388866
## 27 tree TRUE 8 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 47 rules FALSE 9 0.8544444 0.3603297 0.9607143 0.40 0.07397215
## 66 rules TRUE 9 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 9 tree FALSE 9 0.8433333 0.3633125 0.9339286 0.45 0.09273323
## 28 tree TRUE 9 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 48 rules FALSE 10 0.8655556 0.3811992 0.9732143 0.40 0.06937898
## 67 rules TRUE 10 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 10 tree FALSE 10 0.8233333 0.2439919 0.9482143 0.30 0.04870054
## 29 tree TRUE 10 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 49 rules FALSE 20 0.8877778 0.4454849 1.0000000 0.40 0.07360034
## 68 rules TRUE 20 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 11 tree FALSE 20 0.8655556 0.3721154 0.9875000 0.35 0.08693059
## 30 tree TRUE 20 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 50 rules FALSE 30 0.8877778 0.4454849 1.0000000 0.40 0.07360034
## 69 rules TRUE 30 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 12 tree FALSE 30 0.8544444 0.3329849 0.9750000 0.35 0.07397215
## 31 tree TRUE 30 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 51 rules FALSE 40 0.8766667 0.4063545 0.9875000 0.40 0.06229493
## 70 rules TRUE 40 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 13 tree FALSE 40 0.8433333 0.2938545 0.9750000 0.30 0.05578963
## 32 tree TRUE 40 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 52 rules FALSE 50 0.8655556 0.3672241 0.9875000 0.35 0.04549680
## 71 rules TRUE 50 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 14 tree FALSE 50 0.8544444 0.3547241 0.9750000 0.35 0.05223404
## 33 tree TRUE 50 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 53 rules FALSE 60 0.8655556 0.3672241 0.9875000 0.35 0.04549680
## 72 rules TRUE 60 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 15 tree FALSE 60 0.8544444 0.3547241 0.9750000 0.35 0.05223404
## 34 tree TRUE 60 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 54 rules FALSE 70 0.8655556 0.3672241 0.9875000 0.35 0.04549680
## 73 rules TRUE 70 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 16 tree FALSE 70 0.8433333 0.3295688 0.9607143 0.35 0.05578963
## 35 tree TRUE 70 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 55 rules FALSE 80 0.8655556 0.3672241 0.9875000 0.35 0.04549680
## 74 rules TRUE 80 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 17 tree FALSE 80 0.8544444 0.3547241 0.9750000 0.35 0.05223404
## 36 tree TRUE 80 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 56 rules FALSE 90 0.8655556 0.3672241 0.9875000 0.35 0.04549680
## 75 rules TRUE 90 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 18 tree FALSE 90 0.8433333 0.3295688 0.9607143 0.35 0.05578963
## 37 tree TRUE 90 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## 57 rules FALSE 100 0.8766667 0.4063545 0.9875000 0.40 0.06229493
## 76 rules TRUE 100 0.8144444 0.2106294 0.9375000 0.30 0.10371032
## 19 tree FALSE 100 0.8433333 0.3295688 0.9607143 0.35 0.05578963
## 38 tree TRUE 100 0.8044444 0.1952448 0.9250000 0.30 0.10989457
## KappaSD SensitivitySD SpecificitySD
## 39 0.3644110 0.08870845 0.4082483
## 58 0.3221457 0.12076147 0.3496029
## 1 0.4308081 0.10507259 0.4377975
## 20 0.3239613 0.13437096 0.3496029
## 40 0.2734629 0.08926587 0.3374743
## 59 0.3501294 0.13501543 0.4216370
## 2 0.3017636 0.08740074 0.3496029
## 21 0.3604969 0.12076147 0.3496029
## 41 0.3452821 0.11325177 0.3689324
## 60 0.3267224 0.13437096 0.4216370
## 3 0.3911162 0.11237238 0.3944053
## 22 0.3327914 0.14191155 0.4216370
## 42 0.2961876 0.03952847 0.3496029
## 61 0.3501294 0.13501543 0.4216370
## 4 0.3588047 0.06344244 0.3496029
## 23 0.3635361 0.13437096 0.4216370
## 43 0.3622503 0.05662589 0.3944053
## 62 0.3501294 0.13501543 0.4216370
## 5 0.3063525 0.09709864 0.3162278
## 24 0.3635361 0.13437096 0.4216370
## 44 0.3622503 0.05662589 0.3944053
## 63 0.3501294 0.13501543 0.4216370
## 6 0.3290280 0.05270463 0.3535534
## 25 0.3635361 0.13437096 0.4216370
## 45 0.3622503 0.05662589 0.3944053
## 64 0.3501294 0.13501543 0.4216370
## 7 0.3786511 0.06344244 0.3944053
## 26 0.3635361 0.13437096 0.4216370
## 46 0.3622503 0.05662589 0.3944053
## 65 0.3501294 0.13501543 0.4216370
## 8 0.3840725 0.06344244 0.4116363
## 27 0.3635361 0.13437096 0.4216370
## 47 0.3535960 0.08658617 0.3944053
## 66 0.3501294 0.13501543 0.4216370
## 9 0.3943442 0.09709864 0.4377975
## 28 0.3635361 0.13437096 0.4216370
## 48 0.3622503 0.05662589 0.3944053
## 67 0.3501294 0.13501543 0.4216370
## 10 0.2996664 0.06705351 0.3496029
## 29 0.3635361 0.13437096 0.4216370
## 49 0.4112362 0.00000000 0.3944053
## 68 0.3501294 0.13501543 0.4216370
## 11 0.4430978 0.03952847 0.4116363
## 30 0.3635361 0.13437096 0.4216370
## 50 0.4112362 0.00000000 0.3944053
## 69 0.3501294 0.13501543 0.4216370
## 12 0.3962940 0.05270463 0.4116363
## 31 0.3635361 0.13437096 0.4216370
## 51 0.3690642 0.03952847 0.3944053
## 70 0.3501294 0.13501543 0.4216370
## 13 0.3381708 0.05270463 0.3496029
## 32 0.3635361 0.13437096 0.4216370
## 52 0.3160680 0.03952847 0.3374743
## 71 0.3501294 0.13501543 0.4216370
## 14 0.3341588 0.05270463 0.3374743
## 33 0.3635361 0.13437096 0.4216370
## 53 0.3160680 0.03952847 0.3374743
## 72 0.3501294 0.13501543 0.4216370
## 15 0.3341588 0.05270463 0.3374743
## 34 0.3635361 0.13437096 0.4216370
## 54 0.3160680 0.03952847 0.3374743
## 73 0.3501294 0.13501543 0.4216370
## 16 0.3221690 0.06344244 0.3374743
## 35 0.3635361 0.13437096 0.4216370
## 55 0.3160680 0.03952847 0.3374743
## 74 0.3501294 0.13501543 0.4216370
## 17 0.3341588 0.05270463 0.3374743
## 36 0.3635361 0.13437096 0.4216370
## 56 0.3160680 0.03952847 0.3374743
## 75 0.3501294 0.13501543 0.4216370
## 18 0.3221690 0.06344244 0.3374743
## 37 0.3635361 0.13437096 0.4216370
## 57 0.3690642 0.03952847 0.3944053
## 76 0.3501294 0.13501543 0.4216370
## 19 0.3221690 0.06344244 0.3374743
## 38 0.3635361 0.13437096 0.4216370
(CS_C50_Train_Specificity <- CS_C50_Tune$results[CS_C50_Tune$results$trials==CS_C50_Tune$bestTune$trials &
                                                 CS_C50_Tune$results$model==CS_C50_Tune$bestTune$model &
                                                 CS_C50_Tune$results$winnow==CS_C50_Tune$bestTune$winnow,
                                                 c("Specificity")])
## [1] 0.55
CS_C50_Train <- data.frame(CS_C50_Observed = PMA_PreModelling_Train$Class,
                           CS_C50_Predicted = predict(CS_C50_Tune,
                                                      PMA_PreModelling_Train[,!names(PMA_PreModelling_Train) %in% c("Class")],
                                                      type = "raw"))
(CS_C50_Train_ConfusionMatrix <- confusionMatrix(data = CS_C50_Train$CS_C50_Predicted,
                                                 reference = CS_C50_Train$CS_C50_Observed))
## Confusion Matrix and Statistics
##
## Reference
## Prediction M R
## M 77 1
## R 1 17
##
## Accuracy : 0.9792
## 95% CI : (0.9268, 0.9975)
## No Information Rate : 0.8125
## P-Value [Acc > NIR] : 5.86e-07
##
## Kappa : 0.9316
##
## Mcnemar's Test P-Value : 1
##
## Sensitivity : 0.9872
## Specificity : 0.9444
## Pos Pred Value : 0.9872
## Neg Pred Value : 0.9444
## Prevalence : 0.8125
## Detection Rate : 0.8021
## Detection Prevalence : 0.8125
## Balanced Accuracy : 0.9658
##
## 'Positive' Class : M
##
##################################
# Identifying and plotting the
# best model predictors
##################################
CS_C50_VarImp <- varImp(CS_C50_Tune, scale = TRUE)
plot(CS_C50_VarImp,
     top=25,
     scales=list(y=list(cex = .95)),
     main="Ranked Variable Importance : Cost-Sensitive C5.0 Decision Trees",
     xlab="Scaled Variable Importance Metrics",
     ylab="Predictors",
     cex=2,
     origin=0,
     alpha=0.45)
##################################
# Independently evaluating the model
# on the test set
##################################
CS_C50_Test <- data.frame(CS_C50_Observed = PMA_PreModelling_Test$Class,
                          CS_C50_Predicted = predict(CS_C50_Tune,
                                                     PMA_PreModelling_Test[,!names(PMA_PreModelling_Test) %in% c("Class")],
                                                     type = "raw"))
CS_C50_Test
## CS_C50_Observed CS_C50_Predicted
## 1 M M
## 2 M M
## 3 M M
## 4 M M
## 5 M M
## 6 M R
## 7 M M
## 8 M R
## 9 M M
## 10 M R
## 11 M M
## 12 M M
## 13 M M
## 14 M M
## 15 M M
## 16 M M
## 17 M M
## 18 M R
## 19 M M
## 20 M M
## 21 M R
## 22 M M
## 23 M M
## 24 M M
## 25 M M
## 26 M M
## 27 M M
## 28 M M
## 29 M M
## 30 M M
## 31 M M
## 32 M M
## 33 M M
## 34 R M
## 35 R R
## 36 R M
## 37 R M
## 38 R M
## 39 R M
## 40 R M
##################################
# Reporting the independent evaluation results
# for the test set
##################################
(CS_C50_Test_Specificity <- Specificity(y_pred = CS_C50_Test$CS_C50_Predicted,
                                        y_true = CS_C50_Test$CS_C50_Observed))
## [1] 0.1428571
(CS_C50_Test_ConfusionMatrix <- confusionMatrix(data = CS_C50_Test$CS_C50_Predicted,
                                                reference = CS_C50_Test$CS_C50_Observed))
## Confusion Matrix and Statistics
##
## Reference
## Prediction M R
## M 28 6
## R 5 1
##
## Accuracy : 0.725
## 95% CI : (0.5611, 0.854)
## No Information Rate : 0.825
## P-Value [Acc > NIR] : 0.9632
##
## Kappa : -0.0092
##
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.8485
## Specificity : 0.1429
## Pos Pred Value : 0.8235
## Neg Pred Value : 0.1667
## Prevalence : 0.8250
## Detection Rate : 0.7000
## Detection Prevalence : 0.8500
## Balanced Accuracy : 0.4957
##
## 'Positive' Class : M
##
##################################
# Consolidating all evaluation results
# for the train and test sets
# using the specificity metric
##################################
Model <- c('SVM_R','CW_SVM_R','CART','CS_CART','C50','CS_C50',
           'SVM_R','CW_SVM_R','CART','CS_CART','C50','CS_C50')

Set <- c(rep('Cross-Validation',6),rep('Test',6))

Specificity <- c(SVM_R_Train_Specificity,
                 CW_SVM_R_Train_Specificity,
                 CART_Train_Specificity,
                 CS_CART_Train_Specificity,
                 C50_Train_Specificity,
                 CS_C50_Train_Specificity,
                 SVM_R_Test_Specificity,
                 CW_SVM_R_Test_Specificity,
                 CART_Test_Specificity,
                 CS_CART_Test_Specificity,
                 C50_Test_Specificity,
                 CS_C50_Test_Specificity)

Specificity_Summary <- as.data.frame(cbind(Model,Set,Specificity))

Specificity_Summary$Specificity <- as.numeric(as.character(Specificity_Summary$Specificity))

Specificity_Summary$Set <- factor(Specificity_Summary$Set,
                                  levels = c("Cross-Validation",
                                             "Test"))

Specificity_Summary$Model <- factor(Specificity_Summary$Model,
                                    levels = c('SVM_R',
                                               'CW_SVM_R',
                                               'CART',
                                               'CS_CART',
                                               'C50',
                                               'CS_C50'))

print(Specificity_Summary, row.names=FALSE)
## Model Set Specificity
## SVM_R Cross-Validation 0.6000000
## CW_SVM_R Cross-Validation 0.6500000
## CART Cross-Validation 0.4500000
## CS_CART Cross-Validation 0.3000000
## C50 Cross-Validation 0.5500000
## CS_C50 Cross-Validation 0.5500000
## SVM_R Test 0.1428571
## CW_SVM_R Test 0.2857143
## CART Test 0.2857143
## CS_CART Test 0.7142857
## C50 Test 0.1428571
## CS_C50 Test 0.1428571
(Specificity_Plot <- dotplot(Model ~ Specificity,
                             data = Specificity_Summary,
                             groups = Set,
                             main = "Classification Model Performance Comparison",
                             ylab = "Model",
                             xlab = "Specificity",
                             auto.key = list(adj=1, space="top", columns=2),
                             type=c("p", "h"),
                             origin = 0,
                             alpha = 0.45,
                             pch = 16,
                             cex = 2))
##################################
# Consolidating the resampling results
# for the candidate models
##################################
(COST_COMPARISON_RESAMPLING <- resamples(list(SVM_R = SVM_R_Tune,
                                              CW_SVM_R = SVM_R_Tune,
                                              CART = CART_Tune,
                                              CS_CART = CS_CART_Tune,
                                              C50 = C50_Tune,
                                              CS_C50 = CS_C50_Tune)))
##
## Call:
## resamples.default(x = list(SVM_R = SVM_R_Tune, CW_SVM_R = SVM_R_Tune, CART
## = CART_Tune, CS_CART = CS_CART_Tune, C50 = C50_Tune, CS_C50 = CS_C50_Tune))
##
## Models: SVM_R, CW_SVM_R, CART, CS_CART, C50, CS_C50
## Number of resamples: 10
## Performance metrics: Accuracy, Kappa, Sensitivity, Specificity
## Time estimates for: everything, final model fit
summary(COST_COMPARISON_RESAMPLING)
##
## Call:
## summary.resamples(object = COST_COMPARISON_RESAMPLING)
##
## Models: SVM_R, CW_SVM_R, CART, CS_CART, C50, CS_C50
## Number of resamples: 10
##
## Accuracy
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## SVM_R 0.8000000 0.8916667 0.9000000 0.9277778 1.0000000 1.0 0
## CW_SVM_R 0.8000000 0.8916667 0.9000000 0.9277778 1.0000000 1.0 0
## CART 0.6000000 0.7194444 0.7888889 0.7922222 0.8750000 1.0 0
## CS_CART 0.4000000 0.7777778 0.8000000 0.7600000 0.8666667 0.9 0
## C50 0.6666667 0.7777778 0.8000000 0.8322222 0.9000000 1.0 0
## CS_C50 0.6666667 0.7777778 0.8000000 0.8322222 0.9000000 1.0 0
##
## Kappa
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## SVM_R 0.0000000 0.61036789 0.61538462 0.6454849 1.0000000 1.0000000 0
## CW_SVM_R 0.0000000 0.61036789 0.61538462 0.6454849 1.0000000 1.0000000 0
## CART -0.2500000 -0.09375000 0.29090909 0.2829196 0.5979021 1.0000000 0
## CS_CART -0.3636364 0.00000000 0.02631579 0.1696080 0.4884868 0.6153846 0
## C50 -0.1538462 0.04545455 0.38750000 0.4030199 0.7064777 1.0000000 0
## CS_C50 -0.1538462 0.04545455 0.38750000 0.4030199 0.7064777 1.0000000 0
##
## Sensitivity
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## SVM_R 1.0000000 1.0000000 1.0000 1.0000000 1 1 0
## CW_SVM_R 1.0000000 1.0000000 1.0000 1.0000000 1 1 0
## CART 0.7142857 0.7500000 0.8750 0.8714286 1 1 0
## CS_CART 0.5000000 0.7544643 0.9375 0.8535714 1 1 0
## C50 0.7142857 0.8750000 0.8750 0.8964286 1 1 0
## CS_C50 0.7142857 0.8750000 0.8750 0.8964286 1 1 0
##
## Specificity
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## SVM_R 0 0.500 0.50 0.60 1.000 1 0
## CW_SVM_R 0 0.500 0.50 0.60 1.000 1 0
## CART 0 0.000 0.50 0.45 0.875 1 0
## CS_CART 0 0.000 0.25 0.30 0.500 1 0
## C50 0 0.125 0.50 0.55 1.000 1 0
## CS_C50 0 0.125 0.50 0.55 1.000 1 0
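##################################
# Optional sketch: pairwise differences in
# resampled performance between models
# (assumes caret's diff() method for
# resamples objects)
##################################
# Fold-level differences with simple paired comparisons
COST_COMPARISON_DIFFERENCES <- diff(COST_COMPARISON_RESAMPLING)
summary(COST_COMPARISON_DIFFERENCES)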
##################################
# Exploring the resampling results
##################################
bwplot(COST_COMPARISON_RESAMPLING,
       main = "Model Resampling Performance Comparison (Range)",
       ylab = "Model",
       pch = 16,
       cex = 2,
       layout=c(4,1))

dotplot(COST_COMPARISON_RESAMPLING,
        main = "Model Resampling Performance Comparison (95% Confidence Interval)",
        ylab = "Model",
        pch = 16,
        cex = 2,
        layout=c(4,1))
##################################
# Consolidating all models
##################################
(COST_COMPARISON_MODELS <- (list(SVM_R = SVM_R_Tune,
                                 CW_SVM_R = CW_SVM_R_Tune,
                                 CART = CART_Tune,
                                 CS_CART = CS_CART_Tune,
                                 C50 = C50_Tune,
                                 CS_C50 = CS_C50_Tune)))
## $SVM_R
## Support Vector Machines with Radial Basis Function Kernel
##
## 96 samples
## 60 predictors
## 2 classes: 'M', 'R'
##
## Pre-processing: centered (60), scaled (60)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 86, 87, 86, 87, 87, 86, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa Sensitivity Specificity
## 0.01562500 0.8133333 0.0000000 1 0.0
## 0.02209709 0.8133333 0.0000000 1 0.0
## 0.03125000 0.8133333 0.0000000 1 0.0
## 0.04419417 0.8133333 0.0000000 1 0.0
## 0.06250000 0.8133333 0.0000000 1 0.0
## 0.08838835 0.8133333 0.0000000 1 0.0
## 0.12500000 0.8133333 0.0000000 1 0.0
## 0.17677670 0.8133333 0.0000000 1 0.0
## 0.25000000 0.8133333 0.0000000 1 0.0
## 0.35355339 0.8133333 0.0000000 1 0.0
## 0.50000000 0.8133333 0.0000000 1 0.0
## 0.70710678 0.8133333 0.0000000 1 0.0
## 1.00000000 0.8655556 0.3224080 1 0.3
## 1.41421356 0.9066667 0.5678930 1 0.5
## 2.00000000 0.9277778 0.6454849 1 0.6
##
## Tuning parameter 'sigma' was held constant at a value of 0.008732089
## Specificity was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.008732089 and C = 2.
##
## $CW_SVM_R
## Support Vector Machines with Radial Basis Function Kernel
##
## 96 samples
## 60 predictors
## 2 classes: 'M', 'R'
##
## Pre-processing: centered (60), scaled (60)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 86, 87, 86, 87, 87, 86, ...
## Resampling results across tuning parameters:
##
## C Accuracy Kappa Sensitivity Specificity
## 0.01562500 0.8133333 0.0000000 1.0000000 0.00
## 0.02209709 0.8133333 0.0000000 1.0000000 0.00
## 0.03125000 0.8133333 0.0000000 1.0000000 0.00
## 0.04419417 0.8133333 0.0000000 1.0000000 0.00
## 0.06250000 0.8133333 0.0000000 1.0000000 0.00
## 0.08838835 0.8444444 0.2230769 1.0000000 0.20
## 0.12500000 0.8633333 0.4848567 0.9357143 0.55
## 0.17677670 0.8422222 0.4554315 0.8964286 0.60
## 0.25000000 0.8322222 0.4389841 0.8839286 0.60
## 0.35355339 0.8633333 0.5094884 0.9089286 0.65
## 0.50000000 0.8844444 0.5566738 0.9339286 0.65
## 0.70710678 0.8944444 0.5758125 0.9464286 0.65
## 1.00000000 0.9166667 0.6324754 0.9732143 0.65
## 1.41421356 0.9066667 0.5940139 0.9732143 0.60
## 2.00000000 0.9066667 0.5940139 0.9732143 0.60
##
## Tuning parameter 'sigma' was held constant at a value of 0.008732089
## Specificity was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.008732089 and C = 0.3535534.
##
## $CART
## CART
##
## 96 samples
## 60 predictors
## 2 classes: 'M', 'R'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 86, 87, 86, 87, 87, 86, ...
## Resampling results across tuning parameters:
##
## cp Accuracy Kappa Sensitivity Specificity
## 0.0001 0.7922222 0.2829196 0.8714286 0.45
## 0.0005 0.7922222 0.2829196 0.8714286 0.45
## 0.0010 0.7922222 0.2829196 0.8714286 0.45
## 0.0050 0.7922222 0.2829196 0.8714286 0.45
## 0.0100 0.7922222 0.2829196 0.8714286 0.45
## 0.0150 0.7922222 0.2829196 0.8714286 0.45
## 0.0200 0.7922222 0.2829196 0.8714286 0.45
##
## Specificity was used to select the optimal model using the largest value.
## The final value used for the model was cp = 0.02.
##
## $CS_CART
## CART
##
## 96 samples
## 60 predictors
## 2 classes: 'M', 'R'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 86, 87, 86, 87, 87, 86, ...
## Resampling results across tuning parameters:
##
## cp Accuracy Kappa Sensitivity Specificity
## 0.0001 0.76 0.169608 0.8535714 0.3
## 0.0005 0.76 0.169608 0.8535714 0.3
## 0.0010 0.76 0.169608 0.8535714 0.3
## 0.0050 0.76 0.169608 0.8535714 0.3
## 0.0100 0.76 0.169608 0.8535714 0.3
## 0.0150 0.76 0.169608 0.8535714 0.3
## 0.0200 0.76 0.169608 0.8535714 0.3
##
## Specificity was used to select the optimal model using the largest value.
## The final value used for the model was cp = 0.02.
##
## $C50
## C5.0
##
## 96 samples
## 60 predictors
## 2 classes: 'M', 'R'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 86, 87, 86, 87, 87, 86, ...
## Resampling results across tuning parameters:
##
## model winnow trials Accuracy Kappa Sensitivity Specificity
## rules FALSE 1 0.8444444 0.3945908 0.9232143 0.50
## rules FALSE 2 0.8333333 0.2971608 0.9482143 0.35
## rules FALSE 3 0.8533333 0.4043357 0.9464286 0.45
## rules FALSE 4 0.8333333 0.1839465 0.9875000 0.20
## rules FALSE 5 0.8655556 0.3811992 0.9732143 0.40
## rules FALSE 6 0.8655556 0.3811992 0.9732143 0.40
## rules FALSE 7 0.8655556 0.3811992 0.9732143 0.40
## rules FALSE 8 0.8655556 0.3811992 0.9732143 0.40
## rules FALSE 9 0.8544444 0.3603297 0.9607143 0.40
## rules FALSE 10 0.8655556 0.3811992 0.9732143 0.40
## rules FALSE 20 0.8877778 0.4454849 1.0000000 0.40
## rules FALSE 30 0.8877778 0.4454849 1.0000000 0.40
## rules FALSE 40 0.8766667 0.4063545 0.9875000 0.40
## rules FALSE 50 0.8655556 0.3672241 0.9875000 0.35
## rules FALSE 60 0.8655556 0.3672241 0.9875000 0.35
## rules FALSE 70 0.8655556 0.3672241 0.9875000 0.35
## rules FALSE 80 0.8655556 0.3672241 0.9875000 0.35
## rules FALSE 90 0.8655556 0.3672241 0.9875000 0.35
## rules FALSE 100 0.8766667 0.4063545 0.9875000 0.40
## rules TRUE 1 0.8155556 0.1465909 0.9500000 0.20
## rules TRUE 2 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 3 0.8044444 0.1865909 0.9250000 0.30
## rules TRUE 4 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 5 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 6 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 7 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 8 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 9 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 10 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 20 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 30 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 40 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 50 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 60 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 70 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 80 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 90 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 100 0.8144444 0.2106294 0.9375000 0.30
## tree FALSE 1 0.8322222 0.4030199 0.8964286 0.55
## tree FALSE 2 0.8233333 0.2482776 0.9500000 0.30
## tree FALSE 3 0.8222222 0.3149126 0.9214286 0.40
## tree FALSE 4 0.8322222 0.2764632 0.9607143 0.30
## tree FALSE 5 0.8322222 0.3495363 0.9339286 0.40
## tree FALSE 6 0.8333333 0.2323161 0.9750000 0.25
## tree FALSE 7 0.8544444 0.3686992 0.9607143 0.40
## tree FALSE 8 0.8444444 0.3071608 0.9607143 0.35
## tree FALSE 9 0.8433333 0.3633125 0.9339286 0.45
## tree FALSE 10 0.8233333 0.2439919 0.9482143 0.30
## tree FALSE 20 0.8655556 0.3721154 0.9875000 0.35
## tree FALSE 30 0.8544444 0.3329849 0.9750000 0.35
## tree FALSE 40 0.8433333 0.2938545 0.9750000 0.30
## tree FALSE 50 0.8544444 0.3547241 0.9750000 0.35
## tree FALSE 60 0.8544444 0.3547241 0.9750000 0.35
## tree FALSE 70 0.8433333 0.3295688 0.9607143 0.35
## tree FALSE 80 0.8544444 0.3547241 0.9750000 0.35
## tree FALSE 90 0.8433333 0.3295688 0.9607143 0.35
## tree FALSE 100 0.8433333 0.3295688 0.9607143 0.35
## tree TRUE 1 0.7955556 0.1147589 0.9250000 0.20
## tree TRUE 2 0.8155556 0.1552448 0.9500000 0.20
## tree TRUE 3 0.7844444 0.1547589 0.9000000 0.30
## tree TRUE 4 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 5 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 6 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 7 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 8 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 9 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 10 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 20 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 30 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 40 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 50 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 60 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 70 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 80 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 90 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 100 0.8044444 0.1952448 0.9250000 0.30
##
## Specificity was used to select the optimal model using the largest value.
## The final values used for the model were trials = 1, model = tree and winnow
## = FALSE.
##
## $CS_C50
## C5.0
##
## 96 samples
## 60 predictors
## 2 classes: 'M', 'R'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 86, 87, 86, 87, 87, 86, ...
## Resampling results across tuning parameters:
##
## model winnow trials Accuracy Kappa Sensitivity Specificity
## rules FALSE 1 0.8444444 0.3945908 0.9232143 0.50
## rules FALSE 2 0.8333333 0.2971608 0.9482143 0.35
## rules FALSE 3 0.8533333 0.4043357 0.9464286 0.45
## rules FALSE 4 0.8333333 0.1839465 0.9875000 0.20
## rules FALSE 5 0.8655556 0.3811992 0.9732143 0.40
## rules FALSE 6 0.8655556 0.3811992 0.9732143 0.40
## rules FALSE 7 0.8655556 0.3811992 0.9732143 0.40
## rules FALSE 8 0.8655556 0.3811992 0.9732143 0.40
## rules FALSE 9 0.8544444 0.3603297 0.9607143 0.40
## rules FALSE 10 0.8655556 0.3811992 0.9732143 0.40
## rules FALSE 20 0.8877778 0.4454849 1.0000000 0.40
## rules FALSE 30 0.8877778 0.4454849 1.0000000 0.40
## rules FALSE 40 0.8766667 0.4063545 0.9875000 0.40
## rules FALSE 50 0.8655556 0.3672241 0.9875000 0.35
## rules FALSE 60 0.8655556 0.3672241 0.9875000 0.35
## rules FALSE 70 0.8655556 0.3672241 0.9875000 0.35
## rules FALSE 80 0.8655556 0.3672241 0.9875000 0.35
## rules FALSE 90 0.8655556 0.3672241 0.9875000 0.35
## rules FALSE 100 0.8766667 0.4063545 0.9875000 0.40
## rules TRUE 1 0.8155556 0.1465909 0.9500000 0.20
## rules TRUE 2 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 3 0.8044444 0.1865909 0.9250000 0.30
## rules TRUE 4 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 5 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 6 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 7 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 8 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 9 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 10 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 20 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 30 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 40 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 50 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 60 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 70 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 80 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 90 0.8144444 0.2106294 0.9375000 0.30
## rules TRUE 100 0.8144444 0.2106294 0.9375000 0.30
## tree FALSE 1 0.8322222 0.4030199 0.8964286 0.55
## tree FALSE 2 0.8233333 0.2482776 0.9500000 0.30
## tree FALSE 3 0.8222222 0.3149126 0.9214286 0.40
## tree FALSE 4 0.8322222 0.2764632 0.9607143 0.30
## tree FALSE 5 0.8322222 0.3495363 0.9339286 0.40
## tree FALSE 6 0.8333333 0.2323161 0.9750000 0.25
## tree FALSE 7 0.8544444 0.3686992 0.9607143 0.40
## tree FALSE 8 0.8444444 0.3071608 0.9607143 0.35
## tree FALSE 9 0.8433333 0.3633125 0.9339286 0.45
## tree FALSE 10 0.8233333 0.2439919 0.9482143 0.30
## tree FALSE 20 0.8655556 0.3721154 0.9875000 0.35
## tree FALSE 30 0.8544444 0.3329849 0.9750000 0.35
## tree FALSE 40 0.8433333 0.2938545 0.9750000 0.30
## tree FALSE 50 0.8544444 0.3547241 0.9750000 0.35
## tree FALSE 60 0.8544444 0.3547241 0.9750000 0.35
## tree FALSE 70 0.8433333 0.3295688 0.9607143 0.35
## tree FALSE 80 0.8544444 0.3547241 0.9750000 0.35
## tree FALSE 90 0.8433333 0.3295688 0.9607143 0.35
## tree FALSE 100 0.8433333 0.3295688 0.9607143 0.35
## tree TRUE 1 0.7955556 0.1147589 0.9250000 0.20
## tree TRUE 2 0.8155556 0.1552448 0.9500000 0.20
## tree TRUE 3 0.7844444 0.1547589 0.9000000 0.30
## tree TRUE 4 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 5 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 6 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 7 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 8 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 9 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 10 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 20 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 30 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 40 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 50 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 60 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 70 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 80 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 90 0.8044444 0.1952448 0.9250000 0.30
## tree TRUE 100 0.8044444 0.1952448 0.9250000 0.30
##
## Specificity was used to select the optimal model using the largest value.
## The final values used for the model were trials = 1, model = tree and winnow
## = FALSE.
##################################
# Creating a function to evaluate
# model performance on the test data
##################################
COST_COMPARISON_TEST_Specificity <- function(model, data) {
  Data_Test <- data.frame(Observed = data$Class,
                          Predicted = predict(model,
                                              data[,!names(data) %in% c("Class")],
                                              type = "raw"))
  Specificity <- Specificity(y_pred = Data_Test$Predicted,
                             y_true = Data_Test$Observed)
  return(Specificity)
}

COST_COMPARISON_TEST_SUMMARY <- lapply(COST_COMPARISON_MODELS,
                                       COST_COMPARISON_TEST_Specificity,
                                       data = PMA_PreModelling_Test)
COST_COMPARISON_TEST_SUMMARY <- lapply(COST_COMPARISON_TEST_SUMMARY, as.vector)
COST_COMPARISON_TEST_SUMMARY <- do.call("rbind", COST_COMPARISON_TEST_SUMMARY)
colnames(COST_COMPARISON_TEST_SUMMARY) <- c("Specificity")
(COST_COMPARISON_TEST_SUMMARY <- as.data.frame(COST_COMPARISON_TEST_SUMMARY))
## Specificity
## SVM_R 0.1428571
## CW_SVM_R 0.2857143
## CART 0.2857143
## CS_CART 0.7142857
## C50 0.1428571
## CS_C50 0.1428571