##################################
# Loading R libraries
##################################
library(AppliedPredictiveModeling)
library(caret)
library(rpart)
library(lattice)
library(dplyr)
library(tidyr)
library(moments)
library(skimr)
library(RANN)
library(mlbench)
library(pls)
library(corrplot)
library(tidyverse)
library(lares)
library(DMwR2)
library(gridExtra)
library(rattle)
library(rpart.plot)
library(RColorBrewer)
library(stats)
library(nnet)
library(elasticnet)
library(earth)
library(party)
library(kernlab)
library(randomForest)
library(Cubist)
library(pROC)
library(mda)
library(klaR)
library(pamr)
##################################
# Loading source and
# formulating the train set
##################################
# Load the Sonar dataset (from mlbench) and split it into train/test sets.
data(Sonar)

# Fix the RNG seed so the stratified 80/20 split is reproducible.
set.seed(12345678)

# createDataPartition() samples within each level of Class, returning the
# row indices of the 80% training partition as a matrix (list = FALSE).
# Per the echoed output below: 167 training rows, 41 test rows.
Sonar_Partition <- createDataPartition(Sonar$Class, p = .80, list = FALSE)
Sonar_Train <- Sonar[Sonar_Partition, ]
Sonar_Test <- Sonar[-Sonar_Partition, ]
##################################
# Performing a general exploration of the train set
##################################
# Training-set dimensions: rows x columns (echoed below as 167 x 61).
dim(Sonar_Train)
## [1] 167 61
# Structure overview: 60 numeric predictors (V1-V60) plus the Class factor
# with levels "M" (mine) / "R" (rock) per the echoed output below.
str(Sonar_Train)
## 'data.frame': 167 obs. of 61 variables:
## $ V1 : num 0.0262 0.01 0.0762 0.0286 0.0519 0.0223 0.0164 0.0039 0.0123 0.0124 ...
## $ V2 : num 0.0582 0.0171 0.0666 0.0453 0.0548 0.0375 0.0173 0.0063 0.0309 0.0433 ...
## $ V3 : num 0.1099 0.0623 0.0481 0.0277 0.0842 ...
## $ V4 : num 0.1083 0.0205 0.0394 0.0174 0.0319 ...
## $ V5 : num 0.0974 0.0205 0.059 0.0384 0.1158 ...
## $ V6 : num 0.228 0.0368 0.0649 0.099 0.0922 0.0591 0.0671 0.0284 0.0102 0.0355 ...
## $ V7 : num 0.243 0.11 0.121 0.12 0.103 ...
## $ V8 : num 0.3771 0.1276 0.2467 0.1833 0.0613 ...
## $ V9 : num 0.5598 0.0598 0.3564 0.2105 0.1465 ...
## $ V10 : num 0.619 0.126 0.446 0.304 0.284 ...
## $ V11 : num 0.6333 0.0881 0.4152 0.2988 0.2802 ...
## $ V12 : num 0.706 0.199 0.395 0.425 0.309 ...
## $ V13 : num 0.5544 0.0184 0.4256 0.6343 0.2657 ...
## $ V14 : num 0.532 0.226 0.413 0.82 0.38 ...
## $ V15 : num 0.648 0.173 0.453 1 0.563 ...
## $ V16 : num 0.693 0.213 0.533 0.999 0.438 ...
## $ V17 : num 0.6759 0.0693 0.7306 0.9508 0.2617 ...
## $ V18 : num 0.755 0.228 0.619 0.902 0.12 ...
## $ V19 : num 0.893 0.406 0.203 0.723 0.668 ...
## $ V20 : num 0.862 0.397 0.464 0.512 0.94 ...
## $ V21 : num 0.797 0.274 0.415 0.207 0.783 ...
## $ V22 : num 0.674 0.369 0.429 0.399 0.535 ...
## $ V23 : num 0.429 0.556 0.573 0.589 0.681 ...
## $ V24 : num 0.365 0.485 0.54 0.287 0.917 ...
## $ V25 : num 0.533 0.314 0.316 0.204 0.761 ...
## $ V26 : num 0.241 0.533 0.229 0.578 0.822 ...
## $ V27 : num 0.507 0.526 0.7 0.539 0.887 ...
## $ V28 : num 0.853 0.252 1 0.375 0.609 ...
## $ V29 : num 0.604 0.209 0.726 0.341 0.297 ...
## $ V30 : num 0.851 0.356 0.472 0.507 0.11 ...
## $ V31 : num 0.851 0.626 0.51 0.558 0.132 ...
## $ V32 : num 0.5045 0.734 0.5459 0.4778 0.0624 ...
## $ V33 : num 0.186 0.612 0.288 0.33 0.099 ...
## $ V34 : num 0.2709 0.3497 0.0981 0.2198 0.4006 ...
## $ V35 : num 0.423 0.395 0.195 0.141 0.367 ...
## $ V36 : num 0.304 0.301 0.418 0.286 0.105 ...
## $ V37 : num 0.612 0.541 0.46 0.381 0.192 ...
## $ V38 : num 0.676 0.881 0.322 0.416 0.393 ...
## $ V39 : num 0.537 0.986 0.283 0.405 0.429 ...
## $ V40 : num 0.472 0.917 0.243 0.33 0.255 ...
## $ V41 : num 0.465 0.612 0.198 0.271 0.115 ...
## $ V42 : num 0.259 0.501 0.244 0.265 0.22 ...
## $ V43 : num 0.2129 0.321 0.1847 0.0723 0.1879 ...
## $ V44 : num 0.2222 0.3202 0.0841 0.1238 0.1437 ...
## $ V45 : num 0.2111 0.4295 0.0692 0.1192 0.2146 ...
## $ V46 : num 0.0176 0.3654 0.0528 0.1089 0.236 ...
## $ V47 : num 0.1348 0.2655 0.0357 0.0623 0.1125 ...
## $ V48 : num 0.0744 0.1576 0.0085 0.0494 0.0254 ...
## $ V49 : num 0.013 0.0681 0.023 0.0264 0.0285 0.0777 0.0092 0.0228 0.0134 0.045 ...
## $ V50 : num 0.0106 0.0294 0.0046 0.0081 0.0178 0.0439 0.0198 0.0073 0.0217 0.0167 ...
## $ V51 : num 0.0033 0.0241 0.0156 0.0104 0.0052 0.0061 0.0118 0.0062 0.0188 0.0078 ...
## $ V52 : num 0.0232 0.0121 0.0031 0.0045 0.0081 0.0145 0.009 0.0062 0.0133 0.0083 ...
## $ V53 : num 0.0166 0.0036 0.0054 0.0014 0.012 0.0128 0.0223 0.012 0.0265 0.0057 ...
## $ V54 : num 0.0095 0.015 0.0105 0.0038 0.0045 0.0145 0.0179 0.0052 0.0224 0.0174 ...
## $ V55 : num 0.018 0.0085 0.011 0.0013 0.0121 0.0058 0.0084 0.0056 0.0074 0.0188 ...
## $ V56 : num 0.0244 0.0073 0.0015 0.0089 0.0097 0.0049 0.0068 0.0093 0.0118 0.0054 ...
## $ V57 : num 0.0316 0.005 0.0072 0.0057 0.0085 0.0065 0.0032 0.0042 0.0026 0.0114 ...
## $ V58 : num 0.0164 0.0044 0.0048 0.0027 0.0047 0.0093 0.0035 0.0003 0.0092 0.0196 ...
## $ V59 : num 0.0095 0.004 0.0107 0.0051 0.0048 0.0059 0.0056 0.0053 0.0009 0.0147 ...
## $ V60 : num 0.0078 0.0117 0.0094 0.0062 0.0053 0.0022 0.004 0.0036 0.0044 0.0062 ...
## $ Class: Factor w/ 2 levels "M","R": 2 2 2 2 2 2 2 2 2 2 ...
# Per-column distribution summaries (min/quartiles/mean/max) for the train set.
summary(Sonar_Train)
## V1 V2 V3 V4
## Min. :0.00150 Min. :0.00170 Min. :0.00150 Min. :0.0058
## 1st Qu.:0.01380 1st Qu.:0.01645 1st Qu.:0.01845 1st Qu.:0.0238
## Median :0.02280 Median :0.03080 Median :0.03470 Median :0.0444
## Mean :0.02928 Mean :0.03823 Mean :0.04328 Mean :0.0515
## 3rd Qu.:0.03640 3rd Qu.:0.04755 3rd Qu.:0.06015 3rd Qu.:0.0657
## Max. :0.13710 Max. :0.16320 Max. :0.16650 Max. :0.1732
## V5 V6 V7 V8
## Min. :0.00670 Min. :0.01020 Min. :0.01300 Min. :0.0057
## 1st Qu.:0.03640 1st Qu.:0.06665 1st Qu.:0.08365 1st Qu.:0.0780
## Median :0.06130 Median :0.09210 Median :0.10540 Median :0.1119
## Mean :0.07196 Mean :0.10333 Mean :0.12062 Mean :0.1345
## 3rd Qu.:0.09905 3rd Qu.:0.13145 3rd Qu.:0.15035 3rd Qu.:0.1723
## Max. :0.25650 Max. :0.38230 Max. :0.37290 Max. :0.4566
## V9 V10 V11 V12
## Min. :0.01170 Min. :0.0113 Min. :0.0289 Min. :0.0236
## 1st Qu.:0.09555 1st Qu.:0.1069 1st Qu.:0.1258 1st Qu.:0.1283
## Median :0.15220 Median :0.1799 Median :0.2210 Median :0.2484
## Mean :0.17995 Mean :0.2087 Mean :0.2367 Mean :0.2490
## 3rd Qu.:0.23940 3rd Qu.:0.2736 3rd Qu.:0.3081 3rd Qu.:0.3341
## Max. :0.68280 Max. :0.7106 Max. :0.7342 Max. :0.7060
## V13 V14 V15 V16
## Min. :0.0184 Min. :0.0273 Min. :0.0031 Min. :0.0162
## 1st Qu.:0.1626 1st Qu.:0.1665 1st Qu.:0.1548 1st Qu.:0.1842
## Median :0.2655 Median :0.2793 Median :0.2616 Median :0.2934
## Mean :0.2727 Mean :0.2970 Mean :0.3151 Mean :0.3707
## 3rd Qu.:0.3584 3rd Qu.:0.3946 3rd Qu.:0.4524 3rd Qu.:0.5361
## Max. :0.7131 Max. :0.9970 Max. :1.0000 Max. :0.9988
## V17 V18 V19 V20
## Min. :0.0349 Min. :0.0375 Min. :0.0494 Min. :0.0740
## 1st Qu.:0.2019 1st Qu.:0.2344 1st Qu.:0.2990 1st Qu.:0.3346
## Median :0.3041 Median :0.3657 Median :0.4309 Median :0.5224
## Mean :0.4092 Mean :0.4484 Mean :0.5012 Mean :0.5571
## 3rd Qu.:0.6601 3rd Qu.:0.6759 3rd Qu.:0.7307 3rd Qu.:0.7990
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## V21 V22 V23 V24
## Min. :0.0512 Min. :0.0219 Min. :0.0563 Min. :0.0239
## 1st Qu.:0.3919 1st Qu.:0.3994 1st Qu.:0.4485 1st Qu.:0.5413
## Median :0.5911 Median :0.6464 Median :0.6809 Median :0.6954
## Mean :0.5992 Mean :0.6130 Mean :0.6385 Mean :0.6675
## 3rd Qu.:0.8153 3rd Qu.:0.8318 3rd Qu.:0.8517 3rd Qu.:0.8692
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## V25 V26 V27 V28
## Min. :0.0240 Min. :0.1543 Min. :0.0874 Min. :0.0284
## 1st Qu.:0.5246 1st Qu.:0.5468 1st Qu.:0.5163 1st Qu.:0.5104
## Median :0.7221 Median :0.7529 Median :0.7207 Median :0.7278
## Mean :0.6702 Mean :0.6948 Mean :0.6976 Mean :0.6955
## 3rd Qu.:0.8623 3rd Qu.:0.8801 3rd Qu.:0.9000 3rd Qu.:0.9055
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## V29 V30 V31 V32
## Min. :0.0144 Min. :0.0613 Min. :0.1000 Min. :0.0404
## 1st Qu.:0.4736 1st Qu.:0.4447 1st Qu.:0.3493 1st Qu.:0.2806
## Median :0.6898 Median :0.6213 Median :0.4973 Median :0.4241
## Mean :0.6528 Mean :0.6009 Mean :0.5211 Mean :0.4464
## 3rd Qu.:0.8620 3rd Qu.:0.7541 3rd Qu.:0.6797 3rd Qu.:0.6219
## Max. :1.0000 Max. :1.0000 Max. :0.9657 Max. :0.9306
## V33 V34 V35 V36
## Min. :0.0477 Min. :0.0212 Min. :0.0223 Min. :0.0080
## 1st Qu.:0.2691 1st Qu.:0.2183 1st Qu.:0.1840 1st Qu.:0.1552
## Median :0.3921 Median :0.3785 Median :0.3330 Median :0.3172
## Mean :0.4259 Mean :0.4121 Mean :0.3976 Mean :0.3873
## 3rd Qu.:0.5774 3rd Qu.:0.6046 3rd Qu.:0.6110 3rd Qu.:0.5663
## Max. :1.0000 Max. :0.9647 Max. :1.0000 Max. :1.0000
## V37 V38 V39 V40
## Min. :0.0351 Min. :0.0383 Min. :0.0371 Min. :0.0117
## 1st Qu.:0.1593 1st Qu.:0.1784 1st Qu.:0.1787 1st Qu.:0.1921
## Median :0.3039 Median :0.3104 Median :0.2828 Median :0.2792
## Mean :0.3677 Mean :0.3433 Mean :0.3319 Mean :0.3148
## 3rd Qu.:0.5395 3rd Qu.:0.4410 3rd Qu.:0.4587 3rd Qu.:0.4254
## Max. :0.9497 Max. :1.0000 Max. :0.9857 Max. :0.9167
## V41 V42 V43 V44
## Min. :0.0438 Min. :0.0056 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.1691 1st Qu.:0.1722 1st Qu.:0.1555 1st Qu.:0.1253
## Median :0.2649 Median :0.2587 Median :0.2275 Median :0.1753
## Mean :0.2977 Mean :0.2847 Mean :0.2495 Mean :0.2147
## 3rd Qu.:0.4024 3rd Qu.:0.3851 3rd Qu.:0.3225 3rd Qu.:0.2655
## Max. :0.7751 Max. :0.8246 Max. :0.7733 Max. :0.7762
## V45 V46 V47 V48
## Min. :0.00000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.09695 1st Qu.:0.0693 1st Qu.:0.06635 1st Qu.:0.04625
## Median :0.14730 Median :0.1199 Median :0.10340 Median :0.08120
## Mean :0.20028 Mean :0.1631 Mean :0.12474 Mean :0.09464
## 3rd Qu.:0.23645 3rd Qu.:0.2006 3rd Qu.:0.15475 3rd Qu.:0.12245
## Max. :0.70340 Max. :0.7292 Max. :0.55220 Max. :0.33390
## V49 V50 V51 V52
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00080
## 1st Qu.:0.02875 1st Qu.:0.01170 1st Qu.:0.00785 1st Qu.:0.00725
## Median :0.04520 Median :0.01790 Median :0.01360 Median :0.01120
## Mean :0.05411 Mean :0.02075 Mean :0.01595 Mean :0.01351
## 3rd Qu.:0.07235 3rd Qu.:0.02545 3rd Qu.:0.02115 3rd Qu.:0.01670
## Max. :0.17940 Max. :0.08250 Max. :0.10040 Max. :0.07090
## V53 V54 V55 V56
## Min. :0.00050 Min. :0.00100 Min. :0.000600 Min. :0.000400
## 1st Qu.:0.00470 1st Qu.:0.00545 1st Qu.:0.004100 1st Qu.:0.004450
## Median :0.00800 Median :0.00950 Median :0.007500 Median :0.007000
## Mean :0.01067 Mean :0.01083 Mean :0.009063 Mean :0.008187
## 3rd Qu.:0.01525 3rd Qu.:0.01375 3rd Qu.:0.012100 3rd Qu.:0.010350
## Max. :0.03900 Max. :0.03520 Max. :0.044700 Max. :0.039400
## V57 V58 V59 V60
## Min. :0.000300 Min. :0.000300 Min. :0.000100 Min. :0.000600
## 1st Qu.:0.003700 1st Qu.:0.003600 1st Qu.:0.003500 1st Qu.:0.003000
## Median :0.006000 Median :0.006000 Median :0.006000 Median :0.005100
## Mean :0.007763 Mean :0.008172 Mean :0.007586 Mean :0.006029
## 3rd Qu.:0.010350 3rd Qu.:0.010350 3rd Qu.:0.009800 3rd Qu.:0.008500
## Max. :0.035500 Max. :0.044000 Max. :0.029400 Max. :0.021800
## Class
## M:89
## R:78
##
##
##
##
##################################
# Performing a general exploration of the test set
##################################
# Test-set dimensions: rows x columns (echoed below as 41 x 61).
dim(Sonar_Test)
## [1] 41 61
# Structure overview of the test set: V1-V60 numeric plus the Class factor.
str(Sonar_Test)
## 'data.frame': 41 obs. of 61 variables:
## $ V1 : num 0.02 0.0453 0.0317 0.0079 0.009 0.0352 0.0099 0.01 0.0189 0.0123 ...
## $ V2 : num 0.0371 0.0523 0.0956 0.0086 0.0062 0.0116 0.0484 0.0275 0.0308 0.0022 ...
## $ V3 : num 0.0428 0.0843 0.1321 0.0055 0.0253 ...
## $ V4 : num 0.0207 0.0689 0.1408 0.025 0.0489 ...
## $ V5 : num 0.0954 0.1183 0.1674 0.0344 0.1197 ...
## $ V6 : num 0.0986 0.2583 0.171 0.0546 0.1589 ...
## $ V7 : num 0.1539 0.2156 0.0731 0.0528 0.1392 ...
## $ V8 : num 0.1601 0.3481 0.1401 0.0958 0.0987 ...
## $ V9 : num 0.3109 0.3337 0.2083 0.1009 0.0955 ...
## $ V10 : num 0.211 0.287 0.351 0.124 0.19 ...
## $ V11 : num 0.161 0.492 0.179 0.11 0.19 ...
## $ V12 : num 0.1582 0.6552 0.0658 0.1215 0.2547 ...
## $ V13 : num 0.2238 0.6919 0.0513 0.1874 0.4073 ...
## $ V14 : num 0.0645 0.7797 0.3752 0.3383 0.2988 ...
## $ V15 : num 0.066 0.746 0.542 0.323 0.29 ...
## $ V16 : num 0.227 0.944 0.544 0.272 0.533 ...
## $ V17 : num 0.31 1 0.515 0.394 0.402 ...
## $ V18 : num 0.3 0.887 0.426 0.643 0.157 ...
## $ V19 : num 0.508 0.802 0.202 0.727 0.302 ...
## $ V20 : num 0.48 0.782 0.423 0.867 0.391 ...
## $ V21 : num 0.578 0.521 0.772 0.967 0.354 ...
## $ V22 : num 0.507 0.405 0.974 0.985 0.444 ...
## $ V23 : num 0.433 0.396 0.939 0.948 0.641 ...
## $ V24 : num 0.555 0.391 0.556 0.804 0.46 ...
## $ V25 : num 0.671 0.325 0.527 0.683 0.601 ...
## $ V26 : num 0.641 0.32 0.683 0.514 0.869 ...
## $ V27 : num 0.71 0.327 0.571 0.309 0.835 ...
## $ V28 : num 0.808 0.2767 0.5429 0.0832 0.7669 ...
## $ V29 : num 0.679 0.442 0.218 0.402 0.508 ...
## $ V30 : num 0.386 0.203 0.215 0.234 0.462 ...
## $ V31 : num 0.131 0.379 0.581 0.19 0.538 ...
## $ V32 : num 0.26 0.295 0.632 0.123 0.537 ...
## $ V33 : num 0.512 0.198 0.296 0.172 0.384 ...
## $ V34 : num 0.755 0.234 0.187 0.235 0.36 ...
## $ V35 : num 0.854 0.131 0.297 0.249 0.74 ...
## $ V36 : num 0.851 0.418 0.516 0.365 0.776 ...
## $ V37 : num 0.669 0.384 0.615 0.338 0.386 ...
## $ V38 : num 0.6097 0.1057 0.4283 0.1589 0.0667 ...
## $ V39 : num 0.4943 0.184 0.5479 0.0989 0.3684 ...
## $ V40 : num 0.274 0.197 0.613 0.109 0.611 ...
## $ V41 : num 0.051 0.167 0.502 0.104 0.351 ...
## $ V42 : num 0.2834 0.0583 0.2377 0.0839 0.2312 ...
## $ V43 : num 0.282 0.14 0.196 0.139 0.22 ...
## $ V44 : num 0.4256 0.1628 0.1749 0.0819 0.3051 ...
## $ V45 : num 0.2641 0.0621 0.1304 0.0678 0.1937 ...
## $ V46 : num 0.1386 0.0203 0.0597 0.0663 0.157 ...
## $ V47 : num 0.1051 0.053 0.1124 0.1202 0.0479 ...
## $ V48 : num 0.1343 0.0742 0.1047 0.0692 0.0538 ...
## $ V49 : num 0.0383 0.0409 0.0507 0.0152 0.0146 ...
## $ V50 : num 0.0324 0.0061 0.0159 0.0266 0.0068 0.0469 0.0779 0.0247 0.0143 0.0074 ...
## $ V51 : num 0.0232 0.0125 0.0195 0.0174 0.0187 0.0426 0.0396 0.0118 0.0091 0.0149 ...
## $ V52 : num 0.0027 0.0084 0.0201 0.0176 0.0059 0.0346 0.0173 0.0088 0.0038 0.0125 ...
## $ V53 : num 0.0065 0.0089 0.0248 0.0127 0.0095 0.0158 0.0149 0.0104 0.0096 0.0134 ...
## $ V54 : num 0.0159 0.0048 0.0131 0.0088 0.0194 0.0154 0.0115 0.0036 0.0142 0.0026 ...
## $ V55 : num 0.0072 0.0094 0.007 0.0098 0.008 0.0109 0.0202 0.0088 0.019 0.0038 ...
## $ V56 : num 0.0167 0.0191 0.0138 0.0019 0.0152 0.0048 0.0139 0.0047 0.014 0.0018 ...
## $ V57 : num 0.018 0.014 0.0092 0.0059 0.0158 0.0095 0.0029 0.0117 0.0099 0.0113 ...
## $ V58 : num 0.0084 0.0049 0.0143 0.0058 0.0053 0.0015 0.016 0.002 0.0092 0.0058 ...
## $ V59 : num 0.009 0.0052 0.0036 0.0059 0.0189 0.0073 0.0106 0.0091 0.0052 0.0047 ...
## $ V60 : num 0.0032 0.0044 0.0103 0.0032 0.0102 0.0067 0.0134 0.0058 0.0075 0.0071 ...
## $ Class: Factor w/ 2 levels "M","R": 2 2 2 2 2 2 2 2 2 2 ...
# Per-column distribution summaries (min/quartiles/mean/max) for the test set.
summary(Sonar_Test)
## V1 V2 V3 V4
## Min. :0.00790 Min. :0.00060 Min. :0.00300 Min. :0.00610
## 1st Qu.:0.01290 1st Qu.:0.01650 1st Qu.:0.01910 1st Qu.:0.02500
## Median :0.02100 Median :0.03080 Median :0.03060 Median :0.03990
## Mean :0.02868 Mean :0.03928 Mean :0.04607 Mean :0.06363
## 3rd Qu.:0.03460 3rd Qu.:0.05090 3rd Qu.:0.04660 3rd Qu.:0.06270
## Max. :0.13130 Max. :0.23390 Max. :0.30590 Max. :0.42640
## V5 V6 V7 V8
## Min. :0.0080 Min. :0.0201 Min. :0.0033 Min. :0.0055
## 1st Qu.:0.0397 1st Qu.:0.0696 1st Qu.:0.0742 1st Qu.:0.0941
## Median :0.0652 Median :0.0924 Median :0.1178 Median :0.1134
## Mean :0.0884 Mean :0.1096 Mean :0.1264 Mean :0.1358
## 3rd Qu.:0.1158 3rd Qu.:0.1589 3rd Qu.:0.1683 3rd Qu.:0.1601
## Max. :0.4010 Max. :0.2587 Max. :0.3322 Max. :0.4590
## V9 V10 V11 V12
## Min. :0.0075 Min. :0.0279 Min. :0.0575 Min. :0.0259
## 1st Qu.:0.1063 1st Qu.:0.1370 1st Qu.:0.1532 1st Qu.:0.1741
## Median :0.1523 Median :0.2028 Median :0.2295 Median :0.2497
## Mean :0.1701 Mean :0.2066 Mean :0.2333 Mean :0.2552
## 3rd Qu.:0.2083 3rd Qu.:0.2571 3rd Qu.:0.2931 3rd Qu.:0.3134
## Max. :0.5526 Max. :0.5966 Max. :0.5304 Max. :0.6552
## V13 V14 V15 V16
## Min. :0.0513 Min. :0.0336 Min. :0.0660 Min. :0.0742
## 1st Qu.:0.1874 1st Qu.:0.1943 1st Qu.:0.1840 1st Qu.:0.2285
## Median :0.2508 Median :0.2917 Median :0.3024 Median :0.3323
## Mean :0.2757 Mean :0.2950 Mean :0.3408 Mean :0.4100
## 3rd Qu.:0.3206 3rd Qu.:0.3383 3rd Qu.:0.4533 3rd Qu.:0.5343
## Max. :0.6919 Max. :0.7797 Max. :0.7464 Max. :0.9751
## V17 V18 V19 V20
## Min. :0.0699 Min. :0.0837 Min. :0.1151 Min. :0.0656
## 1st Qu.:0.2177 1st Qu.:0.2838 1st Qu.:0.3024 1st Qu.:0.3915
## Median :0.3943 Median :0.3797 Median :0.5036 Median :0.6127
## Mean :0.4438 Mean :0.4682 Mean :0.5194 Mean :0.5873
## 3rd Qu.:0.6441 3rd Qu.:0.6921 3rd Qu.:0.7708 3rd Qu.:0.8627
## Max. :1.0000 Max. :1.0000 Max. :0.9832 Max. :0.9634
## V21 V22 V23 V24
## Min. :0.1354 Min. :0.1127 Min. :0.1668 Min. :0.1611
## 1st Qu.:0.4539 1st Qu.:0.4789 1st Qu.:0.4645 1st Qu.:0.5410
## Median :0.7209 Median :0.7676 Median :0.7609 Median :0.7605
## Mean :0.6491 Mean :0.6703 Mean :0.6814 Mean :0.6936
## 3rd Qu.:0.8646 3rd Qu.:0.8318 3rd Qu.:0.8449 3rd Qu.:0.8760
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## V25 V26 V27 V28
## Min. :0.1934 Min. :0.0921 Min. :0.0481 Min. :0.0832
## 1st Qu.:0.5268 1st Qu.:0.5423 1st Qu.:0.5353 1st Qu.:0.5897
## Median :0.7115 Median :0.7867 Median :0.7750 Median :0.7325
## Mean :0.6967 Mean :0.7206 Mean :0.7208 Mean :0.6881
## 3rd Qu.:0.9066 3rd Qu.:0.9481 3rd Qu.:0.9673 3rd Qu.:0.8664
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## V29 V30 V31 V32
## Min. :0.1040 Min. :0.0823 Min. :0.0482 Min. :0.0994
## 1st Qu.:0.4421 1st Qu.:0.3822 1st Qu.:0.3264 1st Qu.:0.2947
## Median :0.5807 Median :0.4301 Median :0.4302 Median :0.4444
## Mean :0.5983 Mean :0.4997 Mean :0.4369 Mean :0.4092
## 3rd Qu.:0.8184 3rd Qu.:0.6616 3rd Qu.:0.5524 3rd Qu.:0.5141
## Max. :1.0000 Max. :0.8660 Max. :0.8787 Max. :0.9108
## V33 V34 V35 V36
## Min. :0.0507 Min. :0.0431 Min. :0.0619 Min. :0.0271
## 1st Qu.:0.2277 1st Qu.:0.2017 1st Qu.:0.1641 1st Qu.:0.1519
## Median :0.3844 Median :0.3095 Median :0.2896 Median :0.3649
## Mean :0.3820 Mean :0.3669 Mean :0.3722 Mean :0.3750
## 3rd Qu.:0.5121 3rd Qu.:0.5050 3rd Qu.:0.5531 3rd Qu.:0.5163
## Max. :0.8032 Max. :0.8703 Max. :1.0000 Max. :0.9212
## V37 V38 V39 V40
## Min. :0.0535 Min. :0.0411 Min. :0.0477 Min. :0.0202
## 1st Qu.:0.1644 1st Qu.:0.1730 1st Qu.:0.1252 1st Qu.:0.1485
## Median :0.3201 Median :0.3170 Median :0.2916 Median :0.2715
## Mean :0.3481 Mean :0.3249 Mean :0.3009 Mean :0.2964
## 3rd Qu.:0.4801 3rd Qu.:0.4283 3rd Qu.:0.3803 3rd Qu.:0.4022
## Max. :0.9386 Max. :0.9303 Max. :0.9709 Max. :0.9297
## V41 V42 V43 V44
## Min. :0.0360 Min. :0.0300 Min. :0.0550 Min. :0.0375
## 1st Qu.:0.1043 1st Qu.:0.1197 1st Qu.:0.1552 1st Qu.:0.1381
## Median :0.1921 Median :0.1986 Median :0.2195 Median :0.1927
## Mean :0.2549 Mean :0.2521 Mean :0.2344 Mean :0.2114
## 3rd Qu.:0.3510 3rd Qu.:0.3010 3rd Qu.:0.3370 3rd Qu.:0.2804
## Max. :0.8995 Max. :0.7911 Max. :0.5600 Max. :0.5245
## V45 V46 V47 V48
## Min. :0.0335 Min. :0.0203 Min. :0.0179 Min. :0.00410
## 1st Qu.:0.0902 1st Qu.:0.0654 1st Qu.:0.0530 1st Qu.:0.04180
## Median :0.1625 Median :0.1294 Median :0.1013 Median :0.06920
## Mean :0.1848 Mean :0.1505 Mean :0.1131 Mean :0.07832
## 3rd Qu.:0.2078 3rd Qu.:0.1985 3rd Qu.:0.1521 3rd Qu.:0.10470
## Max. :0.6149 Max. :0.5507 Max. :0.4331 Max. :0.29050
## V49 V50 V51 V52
## Min. :0.00730 Min. :0.00060 Min. :0.00190 Min. :0.00250
## 1st Qu.:0.01870 1st Qu.:0.00950 1st Qu.:0.01040 1st Qu.:0.00780
## Median :0.03830 Median :0.01700 Median :0.01540 Median :0.01170
## Mean :0.04304 Mean :0.01909 Mean :0.01655 Mean :0.01304
## 3rd Qu.:0.05490 3rd Qu.:0.02470 3rd Qu.:0.01950 3rd Qu.:0.01680
## Max. :0.19810 Max. :0.07790 Max. :0.04260 Max. :0.03620
## V53 V54 V55 V56
## Min. :0.00190 Min. :0.00130 Min. :0.00110 Min. :0.001300
## 1st Qu.:0.00820 1st Qu.:0.00510 1st Qu.:0.00430 1st Qu.:0.004200
## Median :0.01030 Median :0.00840 Median :0.00720 Median :0.006500
## Mean :0.01089 Mean :0.01139 Mean :0.01022 Mean :0.008363
## 3rd Qu.:0.01290 3rd Qu.:0.01600 3rd Qu.:0.01230 3rd Qu.:0.011900
## Max. :0.02480 Max. :0.03350 Max. :0.03760 Max. :0.027700
## V57 V58 V59 V60
## Min. :0.000900 Min. :0.000600 Min. :0.002300 Min. :0.001600
## 1st Qu.:0.003700 1st Qu.:0.003400 1st Qu.:0.004400 1st Qu.:0.004300
## Median :0.005900 Median :0.005500 Median :0.007300 Median :0.006000
## Mean :0.008051 Mean :0.007039 Mean :0.009388 Mean :0.008454
## 3rd Qu.:0.010600 3rd Qu.:0.010300 3rd Qu.:0.011000 3rd Qu.:0.010200
## Max. :0.024200 Max. :0.022400 Max. :0.036400 Max. :0.043900
## Class
## M:22
## R:19
##
##
##
##
##################################
# Formulating a data type assessment summary
##################################
# Work on a copy of the training set for the data type assessment.
PDA <- Sonar_Train

# One row per column: positional index, name and R class.
# Wrapping the assignment in parentheses auto-prints the result.
(PDA.Summary <- data.frame(Column.Index = seq_along(names(PDA)),
                           Column.Name = names(PDA),
                           Column.Type = sapply(PDA, function(x) class(x)),
                           row.names = NULL))
## Column.Index Column.Name Column.Type
## 1 1 V1 numeric
## 2 2 V2 numeric
## 3 3 V3 numeric
## 4 4 V4 numeric
## 5 5 V5 numeric
## 6 6 V6 numeric
## 7 7 V7 numeric
## 8 8 V8 numeric
## 9 9 V9 numeric
## 10 10 V10 numeric
## 11 11 V11 numeric
## 12 12 V12 numeric
## 13 13 V13 numeric
## 14 14 V14 numeric
## 15 15 V15 numeric
## 16 16 V16 numeric
## 17 17 V17 numeric
## 18 18 V18 numeric
## 19 19 V19 numeric
## 20 20 V20 numeric
## 21 21 V21 numeric
## 22 22 V22 numeric
## 23 23 V23 numeric
## 24 24 V24 numeric
## 25 25 V25 numeric
## 26 26 V26 numeric
## 27 27 V27 numeric
## 28 28 V28 numeric
## 29 29 V29 numeric
## 30 30 V30 numeric
## 31 31 V31 numeric
## 32 32 V32 numeric
## 33 33 V33 numeric
## 34 34 V34 numeric
## 35 35 V35 numeric
## 36 36 V36 numeric
## 37 37 V37 numeric
## 38 38 V38 numeric
## 39 39 V39 numeric
## 40 40 V40 numeric
## 41 41 V41 numeric
## 42 42 V42 numeric
## 43 43 V43 numeric
## 44 44 V44 numeric
## 45 45 V45 numeric
## 46 46 V46 numeric
## 47 47 V47 numeric
## 48 48 V48 numeric
## 49 49 V49 numeric
## 50 50 V50 numeric
## 51 51 V51 numeric
## 52 52 V52 numeric
## 53 53 V53 numeric
## 54 54 V54 numeric
## 55 55 V55 numeric
## 56 56 V56 numeric
## 57 57 V57 numeric
## 58 58 V58 numeric
## 59 59 V59 numeric
## 60 60 V60 numeric
## 61 61 Class factor
##################################
# Loading dataset
##################################
# Work on a copy of the training set for the data quality assessment.
DQA <- Sonar_Train

##################################
# Formulating an overall data quality assessment summary
##################################
# One row per column: class, row count, NA count and fill rate
# (share of non-missing values, formatted to 3 decimals).
# Wrapping the assignment in parentheses auto-prints the result.
(DQA.Summary <- data.frame(Column.Index = seq_along(names(DQA)),
                           Column.Name = names(DQA),
                           Column.Type = sapply(DQA, function(x) class(x)),
                           Row.Count = sapply(DQA, function(x) nrow(DQA)),
                           NA.Count = sapply(DQA, function(x) sum(is.na(x))),
                           Fill.Rate = sapply(DQA, function(x) format(round((sum(!is.na(x)) / nrow(DQA)), 3), nsmall = 3)),
                           row.names = NULL))
## Column.Index Column.Name Column.Type Row.Count NA.Count Fill.Rate
## 1 1 V1 numeric 167 0 1.000
## 2 2 V2 numeric 167 0 1.000
## 3 3 V3 numeric 167 0 1.000
## 4 4 V4 numeric 167 0 1.000
## 5 5 V5 numeric 167 0 1.000
## 6 6 V6 numeric 167 0 1.000
## 7 7 V7 numeric 167 0 1.000
## 8 8 V8 numeric 167 0 1.000
## 9 9 V9 numeric 167 0 1.000
## 10 10 V10 numeric 167 0 1.000
## 11 11 V11 numeric 167 0 1.000
## 12 12 V12 numeric 167 0 1.000
## 13 13 V13 numeric 167 0 1.000
## 14 14 V14 numeric 167 0 1.000
## 15 15 V15 numeric 167 0 1.000
## 16 16 V16 numeric 167 0 1.000
## 17 17 V17 numeric 167 0 1.000
## 18 18 V18 numeric 167 0 1.000
## 19 19 V19 numeric 167 0 1.000
## 20 20 V20 numeric 167 0 1.000
## 21 21 V21 numeric 167 0 1.000
## 22 22 V22 numeric 167 0 1.000
## 23 23 V23 numeric 167 0 1.000
## 24 24 V24 numeric 167 0 1.000
## 25 25 V25 numeric 167 0 1.000
## 26 26 V26 numeric 167 0 1.000
## 27 27 V27 numeric 167 0 1.000
## 28 28 V28 numeric 167 0 1.000
## 29 29 V29 numeric 167 0 1.000
## 30 30 V30 numeric 167 0 1.000
## 31 31 V31 numeric 167 0 1.000
## 32 32 V32 numeric 167 0 1.000
## 33 33 V33 numeric 167 0 1.000
## 34 34 V34 numeric 167 0 1.000
## 35 35 V35 numeric 167 0 1.000
## 36 36 V36 numeric 167 0 1.000
## 37 37 V37 numeric 167 0 1.000
## 38 38 V38 numeric 167 0 1.000
## 39 39 V39 numeric 167 0 1.000
## 40 40 V40 numeric 167 0 1.000
## 41 41 V41 numeric 167 0 1.000
## 42 42 V42 numeric 167 0 1.000
## 43 43 V43 numeric 167 0 1.000
## 44 44 V44 numeric 167 0 1.000
## 45 45 V45 numeric 167 0 1.000
## 46 46 V46 numeric 167 0 1.000
## 47 47 V47 numeric 167 0 1.000
## 48 48 V48 numeric 167 0 1.000
## 49 49 V49 numeric 167 0 1.000
## 50 50 V50 numeric 167 0 1.000
## 51 51 V51 numeric 167 0 1.000
## 52 52 V52 numeric 167 0 1.000
## 53 53 V53 numeric 167 0 1.000
## 54 54 V54 numeric 167 0 1.000
## 55 55 V55 numeric 167 0 1.000
## 56 56 V56 numeric 167 0 1.000
## 57 57 V57 numeric 167 0 1.000
## 58 58 V58 numeric 167 0 1.000
## 59 59 V59 numeric 167 0 1.000
## 60 60 V60 numeric 167 0 1.000
## 61 61 Class factor 167 0 1.000
##################################
# Listing all predictors
##################################
# Exclude the Class response; keep predictor columns only.
DQA.Predictors <- DQA[, !names(DQA) %in% c("Class")]
##################################
# Listing all numeric predictors
##################################
# Subset to the numeric predictor columns only.
DQA.Predictors.Numeric <- DQA.Predictors[, sapply(DQA.Predictors, is.numeric)]

# Report how many numeric predictors were found (echoed below: 60).
if (length(names(DQA.Predictors.Numeric)) > 0) {
  print(paste0("There are ",
               length(names(DQA.Predictors.Numeric)),
               " numeric predictor variable(s)."))
} else {
  print("There are no numeric predictor variables.")
}
## [1] "There are 60 numeric predictor variable(s)."
##################################
# Listing all factor predictors
##################################
# Subset to the factor predictor columns only.
DQA.Predictors.Factor <- DQA.Predictors[, sapply(DQA.Predictors, is.factor)]

# Report how many factor predictors were found (echoed below: none).
if (length(names(DQA.Predictors.Factor)) > 0) {
  print(paste0("There are ",
               length(names(DQA.Predictors.Factor)),
               " factor predictor variable(s)."))
} else {
  print("There are no factor predictor variables.")
}
## [1] "There are no factor predictor variables."
##################################
# Formulating a data quality assessment summary for factor predictors
##################################
# Only computed when factor predictors exist (none in this dataset,
# so this whole block is skipped at run time).
if (length(names(DQA.Predictors.Factor)) > 0) {

  ##################################
  # Formulating a function to determine the first mode
  ##################################
  # Most frequent value(s) among the non-missing entries of x.
  FirstModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    ux[tab == max(tab)]
  }

  ##################################
  # Formulating a function to determine the second mode
  ##################################
  # Most frequent value(s) after removing all first-mode entries;
  # returns the sentinel "x" when no second mode exists.
  SecondModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    fm <- ux[tab == max(tab)]
    sm <- x[!(x %in% fm)]
    usm <- unique(sm)
    tabsm <- tabulate(match(sm, usm))
    # NOTE(review): ifelse() with return() in both branches is unusual but
    # acts as an early exit on the first branch evaluated; kept as authored.
    ifelse(is.na(usm[tabsm == max(tabsm)]) == TRUE,
           return("x"),
           return(usm[tabsm == max(tabsm)]))
  }

  # Per-factor-column summary: cardinality, first/second modes and their
  # counts/ratios. Parentheses auto-print the assigned data frame.
  (DQA.Predictors.Factor.Summary <- data.frame(
    Column.Name = names(DQA.Predictors.Factor),
    Column.Type = sapply(DQA.Predictors.Factor, function(x) class(x)),
    Unique.Count = sapply(DQA.Predictors.Factor, function(x) length(unique(x))),
    First.Mode.Value = sapply(DQA.Predictors.Factor, function(x) as.character(FirstModes(x)[1])),
    Second.Mode.Value = sapply(DQA.Predictors.Factor, function(x) as.character(SecondModes(x)[1])),
    First.Mode.Count = sapply(DQA.Predictors.Factor, function(x) sum(na.omit(x) == FirstModes(x)[1])),
    Second.Mode.Count = sapply(DQA.Predictors.Factor, function(x) sum(na.omit(x) == SecondModes(x)[1])),
    Unique.Count.Ratio = sapply(DQA.Predictors.Factor, function(x) format(round((length(unique(x)) / nrow(DQA.Predictors.Factor)), 3), nsmall = 3)),
    First.Second.Mode.Ratio = sapply(DQA.Predictors.Factor, function(x) format(round((sum(na.omit(x) == FirstModes(x)[1]) / sum(na.omit(x) == SecondModes(x)[1])), 3), nsmall = 3)),
    row.names = NULL))
}
##################################
# Formulating a data quality assessment summary for numeric predictors
##################################
# Only computed when numeric predictors exist (60 here, so this runs).
if (length(names(DQA.Predictors.Numeric)) > 0) {

  ##################################
  # Formulating a function to determine the first mode
  ##################################
  # Most frequent value(s) among the non-missing entries of x.
  FirstModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    ux[tab == max(tab)]
  }

  ##################################
  # Formulating a function to determine the second mode
  ##################################
  # Most frequent value(s) after removing all first-mode entries;
  # returns the sentinel 0.00001 when no second mode exists, so the
  # First.Second.Mode.Ratio below never divides by a missing value.
  SecondModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    fm <- ux[tab == max(tab)]
    sm <- na.omit(x)[!(na.omit(x) %in% fm)]
    usm <- unique(sm)
    tabsm <- tabulate(match(sm, usm))
    # NOTE(review): ifelse() with return() in both branches is unusual but
    # acts as an early exit on the first branch evaluated; kept as authored.
    ifelse(is.na(usm[tabsm == max(tabsm)]) == TRUE,
           return(0.00001),
           return(usm[tabsm == max(tabsm)]))
  }

  # Per-numeric-column summary: cardinality, modes, range, central tendency,
  # skewness/kurtosis (moments package) and quartiles, all formatted to
  # 3 decimals. Parentheses auto-print the assigned data frame.
  (DQA.Predictors.Numeric.Summary <- data.frame(
    Column.Name = names(DQA.Predictors.Numeric),
    Column.Type = sapply(DQA.Predictors.Numeric, function(x) class(x)),
    Unique.Count = sapply(DQA.Predictors.Numeric, function(x) length(unique(x))),
    Unique.Count.Ratio = sapply(DQA.Predictors.Numeric, function(x) format(round((length(unique(x)) / nrow(DQA.Predictors.Numeric)), 3), nsmall = 3)),
    First.Mode.Value = sapply(DQA.Predictors.Numeric, function(x) format(round((FirstModes(x)[1]), 3), nsmall = 3)),
    Second.Mode.Value = sapply(DQA.Predictors.Numeric, function(x) format(round((SecondModes(x)[1]), 3), nsmall = 3)),
    First.Mode.Count = sapply(DQA.Predictors.Numeric, function(x) sum(na.omit(x) == FirstModes(x)[1])),
    Second.Mode.Count = sapply(DQA.Predictors.Numeric, function(x) sum(na.omit(x) == SecondModes(x)[1])),
    First.Second.Mode.Ratio = sapply(DQA.Predictors.Numeric, function(x) format(round((sum(na.omit(x) == FirstModes(x)[1]) / sum(na.omit(x) == SecondModes(x)[1])), 3), nsmall = 3)),
    Minimum = sapply(DQA.Predictors.Numeric, function(x) format(round(min(x, na.rm = TRUE), 3), nsmall = 3)),
    Mean = sapply(DQA.Predictors.Numeric, function(x) format(round(mean(x, na.rm = TRUE), 3), nsmall = 3)),
    Median = sapply(DQA.Predictors.Numeric, function(x) format(round(median(x, na.rm = TRUE), 3), nsmall = 3)),
    Maximum = sapply(DQA.Predictors.Numeric, function(x) format(round(max(x, na.rm = TRUE), 3), nsmall = 3)),
    Skewness = sapply(DQA.Predictors.Numeric, function(x) format(round(skewness(x, na.rm = TRUE), 3), nsmall = 3)),
    Kurtosis = sapply(DQA.Predictors.Numeric, function(x) format(round(kurtosis(x, na.rm = TRUE), 3), nsmall = 3)),
    Percentile25th = sapply(DQA.Predictors.Numeric, function(x) format(round(quantile(x, probs = 0.25, na.rm = TRUE), 3), nsmall = 3)),
    Percentile75th = sapply(DQA.Predictors.Numeric, function(x) format(round(quantile(x, probs = 0.75, na.rm = TRUE), 3), nsmall = 3)),
    row.names = NULL))
}
## Column.Name Column.Type Unique.Count Unique.Count.Ratio First.Mode.Value
## 1 V1 numeric 149 0.892 0.020
## 2 V2 numeric 154 0.922 0.045
## 3 V3 numeric 153 0.916 0.028
## 4 V4 numeric 150 0.898 0.011
## 5 V5 numeric 156 0.934 0.065
## 6 V6 numeric 156 0.934 0.028
## 7 V7 numeric 158 0.946 0.077
## 8 V8 numeric 161 0.964 0.183
## 9 V9 numeric 165 0.988 0.210
## 10 V10 numeric 167 1.000 0.619
## 11 V11 numeric 163 0.976 0.248
## 12 V12 numeric 165 0.988 0.374
## 13 V13 numeric 160 0.958 0.266
## 14 V14 numeric 162 0.970 0.280
## 15 V15 numeric 164 0.982 0.187
## 16 V16 numeric 166 0.994 0.203
## 17 V17 numeric 163 0.976 0.216
## 18 V18 numeric 164 0.982 1.000
## 19 V19 numeric 166 0.994 0.406
## 20 V20 numeric 162 0.970 0.334
## 21 V21 numeric 161 0.964 1.000
## 22 V22 numeric 164 0.982 1.000
## 23 V23 numeric 161 0.964 1.000
## 24 V24 numeric 161 0.964 1.000
## 25 V25 numeric 158 0.946 1.000
## 26 V26 numeric 158 0.946 1.000
## 27 V27 numeric 155 0.928 1.000
## 28 V28 numeric 154 0.922 1.000
## 29 V29 numeric 159 0.952 1.000
## 30 V30 numeric 162 0.970 1.000
## 31 V31 numeric 166 0.994 0.386
## 32 V32 numeric 165 0.988 0.290
## 33 V33 numeric 165 0.988 0.525
## 34 V34 numeric 167 1.000 0.271
## 35 V35 numeric 165 0.988 1.000
## 36 V36 numeric 165 0.988 1.000
## 37 V37 numeric 167 1.000 0.612
## 38 V38 numeric 165 0.988 0.315
## 39 V39 numeric 163 0.976 0.168
## 40 V40 numeric 166 0.994 0.443
## 41 V41 numeric 165 0.988 0.305
## 42 V42 numeric 167 1.000 0.259
## 43 V43 numeric 165 0.988 0.212
## 44 V44 numeric 156 0.934 0.320
## 45 V45 numeric 164 0.982 0.119
## 46 V46 numeric 162 0.970 0.143
## 47 V47 numeric 164 0.982 0.079
## 48 V48 numeric 166 0.994 0.075
## 49 V49 numeric 157 0.940 0.032
## 50 V50 numeric 135 0.808 0.018
## 51 V51 numeric 138 0.826 0.015
## 52 V52 numeric 124 0.743 0.009
## 53 V53 numeric 115 0.689 0.004
## 54 V54 numeric 112 0.671 0.015
## 55 V55 numeric 105 0.629 0.004
## 56 V56 numeric 106 0.635 0.007
## 57 V57 numeric 107 0.641 0.005
## 58 V58 numeric 107 0.641 0.004
## 59 V59 numeric 103 0.617 0.007
## 60 V60 numeric 94 0.563 0.005
## Second.Mode.Value First.Mode.Count Second.Mode.Count First.Second.Mode.Ratio
## 1 0.010 3 2 1.500
## 2 0.058 2 1 2.000
## 3 0.110 2 1 2.000
## 4 0.039 3 2 1.500
## 5 0.097 2 1 2.000
## 6 0.228 2 1 2.000
## 7 0.099 3 2 1.500
## 8 0.377 2 1 2.000
## 9 0.560 2 1 2.000
## 10 0.000 1 0 Inf
## 11 0.633 2 1 2.000
## 12 0.706 2 1 2.000
## 13 0.554 2 1 2.000
## 14 0.532 2 1 2.000
## 15 0.648 2 1 2.000
## 16 0.693 2 1 2.000
## 17 0.676 2 1 2.000
## 18 0.755 2 1 2.000
## 19 0.893 2 1 2.000
## 20 0.862 2 1 2.000
## 21 0.797 7 1 7.000
## 22 0.674 4 1 4.000
## 23 0.143 6 2 3.000
## 24 0.943 6 2 3.000
## 25 0.695 6 2 3.000
## 26 0.754 6 2 3.000
## 27 0.684 11 2 5.500
## 28 0.440 11 2 5.500
## 29 0.904 6 3 2.000
## 30 0.851 6 1 6.000
## 31 0.851 2 1 2.000
## 32 0.504 2 1 2.000
## 33 0.186 2 1 2.000
## 34 0.000 1 0 Inf
## 35 0.423 2 1 2.000
## 36 0.304 2 1 2.000
## 37 0.000 1 0 Inf
## 38 0.676 2 1 2.000
## 39 0.537 2 1 2.000
## 40 0.472 2 1 2.000
## 41 0.465 2 1 2.000
## 42 0.000 1 0 Inf
## 43 0.213 2 1 2.000
## 44 0.222 2 1 2.000
## 45 0.211 2 1 2.000
## 46 0.018 2 1 2.000
## 47 0.135 2 1 2.000
## 48 0.074 2 1 2.000
## 49 0.078 3 2 1.500
## 50 0.022 5 3 1.667
## 51 0.003 3 2 1.500
## 52 0.003 4 3 1.333
## 53 0.005 3 2 1.500
## 54 0.002 4 3 1.333
## 55 0.008 5 4 1.250
## 56 0.005 4 3 1.333
## 57 0.004 6 4 1.500
## 58 0.004 5 4 1.250
## 59 0.005 5 4 1.250
## 60 0.003 6 5 1.200
## Minimum Mean Median Maximum Skewness Kurtosis Percentile25th Percentile75th
## 1 0.002 0.029 0.023 0.137 1.945 7.608 0.014 0.036
## 2 0.002 0.038 0.031 0.163 1.604 5.752 0.016 0.048
## 3 0.002 0.043 0.035 0.166 1.467 5.425 0.018 0.060
## 4 0.006 0.052 0.044 0.173 1.259 4.317 0.024 0.066
## 5 0.007 0.072 0.061 0.256 1.167 4.555 0.036 0.099
## 6 0.010 0.103 0.092 0.382 1.375 6.358 0.067 0.131
## 7 0.013 0.121 0.105 0.373 1.008 4.620 0.084 0.150
## 8 0.006 0.135 0.112 0.457 1.400 5.331 0.078 0.172
## 9 0.012 0.180 0.152 0.683 1.611 6.269 0.096 0.239
## 10 0.011 0.209 0.180 0.711 1.278 4.581 0.107 0.274
## 11 0.029 0.237 0.221 0.734 0.986 4.104 0.126 0.308
## 12 0.024 0.249 0.248 0.706 0.584 2.867 0.128 0.334
## 13 0.018 0.273 0.266 0.713 0.687 3.213 0.163 0.358
## 14 0.027 0.297 0.279 0.997 1.028 4.457 0.166 0.395
## 15 0.003 0.315 0.262 1.000 0.811 3.092 0.155 0.452
## 16 0.016 0.371 0.293 0.999 0.679 2.388 0.184 0.536
## 17 0.035 0.409 0.304 1.000 0.653 2.145 0.202 0.660
## 18 0.038 0.448 0.366 1.000 0.541 1.961 0.234 0.676
## 19 0.049 0.501 0.431 1.000 0.297 1.842 0.299 0.731
## 20 0.074 0.557 0.522 1.000 -0.048 1.764 0.335 0.799
## 21 0.051 0.599 0.591 1.000 -0.206 1.955 0.392 0.815
## 22 0.022 0.613 0.646 1.000 -0.325 2.010 0.399 0.832
## 23 0.056 0.639 0.681 1.000 -0.571 2.356 0.448 0.852
## 24 0.024 0.668 0.695 1.000 -0.684 2.746 0.541 0.869
## 25 0.024 0.670 0.722 1.000 -0.815 2.860 0.525 0.862
## 26 0.154 0.695 0.753 1.000 -0.690 2.496 0.547 0.880
## 27 0.087 0.698 0.721 1.000 -0.562 2.352 0.516 0.900
## 28 0.028 0.695 0.728 1.000 -0.544 2.358 0.510 0.905
## 29 0.014 0.653 0.690 1.000 -0.503 2.401 0.474 0.862
## 30 0.061 0.601 0.621 1.000 -0.182 2.404 0.445 0.754
## 31 0.100 0.521 0.497 0.966 0.218 2.146 0.349 0.680
## 32 0.040 0.446 0.424 0.931 0.283 2.161 0.281 0.622
## 33 0.048 0.426 0.392 1.000 0.406 2.421 0.269 0.577
## 34 0.021 0.412 0.378 0.965 0.489 2.345 0.218 0.605
## 35 0.022 0.398 0.333 1.000 0.558 2.265 0.184 0.611
## 36 0.008 0.387 0.317 1.000 0.659 2.385 0.155 0.566
## 37 0.035 0.368 0.304 0.950 0.648 2.311 0.159 0.539
## 38 0.038 0.343 0.310 1.000 0.979 3.362 0.178 0.441
## 39 0.037 0.332 0.283 0.986 0.858 3.179 0.179 0.459
## 40 0.012 0.315 0.279 0.917 0.773 3.539 0.192 0.425
## 41 0.044 0.298 0.265 0.775 0.740 3.037 0.169 0.402
## 42 0.006 0.285 0.259 0.825 0.814 3.530 0.172 0.385
## 43 0.000 0.250 0.228 0.773 0.921 3.992 0.155 0.322
## 44 0.000 0.215 0.175 0.776 1.271 4.382 0.125 0.265
## 45 0.000 0.200 0.147 0.703 1.314 3.825 0.097 0.236
## 46 0.000 0.163 0.120 0.729 1.674 5.816 0.069 0.201
## 47 0.000 0.125 0.103 0.552 1.770 7.024 0.066 0.155
## 48 0.000 0.095 0.081 0.334 1.224 4.510 0.046 0.122
## 49 0.000 0.054 0.045 0.179 1.065 3.769 0.029 0.072
## 50 0.000 0.021 0.018 0.082 1.681 6.763 0.012 0.025
## 51 0.000 0.016 0.014 0.100 2.850 17.472 0.008 0.021
## 52 0.001 0.014 0.011 0.071 2.156 10.103 0.007 0.017
## 53 0.000 0.011 0.008 0.039 1.067 3.957 0.005 0.015
## 54 0.001 0.011 0.010 0.035 1.173 4.193 0.005 0.014
## 55 0.001 0.009 0.007 0.045 1.857 8.514 0.004 0.012
## 56 0.000 0.008 0.007 0.039 1.960 9.547 0.004 0.010
## 57 0.000 0.008 0.006 0.035 1.784 7.243 0.004 0.010
## 58 0.000 0.008 0.006 0.044 2.125 9.392 0.004 0.010
## 59 0.000 0.008 0.006 0.029 1.456 5.083 0.003 0.010
## 60 0.001 0.006 0.005 0.022 1.290 4.732 0.003 0.009
##################################
# Identifying potential data quality issues
##################################
##################################
# Checking for missing observations
##################################
# Report variables with missing values; the tail string belongs inside paste0()
# (the scrambled original closed paste0 early, passing it to print as a 2nd arg).
if (nrow(DQA.Summary[DQA.Summary$NA.Count > 0, ]) > 0) {
  print(paste0("Missing observations noted for ",
               nrow(DQA.Summary[DQA.Summary$NA.Count > 0, ]),
               " variable(s) with NA.Count>0 and Fill.Rate<1.0."))
  # Show the affected rows of the data quality summary
  DQA.Summary[DQA.Summary$NA.Count > 0, ]
} else {
  print("No missing observations noted.")
}
## [1] "No missing observations noted."
##################################
# Checking for zero or near-zero variance predictors
##################################
# Flag factor predictors whose most frequent value dominates the second most
# frequent by more than 5:1 (near-zero variance heuristic).
if (length(names(DQA.Predictors.Factor)) == 0) {
  print("No factor predictors noted.")
} else if (nrow(DQA.Predictors.Factor.Summary[
             as.numeric(as.character(DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio)) > 5, ]) > 0) {
  print(paste0("Low variance observed for ",
               nrow(DQA.Predictors.Factor.Summary[
                 as.numeric(as.character(DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio)) > 5, ]),
               " factor variable(s) with First.Second.Mode.Ratio>5."))
  # Show the affected rows of the factor summary
  DQA.Predictors.Factor.Summary[
    as.numeric(as.character(DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio)) > 5, ]
} else {
  print("No low variance factor predictors due to high first-second mode ratio noted.")
}
## [1] "No factor predictors noted."
# Flag numeric predictors whose first/second mode count ratio exceeds 5
# (same near-zero-variance heuristic as the factor check above).
if (length(names(DQA.Predictors.Numeric)) == 0) {
  print("No numeric predictors noted.")
} else if (nrow(DQA.Predictors.Numeric.Summary[
             as.numeric(as.character(DQA.Predictors.Numeric.Summary$First.Second.Mode.Ratio)) > 5, ]) > 0) {
  print(paste0("Low variance observed for ",
               nrow(DQA.Predictors.Numeric.Summary[
                 as.numeric(as.character(DQA.Predictors.Numeric.Summary$First.Second.Mode.Ratio)) > 5, ]),
               " numeric variable(s) with First.Second.Mode.Ratio>5."))
  # Show the affected rows of the numeric summary
  DQA.Predictors.Numeric.Summary[
    as.numeric(as.character(DQA.Predictors.Numeric.Summary$First.Second.Mode.Ratio)) > 5, ]
} else {
  print("No low variance numeric predictors due to high first-second mode ratio noted.")
}
## [1] "Low variance observed for 8 numeric variable(s) with First.Second.Mode.Ratio>5."
## Column.Name Column.Type Unique.Count Unique.Count.Ratio First.Mode.Value
## 10 V10 numeric 167 1.000 0.619
## 21 V21 numeric 161 0.964 1.000
## 27 V27 numeric 155 0.928 1.000
## 28 V28 numeric 154 0.922 1.000
## 30 V30 numeric 162 0.970 1.000
## 34 V34 numeric 167 1.000 0.271
## 37 V37 numeric 167 1.000 0.612
## 42 V42 numeric 167 1.000 0.259
## Second.Mode.Value First.Mode.Count Second.Mode.Count First.Second.Mode.Ratio
## 10 0.000 1 0 Inf
## 21 0.797 7 1 7.000
## 27 0.684 11 2 5.500
## 28 0.440 11 2 5.500
## 30 0.851 6 1 6.000
## 34 0.000 1 0 Inf
## 37 0.000 1 0 Inf
## 42 0.000 1 0 Inf
## Minimum Mean Median Maximum Skewness Kurtosis Percentile25th Percentile75th
## 10 0.011 0.209 0.180 0.711 1.278 4.581 0.107 0.274
## 21 0.051 0.599 0.591 1.000 -0.206 1.955 0.392 0.815
## 27 0.087 0.698 0.721 1.000 -0.562 2.352 0.516 0.900
## 28 0.028 0.695 0.728 1.000 -0.544 2.358 0.510 0.905
## 30 0.061 0.601 0.621 1.000 -0.182 2.404 0.445 0.754
## 34 0.021 0.412 0.378 0.965 0.489 2.345 0.218 0.605
## 37 0.035 0.368 0.304 0.950 0.648 2.311 0.159 0.539
## 42 0.006 0.285 0.259 0.825 0.814 3.530 0.172 0.385
# Flag numeric predictors with very few distinct values relative to row count
# (Unique.Count.Ratio < 1%), another near-zero-variance symptom.
if (length(names(DQA.Predictors.Numeric)) == 0) {
  print("No numeric predictors noted.")
} else if (nrow(DQA.Predictors.Numeric.Summary[
             as.numeric(as.character(DQA.Predictors.Numeric.Summary$Unique.Count.Ratio)) < 0.01, ]) > 0) {
  print(paste0("Low variance observed for ",
               nrow(DQA.Predictors.Numeric.Summary[
                 as.numeric(as.character(DQA.Predictors.Numeric.Summary$Unique.Count.Ratio)) < 0.01, ]),
               " numeric variable(s) with Unique.Count.Ratio<0.01."))
  # Show the affected rows of the numeric summary
  DQA.Predictors.Numeric.Summary[
    as.numeric(as.character(DQA.Predictors.Numeric.Summary$Unique.Count.Ratio)) < 0.01, ]
} else {
  print("No low variance numeric predictors due to low unique count ratio noted.")
}
## [1] "No low variance numeric predictors due to low unique count ratio noted."
##################################
# Checking for skewed predictors
##################################
# Flag numeric predictors with |Skewness| > 3, candidates for transformation.
if (length(names(DQA.Predictors.Numeric)) == 0) {
  print("No numeric predictors noted.")
} else if (nrow(DQA.Predictors.Numeric.Summary[
             as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness)) > 3 |
             as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness)) < (-3), ]) > 0) {
  print(paste0("High skewness observed for ",
               nrow(DQA.Predictors.Numeric.Summary[
                 as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness)) > 3 |
                 as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness)) < (-3), ]),
               " numeric variable(s) with Skewness>3 or Skewness<(-3)."))
  # Show the affected rows of the numeric summary
  DQA.Predictors.Numeric.Summary[
    as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness)) > 3 |
    as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness)) < (-3), ]
} else {
  print("No skewed numeric predictors noted.")
}
## [1] "No skewed numeric predictors noted."
##################################
# Loading dataset
##################################
DPA <- Sonar_Train

##################################
# Listing all predictors
##################################
DPA.Predictors <- DPA[, !names(DPA) %in% c("Class")]

##################################
# Listing all numeric predictors
##################################
DPA.Predictors.Numeric <- DPA.Predictors[, sapply(DPA.Predictors, is.numeric)]

##################################
# Identifying outliers for the numeric predictors
##################################
# Count boxplot-rule outliers per column and draw one horizontal boxplot each.
OutlierCountList <- c()

for (i in seq_len(ncol(DPA.Predictors.Numeric))) {
  # boxplot.stats()$out holds points beyond 1.5*IQR from the hinges
  Outliers <- boxplot.stats(DPA.Predictors.Numeric[, i])$out
  OutlierCount <- length(Outliers)
  OutlierCountList <- append(OutlierCountList, OutlierCount)
  OutlierIndices <- which(DPA.Predictors.Numeric[, i] %in% c(Outliers))
  boxplot(DPA.Predictors.Numeric[, i],
          ylab = names(DPA.Predictors.Numeric)[i],
          main = names(DPA.Predictors.Numeric)[i],
          horizontal = TRUE)
  mtext(paste0(OutlierCount, " Outlier(s) Detected"))
}

# Summarize per-predictor outlier counts; cbind() coerces to character,
# so OutlierCount is converted back to numeric below.
OutlierCountSummary <- as.data.frame(cbind(names(DPA.Predictors.Numeric),
                                           OutlierCountList))
names(OutlierCountSummary) <- c("NumericPredictors", "OutlierCount")
OutlierCountSummary$OutlierCount <- as.numeric(as.character(OutlierCountSummary$OutlierCount))
NumericPredictorWithOutlierCount <- nrow(OutlierCountSummary[OutlierCountSummary$OutlierCount > 0, ])
print(paste0(NumericPredictorWithOutlierCount, " numeric variable(s) were noted with outlier(s)." ))
## [1] "39 numeric variable(s) were noted with outlier(s)."
##################################
# Gathering descriptive statistics
##################################
# Outer parentheses make the assignment print the skim summary as well.
(DPA_Skimmed <- skim(DPA.Predictors.Numeric))
Name | DPA.Predictors.Numeric |
Number of rows | 167 |
Number of columns | 60 |
_______________________ | |
Column type frequency: | |
numeric | 60 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
V1 | 0 | 1 | 0.03 | 0.02 | 0.00 | 0.01 | 0.02 | 0.04 | 0.14 | ▇▃▁▁▁ |
V2 | 0 | 1 | 0.04 | 0.03 | 0.00 | 0.02 | 0.03 | 0.05 | 0.16 | ▇▅▁▁▁ |
V3 | 0 | 1 | 0.04 | 0.03 | 0.00 | 0.02 | 0.03 | 0.06 | 0.17 | ▇▆▂▁▁ |
V4 | 0 | 1 | 0.05 | 0.04 | 0.01 | 0.02 | 0.04 | 0.07 | 0.17 | ▇▆▂▁▁ |
V5 | 0 | 1 | 0.07 | 0.05 | 0.01 | 0.04 | 0.06 | 0.10 | 0.26 | ▇▆▃▁▁ |
V6 | 0 | 1 | 0.10 | 0.06 | 0.01 | 0.07 | 0.09 | 0.13 | 0.38 | ▇▇▂▁▁ |
V7 | 0 | 1 | 0.12 | 0.06 | 0.01 | 0.08 | 0.11 | 0.15 | 0.37 | ▅▇▃▁▁ |
V8 | 0 | 1 | 0.13 | 0.09 | 0.01 | 0.08 | 0.11 | 0.17 | 0.46 | ▇▇▃▁▁ |
V9 | 0 | 1 | 0.18 | 0.12 | 0.01 | 0.10 | 0.15 | 0.24 | 0.68 | ▇▆▂▁▁ |
V10 | 0 | 1 | 0.21 | 0.14 | 0.01 | 0.11 | 0.18 | 0.27 | 0.71 | ▇▆▃▁▁ |
V11 | 0 | 1 | 0.24 | 0.14 | 0.03 | 0.13 | 0.22 | 0.31 | 0.73 | ▇▇▃▁▁ |
V12 | 0 | 1 | 0.25 | 0.14 | 0.02 | 0.13 | 0.25 | 0.33 | 0.71 | ▇▇▆▂▁ |
V13 | 0 | 1 | 0.27 | 0.14 | 0.02 | 0.16 | 0.27 | 0.36 | 0.71 | ▅▇▅▂▁ |
V14 | 0 | 1 | 0.30 | 0.17 | 0.03 | 0.17 | 0.28 | 0.39 | 1.00 | ▇▇▃▁▁ |
V15 | 0 | 1 | 0.32 | 0.21 | 0.00 | 0.15 | 0.26 | 0.45 | 1.00 | ▇▆▅▂▁ |
V16 | 0 | 1 | 0.37 | 0.23 | 0.02 | 0.18 | 0.29 | 0.54 | 1.00 | ▇▆▃▃▁ |
V17 | 0 | 1 | 0.41 | 0.27 | 0.03 | 0.20 | 0.30 | 0.66 | 1.00 | ▇▇▃▅▃ |
V18 | 0 | 1 | 0.45 | 0.26 | 0.04 | 0.23 | 0.37 | 0.68 | 1.00 | ▆▇▂▅▃ |
V19 | 0 | 1 | 0.50 | 0.26 | 0.05 | 0.30 | 0.43 | 0.73 | 1.00 | ▃▇▃▅▅ |
V20 | 0 | 1 | 0.56 | 0.26 | 0.07 | 0.33 | 0.52 | 0.80 | 1.00 | ▅▇▆▆▇ |
V21 | 0 | 1 | 0.60 | 0.26 | 0.05 | 0.39 | 0.59 | 0.82 | 1.00 | ▃▆▆▆▇ |
V22 | 0 | 1 | 0.61 | 0.26 | 0.02 | 0.40 | 0.65 | 0.83 | 1.00 | ▂▅▅▆▇ |
V23 | 0 | 1 | 0.64 | 0.25 | 0.06 | 0.45 | 0.68 | 0.85 | 1.00 | ▂▃▅▆▇ |
V24 | 0 | 1 | 0.67 | 0.24 | 0.02 | 0.54 | 0.70 | 0.87 | 1.00 | ▂▂▅▇▇ |
V25 | 0 | 1 | 0.67 | 0.25 | 0.02 | 0.52 | 0.72 | 0.86 | 1.00 | ▂▂▃▇▇ |
V26 | 0 | 1 | 0.69 | 0.23 | 0.15 | 0.55 | 0.75 | 0.88 | 1.00 | ▂▂▅▆▇ |
V27 | 0 | 1 | 0.70 | 0.24 | 0.09 | 0.52 | 0.72 | 0.90 | 1.00 | ▁▂▅▃▇ |
V28 | 0 | 1 | 0.70 | 0.24 | 0.03 | 0.51 | 0.73 | 0.91 | 1.00 | ▁▂▃▅▇ |
V29 | 0 | 1 | 0.65 | 0.24 | 0.01 | 0.47 | 0.69 | 0.86 | 1.00 | ▁▂▆▅▇ |
V30 | 0 | 1 | 0.60 | 0.22 | 0.06 | 0.44 | 0.62 | 0.75 | 1.00 | ▂▅▇▇▅ |
V31 | 0 | 1 | 0.52 | 0.22 | 0.10 | 0.35 | 0.50 | 0.68 | 0.97 | ▅▇▇▅▅ |
V32 | 0 | 1 | 0.45 | 0.22 | 0.04 | 0.28 | 0.42 | 0.62 | 0.93 | ▃▇▆▅▃ |
V33 | 0 | 1 | 0.43 | 0.21 | 0.05 | 0.27 | 0.39 | 0.58 | 1.00 | ▅▇▆▅▁ |
V34 | 0 | 1 | 0.41 | 0.24 | 0.02 | 0.22 | 0.38 | 0.60 | 0.96 | ▆▇▅▅▂ |
V35 | 0 | 1 | 0.40 | 0.26 | 0.02 | 0.18 | 0.33 | 0.61 | 1.00 | ▇▇▃▅▂ |
V36 | 0 | 1 | 0.39 | 0.27 | 0.01 | 0.16 | 0.32 | 0.57 | 1.00 | ▇▆▅▃▂ |
V37 | 0 | 1 | 0.37 | 0.24 | 0.04 | 0.16 | 0.30 | 0.54 | 0.95 | ▇▅▅▂▂ |
V38 | 0 | 1 | 0.34 | 0.22 | 0.04 | 0.18 | 0.31 | 0.44 | 1.00 | ▇▇▃▂▁ |
V39 | 0 | 1 | 0.33 | 0.20 | 0.04 | 0.18 | 0.28 | 0.46 | 0.99 | ▇▇▃▂▁ |
V40 | 0 | 1 | 0.31 | 0.18 | 0.01 | 0.19 | 0.28 | 0.43 | 0.92 | ▅▇▅▂▁ |
V41 | 0 | 1 | 0.30 | 0.16 | 0.04 | 0.17 | 0.26 | 0.40 | 0.78 | ▆▇▃▂▁ |
V42 | 0 | 1 | 0.28 | 0.17 | 0.01 | 0.17 | 0.26 | 0.39 | 0.82 | ▅▇▅▁▁ |
V43 | 0 | 1 | 0.25 | 0.14 | 0.00 | 0.16 | 0.23 | 0.32 | 0.77 | ▃▇▃▁▁ |
V44 | 0 | 1 | 0.21 | 0.14 | 0.00 | 0.13 | 0.18 | 0.27 | 0.78 | ▇▇▂▂▁ |
V45 | 0 | 1 | 0.20 | 0.16 | 0.00 | 0.10 | 0.15 | 0.24 | 0.70 | ▇▆▁▂▁ |
V46 | 0 | 1 | 0.16 | 0.14 | 0.00 | 0.07 | 0.12 | 0.20 | 0.73 | ▇▃▁▁▁ |
V47 | 0 | 1 | 0.12 | 0.09 | 0.00 | 0.07 | 0.10 | 0.15 | 0.55 | ▇▅▁▁▁ |
V48 | 0 | 1 | 0.09 | 0.06 | 0.00 | 0.05 | 0.08 | 0.12 | 0.33 | ▇▇▂▁▁ |
V49 | 0 | 1 | 0.05 | 0.04 | 0.00 | 0.03 | 0.05 | 0.07 | 0.18 | ▇▇▃▂▁ |
V50 | 0 | 1 | 0.02 | 0.01 | 0.00 | 0.01 | 0.02 | 0.03 | 0.08 | ▇▇▂▁▁ |
V51 | 0 | 1 | 0.02 | 0.01 | 0.00 | 0.01 | 0.01 | 0.02 | 0.10 | ▇▃▁▁▁ |
V52 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.01 | 0.01 | 0.02 | 0.07 | ▇▃▁▁▁ |
V53 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.00 | 0.01 | 0.02 | 0.04 | ▇▅▂▁▁ |
V54 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.01 | 0.01 | 0.01 | 0.04 | ▆▇▂▁▁ |
V55 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.00 | 0.01 | 0.01 | 0.04 | ▇▃▁▁▁ |
V56 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.00 | 0.01 | 0.01 | 0.04 | ▇▅▁▁▁ |
V57 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.00 | 0.01 | 0.01 | 0.04 | ▇▅▁▁▁ |
V58 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.00 | 0.01 | 0.01 | 0.04 | ▇▃▁▁▁ |
V59 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.00 | 0.01 | 0.01 | 0.03 | ▇▆▁▁▁ |
V60 | 0 | 1 | 0.01 | 0.00 | 0.00 | 0.00 | 0.01 | 0.01 | 0.02 | ▇▆▂▁▁ |
###################################
# Verifying the data dimensions
###################################
# Expect 167 rows x 60 predictors (Class column excluded above)
dim(DPA.Predictors.Numeric)
## [1] 167 60
##################################
# Loading dataset
##################################
DPA <- Sonar_Train

##################################
# Gathering descriptive statistics
##################################
# Skim the full train set (factor response + 60 numeric predictors);
# outer parentheses print the summary.
(DPA_Skimmed <- skim(DPA))
Name | DPA |
Number of rows | 167 |
Number of columns | 61 |
_______________________ | |
Column type frequency: | |
factor | 1 |
numeric | 60 |
________________________ | |
Group variables | None |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
Class | 0 | 1 | FALSE | 2 | M: 89, R: 78 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
V1 | 0 | 1 | 0.03 | 0.02 | 0.00 | 0.01 | 0.02 | 0.04 | 0.14 | ▇▃▁▁▁ |
V2 | 0 | 1 | 0.04 | 0.03 | 0.00 | 0.02 | 0.03 | 0.05 | 0.16 | ▇▅▁▁▁ |
V3 | 0 | 1 | 0.04 | 0.03 | 0.00 | 0.02 | 0.03 | 0.06 | 0.17 | ▇▆▂▁▁ |
V4 | 0 | 1 | 0.05 | 0.04 | 0.01 | 0.02 | 0.04 | 0.07 | 0.17 | ▇▆▂▁▁ |
V5 | 0 | 1 | 0.07 | 0.05 | 0.01 | 0.04 | 0.06 | 0.10 | 0.26 | ▇▆▃▁▁ |
V6 | 0 | 1 | 0.10 | 0.06 | 0.01 | 0.07 | 0.09 | 0.13 | 0.38 | ▇▇▂▁▁ |
V7 | 0 | 1 | 0.12 | 0.06 | 0.01 | 0.08 | 0.11 | 0.15 | 0.37 | ▅▇▃▁▁ |
V8 | 0 | 1 | 0.13 | 0.09 | 0.01 | 0.08 | 0.11 | 0.17 | 0.46 | ▇▇▃▁▁ |
V9 | 0 | 1 | 0.18 | 0.12 | 0.01 | 0.10 | 0.15 | 0.24 | 0.68 | ▇▆▂▁▁ |
V10 | 0 | 1 | 0.21 | 0.14 | 0.01 | 0.11 | 0.18 | 0.27 | 0.71 | ▇▆▃▁▁ |
V11 | 0 | 1 | 0.24 | 0.14 | 0.03 | 0.13 | 0.22 | 0.31 | 0.73 | ▇▇▃▁▁ |
V12 | 0 | 1 | 0.25 | 0.14 | 0.02 | 0.13 | 0.25 | 0.33 | 0.71 | ▇▇▆▂▁ |
V13 | 0 | 1 | 0.27 | 0.14 | 0.02 | 0.16 | 0.27 | 0.36 | 0.71 | ▅▇▅▂▁ |
V14 | 0 | 1 | 0.30 | 0.17 | 0.03 | 0.17 | 0.28 | 0.39 | 1.00 | ▇▇▃▁▁ |
V15 | 0 | 1 | 0.32 | 0.21 | 0.00 | 0.15 | 0.26 | 0.45 | 1.00 | ▇▆▅▂▁ |
V16 | 0 | 1 | 0.37 | 0.23 | 0.02 | 0.18 | 0.29 | 0.54 | 1.00 | ▇▆▃▃▁ |
V17 | 0 | 1 | 0.41 | 0.27 | 0.03 | 0.20 | 0.30 | 0.66 | 1.00 | ▇▇▃▅▃ |
V18 | 0 | 1 | 0.45 | 0.26 | 0.04 | 0.23 | 0.37 | 0.68 | 1.00 | ▆▇▂▅▃ |
V19 | 0 | 1 | 0.50 | 0.26 | 0.05 | 0.30 | 0.43 | 0.73 | 1.00 | ▃▇▃▅▅ |
V20 | 0 | 1 | 0.56 | 0.26 | 0.07 | 0.33 | 0.52 | 0.80 | 1.00 | ▅▇▆▆▇ |
V21 | 0 | 1 | 0.60 | 0.26 | 0.05 | 0.39 | 0.59 | 0.82 | 1.00 | ▃▆▆▆▇ |
V22 | 0 | 1 | 0.61 | 0.26 | 0.02 | 0.40 | 0.65 | 0.83 | 1.00 | ▂▅▅▆▇ |
V23 | 0 | 1 | 0.64 | 0.25 | 0.06 | 0.45 | 0.68 | 0.85 | 1.00 | ▂▃▅▆▇ |
V24 | 0 | 1 | 0.67 | 0.24 | 0.02 | 0.54 | 0.70 | 0.87 | 1.00 | ▂▂▅▇▇ |
V25 | 0 | 1 | 0.67 | 0.25 | 0.02 | 0.52 | 0.72 | 0.86 | 1.00 | ▂▂▃▇▇ |
V26 | 0 | 1 | 0.69 | 0.23 | 0.15 | 0.55 | 0.75 | 0.88 | 1.00 | ▂▂▅▆▇ |
V27 | 0 | 1 | 0.70 | 0.24 | 0.09 | 0.52 | 0.72 | 0.90 | 1.00 | ▁▂▅▃▇ |
V28 | 0 | 1 | 0.70 | 0.24 | 0.03 | 0.51 | 0.73 | 0.91 | 1.00 | ▁▂▃▅▇ |
V29 | 0 | 1 | 0.65 | 0.24 | 0.01 | 0.47 | 0.69 | 0.86 | 1.00 | ▁▂▆▅▇ |
V30 | 0 | 1 | 0.60 | 0.22 | 0.06 | 0.44 | 0.62 | 0.75 | 1.00 | ▂▅▇▇▅ |
V31 | 0 | 1 | 0.52 | 0.22 | 0.10 | 0.35 | 0.50 | 0.68 | 0.97 | ▅▇▇▅▅ |
V32 | 0 | 1 | 0.45 | 0.22 | 0.04 | 0.28 | 0.42 | 0.62 | 0.93 | ▃▇▆▅▃ |
V33 | 0 | 1 | 0.43 | 0.21 | 0.05 | 0.27 | 0.39 | 0.58 | 1.00 | ▅▇▆▅▁ |
V34 | 0 | 1 | 0.41 | 0.24 | 0.02 | 0.22 | 0.38 | 0.60 | 0.96 | ▆▇▅▅▂ |
V35 | 0 | 1 | 0.40 | 0.26 | 0.02 | 0.18 | 0.33 | 0.61 | 1.00 | ▇▇▃▅▂ |
V36 | 0 | 1 | 0.39 | 0.27 | 0.01 | 0.16 | 0.32 | 0.57 | 1.00 | ▇▆▅▃▂ |
V37 | 0 | 1 | 0.37 | 0.24 | 0.04 | 0.16 | 0.30 | 0.54 | 0.95 | ▇▅▅▂▂ |
V38 | 0 | 1 | 0.34 | 0.22 | 0.04 | 0.18 | 0.31 | 0.44 | 1.00 | ▇▇▃▂▁ |
V39 | 0 | 1 | 0.33 | 0.20 | 0.04 | 0.18 | 0.28 | 0.46 | 0.99 | ▇▇▃▂▁ |
V40 | 0 | 1 | 0.31 | 0.18 | 0.01 | 0.19 | 0.28 | 0.43 | 0.92 | ▅▇▅▂▁ |
V41 | 0 | 1 | 0.30 | 0.16 | 0.04 | 0.17 | 0.26 | 0.40 | 0.78 | ▆▇▃▂▁ |
V42 | 0 | 1 | 0.28 | 0.17 | 0.01 | 0.17 | 0.26 | 0.39 | 0.82 | ▅▇▅▁▁ |
V43 | 0 | 1 | 0.25 | 0.14 | 0.00 | 0.16 | 0.23 | 0.32 | 0.77 | ▃▇▃▁▁ |
V44 | 0 | 1 | 0.21 | 0.14 | 0.00 | 0.13 | 0.18 | 0.27 | 0.78 | ▇▇▂▂▁ |
V45 | 0 | 1 | 0.20 | 0.16 | 0.00 | 0.10 | 0.15 | 0.24 | 0.70 | ▇▆▁▂▁ |
V46 | 0 | 1 | 0.16 | 0.14 | 0.00 | 0.07 | 0.12 | 0.20 | 0.73 | ▇▃▁▁▁ |
V47 | 0 | 1 | 0.12 | 0.09 | 0.00 | 0.07 | 0.10 | 0.15 | 0.55 | ▇▅▁▁▁ |
V48 | 0 | 1 | 0.09 | 0.06 | 0.00 | 0.05 | 0.08 | 0.12 | 0.33 | ▇▇▂▁▁ |
V49 | 0 | 1 | 0.05 | 0.04 | 0.00 | 0.03 | 0.05 | 0.07 | 0.18 | ▇▇▃▂▁ |
V50 | 0 | 1 | 0.02 | 0.01 | 0.00 | 0.01 | 0.02 | 0.03 | 0.08 | ▇▇▂▁▁ |
V51 | 0 | 1 | 0.02 | 0.01 | 0.00 | 0.01 | 0.01 | 0.02 | 0.10 | ▇▃▁▁▁ |
V52 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.01 | 0.01 | 0.02 | 0.07 | ▇▃▁▁▁ |
V53 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.00 | 0.01 | 0.02 | 0.04 | ▇▅▂▁▁ |
V54 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.01 | 0.01 | 0.01 | 0.04 | ▆▇▂▁▁ |
V55 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.00 | 0.01 | 0.01 | 0.04 | ▇▃▁▁▁ |
V56 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.00 | 0.01 | 0.01 | 0.04 | ▇▅▁▁▁ |
V57 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.00 | 0.01 | 0.01 | 0.04 | ▇▅▁▁▁ |
V58 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.00 | 0.01 | 0.01 | 0.04 | ▇▃▁▁▁ |
V59 | 0 | 1 | 0.01 | 0.01 | 0.00 | 0.00 | 0.01 | 0.01 | 0.03 | ▇▆▁▁▁ |
V60 | 0 | 1 | 0.01 | 0.00 | 0.00 | 0.00 | 0.01 | 0.01 | 0.02 | ▇▆▂▁▁ |
##################################
# Identifying columns with low variance
###################################
# caret::nearZeroVar with saveMetrics=TRUE returns per-column diagnostics
# (freqRatio, percentUnique, zeroVar, nzv) instead of column indices.
DPA_LowVariance <- nearZeroVar(DPA,
                               freqCut = 95/5,
                               uniqueCut = 10,
                               saveMetrics = TRUE)
(DPA_LowVariance[DPA_LowVariance$nzv, ])
## [1] freqRatio percentUnique zeroVar nzv
## <0 rows> (or 0-length row.names)
if (nrow(DPA_LowVariance[DPA_LowVariance$nzv, ]) == 0) {
  print("No low variance predictors noted.")
} else {
  print(paste0("Low variance observed for ",
               nrow(DPA_LowVariance[DPA_LowVariance$nzv, ]),
               " numeric variable(s) with First.Second.Mode.Ratio>4 and Unique.Count.Ratio<0.10."))

  DPA_LowVarianceForRemoval <- nrow(DPA_LowVariance[DPA_LowVariance$nzv, ])

  print(paste0("Low variance can be resolved by removing ",
               nrow(DPA_LowVariance[DPA_LowVariance$nzv, ]),
               " numeric variable(s)."))
  for (j in seq_len(DPA_LowVarianceForRemoval)) {
    DPA_LowVarianceRemovedVariable <- rownames(DPA_LowVariance[DPA_LowVariance$nzv, ])[j]
    print(paste0("Variable ",
                 j, " for removal: ",
                 DPA_LowVarianceRemovedVariable))
  }

  # Skim only the flagged columns for inspection
  DPA %>%
    skim() %>%
    dplyr::filter(skim_variable %in% rownames(DPA_LowVariance[DPA_LowVariance$nzv, ]))

  ##################################
  # Filtering out columns with low variance
  #################################
  DPA_ExcludedLowVariance <- DPA[, !names(DPA) %in% rownames(DPA_LowVariance[DPA_LowVariance$nzv, ])]

  ##################################
  # Gathering descriptive statistics
  ##################################
  (DPA_ExcludedLowVariance_Skimmed <- skim(DPA_ExcludedLowVariance))

  ###################################
  # Verifying the data dimensions
  ###################################
  dim(DPA_ExcludedLowVariance)
}
## [1] "No low variance predictors noted."
##################################
# Loading dataset
##################################
DPA <- Sonar_Train

##################################
# Listing all predictors
##################################
DPA.Predictors <- DPA[, !names(DPA) %in% c("Class")]

##################################
# Listing all numeric predictors
##################################
DPA.Predictors.Numeric <- DPA.Predictors[, sapply(DPA.Predictors, is.numeric)]

##################################
# Visualizing pairwise correlation between predictors
##################################
# cor.mtest supplies p-values so insignificant cells can be blanked out.
DPA_CorrelationTest <- cor.mtest(DPA.Predictors.Numeric,
                                 method = "pearson",
                                 conf.level = .95)
corrplot(cor(DPA.Predictors.Numeric,
             method = "pearson",
             use = "pairwise.complete.obs"),
         method = "circle",
         type = "upper",
         order = "original",
         tl.col = "black",
         tl.cex = 0.75,
         tl.srt = 90,
         sig.level = 0.05,
         p.mat = DPA_CorrelationTest$p,
         insig = "blank")

##################################
# Identifying the highly correlated variables
##################################
DPA_Correlation <- cor(DPA.Predictors.Numeric,
                       method = "pearson",
                       use = "pairwise.complete.obs")

# Count predictor pairs with |r| > 0.95 (upper triangle only, so each
# pair is counted once); outer parentheses print the count.
(DPA_HighlyCorrelatedCount <- sum(abs(DPA_Correlation[upper.tri(DPA_Correlation)]) > 0.95))
## [1] 0
# Report highly correlated pairs; lares::corr_cross ranks and plots them.
if (DPA_HighlyCorrelatedCount == 0) {
  print("No highly correlated predictors noted.")
} else {
  print(paste0("High correlation observed for ",
               DPA_HighlyCorrelatedCount,
               " pairs of numeric variable(s) with Correlation.Coefficient>0.95."))
  (DPA_HighlyCorrelatedPairs <- corr_cross(DPA.Predictors.Numeric,
                                           max_pvalue = 0.05,
                                           top = DPA_HighlyCorrelatedCount,
                                           rm.na = TRUE,
                                           grid = FALSE))
}
## [1] "No highly correlated predictors noted."
# When |r| > 0.95 pairs exist, caret::findCorrelation selects the minimal
# set of columns whose removal breaks every such pair.
if (DPA_HighlyCorrelatedCount > 0) {
  DPA_HighlyCorrelated <- findCorrelation(DPA_Correlation, cutoff = 0.95)

  (DPA_HighlyCorrelatedForRemoval <- length(DPA_HighlyCorrelated))

  print(paste0("High correlation can be resolved by removing ",
               DPA_HighlyCorrelatedForRemoval, " numeric variable(s)."))
  for (j in seq_len(DPA_HighlyCorrelatedForRemoval)) {
    DPA_HighlyCorrelatedRemovedVariable <- colnames(DPA.Predictors.Numeric)[DPA_HighlyCorrelated[j]]
    print(paste0("Variable ",
                 j, " for removal: ",
                 DPA_HighlyCorrelatedRemovedVariable))
  }

  ##################################
  # Filtering out columns with high correlation
  #################################
  DPA_ExcludedHighCorrelation <- DPA[, -DPA_HighlyCorrelated]

  ##################################
  # Gathering descriptive statistics
  ##################################
  (DPA_ExcludedHighCorrelation_Skimmed <- skim(DPA_ExcludedHighCorrelation))

  ###################################
  # Verifying the data dimensions
  ###################################
  dim(DPA_ExcludedHighCorrelation)
}
##################################
# Loading dataset
##################################
DPA <- Sonar_Train

##################################
# Listing all predictors
##################################
DPA.Predictors <- DPA[, !names(DPA) %in% c("Class")]

##################################
# Listing all numeric predictors
##################################
DPA.Predictors.Numeric <- DPA.Predictors[, sapply(DPA.Predictors, is.numeric)]

##################################
# Identifying the linearly dependent variables
##################################
# caret::findLinearCombos enumerates exact linear combinations via QR.
DPA_LinearlyDependent <- findLinearCombos(DPA.Predictors.Numeric)

(DPA_LinearlyDependentCount <- length(DPA_LinearlyDependent$linearCombos))
## [1] 0
# Report each linearly dependent subset. The member names are collapsed
# into a single message; the original passed the vector straight to
# paste0(), which printed one line per variable.
if (DPA_LinearlyDependentCount == 0) {
  print("No linearly dependent predictors noted.")
} else {
  print(paste0("Linear dependency observed for ",
               DPA_LinearlyDependentCount, " subset(s) of numeric variable(s)."))
  for (i in seq_len(DPA_LinearlyDependentCount)) {
    DPA_LinearlyDependentSubset <- colnames(DPA.Predictors.Numeric)[DPA_LinearlyDependent$linearCombos[[i]]]
    print(paste0("Linear dependent variable(s) for subset ",
                 i, " include: ",
                 paste(DPA_LinearlyDependentSubset, collapse = ", ")))
  }
}
## [1] "No linearly dependent predictors noted."
##################################
# Identifying the linearly dependent variables for removal
##################################
# $remove lists the minimal column set whose removal resolves all
# linear dependencies found above.
if (DPA_LinearlyDependentCount > 0) {
  DPA_LinearlyDependent <- findLinearCombos(DPA.Predictors.Numeric)

  DPA_LinearlyDependentForRemoval <- length(DPA_LinearlyDependent$remove)

  print(paste0("Linear dependency can be resolved by removing ",
               DPA_LinearlyDependentForRemoval, " numeric variable(s)."))
  for (j in seq_len(DPA_LinearlyDependentForRemoval)) {
    DPA_LinearlyDependentRemovedVariable <- colnames(DPA.Predictors.Numeric)[DPA_LinearlyDependent$remove[j]]
    print(paste0("Variable ",
                 j, " for removal: ",
                 DPA_LinearlyDependentRemovedVariable))
  }

  ##################################
  # Filtering out columns with linear dependency
  #################################
  DPA_ExcludedLinearlyDependent <- DPA[, -DPA_LinearlyDependent$remove]

  ##################################
  # Gathering descriptive statistics
  ##################################
  (DPA_ExcludedLinearlyDependent_Skimmed <- skim(DPA_ExcludedLinearlyDependent))

  ###################################
  # Verifying the data dimensions
  ###################################
  dim(DPA_ExcludedLinearlyDependent)
} else {
  ###################################
  # Verifying the data dimensions
  ###################################
  dim(DPA)
}
## [1] 167 61
##################################
# Loading dataset
##################################
DPA <- Sonar_Train

##################################
# Listing all predictors
##################################
DPA.Predictors <- DPA[, !names(DPA) %in% c("Class")]

##################################
# Listing all numeric predictors
##################################
DPA.Predictors.Numeric <- DPA.Predictors[, sapply(DPA.Predictors, is.numeric)]

##################################
# Applying a Box-Cox transformation
##################################
# preProcess estimates a per-column lambda; columns that cannot be
# transformed (e.g. containing zeros) are passed through unchanged.
DPA_BoxCox <- preProcess(DPA.Predictors.Numeric, method = c("BoxCox"))
DPA_BoxCoxTransformed <- predict(DPA_BoxCox, DPA.Predictors.Numeric)

##################################
# Gathering descriptive statistics
##################################
(DPA_BoxCoxTransformedSkimmed <- skim(DPA_BoxCoxTransformed))
Name | DPA_BoxCoxTransformed |
Number of rows | 167 |
Number of columns | 60 |
_______________________ | |
Column type frequency: | |
numeric | 60 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
V1 | 0 | 1 | -3.79 | 0.75 | -6.50 | -4.28 | -3.78 | -3.31 | -1.99 | ▁▂▇▇▂ |
V2 | 0 | 1 | -2.53 | 0.42 | -3.60 | -2.80 | -2.51 | -2.28 | -1.52 | ▁▅▇▆▂ |
V3 | 0 | 1 | -2.11 | 0.29 | -2.86 | -2.33 | -2.12 | -1.90 | -1.39 | ▂▅▇▅▂ |
V4 | 0 | 1 | -2.35 | 0.40 | -3.21 | -2.63 | -2.32 | -2.10 | -1.48 | ▂▅▇▅▂ |
V5 | 0 | 1 | -1.89 | 0.31 | -2.59 | -2.10 | -1.89 | -1.67 | -1.12 | ▂▇▇▆▂ |
V6 | 0 | 1 | -1.53 | 0.23 | -2.10 | -1.65 | -1.54 | -1.39 | -0.80 | ▂▆▇▂▁ |
V7 | 0 | 1 | -1.46 | 0.22 | -2.06 | -1.57 | -1.48 | -1.33 | -0.82 | ▁▃▇▃▁ |
V8 | 0 | 1 | -1.43 | 0.28 | -2.18 | -1.60 | -1.46 | -1.26 | -0.67 | ▁▅▇▅▁ |
V9 | 0 | 1 | -1.57 | 0.47 | -2.95 | -1.87 | -1.57 | -1.24 | -0.37 | ▁▃▇▅▁ |
V10 | 0 | 1 | -1.34 | 0.41 | -2.46 | -1.63 | -1.34 | -1.07 | -0.32 | ▁▅▇▅▂ |
V11 | 0 | 1 | -1.25 | 0.38 | -2.18 | -1.54 | -1.21 | -0.99 | -0.30 | ▂▆▇▆▂ |
V12 | 0 | 1 | -1.05 | 0.29 | -1.69 | -1.28 | -1.00 | -0.84 | -0.32 | ▃▆▇▅▂ |
V13 | 0 | 1 | -0.99 | 0.28 | -1.73 | -1.19 | -0.97 | -0.80 | -0.31 | ▁▅▇▅▂ |
V14 | 0 | 1 | -1.10 | 0.40 | -2.20 | -1.39 | -1.06 | -0.81 | 0.00 | ▁▅▇▆▁ |
V15 | 0 | 1 | -1.02 | 0.44 | -2.25 | -1.31 | -1.04 | -0.68 | 0.00 | ▁▅▇▇▂ |
V16 | 0 | 1 | -0.97 | 0.48 | -2.37 | -1.33 | -1.03 | -0.57 | 0.00 | ▁▅▇▆▅ |
V17 | 0 | 1 | -0.97 | 0.57 | -2.44 | -1.37 | -1.06 | -0.40 | 0.00 | ▁▅▇▅▇ |
V18 | 0 | 1 | -0.81 | 0.48 | -2.09 | -1.18 | -0.87 | -0.37 | 0.00 | ▁▃▇▃▆ |
V19 | 0 | 1 | -0.64 | 0.38 | -1.56 | -0.91 | -0.69 | -0.29 | 0.00 | ▂▅▇▅▇ |
V20 | 0 | 1 | 0.56 | 0.26 | 0.07 | 0.33 | 0.52 | 0.80 | 1.00 | ▅▇▆▆▇ |
V21 | 0 | 1 | 0.60 | 0.26 | 0.05 | 0.39 | 0.59 | 0.82 | 1.00 | ▃▆▆▆▇ |
V22 | 0 | 1 | 0.61 | 0.26 | 0.02 | 0.40 | 0.65 | 0.83 | 1.00 | ▂▅▅▆▇ |
V23 | 0 | 1 | -0.33 | 0.21 | -0.75 | -0.50 | -0.30 | -0.14 | 0.00 | ▃▆▇▇▇ |
V24 | 0 | 1 | -0.29 | 0.19 | -0.71 | -0.41 | -0.28 | -0.13 | 0.00 | ▃▃▇▆▇ |
V25 | 0 | 1 | -0.28 | 0.18 | -0.66 | -0.41 | -0.26 | -0.13 | 0.00 | ▅▃▆▇▇ |
V26 | 0 | 1 | -0.25 | 0.16 | -0.56 | -0.38 | -0.23 | -0.11 | 0.00 | ▅▅▆▇▇ |
V27 | 0 | 1 | -0.26 | 0.19 | -0.65 | -0.42 | -0.26 | -0.10 | 0.00 | ▂▃▅▃▇ |
V28 | 0 | 1 | -0.27 | 0.20 | -0.71 | -0.44 | -0.26 | -0.09 | 0.00 | ▂▅▅▅▇ |
V29 | 0 | 1 | -0.31 | 0.20 | -0.77 | -0.48 | -0.29 | -0.14 | 0.00 | ▂▆▅▆▇ |
V30 | 0 | 1 | 0.60 | 0.22 | 0.06 | 0.44 | 0.62 | 0.75 | 1.00 | ▂▅▇▇▅ |
V31 | 0 | 1 | -0.57 | 0.29 | -1.25 | -0.78 | -0.57 | -0.34 | -0.03 | ▂▆▇▇▆ |
V32 | 0 | 1 | -0.67 | 0.32 | -1.42 | -0.89 | -0.67 | -0.41 | -0.07 | ▂▅▇▆▅ |
V33 | 0 | 1 | -0.74 | 0.33 | -1.56 | -0.96 | -0.75 | -0.48 | 0.00 | ▁▆▇▇▃ |
V34 | 0 | 1 | -0.77 | 0.38 | -1.71 | -1.07 | -0.77 | -0.44 | -0.04 | ▂▆▇▆▅ |
V35 | 0 | 1 | -0.87 | 0.48 | -1.95 | -1.23 | -0.89 | -0.45 | 0.00 | ▂▆▇▆▆ |
V36 | 0 | 1 | -0.90 | 0.49 | -2.14 | -1.31 | -0.92 | -0.51 | 0.00 | ▁▇▇▇▆ |
V37 | 0 | 1 | -0.99 | 0.52 | -2.11 | -1.41 | -1.00 | -0.56 | -0.05 | ▂▇▆▇▆ |
V38 | 0 | 1 | -1.09 | 0.52 | -2.40 | -1.46 | -1.04 | -0.76 | 0.00 | ▂▆▇▇▃ |
V39 | 0 | 1 | -1.03 | 0.44 | -2.09 | -1.34 | -1.05 | -0.69 | -0.01 | ▂▇▇▆▂ |
V40 | 0 | 1 | -0.93 | 0.32 | -1.78 | -1.12 | -0.94 | -0.70 | -0.09 | ▁▅▇▅▁ |
V41 | 0 | 1 | -1.02 | 0.34 | -1.78 | -1.27 | -1.03 | -0.76 | -0.24 | ▂▆▇▅▂ |
V42 | 0 | 1 | -0.98 | 0.32 | -1.85 | -1.17 | -0.98 | -0.76 | -0.18 | ▁▃▇▆▁ |
V43 | 0 | 1 | 0.25 | 0.14 | 0.00 | 0.16 | 0.23 | 0.32 | 0.77 | ▃▇▃▁▁ |
V44 | 0 | 1 | 0.21 | 0.14 | 0.00 | 0.13 | 0.18 | 0.27 | 0.78 | ▇▇▂▂▁ |
V45 | 0 | 1 | 0.20 | 0.16 | 0.00 | 0.10 | 0.15 | 0.24 | 0.70 | ▇▆▁▂▁ |
V46 | 0 | 1 | 0.16 | 0.14 | 0.00 | 0.07 | 0.12 | 0.20 | 0.73 | ▇▃▁▁▁ |
V47 | 0 | 1 | 0.12 | 0.09 | 0.00 | 0.07 | 0.10 | 0.15 | 0.55 | ▇▅▁▁▁ |
V48 | 0 | 1 | 0.09 | 0.06 | 0.00 | 0.05 | 0.08 | 0.12 | 0.33 | ▇▇▂▁▁ |
V49 | 0 | 1 | 0.05 | 0.04 | 0.00 | 0.03 | 0.05 | 0.07 | 0.18 | ▇▇▃▂▁ |
V50 | 0 | 1 | 0.02 | 0.01 | 0.00 | 0.01 | 0.02 | 0.03 | 0.08 | ▇▇▂▁▁ |
V51 | 0 | 1 | 0.02 | 0.01 | 0.00 | 0.01 | 0.01 | 0.02 | 0.10 | ▇▃▁▁▁ |
V52 | 0 | 1 | -4.54 | 0.71 | -7.13 | -4.93 | -4.49 | -4.09 | -2.65 | ▁▂▇▇▂ |
V53 | 0 | 1 | -3.07 | 0.30 | -3.91 | -3.29 | -3.10 | -2.83 | -2.39 | ▁▅▇▇▂ |
V54 | 0 | 1 | -2.52 | 0.17 | -2.91 | -2.64 | -2.51 | -2.41 | -2.11 | ▂▃▇▃▂ |
V55 | 0 | 1 | -3.13 | 0.28 | -3.87 | -3.33 | -3.12 | -2.93 | -2.31 | ▁▆▇▅▁ |
V56 | 0 | 1 | -2.58 | 0.16 | -3.01 | -2.68 | -2.58 | -2.49 | -2.07 | ▁▆▇▃▁ |
V57 | 0 | 1 | -3.18 | 0.27 | -4.01 | -3.37 | -3.20 | -3.00 | -2.44 | ▁▃▇▅▁ |
V58 | 0 | 1 | -5.11 | 0.81 | -8.11 | -5.63 | -5.12 | -4.57 | -3.12 | ▁▂▇▇▂ |
V59 | 0 | 1 | -2.61 | 0.18 | -3.12 | -2.72 | -2.62 | -2.50 | -2.18 | ▁▃▇▅▂ |
V60 | 0 | 1 | -5.34 | 0.70 | -7.42 | -5.81 | -5.28 | -4.77 | -3.83 | ▁▃▆▇▂ |
###################################
# Verifying the data dimensions
###################################
dim(DPA_BoxCoxTransformed)
## [1] 167 60
##################################
# Loading dataset
##################################
# Work on a local copy so the raw train split stays untouched
DPA <- Sonar_Train
##################################
# Listing all predictors
##################################
# Drop the Class response; keep all 60 predictor columns
DPA.Predictors <- DPA[, !names(DPA) %in% c("Class")]
##################################
# Listing all numeric predictors
##################################
DPA.Predictors.Numeric <- DPA.Predictors[, sapply(DPA.Predictors, is.numeric)]
##################################
# Applying a Box-Cox transformation
##################################
# Fit the Box-Cox recipe, then apply it to obtain the transformed predictors
DPA_BoxCox <- preProcess(DPA.Predictors.Numeric, method = c("BoxCox"))
DPA_BoxCoxTransformed <- predict(DPA_BoxCox, DPA.Predictors.Numeric)
##################################
# Applying a center and scale data transformation
##################################
# Standardize the Box-Cox-transformed predictors to mean 0 / sd 1
DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaled <- preProcess(DPA_BoxCoxTransformed,
                                                                      method = c("center", "scale"))
DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed <- predict(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaled,
                                                                              DPA_BoxCoxTransformed)
##################################
# Gathering descriptive statistics
##################################
# Parentheses around the assignment both store and print the skim summary
(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformedSkimmed <- skim(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed))
Name | DPA.Predictors.Numeric_Bo… |
Number of rows | 167 |
Number of columns | 60 |
_______________________ | |
Column type frequency: | |
numeric | 60 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
V1 | 0 | 1 | 0 | 1 | -3.60 | -0.65 | 0.02 | 0.64 | 2.40 | ▁▂▇▇▂ |
V2 | 0 | 1 | 0 | 1 | -2.55 | -0.65 | 0.05 | 0.59 | 2.40 | ▁▅▇▆▂ |
V3 | 0 | 1 | 0 | 1 | -2.55 | -0.74 | -0.02 | 0.72 | 2.46 | ▂▅▇▅▂ |
V4 | 0 | 1 | 0 | 1 | -2.17 | -0.71 | 0.08 | 0.62 | 2.18 | ▂▅▇▅▂ |
V5 | 0 | 1 | 0 | 1 | -2.29 | -0.68 | 0.00 | 0.73 | 2.53 | ▂▇▇▆▂ |
V6 | 0 | 1 | 0 | 1 | -2.54 | -0.55 | -0.03 | 0.62 | 3.25 | ▂▆▇▂▁ |
V7 | 0 | 1 | 0 | 1 | -2.75 | -0.52 | -0.11 | 0.61 | 2.96 | ▁▃▇▃▁ |
V8 | 0 | 1 | 0 | 1 | -2.70 | -0.60 | -0.10 | 0.61 | 2.72 | ▁▅▇▅▁ |
V9 | 0 | 1 | 0 | 1 | -2.94 | -0.64 | 0.01 | 0.71 | 2.58 | ▁▃▇▅▁ |
V10 | 0 | 1 | 0 | 1 | -2.72 | -0.69 | 0.01 | 0.66 | 2.48 | ▁▅▇▅▂ |
V11 | 0 | 1 | 0 | 1 | -2.43 | -0.77 | 0.09 | 0.67 | 2.48 | ▂▆▇▆▂ |
V12 | 0 | 1 | 0 | 1 | -2.22 | -0.81 | 0.14 | 0.69 | 2.48 | ▃▆▇▅▂ |
V13 | 0 | 1 | 0 | 1 | -2.64 | -0.72 | 0.09 | 0.68 | 2.45 | ▁▅▇▅▂ |
V14 | 0 | 1 | 0 | 1 | -2.76 | -0.72 | 0.09 | 0.71 | 2.74 | ▁▅▇▆▁ |
V15 | 0 | 1 | 0 | 1 | -2.79 | -0.67 | -0.04 | 0.76 | 2.30 | ▁▅▇▇▂ |
V16 | 0 | 1 | 0 | 1 | -2.90 | -0.75 | -0.12 | 0.82 | 2.00 | ▁▅▇▆▅ |
V17 | 0 | 1 | 0 | 1 | -2.59 | -0.70 | -0.16 | 1.00 | 1.70 | ▁▅▇▅▇ |
V18 | 0 | 1 | 0 | 1 | -2.68 | -0.76 | -0.12 | 0.93 | 1.71 | ▁▃▇▃▆ |
V19 | 0 | 1 | 0 | 1 | -2.40 | -0.71 | -0.13 | 0.90 | 1.66 | ▂▅▇▅▇ |
V20 | 0 | 1 | 0 | 1 | -1.83 | -0.84 | -0.13 | 0.92 | 1.68 | ▅▇▆▆▇ |
V21 | 0 | 1 | 0 | 1 | -2.12 | -0.80 | -0.03 | 0.84 | 1.55 | ▃▆▆▆▇ |
V22 | 0 | 1 | 0 | 1 | -2.32 | -0.84 | 0.13 | 0.86 | 1.52 | ▂▅▅▆▇ |
V23 | 0 | 1 | 0 | 1 | -2.02 | -0.82 | 0.11 | 0.86 | 1.54 | ▃▆▇▇▇ |
V24 | 0 | 1 | 0 | 1 | -2.17 | -0.62 | 0.04 | 0.86 | 1.52 | ▃▃▇▆▇ |
V25 | 0 | 1 | 0 | 1 | -2.09 | -0.72 | 0.13 | 0.81 | 1.53 | ▅▃▆▇▇ |
V26 | 0 | 1 | 0 | 1 | -1.91 | -0.78 | 0.15 | 0.83 | 1.53 | ▅▅▆▇▇ |
V27 | 0 | 1 | 0 | 1 | -2.06 | -0.84 | 0.01 | 0.86 | 1.38 | ▂▃▅▃▇ |
V28 | 0 | 1 | 0 | 1 | -2.25 | -0.85 | 0.07 | 0.91 | 1.38 | ▂▅▅▅▇ |
V29 | 0 | 1 | 0 | 1 | -2.23 | -0.81 | 0.10 | 0.89 | 1.56 | ▂▆▅▆▇ |
V30 | 0 | 1 | 0 | 1 | -2.47 | -0.72 | 0.09 | 0.70 | 1.83 | ▂▅▇▇▅ |
V31 | 0 | 1 | 0 | 1 | -2.33 | -0.73 | -0.02 | 0.75 | 1.81 | ▂▆▇▇▆ |
V32 | 0 | 1 | 0 | 1 | -2.38 | -0.68 | 0.01 | 0.82 | 1.91 | ▂▅▇▆▅ |
V33 | 0 | 1 | 0 | 1 | -2.48 | -0.67 | -0.03 | 0.77 | 2.21 | ▁▆▇▇▃ |
V34 | 0 | 1 | 0 | 1 | -2.44 | -0.76 | 0.01 | 0.86 | 1.93 | ▂▆▇▆▅ |
V35 | 0 | 1 | 0 | 1 | -2.24 | -0.74 | -0.04 | 0.88 | 1.81 | ▂▆▇▆▆ |
V36 | 0 | 1 | 0 | 1 | -2.53 | -0.85 | -0.05 | 0.79 | 1.82 | ▁▇▇▇▆ |
V37 | 0 | 1 | 0 | 1 | -2.16 | -0.81 | -0.02 | 0.82 | 1.81 | ▂▇▆▇▆ |
V38 | 0 | 1 | 0 | 1 | -2.52 | -0.71 | 0.09 | 0.65 | 2.11 | ▂▆▇▇▃ |
V39 | 0 | 1 | 0 | 1 | -2.40 | -0.71 | -0.04 | 0.77 | 2.31 | ▂▇▇▆▂ |
V40 | 0 | 1 | 0 | 1 | -2.65 | -0.61 | -0.06 | 0.71 | 2.59 | ▁▅▇▅▁ |
V41 | 0 | 1 | 0 | 1 | -2.25 | -0.75 | -0.04 | 0.75 | 2.27 | ▂▆▇▅▂ |
V42 | 0 | 1 | 0 | 1 | -2.71 | -0.59 | 0.00 | 0.70 | 2.49 | ▁▃▇▆▁ |
V43 | 0 | 1 | 0 | 1 | -1.73 | -0.65 | -0.15 | 0.51 | 3.64 | ▃▇▃▁▁ |
V44 | 0 | 1 | 0 | 1 | -1.56 | -0.65 | -0.29 | 0.37 | 4.08 | ▇▇▂▂▁ |
V45 | 0 | 1 | 0 | 1 | -1.28 | -0.66 | -0.34 | 0.23 | 3.22 | ▇▆▁▂▁ |
V46 | 0 | 1 | 0 | 1 | -1.18 | -0.68 | -0.31 | 0.27 | 4.11 | ▇▃▁▁▁ |
V47 | 0 | 1 | 0 | 1 | -1.42 | -0.66 | -0.24 | 0.34 | 4.86 | ▇▅▁▁▁ |
V48 | 0 | 1 | 0 | 1 | -1.49 | -0.76 | -0.21 | 0.44 | 3.76 | ▇▇▂▁▁ |
V49 | 0 | 1 | 0 | 1 | -1.50 | -0.70 | -0.25 | 0.51 | 3.48 | ▇▇▃▂▁ |
V50 | 0 | 1 | 0 | 1 | -1.52 | -0.66 | -0.21 | 0.34 | 4.51 | ▇▇▂▁▁ |
V51 | 0 | 1 | 0 | 1 | -1.27 | -0.64 | -0.19 | 0.41 | 6.72 | ▇▃▁▁▁ |
V52 | 0 | 1 | 0 | 1 | -3.63 | -0.54 | 0.07 | 0.63 | 2.66 | ▁▂▇▇▂ |
V53 | 0 | 1 | 0 | 1 | -2.84 | -0.75 | -0.10 | 0.79 | 2.30 | ▁▅▇▇▂ |
V54 | 0 | 1 | 0 | 1 | -2.32 | -0.70 | 0.04 | 0.60 | 2.35 | ▂▃▇▃▂ |
V55 | 0 | 1 | 0 | 1 | -2.67 | -0.75 | 0.02 | 0.70 | 2.93 | ▁▆▇▅▁ |
V56 | 0 | 1 | 0 | 1 | -2.77 | -0.61 | 0.00 | 0.60 | 3.26 | ▁▆▇▃▁ |
V57 | 0 | 1 | 0 | 1 | -3.11 | -0.70 | -0.07 | 0.70 | 2.80 | ▁▃▇▅▁ |
V58 | 0 | 1 | 0 | 1 | -3.73 | -0.65 | -0.01 | 0.67 | 2.46 | ▁▂▇▇▂ |
V59 | 0 | 1 | 0 | 1 | -2.93 | -0.65 | -0.04 | 0.61 | 2.46 | ▁▃▇▅▂ |
V60 | 0 | 1 | 0 | 1 | -2.95 | -0.67 | 0.09 | 0.81 | 2.15 | ▁▃▆▇▂ |
###################################
# Verifying the data dimensions
###################################
dim(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed)
## [1] 167 60
##################################
# Creating the pre-modelling
# train set
##################################
# Re-attach the Class response to the transformed predictors
Class <- DPA$Class
PMA.Predictors.Numeric <- DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed
PMA_BoxCoxTransformed_CenteredScaledTransformed <- cbind(Class, PMA.Predictors.Numeric)
PMA_PreModelling_Train <- PMA_BoxCoxTransformed_CenteredScaledTransformed
##################################
# Gathering descriptive statistics
##################################
# Store and print the skim summary of the assembled train set
(PMA_PreModelling_Train_Skimmed <- skim(PMA_PreModelling_Train))
Name | PMA_PreModelling_Train |
Number of rows | 167 |
Number of columns | 61 |
_______________________ | |
Column type frequency: | |
factor | 1 |
numeric | 60 |
________________________ | |
Group variables | None |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
Class | 0 | 1 | FALSE | 2 | M: 89, R: 78 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
V1 | 0 | 1 | 0 | 1 | -3.60 | -0.65 | 0.02 | 0.64 | 2.40 | ▁▂▇▇▂ |
V2 | 0 | 1 | 0 | 1 | -2.55 | -0.65 | 0.05 | 0.59 | 2.40 | ▁▅▇▆▂ |
V3 | 0 | 1 | 0 | 1 | -2.55 | -0.74 | -0.02 | 0.72 | 2.46 | ▂▅▇▅▂ |
V4 | 0 | 1 | 0 | 1 | -2.17 | -0.71 | 0.08 | 0.62 | 2.18 | ▂▅▇▅▂ |
V5 | 0 | 1 | 0 | 1 | -2.29 | -0.68 | 0.00 | 0.73 | 2.53 | ▂▇▇▆▂ |
V6 | 0 | 1 | 0 | 1 | -2.54 | -0.55 | -0.03 | 0.62 | 3.25 | ▂▆▇▂▁ |
V7 | 0 | 1 | 0 | 1 | -2.75 | -0.52 | -0.11 | 0.61 | 2.96 | ▁▃▇▃▁ |
V8 | 0 | 1 | 0 | 1 | -2.70 | -0.60 | -0.10 | 0.61 | 2.72 | ▁▅▇▅▁ |
V9 | 0 | 1 | 0 | 1 | -2.94 | -0.64 | 0.01 | 0.71 | 2.58 | ▁▃▇▅▁ |
V10 | 0 | 1 | 0 | 1 | -2.72 | -0.69 | 0.01 | 0.66 | 2.48 | ▁▅▇▅▂ |
V11 | 0 | 1 | 0 | 1 | -2.43 | -0.77 | 0.09 | 0.67 | 2.48 | ▂▆▇▆▂ |
V12 | 0 | 1 | 0 | 1 | -2.22 | -0.81 | 0.14 | 0.69 | 2.48 | ▃▆▇▅▂ |
V13 | 0 | 1 | 0 | 1 | -2.64 | -0.72 | 0.09 | 0.68 | 2.45 | ▁▅▇▅▂ |
V14 | 0 | 1 | 0 | 1 | -2.76 | -0.72 | 0.09 | 0.71 | 2.74 | ▁▅▇▆▁ |
V15 | 0 | 1 | 0 | 1 | -2.79 | -0.67 | -0.04 | 0.76 | 2.30 | ▁▅▇▇▂ |
V16 | 0 | 1 | 0 | 1 | -2.90 | -0.75 | -0.12 | 0.82 | 2.00 | ▁▅▇▆▅ |
V17 | 0 | 1 | 0 | 1 | -2.59 | -0.70 | -0.16 | 1.00 | 1.70 | ▁▅▇▅▇ |
V18 | 0 | 1 | 0 | 1 | -2.68 | -0.76 | -0.12 | 0.93 | 1.71 | ▁▃▇▃▆ |
V19 | 0 | 1 | 0 | 1 | -2.40 | -0.71 | -0.13 | 0.90 | 1.66 | ▂▅▇▅▇ |
V20 | 0 | 1 | 0 | 1 | -1.83 | -0.84 | -0.13 | 0.92 | 1.68 | ▅▇▆▆▇ |
V21 | 0 | 1 | 0 | 1 | -2.12 | -0.80 | -0.03 | 0.84 | 1.55 | ▃▆▆▆▇ |
V22 | 0 | 1 | 0 | 1 | -2.32 | -0.84 | 0.13 | 0.86 | 1.52 | ▂▅▅▆▇ |
V23 | 0 | 1 | 0 | 1 | -2.02 | -0.82 | 0.11 | 0.86 | 1.54 | ▃▆▇▇▇ |
V24 | 0 | 1 | 0 | 1 | -2.17 | -0.62 | 0.04 | 0.86 | 1.52 | ▃▃▇▆▇ |
V25 | 0 | 1 | 0 | 1 | -2.09 | -0.72 | 0.13 | 0.81 | 1.53 | ▅▃▆▇▇ |
V26 | 0 | 1 | 0 | 1 | -1.91 | -0.78 | 0.15 | 0.83 | 1.53 | ▅▅▆▇▇ |
V27 | 0 | 1 | 0 | 1 | -2.06 | -0.84 | 0.01 | 0.86 | 1.38 | ▂▃▅▃▇ |
V28 | 0 | 1 | 0 | 1 | -2.25 | -0.85 | 0.07 | 0.91 | 1.38 | ▂▅▅▅▇ |
V29 | 0 | 1 | 0 | 1 | -2.23 | -0.81 | 0.10 | 0.89 | 1.56 | ▂▆▅▆▇ |
V30 | 0 | 1 | 0 | 1 | -2.47 | -0.72 | 0.09 | 0.70 | 1.83 | ▂▅▇▇▅ |
V31 | 0 | 1 | 0 | 1 | -2.33 | -0.73 | -0.02 | 0.75 | 1.81 | ▂▆▇▇▆ |
V32 | 0 | 1 | 0 | 1 | -2.38 | -0.68 | 0.01 | 0.82 | 1.91 | ▂▅▇▆▅ |
V33 | 0 | 1 | 0 | 1 | -2.48 | -0.67 | -0.03 | 0.77 | 2.21 | ▁▆▇▇▃ |
V34 | 0 | 1 | 0 | 1 | -2.44 | -0.76 | 0.01 | 0.86 | 1.93 | ▂▆▇▆▅ |
V35 | 0 | 1 | 0 | 1 | -2.24 | -0.74 | -0.04 | 0.88 | 1.81 | ▂▆▇▆▆ |
V36 | 0 | 1 | 0 | 1 | -2.53 | -0.85 | -0.05 | 0.79 | 1.82 | ▁▇▇▇▆ |
V37 | 0 | 1 | 0 | 1 | -2.16 | -0.81 | -0.02 | 0.82 | 1.81 | ▂▇▆▇▆ |
V38 | 0 | 1 | 0 | 1 | -2.52 | -0.71 | 0.09 | 0.65 | 2.11 | ▂▆▇▇▃ |
V39 | 0 | 1 | 0 | 1 | -2.40 | -0.71 | -0.04 | 0.77 | 2.31 | ▂▇▇▆▂ |
V40 | 0 | 1 | 0 | 1 | -2.65 | -0.61 | -0.06 | 0.71 | 2.59 | ▁▅▇▅▁ |
V41 | 0 | 1 | 0 | 1 | -2.25 | -0.75 | -0.04 | 0.75 | 2.27 | ▂▆▇▅▂ |
V42 | 0 | 1 | 0 | 1 | -2.71 | -0.59 | 0.00 | 0.70 | 2.49 | ▁▃▇▆▁ |
V43 | 0 | 1 | 0 | 1 | -1.73 | -0.65 | -0.15 | 0.51 | 3.64 | ▃▇▃▁▁ |
V44 | 0 | 1 | 0 | 1 | -1.56 | -0.65 | -0.29 | 0.37 | 4.08 | ▇▇▂▂▁ |
V45 | 0 | 1 | 0 | 1 | -1.28 | -0.66 | -0.34 | 0.23 | 3.22 | ▇▆▁▂▁ |
V46 | 0 | 1 | 0 | 1 | -1.18 | -0.68 | -0.31 | 0.27 | 4.11 | ▇▃▁▁▁ |
V47 | 0 | 1 | 0 | 1 | -1.42 | -0.66 | -0.24 | 0.34 | 4.86 | ▇▅▁▁▁ |
V48 | 0 | 1 | 0 | 1 | -1.49 | -0.76 | -0.21 | 0.44 | 3.76 | ▇▇▂▁▁ |
V49 | 0 | 1 | 0 | 1 | -1.50 | -0.70 | -0.25 | 0.51 | 3.48 | ▇▇▃▂▁ |
V50 | 0 | 1 | 0 | 1 | -1.52 | -0.66 | -0.21 | 0.34 | 4.51 | ▇▇▂▁▁ |
V51 | 0 | 1 | 0 | 1 | -1.27 | -0.64 | -0.19 | 0.41 | 6.72 | ▇▃▁▁▁ |
V52 | 0 | 1 | 0 | 1 | -3.63 | -0.54 | 0.07 | 0.63 | 2.66 | ▁▂▇▇▂ |
V53 | 0 | 1 | 0 | 1 | -2.84 | -0.75 | -0.10 | 0.79 | 2.30 | ▁▅▇▇▂ |
V54 | 0 | 1 | 0 | 1 | -2.32 | -0.70 | 0.04 | 0.60 | 2.35 | ▂▃▇▃▂ |
V55 | 0 | 1 | 0 | 1 | -2.67 | -0.75 | 0.02 | 0.70 | 2.93 | ▁▆▇▅▁ |
V56 | 0 | 1 | 0 | 1 | -2.77 | -0.61 | 0.00 | 0.60 | 3.26 | ▁▆▇▃▁ |
V57 | 0 | 1 | 0 | 1 | -3.11 | -0.70 | -0.07 | 0.70 | 2.80 | ▁▃▇▅▁ |
V58 | 0 | 1 | 0 | 1 | -3.73 | -0.65 | -0.01 | 0.67 | 2.46 | ▁▂▇▇▂ |
V59 | 0 | 1 | 0 | 1 | -2.93 | -0.65 | -0.04 | 0.61 | 2.46 | ▁▃▇▅▂ |
V60 | 0 | 1 | 0 | 1 | -2.95 | -0.67 | 0.09 | 0.81 | 2.15 | ▁▃▆▇▂ |
###################################
# Verifying the data dimensions
# for the train set
###################################
dim(PMA_PreModelling_Train)
## [1] 167 61
##################################
# Formulating the test set
##################################
# NOTE(review): the test predictors are transformed with preProcess objects
# fitted on the TEST set itself; applying the train-set preProcess objects
# instead would avoid information leakage — confirm this is intentional.
DPA_Test <- Sonar_Test
DPA_Test.Predictors <- DPA_Test[, !names(DPA_Test) %in% c("Class")]
DPA_Test.Predictors.Numeric <- DPA_Test.Predictors[, sapply(DPA_Test.Predictors, is.numeric)]
# Box-Cox transformation
DPA_Test_BoxCox <- preProcess(DPA_Test.Predictors.Numeric, method = c("BoxCox"))
DPA_Test_BoxCoxTransformed <- predict(DPA_Test_BoxCox, DPA_Test.Predictors.Numeric)
# Centering and scaling
DPA_Test.Predictors.Numeric_BoxCoxTransformed_CenteredScaled <- preProcess(DPA_Test_BoxCoxTransformed,
                                                                           method = c("center", "scale"))
DPA_Test.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed <- predict(DPA_Test.Predictors.Numeric_BoxCoxTransformed_CenteredScaled,
                                                                                   DPA_Test_BoxCoxTransformed)
##################################
# Creating the pre-modelling
# test set
##################################
# Re-attach the Class response to the transformed test predictors
Class <- DPA_Test$Class
PMA_Test.Predictors.Numeric <- DPA_Test.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed
PMA_Test_BoxCoxTransformed_CenteredScaledTransformed <- cbind(Class, PMA_Test.Predictors.Numeric)
PMA_PreModelling_Test <- PMA_Test_BoxCoxTransformed_CenteredScaledTransformed
##################################
# Gathering descriptive statistics
##################################
# Store and print the skim summary of the assembled test set
(PMA_PreModelling_Test_Skimmed <- skim(PMA_PreModelling_Test))
Name | PMA_PreModelling_Test |
Number of rows | 41 |
Number of columns | 61 |
_______________________ | |
Column type frequency: | |
factor | 1 |
numeric | 60 |
________________________ | |
Group variables | None |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
Class | 0 | 1 | FALSE | 2 | M: 22, R: 19 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
V1 | 0 | 1 | 0 | 1 | -1.96 | -0.88 | 0.00 | 0.74 | 2.12 | ▃▆▇▇▁ |
V2 | 0 | 1 | 0 | 1 | -2.49 | -0.60 | 0.03 | 0.62 | 3.07 | ▁▆▇▂▁ |
V3 | 0 | 1 | 0 | 1 | -2.39 | -0.45 | 0.05 | 0.49 | 2.46 | ▁▂▇▂▂ |
V4 | 0 | 1 | 0 | 1 | -2.44 | -0.70 | -0.13 | 0.43 | 2.80 | ▁▇▇▃▁ |
V5 | 0 | 1 | 0 | 1 | -2.69 | -0.64 | -0.01 | 0.72 | 2.31 | ▁▃▇▅▁ |
V6 | 0 | 1 | 0 | 1 | -2.11 | -0.54 | -0.09 | 0.88 | 1.90 | ▂▅▇▃▅ |
V7 | 0 | 1 | 0 | 1 | -2.66 | -0.74 | -0.02 | 0.69 | 2.56 | ▁▅▇▅▁ |
V8 | 0 | 1 | 0 | 1 | -2.87 | -0.40 | -0.12 | 0.46 | 2.82 | ▁▂▇▂▁ |
V9 | 0 | 1 | 0 | 1 | -2.60 | -0.59 | -0.04 | 0.51 | 2.93 | ▁▅▇▂▁ |
V10 | 0 | 1 | 0 | 1 | -2.17 | -0.54 | 0.10 | 0.56 | 2.69 | ▂▅▇▂▁ |
V11 | 0 | 1 | 0 | 1 | -2.13 | -0.63 | 0.14 | 0.64 | 2.04 | ▃▅▇▇▂ |
V12 | 0 | 1 | 0 | 1 | -1.99 | -0.52 | 0.05 | 0.49 | 2.51 | ▂▃▇▂▁ |
V13 | 0 | 1 | 0 | 1 | -2.29 | -0.56 | -0.04 | 0.46 | 2.36 | ▁▃▇▂▂ |
V14 | 0 | 1 | 0 | 1 | -2.47 | -0.61 | 0.10 | 0.40 | 2.58 | ▁▅▇▃▁ |
V15 | 0 | 1 | 0 | 1 | -2.01 | -0.82 | -0.05 | 0.71 | 1.82 | ▃▃▇▃▅ |
V16 | 0 | 1 | 0 | 1 | -2.28 | -0.73 | -0.13 | 0.69 | 1.85 | ▂▆▇▇▆ |
V17 | 0 | 1 | 0 | 1 | -2.37 | -0.89 | 0.03 | 0.88 | 1.71 | ▁▇▆▆▇ |
V18 | 0 | 1 | 0 | 1 | -2.11 | -0.60 | -0.15 | 0.91 | 1.66 | ▂▃▇▅▇ |
V19 | 0 | 1 | 0 | 1 | -1.97 | -0.81 | 0.06 | 0.99 | 1.61 | ▃▇▆▅▇ |
V20 | 0 | 1 | 0 | 1 | -2.01 | -0.75 | 0.10 | 1.06 | 1.45 | ▂▆▅▅▇ |
V21 | 0 | 1 | 0 | 1 | -1.84 | -0.83 | 0.23 | 0.86 | 1.48 | ▅▃▃▇▇ |
V22 | 0 | 1 | 0 | 1 | -1.87 | -0.86 | 0.32 | 0.61 | 1.44 | ▃▃▂▇▅ |
V23 | 0 | 1 | 0 | 1 | -1.86 | -1.01 | 0.26 | 0.69 | 1.54 | ▃▅▃▇▆ |
V24 | 0 | 1 | 0 | 1 | -1.97 | -0.78 | 0.22 | 0.83 | 1.53 | ▃▇▃▇▇ |
V25 | 0 | 1 | 0 | 1 | -1.81 | -0.82 | -0.05 | 0.91 | 1.41 | ▃▂▅▂▇ |
V26 | 0 | 1 | 0 | 1 | -1.97 | -0.78 | 0.17 | 0.89 | 1.14 | ▂▂▃▁▇ |
V27 | 0 | 1 | 0 | 1 | -2.05 | -0.82 | 0.12 | 0.99 | 1.14 | ▂▃▃▅▇ |
V28 | 0 | 1 | 0 | 1 | -2.06 | -0.56 | 0.09 | 0.78 | 1.53 | ▃▅▇▇▇ |
V29 | 0 | 1 | 0 | 1 | -2.06 | -0.65 | -0.07 | 0.92 | 1.67 | ▃▇▆▇▇ |
V30 | 0 | 1 | 0 | 1 | -1.94 | -0.55 | -0.32 | 0.75 | 1.70 | ▅▆▆▇▅ |
V31 | 0 | 1 | 0 | 1 | -2.24 | -0.64 | -0.04 | 0.67 | 2.55 | ▂▅▇▅▁ |
V32 | 0 | 1 | 0 | 1 | -1.94 | -0.59 | 0.26 | 0.62 | 2.46 | ▃▃▇▃▁ |
V33 | 0 | 1 | 0 | 1 | -2.19 | -0.77 | 0.12 | 0.73 | 1.93 | ▂▆▇▆▃ |
V34 | 0 | 1 | 0 | 1 | -2.52 | -0.75 | -0.09 | 0.78 | 1.90 | ▁▅▇▆▃ |
V35 | 0 | 1 | 0 | 1 | -1.75 | -0.74 | -0.05 | 0.84 | 1.76 | ▇▇▇▇▆ |
V36 | 0 | 1 | 0 | 1 | -1.76 | -0.78 | 0.15 | 0.65 | 1.70 | ▆▇▆▇▆ |
V37 | 0 | 1 | 0 | 1 | -1.91 | -0.78 | 0.10 | 0.73 | 1.95 | ▅▅▇▇▃ |
V38 | 0 | 1 | 0 | 1 | -2.29 | -0.72 | 0.17 | 0.68 | 2.21 | ▂▇▇▇▂ |
V39 | 0 | 1 | 0 | 1 | -2.00 | -0.92 | 0.21 | 0.61 | 2.19 | ▃▆▃▇▂ |
V40 | 0 | 1 | 0 | 1 | -2.67 | -0.76 | 0.08 | 0.72 | 2.35 | ▁▆▇▅▂ |
V41 | 0 | 1 | 0 | 1 | -1.95 | -0.69 | 0.04 | 0.76 | 1.88 | ▅▆▇▇▅ |
V42 | 0 | 1 | 0 | 1 | -2.16 | -0.70 | -0.05 | 0.53 | 2.08 | ▃▅▇▃▃ |
V43 | 0 | 1 | 0 | 1 | -1.90 | -0.60 | 0.00 | 0.91 | 2.27 | ▃▅▇▅▁ |
V44 | 0 | 1 | 0 | 1 | -2.07 | -0.54 | 0.00 | 0.70 | 2.13 | ▃▆▇▇▃ |
V45 | 0 | 1 | 0 | 1 | -2.18 | -0.73 | 0.14 | 0.50 | 2.10 | ▂▇▇▇▃ |
V46 | 0 | 1 | 0 | 1 | -2.21 | -0.71 | 0.16 | 0.71 | 2.02 | ▂▇▇▇▂ |
V47 | 0 | 1 | 0 | 1 | -1.90 | -0.72 | 0.12 | 0.70 | 2.45 | ▃▅▇▃▁ |
V48 | 0 | 1 | 0 | 1 | -2.18 | -0.55 | 0.05 | 0.64 | 2.60 | ▂▅▇▃▁ |
V49 | 0 | 1 | 0 | 1 | -1.98 | -0.74 | 0.20 | 0.68 | 2.37 | ▃▇▇▇▁ |
V50 | 0 | 1 | 0 | 1 | -2.56 | -0.70 | 0.03 | 0.60 | 2.99 | ▁▆▇▃▁ |
V51 | 0 | 1 | 0 | 1 | -2.13 | -0.57 | 0.02 | 0.44 | 2.22 | ▂▃▇▂▂ |
V52 | 0 | 1 | 0 | 1 | -2.05 | -0.56 | 0.06 | 0.66 | 2.08 | ▂▅▇▅▂ |
V53 | 0 | 1 | 0 | 1 | -2.14 | -0.49 | -0.05 | 0.46 | 2.51 | ▂▃▇▂▁ |
V54 | 0 | 1 | 0 | 1 | -1.96 | -0.74 | -0.16 | 0.74 | 2.00 | ▃▆▆▇▃ |
V55 | 0 | 1 | 0 | 1 | -2.20 | -0.62 | -0.02 | 0.60 | 1.89 | ▃▃▇▆▅ |
V56 | 0 | 1 | 0 | 1 | -1.81 | -0.58 | -0.05 | 0.78 | 2.11 | ▆▇▇▇▃ |
V57 | 0 | 1 | 0 | 1 | -2.16 | -0.71 | -0.14 | 0.67 | 1.97 | ▂▆▇▇▃ |
V58 | 0 | 1 | 0 | 1 | -2.07 | -0.62 | -0.07 | 0.79 | 2.09 | ▅▆▇▇▃ |
V59 | 0 | 1 | 0 | 1 | -1.66 | -0.73 | 0.00 | 0.59 | 2.31 | ▅▅▇▃▁ |
V60 | 0 | 1 | 0 | 1 | -2.01 | -0.60 | -0.12 | 0.64 | 2.72 | ▃▇▇▃▁ |
###################################
# Verifying the data dimensions
# for the test set
###################################
dim(PMA_PreModelling_Test)
## [1] 41 61
##################################
# Loading dataset
##################################
# Explore the transformed train set
EDA <- PMA_PreModelling_Train
##################################
# Listing all predictors
##################################
EDA.Predictors <- EDA[, !names(EDA) %in% c("Class")]
##################################
# Listing all numeric predictors
##################################
EDA.Predictors.Numeric <- EDA.Predictors[, sapply(EDA.Predictors, is.numeric)]
ncol(EDA.Predictors.Numeric)
## [1] 60
names(EDA.Predictors.Numeric)
## [1] "V1" "V2" "V3" "V4" "V5" "V6" "V7" "V8" "V9" "V10" "V11" "V12"
## [13] "V13" "V14" "V15" "V16" "V17" "V18" "V19" "V20" "V21" "V22" "V23" "V24"
## [25] "V25" "V26" "V27" "V28" "V29" "V30" "V31" "V32" "V33" "V34" "V35" "V36"
## [37] "V37" "V38" "V39" "V40" "V41" "V42" "V43" "V44" "V45" "V46" "V47" "V48"
## [49] "V49" "V50" "V51" "V52" "V53" "V54" "V55" "V56" "V57" "V58" "V59" "V60"
##################################
# Formulating the box plots
##################################
# Class-conditional box plots, one free-scaled panel per predictor
featurePlot(x = EDA.Predictors.Numeric,
            y = EDA$Class,
            plot = "box",
            scales = list(x = list(relation="free", rot = 90),
                          y = list(relation="free")),
            adjust = 1.5,
            pch = "|")
##################################
# Transforming factor predictors
# as required by the nature of the model
##################################
# Creating a local object
# for the train and test sets
##################################
# Coerce every predictor to plain numeric (via character, so any factor would
# convert by its printed value rather than its level code), then re-attach Class.
PMA_PreModelling_Train_SVM_R <- as.data.frame(lapply(PMA_PreModelling_Train[, !names(PMA_PreModelling_Train) %in% c("Class")],
                                                     function(x) as.numeric(as.character(x))))
PMA_PreModelling_Train_SVM_R$Class <- PMA_PreModelling_Train$Class
dim(PMA_PreModelling_Train_SVM_R)
## [1] 167 61
PMA_PreModelling_Test_SVM_R <- as.data.frame(lapply(PMA_PreModelling_Test[, !names(PMA_PreModelling_Test) %in% c("Class")],
                                                    function(x) as.numeric(as.character(x))))
PMA_PreModelling_Test_SVM_R$Class <- PMA_PreModelling_Test$Class
dim(PMA_PreModelling_Test_SVM_R)
## [1] 41 61
##################################
# Creating consistent fold assignments
# for the 10-Fold Cross Validation process
##################################
# Fixed seed + shared fold indices so every tuned model resamples identically
set.seed(12345678)
KFold_Indices <- createFolds(PMA_PreModelling_Train_SVM_R$Class,
                             k = 10,
                             returnTrain = TRUE)
KFold_Control_RandomSearch <- trainControl(method = "cv",
                                           index = KFold_Indices,
                                           summaryFunction = twoClassSummary,
                                           classProbs = TRUE,
                                           search = "random")
KFold_Control_GridSearch <- trainControl(method = "cv",
                                         index = KFold_Indices,
                                         summaryFunction = twoClassSummary,
                                         classProbs = TRUE)
##################################
# Setting the conditions
# for hyperparameter tuning
##################################
# Manual tuning grid: 6 sigma values x 12 cost values (C = 0.25 ... 512)
SVM_R_Grid <- expand.grid(sigma = c(0.030, 0.025, 0.020, 0.015, 0.010, 0.005),
                          C = 2^(-2:9))
##################################
# Running the support vector machine (radial basis function kernel) model
# by setting the caret method to 'svmRadial'
##################################
##################################
# Using a manual grid search
##################################
set.seed(12345678)
SVM_R_Tune_GridSearch_Manual <- train(x = PMA_PreModelling_Train_SVM_R[, !names(PMA_PreModelling_Train_SVM_R) %in% c("Class")],
                                      y = PMA_PreModelling_Train_SVM_R$Class,
                                      method = "svmRadial",
                                      tuneGrid = SVM_R_Grid,
                                      metric = "ROC",
                                      preProc = c("center", "scale"),
                                      trControl = KFold_Control_GridSearch,
                                      returnResamp = "all")
# Inspect the final ksvm fit chosen by the grid search
SVM_R_Tune_GridSearch_Manual$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 2
##
## Gaussian Radial Basis kernel function.
## Hyperparameter : sigma = 0.03
##
## Number of Support Vectors : 156
##
## Objective Function Value : -54.7816
## Training error : 0
## Probability model included.
# Full cross-validation results across the tuning grid
SVM_R_Tune_GridSearch_Manual$results
## sigma C ROC Sens Spec ROCSD SensSD SpecSD
## 1 0.005 0.25 0.8260169 0.7888889 0.7321429 0.13266813 0.16101530 0.21179103
## 2 0.005 0.50 0.8414683 0.8333333 0.7303571 0.13145729 0.14103284 0.18412976
## 3 0.005 1.00 0.8714286 0.8777778 0.7696429 0.10662639 0.11049210 0.19351190
## 4 0.005 2.00 0.8863095 0.8888889 0.7964286 0.09551350 0.11712139 0.19852774
## 5 0.005 4.00 0.8880952 0.9111111 0.8089286 0.10012450 0.11475506 0.19652775
## 6 0.005 8.00 0.8938492 0.8888889 0.7964286 0.09055897 0.11712139 0.18675684
## 7 0.005 16.00 0.9109375 0.9000000 0.7821429 0.08273844 0.12227833 0.14526737
## 8 0.005 32.00 0.9151042 0.8652778 0.8214286 0.08203275 0.17198934 0.12371791
## 9 0.005 64.00 0.9151042 0.8652778 0.8214286 0.08203275 0.17198934 0.12371791
## 10 0.005 128.00 0.9151042 0.8763889 0.8214286 0.08203275 0.16927613 0.12371791
## 11 0.005 256.00 0.9151042 0.8986111 0.8214286 0.08203275 0.09756333 0.12371791
## 12 0.005 512.00 0.9151042 0.8652778 0.8071429 0.08203275 0.17198934 0.10806482
## 13 0.010 0.25 0.8458333 0.8222222 0.7196429 0.13137608 0.16728281 0.19742712
## 14 0.010 0.50 0.8684524 0.8444444 0.7821429 0.11588634 0.11944086 0.18706014
## 15 0.010 1.00 0.9017857 0.8763889 0.7964286 0.08970483 0.11042419 0.19852774
## 16 0.010 2.00 0.9063492 0.9097222 0.8232143 0.09227459 0.11513732 0.20541321
## 17 0.010 4.00 0.9236359 0.8875000 0.8357143 0.08044389 0.11720372 0.16683665
## 18 0.010 8.00 0.9323661 0.9097222 0.8339286 0.07378842 0.11513732 0.12200203
## 19 0.010 16.00 0.9337550 0.9097222 0.8339286 0.07092083 0.11513732 0.12200203
## 20 0.010 32.00 0.9337550 0.8986111 0.8339286 0.07092083 0.11073431 0.12200203
## 21 0.010 64.00 0.9337550 0.9097222 0.8339286 0.07092083 0.11513732 0.12200203
## 22 0.010 128.00 0.9337550 0.8875000 0.8339286 0.07092083 0.11720372 0.12200203
## 23 0.010 256.00 0.9337550 0.8986111 0.8214286 0.07092083 0.11073431 0.12371791
## 24 0.010 512.00 0.9337550 0.9097222 0.8339286 0.07092083 0.11513732 0.12200203
## 25 0.015 0.25 0.8599206 0.8333333 0.7571429 0.12319069 0.14103284 0.21249250
## 26 0.015 0.50 0.8900794 0.8666667 0.7964286 0.10042985 0.10210406 0.19852774
## 27 0.015 1.00 0.9190724 0.8986111 0.8232143 0.07313455 0.11073431 0.20541321
## 28 0.015 2.00 0.9337550 0.9319444 0.8232143 0.06785716 0.09452838 0.16824297
## 29 0.015 4.00 0.9522073 0.9430556 0.8607143 0.05912173 0.09543103 0.12423234
## 30 0.015 8.00 0.9535962 0.9208333 0.8607143 0.05648576 0.09214010 0.09213701
## 31 0.015 16.00 0.9535962 0.9208333 0.8464286 0.05648576 0.11822337 0.10316697
## 32 0.015 32.00 0.9535962 0.9208333 0.8464286 0.05648576 0.10598718 0.10316697
## 33 0.015 64.00 0.9535962 0.9097222 0.8607143 0.05648576 0.11513732 0.09213701
## 34 0.015 128.00 0.9535962 0.9208333 0.8607143 0.05648576 0.11822337 0.09213701
## 35 0.015 256.00 0.9535962 0.9208333 0.8607143 0.05648576 0.11822337 0.09213701
## 36 0.015 512.00 0.9535962 0.8986111 0.8607143 0.05648576 0.11073431 0.09213701
## 37 0.020 0.25 0.8599454 0.7986111 0.7839286 0.12654947 0.12512853 0.21386367
## 38 0.020 0.50 0.9093254 0.8875000 0.7964286 0.08581104 0.07420417 0.19852774
## 39 0.020 1.00 0.9365327 0.8986111 0.8482143 0.06463239 0.11073431 0.16436549
## 40 0.020 2.00 0.9522073 0.9541667 0.8607143 0.05728040 0.07912469 0.12423234
## 41 0.020 4.00 0.9563740 0.9652778 0.8607143 0.05151736 0.07670390 0.09213701
## 42 0.020 8.00 0.9563740 0.9541667 0.8607143 0.05151736 0.07912469 0.09213701
## 43 0.020 16.00 0.9563740 0.9541667 0.8607143 0.05151736 0.07912469 0.09213701
## 44 0.020 32.00 0.9563740 0.9430556 0.8607143 0.05151736 0.10886030 0.09213701
## 45 0.020 64.00 0.9563740 0.9541667 0.8607143 0.05151736 0.07912469 0.09213701
## 46 0.020 128.00 0.9563740 0.9652778 0.8607143 0.05151736 0.07670390 0.09213701
## 47 0.020 256.00 0.9563740 0.9430556 0.8732143 0.05151736 0.07977216 0.10221817
## 48 0.020 512.00 0.9563740 0.9541667 0.8607143 0.05151736 0.07912469 0.09213701
## 49 0.025 0.25 0.8744296 0.7875000 0.8250000 0.11673972 0.12108938 0.20581815
## 50 0.025 0.50 0.9238095 0.8875000 0.8107143 0.07900159 0.07420417 0.19709483
## 51 0.025 1.00 0.9478423 0.9208333 0.8607143 0.05851844 0.07580444 0.12423234
## 52 0.025 2.00 0.9607391 0.9652778 0.8607143 0.04456594 0.07670390 0.12423234
## 53 0.025 4.00 0.9649058 0.9430556 0.8732143 0.03865818 0.07977216 0.10221817
## 54 0.025 8.00 0.9649058 0.9541667 0.8607143 0.03865818 0.07912469 0.09213701
## 55 0.025 16.00 0.9649058 0.9652778 0.8607143 0.03865818 0.07670390 0.09213701
## 56 0.025 32.00 0.9649058 0.9319444 0.8732143 0.03865818 0.07869009 0.10221817
## 57 0.025 64.00 0.9649058 0.9319444 0.8607143 0.03865818 0.07869009 0.09213701
## 58 0.025 128.00 0.9649058 0.9541667 0.8732143 0.03865818 0.07912469 0.10221817
## 59 0.025 256.00 0.9649058 0.9430556 0.8607143 0.03865818 0.07977216 0.09213701
## 60 0.025 512.00 0.9649058 0.9541667 0.8732143 0.03865818 0.07912469 0.10221817
## 61 0.030 0.25 0.8918899 0.6986111 0.8750000 0.09764678 0.19489036 0.16666667
## 62 0.030 0.50 0.9264137 0.9097222 0.8107143 0.07046736 0.07089534 0.19709483
## 63 0.030 1.00 0.9565724 0.9208333 0.8607143 0.05274606 0.07580444 0.12423234
## 64 0.030 2.00 0.9662946 0.9319444 0.8857143 0.03635921 0.09452838 0.09267381
## 65 0.030 4.00 0.9662946 0.9430556 0.8482143 0.03635921 0.07977216 0.11457367
## 66 0.030 8.00 0.9662946 0.9319444 0.8732143 0.03635921 0.07869009 0.08352444
## 67 0.030 16.00 0.9662946 0.9430556 0.8607143 0.03635921 0.07977216 0.12423234
## 68 0.030 32.00 0.9662946 0.9430556 0.8857143 0.03635921 0.07977216 0.09267381
## 69 0.030 64.00 0.9662946 0.9541667 0.8482143 0.03635921 0.07912469 0.11457367
## 70 0.030 128.00 0.9662946 0.9541667 0.8607143 0.03635921 0.07912469 0.09213701
## 71 0.030 256.00 0.9662946 0.9319444 0.8607143 0.03635921 0.07869009 0.09213701
## 72 0.030 512.00 0.9662946 0.9319444 0.8607143 0.03635921 0.07869009 0.09213701
##################################
# Reporting the cross-validation results
# for the train set
##################################
# Pull the cross-validated ROC AUC for the best (C, sigma) combination
(SVM_R_Train_GridSearch_Manual_ROCCurveAUC <- SVM_R_Tune_GridSearch_Manual$results[SVM_R_Tune_GridSearch_Manual$results$C == SVM_R_Tune_GridSearch_Manual$bestTune$C &
                                                                                   SVM_R_Tune_GridSearch_Manual$results$sigma == SVM_R_Tune_GridSearch_Manual$bestTune$sigma,
                                                                                   c("ROC")])
## [1] 0.9662946
##################################
# Independently evaluating the model
# on the test set
##################################
# Side-by-side observed classes and predicted class probabilities
SVM_R_Test_GridSearch_Manual <- data.frame(SVM_R_Observed = PMA_PreModelling_Test_SVM_R$Class,
                                           SVM_R_Predicted = predict(SVM_R_Tune_GridSearch_Manual,
                                                                     PMA_PreModelling_Test_SVM_R[, !names(PMA_PreModelling_Test_SVM_R) %in% c("Class")],
                                                                     type = "prob"))
SVM_R_Test_GridSearch_Manual
## SVM_R_Observed SVM_R_Predicted.M SVM_R_Predicted.R
## 1 R 0.498313517 0.501686483
## 2 R 0.252823888 0.747176112
## 3 R 0.432002237 0.567997763
## 4 R 0.338372591 0.661627409
## 5 R 0.044367096 0.955632904
## 6 R 0.792956664 0.207043336
## 7 R 0.372759443 0.627240557
## 8 R 0.518259266 0.481740734
## 9 R 0.166319456 0.833680544
## 10 R 0.007429286 0.992570714
## 11 R 0.409778215 0.590221785
## 12 R 0.187868114 0.812131886
## 13 R 0.001983070 0.998016930
## 14 R 0.015027703 0.984972297
## 15 R 0.117879991 0.882120009
## 16 R 0.075579467 0.924420533
## 17 R 0.037979966 0.962020034
## 18 R 0.053744989 0.946255011
## 19 R 0.212654747 0.787345253
## 20 M 0.233770876 0.766229124
## 21 M 0.593526187 0.406473813
## 22 M 0.321828992 0.678171008
## 23 M 0.523709904 0.476290096
## 24 M 0.739237045 0.260762955
## 25 M 0.980117995 0.019882005
## 26 M 0.984706536 0.015293464
## 27 M 0.978129469 0.021870531
## 28 M 0.756532422 0.243467578
## 29 M 0.918799066 0.081200934
## 30 M 0.963846419 0.036153581
## 31 M 0.430839955 0.569160045
## 32 M 0.603029446 0.396970554
## 33 M 0.931623529 0.068376471
## 34 M 0.538337898 0.461662102
## 35 M 0.995676794 0.004323206
## 36 M 0.987575783 0.012424217
## 37 M 0.998846682 0.001153318
## 38 M 0.974067410 0.025932590
## 39 M 0.998920687 0.001079313
## 40 M 0.997777114 0.002222886
## 41 M 0.589129927 0.410870073
##################################
# Reporting the independent evaluation results
# for the test set
##################################
# ROC curve on the test set; levels reversed so class "R" is the event level
SVM_R_Test_GridSearch_Manual_ROC <- roc(response = SVM_R_Test_GridSearch_Manual$SVM_R_Observed,
                                        predictor = SVM_R_Test_GridSearch_Manual$SVM_R_Predicted.R,
                                        levels = rev(levels(SVM_R_Test_GridSearch_Manual$SVM_R_Observed)))
(SVM_R_Test_GridSearch_Manual_ROCCurveAUC <- auc(SVM_R_Test_GridSearch_Manual_ROC)[1])
## [1] 0.937799
##################################
# Using an automated grid search
##################################
# Same resampling setup, but let caret pick 12 cost values for its own sigma
set.seed(12345678)
SVM_R_Tune_GridSearch_Auto <- train(x = PMA_PreModelling_Train_SVM_R[, !names(PMA_PreModelling_Train_SVM_R) %in% c("Class")],
                                    y = PMA_PreModelling_Train_SVM_R$Class,
                                    method = "svmRadial",
                                    tuneLength = 12,
                                    metric = "ROC",
                                    preProc = c("center", "scale"),
                                    trControl = KFold_Control_GridSearch,
                                    returnResamp = "all")
# Inspect the final ksvm fit chosen by the automated search
SVM_R_Tune_GridSearch_Auto$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 8
##
## Gaussian Radial Basis kernel function.
## Hyperparameter : sigma = 0.0101286917816145
##
## Number of Support Vectors : 107
##
## Objective Function Value : -116.4213
## Training error : 0
## Probability model included.
# Full cross-validation results across the automated grid
SVM_R_Tune_GridSearch_Auto$results
## sigma C ROC Sens Spec ROCSD SensSD
## 1 0.01012869 0.25 0.8472222 0.8222222 0.7321429 0.13031954 0.1672828
## 2 0.01012869 0.50 0.8698413 0.8555556 0.7821429 0.11287340 0.1054093
## 3 0.01012869 1.00 0.9017857 0.8986111 0.7964286 0.08970483 0.1107343
## 4 0.01012869 2.00 0.9047867 0.9097222 0.8232143 0.09063022 0.1151373
## 5 0.01012869 4.00 0.9252232 0.9097222 0.7821429 0.08053464 0.1025335
## 6 0.01012869 8.00 0.9351438 0.8986111 0.8339286 0.07128847 0.1224972
## 7 0.01012869 16.00 0.9321925 0.9097222 0.8214286 0.06984564 0.1151373
## 8 0.01012869 32.00 0.9321925 0.8986111 0.8339286 0.06984564 0.1224972
## 9 0.01012869 64.00 0.9321925 0.9097222 0.8214286 0.06984564 0.1151373
## 10 0.01012869 128.00 0.9321925 0.9097222 0.8339286 0.06984564 0.1151373
## 11 0.01012869 256.00 0.9321925 0.8986111 0.8339286 0.06984564 0.1107343
## 12 0.01012869 512.00 0.9321925 0.8986111 0.8339286 0.06984564 0.1107343
## SpecSD
## 1 0.2117910
## 2 0.1870601
## 3 0.1985277
## 4 0.2054132
## 5 0.1870601
## 6 0.1220020
## 7 0.1237179
## 8 0.1220020
## 9 0.1237179
## 10 0.1220020
## 11 0.1220020
## 12 0.1220020
##################################
# Reporting the cross-validation results
# for the train set
##################################
# Pull the cross-validated ROC for the best (C, sigma) pair; the outer
# parentheses assign and print in one step
(SVM_R_Train_GridSearch_Auto_ROCCurveAUC <- SVM_R_Tune_GridSearch_Auto$results[SVM_R_Tune_GridSearch_Auto$results$C==SVM_R_Tune_GridSearch_Auto$bestTune$C &
                                                                               SVM_R_Tune_GridSearch_Auto$results$sigma==SVM_R_Tune_GridSearch_Auto$bestTune$sigma,
                                                                               c("ROC")])
## [1] 0.9351438
##################################
# Independently evaluating the model
# on the test set
##################################
# Pair the observed test-set classes with the model's predicted class
# probabilities (the Class column is excluded from the predictors)
SVM_R_Test_GridSearch_Auto <- data.frame(SVM_R_Observed = PMA_PreModelling_Test_SVM_R$Class,
                                         SVM_R_Predicted = predict(SVM_R_Tune_GridSearch_Auto,
                                                                   PMA_PreModelling_Test_SVM_R[,!names(PMA_PreModelling_Test_SVM_R) %in% c("Class")],
                                                                   type = "prob"))
SVM_R_Test_GridSearch_Auto
## SVM_R_Observed SVM_R_Predicted.M SVM_R_Predicted.R
## 1 R 0.644520120 0.355479880
## 2 R 0.068227365 0.931772635
## 3 R 0.274246432 0.725753568
## 4 R 0.306557842 0.693442158
## 5 R 0.035774492 0.964225508
## 6 R 0.630076881 0.369923119
## 7 R 0.182377193 0.817622807
## 8 R 0.592301365 0.407698635
## 9 R 0.302447356 0.697552644
## 10 R 0.025172209 0.974827791
## 11 R 0.280726484 0.719273516
## 12 R 0.159681848 0.840318152
## 13 R 0.005997015 0.994002985
## 14 R 0.030660134 0.969339866
## 15 R 0.305052217 0.694947783
## 16 R 0.164417547 0.835582453
## 17 R 0.077980882 0.922019118
## 18 R 0.003954464 0.996045536
## 19 R 0.198233556 0.801766444
## 20 M 0.099944550 0.900055450
## 21 M 0.538894167 0.461105833
## 22 M 0.272072827 0.727927173
## 23 M 0.327887741 0.672112259
## 24 M 0.452517107 0.547482893
## 25 M 0.952480800 0.047519200
## 26 M 0.989378821 0.010621179
## 27 M 0.920060058 0.079939942
## 28 M 0.735616095 0.264383905
## 29 M 0.701311496 0.298688504
## 30 M 0.964975997 0.035024003
## 31 M 0.677307373 0.322692627
## 32 M 0.535382523 0.464617477
## 33 M 0.877683205 0.122316795
## 34 M 0.453848395 0.546151605
## 35 M 0.996891033 0.003108967
## 36 M 0.981416116 0.018583884
## 37 M 0.992227874 0.007772126
## 38 M 0.923369542 0.076630458
## 39 M 0.987060650 0.012939350
## 40 M 0.993618617 0.006381383
## 41 M 0.625963239 0.374036761
##################################
# Reporting the independent evaluation results
# for the test set
##################################
# Build the test-set ROC curve; factor levels are reversed so that "R" is
# treated as the positive class by pROC
SVM_R_Test_GridSearch_Auto_ROC <- roc(response = SVM_R_Test_GridSearch_Auto$SVM_R_Observed,
                                      predictor = SVM_R_Test_GridSearch_Auto$SVM_R_Predicted.R,
                                      levels = rev(levels(SVM_R_Test_GridSearch_Auto$SVM_R_Observed)))
# Extract the AUC; the outer parentheses assign and print in one step
(SVM_R_Test_GridSearch_Auto_ROCCurveAUC <- auc(SVM_R_Test_GridSearch_Auto_ROC)[1])
## [1] 0.9114833
##################################
# Using an automated random search
##################################
set.seed(12345678)
# Tune the radial-basis SVM with random search (30 random (sigma, C) draws,
# enabled by search = "random" in KFold_Control_RandomSearch)
SVM_R_Tune_RandomSearch_Auto <- train(x = PMA_PreModelling_Train_SVM_R[,!names(PMA_PreModelling_Train_SVM_R) %in% c("Class")],
                                      y = PMA_PreModelling_Train_SVM_R$Class,
                                      method = "svmRadial",
                                      tuneLength = 30,
                                      metric = "ROC",
                                      preProc = c("center", "scale"),
                                      trControl = KFold_Control_RandomSearch,
                                      returnResamp = "all")
# Inspect the final fitted ksvm model
SVM_R_Tune_RandomSearch_Auto$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: C-svc (classification)
## parameter : cost C = 183.359327063186
##
## Gaussian Radial Basis kernel function.
## Hyperparameter : sigma = 0.0245815648934837
##
## Number of Support Vectors : 145
##
## Objective Function Value : -57.5767
## Training error : 0
## Probability model included.
$results SVM_R_Tune_RandomSearch_Auto
## sigma C ROC Sens Spec ROCSD SensSD
## 1 0.003062763 0.07968017 0.8188988 0.5736111 0.8357143 0.12762632 0.14404771
## 2 0.003161972 557.27174973 0.8962550 0.8555556 0.7803571 0.08481918 0.16604824
## 3 0.003197104 19.09397071 0.8890873 0.8888889 0.7446429 0.09068061 0.10475656
## 4 0.003260497 311.39561843 0.9031994 0.8666667 0.7785714 0.07868373 0.17213259
## 5 0.003720532 2.43055400 0.8789683 0.9111111 0.7964286 0.09924602 0.10210406
## 6 0.003771907 1.36632284 0.8684524 0.8666667 0.7946429 0.11078013 0.12614360
## 7 0.004808965 0.58526382 0.8428571 0.8444444 0.7428571 0.12813221 0.14054567
## 8 0.004911297 0.20061724 0.8218502 0.6763889 0.7839286 0.13322750 0.16744289
## 9 0.005629841 1.35019752 0.8930556 0.8777778 0.7964286 0.09020180 0.12227833
## 10 0.005642254 185.24975110 0.9180804 0.8875000 0.8214286 0.08155714 0.11720372
## 11 0.006545177 0.09296935 0.8190724 0.6069444 0.8232143 0.13421268 0.18239213
## 12 0.007471523 41.17996347 0.9240327 0.8875000 0.8214286 0.08203642 0.11720372
## 13 0.007619948 0.66682930 0.8696429 0.8666667 0.7821429 0.10902894 0.10210406
## 14 0.008007937 412.07218822 0.9226438 0.8875000 0.8214286 0.08071429 0.11720372
## 15 0.010394362 0.08678718 0.8303819 0.6527778 0.8089286 0.13758504 0.17531571
## 16 0.011821589 13.07907674 0.9438740 0.9208333 0.8464286 0.06384653 0.10598718
## 17 0.012384606 0.35581028 0.8587302 0.8333333 0.7678571 0.12842887 0.14103284
## 18 0.013580611 40.75126495 0.9522073 0.9208333 0.8464286 0.05614662 0.11822337
## 19 0.014508407 16.69657765 0.9535962 0.9430556 0.8607143 0.05648576 0.10886030
## 20 0.014591810 116.40343712 0.9535962 0.9097222 0.8464286 0.05648576 0.11513732
## 21 0.014725783 38.54782847 0.9535962 0.9208333 0.8464286 0.05648576 0.10598718
## 22 0.014997325 1.29929841 0.9232391 0.9097222 0.8232143 0.06686697 0.11513732
## 23 0.015734712 0.10535004 0.8474454 0.6527778 0.8089286 0.13659952 0.18297290
## 24 0.021462739 481.87920593 0.9577629 0.9652778 0.8607143 0.04921559 0.07670390
## 25 0.023003220 0.06568199 0.8655010 0.6986111 0.8500000 0.12355617 0.18026449
## 26 0.023225821 0.25030006 0.8684772 0.7875000 0.8107143 0.12076345 0.12108938
## 27 0.023466522 0.30388775 0.8787946 0.8208333 0.7982143 0.11241867 0.09269670
## 28 0.024581565 183.35932706 0.9621280 0.9541667 0.8607143 0.04223332 0.07912469
## 29 0.026812304 0.32643705 0.8934772 0.8430556 0.8107143 0.09835957 0.09306592
## 30 0.027283954 1.12493978 0.9579613 0.9430556 0.8607143 0.05006799 0.07977216
## SpecSD
## 1 0.17693696
## 2 0.12628310
## 3 0.17578178
## 4 0.16042743
## 5 0.19852774
## 6 0.18884157
## 7 0.18038759
## 8 0.19695996
## 9 0.19852774
## 10 0.12371791
## 11 0.16824297
## 12 0.12371791
## 13 0.18706014
## 14 0.12371791
## 15 0.19029940
## 16 0.10316697
## 17 0.19795582
## 18 0.10316697
## 19 0.09213701
## 20 0.10316697
## 21 0.10316697
## 22 0.20541321
## 23 0.19029940
## 24 0.09213701
## 25 0.21889876
## 26 0.19709483
## 27 0.21346569
## 28 0.09213701
## 29 0.19709483
## 30 0.12423234
##################################
# Reporting the cross-validation results
# for the train set
##################################
# Pull the cross-validated ROC for the best (C, sigma) pair; the outer
# parentheses assign and print in one step
(SVM_R_Train_RandomSearch_Auto_ROCCurveAUC <- SVM_R_Tune_RandomSearch_Auto$results[SVM_R_Tune_RandomSearch_Auto$results$C==SVM_R_Tune_RandomSearch_Auto$bestTune$C &
                                                                                   SVM_R_Tune_RandomSearch_Auto$results$sigma==SVM_R_Tune_RandomSearch_Auto$bestTune$sigma,
                                                                                   c("ROC")])
## [1] 0.962128
##################################
# Independently evaluating the model
# on the test set
##################################
# Pair the observed test-set classes with the model's predicted class
# probabilities (the Class column is excluded from the predictors)
SVM_R_Test_RandomSearch_Auto <- data.frame(SVM_R_Observed = PMA_PreModelling_Test_SVM_R$Class,
                                           SVM_R_Predicted = predict(SVM_R_Tune_RandomSearch_Auto,
                                                                     PMA_PreModelling_Test_SVM_R[,!names(PMA_PreModelling_Test_SVM_R) %in% c("Class")],
                                                                     type = "prob"))
SVM_R_Test_RandomSearch_Auto
## SVM_R_Observed SVM_R_Predicted.M SVM_R_Predicted.R
## 1 R 0.4461118516 0.5538881484
## 2 R 0.0917810626 0.9082189374
## 3 R 0.3003831045 0.6996168955
## 4 R 0.2113270453 0.7886729547
## 5 R 0.0110686706 0.9889313294
## 6 R 0.7937314777 0.2062685223
## 7 R 0.2302256584 0.7697743416
## 8 R 0.4654496465 0.5345503535
## 9 R 0.0718372374 0.9281627626
## 10 R 0.0017355436 0.9982644564
## 11 R 0.2759847540 0.7240152460
## 12 R 0.0883429855 0.9116570145
## 13 R 0.0003928833 0.9996071167
## 14 R 0.0033157653 0.9966842347
## 15 R 0.0791601264 0.9208398736
## 16 R 0.0344303176 0.9655696824
## 17 R 0.0100423552 0.9899576448
## 18 R 0.0063847677 0.9936152323
## 19 R 0.1556639090 0.8443360910
## 20 M 0.1100785449 0.8899214551
## 21 M 0.5315453448 0.4684546552
## 22 M 0.1856970809 0.8143029191
## 23 M 0.3843546079 0.6156453921
## 24 M 0.6661845595 0.3338154405
## 25 M 0.9900636439 0.0099363561
## 26 M 0.9955404724 0.0044595276
## 27 M 0.9829171642 0.0170828358
## 28 M 0.7646969623 0.2353030377
## 29 M 0.9260418003 0.0739581997
## 30 M 0.9831113603 0.0168886397
## 31 M 0.4164007909 0.5835992091
## 32 M 0.5178765245 0.4821234755
## 33 M 0.9468755388 0.0531244612
## 34 M 0.5335050901 0.4664949099
## 35 M 0.9986015025 0.0013984975
## 36 M 0.9948842061 0.0051157939
## 37 M 0.9994503690 0.0005496310
## 38 M 0.9797647607 0.0202352393
## 39 M 0.9994629054 0.0005370946
## 40 M 0.9994536506 0.0005463494
## 41 M 0.5222730772 0.4777269228
##################################
# Reporting the independent evaluation results
# for the test set
##################################
# Build the test-set ROC curve; factor levels are reversed so that "R" is
# treated as the positive class by pROC
SVM_R_Test_RandomSearch_Auto_ROC <- roc(response = SVM_R_Test_RandomSearch_Auto$SVM_R_Observed,
                                        predictor = SVM_R_Test_RandomSearch_Auto$SVM_R_Predicted.R,
                                        levels = rev(levels(SVM_R_Test_RandomSearch_Auto$SVM_R_Observed)))
# Extract the AUC; the outer parentheses assign and print in one step
(SVM_R_Test_RandomSearch_Auto_ROCCurveAUC <- auc(SVM_R_Test_RandomSearch_Auto_ROC)[1])
## [1] 0.9354067
##################################
# Creating a local object
# for the train and test sets
##################################
# Work on local copies so the RDA section leaves the shared data sets untouched
PMA_PreModelling_Train_RDA <- PMA_PreModelling_Train
PMA_PreModelling_Test_RDA <- PMA_PreModelling_Test
##################################
# Creating consistent fold assignments
# for the 10-Fold Cross Validation process
##################################
set.seed(12345678)
# Fix the 10-fold CV assignments once so grid search and random search are
# evaluated on identical resamples
KFold_Indices <- createFolds(PMA_PreModelling_Train_RDA$Class,
                             k = 10,
                             returnTrain = TRUE)
# Random-search control: twoClassSummary provides ROC/Sens/Spec, which
# requires class probabilities
KFold_Control_RandomSearch <- trainControl(method = "cv",
                                           index = KFold_Indices,
                                           summaryFunction = twoClassSummary,
                                           classProbs = TRUE,
                                           search = "random")
# Grid-search control: same folds and summary, default (grid) search
KFold_Control_GridSearch <- trainControl(method = "cv",
                                         index = KFold_Indices,
                                         summaryFunction = twoClassSummary,
                                         classProbs = TRUE)
##################################
# Setting the conditions
# for hyperparameter tuning
##################################
# 6 x 6 manual grid over the RDA regularization parameters (gamma, lambda)
RDA_Grid <- expand.grid(gamma = seq(0, 1, 0.20),
                        lambda = seq(0, 1, 0.20))
##################################
# Running the regularized discriminant analysis model
# by setting the caret method to 'rda'
##################################
##################################
# Using a manual grid search
##################################
set.seed(12345678)
<- train(x = PMA_PreModelling_Train_RDA[,!names(PMA_PreModelling_Train_RDA) %in% c("Class")],
RDA_Tune_GridSearch_Manual y = PMA_PreModelling_Train_RDA$Class,
method = "rda",
tuneGrid = RDA_Grid,
metric = "ROC",
trControl = KFold_Control_GridSearch,
returnResamp = "all")
$finalModel RDA_Tune_GridSearch_Manual
## Call:
## rda.default(x = x, grouping = y, gamma = param$gamma, lambda = param$lambda,
## returnResamp = "all")
##
## Regularization parameters:
## gamma lambda
## 0.6 0.2
##
## Prior probabilities of groups:
## M R
## 0.5329341 0.4670659
##
## Misclassification rate:
## apparent: 1.198 %
$results RDA_Tune_GridSearch_Manual
## gamma lambda ROC Sens Spec ROCSD SensSD SpecSD
## 1 0.0 0.0 0.7171627 0.9222222 0.4571429 0.06867241 0.07499428 0.1733768
## 2 0.0 0.2 0.8133681 0.8444444 0.6678571 0.08674264 0.10734353 0.1307358
## 3 0.0 0.4 0.8363343 0.8222222 0.6803571 0.08956975 0.14054567 0.1025642
## 4 0.0 0.6 0.8492063 0.8222222 0.6785714 0.10624090 0.14054567 0.1240040
## 5 0.0 0.8 0.8196429 0.8333333 0.7267857 0.14048419 0.14103284 0.1925207
## 6 0.0 1.0 0.7682540 0.8000000 0.6750000 0.16285642 0.17213259 0.1911817
## 7 0.2 0.0 0.8772569 0.8777778 0.7339286 0.09411413 0.11049210 0.1905599
## 8 0.2 0.2 0.8867560 0.8333333 0.7714286 0.07884393 0.10798059 0.1944890
## 9 0.2 0.4 0.8819692 0.8222222 0.7553571 0.07297162 0.13042087 0.1653969
## 10 0.2 0.6 0.8773810 0.8333333 0.7821429 0.08398065 0.14103284 0.1189881
## 11 0.2 0.8 0.8648810 0.8444444 0.7446429 0.10567719 0.14998857 0.1656110
## 12 0.2 1.0 0.8486111 0.8666667 0.7428571 0.12071166 0.14628458 0.1897680
## 13 0.4 0.0 0.9031994 0.8666667 0.8250000 0.08263177 0.08764563 0.1787301
## 14 0.4 0.2 0.8970486 0.8444444 0.8232143 0.07794668 0.10734353 0.1461549
## 15 0.4 0.4 0.8934772 0.8444444 0.8232143 0.07570235 0.13042087 0.1337499
## 16 0.4 0.6 0.8863095 0.8555556 0.7964286 0.08527926 0.14861039 0.1597635
## 17 0.4 0.8 0.8734127 0.8555556 0.7964286 0.10487258 0.14861039 0.1985277
## 18 0.4 1.0 0.8527778 0.8666667 0.7428571 0.13214875 0.15537909 0.1803876
## 19 0.6 0.0 0.9075645 0.8555556 0.8500000 0.08292834 0.07499428 0.1419116
## 20 0.6 0.2 0.9115079 0.8555556 0.8232143 0.08101892 0.10540926 0.1337499
## 21 0.6 0.4 0.8980159 0.8777778 0.8107143 0.08518765 0.11049210 0.1786111
## 22 0.6 0.6 0.8894841 0.8555556 0.8232143 0.09272973 0.14861039 0.2054132
## 23 0.6 0.8 0.8634921 0.8444444 0.7964286 0.11629234 0.16728281 0.1985277
## 24 0.6 1.0 0.8503968 0.8666667 0.8089286 0.13213021 0.13658584 0.1992137
## 25 0.8 0.0 0.8888889 0.8222222 0.7964286 0.10146460 0.11944086 0.2153084
## 26 0.8 0.2 0.8837302 0.8444444 0.7964286 0.10085144 0.13042087 0.2070881
## 27 0.8 0.4 0.8748016 0.8333333 0.7839286 0.10354892 0.15930232 0.2055856
## 28 0.8 0.6 0.8664683 0.8555556 0.7839286 0.11154351 0.13907395 0.2055856
## 29 0.8 0.8 0.8549603 0.8444444 0.7839286 0.11584255 0.14054567 0.2055856
## 30 0.8 1.0 0.8418651 0.8333333 0.7714286 0.12604040 0.15044516 0.2032196
## 31 1.0 0.0 0.8085813 0.6986111 0.6928571 0.13201497 0.15577173 0.2070197
## 32 1.0 0.2 0.8113591 0.7208333 0.6928571 0.13044117 0.16620951 0.2070197
## 33 1.0 0.4 0.8099702 0.7319444 0.6928571 0.13302295 0.15741422 0.2070197
## 34 1.0 0.6 0.8099702 0.7319444 0.7053571 0.13302295 0.15741422 0.2144923
## 35 1.0 0.8 0.8115575 0.7430556 0.7053571 0.13163130 0.15621141 0.2144923
## 36 1.0 1.0 0.8115575 0.7541667 0.7053571 0.13163130 0.13514237 0.2144923
##################################
# Reporting the cross-validation results
# for the train set
##################################
# Pull the cross-validated ROC for the best (gamma, lambda) pair; the outer
# parentheses assign and print in one step
(RDA_Train_GridSearch_Manual_ROCCurveAUC <- RDA_Tune_GridSearch_Manual$results[RDA_Tune_GridSearch_Manual$results$gamma==RDA_Tune_GridSearch_Manual$bestTune$gamma &
                                                                               RDA_Tune_GridSearch_Manual$results$lambda==RDA_Tune_GridSearch_Manual$bestTune$lambda,
                                                                               c("ROC")])
## [1] 0.9115079
##################################
# Independently evaluating the model
# on the test set
##################################
# Pair the observed test-set classes with the model's predicted class
# probabilities (the Class column is excluded from the predictors)
RDA_Test_GridSearch_Manual <- data.frame(RDA_Observed = PMA_PreModelling_Test_RDA$Class,
                                         RDA_Predicted = predict(RDA_Tune_GridSearch_Manual,
                                                                 PMA_PreModelling_Test_RDA[,!names(PMA_PreModelling_Test_RDA) %in% c("Class")],
                                                                 type = "prob"))
RDA_Test_GridSearch_Manual
## RDA_Observed RDA_Predicted.M RDA_Predicted.R
## 1 R 9.939995e-01 6.000456e-03
## 2 R 8.730765e-02 9.126924e-01
## 3 R 3.181359e-02 9.681864e-01
## 4 R 1.527510e-01 8.472490e-01
## 5 R 2.466817e-03 9.975332e-01
## 6 R 9.772080e-01 2.279197e-02
## 7 R 9.486356e-01 5.136444e-02
## 8 R 8.394090e-01 1.605910e-01
## 9 R 1.672424e-01 8.327576e-01
## 10 R 1.587608e-06 9.999984e-01
## 11 R 1.833434e-01 8.166566e-01
## 12 R 3.039097e-02 9.696090e-01
## 13 R 2.978790e-04 9.997021e-01
## 14 R 4.653232e-03 9.953468e-01
## 15 R 5.380004e-02 9.462000e-01
## 16 R 2.995551e-02 9.700445e-01
## 17 R 3.556569e-02 9.644343e-01
## 18 R 6.449323e-03 9.935507e-01
## 19 R 7.450271e-04 9.992550e-01
## 20 M 1.813621e-01 8.186379e-01
## 21 M 9.661042e-01 3.389583e-02
## 22 M 5.517503e-02 9.448250e-01
## 23 M 7.867602e-01 2.132398e-01
## 24 M 5.603520e-01 4.396480e-01
## 25 M 8.893416e-01 1.106584e-01
## 26 M 9.962381e-01 3.761856e-03
## 27 M 8.889273e-01 1.110727e-01
## 28 M 6.322747e-01 3.677253e-01
## 29 M 9.999874e-01 1.264440e-05
## 30 M 1.000000e+00 3.973935e-08
## 31 M 1.451264e-01 8.548736e-01
## 32 M 2.717538e-01 7.282462e-01
## 33 M 9.985120e-01 1.488016e-03
## 34 M 3.199744e-01 6.800256e-01
## 35 M 9.999998e-01 1.507811e-07
## 36 M 9.997433e-01 2.566687e-04
## 37 M 9.996882e-01 3.118405e-04
## 38 M 9.952482e-01 4.751830e-03
## 39 M 9.989721e-01 1.027896e-03
## 40 M 9.999744e-01 2.561338e-05
## 41 M 5.232949e-01 4.767051e-01
##################################
# Reporting the independent evaluation results
# for the test set
##################################
# Build the test-set ROC curve; factor levels are reversed so that "R" is
# treated as the positive class by pROC
RDA_Test_GridSearch_Manual_ROC <- roc(response = RDA_Test_GridSearch_Manual$RDA_Observed,
                                      predictor = RDA_Test_GridSearch_Manual$RDA_Predicted.R,
                                      levels = rev(levels(RDA_Test_GridSearch_Manual$RDA_Observed)))
# Extract the AUC; the outer parentheses assign and print in one step
(RDA_Test_GridSearch_Manual_ROCCurveAUC <- auc(RDA_Test_GridSearch_Manual_ROC)[1])
## [1] 0.8755981
##################################
# Using an automated grid search
##################################
set.seed(12345678)
<- train(x = PMA_PreModelling_Train_RDA[,!names(PMA_PreModelling_Train_RDA) %in% c("Class")],
RDA_Tune_GridSearch_Auto y = PMA_PreModelling_Train_RDA$Class,
method = "rda",
tuneLength = 8,
metric = "ROC",
trControl = KFold_Control_GridSearch,
returnResamp = "all")
$finalModel RDA_Tune_GridSearch_Auto
## Call:
## rda.default(x = x, grouping = y, gamma = param$gamma, lambda = param$lambda,
## returnResamp = "all")
##
## Regularization parameters:
## gamma lambda
## 0.5714286 0.1428571
##
## Prior probabilities of groups:
## M R
## 0.5329341 0.4670659
##
## Misclassification rate:
## apparent: 1.198 %
$results RDA_Tune_GridSearch_Auto
## gamma lambda ROC Sens Spec ROCSD SensSD
## 1 0.0000000 0.0000000 0.7171627 0.9222222 0.4571429 0.06867241 0.07499428
## 2 0.0000000 0.1428571 0.8037698 0.8444444 0.6303571 0.08966800 0.10734353
## 3 0.0000000 0.2857143 0.8246776 0.8555556 0.6928571 0.08393174 0.10540926
## 4 0.0000000 0.4285714 0.8349454 0.8222222 0.6803571 0.08996755 0.14054567
## 5 0.0000000 0.5714286 0.8535714 0.8222222 0.6928571 0.09877685 0.14054567
## 6 0.0000000 0.7142857 0.8442460 0.8444444 0.7160714 0.10832942 0.14054567
## 7 0.0000000 0.8571429 0.8033730 0.8333333 0.6982143 0.15380205 0.14103284
## 8 0.0000000 1.0000000 0.7682540 0.8000000 0.6750000 0.16285642 0.17213259
## 9 0.1428571 0.0000000 0.8730903 0.8777778 0.7339286 0.09736862 0.11049210
## 10 0.1428571 0.1428571 0.8750744 0.8666667 0.7321429 0.07161015 0.11475506
## 11 0.1428571 0.2857143 0.8792163 0.8444444 0.7696429 0.07041525 0.13042087
## 12 0.1428571 0.4285714 0.8776042 0.8333333 0.7678571 0.07485983 0.13094570
## 13 0.1428571 0.5714286 0.8716270 0.8111111 0.7428571 0.08441315 0.13907395
## 14 0.1428571 0.7142857 0.8601190 0.8444444 0.7821429 0.09937926 0.13042087
## 15 0.1428571 0.8571429 0.8587302 0.8555556 0.7428571 0.10951231 0.13907395
## 16 0.1428571 1.0000000 0.8523810 0.8444444 0.7285714 0.11916587 0.17529125
## 17 0.2857143 0.0000000 0.8972718 0.8555556 0.7714286 0.08916942 0.10540926
## 18 0.2857143 0.1428571 0.8914931 0.8444444 0.7982143 0.08791021 0.09369712
## 19 0.2857143 0.2857143 0.8912946 0.8333333 0.7964286 0.07678601 0.13094570
## 20 0.2857143 0.4285714 0.8905010 0.8333333 0.8089286 0.07040285 0.13094570
## 21 0.2857143 0.5714286 0.8875000 0.8444444 0.8089286 0.07974474 0.14998857
## 22 0.2857143 0.7142857 0.8734127 0.8444444 0.7714286 0.09318499 0.14998857
## 23 0.2857143 0.8571429 0.8676587 0.8555556 0.7571429 0.10970885 0.15757072
## 24 0.2857143 1.0000000 0.8500000 0.8555556 0.7553571 0.13000432 0.15757072
## 25 0.4285714 0.0000000 0.9018105 0.8666667 0.8250000 0.08480293 0.08764563
## 26 0.4285714 0.1428571 0.9014137 0.8444444 0.8232143 0.07891334 0.10734353
## 27 0.4285714 0.2857143 0.8962550 0.8555556 0.8357143 0.07226627 0.10540926
## 28 0.4285714 0.4285714 0.8934772 0.8555556 0.8107143 0.07765896 0.12883353
## 29 0.4285714 0.5714286 0.8821429 0.8555556 0.8107143 0.08884715 0.14861039
## 30 0.4285714 0.7142857 0.8777778 0.8555556 0.7839286 0.10030346 0.14861039
## 31 0.4285714 0.8571429 0.8662698 0.8555556 0.7839286 0.11559212 0.14861039
## 32 0.4285714 1.0000000 0.8501984 0.8666667 0.7553571 0.13183873 0.15537909
## 33 0.5714286 0.0000000 0.9089534 0.8555556 0.8500000 0.08321354 0.07499428
## 34 0.5714286 0.1428571 0.9101438 0.8555556 0.8357143 0.07894354 0.10540926
## 35 0.5714286 0.2857143 0.9037698 0.8666667 0.8107143 0.07659547 0.10210406
## 36 0.5714286 0.4285714 0.8982143 0.8777778 0.8107143 0.08393506 0.11049210
## 37 0.5714286 0.5714286 0.8894841 0.8555556 0.7982143 0.09062560 0.14861039
## 38 0.5714286 0.7142857 0.8809524 0.8555556 0.8232143 0.10067259 0.14861039
## 39 0.5714286 0.8571429 0.8575397 0.8555556 0.7964286 0.12395034 0.14861039
## 40 0.5714286 1.0000000 0.8501984 0.8777778 0.7964286 0.13216348 0.16101530
## 41 0.7142857 0.0000000 0.9065476 0.8430556 0.8625000 0.09452560 0.05634090
## 42 0.7142857 0.1428571 0.9043651 0.8555556 0.8232143 0.08854929 0.10540926
## 43 0.7142857 0.2857143 0.8952381 0.8555556 0.8232143 0.08927739 0.10540926
## 44 0.7142857 0.4285714 0.8950397 0.8444444 0.8107143 0.08920215 0.14054567
## 45 0.7142857 0.5714286 0.8775794 0.8555556 0.7839286 0.10226352 0.12883353
## 46 0.7142857 0.7142857 0.8690476 0.8444444 0.7839286 0.11144916 0.17529125
## 47 0.7142857 0.8571429 0.8591270 0.8444444 0.7964286 0.12080203 0.18294947
## 48 0.7142857 1.0000000 0.8450397 0.8333333 0.7964286 0.13314348 0.17568209
## 49 0.8571429 0.0000000 0.8740079 0.8111111 0.8089286 0.11105697 0.16604824
## 50 0.8571429 0.1428571 0.8738095 0.8222222 0.7964286 0.10755929 0.16728281
## 51 0.8571429 0.2857143 0.8724206 0.8111111 0.7964286 0.10643033 0.16604824
## 52 0.8571429 0.4285714 0.8634921 0.8222222 0.7964286 0.10501013 0.17529125
## 53 0.8571429 0.5714286 0.8591270 0.8111111 0.7964286 0.10688125 0.17411347
## 54 0.8571429 0.7142857 0.8535714 0.8333333 0.7964286 0.11949797 0.16769232
## 55 0.8571429 0.8571429 0.8442460 0.8333333 0.7714286 0.12244652 0.16769232
## 56 0.8571429 1.0000000 0.8386905 0.8333333 0.7839286 0.12595708 0.16769232
## 57 1.0000000 0.0000000 0.8085813 0.6986111 0.6928571 0.13201497 0.15577173
## 58 1.0000000 0.1428571 0.8113591 0.7097222 0.6928571 0.13044117 0.14821324
## 59 1.0000000 0.2857143 0.8113591 0.7208333 0.6928571 0.13044117 0.16620951
## 60 1.0000000 0.4285714 0.8099702 0.7319444 0.6928571 0.13302295 0.15741422
## 61 1.0000000 0.5714286 0.8099702 0.7319444 0.7053571 0.13302295 0.15741422
## 62 1.0000000 0.7142857 0.8115575 0.7430556 0.7053571 0.13163130 0.15621141
## 63 1.0000000 0.8571429 0.8115575 0.7541667 0.7053571 0.13163130 0.13514237
## 64 1.0000000 1.0000000 0.8115575 0.7541667 0.7053571 0.13163130 0.13514237
## SpecSD
## 1 0.1733768
## 2 0.1666773
## 3 0.1026850
## 4 0.1025642
## 5 0.1183910
## 6 0.1466389
## 7 0.2088344
## 8 0.1911817
## 9 0.1905599
## 10 0.1592749
## 11 0.1534860
## 12 0.1590523
## 13 0.1599851
## 14 0.1189881
## 15 0.1326193
## 16 0.1855387
## 17 0.1853476
## 18 0.1679479
## 19 0.1484997
## 20 0.1494154
## 21 0.1373053
## 22 0.1655575
## 23 0.1805839
## 24 0.1943523
## 25 0.1787301
## 26 0.1461549
## 27 0.1319765
## 28 0.1465785
## 29 0.1579794
## 30 0.1879388
## 31 0.1969600
## 32 0.1852042
## 33 0.1419116
## 34 0.1319765
## 35 0.1465785
## 36 0.1786111
## 37 0.1965278
## 38 0.2054132
## 39 0.1985277
## 40 0.1895812
## 41 0.1608355
## 42 0.1877502
## 43 0.1877502
## 44 0.2139879
## 45 0.2055856
## 46 0.2055856
## 47 0.1985277
## 48 0.2070881
## 49 0.1992137
## 50 0.1985277
## 51 0.1985277
## 52 0.1985277
## 53 0.1985277
## 54 0.1985277
## 55 0.2032196
## 56 0.1969600
## 57 0.2070197
## 58 0.2070197
## 59 0.2070197
## 60 0.2070197
## 61 0.2144923
## 62 0.2144923
## 63 0.2144923
## 64 0.2144923
##################################
# Reporting the cross-validation results
# for the train set
##################################
# Pull the cross-validated ROC for the best (gamma, lambda) pair; the outer
# parentheses assign and print in one step
(RDA_Train_GridSearch_Auto_ROCCurveAUC <- RDA_Tune_GridSearch_Auto$results[RDA_Tune_GridSearch_Auto$results$gamma==RDA_Tune_GridSearch_Auto$bestTune$gamma &
                                                                           RDA_Tune_GridSearch_Auto$results$lambda==RDA_Tune_GridSearch_Auto$bestTune$lambda,
                                                                           c("ROC")])
## [1] 0.9101438
##################################
# Independently evaluating the model
# on the test set
##################################
# Pair the observed test-set classes with the model's predicted class
# probabilities (the Class column is excluded from the predictors)
RDA_Test_GridSearch_Auto <- data.frame(RDA_Observed = PMA_PreModelling_Test_RDA$Class,
                                       RDA_Predicted = predict(RDA_Tune_GridSearch_Auto,
                                                               PMA_PreModelling_Test_RDA[,!names(PMA_PreModelling_Test_RDA) %in% c("Class")],
                                                               type = "prob"))
RDA_Test_GridSearch_Auto
## RDA_Observed RDA_Predicted.M RDA_Predicted.R
## 1 R 9.973229e-01 2.677145e-03
## 2 R 6.705582e-02 9.329442e-01
## 3 R 1.573949e-02 9.842605e-01
## 4 R 1.929045e-01 8.070955e-01
## 5 R 1.876099e-03 9.981239e-01
## 6 R 9.861147e-01 1.388529e-02
## 7 R 9.626076e-01 3.739240e-02
## 8 R 8.787244e-01 1.212756e-01
## 9 R 1.764799e-01 8.235201e-01
## 10 R 9.506597e-07 9.999990e-01
## 11 R 1.250982e-01 8.749018e-01
## 12 R 2.167116e-02 9.783288e-01
## 13 R 3.972024e-04 9.996028e-01
## 14 R 5.146713e-03 9.948533e-01
## 15 R 5.584792e-02 9.441521e-01
## 16 R 2.447285e-02 9.755271e-01
## 17 R 3.005016e-02 9.699498e-01
## 18 R 4.542990e-03 9.954570e-01
## 19 R 5.643707e-04 9.994356e-01
## 20 M 1.981419e-01 8.018581e-01
## 21 M 9.664446e-01 3.355538e-02
## 22 M 4.726127e-02 9.527387e-01
## 23 M 8.819351e-01 1.180649e-01
## 24 M 6.380720e-01 3.619280e-01
## 25 M 9.078489e-01 9.215112e-02
## 26 M 9.968674e-01 3.132565e-03
## 27 M 8.782702e-01 1.217298e-01
## 28 M 6.556653e-01 3.443347e-01
## 29 M 9.999961e-01 3.904439e-06
## 30 M 1.000000e+00 5.817384e-09
## 31 M 1.141914e-01 8.858086e-01
## 32 M 2.175458e-01 7.824542e-01
## 33 M 9.988325e-01 1.167511e-03
## 34 M 3.347904e-01 6.652096e-01
## 35 M 9.999999e-01 5.034955e-08
## 36 M 9.998912e-01 1.087956e-04
## 37 M 9.998089e-01 1.910645e-04
## 38 M 9.965992e-01 3.400819e-03
## 39 M 9.993342e-01 6.657531e-04
## 40 M 9.999922e-01 7.787154e-06
## 41 M 5.640104e-01 4.359896e-01
##################################
# Reporting the independent evaluation results
# for the test set
##################################
# Build the test-set ROC curve; factor levels are reversed so that "R" is
# treated as the positive class by pROC
RDA_Test_GridSearch_Auto_ROC <- roc(response = RDA_Test_GridSearch_Auto$RDA_Observed,
                                    predictor = RDA_Test_GridSearch_Auto$RDA_Predicted.R,
                                    levels = rev(levels(RDA_Test_GridSearch_Auto$RDA_Observed)))
# Extract the AUC; the outer parentheses assign and print in one step
(RDA_Test_GridSearch_Auto_ROCCurveAUC <- auc(RDA_Test_GridSearch_Auto_ROC)[1])
## [1] 0.8708134
##################################
# Using an automated random search
##################################
set.seed(12345678)
<- train(x = PMA_PreModelling_Train_RDA[,!names(PMA_PreModelling_Train_RDA) %in% c("Class")],
RDA_Tune_RandomSearch_Auto y = PMA_PreModelling_Train_RDA$Class,
method = "rda",
tuneLength = 36,
metric = "ROC",
trControl = KFold_Control_RandomSearch,
returnResamp = "all")
$finalModel RDA_Tune_RandomSearch_Auto
## Call:
## rda.default(x = x, grouping = y, gamma = param$gamma, lambda = param$lambda,
## returnResamp = "all")
##
## Regularization parameters:
## gamma lambda
## 0.47910538 0.09509243
##
## Prior probabilities of groups:
## M R
## 0.5329341 0.4670659
##
## Misclassification rate:
## apparent: 0.599 %
$results RDA_Tune_RandomSearch_Auto
## gamma lambda ROC Sens Spec ROCSD SensSD
## 1 0.008678354 0.422683378 0.8468502 0.8222222 0.6785714 0.08719860 0.14054567
## 2 0.063090573 0.159730318 0.8601935 0.8444444 0.6928571 0.06592393 0.11944086
## 3 0.092448982 0.851543210 0.8541667 0.8444444 0.7535714 0.10672665 0.13042087
## 4 0.224852071 0.014492429 0.8857639 0.8666667 0.7464286 0.09217891 0.10210406
## 5 0.225922084 0.240498578 0.8839534 0.8333333 0.7839286 0.07936822 0.10798059
## 6 0.242685425 0.479121222 0.8843502 0.8333333 0.7839286 0.07331041 0.13094570
## 7 0.285900684 0.818685511 0.8692460 0.8444444 0.7446429 0.10440292 0.14998857
## 8 0.299823386 0.296154688 0.8926835 0.8333333 0.7964286 0.07517150 0.13094570
## 9 0.309769555 0.501567030 0.8905010 0.8444444 0.7964286 0.07483025 0.13042087
## 10 0.371572583 0.530818735 0.8918651 0.8555556 0.8089286 0.07707935 0.14861039
## 11 0.389756622 0.291965252 0.8952629 0.8444444 0.8232143 0.07455911 0.10734353
## 12 0.398705438 0.020711658 0.9004216 0.8666667 0.8250000 0.08738713 0.08764563
## 13 0.398780852 0.126081060 0.8986359 0.8555556 0.8375000 0.08278481 0.09147473
## 14 0.404947380 0.001431841 0.9018105 0.8666667 0.8250000 0.08480293 0.08764563
## 15 0.412378052 0.370807810 0.8948661 0.8444444 0.8357143 0.07713825 0.13042087
## 16 0.479105382 0.095092430 0.9057788 0.8444444 0.8500000 0.07887411 0.10734353
## 17 0.521891946 0.597569945 0.8863095 0.8555556 0.7982143 0.09064008 0.14861039
## 18 0.546598573 0.763725982 0.8718254 0.8555556 0.8232143 0.10726608 0.14861039
## 19 0.562150992 0.294328171 0.9037698 0.8666667 0.8107143 0.07603375 0.10210406
## 20 0.606571331 0.651290618 0.8880952 0.8444444 0.8232143 0.09512412 0.16728281
## 21 0.626348499 0.357598145 0.8966270 0.8777778 0.8107143 0.08646307 0.11049210
## 22 0.694459123 0.453600885 0.8950397 0.8444444 0.8107143 0.08920215 0.14054567
## 23 0.701360101 0.794419097 0.8619048 0.8555556 0.7964286 0.11648175 0.18182130
## 24 0.750428761 0.529016046 0.8759921 0.8555556 0.7839286 0.10313750 0.12883353
## 25 0.813903614 0.401602007 0.8734127 0.8333333 0.7839286 0.10343205 0.15930232
## 26 0.824399956 0.753058456 0.8535714 0.8444444 0.7839286 0.11859777 0.14054567
## 27 0.837353629 0.650854883 0.8577381 0.8333333 0.7964286 0.11107193 0.16769232
## 28 0.864207304 0.669060568 0.8549603 0.8222222 0.7964286 0.11659154 0.16728281
## 29 0.884520706 0.274960286 0.8638889 0.8111111 0.7839286 0.10443957 0.17411347
## 30 0.903947357 0.122033964 0.8636905 0.7777778 0.7839286 0.10432161 0.16563466
## 31 0.933136827 0.875548703 0.8331349 0.7777778 0.7321429 0.12693055 0.18144368
## 32 0.947211719 0.696390216 0.8315724 0.7777778 0.7321429 0.12592242 0.18144368
## 33 0.958416023 0.536026059 0.8244296 0.7777778 0.7321429 0.13030047 0.18144368
## 34 0.960019596 0.213178845 0.8285962 0.7555556 0.7321429 0.12474957 0.19457667
## 35 0.993158175 0.642527189 0.8145089 0.7541667 0.7053571 0.13117580 0.17098946
## 36 0.994644627 0.887776134 0.8115575 0.7541667 0.7053571 0.13163130 0.17098946
## SpecSD
## 1 0.1091089
## 2 0.1596748
## 3 0.1325124
## 4 0.2044363
## 5 0.1684534
## 6 0.1463971
## 7 0.1656110
## 8 0.1484997
## 9 0.1484997
## 10 0.1373053
## 11 0.1461549
## 12 0.1787301
## 13 0.1564582
## 14 0.1787301
## 15 0.1319765
## 16 0.1419116
## 17 0.1965278
## 18 0.2054132
## 19 0.1465785
## 20 0.2054132
## 21 0.1786111
## 22 0.2139879
## 23 0.1985277
## 24 0.2055856
## 25 0.2055856
## 26 0.2055856
## 27 0.1985277
## 28 0.1985277
## 29 0.1879388
## 30 0.1879388
## 31 0.2117910
## 32 0.2117910
## 33 0.2117910
## 34 0.2117910
## 35 0.2144923
## 36 0.2144923
##################################
# Reporting the cross-validation results
# for the train set
##################################
# Pull the cross-validated ROC for the best (gamma, lambda) pair; the outer
# parentheses assign and print in one step
(RDA_Train_RandomSearch_Auto_ROCCurveAUC <- RDA_Tune_RandomSearch_Auto$results[RDA_Tune_RandomSearch_Auto$results$gamma==RDA_Tune_RandomSearch_Auto$bestTune$gamma &
                                                                               RDA_Tune_RandomSearch_Auto$results$lambda==RDA_Tune_RandomSearch_Auto$bestTune$lambda,
                                                                               c("ROC")])
## [1] 0.9057788
##################################
# Independently evaluating the model
# on the test set
##################################
# NOTE(review): reconstructed from extraction-garbled lines (assignment target
# and argument order had been scrambled).
# Score the held-out test set with the tuned RDA model: keep the observed class
# next to the predicted class probabilities (type = "prob" gives one column per
# class level, here M and R).
RDA_Test_RandomSearch_Auto <- data.frame(
  RDA_Observed  = PMA_PreModelling_Test_RDA$Class,
  RDA_Predicted = predict(RDA_Tune_RandomSearch_Auto,
                          PMA_PreModelling_Test_RDA[, !names(PMA_PreModelling_Test_RDA) %in% c("Class")],
                          type = "prob"))
# Print the observed-vs-predicted table
RDA_Test_RandomSearch_Auto
## RDA_Observed RDA_Predicted.M RDA_Predicted.R
## 1 R 9.996342e-01 3.657658e-04
## 2 R 5.444014e-02 9.455599e-01
## 3 R 2.402096e-03 9.975979e-01
## 4 R 3.138262e-01 6.861738e-01
## 5 R 1.116578e-03 9.988834e-01
## 6 R 9.916533e-01 8.346719e-03
## 7 R 9.595638e-01 4.043622e-02
## 8 R 9.431910e-01 5.680897e-02
## 9 R 2.228901e-01 7.771099e-01
## 10 R 5.857581e-07 9.999994e-01
## 11 R 4.288120e-02 9.571188e-01
## 12 R 9.111884e-03 9.908881e-01
## 13 R 1.101115e-03 9.988989e-01
## 14 R 6.469385e-03 9.935306e-01
## 15 R 6.034082e-02 9.396592e-01
## 16 R 1.707100e-02 9.829290e-01
## 17 R 2.127312e-02 9.787269e-01
## 18 R 1.747011e-03 9.982530e-01
## 19 R 3.356219e-04 9.996644e-01
## 20 M 2.785323e-01 7.214677e-01
## 21 M 9.570640e-01 4.293596e-02
## 22 M 2.667801e-02 9.733220e-01
## 23 M 9.599714e-01 4.002858e-02
## 24 M 7.331989e-01 2.668011e-01
## 25 M 9.406450e-01 5.935500e-02
## 26 M 9.982936e-01 1.706444e-03
## 27 M 8.784421e-01 1.215579e-01
## 28 M 7.213115e-01 2.786885e-01
## 29 M 9.999994e-01 5.834747e-07
## 30 M 1.000000e+00 1.733133e-10
## 31 M 7.585306e-02 9.241469e-01
## 32 M 1.411484e-01 8.588516e-01
## 33 M 9.991374e-01 8.626115e-04
## 34 M 3.976876e-01 6.023124e-01
## 35 M 1.000000e+00 5.940803e-09
## 36 M 9.999789e-01 2.105140e-05
## 37 M 9.999167e-01 8.334350e-05
## 38 M 9.984505e-01 1.549530e-03
## 39 M 9.997395e-01 2.604779e-04
## 40 M 9.999993e-01 7.294063e-07
## 41 M 7.207425e-01 2.792575e-01
##################################
# Reporting the independent evaluation results
# for the test set
##################################
# NOTE(review): reconstructed from extraction-garbled lines.
# Build the ROC curve on the test set using the predicted probability of the
# second level; rev(levels(...)) sets the positive class ordering expected
# by pROC::roc.
RDA_Test_RandomSearch_Auto_ROC <- roc(response  = RDA_Test_RandomSearch_Auto$RDA_Observed,
                                      predictor = RDA_Test_RandomSearch_Auto$RDA_Predicted.R,
                                      levels    = rev(levels(RDA_Test_RandomSearch_Auto$RDA_Observed)))
# Test-set AUC; outer parentheses auto-print the value.
(RDA_Test_RandomSearch_Auto_ROCCurveAUC <- auc(RDA_Test_RandomSearch_Auto_ROC)[1])
## [1] 0.861244
##################################
# Consolidating all results
# from the evaluated hyperparameter tuning methods
# for Support Vector Machine - Radial Basis Function Kernel (SVM_R)
##################################
# NOTE(review): reconstructed from extraction-garbled lines (each assignment
# target had been fused onto the following line).
# Collect the per-candidate resampling results from the three SVM_R tuning runs.
SVM_R_Tune_GridSearch_Manual_ROC <- SVM_R_Tune_GridSearch_Manual$results
SVM_R_Tune_GridSearch_Auto_ROC   <- SVM_R_Tune_GridSearch_Auto$results
SVM_R_Tune_RandomSearch_Auto_ROC <- SVM_R_Tune_RandomSearch_Auto$results

# Stack the three result tables into one data frame for joint plotting.
SVM_R_Tune_All <- as.data.frame(rbind(SVM_R_Tune_GridSearch_Manual_ROC,
                                      SVM_R_Tune_GridSearch_Auto_ROC,
                                      SVM_R_Tune_RandomSearch_Auto_ROC))

# Label each row with the tuning method that produced it.
SVM_R_Tune_All$Method <- c(rep("Manual Grid Search (MGS)", nrow(SVM_R_Tune_GridSearch_Manual_ROC)),
                           rep("Automated Grid Search (AGS)", nrow(SVM_R_Tune_GridSearch_Auto_ROC)),
                           rep("Automated Random Search (ARS)", nrow(SVM_R_Tune_RandomSearch_Auto_ROC)))
# Fix the facet/legend ordering of the methods.
SVM_R_Tune_All$Method <- factor(SVM_R_Tune_All$Method,
                                levels = c("Manual Grid Search (MGS)",
                                           "Automated Grid Search (AGS)",
                                           "Automated Random Search (ARS)"))
##################################
# Plotting all results
# from the evaluated hyperparameter tuning methods
##################################
# Visualize the SVM_R tuning surface: each point is one (sigma, C) candidate,
# colored and sized by its cross-validated ROC, faceted by tuning method.
ggplot(SVM_R_Tune_All,
       aes(x = sigma, y = C, color = ROC, size = ROC)) +
  geom_point() +
  scale_color_gradient(low = "blue", high = "red") +
  scale_x_continuous(name   = "Sigma",
                     limits = c(0, 0.03),
                     breaks = seq(0, 0.03, by = 0.01)) +
  scale_y_continuous(name   = "C",
                     limits = c(0, 600),
                     breaks = seq(0, 600, by = 100)) +
  facet_grid(. ~ Method) +
  theme_bw() +
  theme(legend.position = "top",
        plot.title = element_text(color = "black", size = 15, face = "bold", hjust = 0.50)) +
  labs(title = "Hyperparameter Tuning : Support Vector Machine - Radial Basis Function Kernel (SVM_R)")
##################################
# Consolidating all results
# from the evaluated hyperparameter tuning methods
# for Regularized Discriminant Analysis (RDA)
##################################
# NOTE(review): reconstructed from extraction-garbled lines (each assignment
# target had been fused onto the following line).
# Collect the per-candidate resampling results from the three RDA tuning runs.
RDA_Tune_GridSearch_Manual_ROC <- RDA_Tune_GridSearch_Manual$results
RDA_Tune_GridSearch_Auto_ROC   <- RDA_Tune_GridSearch_Auto$results
RDA_Tune_RandomSearch_Auto_ROC <- RDA_Tune_RandomSearch_Auto$results

# Stack the three result tables into one data frame for joint plotting.
RDA_Tune_All <- as.data.frame(rbind(RDA_Tune_GridSearch_Manual_ROC,
                                    RDA_Tune_GridSearch_Auto_ROC,
                                    RDA_Tune_RandomSearch_Auto_ROC))

# Label each row with the tuning method that produced it.
RDA_Tune_All$Method <- c(rep("Manual Grid Search (MGS)", nrow(RDA_Tune_GridSearch_Manual_ROC)),
                         rep("Automated Grid Search (AGS)", nrow(RDA_Tune_GridSearch_Auto_ROC)),
                         rep("Automated Random Search (ARS)", nrow(RDA_Tune_RandomSearch_Auto_ROC)))
# Fix the facet/legend ordering of the methods.
RDA_Tune_All$Method <- factor(RDA_Tune_All$Method,
                              levels = c("Manual Grid Search (MGS)",
                                         "Automated Grid Search (AGS)",
                                         "Automated Random Search (ARS)"))
##################################
# Plotting all results
# from the evaluated hyperparameter tuning methods
##################################
# Visualize the RDA tuning surface: each point is one (gamma, lambda)
# candidate, colored and sized by its cross-validated ROC, faceted by method.
ggplot(RDA_Tune_All,
       aes(x = gamma, y = lambda, color = ROC, size = ROC)) +
  geom_point() +
  scale_color_gradient(low = "blue", high = "red") +
  scale_x_continuous(name   = "Gamma",
                     limits = c(0, 1),
                     breaks = seq(0, 1, by = 0.10)) +
  scale_y_continuous(name   = "Lambda",
                     limits = c(0, 1),
                     breaks = seq(0, 1, by = 0.10)) +
  facet_grid(. ~ Method) +
  theme_bw() +
  theme(legend.position = "top",
        plot.title = element_text(color = "black", size = 15, face = "bold", hjust = 0.50)) +
  labs(title = "Hyperparameter Tuning : Regularized Discriminant Analysis (RDA)")
##################################
# Consolidating all evaluation results
# for the train and test sets
# using the AUROC metric
##################################
# NOTE(review): reconstructed from extraction-garbled lines (assignment targets
# had been fused onto following lines).
# Consolidate the cross-validation and test-set AUROC values for all six
# model/tuning-method combinations into one summary table.
Model <- c('SVM_R_MGS','SVM_R_AGS','SVM_R_ARS','RDA_MGS','RDA_AGS','RDA_ARS',
           'SVM_R_MGS','SVM_R_AGS','SVM_R_ARS','RDA_MGS','RDA_AGS','RDA_ARS')
Set <- c(rep('Cross-Validation', 6), rep('Test', 6))
ROCCurveAUC <- c(SVM_R_Train_GridSearch_Manual_ROCCurveAUC,
                 SVM_R_Train_GridSearch_Auto_ROCCurveAUC,
                 SVM_R_Train_RandomSearch_Auto_ROCCurveAUC,
                 RDA_Train_GridSearch_Manual_ROCCurveAUC,
                 RDA_Train_GridSearch_Auto_ROCCurveAUC,
                 RDA_Train_RandomSearch_Auto_ROCCurveAUC,
                 SVM_R_Test_GridSearch_Manual_ROCCurveAUC,
                 SVM_R_Test_GridSearch_Auto_ROCCurveAUC,
                 SVM_R_Test_RandomSearch_Auto_ROCCurveAUC,
                 RDA_Test_GridSearch_Manual_ROCCurveAUC,
                 RDA_Test_GridSearch_Auto_ROCCurveAUC,
                 RDA_Test_RandomSearch_Auto_ROCCurveAUC)
ROCCurveAUC_Summary <- as.data.frame(cbind(Model, Set, ROCCurveAUC))
# cbind() coerced every column to character; restore the AUC column to numeric.
ROCCurveAUC_Summary$ROCCurveAUC <- as.numeric(as.character(ROCCurveAUC_Summary$ROCCurveAUC))
# Fix the plotting order of the evaluation sets and models.
ROCCurveAUC_Summary$Set <- factor(ROCCurveAUC_Summary$Set,
                                  levels = c("Cross-Validation",
                                             "Test"))
ROCCurveAUC_Summary$Model <- factor(ROCCurveAUC_Summary$Model,
                                    levels = c('SVM_R_MGS',
                                               'SVM_R_AGS',
                                               'SVM_R_ARS',
                                               'RDA_MGS',
                                               'RDA_AGS',
                                               'RDA_ARS'))
print(ROCCurveAUC_Summary, row.names=FALSE)
## Model Set ROCCurveAUC
## SVM_R_MGS Cross-Validation 0.9662946
## SVM_R_AGS Cross-Validation 0.9351438
## SVM_R_ARS Cross-Validation 0.9621280
## RDA_MGS Cross-Validation 0.9115079
## RDA_AGS Cross-Validation 0.9101438
## RDA_ARS Cross-Validation 0.9057788
## SVM_R_MGS Test 0.9377990
## SVM_R_AGS Test 0.9114833
## SVM_R_ARS Test 0.9354067
## RDA_MGS Test 0.8755981
## RDA_AGS Test 0.8708134
## RDA_ARS Test 0.8612440
# NOTE(review): reconstructed from extraction-garbled lines (assignment target
# had been fused onto the following line).
# Dot plot of AUROC per model, grouped by evaluation set; outer parentheses
# auto-print (draw) the lattice plot.
(ROCCurveAUC_Plot <- dotplot(Model ~ ROCCurveAUC,
                             data = ROCCurveAUC_Summary,
                             groups = Set,
                             main = "Classification Model Performance Comparison",
                             ylab = "Model",
                             xlab = "AUROC",
                             auto.key = list(adj=1, space="top", columns=2),
                             type = c("p", "h"),
                             origin = 0,
                             alpha = 0.45,
                             pch = 16,
                             cex = 2))
##################################
# Consolidating the resampling results
# for the candidate models
##################################
# NOTE(review): reconstructed from extraction-garbled lines (assignment target
# had been fused onto the following line).
# Pool the cross-validation resampling results of all six tuned models so they
# can be compared on identical resamples; outer parentheses auto-print.
(OverallResampling <- resamples(list(SVM_R_MGS = SVM_R_Tune_GridSearch_Manual,
                                     SVM_R_AGS = SVM_R_Tune_GridSearch_Auto,
                                     SVM_R_ARS = SVM_R_Tune_RandomSearch_Auto,
                                     RDA_MGS = RDA_Tune_GridSearch_Manual,
                                     RDA_AGS = RDA_Tune_GridSearch_Auto,
                                     RDA_ARS = RDA_Tune_RandomSearch_Auto)))
##
## Call:
## resamples.default(x = list(SVM_R_MGS = SVM_R_Tune_GridSearch_Manual,
##     SVM_R_AGS = SVM_R_Tune_GridSearch_Auto, SVM_R_ARS = SVM_R_Tune_RandomSearch_Auto,
##     RDA_MGS = RDA_Tune_GridSearch_Manual, RDA_AGS = RDA_Tune_GridSearch_Auto,
##     RDA_ARS = RDA_Tune_RandomSearch_Auto))
##
## Models: SVM_R_MGS, SVM_R_AGS, SVM_R_ARS, RDA_MGS, RDA_AGS, RDA_ARS
## Number of resamples: 10
## Performance metrics: ROC, Sens, Spec
## Time estimates for: everything, final model fit
# Per-model five-number summaries of the pooled ROC/Sens/Spec resampling results
summary(OverallResampling)
##
## Call:
## summary.resamples(object = OverallResampling)
##
## Models: SVM_R_MGS, SVM_R_AGS, SVM_R_ARS, RDA_MGS, RDA_AGS, RDA_ARS
## Number of resamples: 10
##
## ROC
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## SVM_R_MGS 0.9027778 0.9409722 0.9781746 0.9662946 0.9960938 1 0
## SVM_R_AGS 0.8055556 0.8993056 0.9543651 0.9351438 0.9960938 1 0
## SVM_R_ARS 0.9027778 0.9201389 0.9781746 0.9621280 0.9960938 1 0
## RDA_MGS 0.7638889 0.8680556 0.9126984 0.9115079 0.9811508 1 0
## RDA_AGS 0.7638889 0.8680556 0.9206349 0.9101438 0.9776786 1 0
## RDA_ARS 0.7638889 0.8645833 0.9236111 0.9057788 0.9722222 1 0
##
## Sens
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## SVM_R_MGS 0.7777778 0.8784722 1.0000000 0.9319444 1.0000000 1 0
## SVM_R_AGS 0.6666667 0.8020833 0.9444444 0.8986111 1.0000000 1 0
## SVM_R_ARS 0.7777778 0.9166667 1.0000000 0.9541667 1.0000000 1 0
## RDA_MGS 0.6666667 0.7777778 0.8888889 0.8555556 0.8888889 1 0
## RDA_AGS 0.6666667 0.7777778 0.8888889 0.8555556 0.8888889 1 0
## RDA_ARS 0.6666667 0.7777778 0.8333333 0.8444444 0.8888889 1 0
##
## Spec
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## SVM_R_MGS 0.750 0.8616071 0.8750000 0.8857143 0.96875 1 0
## SVM_R_AGS 0.625 0.7500000 0.8750000 0.8339286 0.87500 1 0
## SVM_R_ARS 0.750 0.7767857 0.8750000 0.8607143 0.87500 1 0
## RDA_MGS 0.625 0.7500000 0.8660714 0.8232143 0.87500 1 0
## RDA_AGS 0.625 0.7767857 0.8750000 0.8357143 0.87500 1 0
## RDA_ARS 0.625 0.7812500 0.8750000 0.8500000 0.96875 1 0
##################################
# Exploring the resampling results
##################################
# Box-and-whisker view of the resampled metric distributions per model.
bwplot(OverallResampling,
       layout = c(3, 1),
       pch = 16,
       cex = 2,
       ylab = "Model",
       main = "Model Resampling Performance Comparison (Range)")
# Dot plot of resampled metric means with 95% confidence intervals.
dotplot(OverallResampling,
        layout = c(3, 1),
        pch = 16,
        cex = 2,
        ylab = "Model",
        main = "Model Resampling Performance Comparison (95% Confidence Interval)")
# Scatterplot matrix of pairwise model performance across resamples.
splom(OverallResampling)
##################################
# Conducting an analysis
# of the performance differences
##################################
# NOTE(review): reconstructed from an extraction-garbled line (assignment
# target had been moved after the expression).
# Pairwise performance differences between models on matched resamples,
# with Bonferroni-adjusted p-values; outer parentheses auto-print.
(ResamplingDifferences <- diff(OverallResampling))
##
## Call:
## diff.resamples(x = OverallResampling)
##
## Models: SVM_R_MGS, SVM_R_AGS, SVM_R_ARS, RDA_MGS, RDA_AGS, RDA_ARS
## Metrics: ROC, Sens, Spec
## Number of differences: 15
## p-value adjustment: bonferroni
# Estimates of the pairwise differences (upper triangle) and p-values (lower)
summary(ResamplingDifferences)
##
## Call:
## summary.diff.resamples(object = ResamplingDifferences)
##
## p-value adjustment: bonferroni
## Upper diagonal: estimates of the difference
## Lower diagonal: p-value for H0: difference = 0
##
## ROC
## SVM_R_MGS SVM_R_AGS SVM_R_ARS RDA_MGS RDA_AGS RDA_ARS
## SVM_R_MGS 0.031151 0.004167 0.054787 0.056151 0.060516
## SVM_R_AGS 0.74969 -0.026984 0.023636 0.025000 0.029365
## SVM_R_ARS 1.00000 0.57915 0.050620 0.051984 0.056349
## RDA_MGS 0.13881 1.00000 0.18357 0.001364 0.005729
## RDA_AGS 0.09628 1.00000 0.12814 1.00000 0.004365
## RDA_ARS 0.10637 1.00000 0.17953 1.00000 1.00000
##
## Sens
## SVM_R_MGS SVM_R_AGS SVM_R_ARS RDA_MGS RDA_AGS RDA_ARS
## SVM_R_MGS 0.03333 -0.02222 0.07639 0.07639 0.08750
## SVM_R_AGS 1.0000 -0.05556 0.04306 0.04306 0.05417
## SVM_R_ARS 1.0000 1.0000 0.09861 0.09861 0.10972
## RDA_MGS 1.0000 1.0000 0.3158 0.00000 0.01111
## RDA_AGS 1.0000 1.0000 0.3158 NA 0.01111
## RDA_ARS 1.0000 1.0000 0.2507 1.0000 1.0000
##
## Spec
## SVM_R_MGS SVM_R_AGS SVM_R_ARS RDA_MGS RDA_AGS RDA_ARS
## SVM_R_MGS 0.051786 0.025000 0.062500 0.050000 0.035714
## SVM_R_AGS 0.5584 -0.026786 0.010714 -0.001786 -0.016071
## SVM_R_ARS 1.0000 1.0000 0.037500 0.025000 0.010714
## RDA_MGS 0.2243 1.0000 1.0000 -0.012500 -0.026786
## RDA_AGS 0.5518 1.0000 1.0000 1.0000 -0.014286
## RDA_ARS 1.0000 1.0000 1.0000 1.0000 1.0000
# Box-and-whisker view of the pairwise model performance differences.
bwplot(ResamplingDifferences,
       layout = c(3, 1),
       pch = 16,
       cex = 2,
       ylab = "Model",
       main = "Model Resampling Performance Comparison (Differences)")