##################################
# Loading R libraries
##################################
library(AppliedPredictiveModeling)
library(caret)
library(rpart)
library(lattice)
library(dplyr)
library(tidyr)
library(moments)
library(skimr)
library(RANN)
library(pls)
library(corrplot)
library(tidyverse)
library(lares)
library(DMwR2)
library(gridExtra)
library(rattle)
library(rpart.plot)
library(RColorBrewer)
library(stats)
library(nnet)
library(elasticnet)
library(earth)
library(party)
library(kernlab)
library(randomForest)
library(Cubist)
##################################
# Loading source and
# formulating the train set
##################################
# Load the solubility data from the AppliedPredictiveModeling package.
# This attaches solTrainX/solTrainY (951 compounds) and solTestX/solTestY
# (316 compounds): 228 predictors plus a numeric log-solubility response.
data(solubility)
# Reassemble each split into a single data frame with the response as the
# first column. cbind() pairs the response vector with its matching
# predictor table; as.data.frame() makes the resulting type explicit.
# NOTE: the original code dropped the LHS of the first assignment and
# built Solubility_Train from the TEST vectors — train and test are now
# paired with their correct sources.
Solubility_Train <- as.data.frame(cbind(solTrainY, solTrainX))
Solubility_Test <- as.data.frame(cbind(solTestY, solTestX))
##################################
# Performing a general exploration of the train set
##################################
# Confirm the training set shape: 951 compounds x 229 columns
# (1 response, solTrainY, plus 228 predictors).
dim(Solubility_Train)
## [1] 951 229
# Inspect column types: the response is numeric; the FPxxx columns are
# 0/1 integer fingerprint indicators (continuous descriptors follow later).
str(Solubility_Train)
## 'data.frame': 951 obs. of 229 variables:
## $ solTrainY : num -3.97 -3.98 -3.99 -4 -4.06 -4.08 -4.08 -4.1 -4.1 -4.11 ...
## $ FP001 : int 0 0 1 0 0 1 0 1 1 1 ...
## $ FP002 : int 1 1 1 0 0 0 1 0 0 1 ...
## $ FP003 : int 0 0 1 1 1 1 0 1 1 1 ...
## $ FP004 : int 0 1 1 0 1 1 1 1 1 1 ...
## $ FP005 : int 1 1 1 0 1 0 1 0 0 1 ...
## $ FP006 : int 0 1 0 0 1 0 0 0 1 1 ...
## $ FP007 : int 0 1 0 1 0 0 0 1 1 1 ...
## $ FP008 : int 1 1 1 0 0 0 1 0 0 0 ...
## $ FP009 : int 0 0 0 0 1 1 1 0 1 0 ...
## $ FP010 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ FP011 : int 0 1 0 0 0 0 0 0 1 0 ...
## $ FP012 : int 0 0 0 0 0 1 0 1 0 0 ...
## $ FP013 : int 0 0 0 0 1 0 1 0 0 0 ...
## $ FP014 : int 0 0 0 0 0 0 1 0 0 0 ...
## $ FP015 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ FP016 : int 0 1 0 0 1 1 0 1 0 0 ...
## $ FP017 : int 0 0 1 1 0 0 0 0 1 1 ...
## $ FP018 : int 0 1 0 0 0 0 0 0 0 0 ...
## $ FP019 : int 1 0 0 0 1 0 1 0 0 0 ...
## $ FP020 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP021 : int 0 0 0 0 0 1 0 0 1 0 ...
## $ FP022 : int 0 0 0 0 0 0 0 0 0 1 ...
## $ FP023 : int 0 0 0 1 0 0 0 0 1 0 ...
## $ FP024 : int 1 0 0 0 1 0 0 0 0 0 ...
## $ FP025 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ FP026 : int 1 0 0 0 0 0 1 0 0 0 ...
## $ FP027 : int 0 0 0 0 0 0 0 0 0 1 ...
## $ FP028 : int 0 1 0 0 0 0 0 0 1 1 ...
## $ FP029 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP030 : int 0 0 0 0 1 0 0 0 0 0 ...
## $ FP031 : int 0 0 0 0 0 0 0 1 0 0 ...
## $ FP032 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP033 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP034 : int 0 0 0 0 1 0 0 0 0 1 ...
## $ FP035 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ FP036 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP037 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ FP038 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ FP039 : int 1 0 0 0 0 0 0 0 0 0 ...
## $ FP040 : int 1 0 0 0 0 0 0 0 0 0 ...
## $ FP041 : int 0 0 0 1 0 0 0 0 1 0 ...
## $ FP042 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP043 : int 0 1 0 0 0 0 0 0 0 0 ...
## $ FP044 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP045 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ FP046 : int 0 1 0 0 0 0 1 0 0 1 ...
## $ FP047 : int 0 1 1 0 0 0 1 0 0 0 ...
## $ FP048 : int 0 0 0 0 0 0 0 1 0 0 ...
## $ FP049 : int 0 0 0 0 0 0 1 0 0 0 ...
## $ FP050 : int 0 0 0 0 0 0 0 1 0 1 ...
## $ FP051 : int 0 1 0 0 0 0 0 0 0 0 ...
## $ FP052 : int 0 0 0 0 0 0 0 0 0 1 ...
## $ FP053 : int 0 0 0 0 0 0 1 0 0 0 ...
## $ FP054 : int 0 0 0 1 0 0 0 0 1 1 ...
## $ FP055 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP056 : int 1 0 0 0 0 0 0 0 0 0 ...
## $ FP057 : int 0 0 0 0 0 0 1 0 0 0 ...
## $ FP058 : int 0 0 0 0 0 0 0 0 0 1 ...
## $ FP059 : int 0 0 0 0 0 0 0 1 0 0 ...
## $ FP060 : int 0 1 1 0 0 0 0 1 1 0 ...
## $ FP061 : int 0 0 1 0 0 0 0 1 1 0 ...
## $ FP062 : int 0 0 1 0 0 1 0 1 1 1 ...
## $ FP063 : int 1 1 0 0 1 1 1 0 0 1 ...
## $ FP064 : int 0 1 1 0 1 1 0 1 0 0 ...
## $ FP065 : int 1 1 0 0 1 0 1 0 1 1 ...
## $ FP066 : int 1 0 1 1 1 1 1 1 1 1 ...
## $ FP067 : int 1 1 0 0 1 1 1 0 0 1 ...
## $ FP068 : int 0 1 0 0 1 1 1 0 0 1 ...
## $ FP069 : int 1 0 1 1 1 1 0 1 1 0 ...
## $ FP070 : int 1 1 0 1 0 0 1 0 1 0 ...
## $ FP071 : int 0 0 0 0 0 0 1 0 1 1 ...
## $ FP072 : int 0 1 1 0 0 1 0 1 1 1 ...
## $ FP073 : int 0 1 1 0 0 0 0 0 1 0 ...
## $ FP074 : int 0 1 0 0 0 0 0 0 1 0 ...
## $ FP075 : int 0 1 0 0 1 1 1 0 0 1 ...
## $ FP076 : int 1 1 0 0 0 0 1 0 1 1 ...
## $ FP077 : int 0 1 0 1 0 0 0 1 1 1 ...
## $ FP078 : int 0 1 0 0 0 0 0 0 1 0 ...
## $ FP079 : int 1 1 1 1 1 0 1 0 1 1 ...
## $ FP080 : int 0 1 0 0 1 1 1 1 0 0 ...
## $ FP081 : int 0 0 1 1 0 0 0 1 1 1 ...
## $ FP082 : int 1 1 1 0 1 1 1 0 1 1 ...
## $ FP083 : int 0 0 0 0 1 0 0 0 0 1 ...
## $ FP084 : int 1 1 0 0 1 0 1 0 0 0 ...
## $ FP085 : int 0 1 0 0 0 0 1 0 0 0 ...
## $ FP086 : int 0 0 0 1 1 0 0 1 1 1 ...
## $ FP087 : int 1 1 1 1 1 0 1 0 1 1 ...
## $ FP088 : int 0 1 0 0 0 0 0 1 1 0 ...
## $ FP089 : int 1 1 0 0 0 0 1 0 0 0 ...
## $ FP090 : int 0 1 0 1 0 0 0 1 1 1 ...
## $ FP091 : int 1 1 0 0 1 0 1 0 0 1 ...
## $ FP092 : int 0 0 0 0 1 1 1 0 1 0 ...
## $ FP093 : int 0 1 0 1 0 0 0 1 1 1 ...
## $ FP094 : int 0 0 0 0 1 0 0 1 0 0 ...
## $ FP095 : int 0 0 0 0 0 0 0 0 1 1 ...
## $ FP096 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ FP097 : int 1 1 0 0 0 0 1 0 1 0 ...
## $ FP098 : int 0 0 1 0 0 0 0 1 0 0 ...
## [list output truncated]
# Per-column five-number summaries and means for the training set —
# useful for spotting near-zero-variance fingerprints (means close to 0)
# and the scale/skew of the continuous descriptors.
summary(Solubility_Train)
## solTrainY FP001 FP002 FP003
## Min. :-11.620 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.: -3.955 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median : -2.510 Median :0.0000 Median :1.0000 Median :0.0000
## Mean : -2.719 Mean :0.4932 Mean :0.5394 Mean :0.4364
## 3rd Qu.: -1.360 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. : 1.580 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP004 FP005 FP006 FP007
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :1.0000 Median :0.0000 Median :0.0000
## Mean :0.5846 Mean :0.5794 Mean :0.4006 Mean :0.3638
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP008 FP009 FP010 FP011
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.326 Mean :0.2797 Mean :0.1788 Mean :0.2145
## 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP012 FP013 FP014 FP015
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:1.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :1.0000
## Mean :0.1767 Mean :0.1661 Mean :0.1609 Mean :0.8601
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP016 FP017 FP018 FP019
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.000
## Mean :0.1462 Mean :0.1441 Mean :0.1314 Mean :0.122
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.000
## FP020 FP021 FP022 FP023
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.000
## Mean :0.1199 Mean :0.1209 Mean :0.1041 Mean :0.123
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.000
## FP024 FP025 FP026 FP027
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.1125 Mean :0.1157 Mean :0.08412 Mean :0.09779
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.00000
## FP028 FP029 FP030 FP031
## Min. :0.0000 Min. :0.000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.000 Median :0.00000 Median :0.00000
## Mean :0.1062 Mean :0.102 Mean :0.09359 Mean :0.08938
## 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.000 Max. :1.00000 Max. :1.00000
## FP032 FP033 FP034 FP035
## Min. :0.00000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.07361 Mean :0.0694 Mean :0.07992 Mean :0.07256
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.0000 Max. :1.00000 Max. :1.00000
## FP036 FP037 FP038 FP039
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.07571 Mean :0.07045 Mean :0.08622 Mean :0.07466
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP040 FP041 FP042 FP043
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.06835 Mean :0.06309 Mean :0.05678 Mean :0.06625
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP044 FP045 FP046 FP047
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.000
## Mean :0.05994 Mean :0.05573 Mean :0.3155 Mean :0.266
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:1.000
## Max. :1.00000 Max. :1.00000 Max. :1.0000 Max. :1.000
## FP048 FP049 FP050 FP051
## Min. :0.0000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.000 Median :0.0000 Median :0.0000
## Mean :0.1241 Mean :0.122 Mean :0.1125 Mean :0.1094
## 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.000 Max. :1.0000 Max. :1.0000
## FP052 FP053 FP054 FP055
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.09148 Mean :0.09359 Mean :0.07571 Mean :0.05363
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP056 FP057 FP058 FP059
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.06519 Mean :0.1199 Mean :0.1136 Mean :0.05468
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## FP060 FP061 FP062 FP063
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.4816 Mean :0.4469 Mean :0.4374 Mean :0.4259
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP064 FP065 FP066 FP067
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :1.0000 Median :1.0000 Median :0.0000
## Mean :0.4164 Mean :0.5931 Mean :0.6099 Mean :0.3796
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP068 FP069 FP070 FP071
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.000
## Mean :0.3617 Mean :0.3617 Mean :0.3554 Mean :0.327
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.000
## FP072 FP073 FP074 FP075
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.6583 Mean :0.3102 Mean :0.3249 Mean :0.3386
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP076 FP077 FP078 FP079
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :1.0000
## Mean :0.3281 Mean :0.3207 Mean :0.3039 Mean :0.6898
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP080 FP081 FP082 FP083
## Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :1.000 Median :0.0000
## Mean :0.3028 Mean :0.2787 Mean :0.714 Mean :0.2734
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.000 Max. :1.0000
## FP084 FP085 FP086 FP087
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.000 Median :0.0000 Median :0.0000 Median :1.0000
## Mean :0.286 Mean :0.2555 Mean :0.2692 Mean :0.7266
## 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP088 FP089 FP090 FP091
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.000
## Mean :0.2629 Mean :0.2471 Mean :0.2492 Mean :0.225
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.000
## FP092 FP093 FP094 FP095
## Min. :0.000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.000 Median :0.000 Median :0.0000 Median :0.0000
## Mean :0.244 Mean :0.244 Mean :0.2313 Mean :0.2198
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.000 Max. :1.000 Max. :1.0000 Max. :1.0000
## FP096 FP097 FP098 FP099
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.2177 Mean :0.2355 Mean :0.2376 Mean :0.2271
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP100 FP101 FP102 FP103
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.2313 Mean :0.2366 Mean :0.2019 Mean :0.2187
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP104 FP105 FP106 FP107
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.2229 Mean :0.2156 Mean :0.1914 Mean :0.2114
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP108 FP109 FP110 FP111
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.205 Mean :0.1767 Mean :0.2061 Mean :0.1966
## 3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP112 FP113 FP114 FP115
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1945 Mean :0.1956 Mean :0.1556 Mean :0.1788
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP116 FP117 FP118 FP119
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.000
## Mean :0.1924 Mean :0.1788 Mean :0.1924 Mean :0.163
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.000
## FP120 FP121 FP122 FP123
## Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.000 Median :0.0000
## Mean :0.1661 Mean :0.1399 Mean :0.164 Mean :0.1672
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.000 Max. :1.0000
## FP124 FP125 FP126 FP127
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1619 Mean :0.1556 Mean :0.1483 Mean :0.1399
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP128 FP129 FP130 FP131
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1483 Mean :0.1388 Mean :0.1052 Mean :0.1262
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP132 FP133 FP134 FP135
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1251 Mean :0.1262 Mean :0.1272 Mean :0.1262
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP136 FP137 FP138 FP139
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.1209 Mean :0.1157 Mean :0.1115 Mean :0.08202
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## FP140 FP141 FP142 FP143
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.1115 Mean :0.1167 Mean :0.1094 Mean :0.08097
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## FP144 FP145 FP146 FP147
## Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.000 Median :0.0000
## Mean :0.1041 Mean :0.1041 Mean :0.103 Mean :0.1052
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.000 Max. :1.0000
## FP148 FP149 FP150 FP151
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.08728 Mean :0.09043 Mean :0.07886 Mean :0.05573
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP152 FP153 FP154 FP155
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.08202 Mean :0.07781 Mean :0.03785 Mean :0.0694
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.0000
## FP156 FP157 FP158 FP159
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.07045 Mean :0.06204 Mean :0.05363 Mean :0.07045
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP160 FP161 FP162 FP163
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.0000
## Mean :0.06835 Mean :0.06625 Mean :0.4953 Mean :0.4763
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.00000 Max. :1.00000 Max. :1.0000 Max. :1.0000
## FP164 FP165 FP166 FP167
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.6278 Mean :0.3491 Mean :0.3312 Mean :0.3281
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP168 FP169 FP170 FP171
## Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :1.0000 Median :0.0000 Median :0.000 Median :0.0000
## Mean :0.6656 Mean :0.1861 Mean :0.184 Mean :0.1693
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.000 Max. :1.0000
## FP172 FP173 FP174 FP175
## Min. :0.0000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.000 Median :0.0000 Median :0.0000
## Mean :0.1514 Mean :0.142 Mean :0.1304 Mean :0.1346
## 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.000 Max. :1.0000 Max. :1.0000
## FP176 FP177 FP178 FP179
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.122 Mean :0.1209 Mean :0.1209 Mean :0.09779
## 3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## FP180 FP181 FP182 FP183
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.1073 Mean :0.09359 Mean :0.09884 Mean :0.07571
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP184 FP185 FP186 FP187
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.08412 Mean :0.08517 Mean :0.07676 Mean :0.07256
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP188 FP189 FP190 FP191
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.06835 Mean :0.07676 Mean :0.07256 Mean :0.07045
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP192 FP193 FP194 FP195
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.06099 Mean :0.06204 Mean :0.05889 Mean :0.06099
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP196 FP197 FP198 FP199
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.05678 Mean :0.05258 Mean :0.05678 Mean :0.04732
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP200 FP201 FP202 FP203
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.0000
## Mean :0.04942 Mean :0.05258 Mean :0.2576 Mean :0.1146
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.00000 Max. :1.0000 Max. :1.0000
## FP204 FP205 FP206 FP207
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.09884 Mean :0.07781 Mean :0.05994 Mean :0.05678
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP208 MolWeight NumAtoms NumNonHAtoms
## Min. :0.0000 Min. : 46.09 Min. : 5.00 Min. : 2.00
## 1st Qu.:0.0000 1st Qu.:122.61 1st Qu.:17.00 1st Qu.: 8.00
## Median :0.0000 Median :179.23 Median :22.00 Median :12.00
## Mean :0.1125 Mean :201.65 Mean :25.51 Mean :13.16
## 3rd Qu.:0.0000 3rd Qu.:264.34 3rd Qu.:31.00 3rd Qu.:17.00
## Max. :1.0000 Max. :665.81 Max. :94.00 Max. :47.00
## NumBonds NumNonHBonds NumMultBonds NumRotBonds
## Min. : 4.00 Min. : 1.00 Min. : 0.000 Min. : 0.000
## 1st Qu.:17.00 1st Qu.: 8.00 1st Qu.: 1.000 1st Qu.: 0.000
## Median :23.00 Median :12.00 Median : 6.000 Median : 2.000
## Mean :25.91 Mean :13.56 Mean : 6.148 Mean : 2.251
## 3rd Qu.:31.50 3rd Qu.:18.00 3rd Qu.:10.000 3rd Qu.: 3.500
## Max. :97.00 Max. :50.00 Max. :25.000 Max. :16.000
## NumDblBonds NumAromaticBonds NumHydrogen NumCarbon
## Min. :0.000 Min. : 0.000 Min. : 0.00 Min. : 1.000
## 1st Qu.:0.000 1st Qu.: 0.000 1st Qu.: 7.00 1st Qu.: 6.000
## Median :1.000 Median : 6.000 Median :11.00 Median : 9.000
## Mean :1.006 Mean : 5.121 Mean :12.35 Mean : 9.893
## 3rd Qu.:2.000 3rd Qu.: 6.000 3rd Qu.:16.00 3rd Qu.:12.000
## Max. :7.000 Max. :25.000 Max. :47.00 Max. :33.000
## NumNitrogen NumOxygen NumSulfer NumChlorine
## Min. :0.0000 Min. : 0.000 Min. :0.000 Min. : 0.0000
## 1st Qu.:0.0000 1st Qu.: 0.000 1st Qu.:0.000 1st Qu.: 0.0000
## Median :0.0000 Median : 1.000 Median :0.000 Median : 0.0000
## Mean :0.8128 Mean : 1.574 Mean :0.164 Mean : 0.5563
## 3rd Qu.:1.0000 3rd Qu.: 2.000 3rd Qu.:0.000 3rd Qu.: 0.0000
## Max. :6.0000 Max. :13.000 Max. :4.000 Max. :10.0000
## NumHalogen NumRings HydrophilicFactor SurfaceArea1
## Min. : 0.0000 Min. :0.000 Min. :-0.98500 Min. : 0.00
## 1st Qu.: 0.0000 1st Qu.:0.000 1st Qu.:-0.76300 1st Qu.: 9.23
## Median : 0.0000 Median :1.000 Median :-0.31400 Median : 29.10
## Mean : 0.6982 Mean :1.402 Mean :-0.02059 Mean : 36.46
## 3rd Qu.: 1.0000 3rd Qu.:2.000 3rd Qu.: 0.31300 3rd Qu.: 53.28
## Max. :10.0000 Max. :7.000 Max. :13.48300 Max. :331.94
## SurfaceArea2
## Min. : 0.00
## 1st Qu.: 10.63
## Median : 33.12
## Mean : 40.23
## 3rd Qu.: 60.66
## Max. :331.94
##################################
# Performing a general exploration of the test set
##################################
# Confirm the test set shape: 316 compounds x 229 columns
# (1 response, solTestY, plus the same 228 predictors as the train set).
dim(Solubility_Test)
## [1] 316 229
# Inspect column types; layout mirrors the training set with solTestY
# as the response column.
str(Solubility_Test)
## 'data.frame': 316 obs. of 229 variables:
## $ solTestY : num 0.93 0.85 0.81 0.74 0.61 0.58 0.57 0.56 0.52 0.45 ...
## $ FP001 : int 1 1 0 0 1 1 1 0 1 0 ...
## $ FP002 : int 0 0 1 0 1 0 0 0 0 1 ...
## $ FP003 : int 0 1 0 1 0 0 0 0 1 0 ...
## $ FP004 : int 1 1 0 0 1 1 1 1 1 0 ...
## $ FP005 : int 0 0 1 0 1 0 0 0 0 1 ...
## $ FP006 : int 0 1 0 1 1 0 0 0 0 0 ...
## $ FP007 : int 0 0 0 0 0 0 0 1 1 0 ...
## $ FP008 : int 0 0 0 0 1 0 0 0 0 0 ...
## $ FP009 : int 1 0 0 0 0 0 0 0 0 0 ...
## $ FP010 : int 1 0 1 0 0 0 0 0 0 0 ...
## $ FP011 : int 0 1 0 0 1 0 0 0 0 0 ...
## $ FP012 : int 0 1 0 0 0 1 0 1 0 0 ...
## $ FP013 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP014 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP015 : int 1 1 0 1 1 1 1 1 1 1 ...
## $ FP016 : int 0 1 0 0 0 0 0 1 0 0 ...
## $ FP017 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP018 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP019 : int 0 0 0 0 1 0 0 0 0 1 ...
## $ FP020 : int 0 0 0 0 0 1 0 0 0 0 ...
## $ FP021 : int 1 0 0 0 0 0 0 0 0 0 ...
## $ FP022 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP023 : int 0 0 0 0 0 0 1 0 0 0 ...
## $ FP024 : int 0 0 0 0 1 0 0 0 0 1 ...
## $ FP025 : int 1 0 0 0 0 0 0 0 0 0 ...
## $ FP026 : int 0 0 0 0 0 0 0 0 0 1 ...
## $ FP027 : int 0 0 0 1 0 0 0 0 0 0 ...
## $ FP028 : int 0 0 0 1 0 0 0 0 0 0 ...
## $ FP029 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP030 : int 0 0 0 1 0 0 0 0 0 0 ...
## $ FP031 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP032 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ FP033 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ FP034 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP035 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP036 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP037 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ FP038 : int 1 0 0 0 0 0 0 0 0 0 ...
## $ FP039 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP040 : int 0 0 0 0 1 0 0 0 0 0 ...
## $ FP041 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP042 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP043 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP044 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP045 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP046 : int 0 0 1 0 0 0 0 0 0 1 ...
## $ FP047 : int 0 0 0 0 1 0 0 0 0 0 ...
## $ FP048 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP049 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP050 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP051 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP052 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP053 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP054 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP055 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ FP056 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP057 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP058 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP059 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP060 : int 1 1 1 0 0 1 0 1 0 0 ...
## $ FP061 : int 1 1 1 0 0 1 0 0 0 0 ...
## $ FP062 : int 1 1 0 0 1 1 1 0 1 0 ...
## $ FP063 : int 0 1 0 1 1 0 0 0 0 1 ...
## $ FP064 : int 1 1 0 0 0 0 0 0 1 0 ...
## $ FP065 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ FP066 : int 0 1 0 1 0 1 0 0 1 1 ...
## $ FP067 : int 0 1 0 1 1 0 0 0 0 1 ...
## $ FP068 : int 0 1 0 1 1 0 0 0 0 0 ...
## $ FP069 : int 0 0 0 0 0 0 0 0 1 1 ...
## $ FP070 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP071 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP072 : int 1 1 1 0 1 1 1 1 1 0 ...
## $ FP073 : int 1 0 1 0 0 0 0 0 0 0 ...
## $ FP074 : int 0 0 1 0 0 0 0 0 1 0 ...
## $ FP075 : int 0 1 0 1 0 0 0 1 0 0 ...
## $ FP076 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP077 : int 0 0 0 1 0 0 0 1 0 0 ...
## $ FP078 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ FP079 : int 0 0 1 1 1 0 0 0 0 1 ...
## $ FP080 : int 1 1 0 1 0 0 0 1 0 0 ...
## $ FP081 : int 0 0 0 1 0 0 0 0 1 0 ...
## $ FP082 : int 0 0 1 0 1 0 0 0 0 1 ...
## $ FP083 : int 0 1 0 1 1 0 0 0 0 0 ...
## $ FP084 : int 0 0 0 1 1 0 0 1 0 1 ...
## $ FP085 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP086 : int 0 0 0 1 0 0 0 0 0 0 ...
## $ FP087 : int 0 0 1 1 1 0 0 1 0 1 ...
## $ FP088 : int 1 0 0 0 0 0 0 1 1 0 ...
## $ FP089 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP090 : int 0 0 0 1 0 0 0 1 0 0 ...
## $ FP091 : int 0 0 0 1 1 0 0 0 0 0 ...
## $ FP092 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP093 : int 0 0 0 1 0 0 0 1 0 0 ...
## $ FP094 : int 0 1 0 0 0 0 0 0 1 0 ...
## $ FP095 : int 0 0 1 1 0 0 0 0 0 0 ...
## $ FP096 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP097 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP098 : int 1 1 0 0 0 1 0 0 0 0 ...
## [list output truncated]
# Per-column summaries for the test set — compare against the training-set
# summaries above to check the two splits have similar distributions.
summary(Solubility_Test)
## solTestY FP001 FP002 FP003
## Min. :-10.410 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.: -3.953 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median : -2.480 Median :0.0000 Median :1.0000 Median :0.000
## Mean : -2.797 Mean :0.4684 Mean :0.5854 Mean :0.443
## 3rd Qu.: -1.373 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.000
## Max. : 1.070 Max. :1.0000 Max. :1.0000 Max. :1.000
## FP004 FP005 FP006 FP007
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :1.0000 Median :0.0000 Median :0.0000
## Mean :0.5316 Mean :0.6171 Mean :0.3513 Mean :0.3544
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP008 FP009 FP010 FP011
## Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.000 Median :0.0000
## Mean :0.3608 Mean :0.2627 Mean :0.193 Mean :0.1741
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.000 Max. :1.0000
## FP012 FP013 FP014 FP015
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:1.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :1.0000
## Mean :0.1677 Mean :0.1646 Mean :0.1582 Mean :0.8291
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP016 FP017 FP018 FP019
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.0000
## Mean :0.1424 Mean :0.1487 Mean :0.08544 Mean :0.1139
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.0000
## FP020 FP021 FP022 FP023
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.1076 Mean :0.1076 Mean :0.1171 Mean :0.08544
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## FP024 FP025 FP026 FP027
## Min. :0.0000 Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.00000 Median :0.0000 Median :0.00000
## Mean :0.0981 Mean :0.07911 Mean :0.1171 Mean :0.07911
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.00000 Max. :1.0000 Max. :1.00000
## FP028 FP029 FP030 FP031
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.05696 Mean :0.05063 Mean :0.08228 Mean :0.0981
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.0000
## FP032 FP033 FP034 FP035
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.0000
## Mean :0.1297 Mean :0.1203 Mean :0.06646 Mean :0.0981
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.0000
## FP036 FP037 FP038 FP039
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.06013 Mean :0.09494 Mean :0.03165 Mean :0.06329
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP040 FP041 FP042 FP043
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.05696 Mean :0.06013 Mean :0.06013 Mean :0.0443
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.0000
## FP044 FP045 FP046 FP047
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.0000
## Mean :0.06013 Mean :0.06329 Mean :0.3259 Mean :0.2975
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.00000 Max. :1.00000 Max. :1.0000 Max. :1.0000
## FP048 FP049 FP050 FP051
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.1139 Mean :0.1076 Mean :0.1139 Mean :0.05696
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## FP052 FP053 FP054 FP055
## Min. :0.0000 Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.00000 Median :0.0000 Median :0.00000
## Mean :0.1044 Mean :0.06013 Mean :0.0981 Mean :0.09177
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.00000 Max. :1.0000 Max. :1.00000
## FP056 FP057 FP058 FP059
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.06329 Mean :0.1234 Mean :0.1361 Mean :0.0443
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP060 FP061 FP062 FP063
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.4525 Mean :0.3924 Mean :0.4272 Mean :0.3576
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP064 FP065 FP066 FP067
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :1.0000 Median :1.0000 Median :0.0000
## Mean :0.3892 Mean :0.5981 Mean :0.6171 Mean :0.3259
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP068 FP069 FP070 FP071
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.2911 Mean :0.3734 Mean :0.3323 Mean :0.3449
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP072 FP073 FP074 FP075
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.6456 Mean :0.2911 Mean :0.3259 Mean :0.2563
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP076 FP077 FP078 FP079
## Min. :0.0000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.000 Median :0.0000 Median :1.0000
## Mean :0.3165 Mean :0.307 Mean :0.3101 Mean :0.7278
## 3rd Qu.:1.0000 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.000 Max. :1.0000 Max. :1.0000
## FP080 FP081 FP082 FP083
## Min. :0.0000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.000 Median :1.0000 Median :0.0000
## Mean :0.2627 Mean :0.288 Mean :0.7437 Mean :0.2532
## 3rd Qu.:1.0000 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.000 Max. :1.0000 Max. :1.0000
## FP084 FP085 FP086 FP087
## Min. :0.0000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:1.0000
## Median :0.0000 Median :0.000 Median :0.0000 Median :1.0000
## Mean :0.2247 Mean :0.269 Mean :0.2722 Mean :0.7627
## 3rd Qu.:0.0000 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.000 Max. :1.0000 Max. :1.0000
## FP088 FP089 FP090 FP091
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.000
## Mean :0.2437 Mean :0.2532 Mean :0.2278 Mean :0.231
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.000
## FP092 FP093 FP094 FP095
## Min. :0.0000 Min. :0.0000 Min. :0.00 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.00 Median :0.0000
## Mean :0.2184 Mean :0.2152 Mean :0.25 Mean :0.2057
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.25 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.00 Max. :1.0000
## FP096 FP097 FP098 FP099
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.000
## Mean :0.1867 Mean :0.2089 Mean :0.2025 Mean :0.212
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.000
## FP100 FP101 FP102 FP103
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1804 Mean :0.1772 Mean :0.1456 Mean :0.2184
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP104 FP105 FP106 FP107
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1835 Mean :0.2152 Mean :0.1361 Mean :0.1962
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP108 FP109 FP110 FP111
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1804 Mean :0.1741 Mean :0.1646 Mean :0.1804
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP112 FP113 FP114 FP115
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1772 Mean :0.1646 Mean :0.1772 Mean :0.1582
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP116 FP117 FP118 FP119
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1487 Mean :0.1709 Mean :0.1171 Mean :0.1677
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP120 FP121 FP122 FP123
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1551 Mean :0.1076 Mean :0.1361 Mean :0.1456
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP124 FP125 FP126 FP127
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1329 Mean :0.1203 Mean :0.1139 Mean :0.1487
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP128 FP129 FP130 FP131
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.0000
## Mean :0.1076 Mean :0.1392 Mean :0.08228 Mean :0.1076
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.0000
## FP132 FP133 FP134 FP135
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.1266 Mean :0.1361 Mean :0.08544 Mean :0.06329
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.00000
## FP136 FP137 FP138 FP139
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.1013 Mean :0.08861 Mean :0.08228 Mean :0.06329
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP140 FP141 FP142 FP143
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.08861 Mean :0.06962 Mean :0.09494 Mean :0.0538
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.0000
## FP144 FP145 FP146 FP147
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.09177 Mean :0.06329 Mean :0.09177 Mean :0.06962
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP148 FP149 FP150 FP151
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.07911 Mean :0.08228 Mean :0.06646 Mean :0.03165
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP152 FP153 FP154 FP155
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.0538 Mean :0.03481 Mean :0.03165 Mean :0.06646
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP156 FP157 FP158 FP159
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.04747 Mean :0.05696 Mean :0.07911 Mean :0.03481
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP160 FP161 FP162 FP163
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :1.0000 Median :0.0000
## Mean :0.03481 Mean :0.03481 Mean :0.5316 Mean :0.4525
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.00000 Max. :1.00000 Max. :1.0000 Max. :1.0000
## FP164 FP165 FP166 FP167
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.6551 Mean :0.3196 Mean :0.3386 Mean :0.3006
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP168 FP169 FP170 FP171
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.7152 Mean :0.1867 Mean :0.1551 Mean :0.1297
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP172 FP173 FP174 FP175
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1487 Mean :0.1361 Mean :0.1551 Mean :0.1329
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP176 FP177 FP178 FP179
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1076 Mean :0.1013 Mean :0.1076 Mean :0.1392
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP180 FP181 FP182 FP183
## Min. :0.00000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000 Median :0.00000 Median :0.0000
## Mean :0.06962 Mean :0.1044 Mean :0.07595 Mean :0.1329
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.0000 Max. :1.00000 Max. :1.0000
## FP184 FP185 FP186 FP187
## Min. :0.00000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.09494 Mean :0.0981 Mean :0.06013 Mean :0.06646
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.0000 Max. :1.00000 Max. :1.00000
## FP188 FP189 FP190 FP191
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.00000
## Mean :0.06962 Mean :0.04114 Mean :0.0538 Mean :0.05696
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.0000 Max. :1.00000
## FP192 FP193 FP194 FP195
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.06962 Mean :0.06962 Mean :0.06646 Mean :0.05063
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP196 FP197 FP198 FP199
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.00000
## Mean :0.06962 Mean :0.06329 Mean :0.0443 Mean :0.07278
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.0000 Max. :1.00000
## FP200 FP201 FP202 FP203
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.0000
## Mean :0.06329 Mean :0.04114 Mean :0.2658 Mean :0.1361
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.00000 Max. :1.0000 Max. :1.0000
## FP204 FP205 FP206 FP207
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.09494 Mean :0.07911 Mean :0.05063 Mean :0.0443
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.0000
## FP208 MolWeight NumAtoms NumNonHAtoms NumBonds
## Min. :0.0000 Min. : 56.07 Min. : 5.0 Min. : 3.00 Min. : 4
## 1st Qu.:0.0000 1st Qu.:121.91 1st Qu.:17.0 1st Qu.: 8.00 1st Qu.:16
## Median :0.0000 Median :170.11 Median :22.0 Median :11.00 Median :23
## Mean :0.1361 Mean :194.12 Mean :24.6 Mean :12.71 Mean :25
## 3rd Qu.:0.0000 3rd Qu.:253.82 3rd Qu.:29.0 3rd Qu.:16.00 3rd Qu.:30
## Max. :1.0000 Max. :478.92 Max. :68.0 Max. :33.00 Max. :71
## NumNonHBonds NumMultBonds NumRotBonds NumDblBonds
## Min. : 2.0 Min. : 0.000 Min. : 0.000 Min. :0.0000
## 1st Qu.: 8.0 1st Qu.: 1.000 1st Qu.: 0.000 1st Qu.:0.0000
## Median :12.0 Median : 6.000 Median : 1.000 Median :1.0000
## Mean :13.1 Mean : 6.313 Mean : 1.949 Mean :0.8892
## 3rd Qu.:17.0 3rd Qu.:10.000 3rd Qu.: 3.000 3rd Qu.:1.0000
## Max. :36.0 Max. :27.000 Max. :16.000 Max. :6.0000
## NumAromaticBonds NumHydrogen NumCarbon NumNitrogen
## Min. : 0.000 Min. : 0.0 Min. : 1.000 Min. :0.0000
## 1st Qu.: 0.000 1st Qu.: 7.0 1st Qu.: 6.000 1st Qu.:0.0000
## Median : 6.000 Median :11.0 Median : 8.000 Median :0.0000
## Mean : 5.399 Mean :11.9 Mean : 9.785 Mean :0.7089
## 3rd Qu.:10.000 3rd Qu.:15.0 3rd Qu.:12.000 3rd Qu.:1.0000
## Max. :27.000 Max. :40.0 Max. :24.000 Max. :6.0000
## NumOxygen NumSulfer NumChlorine NumHalogen
## Min. :0.000 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :1.000 Median :0.0000 Median :0.000 Median :0.0000
## Mean :1.389 Mean :0.1013 Mean :0.557 Mean :0.7089
## 3rd Qu.:2.000 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:1.0000
## Max. :9.000 Max. :3.0000 Max. :9.000 Max. :9.0000
## NumRings HydrophilicFactor SurfaceArea1 SurfaceArea2
## Min. :0.000 Min. :-0.9860 Min. : 0.00 Min. : 0.00
## 1st Qu.:1.000 1st Qu.:-0.7670 1st Qu.: 9.23 1st Qu.: 9.23
## Median :1.000 Median :-0.3970 Median : 26.30 Median : 26.30
## Mean :1.399 Mean :-0.1022 Mean : 32.76 Mean : 35.04
## 3rd Qu.:2.000 3rd Qu.: 0.2140 3rd Qu.: 49.55 3rd Qu.: 52.32
## Max. :6.000 Max. : 5.0000 Max. :201.85 Max. :201.85
##################################
# Formulating a data type assessment summary
##################################
# Work on a snapshot of the train set for the data type assessment.
PDA <- Solubility_Train

# Per-column data type summary: index, name and (first) class of each column.
# The outer parentheses auto-print the resulting data frame, matching the
# knitted output listing below.
(PDA.Summary <- data.frame(Column.Index = seq_along(names(PDA)),
                           Column.Name = names(PDA),
                           Column.Type = vapply(PDA,
                                                function(x) class(x)[1L],
                                                character(1)),
                           row.names = NULL))
## Column.Index Column.Name Column.Type
## 1 1 solTrainY numeric
## 2 2 FP001 integer
## 3 3 FP002 integer
## 4 4 FP003 integer
## 5 5 FP004 integer
## 6 6 FP005 integer
## 7 7 FP006 integer
## 8 8 FP007 integer
## 9 9 FP008 integer
## 10 10 FP009 integer
## 11 11 FP010 integer
## 12 12 FP011 integer
## 13 13 FP012 integer
## 14 14 FP013 integer
## 15 15 FP014 integer
## 16 16 FP015 integer
## 17 17 FP016 integer
## 18 18 FP017 integer
## 19 19 FP018 integer
## 20 20 FP019 integer
## 21 21 FP020 integer
## 22 22 FP021 integer
## 23 23 FP022 integer
## 24 24 FP023 integer
## 25 25 FP024 integer
## 26 26 FP025 integer
## 27 27 FP026 integer
## 28 28 FP027 integer
## 29 29 FP028 integer
## 30 30 FP029 integer
## 31 31 FP030 integer
## 32 32 FP031 integer
## 33 33 FP032 integer
## 34 34 FP033 integer
## 35 35 FP034 integer
## 36 36 FP035 integer
## 37 37 FP036 integer
## 38 38 FP037 integer
## 39 39 FP038 integer
## 40 40 FP039 integer
## 41 41 FP040 integer
## 42 42 FP041 integer
## 43 43 FP042 integer
## 44 44 FP043 integer
## 45 45 FP044 integer
## 46 46 FP045 integer
## 47 47 FP046 integer
## 48 48 FP047 integer
## 49 49 FP048 integer
## 50 50 FP049 integer
## 51 51 FP050 integer
## 52 52 FP051 integer
## 53 53 FP052 integer
## 54 54 FP053 integer
## 55 55 FP054 integer
## 56 56 FP055 integer
## 57 57 FP056 integer
## 58 58 FP057 integer
## 59 59 FP058 integer
## 60 60 FP059 integer
## 61 61 FP060 integer
## 62 62 FP061 integer
## 63 63 FP062 integer
## 64 64 FP063 integer
## 65 65 FP064 integer
## 66 66 FP065 integer
## 67 67 FP066 integer
## 68 68 FP067 integer
## 69 69 FP068 integer
## 70 70 FP069 integer
## 71 71 FP070 integer
## 72 72 FP071 integer
## 73 73 FP072 integer
## 74 74 FP073 integer
## 75 75 FP074 integer
## 76 76 FP075 integer
## 77 77 FP076 integer
## 78 78 FP077 integer
## 79 79 FP078 integer
## 80 80 FP079 integer
## 81 81 FP080 integer
## 82 82 FP081 integer
## 83 83 FP082 integer
## 84 84 FP083 integer
## 85 85 FP084 integer
## 86 86 FP085 integer
## 87 87 FP086 integer
## 88 88 FP087 integer
## 89 89 FP088 integer
## 90 90 FP089 integer
## 91 91 FP090 integer
## 92 92 FP091 integer
## 93 93 FP092 integer
## 94 94 FP093 integer
## 95 95 FP094 integer
## 96 96 FP095 integer
## 97 97 FP096 integer
## 98 98 FP097 integer
## 99 99 FP098 integer
## 100 100 FP099 integer
## 101 101 FP100 integer
## 102 102 FP101 integer
## 103 103 FP102 integer
## 104 104 FP103 integer
## 105 105 FP104 integer
## 106 106 FP105 integer
## 107 107 FP106 integer
## 108 108 FP107 integer
## 109 109 FP108 integer
## 110 110 FP109 integer
## 111 111 FP110 integer
## 112 112 FP111 integer
## 113 113 FP112 integer
## 114 114 FP113 integer
## 115 115 FP114 integer
## 116 116 FP115 integer
## 117 117 FP116 integer
## 118 118 FP117 integer
## 119 119 FP118 integer
## 120 120 FP119 integer
## 121 121 FP120 integer
## 122 122 FP121 integer
## 123 123 FP122 integer
## 124 124 FP123 integer
## 125 125 FP124 integer
## 126 126 FP125 integer
## 127 127 FP126 integer
## 128 128 FP127 integer
## 129 129 FP128 integer
## 130 130 FP129 integer
## 131 131 FP130 integer
## 132 132 FP131 integer
## 133 133 FP132 integer
## 134 134 FP133 integer
## 135 135 FP134 integer
## 136 136 FP135 integer
## 137 137 FP136 integer
## 138 138 FP137 integer
## 139 139 FP138 integer
## 140 140 FP139 integer
## 141 141 FP140 integer
## 142 142 FP141 integer
## 143 143 FP142 integer
## 144 144 FP143 integer
## 145 145 FP144 integer
## 146 146 FP145 integer
## 147 147 FP146 integer
## 148 148 FP147 integer
## 149 149 FP148 integer
## 150 150 FP149 integer
## 151 151 FP150 integer
## 152 152 FP151 integer
## 153 153 FP152 integer
## 154 154 FP153 integer
## 155 155 FP154 integer
## 156 156 FP155 integer
## 157 157 FP156 integer
## 158 158 FP157 integer
## 159 159 FP158 integer
## 160 160 FP159 integer
## 161 161 FP160 integer
## 162 162 FP161 integer
## 163 163 FP162 integer
## 164 164 FP163 integer
## 165 165 FP164 integer
## 166 166 FP165 integer
## 167 167 FP166 integer
## 168 168 FP167 integer
## 169 169 FP168 integer
## 170 170 FP169 integer
## 171 171 FP170 integer
## 172 172 FP171 integer
## 173 173 FP172 integer
## 174 174 FP173 integer
## 175 175 FP174 integer
## 176 176 FP175 integer
## 177 177 FP176 integer
## 178 178 FP177 integer
## 179 179 FP178 integer
## 180 180 FP179 integer
## 181 181 FP180 integer
## 182 182 FP181 integer
## 183 183 FP182 integer
## 184 184 FP183 integer
## 185 185 FP184 integer
## 186 186 FP185 integer
## 187 187 FP186 integer
## 188 188 FP187 integer
## 189 189 FP188 integer
## 190 190 FP189 integer
## 191 191 FP190 integer
## 192 192 FP191 integer
## 193 193 FP192 integer
## 194 194 FP193 integer
## 195 195 FP194 integer
## 196 196 FP195 integer
## 197 197 FP196 integer
## 198 198 FP197 integer
## 199 199 FP198 integer
## 200 200 FP199 integer
## 201 201 FP200 integer
## 202 202 FP201 integer
## 203 203 FP202 integer
## 204 204 FP203 integer
## 205 205 FP204 integer
## 206 206 FP205 integer
## 207 207 FP206 integer
## 208 208 FP207 integer
## 209 209 FP208 integer
## 210 210 MolWeight numeric
## 211 211 NumAtoms integer
## 212 212 NumNonHAtoms integer
## 213 213 NumBonds integer
## 214 214 NumNonHBonds integer
## 215 215 NumMultBonds integer
## 216 216 NumRotBonds integer
## 217 217 NumDblBonds integer
## 218 218 NumAromaticBonds integer
## 219 219 NumHydrogen integer
## 220 220 NumCarbon integer
## 221 221 NumNitrogen integer
## 222 222 NumOxygen integer
## 223 223 NumSulfer integer
## 224 224 NumChlorine integer
## 225 225 NumHalogen integer
## 226 226 NumRings integer
## 227 227 HydrophilicFactor numeric
## 228 228 SurfaceArea1 numeric
## 229 229 SurfaceArea2 numeric
##################################
# Loading dataset
##################################
# Load the train set into a working copy for the data quality assessment.
DQA <- Solubility_Train

##################################
# Formulating an overall data quality assessment summary
##################################
# Per-column quality metrics: data type, row count, NA count, and fill rate
# (share of non-missing values, formatted to 3 decimals). The outer
# parentheses auto-print the data frame, matching the listing below.
(DQA.Summary <- data.frame(Column.Index = seq_along(names(DQA)),
                           Column.Name = names(DQA),
                           Column.Type = vapply(DQA,
                                                function(x) class(x)[1L],
                                                character(1)),
                           Row.Count = rep(nrow(DQA), ncol(DQA)),
                           NA.Count = vapply(DQA,
                                             function(x) sum(is.na(x)),
                                             integer(1)),
                           Fill.Rate = vapply(DQA,
                                              function(x) {
                                                format(round(sum(!is.na(x)) / nrow(DQA), 3),
                                                       nsmall = 3)
                                              },
                                              character(1)),
                           row.names = NULL))
## Column.Index Column.Name Column.Type Row.Count NA.Count Fill.Rate
## 1 1 solTrainY numeric 951 0 1.000
## 2 2 FP001 integer 951 0 1.000
## 3 3 FP002 integer 951 0 1.000
## 4 4 FP003 integer 951 0 1.000
## 5 5 FP004 integer 951 0 1.000
## 6 6 FP005 integer 951 0 1.000
## 7 7 FP006 integer 951 0 1.000
## 8 8 FP007 integer 951 0 1.000
## 9 9 FP008 integer 951 0 1.000
## 10 10 FP009 integer 951 0 1.000
## 11 11 FP010 integer 951 0 1.000
## 12 12 FP011 integer 951 0 1.000
## 13 13 FP012 integer 951 0 1.000
## 14 14 FP013 integer 951 0 1.000
## 15 15 FP014 integer 951 0 1.000
## 16 16 FP015 integer 951 0 1.000
## 17 17 FP016 integer 951 0 1.000
## 18 18 FP017 integer 951 0 1.000
## 19 19 FP018 integer 951 0 1.000
## 20 20 FP019 integer 951 0 1.000
## 21 21 FP020 integer 951 0 1.000
## 22 22 FP021 integer 951 0 1.000
## 23 23 FP022 integer 951 0 1.000
## 24 24 FP023 integer 951 0 1.000
## 25 25 FP024 integer 951 0 1.000
## 26 26 FP025 integer 951 0 1.000
## 27 27 FP026 integer 951 0 1.000
## 28 28 FP027 integer 951 0 1.000
## 29 29 FP028 integer 951 0 1.000
## 30 30 FP029 integer 951 0 1.000
## 31 31 FP030 integer 951 0 1.000
## 32 32 FP031 integer 951 0 1.000
## 33 33 FP032 integer 951 0 1.000
## 34 34 FP033 integer 951 0 1.000
## 35 35 FP034 integer 951 0 1.000
## 36 36 FP035 integer 951 0 1.000
## 37 37 FP036 integer 951 0 1.000
## 38 38 FP037 integer 951 0 1.000
## 39 39 FP038 integer 951 0 1.000
## 40 40 FP039 integer 951 0 1.000
## 41 41 FP040 integer 951 0 1.000
## 42 42 FP041 integer 951 0 1.000
## 43 43 FP042 integer 951 0 1.000
## 44 44 FP043 integer 951 0 1.000
## 45 45 FP044 integer 951 0 1.000
## 46 46 FP045 integer 951 0 1.000
## 47 47 FP046 integer 951 0 1.000
## 48 48 FP047 integer 951 0 1.000
## 49 49 FP048 integer 951 0 1.000
## 50 50 FP049 integer 951 0 1.000
## 51 51 FP050 integer 951 0 1.000
## 52 52 FP051 integer 951 0 1.000
## 53 53 FP052 integer 951 0 1.000
## 54 54 FP053 integer 951 0 1.000
## 55 55 FP054 integer 951 0 1.000
## 56 56 FP055 integer 951 0 1.000
## 57 57 FP056 integer 951 0 1.000
## 58 58 FP057 integer 951 0 1.000
## 59 59 FP058 integer 951 0 1.000
## 60 60 FP059 integer 951 0 1.000
## 61 61 FP060 integer 951 0 1.000
## 62 62 FP061 integer 951 0 1.000
## 63 63 FP062 integer 951 0 1.000
## 64 64 FP063 integer 951 0 1.000
## 65 65 FP064 integer 951 0 1.000
## 66 66 FP065 integer 951 0 1.000
## 67 67 FP066 integer 951 0 1.000
## 68 68 FP067 integer 951 0 1.000
## 69 69 FP068 integer 951 0 1.000
## 70 70 FP069 integer 951 0 1.000
## 71 71 FP070 integer 951 0 1.000
## 72 72 FP071 integer 951 0 1.000
## 73 73 FP072 integer 951 0 1.000
## 74 74 FP073 integer 951 0 1.000
## 75 75 FP074 integer 951 0 1.000
## 76 76 FP075 integer 951 0 1.000
## 77 77 FP076 integer 951 0 1.000
## 78 78 FP077 integer 951 0 1.000
## 79 79 FP078 integer 951 0 1.000
## 80 80 FP079 integer 951 0 1.000
## 81 81 FP080 integer 951 0 1.000
## 82 82 FP081 integer 951 0 1.000
## 83 83 FP082 integer 951 0 1.000
## 84 84 FP083 integer 951 0 1.000
## 85 85 FP084 integer 951 0 1.000
## 86 86 FP085 integer 951 0 1.000
## 87 87 FP086 integer 951 0 1.000
## 88 88 FP087 integer 951 0 1.000
## 89 89 FP088 integer 951 0 1.000
## 90 90 FP089 integer 951 0 1.000
## 91 91 FP090 integer 951 0 1.000
## 92 92 FP091 integer 951 0 1.000
## 93 93 FP092 integer 951 0 1.000
## 94 94 FP093 integer 951 0 1.000
## 95 95 FP094 integer 951 0 1.000
## 96 96 FP095 integer 951 0 1.000
## 97 97 FP096 integer 951 0 1.000
## 98 98 FP097 integer 951 0 1.000
## 99 99 FP098 integer 951 0 1.000
## 100 100 FP099 integer 951 0 1.000
## 101 101 FP100 integer 951 0 1.000
## 102 102 FP101 integer 951 0 1.000
## 103 103 FP102 integer 951 0 1.000
## 104 104 FP103 integer 951 0 1.000
## 105 105 FP104 integer 951 0 1.000
## 106 106 FP105 integer 951 0 1.000
## 107 107 FP106 integer 951 0 1.000
## 108 108 FP107 integer 951 0 1.000
## 109 109 FP108 integer 951 0 1.000
## 110 110 FP109 integer 951 0 1.000
## 111 111 FP110 integer 951 0 1.000
## 112 112 FP111 integer 951 0 1.000
## 113 113 FP112 integer 951 0 1.000
## 114 114 FP113 integer 951 0 1.000
## 115 115 FP114 integer 951 0 1.000
## 116 116 FP115 integer 951 0 1.000
## 117 117 FP116 integer 951 0 1.000
## 118 118 FP117 integer 951 0 1.000
## 119 119 FP118 integer 951 0 1.000
## 120 120 FP119 integer 951 0 1.000
## 121 121 FP120 integer 951 0 1.000
## 122 122 FP121 integer 951 0 1.000
## 123 123 FP122 integer 951 0 1.000
## 124 124 FP123 integer 951 0 1.000
## 125 125 FP124 integer 951 0 1.000
## 126 126 FP125 integer 951 0 1.000
## 127 127 FP126 integer 951 0 1.000
## 128 128 FP127 integer 951 0 1.000
## 129 129 FP128 integer 951 0 1.000
## 130 130 FP129 integer 951 0 1.000
## 131 131 FP130 integer 951 0 1.000
## 132 132 FP131 integer 951 0 1.000
## 133 133 FP132 integer 951 0 1.000
## 134 134 FP133 integer 951 0 1.000
## 135 135 FP134 integer 951 0 1.000
## 136 136 FP135 integer 951 0 1.000
## 137 137 FP136 integer 951 0 1.000
## 138 138 FP137 integer 951 0 1.000
## 139 139 FP138 integer 951 0 1.000
## 140 140 FP139 integer 951 0 1.000
## 141 141 FP140 integer 951 0 1.000
## 142 142 FP141 integer 951 0 1.000
## 143 143 FP142 integer 951 0 1.000
## 144 144 FP143 integer 951 0 1.000
## 145 145 FP144 integer 951 0 1.000
## 146 146 FP145 integer 951 0 1.000
## 147 147 FP146 integer 951 0 1.000
## 148 148 FP147 integer 951 0 1.000
## 149 149 FP148 integer 951 0 1.000
## 150 150 FP149 integer 951 0 1.000
## 151 151 FP150 integer 951 0 1.000
## 152 152 FP151 integer 951 0 1.000
## 153 153 FP152 integer 951 0 1.000
## 154 154 FP153 integer 951 0 1.000
## 155 155 FP154 integer 951 0 1.000
## 156 156 FP155 integer 951 0 1.000
## 157 157 FP156 integer 951 0 1.000
## 158 158 FP157 integer 951 0 1.000
## 159 159 FP158 integer 951 0 1.000
## 160 160 FP159 integer 951 0 1.000
## 161 161 FP160 integer 951 0 1.000
## 162 162 FP161 integer 951 0 1.000
## 163 163 FP162 integer 951 0 1.000
## 164 164 FP163 integer 951 0 1.000
## 165 165 FP164 integer 951 0 1.000
## 166 166 FP165 integer 951 0 1.000
## 167 167 FP166 integer 951 0 1.000
## 168 168 FP167 integer 951 0 1.000
## 169 169 FP168 integer 951 0 1.000
## 170 170 FP169 integer 951 0 1.000
## 171 171 FP170 integer 951 0 1.000
## 172 172 FP171 integer 951 0 1.000
## 173 173 FP172 integer 951 0 1.000
## 174 174 FP173 integer 951 0 1.000
## 175 175 FP174 integer 951 0 1.000
## 176 176 FP175 integer 951 0 1.000
## 177 177 FP176 integer 951 0 1.000
## 178 178 FP177 integer 951 0 1.000
## 179 179 FP178 integer 951 0 1.000
## 180 180 FP179 integer 951 0 1.000
## 181 181 FP180 integer 951 0 1.000
## 182 182 FP181 integer 951 0 1.000
## 183 183 FP182 integer 951 0 1.000
## 184 184 FP183 integer 951 0 1.000
## 185 185 FP184 integer 951 0 1.000
## 186 186 FP185 integer 951 0 1.000
## 187 187 FP186 integer 951 0 1.000
## 188 188 FP187 integer 951 0 1.000
## 189 189 FP188 integer 951 0 1.000
## 190 190 FP189 integer 951 0 1.000
## 191 191 FP190 integer 951 0 1.000
## 192 192 FP191 integer 951 0 1.000
## 193 193 FP192 integer 951 0 1.000
## 194 194 FP193 integer 951 0 1.000
## 195 195 FP194 integer 951 0 1.000
## 196 196 FP195 integer 951 0 1.000
## 197 197 FP196 integer 951 0 1.000
## 198 198 FP197 integer 951 0 1.000
## 199 199 FP198 integer 951 0 1.000
## 200 200 FP199 integer 951 0 1.000
## 201 201 FP200 integer 951 0 1.000
## 202 202 FP201 integer 951 0 1.000
## 203 203 FP202 integer 951 0 1.000
## 204 204 FP203 integer 951 0 1.000
## 205 205 FP204 integer 951 0 1.000
## 206 206 FP205 integer 951 0 1.000
## 207 207 FP206 integer 951 0 1.000
## 208 208 FP207 integer 951 0 1.000
## 209 209 FP208 integer 951 0 1.000
## 210 210 MolWeight numeric 951 0 1.000
## 211 211 NumAtoms integer 951 0 1.000
## 212 212 NumNonHAtoms integer 951 0 1.000
## 213 213 NumBonds integer 951 0 1.000
## 214 214 NumNonHBonds integer 951 0 1.000
## 215 215 NumMultBonds integer 951 0 1.000
## 216 216 NumRotBonds integer 951 0 1.000
## 217 217 NumDblBonds integer 951 0 1.000
## 218 218 NumAromaticBonds integer 951 0 1.000
## 219 219 NumHydrogen integer 951 0 1.000
## 220 220 NumCarbon integer 951 0 1.000
## 221 221 NumNitrogen integer 951 0 1.000
## 222 222 NumOxygen integer 951 0 1.000
## 223 223 NumSulfer integer 951 0 1.000
## 224 224 NumChlorine integer 951 0 1.000
## 225 225 NumHalogen integer 951 0 1.000
## 226 226 NumRings integer 951 0 1.000
## 227 227 HydrophilicFactor numeric 951 0 1.000
## 228 228 SurfaceArea1 numeric 951 0 1.000
## 229 229 SurfaceArea2 numeric 951 0 1.000
##################################
# Listing all predictors
##################################
# Drop the response column (solTrainY) from the data quality assessment frame,
# keeping only the predictor columns. (DQA is built earlier in the file from
# the training set — assumed to contain solTrainY plus all predictors.)
DQA.Predictors <- DQA[, !names(DQA) %in% c("solTrainY")]
##################################
# Listing all numeric predictors
##################################
# Numeric predictors are every column whose name does NOT contain "FP"
# (the FP* columns are the binary fingerprint descriptors).
# NOTE(review): if no "FP" columns existed, -(grep(...)) would select zero
# columns; safe here because the solubility data always has FP columns.
DQA.Predictors.Numeric <- DQA.Predictors[, -(grep("FP", names(DQA.Predictors)))]

# Report how many numeric predictors were found.
if (length(names(DQA.Predictors.Numeric)) > 0) {
  print(paste0("There are ",
               length(names(DQA.Predictors.Numeric)),
               " numeric predictor variable(s)."))
} else {
  print("There are no numeric predictor variables.")
}
## [1] "There are 20 numeric predictor variable(s)."
##################################
# Listing all factor predictors
##################################
# Factor predictors are the binary fingerprint columns (names containing
# "FP"), converted from integer 0/1 to factors for the quality assessment.
DQA.Predictors.Factor <- as.data.frame(
  lapply(DQA.Predictors[(grep("FP", names(DQA.Predictors)))], factor))

# Report how many factor predictors were found.
if (length(names(DQA.Predictors.Factor)) > 0) {
  print(paste0("There are ",
               length(names(DQA.Predictors.Factor)),
               " factor predictor variable(s)."))
} else {
  print("There are no factor predictor variables.")
}
## [1] "There are 208 factor predictor variable(s)."
##################################
# Formulating a data quality assessment summary for factor predictors
##################################
if (length(names(DQA.Predictors.Factor)) > 0) {

  ##################################
  # Formulating a function to determine the first mode
  ##################################
  # Most frequent level among the non-missing values of x.
  # May return more than one value when there is a tie; callers take [1].
  FirstModes <- function(x) {
    ux  <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    ux[tab == max(tab)]
  }

  ##################################
  # Formulating a function to determine the second mode
  ##################################
  # Second most frequent level: remove all first-mode values, then take the
  # mode of what remains. Returns the placeholder "x" when no second mode
  # exists (e.g. a constant column), so downstream counts become 0.
  SecondModes <- function(x) {
    ux    <- unique(na.omit(x))
    tab   <- tabulate(match(x, ux))
    fm    <- ux[tab == max(tab)]
    sm    <- x[!(x %in% fm)]
    usm   <- unique(sm)
    tabsm <- tabulate(match(sm, usm))
    ifelse(is.na(usm[tabsm == max(tabsm)]) == TRUE,
           return("x"),
           return(usm[tabsm == max(tabsm)]))
  }

  # One row per factor predictor: type, cardinality, first/second mode
  # values and counts, and the first/second mode count ratio used later
  # as a near-zero-variance screen. Outer parentheses auto-print the result.
  (DQA.Predictors.Factor.Summary <- data.frame(
    Column.Name = names(DQA.Predictors.Factor),
    Column.Type = sapply(DQA.Predictors.Factor, function(x) class(x)),
    Unique.Count = sapply(DQA.Predictors.Factor, function(x) length(unique(x))),
    First.Mode.Value = sapply(DQA.Predictors.Factor, function(x) as.character(FirstModes(x)[1])),
    Second.Mode.Value = sapply(DQA.Predictors.Factor, function(x) as.character(SecondModes(x)[1])),
    First.Mode.Count = sapply(DQA.Predictors.Factor, function(x) sum(na.omit(x) == FirstModes(x)[1])),
    Second.Mode.Count = sapply(DQA.Predictors.Factor, function(x) sum(na.omit(x) == SecondModes(x)[1])),
    Unique.Count.Ratio = sapply(DQA.Predictors.Factor, function(x) format(round((length(unique(x))/nrow(DQA.Predictors.Factor)),3), nsmall=3)),
    First.Second.Mode.Ratio = sapply(DQA.Predictors.Factor, function(x) format(round((sum(na.omit(x) == FirstModes(x)[1])/sum(na.omit(x) == SecondModes(x)[1])),3), nsmall=3)),
    row.names = NULL)
  )

}
## Column.Name Column.Type Unique.Count First.Mode.Value Second.Mode.Value
## 1 FP001 factor 2 0 1
## 2 FP002 factor 2 1 0
## 3 FP003 factor 2 0 1
## 4 FP004 factor 2 1 0
## 5 FP005 factor 2 1 0
## 6 FP006 factor 2 0 1
## 7 FP007 factor 2 0 1
## 8 FP008 factor 2 0 1
## 9 FP009 factor 2 0 1
## 10 FP010 factor 2 0 1
## 11 FP011 factor 2 0 1
## 12 FP012 factor 2 0 1
## 13 FP013 factor 2 0 1
## 14 FP014 factor 2 0 1
## 15 FP015 factor 2 1 0
## 16 FP016 factor 2 0 1
## 17 FP017 factor 2 0 1
## 18 FP018 factor 2 0 1
## 19 FP019 factor 2 0 1
## 20 FP020 factor 2 0 1
## 21 FP021 factor 2 0 1
## 22 FP022 factor 2 0 1
## 23 FP023 factor 2 0 1
## 24 FP024 factor 2 0 1
## 25 FP025 factor 2 0 1
## 26 FP026 factor 2 0 1
## 27 FP027 factor 2 0 1
## 28 FP028 factor 2 0 1
## 29 FP029 factor 2 0 1
## 30 FP030 factor 2 0 1
## 31 FP031 factor 2 0 1
## 32 FP032 factor 2 0 1
## 33 FP033 factor 2 0 1
## 34 FP034 factor 2 0 1
## 35 FP035 factor 2 0 1
## 36 FP036 factor 2 0 1
## 37 FP037 factor 2 0 1
## 38 FP038 factor 2 0 1
## 39 FP039 factor 2 0 1
## 40 FP040 factor 2 0 1
## 41 FP041 factor 2 0 1
## 42 FP042 factor 2 0 1
## 43 FP043 factor 2 0 1
## 44 FP044 factor 2 0 1
## 45 FP045 factor 2 0 1
## 46 FP046 factor 2 0 1
## 47 FP047 factor 2 0 1
## 48 FP048 factor 2 0 1
## 49 FP049 factor 2 0 1
## 50 FP050 factor 2 0 1
## 51 FP051 factor 2 0 1
## 52 FP052 factor 2 0 1
## 53 FP053 factor 2 0 1
## 54 FP054 factor 2 0 1
## 55 FP055 factor 2 0 1
## 56 FP056 factor 2 0 1
## 57 FP057 factor 2 0 1
## 58 FP058 factor 2 0 1
## 59 FP059 factor 2 0 1
## 60 FP060 factor 2 0 1
## 61 FP061 factor 2 0 1
## 62 FP062 factor 2 0 1
## 63 FP063 factor 2 0 1
## 64 FP064 factor 2 0 1
## 65 FP065 factor 2 1 0
## 66 FP066 factor 2 1 0
## 67 FP067 factor 2 0 1
## 68 FP068 factor 2 0 1
## 69 FP069 factor 2 0 1
## 70 FP070 factor 2 0 1
## 71 FP071 factor 2 0 1
## 72 FP072 factor 2 1 0
## 73 FP073 factor 2 0 1
## 74 FP074 factor 2 0 1
## 75 FP075 factor 2 0 1
## 76 FP076 factor 2 0 1
## 77 FP077 factor 2 0 1
## 78 FP078 factor 2 0 1
## 79 FP079 factor 2 1 0
## 80 FP080 factor 2 0 1
## 81 FP081 factor 2 0 1
## 82 FP082 factor 2 1 0
## 83 FP083 factor 2 0 1
## 84 FP084 factor 2 0 1
## 85 FP085 factor 2 0 1
## 86 FP086 factor 2 0 1
## 87 FP087 factor 2 1 0
## 88 FP088 factor 2 0 1
## 89 FP089 factor 2 0 1
## 90 FP090 factor 2 0 1
## 91 FP091 factor 2 0 1
## 92 FP092 factor 2 0 1
## 93 FP093 factor 2 0 1
## 94 FP094 factor 2 0 1
## 95 FP095 factor 2 0 1
## 96 FP096 factor 2 0 1
## 97 FP097 factor 2 0 1
## 98 FP098 factor 2 0 1
## 99 FP099 factor 2 0 1
## 100 FP100 factor 2 0 1
## 101 FP101 factor 2 0 1
## 102 FP102 factor 2 0 1
## 103 FP103 factor 2 0 1
## 104 FP104 factor 2 0 1
## 105 FP105 factor 2 0 1
## 106 FP106 factor 2 0 1
## 107 FP107 factor 2 0 1
## 108 FP108 factor 2 0 1
## 109 FP109 factor 2 0 1
## 110 FP110 factor 2 0 1
## 111 FP111 factor 2 0 1
## 112 FP112 factor 2 0 1
## 113 FP113 factor 2 0 1
## 114 FP114 factor 2 0 1
## 115 FP115 factor 2 0 1
## 116 FP116 factor 2 0 1
## 117 FP117 factor 2 0 1
## 118 FP118 factor 2 0 1
## 119 FP119 factor 2 0 1
## 120 FP120 factor 2 0 1
## 121 FP121 factor 2 0 1
## 122 FP122 factor 2 0 1
## 123 FP123 factor 2 0 1
## 124 FP124 factor 2 0 1
## 125 FP125 factor 2 0 1
## 126 FP126 factor 2 0 1
## 127 FP127 factor 2 0 1
## 128 FP128 factor 2 0 1
## 129 FP129 factor 2 0 1
## 130 FP130 factor 2 0 1
## 131 FP131 factor 2 0 1
## 132 FP132 factor 2 0 1
## 133 FP133 factor 2 0 1
## 134 FP134 factor 2 0 1
## 135 FP135 factor 2 0 1
## 136 FP136 factor 2 0 1
## 137 FP137 factor 2 0 1
## 138 FP138 factor 2 0 1
## 139 FP139 factor 2 0 1
## 140 FP140 factor 2 0 1
## 141 FP141 factor 2 0 1
## 142 FP142 factor 2 0 1
## 143 FP143 factor 2 0 1
## 144 FP144 factor 2 0 1
## 145 FP145 factor 2 0 1
## 146 FP146 factor 2 0 1
## 147 FP147 factor 2 0 1
## 148 FP148 factor 2 0 1
## 149 FP149 factor 2 0 1
## 150 FP150 factor 2 0 1
## 151 FP151 factor 2 0 1
## 152 FP152 factor 2 0 1
## 153 FP153 factor 2 0 1
## 154 FP154 factor 2 0 1
## 155 FP155 factor 2 0 1
## 156 FP156 factor 2 0 1
## 157 FP157 factor 2 0 1
## 158 FP158 factor 2 0 1
## 159 FP159 factor 2 0 1
## 160 FP160 factor 2 0 1
## 161 FP161 factor 2 0 1
## 162 FP162 factor 2 0 1
## 163 FP163 factor 2 0 1
## 164 FP164 factor 2 1 0
## 165 FP165 factor 2 0 1
## 166 FP166 factor 2 0 1
## 167 FP167 factor 2 0 1
## 168 FP168 factor 2 1 0
## 169 FP169 factor 2 0 1
## 170 FP170 factor 2 0 1
## 171 FP171 factor 2 0 1
## 172 FP172 factor 2 0 1
## 173 FP173 factor 2 0 1
## 174 FP174 factor 2 0 1
## 175 FP175 factor 2 0 1
## 176 FP176 factor 2 0 1
## 177 FP177 factor 2 0 1
## 178 FP178 factor 2 0 1
## 179 FP179 factor 2 0 1
## 180 FP180 factor 2 0 1
## 181 FP181 factor 2 0 1
## 182 FP182 factor 2 0 1
## 183 FP183 factor 2 0 1
## 184 FP184 factor 2 0 1
## 185 FP185 factor 2 0 1
## 186 FP186 factor 2 0 1
## 187 FP187 factor 2 0 1
## 188 FP188 factor 2 0 1
## 189 FP189 factor 2 0 1
## 190 FP190 factor 2 0 1
## 191 FP191 factor 2 0 1
## 192 FP192 factor 2 0 1
## 193 FP193 factor 2 0 1
## 194 FP194 factor 2 0 1
## 195 FP195 factor 2 0 1
## 196 FP196 factor 2 0 1
## 197 FP197 factor 2 0 1
## 198 FP198 factor 2 0 1
## 199 FP199 factor 2 0 1
## 200 FP200 factor 2 0 1
## 201 FP201 factor 2 0 1
## 202 FP202 factor 2 0 1
## 203 FP203 factor 2 0 1
## 204 FP204 factor 2 0 1
## 205 FP205 factor 2 0 1
## 206 FP206 factor 2 0 1
## 207 FP207 factor 2 0 1
## 208 FP208 factor 2 0 1
## First.Mode.Count Second.Mode.Count Unique.Count.Ratio
## 1 482 469 0.002
## 2 513 438 0.002
## 3 536 415 0.002
## 4 556 395 0.002
## 5 551 400 0.002
## 6 570 381 0.002
## 7 605 346 0.002
## 8 641 310 0.002
## 9 685 266 0.002
## 10 781 170 0.002
## 11 747 204 0.002
## 12 783 168 0.002
## 13 793 158 0.002
## 14 798 153 0.002
## 15 818 133 0.002
## 16 812 139 0.002
## 17 814 137 0.002
## 18 826 125 0.002
## 19 835 116 0.002
## 20 837 114 0.002
## 21 836 115 0.002
## 22 852 99 0.002
## 23 834 117 0.002
## 24 844 107 0.002
## 25 841 110 0.002
## 26 871 80 0.002
## 27 858 93 0.002
## 28 850 101 0.002
## 29 854 97 0.002
## 30 862 89 0.002
## 31 866 85 0.002
## 32 881 70 0.002
## 33 885 66 0.002
## 34 875 76 0.002
## 35 882 69 0.002
## 36 879 72 0.002
## 37 884 67 0.002
## 38 869 82 0.002
## 39 880 71 0.002
## 40 886 65 0.002
## 41 891 60 0.002
## 42 897 54 0.002
## 43 888 63 0.002
## 44 894 57 0.002
## 45 898 53 0.002
## 46 651 300 0.002
## 47 698 253 0.002
## 48 833 118 0.002
## 49 835 116 0.002
## 50 844 107 0.002
## 51 847 104 0.002
## 52 864 87 0.002
## 53 862 89 0.002
## 54 879 72 0.002
## 55 900 51 0.002
## 56 889 62 0.002
## 57 837 114 0.002
## 58 843 108 0.002
## 59 899 52 0.002
## 60 493 458 0.002
## 61 526 425 0.002
## 62 535 416 0.002
## 63 546 405 0.002
## 64 555 396 0.002
## 65 564 387 0.002
## 66 580 371 0.002
## 67 590 361 0.002
## 68 607 344 0.002
## 69 607 344 0.002
## 70 613 338 0.002
## 71 640 311 0.002
## 72 626 325 0.002
## 73 656 295 0.002
## 74 642 309 0.002
## 75 629 322 0.002
## 76 639 312 0.002
## 77 646 305 0.002
## 78 662 289 0.002
## 79 656 295 0.002
## 80 663 288 0.002
## 81 686 265 0.002
## 82 679 272 0.002
## 83 691 260 0.002
## 84 679 272 0.002
## 85 708 243 0.002
## 86 695 256 0.002
## 87 691 260 0.002
## 88 701 250 0.002
## 89 716 235 0.002
## 90 714 237 0.002
## 91 737 214 0.002
## 92 719 232 0.002
## 93 719 232 0.002
## 94 731 220 0.002
## 95 742 209 0.002
## 96 744 207 0.002
## 97 727 224 0.002
## 98 725 226 0.002
## 99 735 216 0.002
## 100 731 220 0.002
## 101 726 225 0.002
## 102 759 192 0.002
## 103 743 208 0.002
## 104 739 212 0.002
## 105 746 205 0.002
## 106 769 182 0.002
## 107 750 201 0.002
## 108 756 195 0.002
## 109 783 168 0.002
## 110 755 196 0.002
## 111 764 187 0.002
## 112 766 185 0.002
## 113 765 186 0.002
## 114 803 148 0.002
## 115 781 170 0.002
## 116 768 183 0.002
## 117 781 170 0.002
## 118 768 183 0.002
## 119 796 155 0.002
## 120 793 158 0.002
## 121 818 133 0.002
## 122 795 156 0.002
## 123 792 159 0.002
## 124 797 154 0.002
## 125 803 148 0.002
## 126 810 141 0.002
## 127 818 133 0.002
## 128 810 141 0.002
## 129 819 132 0.002
## 130 851 100 0.002
## 131 831 120 0.002
## 132 832 119 0.002
## 133 831 120 0.002
## 134 830 121 0.002
## 135 831 120 0.002
## 136 836 115 0.002
## 137 841 110 0.002
## 138 845 106 0.002
## 139 873 78 0.002
## 140 845 106 0.002
## 141 840 111 0.002
## 142 847 104 0.002
## 143 874 77 0.002
## 144 852 99 0.002
## 145 852 99 0.002
## 146 853 98 0.002
## 147 851 100 0.002
## 148 868 83 0.002
## 149 865 86 0.002
## 150 876 75 0.002
## 151 898 53 0.002
## 152 873 78 0.002
## 153 877 74 0.002
## 154 915 36 0.002
## 155 885 66 0.002
## 156 884 67 0.002
## 157 892 59 0.002
## 158 900 51 0.002
## 159 884 67 0.002
## 160 886 65 0.002
## 161 888 63 0.002
## 162 480 471 0.002
## 163 498 453 0.002
## 164 597 354 0.002
## 165 619 332 0.002
## 166 636 315 0.002
## 167 639 312 0.002
## 168 633 318 0.002
## 169 774 177 0.002
## 170 776 175 0.002
## 171 790 161 0.002
## 172 807 144 0.002
## 173 816 135 0.002
## 174 827 124 0.002
## 175 823 128 0.002
## 176 835 116 0.002
## 177 836 115 0.002
## 178 836 115 0.002
## 179 858 93 0.002
## 180 849 102 0.002
## 181 862 89 0.002
## 182 857 94 0.002
## 183 879 72 0.002
## 184 871 80 0.002
## 185 870 81 0.002
## 186 878 73 0.002
## 187 882 69 0.002
## 188 886 65 0.002
## 189 878 73 0.002
## 190 882 69 0.002
## 191 884 67 0.002
## 192 893 58 0.002
## 193 892 59 0.002
## 194 895 56 0.002
## 195 893 58 0.002
## 196 897 54 0.002
## 197 901 50 0.002
## 198 897 54 0.002
## 199 906 45 0.002
## 200 904 47 0.002
## 201 901 50 0.002
## 202 706 245 0.002
## 203 842 109 0.002
## 204 857 94 0.002
## 205 877 74 0.002
## 206 894 57 0.002
## 207 897 54 0.002
## 208 844 107 0.002
## First.Second.Mode.Ratio
## 1 1.028
## 2 1.171
## 3 1.292
## 4 1.408
## 5 1.378
## 6 1.496
## 7 1.749
## 8 2.068
## 9 2.575
## 10 4.594
## 11 3.662
## 12 4.661
## 13 5.019
## 14 5.216
## 15 6.150
## 16 5.842
## 17 5.942
## 18 6.608
## 19 7.198
## 20 7.342
## 21 7.270
## 22 8.606
## 23 7.128
## 24 7.888
## 25 7.645
## 26 10.887
## 27 9.226
## 28 8.416
## 29 8.804
## 30 9.685
## 31 10.188
## 32 12.586
## 33 13.409
## 34 11.513
## 35 12.783
## 36 12.208
## 37 13.194
## 38 10.598
## 39 12.394
## 40 13.631
## 41 14.850
## 42 16.611
## 43 14.095
## 44 15.684
## 45 16.943
## 46 2.170
## 47 2.759
## 48 7.059
## 49 7.198
## 50 7.888
## 51 8.144
## 52 9.931
## 53 9.685
## 54 12.208
## 55 17.647
## 56 14.339
## 57 7.342
## 58 7.806
## 59 17.288
## 60 1.076
## 61 1.238
## 62 1.286
## 63 1.348
## 64 1.402
## 65 1.457
## 66 1.563
## 67 1.634
## 68 1.765
## 69 1.765
## 70 1.814
## 71 2.058
## 72 1.926
## 73 2.224
## 74 2.078
## 75 1.953
## 76 2.048
## 77 2.118
## 78 2.291
## 79 2.224
## 80 2.302
## 81 2.589
## 82 2.496
## 83 2.658
## 84 2.496
## 85 2.914
## 86 2.715
## 87 2.658
## 88 2.804
## 89 3.047
## 90 3.013
## 91 3.444
## 92 3.099
## 93 3.099
## 94 3.323
## 95 3.550
## 96 3.594
## 97 3.246
## 98 3.208
## 99 3.403
## 100 3.323
## 101 3.227
## 102 3.953
## 103 3.572
## 104 3.486
## 105 3.639
## 106 4.225
## 107 3.731
## 108 3.877
## 109 4.661
## 110 3.852
## 111 4.086
## 112 4.141
## 113 4.113
## 114 5.426
## 115 4.594
## 116 4.197
## 117 4.594
## 118 4.197
## 119 5.135
## 120 5.019
## 121 6.150
## 122 5.096
## 123 4.981
## 124 5.175
## 125 5.426
## 126 5.745
## 127 6.150
## 128 5.745
## 129 6.205
## 130 8.510
## 131 6.925
## 132 6.992
## 133 6.925
## 134 6.860
## 135 6.925
## 136 7.270
## 137 7.645
## 138 7.972
## 139 11.192
## 140 7.972
## 141 7.568
## 142 8.144
## 143 11.351
## 144 8.606
## 145 8.606
## 146 8.704
## 147 8.510
## 148 10.458
## 149 10.058
## 150 11.680
## 151 16.943
## 152 11.192
## 153 11.851
## 154 25.417
## 155 13.409
## 156 13.194
## 157 15.119
## 158 17.647
## 159 13.194
## 160 13.631
## 161 14.095
## 162 1.019
## 163 1.099
## 164 1.686
## 165 1.864
## 166 2.019
## 167 2.048
## 168 1.991
## 169 4.373
## 170 4.434
## 171 4.907
## 172 5.604
## 173 6.044
## 174 6.669
## 175 6.430
## 176 7.198
## 177 7.270
## 178 7.270
## 179 9.226
## 180 8.324
## 181 9.685
## 182 9.117
## 183 12.208
## 184 10.887
## 185 10.741
## 186 12.027
## 187 12.783
## 188 13.631
## 189 12.027
## 190 12.783
## 191 13.194
## 192 15.397
## 193 15.119
## 194 15.982
## 195 15.397
## 196 16.611
## 197 18.020
## 198 16.611
## 199 20.133
## 200 19.234
## 201 18.020
## 202 2.882
## 203 7.725
## 204 9.117
## 205 11.851
## 206 15.684
## 207 16.611
## 208 7.888
##################################
# Formulating a data quality assessment summary for numeric predictors
##################################
if (length(names(DQA.Predictors.Numeric)) > 0) {

  ##################################
  # Formulating a function to determine the first mode
  ##################################
  # Most frequent value among the non-missing entries of x.
  # May return more than one value when there is a tie; callers take [1].
  FirstModes <- function(x) {
    ux  <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    ux[tab == max(tab)]
  }

  ##################################
  # Formulating a function to determine the second mode
  ##################################
  # Second most frequent value: remove all first-mode values, then take the
  # mode of what remains. Returns the sentinel 0.00001 when no second mode
  # exists, keeping the downstream mode-count ratio finite.
  SecondModes <- function(x) {
    ux    <- unique(na.omit(x))
    tab   <- tabulate(match(x, ux))
    fm    <- ux[tab == max(tab)]
    sm    <- na.omit(x)[!(na.omit(x) %in% fm)]
    usm   <- unique(sm)
    tabsm <- tabulate(match(sm, usm))
    ifelse(is.na(usm[tabsm == max(tabsm)]) == TRUE,
           return(0.00001),
           return(usm[tabsm == max(tabsm)]))
  }

  # One row per numeric predictor: type, cardinality, mode statistics, and
  # distribution summaries (min/mean/median/max, skewness/kurtosis from the
  # moments package, 25th/75th percentiles). Values are formatted to three
  # decimals for display. Outer parentheses auto-print the result.
  (DQA.Predictors.Numeric.Summary <- data.frame(
    Column.Name = names(DQA.Predictors.Numeric),
    Column.Type = sapply(DQA.Predictors.Numeric, function(x) class(x)),
    Unique.Count = sapply(DQA.Predictors.Numeric, function(x) length(unique(x))),
    Unique.Count.Ratio = sapply(DQA.Predictors.Numeric, function(x) format(round((length(unique(x))/nrow(DQA.Predictors.Numeric)),3), nsmall=3)),
    First.Mode.Value = sapply(DQA.Predictors.Numeric, function(x) format(round((FirstModes(x)[1]),3),nsmall=3)),
    Second.Mode.Value = sapply(DQA.Predictors.Numeric, function(x) format(round((SecondModes(x)[1]),3),nsmall=3)),
    First.Mode.Count = sapply(DQA.Predictors.Numeric, function(x) sum(na.omit(x) == FirstModes(x)[1])),
    Second.Mode.Count = sapply(DQA.Predictors.Numeric, function(x) sum(na.omit(x) == SecondModes(x)[1])),
    First.Second.Mode.Ratio = sapply(DQA.Predictors.Numeric, function(x) format(round((sum(na.omit(x) == FirstModes(x)[1])/sum(na.omit(x) == SecondModes(x)[1])),3), nsmall=3)),
    Minimum = sapply(DQA.Predictors.Numeric, function(x) format(round(min(x, na.rm = TRUE),3), nsmall=3)),
    Mean = sapply(DQA.Predictors.Numeric, function(x) format(round(mean(x, na.rm = TRUE),3), nsmall=3)),
    Median = sapply(DQA.Predictors.Numeric, function(x) format(round(median(x, na.rm = TRUE),3), nsmall=3)),
    Maximum = sapply(DQA.Predictors.Numeric, function(x) format(round(max(x, na.rm = TRUE),3), nsmall=3)),
    Skewness = sapply(DQA.Predictors.Numeric, function(x) format(round(skewness(x, na.rm = TRUE),3), nsmall=3)),
    Kurtosis = sapply(DQA.Predictors.Numeric, function(x) format(round(kurtosis(x, na.rm = TRUE),3), nsmall=3)),
    Percentile25th = sapply(DQA.Predictors.Numeric, function(x) format(round(quantile(x, probs=0.25, na.rm = TRUE),3), nsmall=3)),
    Percentile75th = sapply(DQA.Predictors.Numeric, function(x) format(round(quantile(x, probs=0.75, na.rm = TRUE),3), nsmall=3)),
    row.names = NULL)
  )

}
## Column.Name Column.Type Unique.Count Unique.Count.Ratio
## 1 MolWeight numeric 646 0.679
## 2 NumAtoms integer 66 0.069
## 3 NumNonHAtoms integer 36 0.038
## 4 NumBonds integer 72 0.076
## 5 NumNonHBonds integer 39 0.041
## 6 NumMultBonds integer 25 0.026
## 7 NumRotBonds integer 15 0.016
## 8 NumDblBonds integer 8 0.008
## 9 NumAromaticBonds integer 16 0.017
## 10 NumHydrogen integer 41 0.043
## 11 NumCarbon integer 28 0.029
## 12 NumNitrogen integer 7 0.007
## 13 NumOxygen integer 11 0.012
## 14 NumSulfer integer 5 0.005
## 15 NumChlorine integer 11 0.012
## 16 NumHalogen integer 11 0.012
## 17 NumRings integer 8 0.008
## 18 HydrophilicFactor numeric 369 0.388
## 19 SurfaceArea1 numeric 252 0.265
## 20 SurfaceArea2 numeric 287 0.302
## First.Mode.Value Second.Mode.Value First.Mode.Count Second.Mode.Count
## 1 102.200 116.230 16 14
## 2 22.000 24.000 73 51
## 3 8.000 11.000 104 73
## 4 23.000 19.000 69 56
## 5 8.000 7.000 82 66
## 6 0.000 7.000 158 122
## 7 0.000 1.000 272 186
## 8 0.000 1.000 427 268
## 9 0.000 6.000 400 302
## 10 12.000 8.000 83 79
## 11 6.000 7.000 105 97
## 12 0.000 1.000 546 191
## 13 0.000 2.000 325 218
## 14 0.000 1.000 830 96
## 15 0.000 1.000 750 81
## 16 0.000 1.000 685 107
## 17 1.000 0.000 323 260
## 18 -0.828 -0.158 21 20
## 19 0.000 20.230 218 76
## 20 0.000 20.230 211 75
## First.Second.Mode.Ratio Minimum Mean Median Maximum Skewness Kurtosis
## 1 1.143 46.090 201.654 179.230 665.810 0.988 3.945
## 2 1.431 5.000 25.507 22.000 94.000 1.364 5.523
## 3 1.425 2.000 13.161 12.000 47.000 0.993 4.129
## 4 1.232 4.000 25.909 23.000 97.000 1.360 5.408
## 5 1.242 1.000 13.563 12.000 50.000 0.969 3.842
## 6 1.295 0.000 6.148 6.000 25.000 0.670 3.053
## 7 1.462 0.000 2.251 2.000 16.000 1.577 6.437
## 8 1.593 0.000 1.006 1.000 7.000 1.360 4.760
## 9 1.325 0.000 5.121 6.000 25.000 0.796 3.241
## 10 1.051 0.000 12.346 11.000 47.000 1.262 5.261
## 11 1.082 1.000 9.893 9.000 33.000 0.927 3.616
## 12 2.859 0.000 0.813 0.000 6.000 1.554 4.831
## 13 1.491 0.000 1.574 1.000 13.000 1.772 8.494
## 14 8.646 0.000 0.164 0.000 4.000 3.842 21.526
## 15 9.259 0.000 0.556 0.000 10.000 3.178 13.780
## 16 6.402 0.000 0.698 0.000 10.000 2.691 10.808
## 17 1.242 0.000 1.402 1.000 7.000 1.034 3.875
## 18 1.050 -0.985 -0.021 -0.314 13.483 3.404 27.504
## 19 2.868 0.000 36.459 29.100 331.940 1.714 9.714
## 20 2.813 0.000 40.234 33.120 331.940 1.475 7.485
## Percentile25th Percentile75th
## 1 122.605 264.340
## 2 17.000 31.000
## 3 8.000 17.000
## 4 17.000 31.500
## 5 8.000 18.000
## 6 1.000 10.000
## 7 0.000 3.500
## 8 0.000 2.000
## 9 0.000 6.000
## 10 7.000 16.000
## 11 6.000 12.000
## 12 0.000 1.000
## 13 0.000 2.000
## 14 0.000 0.000
## 15 0.000 0.000
## 16 0.000 1.000
## 17 0.000 2.000
## 18 -0.763 0.313
## 19 9.230 53.280
## 20 10.630 60.660
##################################
# Identifying potential data quality issues
##################################
##################################
# Checking for missing observations
##################################
# Flag variables with missing observations using the earlier DQA.Summary
# table (assumed to carry an NA.Count column per variable — built upstream).
if ((nrow(DQA.Summary[DQA.Summary$NA.Count > 0, ])) > 0) {
  print(paste0("Missing observations noted for ",
               nrow(DQA.Summary[DQA.Summary$NA.Count > 0, ]),
               " variable(s) with NA.Count>0 and Fill.Rate<1.0."))
  # Show the offending rows of the summary table.
  DQA.Summary[DQA.Summary$NA.Count > 0, ]
} else {
  print("No missing observations noted.")
}
## [1] "No missing observations noted."
##################################
# Checking for zero or near-zero variance predictors
##################################
# Screen factor predictors for near-zero variance: a first/second mode count
# ratio above 5 indicates a heavily imbalanced binary fingerprint. The ratio
# column is stored as formatted character, hence as.numeric(as.character(...)).
if (length(names(DQA.Predictors.Factor)) == 0) {
  print("No factor predictors noted.")
} else if (nrow(DQA.Predictors.Factor.Summary[as.numeric(as.character(DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio)) > 5, ]) > 0) {
  print(paste0("Low variance observed for ",
               nrow(DQA.Predictors.Factor.Summary[as.numeric(as.character(DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio)) > 5, ]),
               " factor variable(s) with First.Second.Mode.Ratio>5."))
  # Show the low-variance rows of the factor summary table.
  DQA.Predictors.Factor.Summary[as.numeric(as.character(DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio)) > 5, ]
} else {
  print("No low variance factor predictors due to high first-second mode ratio noted.")
}
## [1] "Low variance observed for 124 factor variable(s) with First.Second.Mode.Ratio>5."
## Column.Name Column.Type Unique.Count First.Mode.Value Second.Mode.Value
## 13 FP013 factor 2 0 1
## 14 FP014 factor 2 0 1
## 15 FP015 factor 2 1 0
## 16 FP016 factor 2 0 1
## 17 FP017 factor 2 0 1
## 18 FP018 factor 2 0 1
## 19 FP019 factor 2 0 1
## 20 FP020 factor 2 0 1
## 21 FP021 factor 2 0 1
## 22 FP022 factor 2 0 1
## 23 FP023 factor 2 0 1
## 24 FP024 factor 2 0 1
## 25 FP025 factor 2 0 1
## 26 FP026 factor 2 0 1
## 27 FP027 factor 2 0 1
## 28 FP028 factor 2 0 1
## 29 FP029 factor 2 0 1
## 30 FP030 factor 2 0 1
## 31 FP031 factor 2 0 1
## 32 FP032 factor 2 0 1
## 33 FP033 factor 2 0 1
## 34 FP034 factor 2 0 1
## 35 FP035 factor 2 0 1
## 36 FP036 factor 2 0 1
## 37 FP037 factor 2 0 1
## 38 FP038 factor 2 0 1
## 39 FP039 factor 2 0 1
## 40 FP040 factor 2 0 1
## 41 FP041 factor 2 0 1
## 42 FP042 factor 2 0 1
## 43 FP043 factor 2 0 1
## 44 FP044 factor 2 0 1
## 45 FP045 factor 2 0 1
## 48 FP048 factor 2 0 1
## 49 FP049 factor 2 0 1
## 50 FP050 factor 2 0 1
## 51 FP051 factor 2 0 1
## 52 FP052 factor 2 0 1
## 53 FP053 factor 2 0 1
## 54 FP054 factor 2 0 1
## 55 FP055 factor 2 0 1
## 56 FP056 factor 2 0 1
## 57 FP057 factor 2 0 1
## 58 FP058 factor 2 0 1
## 59 FP059 factor 2 0 1
## 114 FP114 factor 2 0 1
## 119 FP119 factor 2 0 1
## 120 FP120 factor 2 0 1
## 121 FP121 factor 2 0 1
## 122 FP122 factor 2 0 1
## 124 FP124 factor 2 0 1
## 125 FP125 factor 2 0 1
## 126 FP126 factor 2 0 1
## 127 FP127 factor 2 0 1
## 128 FP128 factor 2 0 1
## 129 FP129 factor 2 0 1
## 130 FP130 factor 2 0 1
## 131 FP131 factor 2 0 1
## 132 FP132 factor 2 0 1
## 133 FP133 factor 2 0 1
## 134 FP134 factor 2 0 1
## 135 FP135 factor 2 0 1
## 136 FP136 factor 2 0 1
## 137 FP137 factor 2 0 1
## 138 FP138 factor 2 0 1
## 139 FP139 factor 2 0 1
## 140 FP140 factor 2 0 1
## 141 FP141 factor 2 0 1
## 142 FP142 factor 2 0 1
## 143 FP143 factor 2 0 1
## 144 FP144 factor 2 0 1
## 145 FP145 factor 2 0 1
## 146 FP146 factor 2 0 1
## 147 FP147 factor 2 0 1
## 148 FP148 factor 2 0 1
## 149 FP149 factor 2 0 1
## 150 FP150 factor 2 0 1
## 151 FP151 factor 2 0 1
## 152 FP152 factor 2 0 1
## 153 FP153 factor 2 0 1
## 154 FP154 factor 2 0 1
## 155 FP155 factor 2 0 1
## 156 FP156 factor 2 0 1
## 157 FP157 factor 2 0 1
## 158 FP158 factor 2 0 1
## 159 FP159 factor 2 0 1
## 160 FP160 factor 2 0 1
## 161 FP161 factor 2 0 1
## 172 FP172 factor 2 0 1
## 173 FP173 factor 2 0 1
## 174 FP174 factor 2 0 1
## 175 FP175 factor 2 0 1
## 176 FP176 factor 2 0 1
## 177 FP177 factor 2 0 1
## 178 FP178 factor 2 0 1
## 179 FP179 factor 2 0 1
## 180 FP180 factor 2 0 1
## 181 FP181 factor 2 0 1
## 182 FP182 factor 2 0 1
## 183 FP183 factor 2 0 1
## 184 FP184 factor 2 0 1
## 185 FP185 factor 2 0 1
## 186 FP186 factor 2 0 1
## 187 FP187 factor 2 0 1
## 188 FP188 factor 2 0 1
## 189 FP189 factor 2 0 1
## 190 FP190 factor 2 0 1
## 191 FP191 factor 2 0 1
## 192 FP192 factor 2 0 1
## 193 FP193 factor 2 0 1
## 194 FP194 factor 2 0 1
## 195 FP195 factor 2 0 1
## 196 FP196 factor 2 0 1
## 197 FP197 factor 2 0 1
## 198 FP198 factor 2 0 1
## 199 FP199 factor 2 0 1
## 200 FP200 factor 2 0 1
## 201 FP201 factor 2 0 1
## 203 FP203 factor 2 0 1
## 204 FP204 factor 2 0 1
## 205 FP205 factor 2 0 1
## 206 FP206 factor 2 0 1
## 207 FP207 factor 2 0 1
## 208 FP208 factor 2 0 1
## First.Mode.Count Second.Mode.Count Unique.Count.Ratio
## 13 793 158 0.002
## 14 798 153 0.002
## 15 818 133 0.002
## 16 812 139 0.002
## 17 814 137 0.002
## 18 826 125 0.002
## 19 835 116 0.002
## 20 837 114 0.002
## 21 836 115 0.002
## 22 852 99 0.002
## 23 834 117 0.002
## 24 844 107 0.002
## 25 841 110 0.002
## 26 871 80 0.002
## 27 858 93 0.002
## 28 850 101 0.002
## 29 854 97 0.002
## 30 862 89 0.002
## 31 866 85 0.002
## 32 881 70 0.002
## 33 885 66 0.002
## 34 875 76 0.002
## 35 882 69 0.002
## 36 879 72 0.002
## 37 884 67 0.002
## 38 869 82 0.002
## 39 880 71 0.002
## 40 886 65 0.002
## 41 891 60 0.002
## 42 897 54 0.002
## 43 888 63 0.002
## 44 894 57 0.002
## 45 898 53 0.002
## 48 833 118 0.002
## 49 835 116 0.002
## 50 844 107 0.002
## 51 847 104 0.002
## 52 864 87 0.002
## 53 862 89 0.002
## 54 879 72 0.002
## 55 900 51 0.002
## 56 889 62 0.002
## 57 837 114 0.002
## 58 843 108 0.002
## 59 899 52 0.002
## 114 803 148 0.002
## 119 796 155 0.002
## 120 793 158 0.002
## 121 818 133 0.002
## 122 795 156 0.002
## 124 797 154 0.002
## 125 803 148 0.002
## 126 810 141 0.002
## 127 818 133 0.002
## 128 810 141 0.002
## 129 819 132 0.002
## 130 851 100 0.002
## 131 831 120 0.002
## 132 832 119 0.002
## 133 831 120 0.002
## 134 830 121 0.002
## 135 831 120 0.002
## 136 836 115 0.002
## 137 841 110 0.002
## 138 845 106 0.002
## 139 873 78 0.002
## 140 845 106 0.002
## 141 840 111 0.002
## 142 847 104 0.002
## 143 874 77 0.002
## 144 852 99 0.002
## 145 852 99 0.002
## 146 853 98 0.002
## 147 851 100 0.002
## 148 868 83 0.002
## 149 865 86 0.002
## 150 876 75 0.002
## 151 898 53 0.002
## 152 873 78 0.002
## 153 877 74 0.002
## 154 915 36 0.002
## 155 885 66 0.002
## 156 884 67 0.002
## 157 892 59 0.002
## 158 900 51 0.002
## 159 884 67 0.002
## 160 886 65 0.002
## 161 888 63 0.002
## 172 807 144 0.002
## 173 816 135 0.002
## 174 827 124 0.002
## 175 823 128 0.002
## 176 835 116 0.002
## 177 836 115 0.002
## 178 836 115 0.002
## 179 858 93 0.002
## 180 849 102 0.002
## 181 862 89 0.002
## 182 857 94 0.002
## 183 879 72 0.002
## 184 871 80 0.002
## 185 870 81 0.002
## 186 878 73 0.002
## 187 882 69 0.002
## 188 886 65 0.002
## 189 878 73 0.002
## 190 882 69 0.002
## 191 884 67 0.002
## 192 893 58 0.002
## 193 892 59 0.002
## 194 895 56 0.002
## 195 893 58 0.002
## 196 897 54 0.002
## 197 901 50 0.002
## 198 897 54 0.002
## 199 906 45 0.002
## 200 904 47 0.002
## 201 901 50 0.002
## 203 842 109 0.002
## 204 857 94 0.002
## 205 877 74 0.002
## 206 894 57 0.002
## 207 897 54 0.002
## 208 844 107 0.002
## First.Second.Mode.Ratio
## 13 5.019
## 14 5.216
## 15 6.150
## 16 5.842
## 17 5.942
## 18 6.608
## 19 7.198
## 20 7.342
## 21 7.270
## 22 8.606
## 23 7.128
## 24 7.888
## 25 7.645
## 26 10.887
## 27 9.226
## 28 8.416
## 29 8.804
## 30 9.685
## 31 10.188
## 32 12.586
## 33 13.409
## 34 11.513
## 35 12.783
## 36 12.208
## 37 13.194
## 38 10.598
## 39 12.394
## 40 13.631
## 41 14.850
## 42 16.611
## 43 14.095
## 44 15.684
## 45 16.943
## 48 7.059
## 49 7.198
## 50 7.888
## 51 8.144
## 52 9.931
## 53 9.685
## 54 12.208
## 55 17.647
## 56 14.339
## 57 7.342
## 58 7.806
## 59 17.288
## 114 5.426
## 119 5.135
## 120 5.019
## 121 6.150
## 122 5.096
## 124 5.175
## 125 5.426
## 126 5.745
## 127 6.150
## 128 5.745
## 129 6.205
## 130 8.510
## 131 6.925
## 132 6.992
## 133 6.925
## 134 6.860
## 135 6.925
## 136 7.270
## 137 7.645
## 138 7.972
## 139 11.192
## 140 7.972
## 141 7.568
## 142 8.144
## 143 11.351
## 144 8.606
## 145 8.606
## 146 8.704
## 147 8.510
## 148 10.458
## 149 10.058
## 150 11.680
## 151 16.943
## 152 11.192
## 153 11.851
## 154 25.417
## 155 13.409
## 156 13.194
## 157 15.119
## 158 17.647
## 159 13.194
## 160 13.631
## 161 14.095
## 172 5.604
## 173 6.044
## 174 6.669
## 175 6.430
## 176 7.198
## 177 7.270
## 178 7.270
## 179 9.226
## 180 8.324
## 181 9.685
## 182 9.117
## 183 12.208
## 184 10.887
## 185 10.741
## 186 12.027
## 187 12.783
## 188 13.631
## 189 12.027
## 190 12.783
## 191 13.194
## 192 15.397
## 193 15.119
## 194 15.982
## 195 15.397
## 196 16.611
## 197 18.020
## 198 16.611
## 199 20.133
## 200 19.234
## 201 18.020
## 203 7.725
## 204 9.117
## 205 11.851
## 206 15.684
## 207 16.611
## 208 7.888
# Report numeric predictors whose first-to-second mode frequency ratio exceeds 5
# (a low-variance indicator); prints a summary line and returns the matching
# rows of DQA.Predictors.Numeric.Summary for display.
if (length(names(DQA.Predictors.Numeric)) == 0) {
  print("No numeric predictors noted.")
} else if (nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$First.Second.Mode.Ratio)) > 5, ]) > 0) {
  # paste0 must include the trailing description so the message matches the
  # rendered output: "Low variance observed for N numeric variable(s) ..."
  print(paste0("Low variance observed for ",
               nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$First.Second.Mode.Ratio)) > 5, ]),
               " numeric variable(s) with First.Second.Mode.Ratio>5."))
  # Display the offending rows
  DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$First.Second.Mode.Ratio)) > 5, ]
} else {
  print("No low variance numeric predictors due to high first-second mode ratio noted.")
}
## [1] "Low variance observed for 3 numeric variable(s) with First.Second.Mode.Ratio>5."
## Column.Name Column.Type Unique.Count Unique.Count.Ratio First.Mode.Value
## 14 NumSulfer integer 5 0.005 0.000
## 15 NumChlorine integer 11 0.012 0.000
## 16 NumHalogen integer 11 0.012 0.000
## Second.Mode.Value First.Mode.Count Second.Mode.Count First.Second.Mode.Ratio
## 14 1.000 830 96 8.646
## 15 1.000 750 81 9.259
## 16 1.000 685 107 6.402
## Minimum Mean Median Maximum Skewness Kurtosis Percentile25th Percentile75th
## 14 0.000 0.164 0.000 4.000 3.842 21.526 0.000 0.000
## 15 0.000 0.556 0.000 10.000 3.178 13.780 0.000 0.000
## 16 0.000 0.698 0.000 10.000 2.691 10.808 0.000 1.000
# Report numeric predictors with Unique.Count.Ratio < 0.01 (few distinct
# values relative to rows — another low-variance indicator); prints a summary
# line and returns the matching rows for display.
if (length(names(DQA.Predictors.Numeric)) == 0) {
  print("No numeric predictors noted.")
} else if (nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Unique.Count.Ratio)) < 0.01, ]) > 0) {
  # paste0 must include the trailing description so the message matches the
  # rendered output: "Low variance observed for N numeric variable(s) ..."
  print(paste0("Low variance observed for ",
               nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Unique.Count.Ratio)) < 0.01, ]),
               " numeric variable(s) with Unique.Count.Ratio<0.01."))
  # Display the offending rows
  DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Unique.Count.Ratio)) < 0.01, ]
} else {
  print("No low variance numeric predictors due to low unique count ratio noted.")
}
## [1] "Low variance observed for 4 numeric variable(s) with Unique.Count.Ratio<0.01."
## Column.Name Column.Type Unique.Count Unique.Count.Ratio First.Mode.Value
## 8 NumDblBonds integer 8 0.008 0.000
## 12 NumNitrogen integer 7 0.007 0.000
## 14 NumSulfer integer 5 0.005 0.000
## 17 NumRings integer 8 0.008 1.000
## Second.Mode.Value First.Mode.Count Second.Mode.Count First.Second.Mode.Ratio
## 8 1.000 427 268 1.593
## 12 1.000 546 191 2.859
## 14 1.000 830 96 8.646
## 17 0.000 323 260 1.242
## Minimum Mean Median Maximum Skewness Kurtosis Percentile25th Percentile75th
## 8 0.000 1.006 1.000 7.000 1.360 4.760 0.000 2.000
## 12 0.000 0.813 0.000 6.000 1.554 4.831 0.000 1.000
## 14 0.000 0.164 0.000 4.000 3.842 21.526 0.000 0.000
## 17 0.000 1.402 1.000 7.000 1.034 3.875 0.000 2.000
##################################
# Checking for skewed predictors
##################################
# Report numeric predictors with |Skewness| > 3; prints a summary line and
# returns the matching rows of DQA.Predictors.Numeric.Summary for display.
if (length(names(DQA.Predictors.Numeric)) == 0) {
  print("No numeric predictors noted.")
} else if (nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness)) > 3 |
                                               as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness)) < (-3), ]) > 0) {
  # paste0 must include the trailing description so the message matches the
  # rendered output: "High skewness observed for N numeric variable(s) ..."
  print(paste0("High skewness observed for ",
               nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness)) > 3 |
                                                   as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness)) < (-3), ]),
               " numeric variable(s) with Skewness>3 or Skewness<(-3)."))
  # Display the offending rows
  DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness)) > 3 |
                                 as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness)) < (-3), ]
} else {
  print("No skewed numeric predictors noted.")
}
## [1] "High skewness observed for 3 numeric variable(s) with Skewness>3 or Skewness<(-3)."
## Column.Name Column.Type Unique.Count Unique.Count.Ratio
## 14 NumSulfer integer 5 0.005
## 15 NumChlorine integer 11 0.012
## 18 HydrophilicFactor numeric 369 0.388
## First.Mode.Value Second.Mode.Value First.Mode.Count Second.Mode.Count
## 14 0.000 1.000 830 96
## 15 0.000 1.000 750 81
## 18 -0.828 -0.158 21 20
## First.Second.Mode.Ratio Minimum Mean Median Maximum Skewness Kurtosis
## 14 8.646 0.000 0.164 0.000 4.000 3.842 21.526
## 15 9.259 0.000 0.556 0.000 10.000 3.178 13.780
## 18 1.050 -0.985 -0.021 -0.314 13.483 3.404 27.504
## Percentile25th Percentile75th
## 14 0.000 0.000
## 15 0.000 0.000
## 18 -0.763 0.313
##################################
# Loading dataset
##################################
DPA <- Solubility_Train

##################################
# Listing all predictors
# (everything except the response column solTrainY)
##################################
DPA.Predictors <- DPA[, !names(DPA) %in% c("solTrainY")]

##################################
# Listing all numeric predictors
# (drop the binary FP* fingerprint columns)
##################################
DPA.Predictors.Numeric <- DPA.Predictors[, -(grep("FP", names(DPA.Predictors)))]

##################################
# Identifying outliers for the numeric predictors
# using the boxplot rule (points beyond 1.5 * IQR from the hinges),
# drawing one horizontal boxplot per predictor with its outlier count.
##################################
# Preallocate instead of growing the vector inside the loop.
OutlierCountList <- numeric(ncol(DPA.Predictors.Numeric))
for (i in seq_len(ncol(DPA.Predictors.Numeric))) {
  Outliers <- boxplot.stats(DPA.Predictors.Numeric[, i])$out
  OutlierCount <- length(Outliers)
  OutlierCountList[i] <- OutlierCount
  # Row indices of the outlying observations (kept for reference/inspection)
  OutlierIndices <- which(DPA.Predictors.Numeric[, i] %in% c(Outliers))
  boxplot(DPA.Predictors.Numeric[, i],
          ylab = names(DPA.Predictors.Numeric)[i],
          main = names(DPA.Predictors.Numeric)[i],
          horizontal = TRUE)
  mtext(paste0(OutlierCount, " Outlier(s) Detected"))
}
# Summarize outlier counts per numeric predictor and report how many
# predictors contain at least one outlier.
OutlierCountSummary <- as.data.frame(cbind(names(DPA.Predictors.Numeric), (OutlierCountList)))
names(OutlierCountSummary) <- c("NumericPredictors", "OutlierCount")
OutlierCountSummary$OutlierCount <- as.numeric(as.character(OutlierCountSummary$OutlierCount))
NumericPredictorWithOutlierCount <- nrow(OutlierCountSummary[OutlierCountSummary$OutlierCount > 0, ])
print(paste0(NumericPredictorWithOutlierCount, " numeric variable(s) were noted with outlier(s)." ))
## [1] "20 numeric variable(s) were noted with outlier(s)."
##################################
# Gathering descriptive statistics
# for the numeric predictors (outer parentheses force auto-printing
# of the skim summary while still assigning it).
##################################
(DPA_Skimmed <- skim(DPA.Predictors.Numeric))
Name | DPA.Predictors.Numeric |
Number of rows | 951 |
Number of columns | 20 |
_______________________ | |
Column type frequency: | |
numeric | 20 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
MolWeight | 0 | 1 | 201.65 | 97.91 | 46.09 | 122.60 | 179.23 | 264.34 | 665.81 | ▇▆▂▁▁ |
NumAtoms | 0 | 1 | 25.51 | 12.61 | 5.00 | 17.00 | 22.00 | 31.00 | 94.00 | ▇▆▂▁▁ |
NumNonHAtoms | 0 | 1 | 13.16 | 6.50 | 2.00 | 8.00 | 12.00 | 17.00 | 47.00 | ▇▆▂▁▁ |
NumBonds | 0 | 1 | 25.91 | 13.48 | 4.00 | 17.00 | 23.00 | 31.50 | 97.00 | ▇▇▂▁▁ |
NumNonHBonds | 0 | 1 | 13.56 | 7.57 | 1.00 | 8.00 | 12.00 | 18.00 | 50.00 | ▇▇▂▁▁ |
NumMultBonds | 0 | 1 | 6.15 | 5.17 | 0.00 | 1.00 | 6.00 | 10.00 | 25.00 | ▇▆▃▁▁ |
NumRotBonds | 0 | 1 | 2.25 | 2.41 | 0.00 | 0.00 | 2.00 | 3.50 | 16.00 | ▇▂▁▁▁ |
NumDblBonds | 0 | 1 | 1.01 | 1.21 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▂▁▁▁ |
NumAromaticBonds | 0 | 1 | 5.12 | 5.26 | 0.00 | 0.00 | 6.00 | 6.00 | 25.00 | ▇▆▃▁▁ |
NumHydrogen | 0 | 1 | 12.35 | 7.32 | 0.00 | 7.00 | 11.00 | 16.00 | 47.00 | ▇▇▂▁▁ |
NumCarbon | 0 | 1 | 9.89 | 5.29 | 1.00 | 6.00 | 9.00 | 12.00 | 33.00 | ▇▇▃▁▁ |
NumNitrogen | 0 | 1 | 0.81 | 1.19 | 0.00 | 0.00 | 0.00 | 1.00 | 6.00 | ▇▂▁▁▁ |
NumOxygen | 0 | 1 | 1.57 | 1.73 | 0.00 | 0.00 | 1.00 | 2.00 | 13.00 | ▇▂▁▁▁ |
NumSulfer | 0 | 1 | 0.16 | 0.49 | 0.00 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
NumChlorine | 0 | 1 | 0.56 | 1.40 | 0.00 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
NumHalogen | 0 | 1 | 0.70 | 1.47 | 0.00 | 0.00 | 0.00 | 1.00 | 10.00 | ▇▁▁▁▁ |
NumRings | 0 | 1 | 1.40 | 1.30 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▃▂▁▁ |
HydrophilicFactor | 0 | 1 | -0.02 | 1.13 | -0.98 | -0.76 | -0.31 | 0.31 | 13.48 | ▇▁▁▁▁ |
SurfaceArea1 | 0 | 1 | 36.46 | 35.29 | 0.00 | 9.23 | 29.10 | 53.28 | 331.94 | ▇▂▁▁▁ |
SurfaceArea2 | 0 | 1 | 40.23 | 38.12 | 0.00 | 10.63 | 33.12 | 60.66 | 331.94 | ▇▂▁▁▁ |
###################################
# Verifying the data dimensions
###################################
dim(DPA.Predictors.Numeric)  # expected: 951 rows x 20 numeric predictors
## [1] 951 20
##################################
# Loading dataset
# (reset DPA to the full training set, response included)
##################################
DPA <- Solubility_Train

##################################
# Gathering descriptive statistics
# for all 229 columns (outer parentheses force auto-printing
# of the skim summary while still assigning it).
##################################
(DPA_Skimmed <- skim(DPA))
Name | DPA |
Number of rows | 951 |
Number of columns | 229 |
_______________________ | |
Column type frequency: | |
numeric | 229 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
solTrainY | 0 | 1 | -2.72 | 2.05 | -11.62 | -3.96 | -2.51 | -1.36 | 1.58 | ▁▁▃▇▃ |
FP001 | 0 | 1 | 0.49 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP002 | 0 | 1 | 0.54 | 0.50 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP003 | 0 | 1 | 0.44 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP004 | 0 | 1 | 0.58 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP005 | 0 | 1 | 0.58 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP006 | 0 | 1 | 0.40 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP007 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP008 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP009 | 0 | 1 | 0.28 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP010 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP011 | 0 | 1 | 0.21 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP012 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP013 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP014 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP015 | 0 | 1 | 0.86 | 0.35 | 0.00 | 1.00 | 1.00 | 1.00 | 1.00 | ▁▁▁▁▇ |
FP016 | 0 | 1 | 0.15 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP017 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP018 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP019 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP020 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP021 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP022 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP023 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP024 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP025 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP026 | 0 | 1 | 0.08 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP027 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP028 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP029 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP030 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP031 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP032 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP033 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP034 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP035 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP036 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP037 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP038 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP039 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP040 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP041 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP042 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP043 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP044 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP045 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP046 | 0 | 1 | 0.32 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP047 | 0 | 1 | 0.27 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP048 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP049 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP050 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP051 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP052 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP053 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP054 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP055 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP056 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP057 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP058 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP059 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP060 | 0 | 1 | 0.48 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP061 | 0 | 1 | 0.45 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP062 | 0 | 1 | 0.44 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP063 | 0 | 1 | 0.43 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP064 | 0 | 1 | 0.42 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP065 | 0 | 1 | 0.59 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP066 | 0 | 1 | 0.61 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP067 | 0 | 1 | 0.38 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP068 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP069 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP070 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP071 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP072 | 0 | 1 | 0.66 | 0.47 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP073 | 0 | 1 | 0.31 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP074 | 0 | 1 | 0.32 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP075 | 0 | 1 | 0.34 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP076 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP077 | 0 | 1 | 0.32 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP078 | 0 | 1 | 0.30 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP079 | 0 | 1 | 0.69 | 0.46 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP080 | 0 | 1 | 0.30 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP081 | 0 | 1 | 0.28 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP082 | 0 | 1 | 0.71 | 0.45 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP083 | 0 | 1 | 0.27 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP084 | 0 | 1 | 0.29 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP085 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP086 | 0 | 1 | 0.27 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP087 | 0 | 1 | 0.73 | 0.45 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP088 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP089 | 0 | 1 | 0.25 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP090 | 0 | 1 | 0.25 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP091 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP092 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP093 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP094 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP095 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP096 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP097 | 0 | 1 | 0.24 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP098 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP099 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP100 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP101 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP102 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP103 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP104 | 0 | 1 | 0.22 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP105 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP106 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP107 | 0 | 1 | 0.21 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP108 | 0 | 1 | 0.21 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP109 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP110 | 0 | 1 | 0.21 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP111 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP112 | 0 | 1 | 0.19 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP113 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP114 | 0 | 1 | 0.16 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP115 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP116 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP117 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP118 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP119 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP120 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP121 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP122 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP123 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP124 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP125 | 0 | 1 | 0.16 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP126 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP127 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP128 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP129 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP130 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP131 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP132 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP133 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP134 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP135 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP136 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP137 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP138 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP139 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP140 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP141 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP142 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP143 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP144 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP145 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP146 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP147 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP148 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP149 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP150 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP151 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP152 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP153 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP154 | 0 | 1 | 0.04 | 0.19 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP155 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP156 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP157 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP158 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP159 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP160 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP161 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP162 | 0 | 1 | 0.50 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP163 | 0 | 1 | 0.48 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP164 | 0 | 1 | 0.63 | 0.48 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP165 | 0 | 1 | 0.35 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP166 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP167 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP168 | 0 | 1 | 0.67 | 0.47 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP169 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP170 | 0 | 1 | 0.18 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP171 | 0 | 1 | 0.17 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP172 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP173 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP174 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP175 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP176 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP177 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP178 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP179 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP180 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP181 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP182 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP183 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP184 | 0 | 1 | 0.08 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP185 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP186 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP187 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP188 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP189 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP190 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP191 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP192 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP193 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP194 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP195 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP196 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP197 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP198 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP199 | 0 | 1 | 0.05 | 0.21 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP200 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP201 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP202 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP203 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP204 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP205 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP206 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP207 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP208 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
MolWeight | 0 | 1 | 201.65 | 97.91 | 46.09 | 122.60 | 179.23 | 264.34 | 665.81 | ▇▆▂▁▁ |
NumAtoms | 0 | 1 | 25.51 | 12.61 | 5.00 | 17.00 | 22.00 | 31.00 | 94.00 | ▇▆▂▁▁ |
NumNonHAtoms | 0 | 1 | 13.16 | 6.50 | 2.00 | 8.00 | 12.00 | 17.00 | 47.00 | ▇▆▂▁▁ |
NumBonds | 0 | 1 | 25.91 | 13.48 | 4.00 | 17.00 | 23.00 | 31.50 | 97.00 | ▇▇▂▁▁ |
NumNonHBonds | 0 | 1 | 13.56 | 7.57 | 1.00 | 8.00 | 12.00 | 18.00 | 50.00 | ▇▇▂▁▁ |
NumMultBonds | 0 | 1 | 6.15 | 5.17 | 0.00 | 1.00 | 6.00 | 10.00 | 25.00 | ▇▆▃▁▁ |
NumRotBonds | 0 | 1 | 2.25 | 2.41 | 0.00 | 0.00 | 2.00 | 3.50 | 16.00 | ▇▂▁▁▁ |
NumDblBonds | 0 | 1 | 1.01 | 1.21 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▂▁▁▁ |
NumAromaticBonds | 0 | 1 | 5.12 | 5.26 | 0.00 | 0.00 | 6.00 | 6.00 | 25.00 | ▇▆▃▁▁ |
NumHydrogen | 0 | 1 | 12.35 | 7.32 | 0.00 | 7.00 | 11.00 | 16.00 | 47.00 | ▇▇▂▁▁ |
NumCarbon | 0 | 1 | 9.89 | 5.29 | 1.00 | 6.00 | 9.00 | 12.00 | 33.00 | ▇▇▃▁▁ |
NumNitrogen | 0 | 1 | 0.81 | 1.19 | 0.00 | 0.00 | 0.00 | 1.00 | 6.00 | ▇▂▁▁▁ |
NumOxygen | 0 | 1 | 1.57 | 1.73 | 0.00 | 0.00 | 1.00 | 2.00 | 13.00 | ▇▂▁▁▁ |
NumSulfer | 0 | 1 | 0.16 | 0.49 | 0.00 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
NumChlorine | 0 | 1 | 0.56 | 1.40 | 0.00 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
NumHalogen | 0 | 1 | 0.70 | 1.47 | 0.00 | 0.00 | 0.00 | 1.00 | 10.00 | ▇▁▁▁▁ |
NumRings | 0 | 1 | 1.40 | 1.30 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▃▂▁▁ |
HydrophilicFactor | 0 | 1 | -0.02 | 1.13 | -0.98 | -0.76 | -0.31 | 0.31 | 13.48 | ▇▁▁▁▁ |
SurfaceArea1 | 0 | 1 | 36.46 | 35.29 | 0.00 | 9.23 | 29.10 | 53.28 | 331.94 | ▇▂▁▁▁ |
SurfaceArea2 | 0 | 1 | 40.23 | 38.12 | 0.00 | 10.63 | 33.12 | 60.66 | 331.94 | ▇▂▁▁▁ |
##################################
# Identifying columns with low variance
# via caret::nearZeroVar: a predictor is near-zero-variance when its
# most-common/second-most-common frequency ratio exceeds freqCut AND its
# percentage of unique values is below uniqueCut.
##################################
DPA_LowVariance <- nearZeroVar(DPA,
                               freqCut = 95/5,
                               uniqueCut = 10,
                               saveMetrics = TRUE)
# Display only the flagged near-zero-variance rows
(DPA_LowVariance[DPA_LowVariance$nzv, ])
## freqRatio percentUnique zeroVar nzv
## FP154 25.41667 0.2103049 FALSE TRUE
## FP199 20.13333 0.2103049 FALSE TRUE
## FP200 19.23404 0.2103049 FALSE TRUE
# Report the near-zero-variance predictors flagged by nearZeroVar, list each
# one slated for removal, skim the flagged columns, then build
# DPA_ExcludedLowVariance with those columns dropped.
if ((nrow(DPA_LowVariance[DPA_LowVariance$nzv, ])) == 0) {
  print("No low variance predictors noted.")
} else {
  # paste0 must include the trailing description so the message matches the
  # rendered output: "Low variance observed for N numeric variable(s) ..."
  print(paste0("Low variance observed for ",
               nrow(DPA_LowVariance[DPA_LowVariance$nzv, ]),
               " numeric variable(s) with First.Second.Mode.Ratio>4 and Unique.Count.Ratio<0.10."))

  DPA_LowVarianceForRemoval <- (nrow(DPA_LowVariance[DPA_LowVariance$nzv, ]))

  print(paste0("Low variance can be resolved by removing ",
               nrow(DPA_LowVariance[DPA_LowVariance$nzv, ]),
               " numeric variable(s)."))

  # Name each flagged predictor (seq_len is safe; this branch guarantees >= 1)
  for (j in seq_len(DPA_LowVarianceForRemoval)) {
    DPA_LowVarianceRemovedVariable <- rownames(DPA_LowVariance[DPA_LowVariance$nzv, ])[j]
    print(paste0("Variable ",
                 j, " for removal: ",
                 DPA_LowVarianceRemovedVariable))
  }

  # Skim only the flagged columns for inspection
  DPA %>%
    skim() %>%
    dplyr::filter(skim_variable %in% rownames(DPA_LowVariance[DPA_LowVariance$nzv, ]))

  ##################################
  # Filtering out columns with low variance
  ##################################
  DPA_ExcludedLowVariance <- DPA[, !names(DPA) %in% rownames(DPA_LowVariance[DPA_LowVariance$nzv, ])]

  ##################################
  # Gathering descriptive statistics
  # (outer parentheses force auto-printing of the skim summary)
  ##################################
  (DPA_ExcludedLowVariance_Skimmed <- skim(DPA_ExcludedLowVariance))
}
## [1] "Low variance observed for 3 numeric variable(s) with First.Second.Mode.Ratio>4 and Unique.Count.Ratio<0.10."
## [1] "Low variance can be resolved by removing 3 numeric variable(s)."
## [1] "Variable 1 for removal: FP154"
## [1] "Variable 2 for removal: FP199"
## [1] "Variable 3 for removal: FP200"
Name | DPA_ExcludedLowVariance |
Number of rows | 951 |
Number of columns | 226 |
_______________________ | |
Column type frequency: | |
numeric | 226 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
solTrainY | 0 | 1 | -2.72 | 2.05 | -11.62 | -3.96 | -2.51 | -1.36 | 1.58 | ▁▁▃▇▃ |
FP001 | 0 | 1 | 0.49 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP002 | 0 | 1 | 0.54 | 0.50 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP003 | 0 | 1 | 0.44 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP004 | 0 | 1 | 0.58 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP005 | 0 | 1 | 0.58 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP006 | 0 | 1 | 0.40 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP007 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP008 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP009 | 0 | 1 | 0.28 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP010 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP011 | 0 | 1 | 0.21 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP012 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP013 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP014 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP015 | 0 | 1 | 0.86 | 0.35 | 0.00 | 1.00 | 1.00 | 1.00 | 1.00 | ▁▁▁▁▇ |
FP016 | 0 | 1 | 0.15 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP017 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP018 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP019 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP020 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP021 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP022 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP023 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP024 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP025 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP026 | 0 | 1 | 0.08 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP027 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP028 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP029 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP030 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP031 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP032 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP033 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP034 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP035 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP036 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP037 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP038 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP039 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP040 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP041 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP042 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP043 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP044 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP045 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP046 | 0 | 1 | 0.32 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP047 | 0 | 1 | 0.27 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP048 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP049 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP050 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP051 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP052 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP053 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP054 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP055 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP056 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP057 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP058 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP059 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP060 | 0 | 1 | 0.48 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP061 | 0 | 1 | 0.45 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP062 | 0 | 1 | 0.44 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP063 | 0 | 1 | 0.43 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP064 | 0 | 1 | 0.42 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP065 | 0 | 1 | 0.59 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP066 | 0 | 1 | 0.61 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP067 | 0 | 1 | 0.38 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP068 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP069 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP070 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP071 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP072 | 0 | 1 | 0.66 | 0.47 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP073 | 0 | 1 | 0.31 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP074 | 0 | 1 | 0.32 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP075 | 0 | 1 | 0.34 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP076 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP077 | 0 | 1 | 0.32 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP078 | 0 | 1 | 0.30 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP079 | 0 | 1 | 0.69 | 0.46 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP080 | 0 | 1 | 0.30 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP081 | 0 | 1 | 0.28 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP082 | 0 | 1 | 0.71 | 0.45 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP083 | 0 | 1 | 0.27 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP084 | 0 | 1 | 0.29 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP085 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP086 | 0 | 1 | 0.27 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP087 | 0 | 1 | 0.73 | 0.45 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP088 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP089 | 0 | 1 | 0.25 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP090 | 0 | 1 | 0.25 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP091 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP092 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP093 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP094 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP095 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP096 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP097 | 0 | 1 | 0.24 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP098 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP099 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP100 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP101 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP102 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP103 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP104 | 0 | 1 | 0.22 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP105 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP106 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP107 | 0 | 1 | 0.21 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP108 | 0 | 1 | 0.21 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP109 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP110 | 0 | 1 | 0.21 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP111 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP112 | 0 | 1 | 0.19 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP113 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP114 | 0 | 1 | 0.16 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP115 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP116 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP117 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP118 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP119 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP120 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP121 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP122 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP123 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP124 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP125 | 0 | 1 | 0.16 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP126 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP127 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP128 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP129 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP130 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP131 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP132 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP133 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP134 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP135 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP136 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP137 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP138 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP139 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP140 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP141 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP142 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP143 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP144 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP145 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP146 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP147 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP148 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP149 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP150 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP151 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP152 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP153 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP155 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP156 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP157 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP158 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP159 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP160 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP161 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP162 | 0 | 1 | 0.50 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP163 | 0 | 1 | 0.48 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP164 | 0 | 1 | 0.63 | 0.48 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP165 | 0 | 1 | 0.35 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP166 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP167 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP168 | 0 | 1 | 0.67 | 0.47 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP169 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP170 | 0 | 1 | 0.18 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP171 | 0 | 1 | 0.17 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP172 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP173 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP174 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP175 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP176 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP177 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP178 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP179 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP180 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP181 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP182 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP183 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP184 | 0 | 1 | 0.08 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP185 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP186 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP187 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP188 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP189 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP190 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP191 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP192 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP193 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP194 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP195 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP196 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP197 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP198 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP201 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP202 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP203 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP204 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP205 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP206 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP207 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP208 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
MolWeight | 0 | 1 | 201.65 | 97.91 | 46.09 | 122.60 | 179.23 | 264.34 | 665.81 | ▇▆▂▁▁ |
NumAtoms | 0 | 1 | 25.51 | 12.61 | 5.00 | 17.00 | 22.00 | 31.00 | 94.00 | ▇▆▂▁▁ |
NumNonHAtoms | 0 | 1 | 13.16 | 6.50 | 2.00 | 8.00 | 12.00 | 17.00 | 47.00 | ▇▆▂▁▁ |
NumBonds | 0 | 1 | 25.91 | 13.48 | 4.00 | 17.00 | 23.00 | 31.50 | 97.00 | ▇▇▂▁▁ |
NumNonHBonds | 0 | 1 | 13.56 | 7.57 | 1.00 | 8.00 | 12.00 | 18.00 | 50.00 | ▇▇▂▁▁ |
NumMultBonds | 0 | 1 | 6.15 | 5.17 | 0.00 | 1.00 | 6.00 | 10.00 | 25.00 | ▇▆▃▁▁ |
NumRotBonds | 0 | 1 | 2.25 | 2.41 | 0.00 | 0.00 | 2.00 | 3.50 | 16.00 | ▇▂▁▁▁ |
NumDblBonds | 0 | 1 | 1.01 | 1.21 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▂▁▁▁ |
NumAromaticBonds | 0 | 1 | 5.12 | 5.26 | 0.00 | 0.00 | 6.00 | 6.00 | 25.00 | ▇▆▃▁▁ |
NumHydrogen | 0 | 1 | 12.35 | 7.32 | 0.00 | 7.00 | 11.00 | 16.00 | 47.00 | ▇▇▂▁▁ |
NumCarbon | 0 | 1 | 9.89 | 5.29 | 1.00 | 6.00 | 9.00 | 12.00 | 33.00 | ▇▇▃▁▁ |
NumNitrogen | 0 | 1 | 0.81 | 1.19 | 0.00 | 0.00 | 0.00 | 1.00 | 6.00 | ▇▂▁▁▁ |
NumOxygen | 0 | 1 | 1.57 | 1.73 | 0.00 | 0.00 | 1.00 | 2.00 | 13.00 | ▇▂▁▁▁ |
NumSulfer | 0 | 1 | 0.16 | 0.49 | 0.00 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
NumChlorine | 0 | 1 | 0.56 | 1.40 | 0.00 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
NumHalogen | 0 | 1 | 0.70 | 1.47 | 0.00 | 0.00 | 0.00 | 1.00 | 10.00 | ▇▁▁▁▁ |
NumRings | 0 | 1 | 1.40 | 1.30 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▃▂▁▁ |
HydrophilicFactor | 0 | 1 | -0.02 | 1.13 | -0.98 | -0.76 | -0.31 | 0.31 | 13.48 | ▇▁▁▁▁ |
SurfaceArea1 | 0 | 1 | 36.46 | 35.29 | 0.00 | 9.23 | 29.10 | 53.28 | 331.94 | ▇▂▁▁▁ |
SurfaceArea2 | 0 | 1 | 40.23 | 38.12 | 0.00 | 10.63 | 33.12 | 60.66 | 331.94 | ▇▂▁▁▁ |
###################################
# Verifying the data dimensions
###################################
dim(DPA_ExcludedLowVariance)
## [1] 951 226

##################################
# Loading dataset
##################################
# Work on a copy of the training data so the original stays untouched
DPA <- Solubility_Train
##################################
# Listing all predictors
##################################
# Drop the response column so only predictors remain
DPA.Predictors <- DPA[, !names(DPA) %in% c("solTrainY")]

##################################
# Listing all numeric predictors
##################################
# Fingerprint (FP*) columns are binary indicators; keep only the
# continuous molecular descriptors. grepl() with an anchored pattern
# is robust even when no column matches (negative indexing with
# integer(0) would otherwise drop every column).
DPA.Predictors.Numeric <- DPA.Predictors[, !grepl("^FP", names(DPA.Predictors))]
##################################
# Visualizing pairwise correlation between predictors
##################################
# Significance test for each pairwise Pearson correlation
DPA_CorrelationTest <- cor.mtest(DPA.Predictors.Numeric,
                                 method = "pearson",
                                 conf.level = 0.95)

# Upper-triangle correlation plot; cells not significant at the
# 5% level are left blank via p.mat / insig = "blank"
corrplot(cor(DPA.Predictors.Numeric,
             method = "pearson",
             use = "pairwise.complete.obs"),
         method = "circle",
         type = "upper",
         order = "original",
         tl.col = "black",
         tl.cex = 0.75,
         tl.srt = 90,
         sig.level = 0.05,
         p.mat = DPA_CorrelationTest$p,
         insig = "blank")
##################################
# Identifying the highly correlated variables
##################################
DPA_Correlation <- cor(DPA.Predictors.Numeric,
                       method = "pearson",
                       use = "pairwise.complete.obs")

# Count unique pairs (upper triangle only) with |r| > 0.95;
# the outer parentheses print the assigned value
(DPA_HighlyCorrelatedCount <- sum(abs(DPA_Correlation[upper.tri(DPA_Correlation)]) > 0.95))
## [1] 3

if (DPA_HighlyCorrelatedCount == 0) {
  print("No highly correlated predictors noted.")
} else {
  print(paste0("High correlation observed for ",
               DPA_HighlyCorrelatedCount,
               " pairs of numeric variable(s) with Correlation.Coefficient>0.95."))
  # corr_cross() (lares) ranks the strongest significant pairs
  (DPA_HighlyCorrelatedPairs <- corr_cross(DPA.Predictors.Numeric,
                                           max_pvalue = 0.05,
                                           top = DPA_HighlyCorrelatedCount,
                                           rm.na = TRUE,
                                           grid = FALSE))
}
## [1] "High correlation observed for 3 pairs of numeric variable(s) with Correlation.Coefficient>0.95."
if (DPA_HighlyCorrelatedCount > 0) {
  # Column indices (into DPA.Predictors.Numeric) that caret's
  # findCorrelation() recommends removing at the 0.95 cutoff
  DPA_HighlyCorrelated <- findCorrelation(DPA_Correlation, cutoff = 0.95)

  (DPA_HighlyCorrelatedForRemoval <- length(DPA_HighlyCorrelated))

  print(paste0("High correlation can be resolved by removing ",
               DPA_HighlyCorrelatedForRemoval,
               " numeric variable(s)."))

  # seq_len() is safe when the count is zero (1:0 would iterate twice)
  for (j in seq_len(DPA_HighlyCorrelatedForRemoval)) {
    DPA_HighlyCorrelatedRemovedVariable <-
      colnames(DPA.Predictors.Numeric)[DPA_HighlyCorrelated[j]]
    print(paste0("Variable ", j, " for removal: ",
                 DPA_HighlyCorrelatedRemovedVariable))
  }

  ##################################
  # Filtering out columns with high correlation
  ##################################
  # FIX: DPA_HighlyCorrelated indexes DPA.Predictors.Numeric, not DPA,
  # so drop the flagged columns by NAME. Indexing DPA directly with
  # these positions would silently remove the wrong columns (DPA also
  # contains solTrainY and the FP* fingerprints before the numerics).
  DPA_HighlyCorrelatedNames <- colnames(DPA.Predictors.Numeric)[DPA_HighlyCorrelated]
  DPA_ExcludedHighCorrelation <- DPA[, !names(DPA) %in% DPA_HighlyCorrelatedNames]

  ##################################
  # Gathering descriptive statistics
  ##################################
  (DPA_ExcludedHighCorrelation_Skimmed <- skim(DPA_ExcludedHighCorrelation))
}
## [1] "High correlation can be resolved by removing 3 numeric variable(s)."
## [1] "Variable 1 for removal: NumNonHAtoms"
## [1] "Variable 2 for removal: NumBonds"
## [1] "Variable 3 for removal: NumAromaticBonds"
Name | DPA_ExcludedHighCorrelati… |
Number of rows | 951 |
Number of columns | 226 |
_______________________ | |
Column type frequency: | |
numeric | 226 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
solTrainY | 0 | 1 | -2.72 | 2.05 | -11.62 | -3.96 | -2.51 | -1.36 | 1.58 | ▁▁▃▇▃ |
FP001 | 0 | 1 | 0.49 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP004 | 0 | 1 | 0.58 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP005 | 0 | 1 | 0.58 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP006 | 0 | 1 | 0.40 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP007 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP009 | 0 | 1 | 0.28 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP010 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP011 | 0 | 1 | 0.21 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP012 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP013 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP014 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP015 | 0 | 1 | 0.86 | 0.35 | 0.00 | 1.00 | 1.00 | 1.00 | 1.00 | ▁▁▁▁▇ |
FP016 | 0 | 1 | 0.15 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP017 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP018 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP019 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP020 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP021 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP022 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP023 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP024 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP025 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP026 | 0 | 1 | 0.08 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP027 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP028 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP029 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP030 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP031 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP032 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP033 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP034 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP035 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP036 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP037 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP038 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP039 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP040 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP041 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP042 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP043 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP044 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP045 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP046 | 0 | 1 | 0.32 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP047 | 0 | 1 | 0.27 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP048 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP049 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP050 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP051 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP052 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP053 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP054 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP055 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP056 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP057 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP058 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP059 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP060 | 0 | 1 | 0.48 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP061 | 0 | 1 | 0.45 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP062 | 0 | 1 | 0.44 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP063 | 0 | 1 | 0.43 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP064 | 0 | 1 | 0.42 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP065 | 0 | 1 | 0.59 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP066 | 0 | 1 | 0.61 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP067 | 0 | 1 | 0.38 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP068 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP069 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP070 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP071 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP072 | 0 | 1 | 0.66 | 0.47 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP073 | 0 | 1 | 0.31 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP074 | 0 | 1 | 0.32 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP075 | 0 | 1 | 0.34 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP076 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP077 | 0 | 1 | 0.32 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP078 | 0 | 1 | 0.30 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP079 | 0 | 1 | 0.69 | 0.46 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP080 | 0 | 1 | 0.30 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP081 | 0 | 1 | 0.28 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP082 | 0 | 1 | 0.71 | 0.45 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP083 | 0 | 1 | 0.27 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP084 | 0 | 1 | 0.29 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP085 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP086 | 0 | 1 | 0.27 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP087 | 0 | 1 | 0.73 | 0.45 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP088 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP089 | 0 | 1 | 0.25 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP090 | 0 | 1 | 0.25 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP091 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP092 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP093 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP094 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP095 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP096 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP097 | 0 | 1 | 0.24 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP098 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP099 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP100 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP101 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP102 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP103 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP104 | 0 | 1 | 0.22 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP105 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP106 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP107 | 0 | 1 | 0.21 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP108 | 0 | 1 | 0.21 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP109 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP110 | 0 | 1 | 0.21 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP111 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP112 | 0 | 1 | 0.19 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP113 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP114 | 0 | 1 | 0.16 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP115 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP116 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP117 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP118 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP119 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP120 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP121 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP122 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP123 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP124 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP125 | 0 | 1 | 0.16 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP126 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP127 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP128 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP129 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP130 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP131 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP132 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP133 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP134 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP135 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP136 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP137 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP138 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP139 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP140 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP141 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP142 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP143 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP144 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP145 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP146 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP147 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP148 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP149 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP150 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP151 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP152 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP153 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP154 | 0 | 1 | 0.04 | 0.19 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP155 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP156 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP157 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP158 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP159 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP160 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP161 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP162 | 0 | 1 | 0.50 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP163 | 0 | 1 | 0.48 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP164 | 0 | 1 | 0.63 | 0.48 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP165 | 0 | 1 | 0.35 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP166 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP167 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP168 | 0 | 1 | 0.67 | 0.47 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP169 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP170 | 0 | 1 | 0.18 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP171 | 0 | 1 | 0.17 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP172 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP173 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP174 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP175 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP176 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP177 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP178 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP179 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP180 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP181 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP182 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP183 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP184 | 0 | 1 | 0.08 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP185 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP186 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP187 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP188 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP189 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP190 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP191 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP192 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP193 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP194 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP195 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP196 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP197 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP198 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP199 | 0 | 1 | 0.05 | 0.21 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP200 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP201 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP202 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP203 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP204 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP205 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP206 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP207 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP208 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
MolWeight | 0 | 1 | 201.65 | 97.91 | 46.09 | 122.60 | 179.23 | 264.34 | 665.81 | ▇▆▂▁▁ |
NumAtoms | 0 | 1 | 25.51 | 12.61 | 5.00 | 17.00 | 22.00 | 31.00 | 94.00 | ▇▆▂▁▁ |
NumNonHAtoms | 0 | 1 | 13.16 | 6.50 | 2.00 | 8.00 | 12.00 | 17.00 | 47.00 | ▇▆▂▁▁ |
NumBonds | 0 | 1 | 25.91 | 13.48 | 4.00 | 17.00 | 23.00 | 31.50 | 97.00 | ▇▇▂▁▁ |
NumNonHBonds | 0 | 1 | 13.56 | 7.57 | 1.00 | 8.00 | 12.00 | 18.00 | 50.00 | ▇▇▂▁▁ |
NumMultBonds | 0 | 1 | 6.15 | 5.17 | 0.00 | 1.00 | 6.00 | 10.00 | 25.00 | ▇▆▃▁▁ |
NumRotBonds | 0 | 1 | 2.25 | 2.41 | 0.00 | 0.00 | 2.00 | 3.50 | 16.00 | ▇▂▁▁▁ |
NumDblBonds | 0 | 1 | 1.01 | 1.21 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▂▁▁▁ |
NumAromaticBonds | 0 | 1 | 5.12 | 5.26 | 0.00 | 0.00 | 6.00 | 6.00 | 25.00 | ▇▆▃▁▁ |
NumHydrogen | 0 | 1 | 12.35 | 7.32 | 0.00 | 7.00 | 11.00 | 16.00 | 47.00 | ▇▇▂▁▁ |
NumCarbon | 0 | 1 | 9.89 | 5.29 | 1.00 | 6.00 | 9.00 | 12.00 | 33.00 | ▇▇▃▁▁ |
NumNitrogen | 0 | 1 | 0.81 | 1.19 | 0.00 | 0.00 | 0.00 | 1.00 | 6.00 | ▇▂▁▁▁ |
NumOxygen | 0 | 1 | 1.57 | 1.73 | 0.00 | 0.00 | 1.00 | 2.00 | 13.00 | ▇▂▁▁▁ |
NumSulfer | 0 | 1 | 0.16 | 0.49 | 0.00 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
NumChlorine | 0 | 1 | 0.56 | 1.40 | 0.00 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
NumHalogen | 0 | 1 | 0.70 | 1.47 | 0.00 | 0.00 | 0.00 | 1.00 | 10.00 | ▇▁▁▁▁ |
NumRings | 0 | 1 | 1.40 | 1.30 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▃▂▁▁ |
HydrophilicFactor | 0 | 1 | -0.02 | 1.13 | -0.98 | -0.76 | -0.31 | 0.31 | 13.48 | ▇▁▁▁▁ |
SurfaceArea1 | 0 | 1 | 36.46 | 35.29 | 0.00 | 9.23 | 29.10 | 53.28 | 331.94 | ▇▂▁▁▁ |
SurfaceArea2 | 0 | 1 | 40.23 | 38.12 | 0.00 | 10.63 | 33.12 | 60.66 | 331.94 | ▇▂▁▁▁ |
###################################
# Verifying the data dimensions
###################################
dim(DPA_ExcludedHighCorrelation)
## [1] 951 226

##################################
# Loading dataset
##################################
# Work on a fresh copy of the training data for this analysis pass
DPA <- Solubility_Train

##################################
# Listing all predictors
##################################
# Drop the response column so only predictors remain
DPA.Predictors <- DPA[, !names(DPA) %in% c("solTrainY")]

##################################
# Listing all numeric predictors
##################################
DPA.Predictors.Numeric <- DPA.Predictors[, sapply(DPA.Predictors, is.numeric)]

##################################
# Identifying the linearly dependent variables
##################################
# findLinearCombos (caret) enumerates exact linear dependencies via QR decomposition
DPA_LinearlyDependent <- findLinearCombos(DPA.Predictors.Numeric)

# Outer parentheses print the assigned value to the console
(DPA_LinearlyDependentCount <- length(DPA_LinearlyDependent$linearCombos))
## [1] 2
# Report each subset of numeric variables involved in a linear dependency
if (DPA_LinearlyDependentCount == 0) {
  print("No linearly dependent predictors noted.")
} else {
  print(paste0("Linear dependency observed for ",
               (DPA_LinearlyDependentCount), " subset(s) of numeric variable(s)."))
  for (i in 1:DPA_LinearlyDependentCount) {
    # Map the column indices of the i-th dependent subset back to variable names
    DPA_LinearlyDependentSubset <- colnames(DPA.Predictors.Numeric)[DPA_LinearlyDependent$linearCombos[[i]]]
    print(paste0("Linear dependent variable(s) for subset ",
                 i, " include: ",
                 DPA_LinearlyDependentSubset))
  }
}
## [1] "Linear dependency observed for 2 subset(s) of numeric variable(s)."
## [1] "Linear dependent variable(s) for subset 1 include: NumNonHBonds"
## [2] "Linear dependent variable(s) for subset 1 include: NumAtoms"
## [3] "Linear dependent variable(s) for subset 1 include: NumNonHAtoms"
## [4] "Linear dependent variable(s) for subset 1 include: NumBonds"
## [1] "Linear dependent variable(s) for subset 2 include: NumHydrogen"
## [2] "Linear dependent variable(s) for subset 2 include: NumAtoms"
## [3] "Linear dependent variable(s) for subset 2 include: NumNonHAtoms"
##################################
# Identifying the linearly dependent variables for removal
##################################
if (DPA_LinearlyDependentCount > 0) {
  # Recompute so $remove gives the minimal set of columns to drop
  DPA_LinearlyDependent <- findLinearCombos(DPA.Predictors.Numeric)

  DPA_LinearlyDependentForRemoval <- length(DPA_LinearlyDependent$remove)

  print(paste0("Linear dependency can be resolved by removing ",
               (DPA_LinearlyDependentForRemoval), " numeric variable(s)."))
  for (j in 1:DPA_LinearlyDependentForRemoval) {
    DPA_LinearlyDependentRemovedVariable <- colnames(DPA.Predictors.Numeric)[DPA_LinearlyDependent$remove[j]]
    print(paste0("Variable ",
                 j, " for removal: ",
                 DPA_LinearlyDependentRemovedVariable))
  }

  ##################################
  # Filtering out columns with linear dependency
  #################################
  # NOTE(review): $remove indexes columns of DPA.Predictors.Numeric, but the
  # subset is applied to DPA (which also contains solTrainY) — the indices only
  # line up because all predictors here are numeric and precede no other columns;
  # confirm against the full file.
  DPA_ExcludedLinearlyDependent <- DPA[, -DPA_LinearlyDependent$remove]

  ##################################
  # Gathering descriptive statistics
  ##################################
  (DPA_ExcludedLinearlyDependent_Skimmed <- skim(DPA_ExcludedLinearlyDependent))
}
## [1] "Linear dependency can be resolved by removing 2 numeric variable(s)."
## [1] "Variable 1 for removal: NumNonHBonds"
## [1] "Variable 2 for removal: NumHydrogen"
Name | DPA_ExcludedLinearlyDepen… |
Number of rows | 951 |
Number of columns | 227 |
_______________________ | |
Column type frequency: | |
numeric | 227 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
solTrainY | 0 | 1 | -2.72 | 2.05 | -11.62 | -3.96 | -2.51 | -1.36 | 1.58 | ▁▁▃▇▃ |
FP001 | 0 | 1 | 0.49 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP002 | 0 | 1 | 0.54 | 0.50 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP003 | 0 | 1 | 0.44 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP004 | 0 | 1 | 0.58 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP005 | 0 | 1 | 0.58 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP006 | 0 | 1 | 0.40 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP007 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP008 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP009 | 0 | 1 | 0.28 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP010 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP011 | 0 | 1 | 0.21 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP012 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP013 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP014 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP015 | 0 | 1 | 0.86 | 0.35 | 0.00 | 1.00 | 1.00 | 1.00 | 1.00 | ▁▁▁▁▇ |
FP016 | 0 | 1 | 0.15 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP017 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP018 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP019 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP020 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP021 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP022 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP023 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP024 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP025 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP026 | 0 | 1 | 0.08 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP027 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP028 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP029 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP030 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP031 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP032 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP033 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP034 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP035 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP036 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP037 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP038 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP039 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP040 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP041 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP042 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP043 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP044 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP045 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP046 | 0 | 1 | 0.32 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP047 | 0 | 1 | 0.27 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP048 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP049 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP050 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP051 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP052 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP053 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP054 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP055 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP056 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP057 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP058 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP059 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP060 | 0 | 1 | 0.48 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP061 | 0 | 1 | 0.45 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP062 | 0 | 1 | 0.44 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP063 | 0 | 1 | 0.43 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP064 | 0 | 1 | 0.42 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP065 | 0 | 1 | 0.59 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP066 | 0 | 1 | 0.61 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP067 | 0 | 1 | 0.38 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP068 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP069 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP070 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP071 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP072 | 0 | 1 | 0.66 | 0.47 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP073 | 0 | 1 | 0.31 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP074 | 0 | 1 | 0.32 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP075 | 0 | 1 | 0.34 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP076 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP077 | 0 | 1 | 0.32 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP078 | 0 | 1 | 0.30 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP079 | 0 | 1 | 0.69 | 0.46 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP080 | 0 | 1 | 0.30 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP081 | 0 | 1 | 0.28 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP082 | 0 | 1 | 0.71 | 0.45 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP083 | 0 | 1 | 0.27 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP084 | 0 | 1 | 0.29 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP085 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP086 | 0 | 1 | 0.27 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP087 | 0 | 1 | 0.73 | 0.45 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP088 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP089 | 0 | 1 | 0.25 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP090 | 0 | 1 | 0.25 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP091 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP092 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP093 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP094 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP095 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP096 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP097 | 0 | 1 | 0.24 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP098 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP099 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP100 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP101 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP102 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP103 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP104 | 0 | 1 | 0.22 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP105 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP106 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP107 | 0 | 1 | 0.21 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP108 | 0 | 1 | 0.21 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP109 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP110 | 0 | 1 | 0.21 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP111 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP112 | 0 | 1 | 0.19 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP113 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP114 | 0 | 1 | 0.16 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP115 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP116 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP117 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP118 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP119 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP120 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP121 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP122 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP123 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP124 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP125 | 0 | 1 | 0.16 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP126 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP127 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP128 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP129 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP130 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP131 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP132 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP133 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP134 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP135 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP136 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP137 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP138 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP139 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP140 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP141 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP142 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP143 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP144 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP145 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP146 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP147 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP148 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP149 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP150 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP151 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP152 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP153 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP154 | 0 | 1 | 0.04 | 0.19 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP155 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP156 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP157 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP158 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP159 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP160 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP161 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP162 | 0 | 1 | 0.50 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP163 | 0 | 1 | 0.48 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP164 | 0 | 1 | 0.63 | 0.48 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP165 | 0 | 1 | 0.35 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP166 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP167 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP168 | 0 | 1 | 0.67 | 0.47 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP169 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP170 | 0 | 1 | 0.18 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP171 | 0 | 1 | 0.17 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP172 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP173 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP174 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP175 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP176 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP177 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP178 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP179 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP180 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP181 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP182 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP183 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP184 | 0 | 1 | 0.08 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP185 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP186 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP187 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP188 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP189 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP190 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP191 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP192 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP193 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP194 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP195 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP196 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP197 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP198 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP199 | 0 | 1 | 0.05 | 0.21 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP200 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP201 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP202 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP203 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP204 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP205 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP206 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP207 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP208 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
MolWeight | 0 | 1 | 201.65 | 97.91 | 46.09 | 122.60 | 179.23 | 264.34 | 665.81 | ▇▆▂▁▁ |
NumAtoms | 0 | 1 | 25.51 | 12.61 | 5.00 | 17.00 | 22.00 | 31.00 | 94.00 | ▇▆▂▁▁ |
NumNonHAtoms | 0 | 1 | 13.16 | 6.50 | 2.00 | 8.00 | 12.00 | 17.00 | 47.00 | ▇▆▂▁▁ |
NumNonHBonds | 0 | 1 | 13.56 | 7.57 | 1.00 | 8.00 | 12.00 | 18.00 | 50.00 | ▇▇▂▁▁ |
NumMultBonds | 0 | 1 | 6.15 | 5.17 | 0.00 | 1.00 | 6.00 | 10.00 | 25.00 | ▇▆▃▁▁ |
NumRotBonds | 0 | 1 | 2.25 | 2.41 | 0.00 | 0.00 | 2.00 | 3.50 | 16.00 | ▇▂▁▁▁ |
NumDblBonds | 0 | 1 | 1.01 | 1.21 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▂▁▁▁ |
NumHydrogen | 0 | 1 | 12.35 | 7.32 | 0.00 | 7.00 | 11.00 | 16.00 | 47.00 | ▇▇▂▁▁ |
NumCarbon | 0 | 1 | 9.89 | 5.29 | 1.00 | 6.00 | 9.00 | 12.00 | 33.00 | ▇▇▃▁▁ |
NumNitrogen | 0 | 1 | 0.81 | 1.19 | 0.00 | 0.00 | 0.00 | 1.00 | 6.00 | ▇▂▁▁▁ |
NumOxygen | 0 | 1 | 1.57 | 1.73 | 0.00 | 0.00 | 1.00 | 2.00 | 13.00 | ▇▂▁▁▁ |
NumSulfer | 0 | 1 | 0.16 | 0.49 | 0.00 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
NumChlorine | 0 | 1 | 0.56 | 1.40 | 0.00 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
NumHalogen | 0 | 1 | 0.70 | 1.47 | 0.00 | 0.00 | 0.00 | 1.00 | 10.00 | ▇▁▁▁▁ |
NumRings | 0 | 1 | 1.40 | 1.30 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▃▂▁▁ |
HydrophilicFactor | 0 | 1 | -0.02 | 1.13 | -0.98 | -0.76 | -0.31 | 0.31 | 13.48 | ▇▁▁▁▁ |
SurfaceArea1 | 0 | 1 | 36.46 | 35.29 | 0.00 | 9.23 | 29.10 | 53.28 | 331.94 | ▇▂▁▁▁ |
SurfaceArea2 | 0 | 1 | 40.23 | 38.12 | 0.00 | 10.63 | 33.12 | 60.66 | 331.94 | ▇▂▁▁▁ |
###################################
# Verifying the data dimensions
###################################
dim(DPA_ExcludedLinearlyDependent)
## [1] 951 227

##################################
# Loading dataset
##################################
DPA <- Solubility_Train

##################################
# Listing all predictors
##################################
DPA.Predictors <- DPA[, !names(DPA) %in% c("solTrainY")]

##################################
# Listing all numeric predictors
##################################
# Exclude the binary fingerprint columns (names starting with "FP"),
# keeping only the continuous structural descriptors
DPA.Predictors.Numeric <- DPA.Predictors[, -(grep("FP", names(DPA.Predictors)))]

##################################
# Applying a Box-Cox transformation
##################################
# preProcess estimates a Box-Cox lambda per (strictly positive) column;
# predict() applies the fitted transformation to the data
DPA_BoxCox <- preProcess(DPA.Predictors.Numeric, method = c("BoxCox"))
DPA_BoxCoxTransformed <- predict(DPA_BoxCox, DPA.Predictors.Numeric)

##################################
# Gathering descriptive statistics
##################################
(DPA_BoxCoxTransformedSkimmed <- skim(DPA_BoxCoxTransformed))
Name | DPA_BoxCoxTransformed |
Number of rows | 951 |
Number of columns | 20 |
_______________________ | |
Column type frequency: | |
numeric | 20 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
MolWeight | 0 | 1 | 5.19 | 0.48 | 3.83 | 4.81 | 5.19 | 5.58 | 6.50 | ▁▆▇▆▁ |
NumAtoms | 0 | 1 | 3.13 | 0.48 | 1.61 | 2.83 | 3.09 | 3.43 | 4.54 | ▁▃▇▃▁ |
NumNonHAtoms | 0 | 1 | 2.46 | 0.50 | 0.69 | 2.08 | 2.48 | 2.83 | 3.85 | ▁▃▇▇▁ |
NumBonds | 0 | 1 | 4.39 | 0.96 | 1.60 | 3.81 | 4.36 | 4.97 | 7.48 | ▁▅▇▃▁ |
NumNonHBonds | 0 | 1 | 3.21 | 0.95 | 0.00 | 2.58 | 3.22 | 3.91 | 5.93 | ▁▃▇▆▁ |
NumMultBonds | 0 | 1 | 6.15 | 5.17 | 0.00 | 1.00 | 6.00 | 10.00 | 25.00 | ▇▆▃▁▁ |
NumRotBonds | 0 | 1 | 2.25 | 2.41 | 0.00 | 0.00 | 2.00 | 3.50 | 16.00 | ▇▂▁▁▁ |
NumDblBonds | 0 | 1 | 1.01 | 1.21 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▂▁▁▁ |
NumAromaticBonds | 0 | 1 | 5.12 | 5.26 | 0.00 | 0.00 | 6.00 | 6.00 | 25.00 | ▇▆▃▁▁ |
NumHydrogen | 0 | 1 | 12.35 | 7.32 | 0.00 | 7.00 | 11.00 | 16.00 | 47.00 | ▇▇▂▁▁ |
NumCarbon | 0 | 1 | 3.54 | 1.34 | 0.00 | 2.62 | 3.52 | 4.25 | 7.62 | ▂▇▇▃▁ |
NumNitrogen | 0 | 1 | 0.81 | 1.19 | 0.00 | 0.00 | 0.00 | 1.00 | 6.00 | ▇▂▁▁▁ |
NumOxygen | 0 | 1 | 1.57 | 1.73 | 0.00 | 0.00 | 1.00 | 2.00 | 13.00 | ▇▂▁▁▁ |
NumSulfer | 0 | 1 | 0.16 | 0.49 | 0.00 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
NumChlorine | 0 | 1 | 0.56 | 1.40 | 0.00 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
NumHalogen | 0 | 1 | 0.70 | 1.47 | 0.00 | 0.00 | 0.00 | 1.00 | 10.00 | ▇▁▁▁▁ |
NumRings | 0 | 1 | 1.40 | 1.30 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▃▂▁▁ |
HydrophilicFactor | 0 | 1 | -0.02 | 1.13 | -0.98 | -0.76 | -0.31 | 0.31 | 13.48 | ▇▁▁▁▁ |
SurfaceArea1 | 0 | 1 | 36.46 | 35.29 | 0.00 | 9.23 | 29.10 | 53.28 | 331.94 | ▇▂▁▁▁ |
SurfaceArea2 | 0 | 1 | 40.23 | 38.12 | 0.00 | 10.63 | 33.12 | 60.66 | 331.94 | ▇▂▁▁▁ |
###################################
# Verifying the data dimensions
###################################
dim(DPA_BoxCoxTransformed)
## [1] 951 20

##################################
# Loading dataset
##################################
DPA <- Solubility_Train

##################################
# Listing all predictors
##################################
DPA.Predictors <- DPA[, !names(DPA) %in% c("solTrainY")]

##################################
# Listing all numeric predictors
##################################
# Exclude the binary fingerprint columns (names starting with "FP")
DPA.Predictors.Numeric <- DPA.Predictors[, -(grep("FP", names(DPA.Predictors)))]

##################################
# Applying a Box-Cox transformation
##################################
DPA_BoxCox <- preProcess(DPA.Predictors.Numeric, method = c("BoxCox"))
DPA_BoxCoxTransformed <- predict(DPA_BoxCox, DPA.Predictors.Numeric)

##################################
# Applying a center and scale data transformation
##################################
# Standardize the Box-Cox-transformed descriptors to mean 0 and sd 1
DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaled <- preProcess(DPA_BoxCoxTransformed, method = c("center","scale"))
DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed <- predict(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaled, DPA_BoxCoxTransformed)

##################################
# Gathering descriptive statistics
##################################
(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformedSkimmed <- skim(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed))
Name | DPA.Predictors.Numeric_Bo… |
Number of rows | 951 |
Number of columns | 20 |
_______________________ | |
Column type frequency: | |
numeric | 20 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
MolWeight | 0 | 1 | 0 | 1 | -2.84 | -0.80 | -0.01 | 0.80 | 2.72 | ▁▆▇▆▁ |
NumAtoms | 0 | 1 | 0 | 1 | -3.16 | -0.61 | -0.07 | 0.64 | 2.95 | ▁▃▇▃▁ |
NumNonHAtoms | 0 | 1 | 0 | 1 | -3.53 | -0.76 | 0.06 | 0.75 | 2.79 | ▁▃▇▇▁ |
NumBonds | 0 | 1 | 0 | 1 | -2.92 | -0.61 | -0.04 | 0.60 | 3.23 | ▁▅▇▃▁ |
NumNonHBonds | 0 | 1 | 0 | 1 | -3.38 | -0.67 | 0.01 | 0.74 | 2.86 | ▁▃▇▆▁ |
NumMultBonds | 0 | 1 | 0 | 1 | -1.19 | -1.00 | -0.03 | 0.74 | 3.65 | ▇▇▃▁▁ |
NumRotBonds | 0 | 1 | 0 | 1 | -0.93 | -0.93 | -0.10 | 0.52 | 5.71 | ▇▂▁▁▁ |
NumDblBonds | 0 | 1 | 0 | 1 | -0.83 | -0.83 | -0.01 | 0.82 | 4.95 | ▇▂▁▁▁ |
NumAromaticBonds | 0 | 1 | 0 | 1 | -0.97 | -0.97 | 0.17 | 0.17 | 3.78 | ▇▆▃▁▁ |
NumHydrogen | 0 | 1 | 0 | 1 | -1.69 | -0.73 | -0.18 | 0.50 | 4.74 | ▇▇▂▁▁ |
NumCarbon | 0 | 1 | 0 | 1 | -2.64 | -0.69 | -0.01 | 0.54 | 3.06 | ▂▇▇▃▁ |
NumNitrogen | 0 | 1 | 0 | 1 | -0.69 | -0.69 | -0.69 | 0.16 | 4.37 | ▇▂▁▁▁ |
NumOxygen | 0 | 1 | 0 | 1 | -0.91 | -0.91 | -0.33 | 0.25 | 6.61 | ▇▂▁▁▁ |
NumSulfer | 0 | 1 | 0 | 1 | -0.34 | -0.34 | -0.34 | -0.34 | 7.86 | ▇▁▁▁▁ |
NumChlorine | 0 | 1 | 0 | 1 | -0.40 | -0.40 | -0.40 | -0.40 | 6.74 | ▇▁▁▁▁ |
NumHalogen | 0 | 1 | 0 | 1 | -0.47 | -0.47 | -0.47 | 0.20 | 6.32 | ▇▁▁▁▁ |
NumRings | 0 | 1 | 0 | 1 | -1.08 | -1.08 | -0.31 | 0.46 | 4.31 | ▇▃▂▁▁ |
HydrophilicFactor | 0 | 1 | 0 | 1 | -0.86 | -0.66 | -0.26 | 0.30 | 11.99 | ▇▁▁▁▁ |
SurfaceArea1 | 0 | 1 | 0 | 1 | -1.03 | -0.77 | -0.21 | 0.48 | 8.37 | ▇▂▁▁▁ |
SurfaceArea2 | 0 | 1 | 0 | 1 | -1.06 | -0.78 | -0.19 | 0.54 | 7.65 | ▇▂▁▁▁ |
###################################
# Verifying the data dimensions
###################################
dim(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed)
## [1] 951 20

##################################
# Creating the pre-modelling
# train set
##################################
Log_Solubility <- DPA$solTrainY
# Binary fingerprint predictors are recoded as factors
PMA.Predictors.Factor <- DPA.Predictors[, (grep("FP", names(DPA.Predictors)))]
PMA.Predictors.Factor <- as.data.frame(lapply(PMA.Predictors.Factor, factor))
# Continuous descriptors use the Box-Cox + center/scale transformed values
PMA.Predictors.Numeric <- DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed
PMA_BoxCoxTransformed_CenteredScaledTransformed <- cbind(Log_Solubility, PMA.Predictors.Factor, PMA.Predictors.Numeric)

##################################
# Filtering out columns noted with data quality issues including
# zero and near-zero variance,
# high correlation and linear dependencies
# to create the pre-modelling dataset
##################################
PMA_BoxCoxTransformed_CenteredScaledTransformed_ExcludedLowVariance_ExcludedLinearlyDependent_ExcludedHighCorrelation <- PMA_BoxCoxTransformed_CenteredScaledTransformed[, !names(PMA_BoxCoxTransformed_CenteredScaledTransformed) %in% c("FP154","FP199","FP200","NumNonHBonds","NumHydrogen","NumNonHAtoms","NumAromaticBonds","NumAtoms")]

PMA_PreModelling_Train <- PMA_BoxCoxTransformed_CenteredScaledTransformed_ExcludedLowVariance_ExcludedLinearlyDependent_ExcludedHighCorrelation

##################################
# Gathering descriptive statistics
##################################
(PMA_PreModelling_Train_Skimmed <- skim(PMA_PreModelling_Train))
Name | PMA_PreModelling_Train |
Number of rows | 951 |
Number of columns | 221 |
_______________________ | |
Column type frequency: | |
factor | 205 |
numeric | 16 |
________________________ | |
Group variables | None |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
FP001 | 0 | 1 | FALSE | 2 | 0: 482, 1: 469 |
FP002 | 0 | 1 | FALSE | 2 | 1: 513, 0: 438 |
FP003 | 0 | 1 | FALSE | 2 | 0: 536, 1: 415 |
FP004 | 0 | 1 | FALSE | 2 | 1: 556, 0: 395 |
FP005 | 0 | 1 | FALSE | 2 | 1: 551, 0: 400 |
FP006 | 0 | 1 | FALSE | 2 | 0: 570, 1: 381 |
FP007 | 0 | 1 | FALSE | 2 | 0: 605, 1: 346 |
FP008 | 0 | 1 | FALSE | 2 | 0: 641, 1: 310 |
FP009 | 0 | 1 | FALSE | 2 | 0: 685, 1: 266 |
FP010 | 0 | 1 | FALSE | 2 | 0: 781, 1: 170 |
FP011 | 0 | 1 | FALSE | 2 | 0: 747, 1: 204 |
FP012 | 0 | 1 | FALSE | 2 | 0: 783, 1: 168 |
FP013 | 0 | 1 | FALSE | 2 | 0: 793, 1: 158 |
FP014 | 0 | 1 | FALSE | 2 | 0: 798, 1: 153 |
FP015 | 0 | 1 | FALSE | 2 | 1: 818, 0: 133 |
FP016 | 0 | 1 | FALSE | 2 | 0: 812, 1: 139 |
FP017 | 0 | 1 | FALSE | 2 | 0: 814, 1: 137 |
FP018 | 0 | 1 | FALSE | 2 | 0: 826, 1: 125 |
FP019 | 0 | 1 | FALSE | 2 | 0: 835, 1: 116 |
FP020 | 0 | 1 | FALSE | 2 | 0: 837, 1: 114 |
FP021 | 0 | 1 | FALSE | 2 | 0: 836, 1: 115 |
FP022 | 0 | 1 | FALSE | 2 | 0: 852, 1: 99 |
FP023 | 0 | 1 | FALSE | 2 | 0: 834, 1: 117 |
FP024 | 0 | 1 | FALSE | 2 | 0: 844, 1: 107 |
FP025 | 0 | 1 | FALSE | 2 | 0: 841, 1: 110 |
FP026 | 0 | 1 | FALSE | 2 | 0: 871, 1: 80 |
FP027 | 0 | 1 | FALSE | 2 | 0: 858, 1: 93 |
FP028 | 0 | 1 | FALSE | 2 | 0: 850, 1: 101 |
FP029 | 0 | 1 | FALSE | 2 | 0: 854, 1: 97 |
FP030 | 0 | 1 | FALSE | 2 | 0: 862, 1: 89 |
FP031 | 0 | 1 | FALSE | 2 | 0: 866, 1: 85 |
FP032 | 0 | 1 | FALSE | 2 | 0: 881, 1: 70 |
FP033 | 0 | 1 | FALSE | 2 | 0: 885, 1: 66 |
FP034 | 0 | 1 | FALSE | 2 | 0: 875, 1: 76 |
FP035 | 0 | 1 | FALSE | 2 | 0: 882, 1: 69 |
FP036 | 0 | 1 | FALSE | 2 | 0: 879, 1: 72 |
FP037 | 0 | 1 | FALSE | 2 | 0: 884, 1: 67 |
FP038 | 0 | 1 | FALSE | 2 | 0: 869, 1: 82 |
FP039 | 0 | 1 | FALSE | 2 | 0: 880, 1: 71 |
FP040 | 0 | 1 | FALSE | 2 | 0: 886, 1: 65 |
FP041 | 0 | 1 | FALSE | 2 | 0: 891, 1: 60 |
FP042 | 0 | 1 | FALSE | 2 | 0: 897, 1: 54 |
FP043 | 0 | 1 | FALSE | 2 | 0: 888, 1: 63 |
FP044 | 0 | 1 | FALSE | 2 | 0: 894, 1: 57 |
FP045 | 0 | 1 | FALSE | 2 | 0: 898, 1: 53 |
FP046 | 0 | 1 | FALSE | 2 | 0: 651, 1: 300 |
FP047 | 0 | 1 | FALSE | 2 | 0: 698, 1: 253 |
FP048 | 0 | 1 | FALSE | 2 | 0: 833, 1: 118 |
FP049 | 0 | 1 | FALSE | 2 | 0: 835, 1: 116 |
FP050 | 0 | 1 | FALSE | 2 | 0: 844, 1: 107 |
FP051 | 0 | 1 | FALSE | 2 | 0: 847, 1: 104 |
FP052 | 0 | 1 | FALSE | 2 | 0: 864, 1: 87 |
FP053 | 0 | 1 | FALSE | 2 | 0: 862, 1: 89 |
FP054 | 0 | 1 | FALSE | 2 | 0: 879, 1: 72 |
FP055 | 0 | 1 | FALSE | 2 | 0: 900, 1: 51 |
FP056 | 0 | 1 | FALSE | 2 | 0: 889, 1: 62 |
FP057 | 0 | 1 | FALSE | 2 | 0: 837, 1: 114 |
FP058 | 0 | 1 | FALSE | 2 | 0: 843, 1: 108 |
FP059 | 0 | 1 | FALSE | 2 | 0: 899, 1: 52 |
FP060 | 0 | 1 | FALSE | 2 | 0: 493, 1: 458 |
FP061 | 0 | 1 | FALSE | 2 | 0: 526, 1: 425 |
FP062 | 0 | 1 | FALSE | 2 | 0: 535, 1: 416 |
FP063 | 0 | 1 | FALSE | 2 | 0: 546, 1: 405 |
FP064 | 0 | 1 | FALSE | 2 | 0: 555, 1: 396 |
FP065 | 0 | 1 | FALSE | 2 | 1: 564, 0: 387 |
FP066 | 0 | 1 | FALSE | 2 | 1: 580, 0: 371 |
FP067 | 0 | 1 | FALSE | 2 | 0: 590, 1: 361 |
FP068 | 0 | 1 | FALSE | 2 | 0: 607, 1: 344 |
FP069 | 0 | 1 | FALSE | 2 | 0: 607, 1: 344 |
FP070 | 0 | 1 | FALSE | 2 | 0: 613, 1: 338 |
FP071 | 0 | 1 | FALSE | 2 | 0: 640, 1: 311 |
FP072 | 0 | 1 | FALSE | 2 | 1: 626, 0: 325 |
FP073 | 0 | 1 | FALSE | 2 | 0: 656, 1: 295 |
FP074 | 0 | 1 | FALSE | 2 | 0: 642, 1: 309 |
FP075 | 0 | 1 | FALSE | 2 | 0: 629, 1: 322 |
FP076 | 0 | 1 | FALSE | 2 | 0: 639, 1: 312 |
FP077 | 0 | 1 | FALSE | 2 | 0: 646, 1: 305 |
FP078 | 0 | 1 | FALSE | 2 | 0: 662, 1: 289 |
FP079 | 0 | 1 | FALSE | 2 | 1: 656, 0: 295 |
FP080 | 0 | 1 | FALSE | 2 | 0: 663, 1: 288 |
FP081 | 0 | 1 | FALSE | 2 | 0: 686, 1: 265 |
FP082 | 0 | 1 | FALSE | 2 | 1: 679, 0: 272 |
FP083 | 0 | 1 | FALSE | 2 | 0: 691, 1: 260 |
FP084 | 0 | 1 | FALSE | 2 | 0: 679, 1: 272 |
FP085 | 0 | 1 | FALSE | 2 | 0: 708, 1: 243 |
FP086 | 0 | 1 | FALSE | 2 | 0: 695, 1: 256 |
FP087 | 0 | 1 | FALSE | 2 | 1: 691, 0: 260 |
FP088 | 0 | 1 | FALSE | 2 | 0: 701, 1: 250 |
FP089 | 0 | 1 | FALSE | 2 | 0: 716, 1: 235 |
FP090 | 0 | 1 | FALSE | 2 | 0: 714, 1: 237 |
FP091 | 0 | 1 | FALSE | 2 | 0: 737, 1: 214 |
FP092 | 0 | 1 | FALSE | 2 | 0: 719, 1: 232 |
FP093 | 0 | 1 | FALSE | 2 | 0: 719, 1: 232 |
FP094 | 0 | 1 | FALSE | 2 | 0: 731, 1: 220 |
FP095 | 0 | 1 | FALSE | 2 | 0: 742, 1: 209 |
FP096 | 0 | 1 | FALSE | 2 | 0: 744, 1: 207 |
FP097 | 0 | 1 | FALSE | 2 | 0: 727, 1: 224 |
FP098 | 0 | 1 | FALSE | 2 | 0: 725, 1: 226 |
FP099 | 0 | 1 | FALSE | 2 | 0: 735, 1: 216 |
FP100 | 0 | 1 | FALSE | 2 | 0: 731, 1: 220 |
FP101 | 0 | 1 | FALSE | 2 | 0: 726, 1: 225 |
FP102 | 0 | 1 | FALSE | 2 | 0: 759, 1: 192 |
FP103 | 0 | 1 | FALSE | 2 | 0: 743, 1: 208 |
FP104 | 0 | 1 | FALSE | 2 | 0: 739, 1: 212 |
FP105 | 0 | 1 | FALSE | 2 | 0: 746, 1: 205 |
FP106 | 0 | 1 | FALSE | 2 | 0: 769, 1: 182 |
FP107 | 0 | 1 | FALSE | 2 | 0: 750, 1: 201 |
FP108 | 0 | 1 | FALSE | 2 | 0: 756, 1: 195 |
FP109 | 0 | 1 | FALSE | 2 | 0: 783, 1: 168 |
FP110 | 0 | 1 | FALSE | 2 | 0: 755, 1: 196 |
FP111 | 0 | 1 | FALSE | 2 | 0: 764, 1: 187 |
FP112 | 0 | 1 | FALSE | 2 | 0: 766, 1: 185 |
FP113 | 0 | 1 | FALSE | 2 | 0: 765, 1: 186 |
FP114 | 0 | 1 | FALSE | 2 | 0: 803, 1: 148 |
FP115 | 0 | 1 | FALSE | 2 | 0: 781, 1: 170 |
FP116 | 0 | 1 | FALSE | 2 | 0: 768, 1: 183 |
FP117 | 0 | 1 | FALSE | 2 | 0: 781, 1: 170 |
FP118 | 0 | 1 | FALSE | 2 | 0: 768, 1: 183 |
FP119 | 0 | 1 | FALSE | 2 | 0: 796, 1: 155 |
FP120 | 0 | 1 | FALSE | 2 | 0: 793, 1: 158 |
FP121 | 0 | 1 | FALSE | 2 | 0: 818, 1: 133 |
FP122 | 0 | 1 | FALSE | 2 | 0: 795, 1: 156 |
FP123 | 0 | 1 | FALSE | 2 | 0: 792, 1: 159 |
FP124 | 0 | 1 | FALSE | 2 | 0: 797, 1: 154 |
FP125 | 0 | 1 | FALSE | 2 | 0: 803, 1: 148 |
FP126 | 0 | 1 | FALSE | 2 | 0: 810, 1: 141 |
FP127 | 0 | 1 | FALSE | 2 | 0: 818, 1: 133 |
FP128 | 0 | 1 | FALSE | 2 | 0: 810, 1: 141 |
FP129 | 0 | 1 | FALSE | 2 | 0: 819, 1: 132 |
FP130 | 0 | 1 | FALSE | 2 | 0: 851, 1: 100 |
FP131 | 0 | 1 | FALSE | 2 | 0: 831, 1: 120 |
FP132 | 0 | 1 | FALSE | 2 | 0: 832, 1: 119 |
FP133 | 0 | 1 | FALSE | 2 | 0: 831, 1: 120 |
FP134 | 0 | 1 | FALSE | 2 | 0: 830, 1: 121 |
FP135 | 0 | 1 | FALSE | 2 | 0: 831, 1: 120 |
FP136 | 0 | 1 | FALSE | 2 | 0: 836, 1: 115 |
FP137 | 0 | 1 | FALSE | 2 | 0: 841, 1: 110 |
FP138 | 0 | 1 | FALSE | 2 | 0: 845, 1: 106 |
FP139 | 0 | 1 | FALSE | 2 | 0: 873, 1: 78 |
FP140 | 0 | 1 | FALSE | 2 | 0: 845, 1: 106 |
FP141 | 0 | 1 | FALSE | 2 | 0: 840, 1: 111 |
FP142 | 0 | 1 | FALSE | 2 | 0: 847, 1: 104 |
FP143 | 0 | 1 | FALSE | 2 | 0: 874, 1: 77 |
FP144 | 0 | 1 | FALSE | 2 | 0: 852, 1: 99 |
FP145 | 0 | 1 | FALSE | 2 | 0: 852, 1: 99 |
FP146 | 0 | 1 | FALSE | 2 | 0: 853, 1: 98 |
FP147 | 0 | 1 | FALSE | 2 | 0: 851, 1: 100 |
FP148 | 0 | 1 | FALSE | 2 | 0: 868, 1: 83 |
FP149 | 0 | 1 | FALSE | 2 | 0: 865, 1: 86 |
FP150 | 0 | 1 | FALSE | 2 | 0: 876, 1: 75 |
FP151 | 0 | 1 | FALSE | 2 | 0: 898, 1: 53 |
FP152 | 0 | 1 | FALSE | 2 | 0: 873, 1: 78 |
FP153 | 0 | 1 | FALSE | 2 | 0: 877, 1: 74 |
FP155 | 0 | 1 | FALSE | 2 | 0: 885, 1: 66 |
FP156 | 0 | 1 | FALSE | 2 | 0: 884, 1: 67 |
FP157 | 0 | 1 | FALSE | 2 | 0: 892, 1: 59 |
FP158 | 0 | 1 | FALSE | 2 | 0: 900, 1: 51 |
FP159 | 0 | 1 | FALSE | 2 | 0: 884, 1: 67 |
FP160 | 0 | 1 | FALSE | 2 | 0: 886, 1: 65 |
FP161 | 0 | 1 | FALSE | 2 | 0: 888, 1: 63 |
FP162 | 0 | 1 | FALSE | 2 | 0: 480, 1: 471 |
FP163 | 0 | 1 | FALSE | 2 | 0: 498, 1: 453 |
FP164 | 0 | 1 | FALSE | 2 | 1: 597, 0: 354 |
FP165 | 0 | 1 | FALSE | 2 | 0: 619, 1: 332 |
FP166 | 0 | 1 | FALSE | 2 | 0: 636, 1: 315 |
FP167 | 0 | 1 | FALSE | 2 | 0: 639, 1: 312 |
FP168 | 0 | 1 | FALSE | 2 | 1: 633, 0: 318 |
FP169 | 0 | 1 | FALSE | 2 | 0: 774, 1: 177 |
FP170 | 0 | 1 | FALSE | 2 | 0: 776, 1: 175 |
FP171 | 0 | 1 | FALSE | 2 | 0: 790, 1: 161 |
FP172 | 0 | 1 | FALSE | 2 | 0: 807, 1: 144 |
FP173 | 0 | 1 | FALSE | 2 | 0: 816, 1: 135 |
FP174 | 0 | 1 | FALSE | 2 | 0: 827, 1: 124 |
FP175 | 0 | 1 | FALSE | 2 | 0: 823, 1: 128 |
FP176 | 0 | 1 | FALSE | 2 | 0: 835, 1: 116 |
FP177 | 0 | 1 | FALSE | 2 | 0: 836, 1: 115 |
FP178 | 0 | 1 | FALSE | 2 | 0: 836, 1: 115 |
FP179 | 0 | 1 | FALSE | 2 | 0: 858, 1: 93 |
FP180 | 0 | 1 | FALSE | 2 | 0: 849, 1: 102 |
FP181 | 0 | 1 | FALSE | 2 | 0: 862, 1: 89 |
FP182 | 0 | 1 | FALSE | 2 | 0: 857, 1: 94 |
FP183 | 0 | 1 | FALSE | 2 | 0: 879, 1: 72 |
FP184 | 0 | 1 | FALSE | 2 | 0: 871, 1: 80 |
FP185 | 0 | 1 | FALSE | 2 | 0: 870, 1: 81 |
FP186 | 0 | 1 | FALSE | 2 | 0: 878, 1: 73 |
FP187 | 0 | 1 | FALSE | 2 | 0: 882, 1: 69 |
FP188 | 0 | 1 | FALSE | 2 | 0: 886, 1: 65 |
FP189 | 0 | 1 | FALSE | 2 | 0: 878, 1: 73 |
FP190 | 0 | 1 | FALSE | 2 | 0: 882, 1: 69 |
FP191 | 0 | 1 | FALSE | 2 | 0: 884, 1: 67 |
FP192 | 0 | 1 | FALSE | 2 | 0: 893, 1: 58 |
FP193 | 0 | 1 | FALSE | 2 | 0: 892, 1: 59 |
FP194 | 0 | 1 | FALSE | 2 | 0: 895, 1: 56 |
FP195 | 0 | 1 | FALSE | 2 | 0: 893, 1: 58 |
FP196 | 0 | 1 | FALSE | 2 | 0: 897, 1: 54 |
FP197 | 0 | 1 | FALSE | 2 | 0: 901, 1: 50 |
FP198 | 0 | 1 | FALSE | 2 | 0: 897, 1: 54 |
FP201 | 0 | 1 | FALSE | 2 | 0: 901, 1: 50 |
FP202 | 0 | 1 | FALSE | 2 | 0: 706, 1: 245 |
FP203 | 0 | 1 | FALSE | 2 | 0: 842, 1: 109 |
FP204 | 0 | 1 | FALSE | 2 | 0: 857, 1: 94 |
FP205 | 0 | 1 | FALSE | 2 | 0: 877, 1: 74 |
FP206 | 0 | 1 | FALSE | 2 | 0: 894, 1: 57 |
FP207 | 0 | 1 | FALSE | 2 | 0: 897, 1: 54 |
FP208 | 0 | 1 | FALSE | 2 | 0: 844, 1: 107 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
Log_Solubility | 0 | 1 | -2.72 | 2.05 | -11.62 | -3.96 | -2.51 | -1.36 | 1.58 | ▁▁▃▇▃ |
MolWeight | 0 | 1 | 0.00 | 1.00 | -2.84 | -0.80 | -0.01 | 0.80 | 2.72 | ▁▆▇▆▁ |
NumBonds | 0 | 1 | 0.00 | 1.00 | -2.92 | -0.61 | -0.04 | 0.60 | 3.23 | ▁▅▇▃▁ |
NumMultBonds | 0 | 1 | 0.00 | 1.00 | -1.19 | -1.00 | -0.03 | 0.74 | 3.65 | ▇▇▃▁▁ |
NumRotBonds | 0 | 1 | 0.00 | 1.00 | -0.93 | -0.93 | -0.10 | 0.52 | 5.71 | ▇▂▁▁▁ |
NumDblBonds | 0 | 1 | 0.00 | 1.00 | -0.83 | -0.83 | -0.01 | 0.82 | 4.95 | ▇▂▁▁▁ |
NumCarbon | 0 | 1 | 0.00 | 1.00 | -2.64 | -0.69 | -0.01 | 0.54 | 3.06 | ▂▇▇▃▁ |
NumNitrogen | 0 | 1 | 0.00 | 1.00 | -0.69 | -0.69 | -0.69 | 0.16 | 4.37 | ▇▂▁▁▁ |
NumOxygen | 0 | 1 | 0.00 | 1.00 | -0.91 | -0.91 | -0.33 | 0.25 | 6.61 | ▇▂▁▁▁ |
NumSulfer | 0 | 1 | 0.00 | 1.00 | -0.34 | -0.34 | -0.34 | -0.34 | 7.86 | ▇▁▁▁▁ |
NumChlorine | 0 | 1 | 0.00 | 1.00 | -0.40 | -0.40 | -0.40 | -0.40 | 6.74 | ▇▁▁▁▁ |
NumHalogen | 0 | 1 | 0.00 | 1.00 | -0.47 | -0.47 | -0.47 | 0.20 | 6.32 | ▇▁▁▁▁ |
NumRings | 0 | 1 | 0.00 | 1.00 | -1.08 | -1.08 | -0.31 | 0.46 | 4.31 | ▇▃▂▁▁ |
HydrophilicFactor | 0 | 1 | 0.00 | 1.00 | -0.86 | -0.66 | -0.26 | 0.30 | 11.99 | ▇▁▁▁▁ |
SurfaceArea1 | 0 | 1 | 0.00 | 1.00 | -1.03 | -0.77 | -0.21 | 0.48 | 8.37 | ▇▂▁▁▁ |
SurfaceArea2 | 0 | 1 | 0.00 | 1.00 | -1.06 | -0.78 | -0.19 | 0.54 | 7.65 | ▇▂▁▁▁ |
###################################
# Verifying the data dimensions
# for the train set
###################################
dim(PMA_PreModelling_Train)
## [1] 951 221
##################################
# Formulating the test set
##################################
# Apply the same data preparation pipeline to the test set:
# split off the response, isolate the numeric (non-fingerprint) predictors,
# then Box-Cox transform followed by centering and scaling.
# NOTE(review): reconstructed from a line-shifted text extraction. The
# preProcess parameters are estimated on the test predictors themselves here,
# mirroring the original report; strictly, train-set parameters should be
# reused -- confirm against the original Rmd before changing behavior.
DPA_Test <- Solubility_Test
DPA_Test.Predictors <- DPA_Test[, !names(DPA_Test) %in% c("solTestY")]
DPA_Test.Predictors.Numeric <- DPA_Test.Predictors[, -(grep("FP", names(DPA_Test.Predictors)))]
# Box-Cox transformation of the numeric predictors.
DPA_Test_BoxCox <- preProcess(DPA_Test.Predictors.Numeric, method = c("BoxCox"))
DPA_Test_BoxCoxTransformed <- predict(DPA_Test_BoxCox, DPA_Test.Predictors.Numeric)
# Centering and scaling of the Box-Cox-transformed predictors.
DPA_Test.Predictors.Numeric_BoxCoxTransformed_CenteredScaled <- preProcess(DPA_Test_BoxCoxTransformed, method = c("center", "scale"))
DPA_Test.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed <- predict(DPA_Test.Predictors.Numeric_BoxCoxTransformed_CenteredScaled, DPA_Test_BoxCoxTransformed)
##################################
# Creating the pre-modelling
# test set
##################################
# Assemble the pre-modelling test set with the same structure as the
# train set, then drop the same quality-flagged predictors.
# NOTE(review): reconstructed from a line-shifted text extraction.
Log_Solubility <- DPA_Test$solTestY
# Fingerprint (FP*) columns are binary indicators; encode them as factors.
PMA_Test.Predictors.Factor <- DPA_Test.Predictors[, (grep("FP", names(DPA_Test.Predictors)))]
PMA_Test.Predictors.Factor <- as.data.frame(lapply(PMA_Test.Predictors.Factor, factor))
PMA_Test.Predictors.Numeric <- DPA_Test.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed
PMA_Test_BoxCoxTransformed_CenteredScaledTransformed <- cbind(Log_Solubility,
                                                              PMA_Test.Predictors.Factor,
                                                              PMA_Test.Predictors.Numeric)
# Exclude the same columns removed from the train set so both sets align.
PMA_Test_BoxCoxTransformed_CenteredScaledTransformed_ExcludedLowVariance_ExcludedLinearlyDependent_ExcludedHighCorrelation <-
  PMA_Test_BoxCoxTransformed_CenteredScaledTransformed[, !names(PMA_Test_BoxCoxTransformed_CenteredScaledTransformed) %in%
    c("FP154", "FP199", "FP200",
      "NumNonHBonds", "NumHydrogen", "NumNonHAtoms",
      "NumAromaticBonds", "NumAtoms")]
PMA_PreModelling_Test <- PMA_Test_BoxCoxTransformed_CenteredScaledTransformed_ExcludedLowVariance_ExcludedLinearlyDependent_ExcludedHighCorrelation
##################################
# Gathering descriptive statistics
##################################
<- skim(PMA_PreModelling_Test)) (PMA_PreModelling_Test_Skimmed
Name | PMA_PreModelling_Test |
Number of rows | 316 |
Number of columns | 221 |
_______________________ | |
Column type frequency: | |
factor | 205 |
numeric | 16 |
________________________ | |
Group variables | None |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
FP001 | 0 | 1 | FALSE | 2 | 0: 168, 1: 148 |
FP002 | 0 | 1 | FALSE | 2 | 1: 185, 0: 131 |
FP003 | 0 | 1 | FALSE | 2 | 0: 176, 1: 140 |
FP004 | 0 | 1 | FALSE | 2 | 1: 168, 0: 148 |
FP005 | 0 | 1 | FALSE | 2 | 1: 195, 0: 121 |
FP006 | 0 | 1 | FALSE | 2 | 0: 205, 1: 111 |
FP007 | 0 | 1 | FALSE | 2 | 0: 204, 1: 112 |
FP008 | 0 | 1 | FALSE | 2 | 0: 202, 1: 114 |
FP009 | 0 | 1 | FALSE | 2 | 0: 233, 1: 83 |
FP010 | 0 | 1 | FALSE | 2 | 0: 255, 1: 61 |
FP011 | 0 | 1 | FALSE | 2 | 0: 261, 1: 55 |
FP012 | 0 | 1 | FALSE | 2 | 0: 263, 1: 53 |
FP013 | 0 | 1 | FALSE | 2 | 0: 264, 1: 52 |
FP014 | 0 | 1 | FALSE | 2 | 0: 266, 1: 50 |
FP015 | 0 | 1 | FALSE | 2 | 1: 262, 0: 54 |
FP016 | 0 | 1 | FALSE | 2 | 0: 271, 1: 45 |
FP017 | 0 | 1 | FALSE | 2 | 0: 269, 1: 47 |
FP018 | 0 | 1 | FALSE | 2 | 0: 289, 1: 27 |
FP019 | 0 | 1 | FALSE | 2 | 0: 280, 1: 36 |
FP020 | 0 | 1 | FALSE | 2 | 0: 282, 1: 34 |
FP021 | 0 | 1 | FALSE | 2 | 0: 282, 1: 34 |
FP022 | 0 | 1 | FALSE | 2 | 0: 279, 1: 37 |
FP023 | 0 | 1 | FALSE | 2 | 0: 289, 1: 27 |
FP024 | 0 | 1 | FALSE | 2 | 0: 285, 1: 31 |
FP025 | 0 | 1 | FALSE | 2 | 0: 291, 1: 25 |
FP026 | 0 | 1 | FALSE | 2 | 0: 279, 1: 37 |
FP027 | 0 | 1 | FALSE | 2 | 0: 291, 1: 25 |
FP028 | 0 | 1 | FALSE | 2 | 0: 298, 1: 18 |
FP029 | 0 | 1 | FALSE | 2 | 0: 300, 1: 16 |
FP030 | 0 | 1 | FALSE | 2 | 0: 290, 1: 26 |
FP031 | 0 | 1 | FALSE | 2 | 0: 285, 1: 31 |
FP032 | 0 | 1 | FALSE | 2 | 0: 275, 1: 41 |
FP033 | 0 | 1 | FALSE | 2 | 0: 278, 1: 38 |
FP034 | 0 | 1 | FALSE | 2 | 0: 295, 1: 21 |
FP035 | 0 | 1 | FALSE | 2 | 0: 285, 1: 31 |
FP036 | 0 | 1 | FALSE | 2 | 0: 297, 1: 19 |
FP037 | 0 | 1 | FALSE | 2 | 0: 286, 1: 30 |
FP038 | 0 | 1 | FALSE | 2 | 0: 306, 1: 10 |
FP039 | 0 | 1 | FALSE | 2 | 0: 296, 1: 20 |
FP040 | 0 | 1 | FALSE | 2 | 0: 298, 1: 18 |
FP041 | 0 | 1 | FALSE | 2 | 0: 297, 1: 19 |
FP042 | 0 | 1 | FALSE | 2 | 0: 297, 1: 19 |
FP043 | 0 | 1 | FALSE | 2 | 0: 302, 1: 14 |
FP044 | 0 | 1 | FALSE | 2 | 0: 297, 1: 19 |
FP045 | 0 | 1 | FALSE | 2 | 0: 296, 1: 20 |
FP046 | 0 | 1 | FALSE | 2 | 0: 213, 1: 103 |
FP047 | 0 | 1 | FALSE | 2 | 0: 222, 1: 94 |
FP048 | 0 | 1 | FALSE | 2 | 0: 280, 1: 36 |
FP049 | 0 | 1 | FALSE | 2 | 0: 282, 1: 34 |
FP050 | 0 | 1 | FALSE | 2 | 0: 280, 1: 36 |
FP051 | 0 | 1 | FALSE | 2 | 0: 298, 1: 18 |
FP052 | 0 | 1 | FALSE | 2 | 0: 283, 1: 33 |
FP053 | 0 | 1 | FALSE | 2 | 0: 297, 1: 19 |
FP054 | 0 | 1 | FALSE | 2 | 0: 285, 1: 31 |
FP055 | 0 | 1 | FALSE | 2 | 0: 287, 1: 29 |
FP056 | 0 | 1 | FALSE | 2 | 0: 296, 1: 20 |
FP057 | 0 | 1 | FALSE | 2 | 0: 277, 1: 39 |
FP058 | 0 | 1 | FALSE | 2 | 0: 273, 1: 43 |
FP059 | 0 | 1 | FALSE | 2 | 0: 302, 1: 14 |
FP060 | 0 | 1 | FALSE | 2 | 0: 173, 1: 143 |
FP061 | 0 | 1 | FALSE | 2 | 0: 192, 1: 124 |
FP062 | 0 | 1 | FALSE | 2 | 0: 181, 1: 135 |
FP063 | 0 | 1 | FALSE | 2 | 0: 203, 1: 113 |
FP064 | 0 | 1 | FALSE | 2 | 0: 193, 1: 123 |
FP065 | 0 | 1 | FALSE | 2 | 1: 189, 0: 127 |
FP066 | 0 | 1 | FALSE | 2 | 1: 195, 0: 121 |
FP067 | 0 | 1 | FALSE | 2 | 0: 213, 1: 103 |
FP068 | 0 | 1 | FALSE | 2 | 0: 224, 1: 92 |
FP069 | 0 | 1 | FALSE | 2 | 0: 198, 1: 118 |
FP070 | 0 | 1 | FALSE | 2 | 0: 211, 1: 105 |
FP071 | 0 | 1 | FALSE | 2 | 0: 207, 1: 109 |
FP072 | 0 | 1 | FALSE | 2 | 1: 204, 0: 112 |
FP073 | 0 | 1 | FALSE | 2 | 0: 224, 1: 92 |
FP074 | 0 | 1 | FALSE | 2 | 0: 213, 1: 103 |
FP075 | 0 | 1 | FALSE | 2 | 0: 235, 1: 81 |
FP076 | 0 | 1 | FALSE | 2 | 0: 216, 1: 100 |
FP077 | 0 | 1 | FALSE | 2 | 0: 219, 1: 97 |
FP078 | 0 | 1 | FALSE | 2 | 0: 218, 1: 98 |
FP079 | 0 | 1 | FALSE | 2 | 1: 230, 0: 86 |
FP080 | 0 | 1 | FALSE | 2 | 0: 233, 1: 83 |
FP081 | 0 | 1 | FALSE | 2 | 0: 225, 1: 91 |
FP082 | 0 | 1 | FALSE | 2 | 1: 235, 0: 81 |
FP083 | 0 | 1 | FALSE | 2 | 0: 236, 1: 80 |
FP084 | 0 | 1 | FALSE | 2 | 0: 245, 1: 71 |
FP085 | 0 | 1 | FALSE | 2 | 0: 231, 1: 85 |
FP086 | 0 | 1 | FALSE | 2 | 0: 230, 1: 86 |
FP087 | 0 | 1 | FALSE | 2 | 1: 241, 0: 75 |
FP088 | 0 | 1 | FALSE | 2 | 0: 239, 1: 77 |
FP089 | 0 | 1 | FALSE | 2 | 0: 236, 1: 80 |
FP090 | 0 | 1 | FALSE | 2 | 0: 244, 1: 72 |
FP091 | 0 | 1 | FALSE | 2 | 0: 243, 1: 73 |
FP092 | 0 | 1 | FALSE | 2 | 0: 247, 1: 69 |
FP093 | 0 | 1 | FALSE | 2 | 0: 248, 1: 68 |
FP094 | 0 | 1 | FALSE | 2 | 0: 237, 1: 79 |
FP095 | 0 | 1 | FALSE | 2 | 0: 251, 1: 65 |
FP096 | 0 | 1 | FALSE | 2 | 0: 257, 1: 59 |
FP097 | 0 | 1 | FALSE | 2 | 0: 250, 1: 66 |
FP098 | 0 | 1 | FALSE | 2 | 0: 252, 1: 64 |
FP099 | 0 | 1 | FALSE | 2 | 0: 249, 1: 67 |
FP100 | 0 | 1 | FALSE | 2 | 0: 259, 1: 57 |
FP101 | 0 | 1 | FALSE | 2 | 0: 260, 1: 56 |
FP102 | 0 | 1 | FALSE | 2 | 0: 270, 1: 46 |
FP103 | 0 | 1 | FALSE | 2 | 0: 247, 1: 69 |
FP104 | 0 | 1 | FALSE | 2 | 0: 258, 1: 58 |
FP105 | 0 | 1 | FALSE | 2 | 0: 248, 1: 68 |
FP106 | 0 | 1 | FALSE | 2 | 0: 273, 1: 43 |
FP107 | 0 | 1 | FALSE | 2 | 0: 254, 1: 62 |
FP108 | 0 | 1 | FALSE | 2 | 0: 259, 1: 57 |
FP109 | 0 | 1 | FALSE | 2 | 0: 261, 1: 55 |
FP110 | 0 | 1 | FALSE | 2 | 0: 264, 1: 52 |
FP111 | 0 | 1 | FALSE | 2 | 0: 259, 1: 57 |
FP112 | 0 | 1 | FALSE | 2 | 0: 260, 1: 56 |
FP113 | 0 | 1 | FALSE | 2 | 0: 264, 1: 52 |
FP114 | 0 | 1 | FALSE | 2 | 0: 260, 1: 56 |
FP115 | 0 | 1 | FALSE | 2 | 0: 266, 1: 50 |
FP116 | 0 | 1 | FALSE | 2 | 0: 269, 1: 47 |
FP117 | 0 | 1 | FALSE | 2 | 0: 262, 1: 54 |
FP118 | 0 | 1 | FALSE | 2 | 0: 279, 1: 37 |
FP119 | 0 | 1 | FALSE | 2 | 0: 263, 1: 53 |
FP120 | 0 | 1 | FALSE | 2 | 0: 267, 1: 49 |
FP121 | 0 | 1 | FALSE | 2 | 0: 282, 1: 34 |
FP122 | 0 | 1 | FALSE | 2 | 0: 273, 1: 43 |
FP123 | 0 | 1 | FALSE | 2 | 0: 270, 1: 46 |
FP124 | 0 | 1 | FALSE | 2 | 0: 274, 1: 42 |
FP125 | 0 | 1 | FALSE | 2 | 0: 278, 1: 38 |
FP126 | 0 | 1 | FALSE | 2 | 0: 280, 1: 36 |
FP127 | 0 | 1 | FALSE | 2 | 0: 269, 1: 47 |
FP128 | 0 | 1 | FALSE | 2 | 0: 282, 1: 34 |
FP129 | 0 | 1 | FALSE | 2 | 0: 272, 1: 44 |
FP130 | 0 | 1 | FALSE | 2 | 0: 290, 1: 26 |
FP131 | 0 | 1 | FALSE | 2 | 0: 282, 1: 34 |
FP132 | 0 | 1 | FALSE | 2 | 0: 276, 1: 40 |
FP133 | 0 | 1 | FALSE | 2 | 0: 273, 1: 43 |
FP134 | 0 | 1 | FALSE | 2 | 0: 289, 1: 27 |
FP135 | 0 | 1 | FALSE | 2 | 0: 296, 1: 20 |
FP136 | 0 | 1 | FALSE | 2 | 0: 284, 1: 32 |
FP137 | 0 | 1 | FALSE | 2 | 0: 288, 1: 28 |
FP138 | 0 | 1 | FALSE | 2 | 0: 290, 1: 26 |
FP139 | 0 | 1 | FALSE | 2 | 0: 296, 1: 20 |
FP140 | 0 | 1 | FALSE | 2 | 0: 288, 1: 28 |
FP141 | 0 | 1 | FALSE | 2 | 0: 294, 1: 22 |
FP142 | 0 | 1 | FALSE | 2 | 0: 286, 1: 30 |
FP143 | 0 | 1 | FALSE | 2 | 0: 299, 1: 17 |
FP144 | 0 | 1 | FALSE | 2 | 0: 287, 1: 29 |
FP145 | 0 | 1 | FALSE | 2 | 0: 296, 1: 20 |
FP146 | 0 | 1 | FALSE | 2 | 0: 287, 1: 29 |
FP147 | 0 | 1 | FALSE | 2 | 0: 294, 1: 22 |
FP148 | 0 | 1 | FALSE | 2 | 0: 291, 1: 25 |
FP149 | 0 | 1 | FALSE | 2 | 0: 290, 1: 26 |
FP150 | 0 | 1 | FALSE | 2 | 0: 295, 1: 21 |
FP151 | 0 | 1 | FALSE | 2 | 0: 306, 1: 10 |
FP152 | 0 | 1 | FALSE | 2 | 0: 299, 1: 17 |
FP153 | 0 | 1 | FALSE | 2 | 0: 305, 1: 11 |
FP155 | 0 | 1 | FALSE | 2 | 0: 295, 1: 21 |
FP156 | 0 | 1 | FALSE | 2 | 0: 301, 1: 15 |
FP157 | 0 | 1 | FALSE | 2 | 0: 298, 1: 18 |
FP158 | 0 | 1 | FALSE | 2 | 0: 291, 1: 25 |
FP159 | 0 | 1 | FALSE | 2 | 0: 305, 1: 11 |
FP160 | 0 | 1 | FALSE | 2 | 0: 305, 1: 11 |
FP161 | 0 | 1 | FALSE | 2 | 0: 305, 1: 11 |
FP162 | 0 | 1 | FALSE | 2 | 1: 168, 0: 148 |
FP163 | 0 | 1 | FALSE | 2 | 0: 173, 1: 143 |
FP164 | 0 | 1 | FALSE | 2 | 1: 207, 0: 109 |
FP165 | 0 | 1 | FALSE | 2 | 0: 215, 1: 101 |
FP166 | 0 | 1 | FALSE | 2 | 0: 209, 1: 107 |
FP167 | 0 | 1 | FALSE | 2 | 0: 221, 1: 95 |
FP168 | 0 | 1 | FALSE | 2 | 1: 226, 0: 90 |
FP169 | 0 | 1 | FALSE | 2 | 0: 257, 1: 59 |
FP170 | 0 | 1 | FALSE | 2 | 0: 267, 1: 49 |
FP171 | 0 | 1 | FALSE | 2 | 0: 275, 1: 41 |
FP172 | 0 | 1 | FALSE | 2 | 0: 269, 1: 47 |
FP173 | 0 | 1 | FALSE | 2 | 0: 273, 1: 43 |
FP174 | 0 | 1 | FALSE | 2 | 0: 267, 1: 49 |
FP175 | 0 | 1 | FALSE | 2 | 0: 274, 1: 42 |
FP176 | 0 | 1 | FALSE | 2 | 0: 282, 1: 34 |
FP177 | 0 | 1 | FALSE | 2 | 0: 284, 1: 32 |
FP178 | 0 | 1 | FALSE | 2 | 0: 282, 1: 34 |
FP179 | 0 | 1 | FALSE | 2 | 0: 272, 1: 44 |
FP180 | 0 | 1 | FALSE | 2 | 0: 294, 1: 22 |
FP181 | 0 | 1 | FALSE | 2 | 0: 283, 1: 33 |
FP182 | 0 | 1 | FALSE | 2 | 0: 292, 1: 24 |
FP183 | 0 | 1 | FALSE | 2 | 0: 274, 1: 42 |
FP184 | 0 | 1 | FALSE | 2 | 0: 286, 1: 30 |
FP185 | 0 | 1 | FALSE | 2 | 0: 285, 1: 31 |
FP186 | 0 | 1 | FALSE | 2 | 0: 297, 1: 19 |
FP187 | 0 | 1 | FALSE | 2 | 0: 295, 1: 21 |
FP188 | 0 | 1 | FALSE | 2 | 0: 294, 1: 22 |
FP189 | 0 | 1 | FALSE | 2 | 0: 303, 1: 13 |
FP190 | 0 | 1 | FALSE | 2 | 0: 299, 1: 17 |
FP191 | 0 | 1 | FALSE | 2 | 0: 298, 1: 18 |
FP192 | 0 | 1 | FALSE | 2 | 0: 294, 1: 22 |
FP193 | 0 | 1 | FALSE | 2 | 0: 294, 1: 22 |
FP194 | 0 | 1 | FALSE | 2 | 0: 295, 1: 21 |
FP195 | 0 | 1 | FALSE | 2 | 0: 300, 1: 16 |
FP196 | 0 | 1 | FALSE | 2 | 0: 294, 1: 22 |
FP197 | 0 | 1 | FALSE | 2 | 0: 296, 1: 20 |
FP198 | 0 | 1 | FALSE | 2 | 0: 302, 1: 14 |
FP201 | 0 | 1 | FALSE | 2 | 0: 303, 1: 13 |
FP202 | 0 | 1 | FALSE | 2 | 0: 232, 1: 84 |
FP203 | 0 | 1 | FALSE | 2 | 0: 273, 1: 43 |
FP204 | 0 | 1 | FALSE | 2 | 0: 286, 1: 30 |
FP205 | 0 | 1 | FALSE | 2 | 0: 291, 1: 25 |
FP206 | 0 | 1 | FALSE | 2 | 0: 300, 1: 16 |
FP207 | 0 | 1 | FALSE | 2 | 0: 302, 1: 14 |
FP208 | 0 | 1 | FALSE | 2 | 0: 273, 1: 43 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
Log_Solubility | 0 | 1 | -2.8 | 2.08 | -10.41 | -3.95 | -2.48 | -1.37 | 1.07 | ▁▁▅▇▅ |
MolWeight | 0 | 1 | 0.0 | 1.00 | -2.46 | -0.78 | -0.06 | 0.81 | 2.18 | ▁▇▇▇▃ |
NumBonds | 0 | 1 | 0.0 | 1.00 | -2.92 | -0.67 | 0.03 | 0.57 | 2.55 | ▁▂▇▃▂ |
NumMultBonds | 0 | 1 | 0.0 | 1.00 | -1.24 | -1.04 | -0.06 | 0.72 | 4.06 | ▇▇▅▁▁ |
NumRotBonds | 0 | 1 | 0.0 | 1.00 | -0.82 | -0.82 | -0.40 | 0.44 | 5.94 | ▇▁▁▁▁ |
NumDblBonds | 0 | 1 | 0.0 | 1.00 | -0.76 | -0.76 | 0.09 | 0.09 | 4.35 | ▇▁▁▁▁ |
NumCarbon | 0 | 1 | 0.0 | 1.00 | -2.71 | -0.70 | -0.21 | 0.56 | 2.23 | ▁▂▇▅▂ |
NumNitrogen | 0 | 1 | 0.0 | 1.00 | -0.63 | -0.63 | -0.63 | 0.26 | 4.71 | ▇▂▁▁▁ |
NumOxygen | 0 | 1 | 0.0 | 1.00 | -0.92 | -0.92 | -0.26 | 0.40 | 5.02 | ▇▃▁▁▁ |
NumSulfer | 0 | 1 | 0.0 | 1.00 | -0.28 | -0.28 | -0.28 | -0.28 | 8.06 | ▇▁▁▁▁ |
NumChlorine | 0 | 1 | 0.0 | 1.00 | -0.40 | -0.40 | -0.40 | -0.40 | 6.02 | ▇▁▁▁▁ |
NumHalogen | 0 | 1 | 0.0 | 1.00 | -0.48 | -0.48 | -0.48 | 0.20 | 5.57 | ▇▁▁▁▁ |
NumRings | 0 | 1 | 0.0 | 1.00 | -1.14 | -0.32 | -0.32 | 0.49 | 3.74 | ▇▃▁▁▁ |
HydrophilicFactor | 0 | 1 | 0.0 | 1.00 | -0.90 | -0.68 | -0.30 | 0.32 | 5.19 | ▇▂▁▁▁ |
SurfaceArea1 | 0 | 1 | 0.0 | 1.00 | -1.04 | -0.75 | -0.21 | 0.53 | 5.37 | ▇▃▁▁▁ |
SurfaceArea2 | 0 | 1 | 0.0 | 1.00 | -1.05 | -0.77 | -0.26 | 0.52 | 5.00 | ▇▃▁▁▁ |
###################################
# Verifying the data dimensions
# for the test set
###################################
dim(PMA_PreModelling_Test)
## [1] 316 221
##################################
# Loading dataset
##################################
# Use the pre-modelling train set for exploratory data analysis.
EDA <- PMA_PreModelling_Train
##################################
# Listing all predictors
##################################
# Exclude the response column to obtain the predictor matrix.
EDA.Predictors <- EDA[, !names(EDA) %in% c("Log_Solubility")]
##################################
# Listing all numeric predictors
##################################
# Select the numeric predictors and report how many there are.
EDA.Predictors.Numeric <- EDA.Predictors[, sapply(EDA.Predictors, is.numeric)]
ncol(EDA.Predictors.Numeric)
## [1] 15
names(EDA.Predictors.Numeric)
## [1] "MolWeight" "NumBonds" "NumMultBonds"
## [4] "NumRotBonds" "NumDblBonds" "NumCarbon"
## [7] "NumNitrogen" "NumOxygen" "NumSulfer"
## [10] "NumChlorine" "NumHalogen" "NumRings"
## [13] "HydrophilicFactor" "SurfaceArea1" "SurfaceArea2"
##################################
# Listing all factor predictors
##################################
# Select the factor (fingerprint) predictors and report how many there are.
EDA.Predictors.Factor <- EDA.Predictors[, sapply(EDA.Predictors, is.factor)]
ncol(EDA.Predictors.Factor)
## [1] 205
names(EDA.Predictors.Factor)
## [1] "FP001" "FP002" "FP003" "FP004" "FP005" "FP006" "FP007" "FP008" "FP009"
## [10] "FP010" "FP011" "FP012" "FP013" "FP014" "FP015" "FP016" "FP017" "FP018"
## [19] "FP019" "FP020" "FP021" "FP022" "FP023" "FP024" "FP025" "FP026" "FP027"
## [28] "FP028" "FP029" "FP030" "FP031" "FP032" "FP033" "FP034" "FP035" "FP036"
## [37] "FP037" "FP038" "FP039" "FP040" "FP041" "FP042" "FP043" "FP044" "FP045"
## [46] "FP046" "FP047" "FP048" "FP049" "FP050" "FP051" "FP052" "FP053" "FP054"
## [55] "FP055" "FP056" "FP057" "FP058" "FP059" "FP060" "FP061" "FP062" "FP063"
## [64] "FP064" "FP065" "FP066" "FP067" "FP068" "FP069" "FP070" "FP071" "FP072"
## [73] "FP073" "FP074" "FP075" "FP076" "FP077" "FP078" "FP079" "FP080" "FP081"
## [82] "FP082" "FP083" "FP084" "FP085" "FP086" "FP087" "FP088" "FP089" "FP090"
## [91] "FP091" "FP092" "FP093" "FP094" "FP095" "FP096" "FP097" "FP098" "FP099"
## [100] "FP100" "FP101" "FP102" "FP103" "FP104" "FP105" "FP106" "FP107" "FP108"
## [109] "FP109" "FP110" "FP111" "FP112" "FP113" "FP114" "FP115" "FP116" "FP117"
## [118] "FP118" "FP119" "FP120" "FP121" "FP122" "FP123" "FP124" "FP125" "FP126"
## [127] "FP127" "FP128" "FP129" "FP130" "FP131" "FP132" "FP133" "FP134" "FP135"
## [136] "FP136" "FP137" "FP138" "FP139" "FP140" "FP141" "FP142" "FP143" "FP144"
## [145] "FP145" "FP146" "FP147" "FP148" "FP149" "FP150" "FP151" "FP152" "FP153"
## [154] "FP155" "FP156" "FP157" "FP158" "FP159" "FP160" "FP161" "FP162" "FP163"
## [163] "FP164" "FP165" "FP166" "FP167" "FP168" "FP169" "FP170" "FP171" "FP172"
## [172] "FP173" "FP174" "FP175" "FP176" "FP177" "FP178" "FP179" "FP180" "FP181"
## [181] "FP182" "FP183" "FP184" "FP185" "FP186" "FP187" "FP188" "FP189" "FP190"
## [190] "FP191" "FP192" "FP193" "FP194" "FP195" "FP196" "FP197" "FP198" "FP201"
## [199] "FP202" "FP203" "FP204" "FP205" "FP206" "FP207" "FP208"
##################################
# Formulating the scatter plots
##################################
# Scatter plots of each numeric predictor against the log-solubility
# response, overlaying a grid, the raw points, and a smoother
# (caret::featurePlot with lattice); axis labels are suppressed.
featurePlot(x = EDA.Predictors.Numeric,
y = EDA$Log_Solubility,
between = list(x = 1, y = 1),
type = c("g", "p", "smooth"),
labels = rep("", 2))
##################################
# Restructuring the dataset
# for boxplot analysis
##################################
# Extract the log-solubility response from the pre-processed train data
Log_Solubility <- DPA$solTrainY
# Pair the response with the factor-typed structural descriptors so each
# descriptor level can be boxplotted against solubility
EDA.Boxplot.Source <- cbind(Log_Solubility,
                            EDA.Predictors.Factor)
# Gather descriptors FP001-FP040 into long format
# (Descriptor = column name, Structure = 0/1 value) for boxplotting
EDA.Boxplot.Gathered.Group1 <- gather(EDA.Boxplot.Source,
                                      'FP001','FP002','FP003','FP004','FP005',
                                      'FP006','FP007','FP008','FP009','FP010',
                                      'FP011','FP012','FP013','FP014','FP015',
                                      'FP016','FP017','FP018','FP019','FP020',
                                      'FP021','FP022','FP023','FP024','FP025',
                                      'FP026','FP027','FP028','FP029','FP030',
                                      'FP031','FP032','FP033','FP034','FP035',
                                      'FP036','FP037','FP038','FP039','FP040',
                                      key="Descriptor",
                                      value="Structure")
# Gather descriptors FP041-FP080 into long format for boxplotting
EDA.Boxplot.Gathered.Group2 <- gather(EDA.Boxplot.Source,
                                      'FP041','FP042','FP043','FP044','FP045',
                                      'FP046','FP047','FP048','FP049','FP050',
                                      'FP051','FP052','FP053','FP054','FP055',
                                      'FP056','FP057','FP058','FP059','FP060',
                                      'FP061','FP062','FP063','FP064','FP065',
                                      'FP066','FP067','FP068','FP069','FP070',
                                      'FP071','FP072','FP073','FP074','FP075',
                                      'FP076','FP077','FP078','FP079','FP080',
                                      key="Descriptor",
                                      value="Structure")
# Gather descriptors FP081-FP120 into long format for boxplotting
EDA.Boxplot.Gathered.Group3 <- gather(EDA.Boxplot.Source,
                                      'FP081','FP082','FP083','FP084','FP085',
                                      'FP086','FP087','FP088','FP089','FP090',
                                      'FP091','FP092','FP093','FP094','FP095',
                                      'FP096','FP097','FP098','FP099','FP100',
                                      'FP101','FP102','FP103','FP104','FP105',
                                      'FP106','FP107','FP108','FP109','FP110',
                                      'FP111','FP112','FP113','FP114','FP115',
                                      'FP116','FP117','FP118','FP119','FP120',
                                      key="Descriptor",
                                      value="Structure")
# Gather descriptors FP121-FP161 into long format for boxplotting
# (FP154 is absent from the dataset, hence the jump FP153 -> FP155)
EDA.Boxplot.Gathered.Group4 <- gather(EDA.Boxplot.Source,
                                      'FP121','FP122','FP123','FP124','FP125',
                                      'FP126','FP127','FP128','FP129','FP130',
                                      'FP131','FP132','FP133','FP134','FP135',
                                      'FP136','FP137','FP138','FP139','FP140',
                                      'FP141','FP142','FP143','FP144','FP145',
                                      'FP146','FP147','FP148','FP149','FP150',
                                      'FP151','FP152','FP153','FP155','FP156',
                                      'FP157','FP158','FP159','FP160','FP161',
                                      key="Descriptor",
                                      value="Structure")
# Gather descriptors FP162-FP208 into long format for boxplotting
# (FP199 and FP200 are absent from the dataset, hence FP198 -> FP201)
EDA.Boxplot.Gathered.Group5 <- gather(EDA.Boxplot.Source,
                                      'FP162','FP163','FP164','FP165','FP166',
                                      'FP167','FP168','FP169','FP170','FP171',
                                      'FP172','FP173','FP174','FP175','FP176',
                                      'FP177','FP178','FP179','FP180','FP181',
                                      'FP182','FP183','FP184','FP185','FP186',
                                      'FP187','FP188','FP189','FP190','FP191',
                                      'FP192','FP193','FP194','FP195','FP196',
                                      'FP197','FP198','FP201','FP202','FP203',
                                      'FP204','FP205','FP206','FP207','FP208',
                                      key="Descriptor",
                                      value="Structure")
# Conditioned boxplots of log-solubility by descriptor value (0/1),
# one panel per descriptor, for each gathered group of predictors.
# print() is used explicitly so the lattice objects render when sourced.
plot_descriptor_boxplots <- function(gathered_data) {
  # One bwplot trellis: 9 columns x 5 rows of descriptor panels
  print(bwplot(Log_Solubility ~ Structure | Descriptor,
               data = gathered_data,
               ylab = "Log Solubility",
               xlab = "Structure",
               layout = c(9, 5)))
}
plot_descriptor_boxplots(EDA.Boxplot.Gathered.Group5)
plot_descriptor_boxplots(EDA.Boxplot.Gathered.Group4)
plot_descriptor_boxplots(EDA.Boxplot.Gathered.Group3)
plot_descriptor_boxplots(EDA.Boxplot.Gathered.Group2)
plot_descriptor_boxplots(EDA.Boxplot.Gathered.Group1)
##################################
# Creating a local object
# for the train set
##################################
# Work on a model-specific copy so linear-regression prep does not
# alter the shared pre-modelling train set
PMA_PreModelling_Train_LR <- PMA_PreModelling_Train
##################################
# Creating consistent fold assignments
# for the 10-Fold Cross Validation process
##################################
# Fix the RNG so the same folds are reused across all models
set.seed(12345678)
KFold_Indices <- createFolds(PMA_PreModelling_Train_LR$Log_Solubility,
                             k = 10,
                             returnTrain = TRUE)
KFold_Control <- trainControl(method = "cv",
                              index = KFold_Indices)
##################################
# Setting the conditions
# for hyperparameter tuning
##################################
# No hyperparameter tuning process conducted
# hyperparameter=intercept fixed to TRUE
##################################
# Running the linear regression model
# by setting the caret method to 'lm'
##################################
set.seed(12345678)
# Fit ordinary least squares on all predictors except the response,
# resampled with the shared 10-fold CV control
LR_Tune <- train(x = PMA_PreModelling_Train_LR[, !names(PMA_PreModelling_Train_LR) %in% c("Log_Solubility")],
                 y = PMA_PreModelling_Train_LR$Log_Solubility,
                 method = "lm",
                 trControl = KFold_Control)
##################################
# Reporting the cross-validation results
# for the train set
##################################
# Display the resampled (10-fold CV) performance summary
print(LR_Tune)
## Linear Regression
##
## 951 samples
## 220 predictors
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 857, 855, 855, 856, 857, 856, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 0.6871912 0.8862948 0.5149368
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
# Inspect the fitted lm object underlying the caret model
LR_Tune$finalModel
##
## Call:
## lm(formula = .outcome ~ ., data = dat)
##
## Coefficients:
## (Intercept) FP0011 FP0021 FP0031
## -4.066000 0.203162 0.010415 -0.083340
## FP0041 FP0051 FP0061 FP0071
## -0.371758 -0.377219 0.087430 0.021977
## FP0081 FP0091 FP0101 FP0111
## -0.006028 -0.596654 0.617362 0.073236
## FP0121 FP0131 FP0141 FP0151
## -0.080938 -0.677007 0.318195 -0.340700
## FP0161 FP0171 FP0181 FP0191
## -0.087389 -0.132981 -0.501254 0.182633
## FP0201 FP0211 FP0221 FP0231
## -0.068780 0.055915 0.362833 -0.254665
## FP0241 FP0251 FP0261 FP0271
## -0.427291 0.754962 0.357680 0.015031
## FP0281 FP0291 FP0301 FP0311
## 0.083591 -0.005925 -0.203563 0.263579
## FP0321 FP0331 FP0341 FP0351
## -1.142536 0.991357 -0.383055 -0.274878
## FP0361 FP0371 FP0381 FP0391
## 0.024001 0.228933 0.349438 -0.262034
## FP0401 FP0411 FP0421 FP0431
## 0.623611 -0.338887 -0.353304 -0.012580
## FP0441 FP0451 FP0461 FP0471
## -0.318968 0.093365 -0.082762 -0.004447
## FP0481 FP0491 FP0501 FP0511
## 0.303096 -0.022564 -0.171177 0.238581
## FP0521 FP0531 FP0541 FP0551
## -0.315815 0.313428 -0.137279 -0.411489
## FP0561 FP0571 FP0581 FP0591
## -0.118550 0.050641 0.116467 -0.088098
## FP0601 FP0611 FP0621 FP0631
## 0.327283 0.059817 -0.159297 0.839179
## FP0641 FP0651 FP0661 FP0671
## 0.301279 -0.100770 0.210044 -0.240241
## FP0681 FP0691 FP0701 FP0711
## 0.250900 -0.013854 0.107883 0.213760
## FP0721 FP0731 FP0741 FP0751
## 0.941298 -0.534633 0.169600 0.198712
## FP0761 FP0771 FP0781 FP0791
## 0.370435 0.137173 -0.451360 0.644861
## FP0801 FP0811 FP0821 FP0831
## 0.288844 -0.424812 0.080198 -0.486889
## FP0841 FP0851 FP0861 FP0871
## 0.292106 -0.471828 0.003212 -0.200902
## FP0881 FP0891 FP0901 FP0911
## 0.245689 0.241490 -0.130453 0.260164
## FP0921 FP0931 FP0941 FP0951
## 0.253515 0.322089 -0.297586 0.009927
## FP0961 FP0971 FP0981 FP0991
## -0.040920 -0.257427 -0.273002 0.302075
## FP1001 FP1011 FP1021 FP1031
## -0.567162 0.010766 0.101139 -0.120334
## FP1041 FP1051 FP1061 FP1071
## -0.126282 -0.172136 0.017081 0.273796
## FP1081 FP1091 FP1101 FP1111
## -0.280250 0.552120 0.343563 -0.535813
## FP1121 FP1131 FP1141 FP1151
## 0.282474 0.178287 -0.169603 -0.078620
## FP1161 FP1171 FP1181 FP1191
## 0.035097 0.354961 -0.202152 0.517463
## FP1201 FP1211 FP1221 FP1231
## -0.205402 -0.256731 0.229096 -0.090138
## FP1241 FP1251 FP1261 FP1271
## 0.197173 0.026748 -0.559281 -0.557537
## FP1281 FP1291 FP1301 FP1311
## -0.236470 -0.017449 -0.323162 0.205262
## FP1321 FP1331 FP1341 FP1351
## 0.049595 -0.275143 -0.286978 0.216807
## FP1361 FP1371 FP1381 FP1391
## 0.061525 -0.065240 -0.083785 -0.527359
## FP1401 FP1411 FP1421 FP1431
## 0.181520 0.320718 0.581450 0.682509
## FP1441 FP1451 FP1461 FP1471
## 0.376819 -0.414159 0.036687 0.208845
## FP1481 FP1491 FP1501 FP1511
## -0.149271 0.011702 0.128825 0.218887
## FP1521 FP1531 FP1551 FP1561
## -0.270079 -0.029909 0.289836 -0.599602
## FP1571 FP1581 FP1591 FP1601
## -0.665030 0.147579 0.094096 0.063586
## FP1611 FP1621 FP1631 FP1641
## -0.291284 0.137654 0.436455 0.545472
## FP1651 FP1661 FP1671 FP1681
## 0.418963 -0.079093 -0.656888 -0.061270
## FP1691 FP1701 FP1711 FP1721
## -0.104876 -0.358562 0.486741 -0.144907
## FP1731 FP1741 FP1751 FP1761
## 0.547299 -0.167793 0.062189 0.390344
## FP1771 FP1781 FP1791 FP1801
## -0.031023 0.296177 -0.207167 -1.163053
## FP1811 FP1821 FP1831 FP1841
## 0.270999 -0.063148 0.093444 0.436498
## FP1851 FP1861 FP1871 FP1881
## -0.267287 -0.166962 0.398059 0.120847
## FP1891 FP1901 FP1911 FP1921
## -0.367590 -0.023868 0.084442 -0.105360
## FP1931 FP1941 FP1951 FP1961
## 0.134538 0.361550 -0.204425 0.042328
## FP1971 FP1981 FP2011 FP2021
## 0.059010 0.035856 -0.389306 0.538293
## FP2031 FP2041 FP2051 FP2061
## 0.147608 -0.205335 0.109915 0.017044
## FP2071 FP2081 MolWeight NumBonds
## 0.037643 -0.371259 -0.524968 -0.717804
## NumMultBonds NumRotBonds NumDblBonds NumCarbon
## -0.740106 -0.523744 0.016234 -0.079046
## NumNitrogen NumOxygen NumSulfer NumChlorine
## 0.920733 1.593098 0.338963 -0.470240
## NumHalogen NumRings HydrophilicFactor SurfaceArea1
## -0.168289 -0.504171 0.160828 -0.030703
## SurfaceArea2
## -1.160557
# Show the resampled performance metrics table
LR_Tune$results
## intercept RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 TRUE 0.6871912 0.8862948 0.5149368 0.05361825 0.02595212 0.04393313
# Capture (and print) the cross-validated train RMSE
(LR_Train_RMSE <- LR_Tune$results$RMSE)
## [1] 0.6871912
# Capture (and print) the cross-validated train R-squared
(LR_Train_Rsquared <- LR_Tune$results$Rsquared)
## [1] 0.8862948
##################################
# Identifying and plotting the
# best model predictors
##################################
# Scaled (0-100) variable importance from the linear regression fit
LR_VarImp <- varImp(LR_Tune, scale = TRUE)
# Plot the 25 most important predictors
plot(LR_VarImp,
     top=25,
     scales=list(y=list(cex = .95)),
     main="Ranked Variable Importance : Linear Regression",
     xlab="Scaled Variable Importance Metrics",
     ylab="Predictors",
     cex=2,
     origin=0,
     alpha=0.45)
##################################
# Independently evaluating the model
# on the test set
##################################
# Pair the observed test responses with the model's predictions
LR_Test <- data.frame(LR_Observed = PMA_PreModelling_Test$Log_Solubility,
                      LR_Predicted = predict(LR_Tune,
                                             PMA_PreModelling_Test[, !names(PMA_PreModelling_Test) %in% c("Log_Solubility")]))
LR_Test
## LR_Observed LR_Predicted
## 20 0.93 0.85008095
## 21 0.85 0.22645838
## 23 0.81 -0.42222262
## 25 0.74 1.00132885
## 28 0.61 -0.18792964
## 31 0.58 1.57625536
## 32 0.57 0.46177128
## 33 0.56 0.57563083
## 34 0.52 0.12433631
## 37 0.45 -0.94825875
## 38 0.40 -0.67248930
## 42 0.36 -0.77359940
## 49 0.22 -0.06846625
## 54 0.08 -0.42768142
## 55 0.07 -1.18852676
## 58 0.02 -0.52352868
## 60 0.00 -0.23033399
## 61 -0.01 0.22225962
## 65 -0.07 -0.25802638
## 69 -0.12 -0.88885630
## 73 -0.17 0.71388503
## 86 -0.29 -0.05023695
## 90 -0.38 -0.79845706
## 91 -0.38 -0.79758079
## 93 -0.39 -1.11064695
## 96 -0.42 -0.84166972
## 98 -0.44 -0.82750408
## 100 -0.46 1.50757611
## 104 -0.48 -2.54181529
## 112 -0.60 -1.14251902
## 115 -0.63 -2.48231692
## 119 -0.66 -0.71467927
## 128 -0.72 -0.68561681
## 130 -0.72 -0.20123827
## 139 -0.80 0.38583368
## 143 -0.80 -1.26130642
## 145 -0.82 0.33086174
## 146 -0.82 -0.53799096
## 149 -0.84 0.24312506
## 150 -0.85 -0.77028093
## 152 -0.85 -0.45603900
## 157 -0.87 -1.83556260
## 161 -0.89 -1.11249644
## 162 -0.90 -0.06891881
## 166 -0.96 -1.20868589
## 167 -0.96 -0.71372802
## 173 -0.99 -0.37558064
## 176 -1.01 -0.78452740
## 182 -1.09 -1.21901070
## 187 -1.12 -0.41097472
## 190 -1.14 -0.23496326
## 194 -1.17 -1.79694522
## 195 -1.19 -1.64619814
## 201 -1.22 -1.40479017
## 207 -1.27 -2.05825298
## 208 -1.28 -1.27095362
## 215 -1.32 -1.18288381
## 222 -1.38 -1.33751727
## 224 -1.39 -1.56123858
## 231 -1.42 -1.31682939
## 236 -1.47 -0.91354257
## 237 -1.47 -1.58466302
## 240 -1.50 -0.41486011
## 243 -1.52 -1.20648193
## 248 -1.54 -1.23470906
## 251 -1.55 -2.04200996
## 256 -1.56 -3.35582923
## 258 -1.57 -1.85961077
## 262 -1.60 -1.63923610
## 266 -1.60 -2.54563385
## 272 -1.62 -1.31290546
## 280 -1.64 -2.41199921
## 283 -1.67 -1.67272479
## 286 -1.70 -3.50280923
## 287 -1.70 -2.01667176
## 289 -1.71 -1.87972128
## 290 -1.71 -2.28330830
## 298 -1.75 -1.77512870
## 305 -1.78 -1.84911044
## 306 -1.78 -2.47902069
## 312 -1.82 -1.68083809
## 320 -1.87 -1.85877716
## 325 -1.89 -2.09870742
## 332 -1.92 -1.92592002
## 333 -1.92 -1.37779054
## 335 -1.92 -1.34410458
## 339 -1.94 -3.26845478
## 346 -1.99 -2.89788882
## 347 -2.00 -2.25386758
## 350 -2.05 -2.20365376
## 353 -2.06 -1.35415444
## 358 -2.08 -2.06119614
## 365 -2.10 -2.50159849
## 367 -2.11 -1.50546116
## 370 -2.12 -0.56780235
## 379 -2.17 -2.09303346
## 386 -2.21 -1.88611582
## 394 -2.24 -3.67377079
## 396 -2.24 -1.68153752
## 400 -2.29 -2.40037711
## 404 -2.31 -2.31411050
## 405 -2.32 -2.08239282
## 413 -2.35 -2.56805312
## 415 -2.35 -2.44532458
## 417 -2.36 -2.46699221
## 418 -2.36 -2.46215229
## 423 -2.38 -2.36633963
## 434 -2.42 -2.63030569
## 437 -2.43 -2.88959719
## 440 -2.44 -3.15345313
## 449 -2.52 -2.24372454
## 450 -2.53 -2.99815343
## 457 -2.57 -3.20624071
## 467 -2.62 -2.84173397
## 469 -2.62 -1.94249851
## 474 -2.64 -2.91609082
## 475 -2.64 -2.54094314
## 485 -2.70 -2.24480771
## 504 -2.82 -1.78714328
## 511 -2.88 -2.82931574
## 512 -2.89 -2.33100039
## 517 -2.92 -1.43326292
## 519 -2.93 -3.84688550
## 520 -2.96 -2.85464686
## 522 -2.98 -2.33742088
## 527 -3.01 -2.92251938
## 528 -3.01 -3.36800030
## 529 -3.02 -4.40700551
## 537 -3.07 -3.56054879
## 540 -3.09 -2.98470384
## 541 -3.11 -3.09498026
## 547 -3.13 -3.72220144
## 550 -3.14 -1.94158777
## 555 -3.15 -3.45614360
## 564 -3.22 -2.57629523
## 570 -3.26 -3.30790592
## 573 -3.27 -2.90082040
## 575 -3.27 -2.87420511
## 578 -3.30 -3.05437242
## 581 -3.31 -2.41393690
## 585 -3.33 -2.18265117
## 590 -3.37 -2.40989770
## 601 -3.43 -3.38729357
## 602 -3.43 -2.35270924
## 607 -3.48 -2.99973092
## 610 -3.51 -3.17370073
## 618 -3.59 -2.17873527
## 624 -3.61 -2.51468217
## 626 -3.63 -3.51037615
## 627 -3.63 -3.21922593
## 634 -3.68 -2.80532421
## 640 -3.71 -3.07071758
## 642 -3.74 -2.40778100
## 643 -3.75 -3.79963926
## 644 -3.75 -2.29284417
## 645 -3.77 -3.17179970
## 646 -3.77 -4.15491500
## 647 -3.78 -5.11516765
## 652 -3.81 -3.82159114
## 658 -3.95 -4.53359831
## 659 -3.96 -5.20777439
## 660 -3.96 -4.11333917
## 664 -4.00 -3.28372401
## 666 -4.02 -5.01239248
## 667 -4.04 -4.22264635
## 675 -4.12 -3.59150010
## 680 -4.15 -4.38735138
## 681 -4.16 -3.23757225
## 687 -4.17 -4.47628097
## 694 -4.21 -4.86055552
## 697 -4.23 -3.76913678
## 701 -4.25 -3.41876819
## 705 -4.30 -3.75385720
## 707 -4.31 -5.68574464
## 710 -4.35 -4.69689933
## 716 -4.40 -3.92156254
## 719 -4.40 -4.15434815
## 720 -4.43 -4.79176377
## 725 -4.46 -4.45676917
## 727 -4.47 -3.26014350
## 730 -4.51 -4.89667861
## 738 -4.60 -3.69687365
## 745 -4.64 -4.67322230
## 748 -4.69 -4.91476196
## 751 -4.71 -4.14614635
## 756 -4.77 -3.76066662
## 766 -4.95 -4.14144414
## 769 -4.98 -4.95029159
## 783 -5.21 -5.69185088
## 785 -5.22 -5.27570230
## 790 -5.28 -4.51928303
## 793 -5.31 -2.82904775
## 795 -5.35 -4.87015999
## 796 -5.37 -4.94167346
## 797 -5.40 -4.52018485
## 801 -5.43 -4.32651144
## 811 -5.65 -5.43467584
## 812 -5.66 -4.20061155
## 815 -6.70 -4.83502856
## 816 -5.72 -5.24964473
## 817 -6.00 -6.99661140
## 824 -6.25 -6.31671321
## 825 -6.26 -6.26377712
## 826 -6.27 -6.35209719
## 830 -6.35 -5.84116193
## 837 -6.57 -6.13080915
## 838 -6.62 -5.19880527
## 844 -6.96 -6.09270325
## 845 -7.02 -7.86766325
## 847 -7.20 -7.30608069
## 850 -7.28 -7.06630010
## 852 -7.32 -7.58425651
## 853 -7.39 -7.68733990
## 861 -7.82 -8.24338048
## 868 -8.23 -8.86554341
## 874 -8.94 -8.46264055
## 879 1.07 -0.06836081
## 895 0.43 0.09145293
## 899 0.32 -0.18874752
## 903 0.00 0.10236587
## 917 -0.40 -0.87210988
## 927 -0.52 -0.56040022
## 929 -0.55 -0.64123656
## 931 -0.60 -0.79452125
## 933 -0.62 -2.65915478
## 944 -0.85 -1.24615891
## 947 -0.89 -0.73849423
## 949 -0.93 -0.47034398
## 953 -0.96 -0.36298026
## 958 -1.06 -2.05945241
## 961 -1.10 -1.48846796
## 963 -1.12 -1.06870665
## 964 -1.15 -0.72843534
## 973 -1.28 -0.66198581
## 976 -1.30 -1.76851393
## 977 -1.31 -1.18177298
## 980 -1.35 -3.02865129
## 983 -1.39 -1.98416265
## 984 -1.41 -1.64619814
## 986 -1.41 -1.72732694
## 989 -1.42 -0.66925460
## 991 -1.46 -1.44897044
## 996 -1.50 -1.76647533
## 997 -1.50 -1.73791710
## 999 -1.52 -1.53294349
## 1000 -1.52 -0.78304337
## 1003 -1.59 -1.73368722
## 1008 -1.61 -0.90572068
## 1009 -1.63 -1.22653769
## 1014 -1.71 -2.25389642
## 1015 -1.83 -2.10325323
## 1040 -2.05 -2.44739048
## 1042 -2.06 -2.32809439
## 1043 -2.07 -3.91495362
## 1050 -2.15 -2.73691095
## 1052 -2.16 -0.89823401
## 1056 -1.99 0.43630827
## 1070 -2.36 -1.75081513
## 1073 -2.38 -3.74589678
## 1074 -2.39 -1.48538378
## 1079 -2.46 -2.18137666
## 1080 -2.49 -2.21816817
## 1085 -2.54 -2.62996167
## 1087 -2.55 -2.95995956
## 1096 -2.63 -2.40473884
## 1099 -2.64 -1.49712170
## 1100 -2.67 -2.57217130
## 1102 -2.68 -2.40322416
## 1107 -2.77 -2.61897426
## 1109 -2.78 -3.09105143
## 1114 -2.82 -2.71897133
## 1118 -2.92 -3.45235955
## 1123 -3.03 -3.05502619
## 1132 -3.12 -3.50894894
## 1134 -3.16 -3.23220453
## 1137 -3.19 -2.99156103
## 1154 -3.54 -3.42070439
## 1155 -3.54 -2.37419582
## 1157 -3.59 -3.61119266
## 1162 -3.66 -2.94788569
## 1164 -3.68 -2.49781612
## 1171 -3.75 -3.87066896
## 1172 -3.76 -4.12315551
## 1175 -3.78 -3.64460328
## 1177 -3.80 -3.99045861
## 1179 -3.80 -3.34874930
## 1183 -3.85 -3.26377127
## 1185 -3.89 -3.24457394
## 1189 -3.95 -4.13454233
## 1211 -4.29 -4.81876716
## 1218 -4.42 -3.57301825
## 1224 -4.48 -4.18721344
## 1225 -4.48 -3.37011104
## 1227 -4.53 -4.89976101
## 1232 -4.63 -4.38145043
## 1235 -4.73 -3.93459358
## 1238 -4.84 -4.01093250
## 1240 -4.89 -3.79325854
## 1241 -4.89 -4.85088996
## 1248 -5.26 -5.73628615
## 1258 -6.09 -5.06155374
## 1261 -6.29 -5.76189270
## 1263 -6.29 -6.29644038
## 1269 -6.89 -5.15367692
## 1270 -6.96 -6.85306121
## 1271 -7.00 -6.79203297
## 1272 -7.05 -7.63440381
## 1280 -8.30 -8.78984377
## 1286 -8.66 -9.26339711
## 1287 -9.03 -10.09374869
## 1289 -10.41 -10.10907441
## 1290 -7.89 -7.36013668
## 1291 -2.32 -1.68789497
## 1294 0.39 -3.02945536
## 1305 -2.90 -5.11098058
## 1308 -2.47 -5.09654935
##################################
# Reporting the independent evaluation results
# for the test set
##################################
# postResample(pred, obs): column 2 holds predictions, column 1 observations
(LR_Test_Metrics <- postResample(LR_Test[,2], LR_Test[,1]))
## RMSE Rsquared MAE
## 0.7725809 0.8643929 0.5711292
(LR_Test_RMSE <- LR_Test_Metrics[1])
## RMSE
## 0.7725809
(LR_Test_Rsquared <- LR_Test_Metrics[2])
## Rsquared
## 0.8643929
##################################
# Transforming factor predictors
# as required by the nature of the model
##################################
# Creating a local object
# for the train and test sets
##################################
# Coerce every column to numeric (via character to preserve factor labels)
# since the ridge model requires a fully numeric design matrix
PMA_PreModelling_Train_PLR_R <- as.data.frame(lapply(PMA_PreModelling_Train, function(x) as.numeric(as.character(x))))
dim(PMA_PreModelling_Train_PLR_R)
## [1] 951 221
PMA_PreModelling_Test_PLR_R <- as.data.frame(lapply(PMA_PreModelling_Test, function(x) as.numeric(as.character(x))))
dim(PMA_PreModelling_Test_PLR_R)
## [1] 316 221
##################################
# Creating consistent fold assignments
# for the 10-Fold Cross Validation process
##################################
# Same seed as the LR model so fold assignments are comparable
set.seed(12345678)
KFold_Indices <- createFolds(PMA_PreModelling_Train_PLR_R$Log_Solubility,
                             k = 10,
                             returnTrain = TRUE)
KFold_Control <- trainControl(method = "cv",
                              index = KFold_Indices)
##################################
# Setting the conditions
# for hyperparameter tuning
##################################
# Ridge penalty grid: five lambda values evenly spaced over [0, 0.10]
PLR_R_Grid <- expand.grid(lambda = seq(0, 0.10, length = 5))
##################################
# Running the penalized linear regression (ridge) model
# by setting the caret method to 'ridge'
##################################
set.seed(12345678)
# Predictors are centered and scaled before fitting, as the ridge
# penalty is sensitive to predictor magnitudes
PLR_R_Tune <- train(x = PMA_PreModelling_Train_PLR_R[, !names(PMA_PreModelling_Train_PLR_R) %in% c("Log_Solubility")],
                    y = PMA_PreModelling_Train_PLR_R$Log_Solubility,
                    method = "ridge",
                    tuneGrid = PLR_R_Grid,
                    trControl = KFold_Control,
                    preProc = c("center", "scale"))
##################################
# Reporting the cross-validation results
# for the train set
##################################
# Display the resampled (10-fold CV) performance across the lambda grid
print(PLR_R_Tune)
## Ridge Regression
##
## 951 samples
## 220 predictors
##
## Pre-processing: centered (220), scaled (220)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 857, 855, 855, 856, 857, 856, ...
## Resampling results across tuning parameters:
##
## lambda RMSE Rsquared MAE
## 0.000 0.6871897 0.8862951 0.5149301
## 0.025 0.6527539 0.8968424 0.4976606
## 0.050 0.6590355 0.8957857 0.5027891
## 0.075 0.6700575 0.8937331 0.5124512
## 0.100 0.6831128 0.8915190 0.5240739
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was lambda = 0.025.
# Inspect the fitted elasticnet object underlying the caret model
PLR_R_Tune$finalModel
##
## Call:
## elasticnet::enet(x = as.matrix(x), y = y, lambda = param$lambda)
## Sequence of moves:
## MolWeight NumCarbon NumChlorine FP044 NumHalogen FP089 FP072
## Var 206 211 215 44 216 89 72
## Step 1 2 3 4 5 6 7
## HydrophilicFactor NumMultBonds SurfaceArea1 FP063 FP059 FP142 FP206 FP135
## Var 218 208 219 63 59 142 203 135
## Step 8 9 10 11 12 13 14 15
## FP084 FP204 FP147 FP089 FP074 FP073 FP116 FP040 NumOxygen FP039 FP043
## Var 84 201 147 -89 74 73 116 40 213 39 43
## Step 16 17 18 19 20 21 22 23 24 25 26
## NumSulfer FP094 FP124 FP011 FP193 FP111 FP050 FP198 FP175 NumRotBonds
## Var 214 94 124 11 192 111 50 197 174 209
## Step 27 28 29 30 31 32 33 34 35 36
## FP172 FP080 NumBonds FP137 FP101 FP088 FP203 FP136 FP123 FP122 FP085 FP081
## Var 171 80 207 137 101 88 200 136 123 122 85 81
## Step 37 38 39 40 41 42 43 44 45 46 47 48
## FP042 FP070 FP202 FP126 HydrophilicFactor NumRings FP188 FP065 FP145 FP128
## Var 42 70 199 126 -218 217 187 65 145 128
## Step 49 50 51 52 53 54 55 56 57 58
## FP162 FP138 FP026 FP031 FP175 FP102 FP075 FP127 FP187 FP033 FP176 FP002
## Var 161 138 26 31 -174 102 75 127 186 33 175 2
## Step 59 60 61 62 63 64 65 66 67 68 69 70
## FP037 FP171 FP173 FP207 FP053 HydrophilicFactor FP166 FP099 FP003 FP023
## Var 37 170 172 204 53 218 165 99 3 23
## Step 71 72 73 74 75 76 77 78 79 80
## FP113 FP169 FP164 FP013 FP133 FP091 FP141 FP078 FP034 FP131 FP064 FP022
## Var 113 168 163 13 133 91 141 78 34 131 64 22
## Step 81 82 83 84 85 86 87 88 89 90 91 92
## FP100 FP201 FP184 FP004 FP159 FP103 FP054 FP036 FP083 HydrophilicFactor
## Var 100 198 183 4 158 103 54 36 83 -218
## Step 93 94 95 96 97 98 99 100 101 102
## FP049 FP104 FP015 FP149 FP109 FP150 FP012 FP163 FP168 FP098 FP186 FP060
## Var 49 104 15 149 109 150 12 162 167 98 185 60
## Step 103 104 105 106 107 108 109 110 111 112 113 114
## FP144 FP174 NumNitrogen FP073 FP016 FP071 FP018 FP155 FP105 FP185 FP115
## Var 144 173 212 -73 16 71 18 154 105 184 115
## Step 115 116 117 118 119 120 121 122 123 124 125
## FP079 FP148 FP027 FP181 FP157 FP204 FP167 FP152 FP161 FP087 FP205 FP017
## Var 79 148 27 180 156 -201 166 152 160 87 202 17
## Step 126 127 128 129 130 131 132 133 134 135 136 137
## FP066 FP089 FP038 FP143 FP134 FP035 FP009 FP090 FP160 FP180 FP119 FP170
## Var 66 89 38 143 134 35 9 90 159 179 119 169
## Step 138 139 140 141 142 143 144 145 146 147 148 149
## FP093 FP146 FP190 FP028 FP191 FP095 HydrophilicFactor FP140 FP068 FP069
## Var 93 146 189 28 190 95 218 140 68 69
## Step 150 151 152 153 154 155 156 157 158 159
## FP153 FP102 FP120 FP036 NumDblBonds FP048 FP077 FP158 FP076 FP156 FP021
## Var 153 -102 120 -36 210 48 77 157 76 155 21
## Step 160 161 162 163 164 165 166 167 168 169 170
## FP030 FP055 FP118 FP139 FP007 FP001 FP130 FP057 FP082 FP195 FP192 FP024
## Var 30 55 118 139 7 1 130 57 82 194 191 24
## Step 171 172 173 174 175 176 177 178 179 180 181 182
## FP051 FP036 FP032 FP067 FP045 FP008 FP102 FP092 FP121 FP151 FP046 FP097
## Var 51 36 32 67 45 8 102 92 121 151 46 97
## Step 183 184 185 186 187 188 189 190 191 192 193 194
## FP073 FP106 FP189 FP114 FP086 FP125 FP182 FP020 FP010 FP149 FP108 FP062
## Var 73 106 188 114 86 125 181 20 10 -149 108 62
## Step 195 196 197 198 199 200 201 202 203 204 205 206
## FP019 FP129 FP041 FP141 FP005 FP196 FP204 FP178 FP052 FP120 FP056 FP197
## Var 19 129 41 -141 5 195 201 177 52 -120 56 196
## Step 207 208 209 210 211 212 213 214 215 216 217 218
## FP112 SurfaceArea2 FP179 FP047 FP110 FP117 FP096 FP175 FP097 FP166 FP165
## Var 112 220 178 47 110 117 96 174 -97 -165 164
## Step 219 220 221 222 223 224 225 226 227 228 229
## FP207 FP107 FP149 FP177 FP025 FP183 FP029 FP141 FP061 FP208 FP132 FP006
## Var -204 107 149 176 25 182 29 141 61 205 132 6
## Step 230 231 232 233 234 235 236 237 238 239 240 241
## FP120 FP058 FP137 FP014 FP097 FP207 FP166 FP137 FP194
## Var 120 58 -137 14 97 204 165 137 193 251
## Step 242 243 244 245 246 247 248 249 250 251
# Show the resampled performance metrics across the lambda grid
PLR_R_Tune$results
## lambda RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 0.000 0.6871897 0.8862951 0.5149301 0.05361883 0.02595243 0.04394202
## 2 0.025 0.6527539 0.8968424 0.4976606 0.05176540 0.02292189 0.03626743
## 3 0.050 0.6590355 0.8957857 0.5027891 0.05352912 0.02244986 0.03887249
## 4 0.075 0.6700575 0.8937331 0.5124512 0.05597932 0.02224904 0.04211679
## 5 0.100 0.6831128 0.8915190 0.5240739 0.05894427 0.02219650 0.04416185
# Capture (and print) the CV RMSE at the best (lowest-RMSE) lambda
(PLR_R_Train_RMSE <- PLR_R_Tune$results[PLR_R_Tune$results$lambda==PLR_R_Tune$bestTune$lambda,
                                        c("RMSE")])
## [1] 0.6527539
# Capture (and print) the CV R-squared at the best lambda
(PLR_R_Train_Rsquared <- PLR_R_Tune$results[PLR_R_Tune$results$lambda==PLR_R_Tune$bestTune$lambda,
                                            c("Rsquared")])
## [1] 0.8968424
##################################
# Identifying and plotting the
# best model predictors
##################################
# model does not support variable importance measurement
##################################
# Independently evaluating the model
# on the test set
##################################
# Pair the observed test responses with the ridge model's predictions
# (predictions use the numeric-coerced test copy, matching training)
PLR_R_Test <- data.frame(PLR_R_Observed = PMA_PreModelling_Test$Log_Solubility,
                         PLR_R_Predicted = predict(PLR_R_Tune,
                                                   PMA_PreModelling_Test_PLR_R[, !names(PMA_PreModelling_Test_PLR_R) %in% c("Log_Solubility")]))
PLR_R_Test
## PLR_R_Observed PLR_R_Predicted
## 1 0.93 6.655157e-01
## 2 0.85 3.336029e-01
## 3 0.81 -6.083225e-01
## 4 0.74 8.432412e-01
## 5 0.61 5.954138e-02
## 6 0.58 1.483561e+00
## 7 0.57 4.786292e-01
## 8 0.56 4.678409e-01
## 9 0.52 2.720858e-01
## 10 0.45 -8.469761e-01
## 11 0.40 -6.716471e-01
## 12 0.36 -9.064858e-01
## 13 0.22 6.878584e-02
## 14 0.08 -3.781082e-01
## 15 0.07 -1.027910e+00
## 16 0.02 -8.082206e-01
## 17 0.00 -2.666937e-01
## 18 -0.01 1.018631e-01
## 19 -0.07 1.770377e-01
## 20 -0.12 -1.038402e+00
## 21 -0.17 5.798321e-01
## 22 -0.29 -1.063792e-01
## 23 -0.38 -7.242987e-01
## 24 -0.38 -8.165523e-01
## 25 -0.39 -1.039680e+00
## 26 -0.42 -7.110655e-01
## 27 -0.44 -7.865257e-01
## 28 -0.46 1.330282e+00
## 29 -0.48 -2.228559e+00
## 30 -0.60 -1.277362e+00
## 31 -0.63 -2.498312e+00
## 32 -0.66 -7.541777e-01
## 33 -0.72 -6.822375e-01
## 34 -0.72 -8.540290e-02
## 35 -0.80 3.837001e-01
## 36 -0.80 -1.120133e+00
## 37 -0.82 3.754147e-01
## 38 -0.82 -6.354979e-01
## 39 -0.84 1.906519e-01
## 40 -0.85 -8.368592e-01
## 41 -0.85 -5.689798e-01
## 42 -0.87 -1.972862e+00
## 43 -0.89 -1.240478e+00
## 44 -0.90 1.884632e-01
## 45 -0.96 -1.496469e+00
## 46 -0.96 -7.421295e-01
## 47 -0.99 -4.135176e-01
## 48 -1.01 -8.128175e-01
## 49 -1.09 -1.054425e+00
## 50 -1.12 -4.603601e-01
## 51 -1.14 -3.738747e-01
## 52 -1.17 -1.692647e+00
## 53 -1.19 -1.575112e+00
## 54 -1.22 -1.276192e+00
## 55 -1.27 -1.911069e+00
## 56 -1.28 -1.231313e+00
## 57 -1.32 -1.249962e+00
## 58 -1.38 -1.462001e+00
## 59 -1.39 -1.609215e+00
## 60 -1.42 -1.659667e+00
## 61 -1.47 -9.900123e-01
## 62 -1.47 -1.547422e+00
## 63 -1.50 -9.496653e-01
## 64 -1.52 -1.278062e+00
## 65 -1.54 -1.349782e+00
## 66 -1.55 -2.178760e+00
## 67 -1.56 -3.033758e+00
## 68 -1.57 -1.865990e+00
## 69 -1.60 -1.333912e+00
## 70 -1.60 -2.694416e+00
## 71 -1.62 -1.586501e+00
## 72 -1.64 -2.551495e+00
## 73 -1.67 -1.718912e+00
## 74 -1.70 -3.265934e+00
## 75 -1.70 -2.062026e+00
## 76 -1.71 -2.223605e+00
## 77 -1.71 -2.401191e+00
## 78 -1.75 -1.933350e+00
## 79 -1.78 -1.615718e+00
## 80 -1.78 -2.427334e+00
## 81 -1.82 -1.311474e+00
## 82 -1.87 -1.853234e+00
## 83 -1.89 -2.114620e+00
## 84 -1.92 -1.962674e+00
## 85 -1.92 -1.295332e+00
## 86 -1.92 -1.414480e+00
## 87 -1.94 -3.361142e+00
## 88 -1.99 -2.646690e+00
## 89 -2.00 -2.222978e+00
## 90 -2.05 -2.324948e+00
## 91 -2.06 -1.582547e+00
## 92 -2.08 -2.143243e+00
## 93 -2.10 -2.561933e+00
## 94 -2.11 -1.463349e+00
## 95 -2.12 -6.012312e-01
## 96 -2.17 -2.061051e+00
## 97 -2.21 -1.837801e+00
## 98 -2.24 -2.787275e+00
## 99 -2.24 -1.527907e+00
## 100 -2.29 -2.241510e+00
## 101 -2.31 -2.241156e+00
## 102 -2.32 -2.172746e+00
## 103 -2.35 -2.589437e+00
## 104 -2.35 -2.246173e+00
## 105 -2.36 -2.552548e+00
## 106 -2.36 -1.982080e+00
## 107 -2.38 -2.371507e+00
## 108 -2.42 -2.633605e+00
## 109 -2.43 -3.251050e+00
## 110 -2.44 -3.328829e+00
## 111 -2.52 -2.431058e+00
## 112 -2.53 -2.940652e+00
## 113 -2.57 -3.084709e+00
## 114 -2.62 -3.066213e+00
## 115 -2.62 -2.772446e+00
## 116 -2.64 -3.044999e+00
## 117 -2.64 -3.066690e+00
## 118 -2.70 -2.221458e+00
## 119 -2.82 -2.524769e+00
## 120 -2.88 -2.669586e+00
## 121 -2.89 -2.263681e+00
## 122 -2.92 -1.157740e+00
## 123 -2.93 -3.512809e+00
## 124 -2.96 -2.763717e+00
## 125 -2.98 -2.632943e+00
## 126 -3.01 -2.737061e+00
## 127 -3.01 -3.437836e+00
## 128 -3.02 -3.596187e+00
## 129 -3.07 -3.450439e+00
## 130 -3.09 -3.062580e+00
## 131 -3.11 -3.013319e+00
## 132 -3.13 -3.807036e+00
## 133 -3.14 -1.895637e+00
## 134 -3.15 -3.611763e+00
## 135 -3.22 -2.416420e+00
## 136 -3.26 -3.400673e+00
## 137 -3.27 -2.819815e+00
## 138 -3.27 -2.848596e+00
## 139 -3.30 -2.957609e+00
## 140 -3.31 -2.433027e+00
## 141 -3.33 -2.267365e+00
## 142 -3.37 -2.244878e+00
## 143 -3.43 -3.533220e+00
## 144 -3.43 -2.508858e+00
## 145 -3.48 -3.007281e+00
## 146 -3.51 -3.457121e+00
## 147 -3.59 -2.391230e+00
## 148 -3.61 -2.605758e+00
## 149 -3.63 -3.498944e+00
## 150 -3.63 -3.458830e+00
## 151 -3.68 -2.013209e+00
## 152 -3.71 -3.511889e+00
## 153 -3.74 -2.378672e+00
## 154 -3.75 -3.673353e+00
## 155 -3.75 -2.741505e+00
## 156 -3.77 -3.312758e+00
## 157 -3.77 -4.323747e+00
## 158 -3.78 -5.341718e+00
## 159 -3.81 -3.765972e+00
## 160 -3.95 -4.451062e+00
## 161 -3.96 -5.456749e+00
## 162 -3.96 -4.163823e+00
## 163 -4.00 -3.563417e+00
## 164 -4.02 -4.766735e+00
## 165 -4.04 -4.490979e+00
## 166 -4.12 -3.526970e+00
## 167 -4.15 -5.014687e+00
## 168 -4.16 -3.767517e+00
## 169 -4.17 -4.519733e+00
## 170 -4.21 -4.771142e+00
## 171 -4.23 -4.326197e+00
## 172 -4.25 -3.514275e+00
## 173 -4.30 -3.626576e+00
## 174 -4.31 -5.475649e+00
## 175 -4.35 -4.889637e+00
## 176 -4.40 -4.027219e+00
## 177 -4.40 -4.357594e+00
## 178 -4.43 -4.812655e+00
## 179 -4.46 -4.606169e+00
## 180 -4.47 -3.111667e+00
## 181 -4.51 -5.073588e+00
## 182 -4.60 -3.961640e+00
## 183 -4.64 -4.699197e+00
## 184 -4.69 -4.796990e+00
## 185 -4.71 -4.016558e+00
## 186 -4.77 -3.675313e+00
## 187 -4.95 -4.503789e+00
## 188 -4.98 -4.647916e+00
## 189 -5.21 -5.850469e+00
## 190 -5.22 -5.651693e+00
## 191 -5.28 -4.406219e+00
## 192 -5.31 -2.990854e+00
## 193 -5.35 -4.756430e+00
## 194 -5.37 -5.112303e+00
## 195 -5.40 -4.660381e+00
## 196 -5.43 -4.627488e+00
## 197 -5.65 -5.654802e+00
## 198 -5.66 -4.361465e+00
## 199 -6.70 -4.951949e+00
## 200 -5.72 -5.252434e+00
## 201 -6.00 -7.373908e+00
## 202 -6.25 -6.481595e+00
## 203 -6.26 -6.359866e+00
## 204 -6.27 -6.655393e+00
## 205 -6.35 -5.786245e+00
## 206 -6.57 -6.003233e+00
## 207 -6.62 -5.319672e+00
## 208 -6.96 -5.961618e+00
## 209 -7.02 -7.442019e+00
## 210 -7.20 -7.175934e+00
## 211 -7.28 -7.284850e+00
## 212 -7.32 -7.510870e+00
## 213 -7.39 -7.915024e+00
## 214 -7.82 -8.410622e+00
## 215 -8.23 -8.962363e+00
## 216 -8.94 -8.512390e+00
## 217 1.07 -1.246604e-01
## 218 0.43 1.819635e-01
## 219 0.32 -2.103867e-01
## 220 0.00 3.000252e-02
## 221 -0.40 -8.028867e-01
## 222 -0.52 -5.428799e-01
## 223 -0.55 -7.255459e-01
## 224 -0.60 -8.720942e-01
## 225 -0.62 -2.578015e+00
## 226 -0.85 -1.233755e+00
## 227 -0.89 -7.755708e-01
## 228 -0.93 -8.709553e-01
## 229 -0.96 -3.004478e-04
## 230 -1.06 -2.097683e+00
## 231 -1.10 -1.627420e+00
## 232 -1.12 -1.042399e+00
## 233 -1.15 -8.097424e-01
## 234 -1.28 -3.394287e-01
## 235 -1.30 -1.611365e+00
## 236 -1.31 -1.228053e+00
## 237 -1.35 -2.935202e+00
## 238 -1.39 -1.949023e+00
## 239 -1.41 -1.575112e+00
## 240 -1.41 -1.367658e+00
## 241 -1.42 -6.161983e-01
## 242 -1.46 -1.949181e+00
## 243 -1.50 -1.646122e+00
## 244 -1.50 -2.341926e+00
## 245 -1.52 -1.558616e+00
## 246 -1.52 -6.113012e-01
## 247 -1.59 -1.526668e+00
## 248 -1.61 -7.851878e-01
## 249 -1.63 -1.126933e+00
## 250 -1.71 -2.365064e+00
## 251 -1.83 -2.131320e+00
## 252 -2.05 -1.966495e+00
## 253 -2.06 -2.341654e+00
## 254 -2.07 -3.859841e+00
## 255 -2.15 -2.627314e+00
## 256 -2.16 -9.795253e-01
## 257 -1.99 -1.015377e-01
## 258 -2.36 -1.884174e+00
## 259 -2.38 -4.009556e+00
## 260 -2.39 -1.575245e+00
## 261 -2.46 -2.268956e+00
## 262 -2.49 -2.345026e+00
## 263 -2.54 -2.776700e+00
## 264 -2.55 -3.045244e+00
## 265 -2.63 -2.509177e+00
## 266 -2.64 -1.597537e+00
## 267 -2.67 -2.667330e+00
## 268 -2.68 -2.027918e+00
## 269 -2.77 -2.595922e+00
## 270 -2.78 -2.919825e+00
## 271 -2.82 -2.662614e+00
## 272 -2.92 -3.597443e+00
## 273 -3.03 -3.447847e+00
## 274 -3.12 -3.483471e+00
## 275 -3.16 -3.082106e+00
## 276 -3.19 -3.263342e+00
## 277 -3.54 -3.554744e+00
## 278 -3.54 -2.429122e+00
## 279 -3.59 -3.697451e+00
## 280 -3.66 -2.985546e+00
## 281 -3.68 -2.316387e+00
## 282 -3.75 -3.907819e+00
## 283 -3.76 -3.946207e+00
## 284 -3.78 -3.908796e+00
## 285 -3.80 -4.103146e+00
## 286 -3.80 -4.615035e+00
## 287 -3.85 -3.349860e+00
## 288 -3.89 -3.742668e+00
## 289 -3.95 -4.270295e+00
## 290 -4.29 -4.954343e+00
## 291 -4.42 -4.604252e+00
## 292 -4.48 -4.326483e+00
## 293 -4.48 -3.248538e+00
## 294 -4.53 -4.955238e+00
## 295 -4.63 -4.477200e+00
## 296 -4.73 -4.094596e+00
## 297 -4.84 -4.174860e+00
## 298 -4.89 -3.892153e+00
## 299 -4.89 -4.934865e+00
## 300 -5.26 -5.652802e+00
## 301 -6.09 -5.179791e+00
## 302 -6.29 -5.970943e+00
## 303 -6.29 -6.355769e+00
## 304 -6.89 -5.674112e+00
## 305 -6.96 -6.876594e+00
## 306 -7.00 -7.023199e+00
## 307 -7.05 -7.793294e+00
## 308 -8.30 -8.895771e+00
## 309 -8.66 -8.719002e+00
## 310 -9.03 -9.331363e+00
## 311 -10.41 -1.001937e+01
## 312 -7.89 -7.531643e+00
## 313 -2.32 -1.692328e+00
## 314 0.39 -2.853248e+00
## 315 -2.90 -4.930309e+00
## 316 -2.47 -4.911651e+00
##################################
# Reporting the independent evaluation results
# for the test set
##################################
# postResample(pred, obs): column 2 holds predictions, column 1 observed values.
# Outer parentheses force the assigned value to print.
(PLR_R_Test_Metrics <- postResample(PLR_R_Test[,2], PLR_R_Test[,1]))
## RMSE Rsquared MAE
## 0.7414774 0.8751709 0.5526653
# Extract the individual metrics from the named vector
(PLR_R_Test_RMSE <- PLR_R_Test_Metrics[1])
## RMSE
## 0.7414774
(PLR_R_Test_Rsquared <- PLR_R_Test_Metrics[2])
## Rsquared
## 0.8751709
##################################
# Transforming factor predictors
# as required by the nature of the model
##################################
# Creating a local object
# for the train and test sets
##################################
# Coerce every column to numeric via character so that any factor columns
# are converted by their labels rather than their internal integer codes
PMA_PreModelling_Train_PLR_L <- as.data.frame(lapply(PMA_PreModelling_Train, function(x) as.numeric(as.character(x))))
dim(PMA_PreModelling_Train_PLR_L)
## [1] 951 221
PMA_PreModelling_Test_PLR_L <- as.data.frame(lapply(PMA_PreModelling_Test, function(x) as.numeric(as.character(x))))
dim(PMA_PreModelling_Test_PLR_L)
## [1] 316 221
##################################
# Creating consistent fold assignments
# for the 10-Fold Cross Validation process
##################################
# Fix the RNG state so the fold assignments are reproducible
set.seed(12345678)
KFold_Indices <- createFolds(PMA_PreModelling_Train_PLR_L$Log_Solubility,
                             k = 10,
                             returnTrain = TRUE)
KFold_Control <- trainControl(method = "cv",
                              index = KFold_Indices)
##################################
# Setting the conditions
# for hyperparameter tuning
##################################
# Lasso tuning grid: fraction of the full L1 solution path to retain
PLR_L_Grid <- expand.grid(fraction = seq(0.05, 1.00, length = 5))
##################################
# Running the penalized linear regression (lasso) model
# by setting the caret method to 'lasso'
##################################
# Re-seed so the resampling inside train() matches the pre-built folds
set.seed(12345678)
# Exclude the response column from the predictor matrix;
# predictors are centered and scaled before fitting
PLR_L_Tune <- train(x = PMA_PreModelling_Train_PLR_L[,!names(PMA_PreModelling_Train_PLR_L) %in% c("Log_Solubility")],
                    y = PMA_PreModelling_Train_PLR_L$Log_Solubility,
                    method = "lasso",
                    tuneGrid = PLR_L_Grid,
                    trControl = KFold_Control,
                    preProc = c("center", "scale"))
##################################
# Reporting the cross-validation results
# for the train set
##################################
PLR_L_Tune
## The lasso
##
## 951 samples
## 220 predictors
##
## Pre-processing: centered (220), scaled (220)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 857, 855, 855, 856, 857, 856, ...
## Resampling results across tuning parameters:
##
## fraction RMSE Rsquared MAE
## 0.0500 1.3714216 0.6930427 1.0614377
## 0.2875 0.6613587 0.8945923 0.5086228
## 0.5250 0.6489629 0.8976271 0.4924596
## 0.7625 0.6630176 0.8933415 0.4997783
## 1.0000 0.6871897 0.8862951 0.5149301
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was fraction = 0.525.
# Inspect the final elasticnet::enet fit selected by cross-validation
PLR_L_Tune$finalModel
##
## Call:
## elasticnet::enet(x = as.matrix(x), y = y, lambda = 0)
## Cp statistics of the Lasso fit
## Cp: 12147.631 8960.184 8880.588 8659.618 7297.388 7224.150 6934.162 5253.116 4620.595 3482.230 3096.590 2843.585 2618.559 2533.345 2518.911 2391.936 2092.306 2058.456 2054.991 2047.146 1898.978 1865.980 1696.256 1646.751 1520.984 1499.405 1455.990 1351.810 1235.766 1236.051 1204.983 1143.443 1141.487 1122.554 1124.274 1108.768 1065.801 987.005 956.350 951.645 953.350 933.746 924.143 885.546 854.090 855.297 827.167 804.988 798.345 793.217 783.876 773.862 747.774 748.174 712.944 682.511 671.203 628.058 624.066 605.453 603.143 604.207 595.627 581.308 573.393 561.422 558.363 558.929 558.062 523.230 502.926 483.996 477.690 455.641 449.095 429.235 430.321 418.363 417.038 416.781 418.042 417.637 418.832 419.515 420.611 399.826 396.865 396.332 397.607 386.916 380.295 377.827 378.699 379.500 369.926 371.149 361.951 357.389 351.252 348.606 347.668 349.403 340.928 338.847 332.763 332.305 332.025 323.501 318.564 314.492 313.714 310.364 305.040 304.899 305.952 298.866 297.875 299.313 299.862 301.455 302.825 304.260 305.011 304.244 304.082 298.960 300.959 299.493 296.499 295.133 287.224 280.270 280.103 280.995 275.558 272.528 274.168 275.842 275.606 268.698 269.525 268.020 257.321 257.816 256.343 248.587 246.926 244.175 242.235 243.180 243.526 244.893 244.625 240.380 240.615 235.903 231.627 232.361 231.969 223.326 218.912 215.456 214.262 212.596 213.554 212.821 210.162 212.086 213.966 214.766 214.858 212.604 211.935 212.224 212.416 214.050 214.374 214.753 214.384 211.681 210.300 211.432 210.741 211.079 212.362 214.246 214.047 212.754 213.634 213.982 211.562 210.798 210.145 210.053 209.230 208.898 207.942 208.718 210.453 211.166 211.176 212.857 210.926 209.684 210.157 206.758 206.758 208.234 209.798 204.682 206.103 204.992 203.600 202.250 203.742 205.446 202.423 203.462 201.459 202.813 201.771 199.699 198.260 197.356 196.158 195.899 196.748 197.919 195.392 190.400 190.763 189.951 190.155 190.096 185.087 185.045 185.878 186.544 188.461 189.805 191.098 192.053 190.526 187.096 
186.957 185.210 186.771 188.733 187.974 184.653 186.614 188.064 187.687 189.630 189.168 188.555 188.585 189.835 191.736 193.491 194.387 195.915 197.833 199.497 197.044 195.553 195.200 194.979 196.883 198.001 198.171 196.941 194.709 192.111 188.874 188.548 190.043 189.789 189.356 191.213 193.153 194.556 196.220 197.921 197.855 197.460 199.219 201.202 202.831 204.209 205.756 207.727 209.678 207.674 209.511 211.417 213.384 215.295 217.232 219.046 221.000
## DF: 1 2 3 4 5 6 7 8 9 10 11 12 12 12 13 14 15 16 17 18 19 19 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 43 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 63 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 78 78 79 80 80 80 81 82 83 84 85 86 86 86 87 88 89 90 91 92 93 94 95 96 97 96 97 98 99 99 99 100 101 102 103 104 105 106 107 108 109 110 110 110 110 110 111 112 112 112 113 114 114 114 115 116 117 118 119 120 120 119 119 120 121 122 123 124 125 125 125 126 127 127 127 128 129 130 131 131 131 132 133 134 135 136 137 138 139 140 141 142 142 142 143 144 145 146 147 148 148 148 149 150 151 151 151 151 151 151 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 166 166 167 168 169 170 171 172 173 174 174 174 174 174 175 176 177 178 179 180 181 181 180 180 181 182 183 184 185 186 186 185 185 186 187 188 188 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 202 202 202 202 203 204 205 205 204 203 202 202 203 203 203 204 205 206 207 208 208 208 209 210 211 212 213 214 215 214 215 216 217 218 219 220 221
## Sequence of moves:
## MolWeight NumCarbon NumChlorine FP044 FP089 NumHalogen FP072
## Var 206 211 215 44 89 216 72
## Step 1 2 3 4 5 6 7
## HydrophilicFactor NumMultBonds SurfaceArea1 FP063 FP059 FP089 FP142 FP135
## Var 218 208 219 63 59 -89 142 135
## Step 8 9 10 11 12 13 14 15
## FP206 FP204 FP084 FP074 FP147 FP073 HydrophilicFactor FP116 FP040
## Var 203 201 84 74 147 73 -218 116 40
## Step 16 17 18 19 20 21 22 23 24
## NumOxygen FP043 FP039 NumSulfer FP111 FP094 FP124 FP011 FP198 FP193 FP137
## Var 213 43 39 214 111 94 124 11 197 192 137
## Step 25 26 27 28 29 30 31 32 33 34 35
## FP050 FP175 FP080 FP101 FP088 FP122 FP203 NumRotBonds FP136 FP085 FP123
## Var 50 174 80 101 88 122 200 209 136 85 123
## Step 36 37 38 39 40 41 42 43 44 45 46
## FP042 FP084 FP202 FP126 FP162 FP081 FP128 FP145 FP026 FP188 FP138 FP031
## Var 42 -84 199 126 161 81 128 145 26 187 138 31
## Step 47 48 49 50 51 52 53 54 55 56 57 58
## FP002 FP187 FP176 FP065 FP037 FP075 FP033 FP127 FP171 FP207 FP102 FP175
## Var 2 186 175 65 37 75 33 127 170 204 102 -174
## Step 59 60 61 62 63 64 65 66 67 68 69 70
## FP053 NumRings FP173 FP164 FP113 FP083 FP099 FP023 FP166 FP003 FP004 FP091
## Var 53 217 172 163 113 83 99 23 165 3 4 91
## Step 71 72 73 74 75 76 77 78 79 80 81 82
## FP100 FP064 FP131 FP133 FP080 FP169 FP078 FP012 FP073 FP201 FP184 FP172
## Var 100 64 131 133 -80 168 78 12 -73 198 183 171
## Step 83 84 85 86 87 88 89 90 91 92 93 94
## FP149 FP098 FP013 FP159 FP204 FP150 FP015 FP186 FP109 FP079 FP104 FP036
## Var 149 98 13 158 -201 150 15 185 109 79 104 36
## Step 95 96 97 98 99 100 101 102 103 104 105 106
## FP180 FP148 FP163 FP054 FP102 FP089 FP174 FP141 FP021 FP011 FP016
## Var 179 148 162 54 -102 89 173 141 21 -11 16
## Step 107 108 109 110 111 112 113 114 115 116 117
## NumNitrogen FP144 FP011 FP049 FP152 FP161 FP027 FP157 FP084 FP076 FP155
## Var 212 144 11 49 152 160 27 156 84 76 154
## Step 118 119 120 121 122 123 124 125 126 127 128
## FP123 FP103 FP166 FP018 FP167 FP071 FP116 FP185 FP034 FP191 FP180 NumBonds
## Var -123 103 -165 18 166 71 -116 184 34 190 -179 207
## Step 129 130 131 132 133 134 135 136 137 138 139 140
## FP017 FP181 FP170 FP022 FP080 FP035 SurfaceArea1 FP021 FP146 FP048 FP093
## Var 17 180 169 22 80 35 -219 -21 146 48 93
## Step 141 142 143 144 145 146 147 148 149 150 151
## FP038 FP130 FP119 FP129 FP036 FP090 FP087 FP140 FP203 FP180 FP082 FP105
## Var 38 130 119 129 -36 90 87 140 -200 179 82 105
## Step 152 153 154 155 156 157 158 159 160 161 162 163
## NumDblBonds FP055 FP011 FP009 FP153 FP067 FP139 FP077 FP066 FP158 FP156
## Var 210 55 -11 9 153 67 139 77 66 157 155
## Step 164 165 166 167 168 169 170 171 172 173 174
## FP168 FP118 FP114 FP060 FP129 FP143 FP073 FP045 FP068 FP024 FP095 FP069
## Var 167 118 114 60 -129 143 73 45 68 24 95 69
## Step 175 176 177 178 179 180 181 182 183 184 185 186
## FP043 FP134 FP030 FP051 FP032 FP022 FP160 FP149 FP092 FP141 FP121 FP190
## Var -43 134 30 51 32 -22 159 -149 92 -141 121 189
## Step 187 188 189 190 191 192 193 194 195 196 197 198
## FP165 FP178 FP007 FP070 FP132 FP028 FP189 FP108 FP116 FP117 FP195 FP010
## Var 164 177 7 70 132 28 188 108 116 117 194 10
## Step 199 200 201 202 203 204 205 206 207 208 209 210
## FP021 FP052 FP193 FP096 FP194 FP151 FP182 FP022 FP129 FP205 SurfaceArea2
## Var 21 52 -192 96 193 151 181 22 129 202 220
## Step 211 212 213 214 215 216 217 218 219 220 221
## FP011 FP069 FP179 FP137 FP123 FP097 FP208 FP107 FP175 FP110 FP041 FP120
## Var 11 -69 178 -137 123 97 205 107 174 110 41 120
## Step 222 223 224 225 226 227 228 229 230 231 232 233
## FP153 FP160 FP166 FP115 FP025 FP177 FP204 FP193 FP061 NumSulfer FP205
## Var -153 -159 165 115 25 176 201 192 61 -214 -202
## Step 234 235 236 237 238 239 240 241 242 243 244
## FP005 FP008 FP001 FP125 FP042 FP141 FP196 FP029 FP197 FP112 FP056
## Var 5 8 1 125 -42 141 195 29 196 112 56
## Step 245 246 247 248 249 250 251 252 253 254 255
## NumSulfer FP106 HydrophilicFactor FP057 FP205 FP020 FP019 FP062 FP014
## Var 214 106 218 57 202 20 19 62 14
## Step 256 257 258 259 260 261 262 263 264
## FP206 FP069 FP138 FP192 FP102 FP006 FP160 FP129 FP007 FP190 FP029 FP138
## Var -203 69 -138 191 102 6 159 -129 -7 -189 -29 138
## Step 265 266 267 268 269 270 271 272 273 274 275 276
## FP043 FP101 FP203 FP137 FP153 FP042 FP046 FP101 FP177 FP149 FP007 FP036
## Var 43 -101 200 137 153 42 46 101 -176 149 7 36
## Step 277 278 279 280 281 282 283 284 285 286 287 288
## FP206 FP183 FP047 FP086 FP049 FP058 FP177 FP129 FP190 FP049 FP029
## Var 203 182 47 86 -49 58 176 129 189 49 29
## Step 289 290 291 292 293 294 295 296 297 298 299
## SurfaceArea1
## Var 219 301
## Step 300 301
# Full resampling results table across the tuning grid
PLR_L_Tune$results
## fraction RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 0.0500 1.3714216 0.6930427 1.0614377 0.11094901 0.06769221 0.06732090
## 2 0.2875 0.6613587 0.8945923 0.5086228 0.05318164 0.02052497 0.04380793
## 3 0.5250 0.6489629 0.8976271 0.4924596 0.04674210 0.02063368 0.03218482
## 4 0.7625 0.6630176 0.8933415 0.4997783 0.05238428 0.02426442 0.04144706
## 5 1.0000 0.6871897 0.8862951 0.5149301 0.05361883 0.02595243 0.04394202
# Pull the cross-validated RMSE and R-squared for the winning fraction
(PLR_L_Train_RMSE <- PLR_L_Tune$results[PLR_L_Tune$results$fraction==PLR_L_Tune$bestTune$fraction,
                                        c("RMSE")])
## [1] 0.6489629
(PLR_L_Train_Rsquared <- PLR_L_Tune$results[PLR_L_Tune$results$fraction==PLR_L_Tune$bestTune$fraction,
                                            c("Rsquared")])
## [1] 0.8976271
##################################
# Identifying and plotting the
# best model predictors
##################################
# model does not support variable importance measurement
##################################
# Independently evaluating the model
# on the test set
##################################
# Pair observed test-set responses with lasso predictions
# (response column excluded from the predictor matrix)
PLR_L_Test <- data.frame(PLR_L_Observed = PMA_PreModelling_Test$Log_Solubility,
                         PLR_L_Predicted = predict(PLR_L_Tune,
                                                   PMA_PreModelling_Test_PLR_L[,!names(PMA_PreModelling_Test_PLR_L) %in% c("Log_Solubility")]))
PLR_L_Test
## PLR_L_Observed PLR_L_Predicted
## 1 0.93 0.696775055
## 2 0.85 0.279350920
## 3 0.81 -0.531764425
## 4 0.74 0.738995667
## 5 0.61 -0.099258728
## 6 0.58 1.510972708
## 7 0.57 0.508081340
## 8 0.56 0.435049171
## 9 0.52 0.246122439
## 10 0.45 -0.923412219
## 11 0.40 -0.777923856
## 12 0.36 -0.883130465
## 13 0.22 -0.025862114
## 14 0.08 -0.348931025
## 15 0.07 -1.116015074
## 16 0.02 -0.696506527
## 17 0.00 -0.258151380
## 18 -0.01 0.009412014
## 19 -0.07 0.113617626
## 20 -0.12 -0.997069045
## 21 -0.17 0.723927170
## 22 -0.29 -0.084301471
## 23 -0.38 -0.717614771
## 24 -0.38 -0.886545809
## 25 -0.39 -1.062025692
## 26 -0.42 -0.781678345
## 27 -0.44 -0.734472594
## 28 -0.46 1.290539498
## 29 -0.48 -2.226162570
## 30 -0.60 -1.235241992
## 31 -0.63 -2.284735542
## 32 -0.66 -0.684830137
## 33 -0.72 -0.740096111
## 34 -0.72 -0.144164115
## 35 -0.80 0.330368697
## 36 -0.80 -1.213167987
## 37 -0.82 0.537007867
## 38 -0.82 -0.613165183
## 39 -0.84 0.221321155
## 40 -0.85 -0.904408794
## 41 -0.85 -0.426444653
## 42 -0.87 -1.735769138
## 43 -0.89 -1.221130929
## 44 -0.90 0.105981135
## 45 -0.96 -1.416375399
## 46 -0.96 -0.664104071
## 47 -0.99 -0.358741235
## 48 -1.01 -0.773201608
## 49 -1.09 -1.062745723
## 50 -1.12 -0.584113671
## 51 -1.14 -0.692465482
## 52 -1.17 -1.783095082
## 53 -1.19 -1.621653363
## 54 -1.22 -1.331617002
## 55 -1.27 -1.818780418
## 56 -1.28 -1.284108210
## 57 -1.32 -1.322170819
## 58 -1.38 -1.507263453
## 59 -1.39 -1.626772250
## 60 -1.42 -1.654953819
## 61 -1.47 -0.905182149
## 62 -1.47 -1.589744171
## 63 -1.50 -0.847287439
## 64 -1.52 -1.282428348
## 65 -1.54 -1.465053294
## 66 -1.55 -2.196145729
## 67 -1.56 -3.010647775
## 68 -1.57 -1.867221330
## 69 -1.60 -1.415277213
## 70 -1.60 -2.494036964
## 71 -1.62 -1.616449068
## 72 -1.64 -2.600897255
## 73 -1.67 -1.787197025
## 74 -1.70 -3.122240066
## 75 -1.70 -2.054005171
## 76 -1.71 -2.178463062
## 77 -1.71 -2.376952658
## 78 -1.75 -1.965728963
## 79 -1.78 -1.510121181
## 80 -1.78 -2.423276082
## 81 -1.82 -1.166566385
## 82 -1.87 -1.773312149
## 83 -1.89 -2.124444746
## 84 -1.92 -2.018171402
## 85 -1.92 -1.341373271
## 86 -1.92 -1.444321376
## 87 -1.94 -3.223994709
## 88 -1.99 -2.526462965
## 89 -2.00 -2.341432766
## 90 -2.05 -2.278845551
## 91 -2.06 -1.688365418
## 92 -2.08 -2.209833149
## 93 -2.10 -2.645390502
## 94 -2.11 -1.342539676
## 95 -2.12 -0.631788199
## 96 -2.17 -2.214863047
## 97 -2.21 -1.825444785
## 98 -2.24 -2.867260223
## 99 -2.24 -1.674835486
## 100 -2.29 -2.308687604
## 101 -2.31 -2.343324523
## 102 -2.32 -2.078619420
## 103 -2.35 -2.844175708
## 104 -2.35 -2.127668590
## 105 -2.36 -2.533484084
## 106 -2.36 -1.982062937
## 107 -2.38 -2.370288096
## 108 -2.42 -2.535807307
## 109 -2.43 -3.189706478
## 110 -2.44 -3.334444895
## 111 -2.52 -2.443131161
## 112 -2.53 -2.926965061
## 113 -2.57 -3.003896482
## 114 -2.62 -2.977492389
## 115 -2.62 -2.760198669
## 116 -2.64 -3.325236403
## 117 -2.64 -3.439032258
## 118 -2.70 -2.497946653
## 119 -2.82 -2.389403771
## 120 -2.88 -2.552102479
## 121 -2.89 -2.217966043
## 122 -2.92 -1.231861218
## 123 -2.93 -3.332496852
## 124 -2.96 -2.729673533
## 125 -2.98 -2.572734302
## 126 -3.01 -2.667790719
## 127 -3.01 -3.289083464
## 128 -3.02 -3.563604497
## 129 -3.07 -3.323972216
## 130 -3.09 -3.026689084
## 131 -3.11 -3.060210671
## 132 -3.13 -3.650220790
## 133 -3.14 -1.921849837
## 134 -3.15 -3.545390714
## 135 -3.22 -2.445678825
## 136 -3.26 -3.289116670
## 137 -3.27 -2.749382957
## 138 -3.27 -2.860445693
## 139 -3.30 -2.818830343
## 140 -3.31 -2.335638374
## 141 -3.33 -2.258790537
## 142 -3.37 -2.077745169
## 143 -3.43 -3.376236288
## 144 -3.43 -2.279780514
## 145 -3.48 -2.983772712
## 146 -3.51 -3.513288148
## 147 -3.59 -2.367631912
## 148 -3.61 -2.628943992
## 149 -3.63 -3.473330160
## 150 -3.63 -3.455159204
## 151 -3.68 -2.156604057
## 152 -3.71 -3.643893919
## 153 -3.74 -2.362066916
## 154 -3.75 -3.756466588
## 155 -3.75 -2.302796005
## 156 -3.77 -3.287034135
## 157 -3.77 -4.174210302
## 158 -3.78 -5.060426176
## 159 -3.81 -3.670615283
## 160 -3.95 -4.349909530
## 161 -3.96 -5.212693942
## 162 -3.96 -4.112299153
## 163 -4.00 -3.617061437
## 164 -4.02 -4.732237138
## 165 -4.04 -4.230150688
## 166 -4.12 -3.558513200
## 167 -4.15 -4.725952338
## 168 -4.16 -3.587352086
## 169 -4.17 -4.432884802
## 170 -4.21 -4.640913412
## 171 -4.23 -4.378043113
## 172 -4.25 -3.314015947
## 173 -4.30 -3.465419039
## 174 -4.31 -5.493632641
## 175 -4.35 -4.744344413
## 176 -4.40 -3.969478451
## 177 -4.40 -4.257997370
## 178 -4.43 -4.745523504
## 179 -4.46 -4.510586518
## 180 -4.47 -3.023754977
## 181 -4.51 -5.111993398
## 182 -4.60 -3.744522580
## 183 -4.64 -4.603617398
## 184 -4.69 -4.796472832
## 185 -4.71 -3.976400093
## 186 -4.77 -3.614641382
## 187 -4.95 -4.669930662
## 188 -4.98 -4.179255492
## 189 -5.21 -5.744332441
## 190 -5.22 -5.403651244
## 191 -5.28 -4.337172918
## 192 -5.31 -2.997916771
## 193 -5.35 -4.725606653
## 194 -5.37 -4.939665955
## 195 -5.40 -4.564707412
## 196 -5.43 -4.413729687
## 197 -5.65 -5.565050656
## 198 -5.66 -4.287482105
## 199 -6.70 -4.958690917
## 200 -5.72 -5.086849288
## 201 -6.00 -7.260717366
## 202 -6.25 -6.392068718
## 203 -6.26 -6.283502155
## 204 -6.27 -6.492722706
## 205 -6.35 -5.761142444
## 206 -6.57 -6.040895637
## 207 -6.62 -5.144977476
## 208 -6.96 -5.948936913
## 209 -7.02 -7.610741404
## 210 -7.20 -7.182641266
## 211 -7.28 -7.157697592
## 212 -7.32 -7.432934428
## 213 -7.39 -7.774820740
## 214 -7.82 -8.269199698
## 215 -8.23 -8.950711629
## 216 -8.94 -8.386654302
## 217 1.07 -0.214622812
## 218 0.43 0.208940637
## 219 0.32 -0.399368721
## 220 0.00 0.056561994
## 221 -0.40 -0.769290741
## 222 -0.52 -0.531669275
## 223 -0.55 -0.651485152
## 224 -0.60 -0.783950637
## 225 -0.62 -2.499473761
## 226 -0.85 -1.275268018
## 227 -0.89 -0.765145393
## 228 -0.93 -0.911002770
## 229 -0.96 -0.159062530
## 230 -1.06 -1.993965890
## 231 -1.10 -1.607910909
## 232 -1.12 -0.971075346
## 233 -1.15 -0.773830102
## 234 -1.28 -0.461914844
## 235 -1.30 -1.620774151
## 236 -1.31 -1.356486731
## 237 -1.35 -3.174400411
## 238 -1.39 -1.982371446
## 239 -1.41 -1.621653363
## 240 -1.41 -1.223902977
## 241 -1.42 -0.631093637
## 242 -1.46 -2.080658160
## 243 -1.50 -1.599089375
## 244 -1.50 -2.193001514
## 245 -1.52 -1.571032900
## 246 -1.52 -0.655223537
## 247 -1.59 -1.540810054
## 248 -1.61 -0.855785977
## 249 -1.63 -1.182040133
## 250 -1.71 -2.454445326
## 251 -1.83 -2.177896548
## 252 -2.05 -1.699500123
## 253 -2.06 -2.416523823
## 254 -2.07 -3.506056620
## 255 -2.15 -2.615868774
## 256 -2.16 -1.037187955
## 257 -1.99 -0.146522845
## 258 -2.36 -1.852814607
## 259 -2.38 -3.866103631
## 260 -2.39 -1.537484755
## 261 -2.46 -2.322784403
## 262 -2.49 -2.273558877
## 263 -2.54 -2.814918988
## 264 -2.55 -2.981684173
## 265 -2.63 -2.353472726
## 266 -2.64 -1.849508656
## 267 -2.67 -2.685489208
## 268 -2.68 -2.084766386
## 269 -2.77 -2.598438681
## 270 -2.78 -2.941925145
## 271 -2.82 -2.613379667
## 272 -2.92 -3.639098035
## 273 -3.03 -3.417266607
## 274 -3.12 -3.416567545
## 275 -3.16 -3.056557840
## 276 -3.19 -3.276541774
## 277 -3.54 -3.550729684
## 278 -3.54 -2.352647917
## 279 -3.59 -3.577961892
## 280 -3.66 -3.046180768
## 281 -3.68 -2.300947101
## 282 -3.75 -3.972119547
## 283 -3.76 -3.919947751
## 284 -3.78 -3.874151756
## 285 -3.80 -4.106195549
## 286 -3.80 -4.299403485
## 287 -3.85 -3.177916766
## 288 -3.89 -3.935682549
## 289 -3.95 -4.246928411
## 290 -4.29 -4.838309442
## 291 -4.42 -4.405006321
## 292 -4.48 -4.286944982
## 293 -4.48 -3.200053206
## 294 -4.53 -4.887325199
## 295 -4.63 -4.463809867
## 296 -4.73 -4.110392416
## 297 -4.84 -4.054421694
## 298 -4.89 -3.863573933
## 299 -4.89 -4.804951825
## 300 -5.26 -5.652575880
## 301 -6.09 -4.991967282
## 302 -6.29 -5.890490453
## 303 -6.29 -6.276045708
## 304 -6.89 -5.650652209
## 305 -6.96 -6.780581597
## 306 -7.00 -6.900928547
## 307 -7.05 -7.666254176
## 308 -8.30 -8.751989911
## 309 -8.66 -8.914147984
## 310 -9.03 -9.524766011
## 311 -10.41 -9.831681435
## 312 -7.89 -7.409485131
## 313 -2.32 -1.615257561
## 314 0.39 -2.659349386
## 315 -2.90 -4.948707174
## 316 -2.47 -5.115499794
##################################
# Reporting the independent evaluation results
# for the test set
##################################
# postResample(pred, obs): column 2 holds predictions, column 1 observed values.
# Outer parentheses force the assigned value to print.
(PLR_L_Test_Metrics <- postResample(PLR_L_Test[,2], PLR_L_Test[,1]))
## RMSE Rsquared MAE
## 0.7389135 0.8746818 0.5538049
# Extract the individual metrics from the named vector
(PLR_L_Test_RMSE <- PLR_L_Test_Metrics[1])
## RMSE
## 0.7389135
(PLR_L_Test_Rsquared <- PLR_L_Test_Metrics[2])
## Rsquared
## 0.8746818
##################################
# Transforming factor predictors
# as required by the nature of the model
##################################
# Creating a local object
# for the train and test sets
##################################
# Coerce every column to numeric via character so that any factor columns
# are converted by their labels rather than their internal integer codes
PMA_PreModelling_Train_PLR_E <- as.data.frame(lapply(PMA_PreModelling_Train, function(x) as.numeric(as.character(x))))
dim(PMA_PreModelling_Train_PLR_E)
## [1] 951 221
PMA_PreModelling_Test_PLR_E <- as.data.frame(lapply(PMA_PreModelling_Test, function(x) as.numeric(as.character(x))))
dim(PMA_PreModelling_Test_PLR_E)
## [1] 316 221
##################################
# Creating consistent fold assignments
# for the 10-Fold Cross Validation process
##################################
# Fix the RNG state so the fold assignments are reproducible
# (same seed as the lasso section, for comparable resamples)
set.seed(12345678)
KFold_Indices <- createFolds(PMA_PreModelling_Train_PLR_E$Log_Solubility,
                             k = 10,
                             returnTrain = TRUE)
KFold_Control <- trainControl(method = "cv",
                              index = KFold_Indices)
##################################
# Setting the conditions
# for hyperparameter tuning
##################################
# Elasticnet tuning grid: L2 penalty (lambda) crossed with the
# fraction of the full L1 solution path to retain
PLR_E_Grid <- expand.grid(lambda = c(0, 0.01, 0.10),
                          fraction = seq(0.05, 1.00, length = 5))
##################################
# Running the penalized linear regression (elasticnet) model
# by setting the caret method to 'enet'
##################################
# Re-seed so the resampling inside train() matches the pre-built folds
set.seed(12345678)
# Exclude the response column from the predictor matrix;
# predictors are centered and scaled before fitting
PLR_E_Tune <- train(x = PMA_PreModelling_Train_PLR_E[,!names(PMA_PreModelling_Train_PLR_E) %in% c("Log_Solubility")],
                    y = PMA_PreModelling_Train_PLR_E$Log_Solubility,
                    method = "enet",
                    tuneGrid = PLR_E_Grid,
                    trControl = KFold_Control,
                    preProc = c("center", "scale"))
##################################
# Reporting the cross-validation results
# for the train set
##################################
PLR_E_Tune
## Elasticnet
##
## 951 samples
## 220 predictors
##
## Pre-processing: centered (220), scaled (220)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 857, 855, 855, 856, 857, 856, ...
## Resampling results across tuning parameters:
##
## lambda fraction RMSE Rsquared MAE
## 0.00 0.0500 1.3714216 0.6930427 1.0614377
## 0.00 0.2875 0.6613587 0.8945923 0.5086228
## 0.00 0.5250 0.6489629 0.8976271 0.4924596
## 0.00 0.7625 0.6630176 0.8933415 0.4997783
## 0.00 1.0000 0.6871897 0.8862951 0.5149301
## 0.01 0.0500 1.5421085 0.6130789 1.1969314
## 0.01 0.2875 0.7137466 0.8789934 0.5468178
## 0.01 0.5250 0.6480166 0.8981058 0.4949997
## 0.01 0.7625 0.6471642 0.8982164 0.4916267
## 0.01 1.0000 0.6574773 0.8951502 0.4997426
## 0.10 0.0500 1.6926512 0.5217008 1.3038723
## 0.10 0.2875 0.8539515 0.8340789 0.6556144
## 0.10 0.5250 0.7069247 0.8808840 0.5402746
## 0.10 0.7625 0.6864824 0.8893575 0.5267061
## 0.10 1.0000 0.6831128 0.8915190 0.5240739
##
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were fraction = 0.7625 and lambda = 0.01.
# Inspect the final elasticnet::enet fit selected by cross-validation
PLR_E_Tune$finalModel
##
## Call:
## elasticnet::enet(x = as.matrix(x), y = y, lambda = param$lambda)
## Sequence of moves:
## MolWeight NumCarbon NumChlorine FP044 NumHalogen FP089 FP072
## Var 206 211 215 44 216 89 72
## Step 1 2 3 4 5 6 7
## HydrophilicFactor NumMultBonds SurfaceArea1 FP063 FP059 FP142 FP089 FP135
## Var 218 208 219 63 59 142 -89 135
## Step 8 9 10 11 12 13 14 15
## FP206 FP084 FP204 FP147 FP074 FP073 FP116 FP040 HydrophilicFactor
## Var 203 84 201 147 74 73 116 40 -218
## Step 16 17 18 19 20 21 22 23 24
## NumOxygen FP039 FP043 NumSulfer FP094 FP111 FP011 FP124 FP193 FP050 FP198
## Var 213 39 43 214 94 111 11 124 192 50 197
## Step 25 26 27 28 29 30 31 32 33 34 35
## FP175 FP137 FP080 NumRotBonds FP101 FP088 FP122 FP203 FP136 FP123 FP085
## Var 174 137 80 209 101 88 122 200 136 123 85
## Step 36 37 38 39 40 41 42 43 44 45 46
## FP042 FP081 FP202 FP126 FP084 FP162 FP145 FP128 FP188 FP026 FP065 FP138
## Var 42 81 199 126 -84 161 145 128 187 26 65 138
## Step 47 48 49 50 51 52 53 54 55 56 57 58
## NumBonds NumRings FP031 FP002 FP127 FP187 FP175 FP033 FP075 FP176 FP102
## Var 207 217 31 2 127 186 -174 33 75 175 102
## Step 59 60 61 62 63 64 65 66 67 68 69
## FP037 FP207 FP171 FP053 FP173 FP164 FP113 FP003 FP172 FP099 FP166 FP023
## Var 37 204 170 53 172 163 113 3 171 99 165 23
## Step 70 71 72 73 74 75 76 77 78 79 80 81
## FP091 FP100 FP131 FP133 FP064 FP169 FP078 FP083 FP004 FP013 FP201 FP184
## Var 91 100 131 133 64 168 78 83 4 13 198 183
## Step 82 83 84 85 86 87 88 89 90 91 92 93
## FP084 FP022 FP036 FP149 FP141 FP159 FP012 FP150 FP073 FP015 FP098 FP204
## Var 84 22 36 149 141 158 12 150 -73 15 98 -201
## Step 94 95 96 97 98 99 100 101 102 103 104 105
## FP186 FP054 FP104 FP034 FP109 FP049 FP103 FP163 FP021 FP174 FP148 FP144
## Var 185 54 104 34 109 49 103 162 21 173 148 144
## Step 106 107 108 109 110 111 112 113 114 115 116 117
## FP155 FP016 FP079 NumNitrogen FP089 FP168 FP161 FP071 FP185 FP157 FP166
## Var 154 16 79 212 89 167 160 71 184 156 -165
## Step 118 119 120 121 122 123 124 125 126 127 128
## FP027 FP152 FP167 FP060 FP018 FP076 FP102 FP087 FP017 FP021 FP181 FP035
## Var 27 152 166 60 18 76 -102 87 17 -21 180 35
## Step 129 130 131 132 133 134 135 136 137 138 139 140
## FP170 FP038 FP119 FP093 FP191 FP146 FP115 FP180 FP066 FP090 FP048 FP140
## Var 169 38 119 93 190 146 115 179 66 90 48 140
## Step 141 142 143 144 145 146 147 148 149 150 151 152
## FP036 FP009 FP105 FP134 FP077 FP069 FP153 FP160 NumDblBonds FP158 FP095
## Var -36 9 105 134 77 69 153 159 210 157 95
## Step 153 154 155 156 157 158 159 160 161 162 163
## FP068 FP139 FP055 FP203 FP082 FP143 FP156 FP130 FP118 FP030 FP190 FP011
## Var 68 139 55 -200 82 143 155 130 118 30 189 -11
## Step 164 165 166 167 168 169 170 171 172 173 174 175
## FP067 FP028 FP114 FP045 FP024 FP007 FP205 FP073 FP032 FP195 FP051 FP092
## Var 67 28 114 45 24 7 202 73 32 194 51 92
## Step 176 177 178 179 180 181 182 183 184 185 186 187
## FP149 FP141 FP021 FP115 FP132 FP189 FP121 FP178 FP151 FP001 FP192 FP010
## Var -149 -141 21 -115 132 188 121 177 151 1 191 10
## Step 188 189 190 191 192 193 194 195 196 197 198 199
## FP108 FP106 FP052 FP125 FP008 FP182 FP011 FP165 FP117 FP041 FP179 FP102
## Var 108 106 52 125 8 181 11 164 117 41 178 102
## Step 200 201 202 203 204 205 206 207 208 209 210 211
## FP070 FP196 FP043 FP129 FP204 FP096 FP005 FP132 FP061 FP086 FP057 FP036
## Var 70 195 -43 129 201 96 5 -132 61 86 57 36
## Step 212 213 214 215 216 217 218 219 220 221 222 223
## FP110 FP097 FP020 FP107 FP197 FP019 FP036 FP194 FP043 FP029 FP175 FP208
## Var 110 97 20 107 196 19 -36 193 43 29 174 205
## Step 224 225 226 227 228 229 230 231 232 233 234 235
## FP193 FP137 FP025 FP141 FP056 FP120 FP036 FP166 FP047 FP177 FP057
## Var -192 -137 25 141 56 120 36 165 47 176 -57
## Step 236 237 238 239 240 241 242 243 244 245 246
## SurfaceArea2 FP042 FP193 FP115 FP207 FP046 FP014 FP112 FP203 FP137 FP194
## Var 220 -42 192 115 -204 46 14 112 200 137 -193
## Step 247 248 249 250 251 252 253 254 255 256 257
## FP183 FP149 FP057 FP042 FP132 FP006 FP058 FP194 FP207 FP062
## Var 182 149 57 42 132 6 58 193 204 62
## Step 258 259 260 261 262 263 264 265 266 267
## HydrophilicFactor
## Var 218 269
## Step 268 269
# Full resampling results table across the tuning grid
PLR_E_Tune$results
## lambda fraction RMSE Rsquared MAE RMSESD RsquaredSD
## 1 0.00 0.0500 1.3714216 0.6930427 1.0614377 0.11094901 0.06769221
## 6 0.01 0.0500 1.5421085 0.6130789 1.1969314 0.12408312 0.07335623
## 11 0.10 0.0500 1.6926512 0.5217008 1.3038723 0.14784070 0.06347022
## 2 0.00 0.2875 0.6613587 0.8945923 0.5086228 0.05318164 0.02052497
## 7 0.01 0.2875 0.7137466 0.8789934 0.5468178 0.05698646 0.02622341
## 12 0.10 0.2875 0.8539515 0.8340789 0.6556144 0.07507936 0.03225781
## 3 0.00 0.5250 0.6489629 0.8976271 0.4924596 0.04674210 0.02063368
## 8 0.01 0.5250 0.6480166 0.8981058 0.4949997 0.05119816 0.02039617
## 13 0.10 0.5250 0.7069247 0.8808840 0.5402746 0.06940379 0.02444560
## 4 0.00 0.7625 0.6630176 0.8933415 0.4997783 0.05238428 0.02426442
## 9 0.01 0.7625 0.6471642 0.8982164 0.4916267 0.04859629 0.02155089
## 14 0.10 0.7625 0.6864824 0.8893575 0.5267061 0.06181857 0.02187491
## 5 0.00 1.0000 0.6871897 0.8862951 0.5149301 0.05361883 0.02595243
## 10 0.01 1.0000 0.6574773 0.8951502 0.4997426 0.05079803 0.02346660
## 15 0.10 1.0000 0.6831128 0.8915190 0.5240739 0.05894427 0.02219650
## MAESD
## 1 0.06732090
## 6 0.07068810
## 11 0.07940188
## 2 0.04380793
## 7 0.05465912
## 12 0.06437932
## 3 0.03218482
## 8 0.03932769
## 13 0.05812490
## 4 0.04144706
## 9 0.03273336
## 14 0.04620786
## 5 0.04394202
## 10 0.03674911
## 15 0.04416185
# Pull the cross-validated RMSE and R-squared for the winning
# (fraction, lambda) combination
(PLR_E_Train_RMSE <- PLR_E_Tune$results[PLR_E_Tune$results$fraction==PLR_E_Tune$bestTune$fraction &
                                        PLR_E_Tune$results$lambda==PLR_E_Tune$bestTune$lambda,
                                        c("RMSE")])
## [1] 0.6471642
(PLR_E_Train_Rsquared <- PLR_E_Tune$results[PLR_E_Tune$results$fraction==PLR_E_Tune$bestTune$fraction &
                                            PLR_E_Tune$results$lambda==PLR_E_Tune$bestTune$lambda,
                                            c("Rsquared")])
## [1] 0.8982164
##################################
# Identifying and plotting the
# best model predictors
##################################
# model does not support variable importance measurement
##################################
# Independently evaluating the model
# on the test set
##################################
# Pair observed test-set responses with elasticnet predictions
# (response column excluded from the predictor matrix)
PLR_E_Test <- data.frame(PLR_E_Observed = PMA_PreModelling_Test$Log_Solubility,
                         PLR_E_Predicted = predict(PLR_E_Tune,
                                                   PMA_PreModelling_Test_PLR_E[,!names(PMA_PreModelling_Test_PLR_E) %in% c("Log_Solubility")]))
PLR_E_Test
## PLR_E_Observed PLR_E_Predicted
## 1 0.93 0.668846414
## 2 0.85 0.280893855
## 3 0.81 -0.579788791
## 4 0.74 0.771484158
## 5 0.61 -0.013116890
## 6 0.58 1.479188924
## 7 0.57 0.502045330
## 8 0.56 0.448134311
## 9 0.52 0.251150345
## 10 0.45 -0.922999335
## 11 0.40 -0.742385879
## 12 0.36 -0.927601984
## 13 0.22 -0.004649436
## 14 0.08 -0.378770481
## 15 0.07 -1.050183627
## 16 0.02 -0.738223827
## 17 0.00 -0.274711927
## 18 -0.01 0.001544168
## 19 -0.07 0.108071952
## 20 -0.12 -1.011201885
## 21 -0.17 0.642921463
## 22 -0.29 -0.115824840
## 23 -0.38 -0.724252834
## 24 -0.38 -0.878659858
## 25 -0.39 -1.050871441
## 26 -0.42 -0.773758490
## 27 -0.44 -0.770921665
## 28 -0.46 1.297609515
## 29 -0.48 -2.192540266
## 30 -0.60 -1.295327248
## 31 -0.63 -2.389036533
## 32 -0.66 -0.735581228
## 33 -0.72 -0.723246430
## 34 -0.72 -0.139643440
## 35 -0.80 0.335696333
## 36 -0.80 -1.182475461
## 37 -0.82 0.495335271
## 38 -0.82 -0.643228865
## 39 -0.84 0.185982859
## 40 -0.85 -0.889442552
## 41 -0.85 -0.504311722
## 42 -0.87 -1.835530536
## 43 -0.89 -1.244830228
## 44 -0.90 0.125969758
## 45 -0.96 -1.469282542
## 46 -0.96 -0.710138624
## 47 -0.99 -0.371042731
## 48 -1.01 -0.817195183
## 49 -1.09 -1.066168666
## 50 -1.12 -0.531973147
## 51 -1.14 -0.583582968
## 52 -1.17 -1.717398270
## 53 -1.19 -1.590696623
## 54 -1.22 -1.313860912
## 55 -1.27 -1.842553202
## 56 -1.28 -1.265173240
## 57 -1.32 -1.296518556
## 58 -1.38 -1.515897942
## 59 -1.39 -1.601633238
## 60 -1.42 -1.701851539
## 61 -1.47 -0.954899980
## 62 -1.47 -1.590463809
## 63 -1.50 -0.886858920
## 64 -1.52 -1.301509087
## 65 -1.54 -1.413138971
## 66 -1.55 -2.151822552
## 67 -1.56 -3.011443132
## 68 -1.57 -1.871043817
## 69 -1.60 -1.358903499
## 70 -1.60 -2.613933357
## 71 -1.62 -1.609950590
## 72 -1.64 -2.561940491
## 73 -1.67 -1.775451129
## 74 -1.70 -3.203143374
## 75 -1.70 -2.077848246
## 76 -1.71 -2.233098156
## 77 -1.71 -2.364623435
## 78 -1.75 -1.955760913
## 79 -1.78 -1.542622423
## 80 -1.78 -2.431109191
## 81 -1.82 -1.240546243
## 82 -1.87 -1.820485168
## 83 -1.89 -2.147924955
## 84 -1.92 -1.999416661
## 85 -1.92 -1.327066552
## 86 -1.92 -1.433184818
## 87 -1.94 -3.269375771
## 88 -1.99 -2.589352811
## 89 -2.00 -2.273981852
## 90 -2.05 -2.302609526
## 91 -2.06 -1.663985414
## 92 -2.08 -2.182090708
## 93 -2.10 -2.621346081
## 94 -2.11 -1.409511259
## 95 -2.12 -0.642149752
## 96 -2.17 -2.167288852
## 97 -2.21 -1.839903543
## 98 -2.24 -2.785209151
## 99 -2.24 -1.623901457
## 100 -2.29 -2.291456043
## 101 -2.31 -2.265371507
## 102 -2.32 -2.138037930
## 103 -2.35 -2.745688927
## 104 -2.35 -2.177792437
## 105 -2.36 -2.529500157
## 106 -2.36 -1.974312737
## 107 -2.38 -2.374156805
## 108 -2.42 -2.584850961
## 109 -2.43 -3.206856905
## 110 -2.44 -3.335026534
## 111 -2.52 -2.431242093
## 112 -2.53 -2.927480976
## 113 -2.57 -3.000359881
## 114 -2.62 -3.006403423
## 115 -2.62 -2.761609613
## 116 -2.64 -3.192093734
## 117 -2.64 -3.280482058
## 118 -2.70 -2.402088359
## 119 -2.82 -2.441730022
## 120 -2.88 -2.619264510
## 121 -2.89 -2.238521978
## 122 -2.92 -1.207154981
## 123 -2.93 -3.407337263
## 124 -2.96 -2.736808247
## 125 -2.98 -2.612069749
## 126 -3.01 -2.686772847
## 127 -3.01 -3.348956822
## 128 -3.02 -3.562040426
## 129 -3.07 -3.389024422
## 130 -3.09 -3.035041735
## 131 -3.11 -3.052142590
## 132 -3.13 -3.736591421
## 133 -3.14 -1.951100229
## 134 -3.15 -3.593327284
## 135 -3.22 -2.422479505
## 136 -3.26 -3.338648268
## 137 -3.27 -2.776129799
## 138 -3.27 -2.858582546
## 139 -3.30 -2.858808641
## 140 -3.31 -2.371606102
## 141 -3.33 -2.278769366
## 142 -3.37 -2.148336286
## 143 -3.43 -3.447573479
## 144 -3.43 -2.401650761
## 145 -3.48 -3.001259480
## 146 -3.51 -3.485969896
## 147 -3.59 -2.371321077
## 148 -3.61 -2.631247926
## 149 -3.63 -3.484346764
## 150 -3.63 -3.454265835
## 151 -3.68 -2.084504271
## 152 -3.71 -3.635895548
## 153 -3.74 -2.375746346
## 154 -3.75 -3.729406546
## 155 -3.75 -2.509516118
## 156 -3.77 -3.294687607
## 157 -3.77 -4.229586310
## 158 -3.78 -5.129443694
## 159 -3.81 -3.719102343
## 160 -3.95 -4.400027684
## 161 -3.96 -5.325231779
## 162 -3.96 -4.155973946
## 163 -4.00 -3.596455497
## 164 -4.02 -4.715188665
## 165 -4.04 -4.360562477
## 166 -4.12 -3.555720514
## 167 -4.15 -4.897037201
## 168 -4.16 -3.667023519
## 169 -4.17 -4.478220876
## 170 -4.21 -4.679344505
## 171 -4.23 -4.351502415
## 172 -4.25 -3.391447378
## 173 -4.30 -3.550395193
## 174 -4.31 -5.472567427
## 175 -4.35 -4.788203840
## 176 -4.40 -4.033696610
## 177 -4.40 -4.303364468
## 178 -4.43 -4.796155370
## 179 -4.46 -4.559413357
## 180 -4.47 -3.033733714
## 181 -4.51 -5.099405157
## 182 -4.60 -3.861458738
## 183 -4.64 -4.654397665
## 184 -4.69 -4.787021541
## 185 -4.71 -4.000790062
## 186 -4.77 -3.642986811
## 187 -4.95 -4.641108156
## 188 -4.98 -4.379311504
## 189 -5.21 -5.784468493
## 190 -5.22 -5.484909171
## 191 -5.28 -4.358137201
## 192 -5.31 -3.008072879
## 193 -5.35 -4.710948076
## 194 -5.37 -5.019460188
## 195 -5.40 -4.602930142
## 196 -5.43 -4.503639544
## 197 -5.65 -5.583949140
## 198 -5.66 -4.331119192
## 199 -6.70 -5.001635662
## 200 -5.72 -5.169796328
## 201 -6.00 -7.308435337
## 202 -6.25 -6.425975129
## 203 -6.26 -6.330091400
## 204 -6.27 -6.581465756
## 205 -6.35 -5.762684428
## 206 -6.57 -6.024074235
## 207 -6.62 -5.241527363
## 208 -6.96 -5.937208040
## 209 -7.02 -7.529659549
## 210 -7.20 -7.157945756
## 211 -7.28 -7.226209242
## 212 -7.32 -7.466705657
## 213 -7.39 -7.830991434
## 214 -7.82 -8.326389255
## 215 -8.23 -8.941071007
## 216 -8.94 -8.438086944
## 217 1.07 -0.212847918
## 218 0.43 0.186677380
## 219 0.32 -0.348378759
## 220 0.00 0.045434632
## 221 -0.40 -0.789167867
## 222 -0.52 -0.524621279
## 223 -0.55 -0.675962619
## 224 -0.60 -0.836243522
## 225 -0.62 -2.550028488
## 226 -0.85 -1.263393122
## 227 -0.89 -0.771067860
## 228 -0.93 -0.911434866
## 229 -0.96 -0.140652368
## 230 -1.06 -2.042335464
## 231 -1.10 -1.627873124
## 232 -1.12 -0.996695840
## 233 -1.15 -0.781726838
## 234 -1.28 -0.424568431
## 235 -1.30 -1.604207221
## 236 -1.31 -1.317255241
## 237 -1.35 -3.054247411
## 238 -1.39 -1.963387525
## 239 -1.41 -1.590696623
## 240 -1.41 -1.291910598
## 241 -1.42 -0.635717732
## 242 -1.46 -2.049227667
## 243 -1.50 -1.617025122
## 244 -1.50 -2.293466532
## 245 -1.52 -1.588327062
## 246 -1.52 -0.624424177
## 247 -1.59 -1.531116090
## 248 -1.61 -0.821311299
## 249 -1.63 -1.163621054
## 250 -1.71 -2.414921248
## 251 -1.83 -2.162002049
## 252 -2.05 -1.823105489
## 253 -2.06 -2.368208866
## 254 -2.07 -3.630586856
## 255 -2.15 -2.654775056
## 256 -2.16 -1.040564271
## 257 -1.99 -0.197735310
## 258 -2.36 -1.894851343
## 259 -2.38 -3.937286149
## 260 -2.39 -1.554269377
## 261 -2.46 -2.307514014
## 262 -2.49 -2.300420441
## 263 -2.54 -2.803495218
## 264 -2.55 -3.017964093
## 265 -2.63 -2.416237494
## 266 -2.64 -1.752190120
## 267 -2.67 -2.701718146
## 268 -2.68 -2.054472439
## 269 -2.77 -2.609142993
## 270 -2.78 -2.930664874
## 271 -2.82 -2.639804601
## 272 -2.92 -3.614277595
## 273 -3.03 -3.453234661
## 274 -3.12 -3.447334096
## 275 -3.16 -3.048998810
## 276 -3.19 -3.285417898
## 277 -3.54 -3.555808300
## 278 -3.54 -2.382536191
## 279 -3.59 -3.655298560
## 280 -3.66 -3.034395192
## 281 -3.68 -2.300203903
## 282 -3.75 -3.959871364
## 283 -3.76 -3.918213256
## 284 -3.78 -3.868384096
## 285 -3.80 -4.110286865
## 286 -3.80 -4.426213518
## 287 -3.85 -3.242958707
## 288 -3.89 -3.842185232
## 289 -3.95 -4.259076603
## 290 -4.29 -4.888379114
## 291 -4.42 -4.549047883
## 292 -4.48 -4.285249844
## 293 -4.48 -3.218975882
## 294 -4.53 -4.908688084
## 295 -4.63 -4.482653232
## 296 -4.73 -4.110587825
## 297 -4.84 -4.130849965
## 298 -4.89 -3.882265342
## 299 -4.89 -4.837162144
## 300 -5.26 -5.651126226
## 301 -6.09 -5.095823433
## 302 -6.29 -5.951733556
## 303 -6.29 -6.314009294
## 304 -6.89 -5.649162041
## 305 -6.96 -6.806439862
## 306 -7.00 -6.949428220
## 307 -7.05 -7.735107705
## 308 -8.30 -8.810751696
## 309 -8.66 -8.808557379
## 310 -9.03 -9.425311508
## 311 -10.41 -9.895151102
## 312 -7.89 -7.458326683
## 313 -2.32 -1.656390580
## 314 0.39 -2.761746386
## 315 -2.90 -4.937502427
## 316 -2.47 -4.999453735
##################################
# Reporting the independent evaluation results
# for the test set
##################################
<- postResample(PLR_E_Test[,2], PLR_E_Test[,1])) (PLR_E_Test_Metrics
## RMSE Rsquared MAE
## 0.7351873 0.8761762 0.5504336
<- PLR_E_Test_Metrics[1]) (PLR_E_Test_RMSE
## RMSE
## 0.7351873
<- PLR_E_Test_Metrics[2]) (PLR_E_Test_Rsquared
## Rsquared
## 0.8761762
##################################
# Creating a local object
# for the train set
##################################
# Create a local copy of the pre-modelling train set for the PCR workflow.
# (Original lines were extraction-scrambled; reconstructed assignment.)
PMA_PreModelling_Train_PCR <- PMA_PreModelling_Train
##################################
# Creating consistent fold assignments
# for the 10-Fold Cross Validation process
##################################
# Fix the RNG so the fold assignments are reproducible and identical
# across the models compared in this document.
set.seed(12345678)
# Stratified 10-fold assignments on the response; returnTrain=TRUE yields
# the training-row indices per fold, as required by trainControl(index=).
KFold_Indices <- createFolds(PMA_PreModelling_Train_PCR$Log_Solubility,
                             k = 10,
                             returnTrain = TRUE)
# Cross-validation controller reusing the pre-built fold indices.
KFold_Control <- trainControl(method = "cv",
                              index = KFold_Indices)
##################################
# Setting the conditions
# for hyperparameter tuning
##################################
# Hyperparameter grid for PCR: search 1 to 35 principal components.
# (Reconstructed; also switched `=` to the idiomatic `<-` assignment.)
PCR_Grid <- expand.grid(ncomp = 1:35)
##################################
# Running the principal component regression model
# by setting the caret method to 'pcr'
##################################
# Reset the seed immediately before training for reproducible resampling.
set.seed(12345678)
# Fit and tune principal component regression (caret method "pcr"),
# excluding the response column from the predictor matrix.
# (Original lines were extraction-scrambled; reconstructed assignment.)
PCR_Tune <- train(x = PMA_PreModelling_Train_PCR[, !names(PMA_PreModelling_Train_PCR) %in% c("Log_Solubility")],
                  y = PMA_PreModelling_Train_PCR$Log_Solubility,
                  method = "pcr",
                  tuneGrid = PCR_Grid,
                  trControl = KFold_Control)
##################################
# Reporting the cross-validation results
# for the train set
##################################
PCR_Tune
## Principal Component Analysis
##
## 951 samples
## 220 predictors
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 857, 855, 855, 856, 857, 856, ...
## Resampling results across tuning parameters:
##
## ncomp RMSE Rsquared MAE
## 1 2.0296889 0.01442414 1.5675994
## 2 1.6567709 0.34667895 1.3084248
## 3 1.3399066 0.56959232 1.0554564
## 4 1.3365025 0.57098448 1.0523863
## 5 1.1077389 0.70727215 0.8560733
## 6 1.0934315 0.71523392 0.8396607
## 7 1.0918104 0.71620758 0.8392329
## 8 1.0822762 0.72034455 0.8355520
## 9 0.9758776 0.77338983 0.7648700
## 10 0.9193282 0.79797036 0.7191366
## 11 0.9100087 0.80274346 0.7098640
## 12 0.9008101 0.80711914 0.7026402
## 13 0.8994621 0.80774236 0.7012849
## 14 0.8997422 0.80755616 0.7022250
## 15 0.8972326 0.80815361 0.6971850
## 16 0.8724231 0.81941761 0.6786738
## 17 0.8479644 0.82828435 0.6684450
## 18 0.8468783 0.82878646 0.6677195
## 19 0.8238325 0.83755774 0.6506819
## 20 0.8210854 0.83853781 0.6496191
## 21 0.8170708 0.84039342 0.6407918
## 22 0.7981577 0.84802017 0.6241175
## 23 0.7935049 0.84967647 0.6207379
## 24 0.7936238 0.84963015 0.6215020
## 25 0.7950743 0.84910027 0.6229236
## 26 0.7959497 0.84890735 0.6232711
## 27 0.7917201 0.85064965 0.6207289
## 28 0.7856018 0.85294606 0.6116164
## 29 0.7868587 0.85160687 0.6102646
## 30 0.7686928 0.85854250 0.5979590
## 31 0.7598560 0.86182884 0.5950581
## 32 0.7544523 0.86381691 0.5880242
## 33 0.7506357 0.86488656 0.5850940
## 34 0.7426083 0.86771537 0.5751721
## 35 0.7453166 0.86670191 0.5772234
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was ncomp = 34.
$finalModel PCR_Tune
## Principal component regression, fitted with the singular value decomposition algorithm.
## Call:
## pcr(formula = .outcome ~ ., ncomp = ncomp, data = dat)
$results PCR_Tune
## ncomp RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 2.0296889 0.01442414 1.5675994 0.18519012 0.01370650 0.10605601
## 2 2 1.6567709 0.34667895 1.3084248 0.11847283 0.10128844 0.10795416
## 3 3 1.3399066 0.56959232 1.0554564 0.09770998 0.06849443 0.07696745
## 4 4 1.3365025 0.57098448 1.0523863 0.09650563 0.07030819 0.07599880
## 5 5 1.1077389 0.70727215 0.8560733 0.08184295 0.05070045 0.06351824
## 6 6 1.0934315 0.71523392 0.8396607 0.07559229 0.04492993 0.06175559
## 7 7 1.0918104 0.71620758 0.8392329 0.07511865 0.04349455 0.06138873
## 8 8 1.0822762 0.72034455 0.8355520 0.07326621 0.04682361 0.06128337
## 9 9 0.9758776 0.77338983 0.7648700 0.06964522 0.03464898 0.06535156
## 10 10 0.9193282 0.79797036 0.7191366 0.07361984 0.03602854 0.06431757
## 11 11 0.9100087 0.80274346 0.7098640 0.07141421 0.03037915 0.06273034
## 12 12 0.9008101 0.80711914 0.7026402 0.06848519 0.02973063 0.05972693
## 13 13 0.8994621 0.80774236 0.7012849 0.06984533 0.02920963 0.06106845
## 14 14 0.8997422 0.80755616 0.7022250 0.06850319 0.02895750 0.05863619
## 15 15 0.8972326 0.80815361 0.6971850 0.06764988 0.02917955 0.05763866
## 16 16 0.8724231 0.81941761 0.6786738 0.07191361 0.02836615 0.06605188
## 17 17 0.8479644 0.82828435 0.6684450 0.07312473 0.03029092 0.06153839
## 18 18 0.8468783 0.82878646 0.6677195 0.07541100 0.02945746 0.06126364
## 19 19 0.8238325 0.83755774 0.6506819 0.07286795 0.02830103 0.05844023
## 20 20 0.8210854 0.83853781 0.6496191 0.07502121 0.02738817 0.05717632
## 21 21 0.8170708 0.84039342 0.6407918 0.07248873 0.02467572 0.05691329
## 22 22 0.7981577 0.84802017 0.6241175 0.07722684 0.02299189 0.06220102
## 23 23 0.7935049 0.84967647 0.6207379 0.07379642 0.02348702 0.05821540
## 24 24 0.7936238 0.84963015 0.6215020 0.07342546 0.02316486 0.05802904
## 25 25 0.7950743 0.84910027 0.6229236 0.07377674 0.02344697 0.05852246
## 26 26 0.7959497 0.84890735 0.6232711 0.07207635 0.02257987 0.05798154
## 27 27 0.7917201 0.85064965 0.6207289 0.07105699 0.02176341 0.05815307
## 28 28 0.7856018 0.85294606 0.6116164 0.07237138 0.02341986 0.06013560
## 29 29 0.7868587 0.85160687 0.6102646 0.06768926 0.02704719 0.05771990
## 30 30 0.7686928 0.85854250 0.5979590 0.06442948 0.02674887 0.05450236
## 31 31 0.7598560 0.86182884 0.5950581 0.05592123 0.02580600 0.04900977
## 32 32 0.7544523 0.86381691 0.5880242 0.06025075 0.02663350 0.05237288
## 33 33 0.7506357 0.86488656 0.5850940 0.05823760 0.02791928 0.04824653
## 34 34 0.7426083 0.86771537 0.5751721 0.05377055 0.02667267 0.04901977
## 35 35 0.7453166 0.86670191 0.5772234 0.05224404 0.02655696 0.04814371
# Cross-validated RMSE for the selected number of components (bestTune).
# (Original lines were extraction-scrambled; reconstructed assignment.)
(PCR_Train_RMSE <- PCR_Tune$results[PCR_Tune$results$ncomp == PCR_Tune$bestTune$ncomp,
                                    c("RMSE")])
## [1] 0.7426083
# Cross-validated R-squared for the selected number of components (bestTune).
# (Original lines were extraction-scrambled; reconstructed assignment.)
(PCR_Train_Rsquared <- PCR_Tune$results[PCR_Tune$results$ncomp == PCR_Tune$bestTune$ncomp,
                                        c("Rsquared")])
## [1] 0.8677154
##################################
# Identifying and plotting the
# best model predictors
##################################
# model does not support variable importance measurement
##################################
# Independently evaluating the model
# on the test set
##################################
# Build the test-set evaluation frame for PCR: observed Log_Solubility next
# to predictions on the test predictors (response column excluded).
# (Original lines were extraction-scrambled; reconstructed assignment.)
PCR_Test <- data.frame(PCR_Observed = PMA_PreModelling_Test$Log_Solubility,
                       PCR_Predicted = predict(PCR_Tune,
                                               PMA_PreModelling_Test[, !names(PMA_PreModelling_Test) %in% c("Log_Solubility")]))
PCR_Test
## PCR_Observed PCR_Predicted
## 1 0.93 0.462182268
## 2 0.85 0.316580448
## 3 0.81 -0.945880837
## 4 0.74 -0.293259569
## 5 0.61 0.404635957
## 6 0.58 0.733217245
## 7 0.57 -0.028483379
## 8 0.56 0.315707814
## 9 0.52 0.114526282
## 10 0.45 -0.986935760
## 11 0.40 -0.689009516
## 12 0.36 -2.179352029
## 13 0.22 -0.153574606
## 14 0.08 0.177580181
## 15 0.07 -1.379675327
## 16 0.02 -1.739622050
## 17 0.00 -0.776967168
## 18 -0.01 -0.591326254
## 19 -0.07 0.045838747
## 20 -0.12 -2.240547697
## 21 -0.17 0.327118691
## 22 -0.29 -0.322805421
## 23 -0.38 -0.340975848
## 24 -0.38 -1.313602576
## 25 -0.39 -0.861727039
## 26 -0.42 -1.451680481
## 27 -0.44 -0.474179666
## 28 -0.46 0.804611679
## 29 -0.48 -1.677483235
## 30 -0.60 -0.968405139
## 31 -0.63 -3.188136735
## 32 -0.66 -0.623192401
## 33 -0.72 -0.463389333
## 34 -0.72 -0.587561005
## 35 -0.80 -0.534014873
## 36 -0.80 -0.554172780
## 37 -0.82 -0.122282875
## 38 -0.82 -0.659049111
## 39 -0.84 0.570252763
## 40 -0.85 -1.478444252
## 41 -0.85 -0.539779301
## 42 -0.87 -2.395308530
## 43 -0.89 -1.192540375
## 44 -0.90 0.223560105
## 45 -0.96 -1.749350382
## 46 -0.96 -0.872753931
## 47 -0.99 -0.726103159
## 48 -1.01 -0.898962066
## 49 -1.09 -1.097879461
## 50 -1.12 -0.772302099
## 51 -1.14 -0.307991271
## 52 -1.17 -1.627889193
## 53 -1.19 -1.941436108
## 54 -1.22 -1.248394890
## 55 -1.27 -0.938117980
## 56 -1.28 -1.488884776
## 57 -1.32 -1.378055930
## 58 -1.38 -1.353955783
## 59 -1.39 -1.928209622
## 60 -1.42 -2.607835558
## 61 -1.47 -0.921378405
## 62 -1.47 -1.190814712
## 63 -1.50 -1.902176513
## 64 -1.52 -1.187231141
## 65 -1.54 -1.946745163
## 66 -1.55 -2.275539452
## 67 -1.56 -2.587219183
## 68 -1.57 -2.358002072
## 69 -1.60 -0.952500446
## 70 -1.60 -2.636971744
## 71 -1.62 -2.943902005
## 72 -1.64 -3.139592131
## 73 -1.67 -1.611531503
## 74 -1.70 -2.754256197
## 75 -1.70 -2.265948203
## 76 -1.71 -2.491616937
## 77 -1.71 -2.550917657
## 78 -1.75 -1.955587957
## 79 -1.78 -1.193975692
## 80 -1.78 -1.692866557
## 81 -1.82 -0.881504064
## 82 -1.87 -1.148979452
## 83 -1.89 -2.610859665
## 84 -1.92 -1.897992545
## 85 -1.92 -1.510320673
## 86 -1.92 -1.077079670
## 87 -1.94 -3.213930262
## 88 -1.99 -2.533788513
## 89 -2.00 -2.256290306
## 90 -2.05 -2.626307120
## 91 -2.06 -2.001038378
## 92 -2.08 -2.707752970
## 93 -2.10 -2.565556218
## 94 -2.11 -1.344135086
## 95 -2.12 -1.385845736
## 96 -2.17 -1.925358660
## 97 -2.21 -1.973684177
## 98 -2.24 -2.326429801
## 99 -2.24 -1.409145654
## 100 -2.29 -1.739801390
## 101 -2.31 -2.072667721
## 102 -2.32 -1.975447594
## 103 -2.35 -3.163667428
## 104 -2.35 -1.688681672
## 105 -2.36 -2.312923262
## 106 -2.36 -2.196530460
## 107 -2.38 -2.328140438
## 108 -2.42 -3.238598289
## 109 -2.43 -2.901984183
## 110 -2.44 -2.937477131
## 111 -2.52 -3.048212697
## 112 -2.53 -2.695754747
## 113 -2.57 -2.773728454
## 114 -2.62 -2.807118420
## 115 -2.62 -2.737497203
## 116 -2.64 -2.736818053
## 117 -2.64 -3.441255512
## 118 -2.70 -2.749881922
## 119 -2.82 -3.523453889
## 120 -2.88 -2.357894077
## 121 -2.89 -2.301650028
## 122 -2.92 -1.180398464
## 123 -2.93 -3.054401715
## 124 -2.96 -2.531055760
## 125 -2.98 -3.754526428
## 126 -3.01 -3.112725817
## 127 -3.01 -2.975724090
## 128 -3.02 -2.916900908
## 129 -3.07 -3.009323008
## 130 -3.09 -3.372478938
## 131 -3.11 -3.074854971
## 132 -3.13 -3.571721771
## 133 -3.14 -1.719787580
## 134 -3.15 -3.995569084
## 135 -3.22 -2.067589470
## 136 -3.26 -3.074966588
## 137 -3.27 -2.264274749
## 138 -3.27 -2.564007920
## 139 -3.30 -1.900229061
## 140 -3.31 -1.954089261
## 141 -3.33 -2.297730426
## 142 -3.37 -1.586020086
## 143 -3.43 -3.709588935
## 144 -3.43 -2.735537334
## 145 -3.48 -3.497607438
## 146 -3.51 -4.067987387
## 147 -3.59 -3.380665801
## 148 -3.61 -2.586232322
## 149 -3.63 -3.559068847
## 150 -3.63 -3.209219549
## 151 -3.68 -2.044011728
## 152 -3.71 -4.340950545
## 153 -3.74 -1.944809652
## 154 -3.75 -3.352402046
## 155 -3.75 -3.578456815
## 156 -3.77 -3.819627738
## 157 -3.77 -4.442839703
## 158 -3.78 -4.835774030
## 159 -3.81 -3.524309500
## 160 -3.95 -4.879064966
## 161 -3.96 -4.792206612
## 162 -3.96 -4.234330789
## 163 -4.00 -3.219178215
## 164 -4.02 -3.830199697
## 165 -4.04 -4.358420780
## 166 -4.12 -2.948752067
## 167 -4.15 -5.501447290
## 168 -4.16 -3.549054346
## 169 -4.17 -4.340408429
## 170 -4.21 -4.415403498
## 171 -4.23 -4.715752594
## 172 -4.25 -2.824363852
## 173 -4.30 -3.698913819
## 174 -4.31 -5.411467048
## 175 -4.35 -4.627827233
## 176 -4.40 -3.908063678
## 177 -4.40 -4.054909207
## 178 -4.43 -4.923837515
## 179 -4.46 -4.651534851
## 180 -4.47 -2.218006110
## 181 -4.51 -5.524697405
## 182 -4.60 -3.946655342
## 183 -4.64 -4.057825025
## 184 -4.69 -4.704944822
## 185 -4.71 -3.978170057
## 186 -4.77 -4.011425397
## 187 -4.95 -5.042474931
## 188 -4.98 -5.053214221
## 189 -5.21 -5.714703733
## 190 -5.22 -5.419091321
## 191 -5.28 -4.396016452
## 192 -5.31 -3.291142485
## 193 -5.35 -4.983434872
## 194 -5.37 -5.038913691
## 195 -5.40 -4.442449730
## 196 -5.43 -4.736215470
## 197 -5.65 -5.240423743
## 198 -5.66 -4.351381058
## 199 -6.70 -5.040054389
## 200 -5.72 -4.615187285
## 201 -6.00 -6.587120785
## 202 -6.25 -6.532152807
## 203 -6.26 -6.390327981
## 204 -6.27 -6.377405240
## 205 -6.35 -5.439261305
## 206 -6.57 -5.957509355
## 207 -6.62 -4.676276071
## 208 -6.96 -5.736849435
## 209 -7.02 -7.072417082
## 210 -7.20 -7.000998973
## 211 -7.28 -7.029806037
## 212 -7.32 -7.520922791
## 213 -7.39 -7.633211006
## 214 -7.82 -8.083969547
## 215 -8.23 -7.831992798
## 216 -8.94 -8.515673674
## 217 1.07 0.219276294
## 218 0.43 0.330756116
## 219 0.32 0.453690461
## 220 0.00 -0.122893337
## 221 -0.40 -0.897191014
## 222 -0.52 0.006136031
## 223 -0.55 -0.761653529
## 224 -0.60 -0.979753251
## 225 -0.62 -2.635726297
## 226 -0.85 -1.837743804
## 227 -0.89 -1.148776455
## 228 -0.93 -1.384262415
## 229 -0.96 0.523815330
## 230 -1.06 -1.915401741
## 231 -1.10 -1.892408601
## 232 -1.12 -1.556460179
## 233 -1.15 -0.667150802
## 234 -1.28 -1.031054071
## 235 -1.30 -1.420232370
## 236 -1.31 -1.804920337
## 237 -1.35 -3.166523244
## 238 -1.39 -1.568456582
## 239 -1.41 -1.941436108
## 240 -1.41 -0.861592267
## 241 -1.42 -0.724916742
## 242 -1.46 -2.256991358
## 243 -1.50 -1.447087059
## 244 -1.50 -2.804344982
## 245 -1.52 -2.111224594
## 246 -1.52 -1.306281870
## 247 -1.59 -1.592576468
## 248 -1.61 -1.376918862
## 249 -1.63 -0.998306905
## 250 -1.71 -2.696795839
## 251 -1.83 -2.420000757
## 252 -2.05 -1.441688717
## 253 -2.06 -2.758186386
## 254 -2.07 -4.042500416
## 255 -2.15 -2.431994838
## 256 -2.16 -2.275531571
## 257 -1.99 -0.173377342
## 258 -2.36 -2.336237964
## 259 -2.38 -3.442324638
## 260 -2.39 -1.900260379
## 261 -2.46 -2.462622866
## 262 -2.49 -2.104211599
## 263 -2.54 -2.672144704
## 264 -2.55 -3.216310658
## 265 -2.63 -2.828185945
## 266 -2.64 -2.753887744
## 267 -2.67 -1.966441007
## 268 -2.68 -1.967121062
## 269 -2.77 -2.450328328
## 270 -2.78 -3.303406099
## 271 -2.82 -2.256717032
## 272 -2.92 -4.145055031
## 273 -3.03 -2.939164798
## 274 -3.12 -3.522588040
## 275 -3.16 -1.951175133
## 276 -3.19 -3.282460244
## 277 -3.54 -3.271416451
## 278 -3.54 -2.612759118
## 279 -3.59 -3.605429789
## 280 -3.66 -2.620727432
## 281 -3.68 -1.924573732
## 282 -3.75 -4.004126140
## 283 -3.76 -3.058300701
## 284 -3.78 -4.147347143
## 285 -3.80 -3.963159969
## 286 -3.80 -5.772243733
## 287 -3.85 -2.254368698
## 288 -3.89 -3.951555216
## 289 -3.95 -4.618859163
## 290 -4.29 -4.867327525
## 291 -4.42 -5.030249065
## 292 -4.48 -3.903435024
## 293 -4.48 -2.226098113
## 294 -4.53 -4.998385380
## 295 -4.63 -4.686001225
## 296 -4.73 -4.639582512
## 297 -4.84 -4.091800836
## 298 -4.89 -3.972934607
## 299 -4.89 -4.716591126
## 300 -5.26 -5.324474783
## 301 -6.09 -4.557018150
## 302 -6.29 -6.652659640
## 303 -6.29 -6.310292077
## 304 -6.89 -6.191608905
## 305 -6.96 -6.431595674
## 306 -7.00 -7.161323096
## 307 -7.05 -7.491386180
## 308 -8.30 -8.525981441
## 309 -8.66 -7.858370455
## 310 -9.03 -8.052868044
## 311 -10.41 -9.426263256
## 312 -7.89 -7.622903239
## 313 -2.32 -1.836654177
## 314 0.39 -3.006531686
## 315 -2.90 -5.038923278
## 316 -2.47 -5.208029998
##################################
# Reporting the independent evaluation results
# for the test set
##################################
<- postResample(PCR_Test[,2], PCR_Test[,1])) (PCR_Test_Metrics
## RMSE Rsquared MAE
## 0.8448324 0.8351614 0.6337370
<- PCR_Test_Metrics[1]) (PCR_Test_RMSE
## RMSE
## 0.8448324
<- PCR_Test_Metrics[2]) (PCR_Test_Rsquared
## Rsquared
## 0.8351614
##################################
# Creating a local object
# for the train set
##################################
# Create a local copy of the pre-modelling train set for the PLS workflow.
# (Original lines were extraction-scrambled; reconstructed assignment.)
PMA_PreModelling_Train_PLS <- PMA_PreModelling_Train
##################################
# Creating consistent fold assignments
# for the 10-Fold Cross Validation process
##################################
# Fix the RNG so the fold assignments match those used by the other models.
set.seed(12345678)
# Stratified 10-fold assignments on the response; returnTrain=TRUE yields
# the training-row indices per fold, as required by trainControl(index=).
KFold_Indices <- createFolds(PMA_PreModelling_Train_PLS$Log_Solubility,
                             k = 10,
                             returnTrain = TRUE)
# Cross-validation controller reusing the pre-built fold indices.
KFold_Control <- trainControl(method = "cv",
                              index = KFold_Indices)
##################################
# Setting the conditions
# for hyperparameter tuning
##################################
# Hyperparameter grid for PLS: search 1 to 35 latent components.
# (Reconstructed; also switched `=` to the idiomatic `<-` assignment.)
PLS_Grid <- expand.grid(ncomp = 1:35)
##################################
# Running the partial least squares model
# by setting the caret method to 'pls'
##################################
# Reset the seed immediately before training for reproducible resampling.
set.seed(12345678)
# Fit and tune partial least squares (caret method "pls"),
# excluding the response column from the predictor matrix.
# (Original lines were extraction-scrambled; reconstructed assignment.)
PLS_Tune <- train(x = PMA_PreModelling_Train_PLS[, !names(PMA_PreModelling_Train_PLS) %in% c("Log_Solubility")],
                  y = PMA_PreModelling_Train_PLS$Log_Solubility,
                  method = "pls",
                  tuneGrid = PLS_Grid,
                  trControl = KFold_Control)
##################################
# Reporting the cross-validation results
# for the train set
##################################
PLS_Tune
## Partial Least Squares
##
## 951 samples
## 220 predictors
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 857, 855, 855, 856, 857, 856, ...
## Resampling results across tuning parameters:
##
## ncomp RMSE Rsquared MAE
## 1 1.1257315 0.6941062 0.8684986
## 2 0.9541662 0.7835253 0.7540264
## 3 0.8676624 0.8221432 0.6810922
## 4 0.7921963 0.8491996 0.6109496
## 5 0.7436770 0.8677605 0.5784938
## 6 0.7023456 0.8816418 0.5418892
## 7 0.6895688 0.8852670 0.5314509
## 8 0.6793308 0.8880567 0.5174052
## 9 0.6711184 0.8907599 0.5076668
## 10 0.6646817 0.8930123 0.5044063
## 11 0.6569810 0.8951394 0.5041779
## 12 0.6527074 0.8965189 0.4971062
## 13 0.6490005 0.8976341 0.4989624
## 14 0.6459830 0.8983443 0.4957764
## 15 0.6444142 0.8990045 0.4914714
## 16 0.6440406 0.8992123 0.4908844
## 17 0.6455088 0.8988869 0.4922801
## 18 0.6485785 0.8979029 0.4952112
## 19 0.6526084 0.8968343 0.4974662
## 20 0.6544471 0.8963104 0.4997403
## 21 0.6562105 0.8955441 0.5010076
## 22 0.6566659 0.8954656 0.4994650
## 23 0.6578121 0.8952035 0.5008882
## 24 0.6601307 0.8946705 0.5025501
## 25 0.6629940 0.8936550 0.5035348
## 26 0.6656543 0.8927872 0.5051810
## 27 0.6662820 0.8925428 0.5053838
## 28 0.6684003 0.8919317 0.5061196
## 29 0.6719706 0.8908891 0.5075432
## 30 0.6732406 0.8905865 0.5087067
## 31 0.6743116 0.8902882 0.5089104
## 32 0.6757629 0.8898430 0.5095610
## 33 0.6763937 0.8897242 0.5095410
## 34 0.6782967 0.8891499 0.5096571
## 35 0.6795773 0.8887826 0.5098305
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was ncomp = 16.
$finalModel PLS_Tune
## Partial least squares regression, fitted with the orthogonal scores algorithm.
## Call:
## plsr(formula = .outcome ~ ., ncomp = ncomp, data = dat, method = "oscorespls")
$results PLS_Tune
## ncomp RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 1.1257315 0.6941062 0.8684986 0.08785965 0.06920539 0.06592163
## 2 2 0.9541662 0.7835253 0.7540264 0.08585840 0.03093668 0.07734570
## 3 3 0.8676624 0.8221432 0.6810922 0.07131188 0.02840465 0.06528264
## 4 4 0.7921963 0.8491996 0.6109496 0.06551658 0.02829300 0.05655513
## 5 5 0.7436770 0.8677605 0.5784938 0.06205082 0.02270654 0.04622536
## 6 6 0.7023456 0.8816418 0.5418892 0.05533813 0.01983644 0.04436123
## 7 7 0.6895688 0.8852670 0.5314509 0.05168219 0.02098024 0.04557589
## 8 8 0.6793308 0.8880567 0.5174052 0.04862126 0.02287536 0.04283374
## 9 9 0.6711184 0.8907599 0.5076668 0.04887598 0.02256537 0.04288068
## 10 10 0.6646817 0.8930123 0.5044063 0.05256807 0.02149819 0.04451135
## 11 11 0.6569810 0.8951394 0.5041779 0.05600095 0.02217211 0.04357013
## 12 12 0.6527074 0.8965189 0.4971062 0.05628616 0.02180750 0.04248493
## 13 13 0.6490005 0.8976341 0.4989624 0.05179806 0.02023778 0.03802353
## 14 14 0.6459830 0.8983443 0.4957764 0.05257732 0.02157699 0.03864626
## 15 15 0.6444142 0.8990045 0.4914714 0.05353578 0.02210539 0.03907895
## 16 16 0.6440406 0.8992123 0.4908844 0.05262752 0.02252339 0.03717169
## 17 17 0.6455088 0.8988869 0.4922801 0.05442467 0.02280276 0.03862195
## 18 18 0.6485785 0.8979029 0.4952112 0.05175533 0.02231314 0.04019243
## 19 19 0.6526084 0.8968343 0.4974662 0.05203807 0.02266594 0.03979290
## 20 20 0.6544471 0.8963104 0.4997403 0.05135115 0.02278692 0.03930285
## 21 21 0.6562105 0.8955441 0.5010076 0.04871939 0.02355241 0.03845644
## 22 22 0.6566659 0.8954656 0.4994650 0.04830068 0.02328930 0.03860606
## 23 23 0.6578121 0.8952035 0.5008882 0.04807952 0.02302174 0.03898917
## 24 24 0.6601307 0.8946705 0.5025501 0.04699277 0.02251990 0.03689860
## 25 25 0.6629940 0.8936550 0.5035348 0.04584397 0.02342117 0.03614755
## 26 26 0.6656543 0.8927872 0.5051810 0.04439360 0.02383698 0.03507013
## 27 27 0.6662820 0.8925428 0.5053838 0.04543114 0.02436476 0.03742399
## 28 28 0.6684003 0.8919317 0.5061196 0.04281628 0.02448916 0.03635414
## 29 29 0.6719706 0.8908891 0.5075432 0.04376714 0.02457799 0.03647787
## 30 30 0.6732406 0.8905865 0.5087067 0.04412259 0.02433861 0.03722085
## 31 31 0.6743116 0.8902882 0.5089104 0.04473642 0.02446474 0.03718359
## 32 32 0.6757629 0.8898430 0.5095610 0.04642356 0.02453491 0.03931718
## 33 33 0.6763937 0.8897242 0.5095410 0.04587084 0.02431574 0.03883305
## 34 34 0.6782967 0.8891499 0.5096571 0.04440711 0.02378521 0.03836758
## 35 35 0.6795773 0.8887826 0.5098305 0.04409193 0.02361950 0.03882448
# Cross-validated RMSE for the selected number of components (bestTune).
# (Original lines were extraction-scrambled; reconstructed assignment.)
(PLS_Train_RMSE <- PLS_Tune$results[PLS_Tune$results$ncomp == PLS_Tune$bestTune$ncomp,
                                    c("RMSE")])
## [1] 0.6440406
# Cross-validated R-squared for the selected number of components (bestTune).
# (Original lines were extraction-scrambled; reconstructed assignment.)
(PLS_Train_Rsquared <- PLS_Tune$results[PLS_Tune$results$ncomp == PLS_Tune$bestTune$ncomp,
                                        c("Rsquared")])
## [1] 0.8992123
##################################
# Identifying and plotting the
# best model predictors
##################################
# Rank predictors by scaled variable importance for the tuned PLS model
# and plot the top 25 in a lattice dotplot.
# (Original lines were extraction-scrambled; reconstructed assignment/call.)
PLS_VarImp <- varImp(PLS_Tune, scale = TRUE)
plot(PLS_VarImp,
     top = 25,
     scales = list(y = list(cex = .95)),
     main = "Ranked Variable Importance : Partial Least Squares",
     xlab = "Scaled Variable Importance Metrics",
     ylab = "Predictors",
     cex = 2,
     origin = 0,
     alpha = 0.45)
##################################
# Independently evaluating the model
# on the test set
##################################
# Build the test-set evaluation frame for PLS: observed Log_Solubility next
# to predictions on the test predictors (response column excluded).
# (Original lines were extraction-scrambled; reconstructed assignment.)
PLS_Test <- data.frame(PLS_Observed = PMA_PreModelling_Test$Log_Solubility,
                       PLS_Predicted = predict(PLS_Tune,
                                               PMA_PreModelling_Test[, !names(PMA_PreModelling_Test) %in% c("Log_Solubility")]))
PLS_Test
## PLS_Observed PLS_Predicted
## 1 0.93 0.74516623
## 2 0.85 0.32286457
## 3 0.81 -0.68678635
## 4 0.74 0.69592854
## 5 0.61 -0.02063332
## 6 0.58 1.46184911
## 7 0.57 0.44414519
## 8 0.56 0.60745934
## 9 0.52 0.28515046
## 10 0.45 -0.82667942
## 11 0.40 -0.81267451
## 12 0.36 -0.64850163
## 13 0.22 -0.14771117
## 14 0.08 -0.39819158
## 15 0.07 -1.13005975
## 16 0.02 -0.54342796
## 17 0.00 -0.20980394
## 18 -0.01 0.11378822
## 19 -0.07 0.37618124
## 20 -0.12 -0.92441367
## 21 -0.17 0.43228521
## 22 -0.29 0.00811489
## 23 -0.38 -0.77028890
## 24 -0.38 -0.93219690
## 25 -0.39 -0.97794310
## 26 -0.42 -0.77065466
## 27 -0.44 -0.75690855
## 28 -0.46 1.24295736
## 29 -0.48 -2.17274777
## 30 -0.60 -1.30587845
## 31 -0.63 -2.21544824
## 32 -0.66 -0.80024408
## 33 -0.72 -0.62072649
## 34 -0.72 -0.09828913
## 35 -0.80 0.34290909
## 36 -0.80 -1.18886889
## 37 -0.82 0.66221622
## 38 -0.82 -0.62813729
## 39 -0.84 0.20935707
## 40 -0.85 -0.73127603
## 41 -0.85 -0.62471779
## 42 -0.87 -1.74166827
## 43 -0.89 -1.09236575
## 44 -0.90 0.01620119
## 45 -0.96 -1.39335311
## 46 -0.96 -0.74195691
## 47 -0.99 -0.21944765
## 48 -1.01 -0.83784671
## 49 -1.09 -1.04732099
## 50 -1.12 -0.46500547
## 51 -1.14 -0.65423556
## 52 -1.17 -1.70787162
## 53 -1.19 -1.60155096
## 54 -1.22 -1.27737980
## 55 -1.27 -1.90933656
## 56 -1.28 -1.13950216
## 57 -1.32 -1.29914009
## 58 -1.38 -1.36423658
## 59 -1.39 -1.54617665
## 60 -1.42 -1.55490958
## 61 -1.47 -1.02147287
## 62 -1.47 -1.56736833
## 63 -1.50 -0.85767316
## 64 -1.52 -1.28141641
## 65 -1.54 -1.48046202
## 66 -1.55 -2.34552576
## 67 -1.56 -3.01893991
## 68 -1.57 -1.89674619
## 69 -1.60 -1.19042598
## 70 -1.60 -2.30994380
## 71 -1.62 -1.26556707
## 72 -1.64 -2.39106649
## 73 -1.67 -1.76352007
## 74 -1.70 -3.02979969
## 75 -1.70 -2.14120973
## 76 -1.71 -2.30199354
## 77 -1.71 -2.32738510
## 78 -1.75 -1.82784190
## 79 -1.78 -1.67857037
## 80 -1.78 -2.43084787
## 81 -1.82 -1.39535781
## 82 -1.87 -2.06608800
## 83 -1.89 -2.13478150
## 84 -1.92 -1.95828264
## 85 -1.92 -1.43848151
## 86 -1.92 -1.40018560
## 87 -1.94 -3.15152921
## 88 -1.99 -2.58836079
## 89 -2.00 -2.43350116
## 90 -2.05 -2.16143503
## 91 -2.06 -1.43677439
## 92 -2.08 -2.29766191
## 93 -2.10 -2.48008812
## 94 -2.11 -1.24175529
## 95 -2.12 -0.57595713
## 96 -2.17 -2.11092901
## 97 -2.21 -1.83978590
## 98 -2.24 -2.75462929
## 99 -2.24 -1.53929965
## 100 -2.29 -2.26545288
## 101 -2.31 -2.41770577
## 102 -2.32 -2.13386632
## 103 -2.35 -2.77068700
## 104 -2.35 -2.27630694
## 105 -2.36 -2.53423634
## 106 -2.36 -1.91198678
## 107 -2.38 -2.41441602
## 108 -2.42 -2.53089810
## 109 -2.43 -3.33749556
## 110 -2.44 -3.19036414
## 111 -2.52 -2.27545717
## 112 -2.53 -3.04173664
## 113 -2.57 -3.08080431
## 114 -2.62 -2.80366660
## 115 -2.62 -2.48223715
## 116 -2.64 -3.50451066
## 117 -2.64 -3.61872227
## 118 -2.70 -2.53747690
## 119 -2.82 -2.61439893
## 120 -2.88 -2.81441783
## 121 -2.89 -2.17109119
## 122 -2.92 -1.22392862
## 123 -2.93 -3.39327798
## 124 -2.96 -2.64427695
## 125 -2.98 -2.21989078
## 126 -3.01 -2.56257240
## 127 -3.01 -3.48867159
## 128 -3.02 -3.55059224
## 129 -3.07 -3.35503650
## 130 -3.09 -2.93492570
## 131 -3.11 -3.00807576
## 132 -3.13 -3.65770226
## 133 -3.14 -2.00219329
## 134 -3.15 -3.56664342
## 135 -3.22 -2.66783119
## 136 -3.26 -3.34287167
## 137 -3.27 -2.88657518
## 138 -3.27 -2.90118907
## 139 -3.30 -3.05460695
## 140 -3.31 -2.36959803
## 141 -3.33 -2.25720169
## 142 -3.37 -2.16115690
## 143 -3.43 -3.34354398
## 144 -3.43 -2.38472019
## 145 -3.48 -2.86222931
## 146 -3.51 -3.51097380
## 147 -3.59 -1.90045149
## 148 -3.61 -2.64203809
## 149 -3.63 -3.51315051
## 150 -3.63 -3.44342931
## 151 -3.68 -1.86708149
## 152 -3.71 -3.76968428
## 153 -3.74 -2.30299100
## 154 -3.75 -3.64762358
## 155 -3.75 -2.62428090
## 156 -3.77 -3.13696015
## 157 -3.77 -4.22015240
## 158 -3.78 -5.69040930
## 159 -3.81 -3.67272006
## 160 -3.95 -4.42082593
## 161 -3.96 -5.36110080
## 162 -3.96 -4.18819513
## 163 -4.00 -3.57001838
## 164 -4.02 -5.01716681
## 165 -4.04 -4.31607551
## 166 -4.12 -3.53587473
## 167 -4.15 -4.93376854
## 168 -4.16 -3.64282935
## 169 -4.17 -4.48596196
## 170 -4.21 -4.62731700
## 171 -4.23 -4.37316805
## 172 -4.25 -3.45146883
## 173 -4.30 -3.52342335
## 174 -4.31 -5.70580727
## 175 -4.35 -4.64309157
## 176 -4.40 -4.14102773
## 177 -4.40 -4.09464184
## 178 -4.43 -4.69360246
## 179 -4.46 -4.53047986
## 180 -4.47 -3.24594717
## 181 -4.51 -5.10145111
## 182 -4.60 -3.59325502
## 183 -4.64 -4.64792161
## 184 -4.69 -4.94079150
## 185 -4.71 -4.02841192
## 186 -4.77 -3.40564565
## 187 -4.95 -4.62714984
## 188 -4.98 -3.72037252
## 189 -5.21 -5.69086767
## 190 -5.22 -5.71349252
## 191 -5.28 -4.32241689
## 192 -5.31 -2.97633559
## 193 -5.35 -4.73049637
## 194 -5.37 -5.01320884
## 195 -5.40 -4.61985993
## 196 -5.43 -4.49627474
## 197 -5.65 -5.49894785
## 198 -5.66 -4.31998082
## 199 -6.70 -5.03126968
## 200 -5.72 -4.99345717
## 201 -6.00 -7.44493866
## 202 -6.25 -6.38632161
## 203 -6.26 -6.30658011
## 204 -6.27 -6.61828828
## 205 -6.35 -5.78215846
## 206 -6.57 -6.04996054
## 207 -6.62 -5.08427265
## 208 -6.96 -6.11228466
## 209 -7.02 -7.73073818
## 210 -7.20 -7.14859732
## 211 -7.28 -7.08864886
## 212 -7.32 -7.35627710
## 213 -7.39 -7.69737633
## 214 -7.82 -8.21120559
## 215 -8.23 -9.28917253
## 216 -8.94 -8.34390493
## 217 1.07 -0.18673697
## 218 0.43 0.21920251
## 219 0.32 -0.80294519
## 220 0.00 0.25049656
## 221 -0.40 -0.82164221
## 222 -0.52 -0.48163966
## 223 -0.55 -0.55600576
## 224 -0.60 -0.76670069
## 225 -0.62 -2.48616925
## 226 -0.85 -1.38484047
## 227 -0.89 -0.61468811
## 228 -0.93 -0.75031525
## 229 -0.96 -0.26052538
## 230 -1.06 -1.95631799
## 231 -1.10 -1.70410907
## 232 -1.12 -0.99095724
## 233 -1.15 -0.72691439
## 234 -1.28 -0.44749129
## 235 -1.30 -1.82920071
## 236 -1.31 -1.40072052
## 237 -1.35 -2.84117775
## 238 -1.39 -2.32994756
## 239 -1.41 -1.60155096
## 240 -1.41 -1.35598735
## 241 -1.42 -0.55969822
## 242 -1.46 -2.03712578
## 243 -1.50 -1.64216074
## 244 -1.50 -1.96940438
## 245 -1.52 -1.53726702
## 246 -1.52 -0.47282000
## 247 -1.59 -1.48907422
## 248 -1.61 -0.73245535
## 249 -1.63 -1.32416935
## 250 -1.71 -2.46798249
## 251 -1.83 -2.17059291
## 252 -2.05 -1.64450839
## 253 -2.06 -2.34123762
## 254 -2.07 -3.32784866
## 255 -2.15 -2.52800080
## 256 -2.16 -0.90783766
## 257 -1.99 0.54163534
## 258 -2.36 -1.84641829
## 259 -2.38 -3.81334635
## 260 -2.39 -1.70383078
## 261 -2.46 -2.18438819
## 262 -2.49 -2.31226148
## 263 -2.54 -2.98101664
## 264 -2.55 -2.88699504
## 265 -2.63 -2.33250034
## 266 -2.64 -1.50217898
## 267 -2.67 -2.66499236
## 268 -2.68 -2.15374558
## 269 -2.77 -2.56856285
## 270 -2.78 -2.98994252
## 271 -2.82 -2.76635710
## 272 -2.92 -3.59929134
## 273 -3.03 -3.45320339
## 274 -3.12 -3.35485037
## 275 -3.16 -3.29895149
## 276 -3.19 -3.21661282
## 277 -3.54 -3.62961130
## 278 -3.54 -2.34156420
## 279 -3.59 -3.79258018
## 280 -3.66 -3.13048096
## 281 -3.68 -2.35661151
## 282 -3.75 -3.87439692
## 283 -3.76 -3.94072391
## 284 -3.78 -4.10197539
## 285 -3.80 -4.12056709
## 286 -3.80 -4.27716205
## 287 -3.85 -3.38186479
## 288 -3.89 -4.06169949
## 289 -3.95 -4.21471603
## 290 -4.29 -4.77518396
## 291 -4.42 -4.53990613
## 292 -4.48 -4.44961229
## 293 -4.48 -3.43841877
## 294 -4.53 -4.95817112
## 295 -4.63 -4.35759291
## 296 -4.73 -4.05065023
## 297 -4.84 -4.22520600
## 298 -4.89 -3.85329860
## 299 -4.89 -4.80375460
## 300 -5.26 -5.63390877
## 301 -6.09 -4.92519297
## 302 -6.29 -5.80307805
## 303 -6.29 -6.03815212
## 304 -6.89 -5.48895866
## 305 -6.96 -6.73734679
## 306 -7.00 -6.79982974
## 307 -7.05 -7.61763483
## 308 -8.30 -8.71246554
## 309 -8.66 -9.04631273
## 310 -9.03 -9.68264256
## 311 -10.41 -9.78744938
## 312 -7.89 -7.32881571
## 313 -2.32 -1.67527618
## 314 0.39 -2.65782329
## 315 -2.90 -5.22722039
## 316 -2.47 -5.38039716
##################################
# Reporting the independent evaluation results
# for the test set
##################################
# Compute test-set performance metrics (RMSE, Rsquared, MAE) for the PLS model;
# outer parentheses auto-print the result (see output below).
(PLS_Test_Metrics <- postResample(PLS_Test[, 2], PLS_Test[, 1]))
## RMSE Rsquared MAE
## 0.7647343 0.8670618 0.5743195
# Extract the test-set RMSE for the PLS model (first element of postResample output).
(PLS_Test_RMSE <- PLS_Test_Metrics[1])
## RMSE
## 0.7647343
# Extract the test-set R-squared for the PLS model (second element of postResample output).
(PLS_Test_Rsquared <- PLS_Test_Metrics[2])
## Rsquared
## 0.8670618
##################################
# Transforming factor predictors
# as required by the nature of the model
##################################
# Creating a local object
# for the train and test sets
##################################
# Coerce every train-set column to numeric (factor -> character -> numeric)
# as required by the averaged neural network model.
PMA_PreModelling_Train_AVNN <- as.data.frame(lapply(PMA_PreModelling_Train,
                                                    function(x) as.numeric(as.character(x))))
dim(PMA_PreModelling_Train_AVNN)
## [1] 951 221
# Apply the same numeric coercion to the test set so train and test share types.
PMA_PreModelling_Test_AVNN <- as.data.frame(lapply(PMA_PreModelling_Test,
                                                   function(x) as.numeric(as.character(x))))
dim(PMA_PreModelling_Test_AVNN)
## [1] 316 221
##################################
# Creating consistent fold assignments
# for the 10-Fold Cross Validation process
##################################
# Fix the RNG so the fold assignments are reproducible across model sections.
set.seed(12345678)
# Build 10 training-index folds on the response; returnTrain = TRUE yields
# the in-fold (training) row indices for each resample.
KFold_Indices <- createFolds(PMA_PreModelling_Train_AVNN$Log_Solubility,
                             k = 10,
                             returnTrain = TRUE)
# Reuse the identical folds via the index argument so every model section
# is cross-validated on the same resamples.
KFold_Control <- trainControl(method = "cv",
                              index = KFold_Indices)
##################################
# Setting the conditions
# for hyperparameter tuning
##################################
# Hyperparameter grid for avNNet: weight decay x hidden-layer size (bagging off).
AVNN_Grid <- expand.grid(decay = c(0.00, 0.01, 0.10),
                         size = c(1, 5, 9, 13),
                         bag = FALSE)
# Largest hidden-layer size in the grid, used below to bound MaxNWts.
maxSize <- max(AVNN_Grid$size)
##################################
# Running the averaged neural network model
# by setting the caret method to 'avNNet'
##################################
# Fit the model averaged neural network over the tuning grid with 10-fold CV.
set.seed(12345678)
AVNN_Tune <- train(x = PMA_PreModelling_Train_AVNN[, !names(PMA_PreModelling_Train_AVNN) %in% c("Log_Solubility")],
                   y = PMA_PreModelling_Train_AVNN$Log_Solubility,
                   method = "avNNet",
                   tuneGrid = AVNN_Grid,
                   trControl = KFold_Control,
                   preProc = c("center", "scale"),
                   # Linear output units: this is a regression, not classification.
                   linout = TRUE,
                   trace = FALSE,
                   # Upper bound on network weights for the largest grid size:
                   # maxSize hidden units x (predictors + bias) + output weights + bias.
                   MaxNWts = maxSize * ((ncol(PMA_PreModelling_Train_AVNN) - 1) + 1) + maxSize + 1,
                   # NOTE(review): maxit = 5 is very few training iterations for avNNet —
                   # confirm this is intentional (e.g. to keep runtime down for the demo).
                   maxit = 5,
                   allowParallel = FALSE)
##################################
# Reporting the cross-validation results
# for the train set
##################################
AVNN_Tune
## Model Averaged Neural Network
##
## 951 samples
## 220 predictors
##
## Pre-processing: centered (220), scaled (220)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 857, 855, 855, 856, 857, 856, ...
## Resampling results across tuning parameters:
##
## decay size RMSE Rsquared MAE
## 0.00 1 1.454694 0.5172018 1.1109574
## 0.00 5 1.347390 0.6971736 1.0964081
## 0.00 9 1.792702 0.6637279 1.4885554
## 0.00 13 1.088251 0.7302139 0.8277263
## 0.01 1 1.448632 0.5272961 1.1222274
## 0.01 5 1.315333 0.7115074 1.0725117
## 0.01 9 1.777012 0.6453049 1.4544647
## 0.01 13 1.060884 0.7480237 0.8047786
## 0.10 1 1.478537 0.5099087 1.1292415
## 0.10 5 1.308401 0.6962179 1.0545170
## 0.10 9 1.807650 0.6393351 1.4786651
## 0.10 13 1.115552 0.7253052 0.8510722
##
## Tuning parameter 'bag' was held constant at a value of FALSE
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were size = 13, decay = 0.01 and bag
## = FALSE.
# Inspect the final fitted avNNet model chosen by cross-validation.
AVNN_Tune$finalModel
## Model Averaged Neural Network with 5 Repeats
##
## a 220-13-1 network with 2887 weights
## options were - linear output units decay=0.01
# Full cross-validation results table across all tuning-grid combinations.
AVNN_Tune$results
## decay size bag RMSE Rsquared MAE RMSESD RsquaredSD
## 1 0.00 1 FALSE 1.454694 0.5172018 1.1109574 0.12683175 0.08309555
## 5 0.01 1 FALSE 1.448632 0.5272961 1.1222274 0.17580426 0.06594086
## 9 0.10 1 FALSE 1.478537 0.5099087 1.1292415 0.11601214 0.08200610
## 2 0.00 5 FALSE 1.347390 0.6971736 1.0964081 0.12991586 0.04513138
## 6 0.01 5 FALSE 1.315333 0.7115074 1.0725117 0.13161757 0.04596683
## 10 0.10 5 FALSE 1.308401 0.6962179 1.0545170 0.09774587 0.03100244
## 3 0.00 9 FALSE 1.792702 0.6637279 1.4885554 0.22327568 0.06591481
## 7 0.01 9 FALSE 1.777012 0.6453049 1.4544647 0.22293972 0.07472578
## 11 0.10 9 FALSE 1.807650 0.6393351 1.4786651 0.25501136 0.04410773
## 4 0.00 13 FALSE 1.088251 0.7302139 0.8277263 0.13964203 0.03951261
## 8 0.01 13 FALSE 1.060884 0.7480237 0.8047786 0.10299708 0.02500384
## 12 0.10 13 FALSE 1.115552 0.7253052 0.8510722 0.12135148 0.02874047
## MAESD
## 1 0.08554343
## 5 0.13029101
## 9 0.08595082
## 2 0.11431945
## 6 0.10837426
## 10 0.09158530
## 3 0.21618322
## 7 0.21689288
## 11 0.23327268
## 4 0.09673945
## 8 0.08015089
## 12 0.07083277
# Pull the cross-validated RMSE of the best (decay, size) combination.
(AVNN_Train_RMSE <- AVNN_Tune$results[AVNN_Tune$results$decay == AVNN_Tune$bestTune$decay &
                                        AVNN_Tune$results$size == AVNN_Tune$bestTune$size,
                                      c("RMSE")])
## [1] 1.060884
# Pull the cross-validated R-squared of the best (decay, size) combination.
(AVNN_Train_Rsquared <- AVNN_Tune$results[AVNN_Tune$results$decay == AVNN_Tune$bestTune$decay &
                                            AVNN_Tune$results$size == AVNN_Tune$bestTune$size,
                                          c("Rsquared")])
## [1] 0.7480237
##################################
# Identifying and plotting the
# best model predictors
##################################
# model does not support variable importance measurement
##################################
# Independently evaluating the model
# on the test set
##################################
# Pair observed test-set responses with avNNet predictions for independent evaluation.
AVNN_Test <- data.frame(AVNN_Observed = PMA_PreModelling_Test$Log_Solubility,
                        AVNN_Predicted = predict(AVNN_Tune,
                                                 PMA_PreModelling_Test_AVNN[, !names(PMA_PreModelling_Test_AVNN) %in% c("Log_Solubility")]))
AVNN_Test
## AVNN_Observed AVNN_Predicted
## 1 0.93 0.00835390
## 2 0.85 0.78571273
## 3 0.81 -0.51048960
## 4 0.74 -0.61831608
## 5 0.61 0.68492837
## 6 0.58 0.58222525
## 7 0.57 0.11210291
## 8 0.56 0.12698662
## 9 0.52 -0.12960828
## 10 0.45 -0.49082527
## 11 0.40 -0.40533715
## 12 0.36 -1.13928008
## 13 0.22 0.22318452
## 14 0.08 -0.70202613
## 15 0.07 -1.20477469
## 16 0.02 -1.03719974
## 17 0.00 -0.62075796
## 18 -0.01 -0.49328897
## 19 -0.07 0.66286832
## 20 -0.12 -0.69904157
## 21 -0.17 0.09073153
## 22 -0.29 -1.35399935
## 23 -0.38 -1.03811354
## 24 -0.38 -0.82194203
## 25 -0.39 -0.17005757
## 26 -0.42 -1.05297811
## 27 -0.44 -1.02337861
## 28 -0.46 0.38382574
## 29 -0.48 -2.09529708
## 30 -0.60 -1.24206210
## 31 -0.63 -2.60578058
## 32 -0.66 -0.38872809
## 33 -0.72 -0.77382880
## 34 -0.72 -0.43385534
## 35 -0.80 0.04545028
## 36 -0.80 -0.14925624
## 37 -0.82 0.23297104
## 38 -0.82 -0.80013641
## 39 -0.84 -0.27710142
## 40 -0.85 -1.70345814
## 41 -0.85 -0.76783426
## 42 -0.87 -3.02947397
## 43 -0.89 -1.51468908
## 44 -0.90 1.12761796
## 45 -0.96 -1.11731956
## 46 -0.96 -0.85399216
## 47 -0.99 -1.39298469
## 48 -1.01 -0.72232209
## 49 -1.09 -1.19918137
## 50 -1.12 0.18221554
## 51 -1.14 0.51859602
## 52 -1.17 -1.33064481
## 53 -1.19 -0.83994275
## 54 -1.22 0.08354657
## 55 -1.27 -1.27824935
## 56 -1.28 -1.09442111
## 57 -1.32 -1.31255854
## 58 -1.38 -1.08411707
## 59 -1.39 -1.19379705
## 60 -1.42 -2.04316995
## 61 -1.47 -0.56252323
## 62 -1.47 -1.60297815
## 63 -1.50 -2.80359125
## 64 -1.52 -0.87796640
## 65 -1.54 -0.96890154
## 66 -1.55 -3.15772190
## 67 -1.56 -1.31225645
## 68 -1.57 -2.04907753
## 69 -1.60 -1.89059589
## 70 -1.60 -2.98244610
## 71 -1.62 -2.29821948
## 72 -1.64 -2.69202325
## 73 -1.67 -2.38916972
## 74 -1.70 -3.22556202
## 75 -1.70 -1.61334157
## 76 -1.71 -2.05994845
## 77 -1.71 -2.19948977
## 78 -1.75 -2.13743135
## 79 -1.78 -0.79153623
## 80 -1.78 -1.64050145
## 81 -1.82 -0.51074680
## 82 -1.87 -2.07706563
## 83 -1.89 -2.50249476
## 84 -1.92 -1.72910335
## 85 -1.92 -1.55732946
## 86 -1.92 -1.03687112
## 87 -1.94 -3.62249029
## 88 -1.99 -2.18226488
## 89 -2.00 -1.98415246
## 90 -2.05 -3.52952353
## 91 -2.06 -2.46322259
## 92 -2.08 -1.53820408
## 93 -2.10 -3.07132848
## 94 -2.11 -1.97194029
## 95 -2.12 -0.68008252
## 96 -2.17 -1.10143571
## 97 -2.21 -1.72757518
## 98 -2.24 -2.23971064
## 99 -2.24 -0.70717337
## 100 -2.29 -1.67267736
## 101 -2.31 -1.95867771
## 102 -2.32 -1.91660905
## 103 -2.35 -2.47109392
## 104 -2.35 -0.94056709
## 105 -2.36 -2.24959670
## 106 -2.36 -1.44652998
## 107 -2.38 -3.02820191
## 108 -2.42 -3.78064848
## 109 -2.43 -2.44915089
## 110 -2.44 -2.80783086
## 111 -2.52 -2.43404320
## 112 -2.53 -1.91786300
## 113 -2.57 -2.15716850
## 114 -2.62 -2.59147642
## 115 -2.62 -4.28861584
## 116 -2.64 -2.44080054
## 117 -2.64 -2.82150146
## 118 -2.70 -0.96438386
## 119 -2.82 -3.07901330
## 120 -2.88 -3.62774695
## 121 -2.89 -3.14358987
## 122 -2.92 -0.76696761
## 123 -2.93 -2.04099107
## 124 -2.96 -2.36877417
## 125 -2.98 -3.36636463
## 126 -3.01 -2.25293743
## 127 -3.01 -3.06256241
## 128 -3.02 -1.84133063
## 129 -3.07 -3.07513117
## 130 -3.09 -3.80076282
## 131 -3.11 -2.38144290
## 132 -3.13 -3.63954367
## 133 -3.14 -2.61244568
## 134 -3.15 -2.98572459
## 135 -3.22 -1.72694956
## 136 -3.26 -4.26707540
## 137 -3.27 -2.90162584
## 138 -3.27 -2.23245869
## 139 -3.30 -2.21278855
## 140 -3.31 -2.61904022
## 141 -3.33 -1.55138367
## 142 -3.37 -2.76559371
## 143 -3.43 -4.41158035
## 144 -3.43 -2.85181196
## 145 -3.48 -3.20631535
## 146 -3.51 -4.08806369
## 147 -3.59 -2.41230699
## 148 -3.61 -1.72312668
## 149 -3.63 -2.81197400
## 150 -3.63 -3.93062803
## 151 -3.68 -1.49254880
## 152 -3.71 -3.60868789
## 153 -3.74 -2.29882160
## 154 -3.75 -2.74608985
## 155 -3.75 -2.96813956
## 156 -3.77 -4.70066581
## 157 -3.77 -4.75519430
## 158 -3.78 -5.00876028
## 159 -3.81 -3.37272557
## 160 -3.95 -3.70714823
## 161 -3.96 -5.86251786
## 162 -3.96 -4.55209739
## 163 -4.00 -2.58911154
## 164 -4.02 -2.51963442
## 165 -4.04 -4.50543716
## 166 -4.12 -2.79170717
## 167 -4.15 -4.99284968
## 168 -4.16 -2.99591311
## 169 -4.17 -3.84198527
## 170 -4.21 -4.56846868
## 171 -4.23 -3.93101395
## 172 -4.25 -3.35808714
## 173 -4.30 -3.78313824
## 174 -4.31 -3.74132143
## 175 -4.35 -5.79697188
## 176 -4.40 -3.92745774
## 177 -4.40 -4.85531112
## 178 -4.43 -4.51974115
## 179 -4.46 -4.57412102
## 180 -4.47 -2.58763321
## 181 -4.51 -4.36878843
## 182 -4.60 -4.70521062
## 183 -4.64 -4.31725299
## 184 -4.69 -4.03858514
## 185 -4.71 -4.14580224
## 186 -4.77 -4.61006469
## 187 -4.95 -2.71045438
## 188 -4.98 -5.08058226
## 189 -5.21 -5.98762635
## 190 -5.22 -5.22465904
## 191 -5.28 -4.50680156
## 192 -5.31 -3.71066709
## 193 -5.35 -4.60543848
## 194 -5.37 -4.54512108
## 195 -5.40 -4.88559019
## 196 -5.43 -4.31730992
## 197 -5.65 -5.16448836
## 198 -5.66 -4.33687084
## 199 -6.70 -4.54416493
## 200 -5.72 -5.23912914
## 201 -6.00 -6.16503175
## 202 -6.25 -6.08588672
## 203 -6.26 -6.08485674
## 204 -6.27 -6.26339342
## 205 -6.35 -5.58991667
## 206 -6.57 -5.40606700
## 207 -6.62 -5.19018394
## 208 -6.96 -5.22897476
## 209 -7.02 -5.65820122
## 210 -7.20 -5.66316999
## 211 -7.28 -6.33206179
## 212 -7.32 -6.11924582
## 213 -7.39 -6.30081062
## 214 -7.82 -6.32123413
## 215 -8.23 -6.29083688
## 216 -8.94 -6.28434163
## 217 1.07 0.23295450
## 218 0.43 0.28706554
## 219 0.32 0.43424401
## 220 0.00 -0.54398087
## 221 -0.40 -0.98778263
## 222 -0.52 0.06707668
## 223 -0.55 -0.49434335
## 224 -0.60 -1.19571555
## 225 -0.62 -1.61603359
## 226 -0.85 -1.46371481
## 227 -0.89 -1.24752620
## 228 -0.93 -0.23322076
## 229 -0.96 0.49310055
## 230 -1.06 -2.90980832
## 231 -1.10 -1.43823249
## 232 -1.12 -1.05774573
## 233 -1.15 -0.94895261
## 234 -1.28 -0.24414870
## 235 -1.30 -1.69181220
## 236 -1.31 -1.15680076
## 237 -1.35 -1.34345376
## 238 -1.39 -2.43615548
## 239 -1.41 -0.83994275
## 240 -1.41 -0.62713819
## 241 -1.42 -0.42126656
## 242 -1.46 -0.92502808
## 243 -1.50 -1.54331764
## 244 -1.50 -2.53425009
## 245 -1.52 -3.28944231
## 246 -1.52 -0.75450253
## 247 -1.59 -1.39755119
## 248 -1.61 -0.80812713
## 249 -1.63 -1.08335055
## 250 -1.71 -1.69518803
## 251 -1.83 -2.44577086
## 252 -2.05 -0.71095293
## 253 -2.06 -1.88001256
## 254 -2.07 -2.68440533
## 255 -2.15 -1.55437927
## 256 -2.16 -2.42083797
## 257 -1.99 -1.32568853
## 258 -2.36 -1.85484082
## 259 -2.38 -3.97836785
## 260 -2.39 -0.19994597
## 261 -2.46 -2.51584092
## 262 -2.49 -2.81503326
## 263 -2.54 -2.08462296
## 264 -2.55 -2.82351411
## 265 -2.63 -3.31583908
## 266 -2.64 -2.52634719
## 267 -2.67 -2.89598839
## 268 -2.68 -0.56201302
## 269 -2.77 -1.92602730
## 270 -2.78 -2.85209098
## 271 -2.82 -2.13056602
## 272 -2.92 -3.77351396
## 273 -3.03 -3.70074705
## 274 -3.12 -3.86003705
## 275 -3.16 -1.63053815
## 276 -3.19 -4.36452441
## 277 -3.54 -3.67372725
## 278 -3.54 -2.11935050
## 279 -3.59 -3.77519251
## 280 -3.66 -2.76360588
## 281 -3.68 -1.80986450
## 282 -3.75 -4.12951274
## 283 -3.76 -3.94918060
## 284 -3.78 -3.87609352
## 285 -3.80 -3.83140382
## 286 -3.80 -4.76717737
## 287 -3.85 -3.06788830
## 288 -3.89 -3.19657712
## 289 -3.95 -3.81279846
## 290 -4.29 -5.56382993
## 291 -4.42 -3.61559076
## 292 -4.48 -4.45091217
## 293 -4.48 -3.07783558
## 294 -4.53 -4.67087856
## 295 -4.63 -4.45586703
## 296 -4.73 -4.10678904
## 297 -4.84 -3.82555739
## 298 -4.89 -4.27538138
## 299 -4.89 -4.90839493
## 300 -5.26 -5.77900486
## 301 -6.09 -5.16890905
## 302 -6.29 -5.49249058
## 303 -6.29 -5.97719748
## 304 -6.89 -4.84655705
## 305 -6.96 -5.19380937
## 306 -7.00 -6.17027777
## 307 -7.05 -6.36064957
## 308 -8.30 -6.33323369
## 309 -8.66 -6.17292319
## 310 -9.03 -6.19240516
## 311 -10.41 -6.37723082
## 312 -7.89 -6.21823503
## 313 -2.32 -1.67539765
## 314 0.39 -2.24823570
## 315 -2.90 -4.04380850
## 316 -2.47 -3.52349712
##################################
# Reporting the independent evaluation results
# for the test set
##################################
# Compute test-set performance metrics (RMSE, Rsquared, MAE) for the avNNet model.
(AVNN_Test_Metrics <- postResample(AVNN_Test[, 2], AVNN_Test[, 1]))
## RMSE Rsquared MAE
## 0.9862466 0.7829311 0.7664094
# Extract the test-set RMSE for the avNNet model.
(AVNN_Test_RMSE <- AVNN_Test_Metrics[1])
## RMSE
## 0.9862466
# Extract the test-set R-squared for the avNNet model.
(AVNN_Test_Rsquared <- AVNN_Test_Metrics[2])
## Rsquared
## 0.7829311
##################################
# Creating a local object
# for the train set
##################################
# Local copy of the train set for the MARS section (no coercion needed:
# earth handles the predictors as-is).
PMA_PreModelling_Train_MARS <- PMA_PreModelling_Train
##################################
# Creating consistent fold assignments
# for the 10-Fold Cross Validation process
##################################
# Re-seed and rebuild the same 10 training-index folds so the MARS model is
# cross-validated on resamples consistent with the other model sections.
set.seed(12345678)
KFold_Indices <- createFolds(PMA_PreModelling_Train_MARS$Log_Solubility,
                             k = 10,
                             returnTrain = TRUE)
KFold_Control <- trainControl(method = "cv",
                              index = KFold_Indices)
##################################
# Setting the conditions
# for hyperparameter tuning
##################################
# Hyperparameter grid for MARS: interaction degree x number of retained terms.
# (The original nested expand.grid(expand.grid(...)) call was redundant and would
# cross the already-expanded columns; the printed CV results show the plain
# 5 x 4 = 20-row grid, so a single expand.grid is the correct form.)
MARS_Grid <- expand.grid(degree = 1:5, nprune = seq(5, 20, length = 4))
##################################
# Running the multivariate adaptive regression splines model
# by setting the caret method to 'earth'
##################################
# Fit the multivariate adaptive regression splines model over the tuning grid
# with 10-fold CV (caret method 'earth').
set.seed(12345678)
MARS_Tune <- train(x = PMA_PreModelling_Train_MARS[, !names(PMA_PreModelling_Train_MARS) %in% c("Log_Solubility")],
                   y = PMA_PreModelling_Train_MARS$Log_Solubility,
                   method = "earth",
                   tuneGrid = MARS_Grid,
                   trControl = KFold_Control)
##################################
# Reporting the cross-validation results
# for the train set
##################################
MARS_Tune
## Multivariate Adaptive Regression Spline
##
## 951 samples
## 220 predictors
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 857, 855, 855, 856, 857, 856, ...
## Resampling results across tuning parameters:
##
## degree nprune RMSE Rsquared MAE
## 1 5 0.9885345 0.7679877 0.7535049
## 1 10 0.8378705 0.8331533 0.6482106
## 1 15 0.7724676 0.8571440 0.5925168
## 1 20 0.7502562 0.8647265 0.5766279
## 2 5 1.0173780 0.7495592 0.7685247
## 2 10 0.8369038 0.8332926 0.6483745
## 2 15 0.7536825 0.8642340 0.5753097
## 2 20 0.7104771 0.8789853 0.5399575
## 3 5 1.0054768 0.7579555 0.7661151
## 3 10 0.8394402 0.8318432 0.6384177
## 3 15 0.7409402 0.8693494 0.5689538
## 3 20 0.7034801 0.8823049 0.5417897
## 4 5 1.0036472 0.7577222 0.7660022
## 4 10 0.8420512 0.8317263 0.6388350
## 4 15 0.7358285 0.8720765 0.5599648
## 4 20 0.7096746 0.8802622 0.5419139
## 5 5 1.0036472 0.7577222 0.7660022
## 5 10 0.8420512 0.8317263 0.6388350
## 5 15 0.7358285 0.8720765 0.5599648
## 5 20 0.7096746 0.8802622 0.5419139
##
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were nprune = 20 and degree = 3.
# Inspect the final fitted earth model chosen by cross-validation.
MARS_Tune$finalModel
## Selected 20 of 54 terms, and 14 of 220 predictors (nprune=20)
## Termination condition: RSq changed by less than 0.001 at 54 terms
## Importance: MolWeight, NumCarbon, SurfaceArea1, NumRotBonds, FP1421, ...
## Number of terms at each degree of interaction: 1 8 10 1
## GCV 0.4295012 RSS 367.8563 GRSq 0.8975708 RSq 0.9075576
# Full cross-validation results table across all tuning-grid combinations.
MARS_Tune$results
## degree nprune RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 5 0.9885345 0.7679877 0.7535049 0.06217594 0.02392790 0.04607850
## 5 2 5 1.0173780 0.7495592 0.7685247 0.05718989 0.06196692 0.05106240
## 9 3 5 1.0054768 0.7579555 0.7661151 0.06779247 0.04314324 0.05183156
## 13 4 5 1.0036472 0.7577222 0.7660022 0.09683141 0.04816271 0.07672106
## 17 5 5 1.0036472 0.7577222 0.7660022 0.09683141 0.04816271 0.07672106
## 2 1 10 0.8378705 0.8331533 0.6482106 0.06679188 0.02230525 0.05226945
## 6 2 10 0.8369038 0.8332926 0.6483745 0.08936033 0.03616057 0.05782491
## 10 3 10 0.8394402 0.8318432 0.6384177 0.07319923 0.02514998 0.05911625
## 14 4 10 0.8420512 0.8317263 0.6388350 0.05710409 0.02418435 0.05010629
## 18 5 10 0.8420512 0.8317263 0.6388350 0.05710409 0.02418435 0.05010629
## 3 1 15 0.7724676 0.8571440 0.5925168 0.05872201 0.02799201 0.05136498
## 7 2 15 0.7536825 0.8642340 0.5753097 0.06302002 0.02684197 0.05073034
## 11 3 15 0.7409402 0.8693494 0.5689538 0.09111186 0.02236058 0.07082359
## 15 4 15 0.7358285 0.8720765 0.5599648 0.07979965 0.02056922 0.06118755
## 19 5 15 0.7358285 0.8720765 0.5599648 0.07979965 0.02056922 0.06118755
## 4 1 20 0.7502562 0.8647265 0.5766279 0.05189451 0.02990680 0.04122031
## 8 2 20 0.7104771 0.8789853 0.5399575 0.06605725 0.02465061 0.04947483
## 12 3 20 0.7034801 0.8823049 0.5417897 0.08445692 0.01978894 0.05903665
## 16 4 20 0.7096746 0.8802622 0.5419139 0.07222965 0.02381439 0.04888709
## 20 5 20 0.7096746 0.8802622 0.5419139 0.07222965 0.02381439 0.04888709
# Pull the cross-validated RMSE of the best (nprune, degree) combination.
(MARS_Train_RMSE <- MARS_Tune$results[MARS_Tune$results$nprune == MARS_Tune$bestTune$nprune &
                                        MARS_Tune$results$degree == MARS_Tune$bestTune$degree,
                                      c("RMSE")])
## [1] 0.7034801
# Pull the cross-validated R-squared of the best (nprune, degree) combination.
(MARS_Train_Rsquared <- MARS_Tune$results[MARS_Tune$results$nprune == MARS_Tune$bestTune$nprune &
                                            MARS_Tune$results$degree == MARS_Tune$bestTune$degree,
                                          c("Rsquared")])
## [1] 0.8823049
##################################
# Identifying and plotting the
# best model predictors
##################################
# Rank predictors by scaled variable importance and plot the top 25.
MARS_VarImp <- varImp(MARS_Tune, scale = TRUE)
plot(MARS_VarImp,
     top = 25,
     scales = list(y = list(cex = .95)),
     main = "Ranked Variable Importance : Multivariate Adaptive Regression Splines",
     xlab = "Scaled Variable Importance Metrics",
     ylab = "Predictors",
     cex = 2,
     origin = 0,
     alpha = 0.45)
##################################
# Independently evaluating the model
# on the test set
##################################
# Pair observed test-set responses with MARS predictions for independent evaluation.
# (earth's predict returns a column named "y", which is why the printed data frame
# below shows "y" as the second column header.)
MARS_Test <- data.frame(MARS_Observed = PMA_PreModelling_Test$Log_Solubility,
                        MARS_Predicted = predict(MARS_Tune,
                                                 PMA_PreModelling_Test[, !names(PMA_PreModelling_Test) %in% c("Log_Solubility")]))
MARS_Test
## MARS_Observed y
## 1 0.93 0.76892387
## 2 0.85 0.14312439
## 3 0.81 -0.34585499
## 4 0.74 -0.33314461
## 5 0.61 -0.45292704
## 6 0.58 0.97274206
## 7 0.57 0.48607461
## 8 0.56 0.73627468
## 9 0.52 -0.03806131
## 10 0.45 -0.14649919
## 11 0.40 -0.14649919
## 12 0.36 -1.79679729
## 13 0.22 -0.41538986
## 14 0.08 -0.40552544
## 15 0.07 -1.17892314
## 16 0.02 -0.49870512
## 17 0.00 -0.59427338
## 18 -0.01 -0.22958003
## 19 -0.07 0.25523921
## 20 -0.12 -0.83910301
## 21 -0.17 0.54520397
## 22 -0.29 0.61508860
## 23 -0.38 -0.91923808
## 24 -0.38 -0.74344480
## 25 -0.39 -0.91923808
## 26 -0.42 -0.74344480
## 27 -0.44 -0.59721214
## 28 -0.46 0.36577945
## 29 -0.48 -2.61004959
## 30 -0.60 -0.65168062
## 31 -0.63 -3.01395024
## 32 -0.66 -0.73554105
## 33 -0.72 -0.73554105
## 34 -0.72 -0.54774008
## 35 -0.80 -0.54774008
## 36 -0.80 -0.99254612
## 37 -0.82 0.09111766
## 38 -0.82 -0.73554105
## 39 -0.84 -0.62663374
## 40 -0.85 -0.80027030
## 41 -0.85 -0.34900714
## 42 -0.87 -1.55934291
## 43 -0.89 -1.37553750
## 44 -0.90 0.17140988
## 45 -0.96 -2.27526868
## 46 -0.96 0.32437683
## 47 -0.99 -0.23488426
## 48 -1.01 -0.73554105
## 49 -1.09 -1.24426320
## 50 -1.12 -1.38447918
## 51 -1.14 0.13754052
## 52 -1.17 -1.13685295
## 53 -1.19 -1.26101447
## 54 -1.22 -1.22800620
## 55 -1.27 -1.50775967
## 56 -1.28 -1.55867097
## 57 -1.32 -1.64040574
## 58 -1.38 -1.89603447
## 59 -1.39 -1.50959168
## 60 -1.42 -2.36130718
## 61 -1.47 -0.96904918
## 62 -1.47 -1.32309870
## 63 -1.50 -0.57828240
## 64 -1.52 -1.36259436
## 65 -1.54 -1.26101447
## 66 -1.55 -2.03224428
## 67 -1.56 -2.11989280
## 68 -1.57 -1.65889136
## 69 -1.60 -1.53401452
## 70 -1.60 -1.59059428
## 71 -1.62 -2.40166859
## 72 -1.64 -2.05873294
## 73 -1.67 -1.46853229
## 74 -1.70 -2.55740066
## 75 -1.70 -1.94955288
## 76 -1.71 -2.02070510
## 77 -1.71 -1.98783184
## 78 -1.75 -1.57432194
## 79 -1.78 -1.27949961
## 80 -1.78 -1.29043367
## 81 -1.82 -0.67092628
## 82 -1.87 -0.99664060
## 83 -1.89 -2.48329490
## 84 -1.92 -1.81942060
## 85 -1.92 -1.04037012
## 86 -1.92 -1.45328259
## 87 -1.94 -2.24827541
## 88 -1.99 -1.79937600
## 89 -2.00 -1.74270366
## 90 -2.05 -1.93979298
## 91 -2.06 -1.75787050
## 92 -2.08 -1.91837147
## 93 -2.10 -2.39028501
## 94 -2.11 -0.91090821
## 95 -2.12 0.60380783
## 96 -2.17 -2.01635355
## 97 -2.21 -2.47326982
## 98 -2.24 -1.94735591
## 99 -2.24 -1.45372800
## 100 -2.29 -2.08876198
## 101 -2.31 -1.50952526
## 102 -2.32 -1.96220573
## 103 -2.35 -2.14600574
## 104 -2.35 -1.61565689
## 105 -2.36 -3.73129268
## 106 -2.36 -1.96986410
## 107 -2.38 -2.85306228
## 108 -2.42 -2.39234978
## 109 -2.43 -3.76412015
## 110 -2.44 -3.24071824
## 111 -2.52 -2.24803538
## 112 -2.53 -2.28315411
## 113 -2.57 -2.24803538
## 114 -2.62 -3.10362983
## 115 -2.62 -3.00158745
## 116 -2.64 -2.96767777
## 117 -2.64 -3.35548368
## 118 -2.70 -4.25241319
## 119 -2.82 -3.45751861
## 120 -2.88 -1.93823961
## 121 -2.89 -2.39963380
## 122 -2.92 -1.39749682
## 123 -2.93 -3.08595702
## 124 -2.96 -2.54235854
## 125 -2.98 -3.93108321
## 126 -3.01 -2.35307133
## 127 -3.01 -4.50500549
## 128 -3.02 -2.51929331
## 129 -3.07 -2.69557749
## 130 -3.09 -2.83001114
## 131 -3.11 -3.25768913
## 132 -3.13 -4.10603182
## 133 -3.14 -1.23622872
## 134 -3.15 -3.84144279
## 135 -3.22 -2.76710780
## 136 -3.26 -2.80002180
## 137 -3.27 -1.98653550
## 138 -3.27 -3.25768913
## 139 -3.30 -3.65416530
## 140 -3.31 -1.60991855
## 141 -3.33 -2.53601483
## 142 -3.37 -2.75739769
## 143 -3.43 -3.39312647
## 144 -3.43 -2.43097739
## 145 -3.48 -3.15509827
## 146 -3.51 -3.62621348
## 147 -3.59 -3.51911679
## 148 -3.61 -3.15196542
## 149 -3.63 -4.03201026
## 150 -3.63 -3.44261322
## 151 -3.68 -1.63449819
## 152 -3.71 -4.50720229
## 153 -3.74 -4.02097476
## 154 -3.75 -3.64465652
## 155 -3.75 -3.88199138
## 156 -3.77 -3.52026993
## 157 -3.77 -4.23892372
## 158 -3.78 -4.45394764
## 159 -3.81 -3.08318538
## 160 -3.95 -4.27149407
## 161 -3.96 -4.59515852
## 162 -3.96 -3.65674380
## 163 -4.00 -3.26110326
## 164 -4.02 -3.78080404
## 165 -4.04 -4.24459971
## 166 -4.12 -3.64465652
## 167 -4.15 -4.19144272
## 168 -4.16 -3.80882138
## 169 -4.17 -5.02012232
## 170 -4.21 -4.76251871
## 171 -4.23 -4.43674661
## 172 -4.25 -5.05130369
## 173 -4.30 -3.87942381
## 174 -4.31 -5.65090762
## 175 -4.35 -4.59463177
## 176 -4.40 -3.75658431
## 177 -4.40 -4.45887269
## 178 -4.43 -4.95174143
## 179 -4.46 -4.45327859
## 180 -4.47 -4.91254849
## 181 -4.51 -4.99182180
## 182 -4.60 -3.62483206
## 183 -4.64 -4.09282596
## 184 -4.69 -5.31534355
## 185 -4.71 -4.05147739
## 186 -4.77 -3.82532960
## 187 -4.95 -4.65121197
## 188 -4.98 -4.37807284
## 189 -5.21 -5.75852554
## 190 -5.22 -5.04248295
## 191 -5.28 -3.78029468
## 192 -5.31 -3.02722452
## 193 -5.35 -5.02512514
## 194 -5.37 -4.90962912
## 195 -5.40 -4.27856689
## 196 -5.43 -4.78357040
## 197 -5.65 -5.69232707
## 198 -5.66 -4.26545272
## 199 -6.70 -5.64662620
## 200 -5.72 -4.88488055
## 201 -6.00 -6.45403152
## 202 -6.25 -6.33246176
## 203 -6.26 -6.33246176
## 204 -6.27 -6.33246176
## 205 -6.35 -5.59520463
## 206 -6.57 -6.07694265
## 207 -6.62 -4.70193613
## 208 -6.96 -5.80074158
## 209 -7.02 -7.72534637
## 210 -7.20 -7.34543990
## 211 -7.28 -6.86581992
## 212 -7.32 -7.36767153
## 213 -7.39 -7.36767153
## 214 -7.82 -8.17260423
## 215 -8.23 -8.11165366
## 216 -8.94 -8.76562128
## 217 1.07 0.14964568
## 218 0.43 0.01165068
## 219 0.32 0.12220789
## 220 0.00 -0.17657276
## 221 -0.40 -0.90002866
## 222 -0.52 -0.57307431
## 223 -0.55 -0.75852623
## 224 -0.60 -0.74062318
## 225 -0.62 -2.83283995
## 226 -0.85 -1.08794655
## 227 -0.89 -1.06917180
## 228 -0.93 -1.38582368
## 229 -0.96 -0.82392306
## 230 -1.06 -1.80039862
## 231 -1.10 -1.39937965
## 232 -1.12 -0.42597283
## 233 -1.15 -1.22800620
## 234 -1.28 -0.27842908
## 235 -1.30 -1.41326697
## 236 -1.31 -1.26101447
## 237 -1.35 -1.37918726
## 238 -1.39 -1.17035624
## 239 -1.41 -1.26101447
## 240 -1.41 -0.67092628
## 241 -1.42 -0.97892080
## 242 -1.46 -2.31873453
## 243 -1.50 -1.72223274
## 244 -1.50 -1.74317544
## 245 -1.52 -1.60327296
## 246 -1.52 -0.59809889
## 247 -1.59 -1.72955934
## 248 -1.61 -1.43551515
## 249 -1.63 -1.42629212
## 250 -1.71 -2.03463763
## 251 -1.83 -2.90210899
## 252 -2.05 -0.94393986
## 253 -2.06 -2.04450697
## 254 -2.07 -3.48517443
## 255 -2.15 -2.40954067
## 256 -2.16 -0.90605765
## 257 -1.99 -0.74041817
## 258 -2.36 -2.19175245
## 259 -2.38 -2.76552215
## 260 -2.39 -0.92341877
## 261 -2.46 -2.16624833
## 262 -2.49 -2.46461783
## 263 -2.54 -2.29690705
## 264 -2.55 -3.44594353
## 265 -2.63 -3.01538128
## 266 -2.64 -2.77620169
## 267 -2.67 -2.27927439
## 268 -2.68 -2.03810034
## 269 -2.77 -2.96249574
## 270 -2.78 -3.12735599
## 271 -2.82 -3.00441986
## 272 -2.92 -3.80173414
## 273 -3.03 -3.47481071
## 274 -3.12 -3.64366980
## 275 -3.16 -2.45235060
## 276 -3.19 -3.44261322
## 277 -3.54 -3.51752975
## 278 -3.54 -2.36402441
## 279 -3.59 -3.46086523
## 280 -3.66 -3.52725326
## 281 -3.68 -4.02097476
## 282 -3.75 -4.06139824
## 283 -3.76 -3.67602690
## 284 -3.78 -3.94705267
## 285 -3.80 -4.36861267
## 286 -3.80 -4.72117277
## 287 -3.85 -4.35910681
## 288 -3.89 -4.64471393
## 289 -3.95 -4.61902812
## 290 -4.29 -4.58222895
## 291 -4.42 -5.79748198
## 292 -4.48 -4.19672434
## 293 -4.48 -4.91254849
## 294 -4.53 -5.22016678
## 295 -4.63 -4.72479212
## 296 -4.73 -4.04285744
## 297 -4.84 -3.48882864
## 298 -4.89 -4.17422747
## 299 -4.89 -4.48292982
## 300 -5.26 -5.59520463
## 301 -6.09 -4.70193613
## 302 -6.29 -5.56779875
## 303 -6.29 -6.33246176
## 304 -6.89 -5.88920901
## 305 -6.96 -5.46080954
## 306 -7.00 -6.86581992
## 307 -7.05 -7.36767153
## 308 -8.30 -8.76562128
## 309 -8.66 -9.14422945
## 310 -9.03 -9.26828995
## 311 -10.41 -9.95165538
## 312 -7.89 -7.36767153
## 313 -2.32 -1.81686774
## 314 0.39 -2.98832962
## 315 -2.90 -5.35876149
## 316 -2.47 -3.26304651
##################################
# Reporting the independent evaluation results
# for the test set
##################################
# Compute the independent test-set metrics (RMSE, R-squared, MAE) for the
# MARS model: column 2 holds predictions, column 1 the observed values.
# Outer parentheses print the assigned value.
(MARS_Test_Metrics <- postResample(MARS_Test[, 2], MARS_Test[, 1]))
## RMSE Rsquared MAE
## 0.758042 0.868910 0.562053
# Extract the individual metrics for later model comparison.
(MARS_Test_RMSE <- MARS_Test_Metrics[1])
## RMSE
## 0.758042
(MARS_Test_Rsquared <- MARS_Test_Metrics[2])
## Rsquared
## 0.86891
##################################
# Transforming factor predictors
# as required by the nature of the model
##################################
# Creating a local object
# for the train and test sets
##################################
# Coerce every column (including factor predictors) to numeric, as required
# by the radial-kernel SVM; as.character() first avoids factor-level codes.
PMA_PreModelling_Train_SVM_R <- as.data.frame(lapply(PMA_PreModelling_Train, function(x) as.numeric(as.character(x))))
dim(PMA_PreModelling_Train_SVM_R)
## [1] 951 221
PMA_PreModelling_Test_SVM_R <- as.data.frame(lapply(PMA_PreModelling_Test, function(x) as.numeric(as.character(x))))
dim(PMA_PreModelling_Test_SVM_R)
## [1] 316 221
##################################
# Creating consistent fold assignments
# for the 10-Fold Cross Validation process
##################################
# Fix the RNG seed so the 10-fold assignments are reproducible and
# consistent across the different models being compared.
set.seed(12345678)
KFold_Indices <- createFolds(PMA_PreModelling_Train_SVM_R$Log_Solubility,
                             k = 10,
                             returnTrain = TRUE)
KFold_Control <- trainControl(method = "cv",
                              index = KFold_Indices)
##################################
# Setting the conditions
# for hyperparameter tuning
##################################
# used a range of default values
##################################
# Running the support vector machine (radial basis function kernel) model
# by setting the caret method to 'svmRadial'
##################################
set.seed(12345678)
<- train(x = PMA_PreModelling_Train_SVM_R[,!names(PMA_PreModelling_Train_SVM_R) %in% c("Log_Solubility")],
SVM_R_Tune y = PMA_PreModelling_Train_SVM_R$Log_Solubility,
method = "svmRadial",
tuneLength = 14,
trControl = KFold_Control,
preProc = c("center", "scale"))
##################################
# Reporting the cross-validation results
# for the train set
##################################
SVM_R_Tune
## Support Vector Machines with Radial Basis Function Kernel
##
## 951 samples
## 220 predictors
##
## Pre-processing: centered (220), scaled (220)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 857, 855, 855, 856, 857, 856, ...
## Resampling results across tuning parameters:
##
## C RMSE Rsquared MAE
## 0.25 0.8096652 0.8648241 0.6037022
## 0.50 0.7082927 0.8880984 0.5294633
## 1.00 0.6537354 0.9006737 0.4849819
## 2.00 0.6221070 0.9086287 0.4586863
## 4.00 0.6102493 0.9116037 0.4481332
## 8.00 0.5981789 0.9147527 0.4394685
## 16.00 0.5950500 0.9155098 0.4386376
## 32.00 0.5964935 0.9151700 0.4401950
## 64.00 0.5970088 0.9150461 0.4394875
## 128.00 0.5988087 0.9145520 0.4408041
## 256.00 0.6002748 0.9141568 0.4416319
## 512.00 0.6023160 0.9135943 0.4428558
## 1024.00 0.6054508 0.9127164 0.4447153
## 2048.00 0.6093996 0.9116221 0.4483096
##
## Tuning parameter 'sigma' was held constant at a value of 0.002858301
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were sigma = 0.002858301 and C = 16.
# Inspect the final fitted ksvm object selected by cross-validation.
SVM_R_Tune$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: eps-svr (regression)
## parameter : epsilon = 0.1 cost C = 16
##
## Gaussian Radial Basis kernel function.
## Hyperparameter : sigma = 0.00285830098890164
##
## Number of Support Vectors : 619
##
## Objective Function Value : -275.9914
## Training error : 0.009889
# Full resampling results for every candidate cost value.
SVM_R_Tune$results
## sigma C RMSE Rsquared MAE RMSESD RsquaredSD
## 1 0.002858301 0.25 0.8096652 0.8648241 0.6037022 0.07449474 0.02540316
## 2 0.002858301 0.50 0.7082927 0.8880984 0.5294633 0.05703847 0.02138058
## 3 0.002858301 1.00 0.6537354 0.9006737 0.4849819 0.05072661 0.01960991
## 4 0.002858301 2.00 0.6221070 0.9086287 0.4586863 0.04872850 0.01682004
## 5 0.002858301 4.00 0.6102493 0.9116037 0.4481332 0.04980713 0.01505246
## 6 0.002858301 8.00 0.5981789 0.9147527 0.4394685 0.05030562 0.01557070
## 7 0.002858301 16.00 0.5950500 0.9155098 0.4386376 0.04905689 0.01634957
## 8 0.002858301 32.00 0.5964935 0.9151700 0.4401950 0.04954128 0.01652382
## 9 0.002858301 64.00 0.5970088 0.9150461 0.4394875 0.05073287 0.01635371
## 10 0.002858301 128.00 0.5988087 0.9145520 0.4408041 0.05126175 0.01595339
## 11 0.002858301 256.00 0.6002748 0.9141568 0.4416319 0.05073171 0.01574943
## 12 0.002858301 512.00 0.6023160 0.9135943 0.4428558 0.04940859 0.01564832
## 13 0.002858301 1024.00 0.6054508 0.9127164 0.4447153 0.04790476 0.01530957
## 14 0.002858301 2048.00 0.6093996 0.9116221 0.4483096 0.04652339 0.01490467
## MAESD
## 1 0.05859916
## 2 0.04672074
## 3 0.04096699
## 4 0.03486431
## 5 0.03220601
## 6 0.03251493
## 7 0.03264716
## 8 0.03314391
## 9 0.03421506
## 10 0.03518379
## 11 0.03497268
## 12 0.03490923
## 13 0.03404608
## 14 0.03416602
# Pull the cross-validated RMSE and R-squared for the best cost value.
SVM_R_Train_RMSE <- SVM_R_Tune$results[SVM_R_Tune$results$C == SVM_R_Tune$bestTune$C,
                                       c("RMSE")]
SVM_R_Train_Rsquared <- SVM_R_Tune$results[SVM_R_Tune$results$C == SVM_R_Tune$bestTune$C,
                                           c("Rsquared")]
##################################
# Identifying and plotting the
# best model predictors
##################################
# model does not support variable importance measurement
##################################
# Independently evaluating the model
# on the test set
##################################
# Score the tuned radial SVM on the held-out test set, pairing observed
# solubility with predictions (response column excluded from predictors).
SVM_R_Test <- data.frame(SVM_R_Observed = PMA_PreModelling_Test$Log_Solubility,
                         SVM_R_Predicted = predict(SVM_R_Tune,
                                                   PMA_PreModelling_Test_SVM_R[, !names(PMA_PreModelling_Test_SVM_R) %in% c("Log_Solubility")]))
SVM_R_Test
## SVM_R_Observed SVM_R_Predicted
## 1 0.93 0.377022305
## 2 0.85 0.615378735
## 3 0.81 -0.495500158
## 4 0.74 0.999716875
## 5 0.61 -0.273021690
## 6 0.58 1.144789709
## 7 0.57 0.776186139
## 8 0.56 0.351278512
## 9 0.52 0.164746867
## 10 0.45 -0.313673146
## 11 0.40 0.215267762
## 12 0.36 -1.444072128
## 13 0.22 -0.173733882
## 14 0.08 -0.107742817
## 15 0.07 -0.977523710
## 16 0.02 -0.367507982
## 17 0.00 0.005440399
## 18 -0.01 -0.043270643
## 19 -0.07 -0.534124545
## 20 -0.12 -0.314219905
## 21 -0.17 0.800729693
## 22 -0.29 -0.249230909
## 23 -0.38 -0.497962627
## 24 -0.38 -0.283376194
## 25 -0.39 -1.009446033
## 26 -0.42 -0.531411164
## 27 -0.44 -0.470814324
## 28 -0.46 0.650988718
## 29 -0.48 -2.253412310
## 30 -0.60 -1.023406891
## 31 -0.63 -2.192023433
## 32 -0.66 -0.685469901
## 33 -0.72 -0.470897408
## 34 -0.72 -0.193187463
## 35 -0.80 -0.099438571
## 36 -0.80 -0.851745808
## 37 -0.82 -0.117707185
## 38 -0.82 -0.707807306
## 39 -0.84 -0.250509212
## 40 -0.85 -0.718555103
## 41 -0.85 -0.457840413
## 42 -0.87 -1.605336586
## 43 -0.89 -1.266233178
## 44 -0.90 0.202970718
## 45 -0.96 -1.300590230
## 46 -0.96 -1.113669532
## 47 -0.99 -0.788372992
## 48 -1.01 -0.879754535
## 49 -1.09 -1.166775522
## 50 -1.12 -0.782898076
## 51 -1.14 -1.016515705
## 52 -1.17 -1.053804117
## 53 -1.19 -1.524217223
## 54 -1.22 -1.153215847
## 55 -1.27 -1.581349539
## 56 -1.28 -1.412468806
## 57 -1.32 -1.400032827
## 58 -1.38 -1.249607292
## 59 -1.39 -1.898505458
## 60 -1.42 -1.644696329
## 61 -1.47 -1.469277638
## 62 -1.47 -1.431606890
## 63 -1.50 -1.391886851
## 64 -1.52 -1.455094629
## 65 -1.54 -1.651900342
## 66 -1.55 -2.357220471
## 67 -1.56 -3.116362973
## 68 -1.57 -2.050972286
## 69 -1.60 -1.891256352
## 70 -1.60 -2.605498896
## 71 -1.62 -1.653764049
## 72 -1.64 -2.578493119
## 73 -1.67 -1.623587385
## 74 -1.70 -3.407635951
## 75 -1.70 -2.209337572
## 76 -1.71 -2.578630647
## 77 -1.71 -2.295490696
## 78 -1.75 -1.767491468
## 79 -1.78 -2.115841996
## 80 -1.78 -1.844700866
## 81 -1.82 -1.545720948
## 82 -1.87 -1.147509663
## 83 -1.89 -2.238332142
## 84 -1.92 -1.752672883
## 85 -1.92 -1.601158692
## 86 -1.92 -1.414138108
## 87 -1.94 -2.688640423
## 88 -1.99 -2.657220311
## 89 -2.00 -2.286036951
## 90 -2.05 -2.230578068
## 91 -2.06 -1.896853833
## 92 -2.08 -2.472398752
## 93 -2.10 -2.888216605
## 94 -2.11 -1.105576542
## 95 -2.12 -1.587921526
## 96 -2.17 -2.382127845
## 97 -2.21 -1.899554335
## 98 -2.24 -2.595826372
## 99 -2.24 -1.136841757
## 100 -2.29 -2.335588764
## 101 -2.31 -2.118959614
## 102 -2.32 -2.118769515
## 103 -2.35 -2.269277663
## 104 -2.35 -2.134777212
## 105 -2.36 -2.665765474
## 106 -2.36 -1.463534992
## 107 -2.38 -2.234915921
## 108 -2.42 -2.052935091
## 109 -2.43 -3.391729050
## 110 -2.44 -2.712549116
## 111 -2.52 -2.702197745
## 112 -2.53 -2.929447598
## 113 -2.57 -2.836040487
## 114 -2.62 -2.400751009
## 115 -2.62 -2.851182824
## 116 -2.64 -2.901480332
## 117 -2.64 -2.916113108
## 118 -2.70 -2.814932392
## 119 -2.82 -2.542155181
## 120 -2.88 -2.885532840
## 121 -2.89 -3.025616643
## 122 -2.92 -1.152556489
## 123 -2.93 -4.064703841
## 124 -2.96 -2.712008375
## 125 -2.98 -2.686608036
## 126 -3.01 -2.103323633
## 127 -3.01 -3.341049576
## 128 -3.02 -3.383212873
## 129 -3.07 -3.300387203
## 130 -3.09 -3.167463222
## 131 -3.11 -3.540992536
## 132 -3.13 -3.269241486
## 133 -3.14 -2.997857623
## 134 -3.15 -3.587759298
## 135 -3.22 -2.105110561
## 136 -3.26 -3.615825340
## 137 -3.27 -2.972121814
## 138 -3.27 -3.289672245
## 139 -3.30 -3.281242551
## 140 -3.31 -2.652632885
## 141 -3.33 -2.451874650
## 142 -3.37 -2.673141107
## 143 -3.43 -3.652189262
## 144 -3.43 -2.963520099
## 145 -3.48 -2.734132679
## 146 -3.51 -3.759421710
## 147 -3.59 -3.117634995
## 148 -3.61 -2.865708744
## 149 -3.63 -3.890887067
## 150 -3.63 -3.615553852
## 151 -3.68 -2.238188670
## 152 -3.71 -3.544321782
## 153 -3.74 -3.436753466
## 154 -3.75 -4.157748720
## 155 -3.75 -3.624836042
## 156 -3.77 -3.725467014
## 157 -3.77 -4.404887276
## 158 -3.78 -3.772025829
## 159 -3.81 -3.651606763
## 160 -3.95 -3.971767610
## 161 -3.96 -4.901292941
## 162 -3.96 -4.401856116
## 163 -4.00 -3.561658526
## 164 -4.02 -4.154945749
## 165 -4.04 -4.404172979
## 166 -4.12 -4.188957510
## 167 -4.15 -4.221349408
## 168 -4.16 -3.184883381
## 169 -4.17 -4.796306020
## 170 -4.21 -4.807876135
## 171 -4.23 -4.916891572
## 172 -4.25 -4.046710509
## 173 -4.30 -4.268222102
## 174 -4.31 -5.634051251
## 175 -4.35 -4.651869392
## 176 -4.40 -4.049443466
## 177 -4.40 -4.471285305
## 178 -4.43 -4.666848995
## 179 -4.46 -4.579655203
## 180 -4.47 -3.631150297
## 181 -4.51 -4.891675705
## 182 -4.60 -4.025784390
## 183 -4.64 -4.849530170
## 184 -4.69 -5.299473994
## 185 -4.71 -4.172402729
## 186 -4.77 -4.086780949
## 187 -4.95 -4.150190525
## 188 -4.98 -4.065105341
## 189 -5.21 -6.030279827
## 190 -5.22 -5.164765070
## 191 -5.28 -4.373875317
## 192 -5.31 -3.379565405
## 193 -5.35 -4.814728090
## 194 -5.37 -4.568884126
## 195 -5.40 -4.750906721
## 196 -5.43 -4.047652853
## 197 -5.65 -4.990274486
## 198 -5.66 -4.366499903
## 199 -6.70 -4.602288174
## 200 -5.72 -5.114658476
## 201 -6.00 -6.653135205
## 202 -6.25 -6.546154814
## 203 -6.26 -6.254078169
## 204 -6.27 -6.587388705
## 205 -6.35 -6.436542986
## 206 -6.57 -6.157770811
## 207 -6.62 -5.255748105
## 208 -6.96 -6.197333500
## 209 -7.02 -7.653849775
## 210 -7.20 -7.445634469
## 211 -7.28 -7.275692515
## 212 -7.32 -7.760467059
## 213 -7.39 -7.833836642
## 214 -7.82 -8.377927995
## 215 -8.23 -8.704717870
## 216 -8.94 -8.486476898
## 217 1.07 0.033206407
## 218 0.43 0.342412722
## 219 0.32 -0.296738855
## 220 0.00 0.237151659
## 221 -0.40 -1.289882629
## 222 -0.52 -0.460806067
## 223 -0.55 -0.640695572
## 224 -0.60 -0.618080635
## 225 -0.62 -2.157719490
## 226 -0.85 -1.178257625
## 227 -0.89 -0.645907140
## 228 -0.93 -1.064454849
## 229 -0.96 -0.920571854
## 230 -1.06 -1.825633046
## 231 -1.10 -1.258264981
## 232 -1.12 -0.932496194
## 233 -1.15 -0.811202310
## 234 -1.28 -0.673083860
## 235 -1.30 -0.938499257
## 236 -1.31 -1.714428660
## 237 -1.35 -2.157742946
## 238 -1.39 -2.303111346
## 239 -1.41 -1.524217223
## 240 -1.41 -1.617521856
## 241 -1.42 -1.160491567
## 242 -1.46 -1.757300522
## 243 -1.50 -1.529587296
## 244 -1.50 -1.866686517
## 245 -1.52 -1.888299299
## 246 -1.52 -1.127109450
## 247 -1.59 -1.878042961
## 248 -1.61 -1.516062571
## 249 -1.63 -1.401531925
## 250 -1.71 -2.823961805
## 251 -1.83 -2.238115337
## 252 -2.05 -2.523258936
## 253 -2.06 -2.137902734
## 254 -2.07 -3.462960502
## 255 -2.15 -2.381700375
## 256 -2.16 -0.839919439
## 257 -1.99 -1.667208513
## 258 -2.36 -2.330953460
## 259 -2.38 -3.077290007
## 260 -2.39 -1.738152693
## 261 -2.46 -2.128524036
## 262 -2.49 -2.239051608
## 263 -2.54 -2.598974846
## 264 -2.55 -2.843872131
## 265 -2.63 -2.765634259
## 266 -2.64 -2.302265487
## 267 -2.67 -2.827141104
## 268 -2.68 -1.771335226
## 269 -2.77 -2.577234554
## 270 -2.78 -2.934429670
## 271 -2.82 -2.189159005
## 272 -2.92 -3.434728199
## 273 -3.03 -3.704745246
## 274 -3.12 -3.846550176
## 275 -3.16 -3.276068182
## 276 -3.19 -3.389947805
## 277 -3.54 -3.503200610
## 278 -3.54 -2.566182583
## 279 -3.59 -3.612913661
## 280 -3.66 -3.576761405
## 281 -3.68 -3.308960113
## 282 -3.75 -3.987304108
## 283 -3.76 -3.381057144
## 284 -3.78 -4.052769004
## 285 -3.80 -3.991689749
## 286 -3.80 -4.430228082
## 287 -3.85 -3.657422448
## 288 -3.89 -3.507907547
## 289 -3.95 -4.066589608
## 290 -4.29 -5.236341193
## 291 -4.42 -4.871003795
## 292 -4.48 -3.812777461
## 293 -4.48 -3.429953859
## 294 -4.53 -4.907051173
## 295 -4.63 -4.742097842
## 296 -4.73 -4.529025529
## 297 -4.84 -4.558342063
## 298 -4.89 -4.141407629
## 299 -4.89 -5.203295643
## 300 -5.26 -5.974658488
## 301 -6.09 -5.137652342
## 302 -6.29 -6.240607037
## 303 -6.29 -6.558637603
## 304 -6.89 -5.904143016
## 305 -6.96 -6.618614467
## 306 -7.00 -6.871813984
## 307 -7.05 -7.892004727
## 308 -8.30 -8.882113937
## 309 -8.66 -9.854624215
## 310 -9.03 -9.525977965
## 311 -10.41 -9.898151813
## 312 -7.89 -7.446218949
## 313 -2.32 -1.934078811
## 314 0.39 -2.198779112
## 315 -2.90 -4.558402779
## 316 -2.47 -3.747752017
##################################
# Reporting the independent evaluation results
# for the test set
##################################
# Independent test-set metrics for the radial SVM (predictions vs observed).
(SVM_R_Test_Metrics <- postResample(SVM_R_Test[, 2], SVM_R_Test[, 1]))
## RMSE Rsquared MAE
## 0.6274210 0.9098906 0.4664314
(SVM_R_Test_RMSE <- SVM_R_Test_Metrics[1])
## RMSE
## 0.627421
(SVM_R_Test_Rsquared <- SVM_R_Test_Metrics[2])
## Rsquared
## 0.9098906
##################################
# Transforming factor predictors
# as required by the nature of the model
##################################
# Creating a local object
# for the train and test sets
##################################
# Coerce every column to numeric for the polynomial-kernel SVM,
# mirroring the transformation used for the radial-kernel model.
PMA_PreModelling_Train_SVM_P <- as.data.frame(lapply(PMA_PreModelling_Train, function(x) as.numeric(as.character(x))))
dim(PMA_PreModelling_Train_SVM_P)
## [1] 951 221
PMA_PreModelling_Test_SVM_P <- as.data.frame(lapply(PMA_PreModelling_Test, function(x) as.numeric(as.character(x))))
dim(PMA_PreModelling_Test_SVM_P)
## [1] 316 221
##################################
# Creating consistent fold assignments
# for the 10-Fold Cross Validation process
##################################
# Same seed as the other models so fold assignments are identical,
# making cross-validated metrics directly comparable.
set.seed(12345678)
KFold_Indices <- createFolds(PMA_PreModelling_Train_SVM_P$Log_Solubility,
                             k = 10,
                             returnTrain = TRUE)
KFold_Control <- trainControl(method = "cv",
                              index = KFold_Indices)
##################################
# Setting the conditions
# for hyperparameter tuning
##################################
# Hyperparameter grid for the polynomial SVM: 2 degrees x 3 scales x
# 8 cost values (2^-2 .. 2^5) = 48 candidate models.
SVM_P_Grid <- expand.grid(degree = 1:2,
                          scale = c(0.01, 0.005, 0.001),
                          C = 2^(-2:5))
##################################
# Running the support vector machine (polynomial kernel) model
# by setting the caret method to 'svmPoly'
##################################
set.seed(12345678)
<- train(x = PMA_PreModelling_Train_SVM_P[,!names(PMA_PreModelling_Train_SVM_P) %in% c("Log_Solubility")],
SVM_P_Tune y = PMA_PreModelling_Train_SVM_P$Log_Solubility,
method = "svmPoly",
tuneGrid = SVM_P_Grid,
trControl = KFold_Control,
preProc = c("center", "scale"))
##################################
# Reporting the cross-validation results
# for the train set
##################################
SVM_P_Tune
## Support Vector Machines with Polynomial Kernel
##
## 951 samples
## 220 predictors
##
## Pre-processing: centered (220), scaled (220)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 857, 855, 855, 856, 857, 856, ...
## Resampling results across tuning parameters:
##
## degree scale C RMSE Rsquared MAE
## 1 0.001 0.25 1.0813115 0.7829667 0.8202879
## 1 0.001 0.50 0.8997673 0.8307038 0.6850280
## 1 0.001 1.00 0.7854557 0.8607263 0.5964276
## 1 0.001 2.00 0.7203591 0.8781068 0.5463177
## 1 0.001 4.00 0.6833017 0.8874576 0.5181085
## 1 0.001 8.00 0.6597666 0.8939662 0.4989184
## 1 0.001 16.00 0.6604655 0.8934007 0.4990358
## 1 0.001 32.00 0.6648932 0.8925153 0.4974638
## 1 0.005 0.25 0.7626772 0.8669115 0.5790854
## 1 0.005 0.50 0.7036714 0.8824261 0.5344290
## 1 0.005 1.00 0.6744750 0.8897958 0.5120373
## 1 0.005 2.00 0.6599982 0.8937877 0.4978656
## 1 0.005 4.00 0.6632663 0.8925820 0.4992507
## 1 0.005 8.00 0.6667905 0.8921659 0.4973640
## 1 0.005 16.00 0.6711989 0.8909501 0.5015729
## 1 0.005 32.00 0.6773182 0.8892810 0.5029691
## 1 0.010 0.25 0.7036730 0.8824235 0.5344304
## 1 0.010 0.50 0.6744566 0.8898008 0.5120301
## 1 0.010 1.00 0.6600157 0.8937816 0.4978646
## 1 0.010 2.00 0.6632819 0.8925722 0.4992309
## 1 0.010 4.00 0.6667228 0.8921874 0.4972902
## 1 0.010 8.00 0.6711336 0.8909682 0.5014820
## 1 0.010 16.00 0.6772383 0.8892925 0.5030500
## 1 0.010 32.00 0.6829954 0.8876055 0.5038941
## 2 0.001 0.25 0.8787346 0.8384794 0.6664104
## 2 0.001 0.50 0.7649968 0.8687093 0.5800609
## 2 0.001 1.00 0.6886037 0.8889442 0.5220816
## 2 0.001 2.00 0.6441548 0.9002961 0.4845796
## 2 0.001 4.00 0.6170665 0.9076174 0.4619635
## 2 0.001 8.00 0.6028074 0.9116780 0.4470343
## 2 0.001 16.00 0.6097426 0.9099771 0.4521128
## 2 0.001 32.00 0.6166312 0.9084466 0.4569551
## 2 0.005 0.25 0.6443821 0.9013243 0.4859189
## 2 0.005 0.50 0.6143918 0.9090246 0.4577171
## 2 0.005 1.00 0.6054694 0.9116588 0.4482625
## 2 0.005 2.00 0.6034269 0.9124624 0.4481158
## 2 0.005 4.00 0.6064033 0.9117054 0.4502996
## 2 0.005 8.00 0.6122424 0.9102944 0.4538997
## 2 0.005 16.00 0.6234769 0.9073413 0.4595375
## 2 0.005 32.00 0.6357322 0.9038510 0.4675284
## 2 0.010 0.25 0.6166474 0.9087067 0.4557365
## 2 0.010 0.50 0.6091353 0.9109400 0.4503510
## 2 0.010 1.00 0.6107104 0.9103522 0.4513637
## 2 0.010 2.00 0.6122546 0.9100855 0.4540873
## 2 0.010 4.00 0.6201277 0.9080690 0.4580899
## 2 0.010 8.00 0.6313489 0.9048271 0.4639982
## 2 0.010 16.00 0.6348806 0.9041682 0.4680582
## 2 0.010 32.00 0.6388360 0.9030783 0.4712983
##
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were degree = 2, scale = 0.001 and C = 8.
# Inspect the final fitted ksvm object selected by cross-validation.
SVM_P_Tune$finalModel
## Support Vector Machine object of class "ksvm"
##
## SV type: eps-svr (regression)
## parameter : epsilon = 0.1 cost C = 8
##
## Polynomial kernel function.
## Hyperparameters : degree = 2 scale = 0.001 offset = 1
##
## Number of Support Vectors : 628
##
## Objective Function Value : -592.802
## Training error : 0.026533
# Full resampling results for all 48 degree/scale/cost combinations.
SVM_P_Tune$results
## degree scale C RMSE Rsquared MAE RMSESD RsquaredSD
## 1 1 0.001 0.25 1.0813115 0.7829667 0.8202879 0.10939801 0.04515491
## 2 1 0.001 0.50 0.8997673 0.8307038 0.6850280 0.08809186 0.03576289
## 3 1 0.001 1.00 0.7854557 0.8607263 0.5964276 0.07371384 0.02888902
## 4 1 0.001 2.00 0.7203591 0.8781068 0.5463177 0.06343423 0.02581250
## 5 1 0.001 4.00 0.6833017 0.8874576 0.5181085 0.05363954 0.02466819
## 6 1 0.001 8.00 0.6597666 0.8939662 0.4989184 0.05297037 0.02374453
## 7 1 0.001 16.00 0.6604655 0.8934007 0.4990358 0.05463004 0.02511122
## 8 1 0.001 32.00 0.6648932 0.8925153 0.4974638 0.05144032 0.02330463
## 9 1 0.005 0.25 0.7626772 0.8669115 0.5790854 0.06862855 0.02732628
## 10 1 0.005 0.50 0.7036714 0.8824261 0.5344290 0.06006912 0.02468980
## 11 1 0.005 1.00 0.6744750 0.8897958 0.5120373 0.05409251 0.02448250
## 12 1 0.005 2.00 0.6599982 0.8937877 0.4978656 0.05073733 0.02385446
## 13 1 0.005 4.00 0.6632663 0.8925820 0.4992507 0.05207502 0.02474687
## 14 1 0.005 8.00 0.6667905 0.8921659 0.4973640 0.05375424 0.02280333
## 15 1 0.005 16.00 0.6711989 0.8909501 0.5015729 0.05781357 0.02425717
## 16 1 0.005 32.00 0.6773182 0.8892810 0.5029691 0.05519991 0.02351653
## 17 1 0.010 0.25 0.7036730 0.8824235 0.5344304 0.06008214 0.02469073
## 18 1 0.010 0.50 0.6744566 0.8898008 0.5120301 0.05411201 0.02448914
## 19 1 0.010 1.00 0.6600157 0.8937816 0.4978646 0.05078104 0.02386388
## 20 1 0.010 2.00 0.6632819 0.8925722 0.4992309 0.05212358 0.02477361
## 21 1 0.010 4.00 0.6667228 0.8921874 0.4972902 0.05380113 0.02278799
## 22 1 0.010 8.00 0.6711336 0.8909682 0.5014820 0.05780016 0.02429656
## 23 1 0.010 16.00 0.6772383 0.8892925 0.5030500 0.05553416 0.02360975
## 24 1 0.010 32.00 0.6829954 0.8876055 0.5038941 0.05472078 0.02395011
## 25 2 0.001 0.25 0.8787346 0.8384794 0.6664104 0.08461575 0.03382786
## 26 2 0.001 0.50 0.7649968 0.8687093 0.5800609 0.06959155 0.02694008
## 27 2 0.001 1.00 0.6886037 0.8889442 0.5220816 0.05754124 0.02350010
## 28 2 0.001 2.00 0.6441548 0.9002961 0.4845796 0.04827107 0.02132828
## 29 2 0.001 4.00 0.6170665 0.9076174 0.4619635 0.04262720 0.01917478
## 30 2 0.001 8.00 0.6028074 0.9116780 0.4470343 0.04458988 0.01880295
## 31 2 0.001 16.00 0.6097426 0.9099771 0.4521128 0.04611867 0.01893301
## 32 2 0.001 32.00 0.6166312 0.9084466 0.4569551 0.03957519 0.01822714
## 33 2 0.005 0.25 0.6443821 0.9013243 0.4859189 0.04519281 0.01997288
## 34 2 0.005 0.50 0.6143918 0.9090246 0.4577171 0.04259492 0.01815389
## 35 2 0.005 1.00 0.6054694 0.9116588 0.4482625 0.04607185 0.01594568
## 36 2 0.005 2.00 0.6034269 0.9124624 0.4481158 0.04239118 0.01468544
## 37 2 0.005 4.00 0.6064033 0.9117054 0.4502996 0.04296603 0.01535264
## 38 2 0.005 8.00 0.6122424 0.9102944 0.4538997 0.05051189 0.01698474
## 39 2 0.005 16.00 0.6234769 0.9073413 0.4595375 0.05670491 0.01788708
## 40 2 0.005 32.00 0.6357322 0.9038510 0.4675284 0.05855575 0.01830176
## 41 2 0.010 0.25 0.6166474 0.9087067 0.4557365 0.04262273 0.01611519
## 42 2 0.010 0.50 0.6091353 0.9109400 0.4503510 0.04367407 0.01533147
## 43 2 0.010 1.00 0.6107104 0.9103522 0.4513637 0.04211634 0.01597261
## 44 2 0.010 2.00 0.6122546 0.9100855 0.4540873 0.04469450 0.01705732
## 45 2 0.010 4.00 0.6201277 0.9080690 0.4580899 0.05017426 0.01789721
## 46 2 0.010 8.00 0.6313489 0.9048271 0.4639982 0.05311794 0.01854614
## 47 2 0.010 16.00 0.6348806 0.9041682 0.4680582 0.05566059 0.01761767
## 48 2 0.010 32.00 0.6388360 0.9030783 0.4712983 0.05467329 0.01787180
## MAESD
## 1 0.07848841
## 2 0.07058536
## 3 0.05952693
## 4 0.04868334
## 5 0.03486260
## 6 0.03460661
## 7 0.03935118
## 8 0.03698402
## 9 0.05594710
## 10 0.04363187
## 11 0.03477481
## 12 0.03526787
## 13 0.03712135
## 14 0.04031082
## 15 0.04322886
## 16 0.04122167
## 17 0.04366243
## 18 0.03480054
## 19 0.03530389
## 20 0.03711223
## 21 0.04046490
## 22 0.04318967
## 23 0.04156588
## 24 0.04262717
## 25 0.06830106
## 26 0.05869496
## 27 0.04616486
## 28 0.03587886
## 29 0.03160052
## 30 0.03497487
## 31 0.03428316
## 32 0.03065615
## 33 0.03897937
## 34 0.03553405
## 35 0.03289566
## 36 0.02924431
## 37 0.03212803
## 38 0.03858799
## 39 0.04030368
## 40 0.04059176
## 41 0.03300444
## 42 0.03211784
## 43 0.03032351
## 44 0.03471796
## 45 0.03644517
## 46 0.03732003
## 47 0.03885197
## 48 0.03859199
# Pull the cross-validated RMSE and R-squared for the winning
# degree/scale/cost combination; outer parentheses print the value.
(SVM_P_Train_RMSE <- SVM_P_Tune$results[SVM_P_Tune$results$degree == SVM_P_Tune$bestTune$degree &
                                        SVM_P_Tune$results$scale == SVM_P_Tune$bestTune$scale &
                                        SVM_P_Tune$results$C == SVM_P_Tune$bestTune$C,
                                        c("RMSE")])
## [1] 0.6028074
(SVM_P_Train_Rsquared <- SVM_P_Tune$results[SVM_P_Tune$results$degree == SVM_P_Tune$bestTune$degree &
                                            SVM_P_Tune$results$scale == SVM_P_Tune$bestTune$scale &
                                            SVM_P_Tune$results$C == SVM_P_Tune$bestTune$C,
                                            c("Rsquared")])
## [1] 0.911678
##################################
# Identifying and plotting the
# best model predictors
##################################
# model does not support variable importance measurement
##################################
# Independently evaluating the model
# on the test set
##################################
# Score the tuned polynomial SVM on the held-out test set, pairing observed
# solubility with predictions (response column excluded from predictors).
SVM_P_Test <- data.frame(SVM_P_Observed = PMA_PreModelling_Test$Log_Solubility,
                         SVM_P_Predicted = predict(SVM_P_Tune,
                                                   PMA_PreModelling_Test_SVM_P[, !names(PMA_PreModelling_Test_SVM_P) %in% c("Log_Solubility")]))
SVM_P_Test
## SVM_P_Observed SVM_P_Predicted
## 1 0.93 0.698154063
## 2 0.85 0.427078854
## 3 0.81 -0.480331714
## 4 0.74 1.151652132
## 5 0.61 -0.191366205
## 6 0.58 1.272369495
## 7 0.57 0.621837390
## 8 0.56 0.275252184
## 9 0.52 0.244099962
## 10 0.45 -0.267458399
## 11 0.40 0.184350760
## 12 0.36 -1.143304605
## 13 0.22 -0.052647414
## 14 0.08 -0.187367223
## 15 0.07 -1.042317495
## 16 0.02 -0.594978656
## 17 0.00 0.016467639
## 18 -0.01 0.046589001
## 19 -0.07 -0.031929988
## 20 -0.12 -0.584611961
## 21 -0.17 0.349572884
## 22 -0.29 -0.199967293
## 23 -0.38 -0.559361582
## 24 -0.38 -0.736243197
## 25 -0.39 -1.039990157
## 26 -0.42 -0.675090910
## 27 -0.44 -0.904872764
## 28 -0.46 0.925867013
## 29 -0.48 -2.220979516
## 30 -0.60 -1.064812332
## 31 -0.63 -1.949547589
## 32 -0.66 -0.588381454
## 33 -0.72 -0.599637631
## 34 -0.72 0.149512404
## 35 -0.80 0.258555219
## 36 -0.80 -0.855727981
## 37 -0.82 0.038362282
## 38 -0.82 -0.738151291
## 39 -0.84 -0.126856464
## 40 -0.85 -0.730214678
## 41 -0.85 -0.451254859
## 42 -0.87 -1.799953374
## 43 -0.89 -1.232357250
## 44 -0.90 0.264729516
## 45 -0.96 -1.278004345
## 46 -0.96 -0.879195975
## 47 -0.99 -0.739438079
## 48 -1.01 -0.645786383
## 49 -1.09 -1.094881375
## 50 -1.12 -0.352566521
## 51 -1.14 -0.628898470
## 52 -1.17 -1.519992371
## 53 -1.19 -1.560170436
## 54 -1.22 -1.209797042
## 55 -1.27 -1.697206808
## 56 -1.28 -1.484302846
## 57 -1.32 -1.367706155
## 58 -1.38 -1.265292190
## 59 -1.39 -1.804276350
## 60 -1.42 -1.536563345
## 61 -1.47 -1.300796908
## 62 -1.47 -1.529248110
## 63 -1.50 -1.175006846
## 64 -1.52 -1.455412061
## 65 -1.54 -1.444841289
## 66 -1.55 -2.049066378
## 67 -1.56 -3.081381839
## 68 -1.57 -1.798677086
## 69 -1.60 -1.621986790
## 70 -1.60 -2.494002048
## 71 -1.62 -1.646953336
## 72 -1.64 -2.344908987
## 73 -1.67 -1.814674132
## 74 -1.70 -3.435314666
## 75 -1.70 -2.090599602
## 76 -1.71 -2.272610716
## 77 -1.71 -2.190820408
## 78 -1.75 -1.873007420
## 79 -1.78 -1.872382431
## 80 -1.78 -1.837311351
## 81 -1.82 -1.339788975
## 82 -1.87 -1.392772803
## 83 -1.89 -2.290974799
## 84 -1.92 -1.816823997
## 85 -1.92 -1.513558557
## 86 -1.92 -1.288063863
## 87 -1.94 -2.646280728
## 88 -1.99 -2.344567911
## 89 -2.00 -2.069970796
## 90 -2.05 -2.057227142
## 91 -2.06 -1.743731941
## 92 -2.08 -2.314880404
## 93 -2.10 -2.681167340
## 94 -2.11 -1.493674031
## 95 -2.12 -1.161436770
## 96 -2.17 -2.218950117
## 97 -2.21 -2.005823739
## 98 -2.24 -2.775730625
## 99 -2.24 -1.250804918
## 100 -2.29 -2.260919022
## 101 -2.31 -2.062057472
## 102 -2.32 -2.320875841
## 103 -2.35 -2.322571028
## 104 -2.35 -2.108682186
## 105 -2.36 -2.676862543
## 106 -2.36 -1.914040871
## 107 -2.38 -2.312320983
## 108 -2.42 -2.654536455
## 109 -2.43 -3.768718391
## 110 -2.44 -3.084686127
## 111 -2.52 -2.405670623
## 112 -2.53 -2.864252823
## 113 -2.57 -2.953668235
## 114 -2.62 -2.520714593
## 115 -2.62 -2.769419605
## 116 -2.64 -2.719666526
## 117 -2.64 -3.116224248
## 118 -2.70 -2.771851109
## 119 -2.82 -2.899431080
## 120 -2.88 -2.750914478
## 121 -2.89 -2.798036236
## 122 -2.92 -1.400209082
## 123 -2.93 -3.866448530
## 124 -2.96 -2.657928905
## 125 -2.98 -2.753468234
## 126 -3.01 -2.653648873
## 127 -3.01 -3.352905368
## 128 -3.02 -3.316902204
## 129 -3.07 -3.220233692
## 130 -3.09 -2.501155681
## 131 -3.11 -3.145705535
## 132 -3.13 -3.586748709
## 133 -3.14 -2.221559956
## 134 -3.15 -3.773571121
## 135 -3.22 -2.270276557
## 136 -3.26 -3.510190238
## 137 -3.27 -2.746469265
## 138 -3.27 -3.105914288
## 139 -3.30 -3.103940938
## 140 -3.31 -2.404622858
## 141 -3.33 -2.388056547
## 142 -3.37 -2.210729887
## 143 -3.43 -3.521228417
## 144 -3.43 -2.657084657
## 145 -3.48 -2.679578828
## 146 -3.51 -3.587751838
## 147 -3.59 -3.206702384
## 148 -3.61 -2.750732435
## 149 -3.63 -3.681777890
## 150 -3.63 -3.463384813
## 151 -3.68 -2.388806642
## 152 -3.71 -3.664334298
## 153 -3.74 -2.679977435
## 154 -3.75 -3.680145916
## 155 -3.75 -3.575739380
## 156 -3.77 -3.287900999
## 157 -3.77 -4.304201535
## 158 -3.78 -4.177008484
## 159 -3.81 -3.573058902
## 160 -3.95 -4.325395477
## 161 -3.96 -5.129282203
## 162 -3.96 -4.173591468
## 163 -4.00 -3.471539684
## 164 -4.02 -4.539149664
## 165 -4.04 -4.349746312
## 166 -4.12 -3.744941883
## 167 -4.15 -4.624183584
## 168 -4.16 -3.690634905
## 169 -4.17 -4.571181725
## 170 -4.21 -4.870470959
## 171 -4.23 -4.506831404
## 172 -4.25 -3.725010074
## 173 -4.30 -4.102783702
## 174 -4.31 -5.274579908
## 175 -4.35 -4.584738560
## 176 -4.40 -4.053139508
## 177 -4.40 -4.420537050
## 178 -4.43 -4.681799740
## 179 -4.46 -4.501839782
## 180 -4.47 -3.165527176
## 181 -4.51 -4.873423580
## 182 -4.60 -4.150885779
## 183 -4.64 -4.837760315
## 184 -4.69 -4.904755051
## 185 -4.71 -4.224662579
## 186 -4.77 -4.448898367
## 187 -4.95 -4.574815404
## 188 -4.98 -4.482689881
## 189 -5.21 -5.859679214
## 190 -5.22 -5.378301893
## 191 -5.28 -4.337243721
## 192 -5.31 -3.411926686
## 193 -5.35 -4.786053391
## 194 -5.37 -4.648226035
## 195 -5.40 -4.843246873
## 196 -5.43 -4.429248524
## 197 -5.65 -5.553963741
## 198 -5.66 -4.394597838
## 199 -6.70 -4.716531543
## 200 -5.72 -5.406897037
## 201 -6.00 -6.874976891
## 202 -6.25 -6.358653469
## 203 -6.26 -6.085281689
## 204 -6.27 -6.515283584
## 205 -6.35 -5.741983739
## 206 -6.57 -6.037426203
## 207 -6.62 -5.529276627
## 208 -6.96 -5.954719784
## 209 -7.02 -7.487759072
## 210 -7.20 -7.055003277
## 211 -7.28 -7.150113058
## 212 -7.32 -7.588775057
## 213 -7.39 -7.894251264
## 214 -7.82 -8.398237705
## 215 -8.23 -8.616266938
## 216 -8.94 -8.443737673
## 217 1.07 0.053883568
## 218 0.43 0.264237386
## 219 0.32 -0.002712269
## 220 0.00 -0.006013736
## 221 -0.40 -1.156654241
## 222 -0.52 -0.460051871
## 223 -0.55 -0.850018873
## 224 -0.60 -0.862812265
## 225 -0.62 -2.084261147
## 226 -0.85 -1.154072411
## 227 -0.89 -0.792161274
## 228 -0.93 -0.913584147
## 229 -0.96 -0.905908724
## 230 -1.06 -1.500321500
## 231 -1.10 -1.434473463
## 232 -1.12 -1.180135586
## 233 -1.15 -0.733109574
## 234 -1.28 -0.105407792
## 235 -1.30 -1.364999293
## 236 -1.31 -1.330237472
## 237 -1.35 -2.140768996
## 238 -1.39 -2.087499555
## 239 -1.41 -1.560170436
## 240 -1.41 -1.521967785
## 241 -1.42 -0.698962133
## 242 -1.46 -1.702764148
## 243 -1.50 -1.532849230
## 244 -1.50 -2.356175031
## 245 -1.52 -1.630211246
## 246 -1.52 -0.642576847
## 247 -1.59 -1.677398010
## 248 -1.61 -1.105511327
## 249 -1.63 -1.186788530
## 250 -1.71 -2.578267901
## 251 -1.83 -2.004651127
## 252 -2.05 -2.225788939
## 253 -2.06 -2.297176559
## 254 -2.07 -3.660567793
## 255 -2.15 -2.528741305
## 256 -2.16 -1.062871654
## 257 -1.99 -0.823862630
## 258 -2.36 -2.247379943
## 259 -2.38 -3.942178022
## 260 -2.39 -1.701539733
## 261 -2.46 -2.217846322
## 262 -2.49 -2.214046226
## 263 -2.54 -2.727163557
## 264 -2.55 -2.905853862
## 265 -2.63 -2.742788832
## 266 -2.64 -1.815781255
## 267 -2.67 -2.933691070
## 268 -2.68 -1.604860228
## 269 -2.77 -2.641220779
## 270 -2.78 -2.843506111
## 271 -2.82 -2.470045840
## 272 -2.92 -3.671258570
## 273 -3.03 -3.555095739
## 274 -3.12 -3.628877999
## 275 -3.16 -2.994200350
## 276 -3.19 -3.390921223
## 277 -3.54 -3.491297819
## 278 -3.54 -2.736365786
## 279 -3.59 -3.683230330
## 280 -3.66 -3.207414868
## 281 -3.68 -2.530291987
## 282 -3.75 -4.137029675
## 283 -3.76 -3.308608734
## 284 -3.78 -4.023137046
## 285 -3.80 -3.957358679
## 286 -3.80 -4.592065546
## 287 -3.85 -3.430825214
## 288 -3.89 -3.515096354
## 289 -3.95 -4.055391955
## 290 -4.29 -4.823325484
## 291 -4.42 -5.182709772
## 292 -4.48 -4.178595474
## 293 -4.48 -2.954933400
## 294 -4.53 -4.992843898
## 295 -4.63 -4.596153408
## 296 -4.73 -4.238659782
## 297 -4.84 -4.351876673
## 298 -4.89 -4.405234977
## 299 -4.89 -4.893337378
## 300 -5.26 -5.523284033
## 301 -6.09 -5.381937124
## 302 -6.29 -5.914195532
## 303 -6.29 -6.521608229
## 304 -6.89 -5.926882430
## 305 -6.96 -6.929080787
## 306 -7.00 -6.917203762
## 307 -7.05 -7.669159704
## 308 -8.30 -8.893935996
## 309 -8.66 -8.789288118
## 310 -9.03 -9.250005248
## 311 -10.41 -10.092627390
## 312 -7.89 -7.436196419
## 313 -2.32 -1.802976316
## 314 0.39 -2.337179132
## 315 -2.90 -5.065220155
## 316 -2.47 -4.032627321
##################################
# Reporting the independent evaluation results
# for the test set
##################################
# Compute the independent test-set performance metrics for the SVM model.
# postResample(predicted, observed) returns RMSE, R-squared and MAE; the
# outer parentheses auto-print the assigned value.
(SVM_P_Test_Metrics <- postResample(SVM_P_Test[,2], SVM_P_Test[,1]))
## RMSE Rsquared MAE
## 0.6377764 0.9062256 0.4730166
# Extract the RMSE component.
(SVM_P_Test_RMSE <- SVM_P_Test_Metrics[1])
## RMSE
## 0.6377764
# Extract the R-squared component.
(SVM_P_Test_Rsquared <- SVM_P_Test_Metrics[2])
## Rsquared
## 0.9062256
##################################
# Transforming factor predictors
# as required by the nature of the model
##################################
# Creating a local object
# for the train and test sets
##################################
# Coerce every column to numeric as required by KNN's distance computation;
# factor columns are round-tripped through character so their label values
# (not their level codes) are preserved.
PMA_PreModelling_Train_KNN <- as.data.frame(lapply(PMA_PreModelling_Train, function(x) as.numeric(as.character(x))))
dim(PMA_PreModelling_Train_KNN)
## [1] 951 221
PMA_PreModelling_Test_KNN <- as.data.frame(lapply(PMA_PreModelling_Test, function(x) as.numeric(as.character(x))))
dim(PMA_PreModelling_Test_KNN)
## [1] 316 221
##################################
# Creating consistent fold assignments
# for the 10-Fold Cross Validation process
##################################
# Fix the RNG state so the fold assignments are reproducible.
set.seed(12345678)
# Build 10 resampling index sets (each holding the training rows of a fold),
# stratified on the outcome, then register them with trainControl so every
# model in this document is evaluated on identical folds.
KFold_Indices <- createFolds(PMA_PreModelling_Train_KNN$Log_Solubility,
                             k = 10,
                             returnTrain = TRUE)
KFold_Control <- trainControl(method = "cv",
                              index = KFold_Indices)
##################################
# Setting the conditions
# for hyperparameter tuning
##################################
# Candidate neighborhood sizes for KNN hyperparameter tuning (k = 1..15).
KNN_Grid <- data.frame(k = 1:15)
##################################
# Running the k-nearest neighbors model
# by setting the caret method to 'knn'
##################################
# Fix the RNG state for reproducibility of the tuning run.
set.seed(12345678)
# Tune k-nearest neighbors over KNN_Grid with the shared 10-fold CV folds;
# predictors are centered and scaled, as distance-based models require.
KNN_Tune <- train(x = PMA_PreModelling_Train_KNN[, !names(PMA_PreModelling_Train_KNN) %in% c("Log_Solubility")],
                  y = PMA_PreModelling_Train_KNN$Log_Solubility,
                  method = "knn",
                  tuneGrid = KNN_Grid,
                  trControl = KFold_Control,
                  preProc = c("center", "scale"))
##################################
# Reporting the cross-validation results
# for the train set
##################################
# Print the caret tuning object: cross-validated RMSE, R-squared and MAE for
# each candidate k; caret selects the smallest-RMSE model (k = 3 here).
KNN_Tune
## k-Nearest Neighbors
##
## 951 samples
## 220 predictors
##
## Pre-processing: centered (220), scaled (220)
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 857, 855, 855, 856, 857, 856, ...
## Resampling results across tuning parameters:
##
## k RMSE Rsquared MAE
## 1 1.193390 0.6909384 0.8777626
## 2 1.109959 0.7159649 0.8060134
## 3 1.067291 0.7326032 0.7947394
## 4 1.068531 0.7307165 0.8044122
## 5 1.071815 0.7288216 0.8057926
## 6 1.076846 0.7255801 0.8051050
## 7 1.077346 0.7255634 0.8133125
## 8 1.073705 0.7266408 0.8133501
## 9 1.068856 0.7273001 0.8113416
## 10 1.075687 0.7244911 0.8189207
## 11 1.083810 0.7206933 0.8290562
## 12 1.092858 0.7149748 0.8374816
## 13 1.106658 0.7079159 0.8493934
## 14 1.113444 0.7055549 0.8521571
## 15 1.121625 0.7002523 0.8661098
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was k = 3.
# Inspect the final fitted model object (3-nearest neighbor regression).
KNN_Tune$finalModel
## 3-nearest neighbor regression model
# Full resampling results table: per-k fold means and standard deviations.
KNN_Tune$results
## k RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 1.193390 0.6909384 0.8777626 0.10476073 0.04689864 0.05661709
## 2 2 1.109959 0.7159649 0.8060134 0.06560494 0.06008038 0.03160597
## 3 3 1.067291 0.7326032 0.7947394 0.06626369 0.06949454 0.03079834
## 4 4 1.068531 0.7307165 0.8044122 0.06667622 0.06567027 0.03511312
## 5 5 1.071815 0.7288216 0.8057926 0.06567005 0.06155802 0.04694389
## 6 6 1.076846 0.7255801 0.8051050 0.08324859 0.06058341 0.06353121
## 7 7 1.077346 0.7255634 0.8133125 0.07055653 0.06059999 0.06023208
## 8 8 1.073705 0.7266408 0.8133501 0.05678811 0.05955824 0.05362260
## 9 9 1.068856 0.7273001 0.8113416 0.05777880 0.06681028 0.05431120
## 10 10 1.075687 0.7244911 0.8189207 0.05823014 0.06579563 0.05809347
## 11 11 1.083810 0.7206933 0.8290562 0.05604450 0.06217820 0.05685605
## 12 12 1.092858 0.7149748 0.8374816 0.05824283 0.06402713 0.05709366
## 13 13 1.106658 0.7079159 0.8493934 0.05899373 0.06415460 0.05468233
## 14 14 1.113444 0.7055549 0.8521571 0.05716728 0.06139267 0.05081140
## 15 15 1.121625 0.7002523 0.8661098 0.06343865 0.06509901 0.05939330
# Pull the cross-validated RMSE for the selected k (bestTune).
(KNN_Train_RMSE <- KNN_Tune$results[KNN_Tune$results$k == KNN_Tune$bestTune$k,
                                    c("RMSE")])
## [1] 1.067291
# Pull the cross-validated R-squared for the selected k (bestTune).
(KNN_Train_Rsquared <- KNN_Tune$results[KNN_Tune$results$k == KNN_Tune$bestTune$k,
                                        c("Rsquared")])
## [1] 0.7326032
##################################
# Identifying and plotting the
# best model predictors
##################################
# model does not support variable importance measurement
##################################
# Independently evaluating the model
# on the test set
##################################
# Score the tuned KNN model on the held-out test set, pairing each observed
# Log_Solubility value with its prediction (outcome column excluded from the
# predictor matrix passed to predict).
KNN_Test <- data.frame(KNN_Observed = PMA_PreModelling_Test$Log_Solubility,
                       KNN_Predicted = predict(KNN_Tune,
                                               PMA_PreModelling_Test_KNN[, !names(PMA_PreModelling_Test_KNN) %in% c("Log_Solubility")]))
KNN_Test
## KNN_Observed KNN_Predicted
## 1 0.93 0.196666667
## 2 0.85 0.003333333
## 3 0.81 -1.256666667
## 4 0.74 0.876666667
## 5 0.61 -0.453333333
## 6 0.58 0.150000000
## 7 0.57 -1.213333333
## 8 0.56 -0.170000000
## 9 0.52 -0.303333333
## 10 0.45 0.373333333
## 11 0.40 0.373333333
## 12 0.36 0.250000000
## 13 0.22 -0.890000000
## 14 0.08 -0.873333333
## 15 0.07 -0.810000000
## 16 0.02 -1.030000000
## 17 0.00 -0.646666667
## 18 -0.01 -1.246666667
## 19 -0.07 -0.526666667
## 20 -0.12 -1.460000000
## 21 -0.17 0.610000000
## 22 -0.29 -0.860000000
## 23 -0.38 -0.730000000
## 24 -0.38 -2.070000000
## 25 -0.39 -0.676666667
## 26 -0.42 -1.803333333
## 27 -0.44 -2.093333333
## 28 -0.46 -1.186666667
## 29 -0.48 -2.086666667
## 30 -0.60 -2.520000000
## 31 -0.63 -2.693333333
## 32 -0.66 -0.713333333
## 33 -0.72 -0.853333333
## 34 -0.72 0.013333333
## 35 -0.80 -1.723333333
## 36 -0.80 -0.623333333
## 37 -0.82 -0.573333333
## 38 -0.82 -1.166666667
## 39 -0.84 -1.180000000
## 40 -0.85 -2.766666667
## 41 -0.85 -0.143333333
## 42 -0.87 -1.030000000
## 43 -0.89 -2.060000000
## 44 -0.90 1.223333333
## 45 -0.96 -1.340000000
## 46 -0.96 -1.493333333
## 47 -0.99 -0.966666667
## 48 -1.01 -0.516666667
## 49 -1.09 -1.500000000
## 50 -1.12 -0.743333333
## 51 -1.14 -0.716666667
## 52 -1.17 -1.726666667
## 53 -1.19 -0.916666667
## 54 -1.22 -0.566666667
## 55 -1.27 -0.806666667
## 56 -1.28 -1.523333333
## 57 -1.32 -1.210000000
## 58 -1.38 -1.220000000
## 59 -1.39 -1.973333333
## 60 -1.42 -2.245000000
## 61 -1.47 -1.510000000
## 62 -1.47 -1.910000000
## 63 -1.50 -0.650000000
## 64 -1.52 -1.030000000
## 65 -1.54 -1.440000000
## 66 -1.55 -1.206666667
## 67 -1.56 -1.946666667
## 68 -1.57 -2.023333333
## 69 -1.60 -1.340000000
## 70 -1.60 -2.113333333
## 71 -1.62 -2.616666667
## 72 -1.64 -2.580000000
## 73 -1.67 -1.983333333
## 74 -1.70 -2.010000000
## 75 -1.70 -1.163333333
## 76 -1.71 -2.920000000
## 77 -1.71 -2.383333333
## 78 -1.75 -2.086666667
## 79 -1.78 -1.513333333
## 80 -1.78 -1.706666667
## 81 -1.82 -1.453333333
## 82 -1.87 -3.180000000
## 83 -1.89 -1.073333333
## 84 -1.92 -2.463333333
## 85 -1.92 -2.810000000
## 86 -1.92 -0.600000000
## 87 -1.94 -3.193333333
## 88 -1.99 -0.743333333
## 89 -2.00 -2.780000000
## 90 -2.05 -2.436666667
## 91 -2.06 -2.416666667
## 92 -2.08 -1.440000000
## 93 -2.10 -3.763333333
## 94 -2.11 -1.963333333
## 95 -2.12 -0.256666667
## 96 -2.17 -1.860000000
## 97 -2.21 -2.190000000
## 98 -2.24 -2.853333333
## 99 -2.24 -1.983333333
## 100 -2.29 -1.910000000
## 101 -2.31 -3.276666667
## 102 -2.32 -2.226666667
## 103 -2.35 -1.453333333
## 104 -2.35 -2.006666667
## 105 -2.36 -3.410000000
## 106 -2.36 -1.353333333
## 107 -2.38 -2.376666667
## 108 -2.42 -3.543333333
## 109 -2.43 -3.303333333
## 110 -2.44 -2.457500000
## 111 -2.52 -3.010000000
## 112 -2.53 -2.170000000
## 113 -2.57 -3.010000000
## 114 -2.62 -2.457500000
## 115 -2.62 -3.006666667
## 116 -2.64 -3.296666667
## 117 -2.64 -3.640000000
## 118 -2.70 -1.073333333
## 119 -2.82 -1.526666667
## 120 -2.88 -2.836666667
## 121 -2.89 -3.410000000
## 122 -2.92 -1.193333333
## 123 -2.93 -2.886666667
## 124 -2.96 -2.526666667
## 125 -2.98 -0.703333333
## 126 -3.01 -2.190000000
## 127 -3.01 -3.913333333
## 128 -3.02 -4.086666667
## 129 -3.07 -3.230000000
## 130 -3.09 -3.986666667
## 131 -3.11 -3.126666667
## 132 -3.13 -3.336666667
## 133 -3.14 -2.836666667
## 134 -3.15 -1.203333333
## 135 -3.22 -1.546666667
## 136 -3.26 -3.626666667
## 137 -3.27 -2.403333333
## 138 -3.27 -1.936666667
## 139 -3.30 -4.446666667
## 140 -3.31 -2.403333333
## 141 -3.33 -3.096666667
## 142 -3.37 -2.870000000
## 143 -3.43 -3.700000000
## 144 -3.43 -2.763333333
## 145 -3.48 -1.613333333
## 146 -3.51 -0.790000000
## 147 -3.59 -2.457500000
## 148 -3.61 -3.096666667
## 149 -3.63 -3.356666667
## 150 -3.63 -4.473333333
## 151 -3.68 -0.993333333
## 152 -3.71 -3.150000000
## 153 -3.74 -3.260000000
## 154 -3.75 -3.126666667
## 155 -3.75 -3.813333333
## 156 -3.77 -4.416666667
## 157 -3.77 -4.773333333
## 158 -3.78 -3.006666667
## 159 -3.81 -3.426666667
## 160 -3.95 -2.236666667
## 161 -3.96 -6.576666667
## 162 -3.96 -4.566666667
## 163 -4.00 -3.430000000
## 164 -4.02 -2.816666667
## 165 -4.04 -2.823333333
## 166 -4.12 -3.960000000
## 167 -4.15 -4.220000000
## 168 -4.16 -2.290000000
## 169 -4.17 -2.670000000
## 170 -4.21 -1.136666667
## 171 -4.23 -4.930000000
## 172 -4.25 -4.446666667
## 173 -4.30 -4.166666667
## 174 -4.31 -5.940000000
## 175 -4.35 -3.976666667
## 176 -4.40 -4.116666667
## 177 -4.40 -3.823333333
## 178 -4.43 -4.146666667
## 179 -4.46 -4.513333333
## 180 -4.47 -4.446666667
## 181 -4.51 -3.336666667
## 182 -4.60 -4.316666667
## 183 -4.64 -4.213333333
## 184 -4.69 -3.340000000
## 185 -4.71 -4.210000000
## 186 -4.77 -3.726666667
## 187 -4.95 -3.866666667
## 188 -4.98 -2.936666667
## 189 -5.21 -6.106666667
## 190 -5.22 -6.543333333
## 191 -5.28 -4.703333333
## 192 -5.31 -3.150000000
## 193 -5.35 -5.003333333
## 194 -5.37 -3.986666667
## 195 -5.40 -3.626666667
## 196 -5.43 -1.030000000
## 197 -5.65 -4.083333333
## 198 -5.66 -4.210000000
## 199 -6.70 -2.666666667
## 200 -5.72 -5.020000000
## 201 -6.00 -7.723333333
## 202 -6.25 -7.086666667
## 203 -6.26 -6.426666667
## 204 -6.27 -6.030000000
## 205 -6.35 -6.750000000
## 206 -6.57 -4.773333333
## 207 -6.62 -5.020000000
## 208 -6.96 -4.773333333
## 209 -7.02 -4.773333333
## 210 -7.20 -7.163333333
## 211 -7.28 -8.136666667
## 212 -7.32 -6.615000000
## 213 -7.39 -8.296666667
## 214 -7.82 -8.296666667
## 215 -8.23 -8.226666667
## 216 -8.94 -7.670000000
## 217 1.07 -0.170000000
## 218 0.43 -0.580000000
## 219 0.32 0.593333333
## 220 0.00 -1.156666667
## 221 -0.40 -1.913333333
## 222 -0.52 -0.676666667
## 223 -0.55 -0.956666667
## 224 -0.60 -1.813333333
## 225 -0.62 -3.086666667
## 226 -0.85 -1.453333333
## 227 -0.89 -1.580000000
## 228 -0.93 -1.723333333
## 229 -0.96 0.410000000
## 230 -1.06 -2.580000000
## 231 -1.10 -1.946666667
## 232 -1.12 -0.923333333
## 233 -1.15 -0.730000000
## 234 -1.28 -0.560000000
## 235 -1.30 -1.546666667
## 236 -1.31 -1.170000000
## 237 -1.35 -0.790000000
## 238 -1.39 -3.230000000
## 239 -1.41 -0.916666667
## 240 -1.41 -0.763333333
## 241 -1.42 -2.396666667
## 242 -1.46 -0.590000000
## 243 -1.50 -1.590000000
## 244 -1.50 -2.906666667
## 245 -1.52 -2.090000000
## 246 -1.52 -1.270000000
## 247 -1.59 -2.066666667
## 248 -1.61 -1.706666667
## 249 -1.63 -1.780000000
## 250 -1.71 -2.236666667
## 251 -1.83 -1.310000000
## 252 -2.05 -1.846666667
## 253 -2.06 -1.956666667
## 254 -2.07 -0.787500000
## 255 -2.15 -2.716666667
## 256 -2.16 -2.423333333
## 257 -1.99 -1.083333333
## 258 -2.36 -2.793333333
## 259 -2.38 -3.436666667
## 260 -2.39 -1.226666667
## 261 -2.46 -2.303333333
## 262 -2.49 -1.820000000
## 263 -2.54 -2.836666667
## 264 -2.55 -1.790000000
## 265 -2.63 -3.072500000
## 266 -2.64 -2.286666667
## 267 -2.67 -2.770000000
## 268 -2.68 0.730000000
## 269 -2.77 -3.960000000
## 270 -2.78 -1.810000000
## 271 -2.82 -1.526666667
## 272 -2.92 -2.156666667
## 273 -3.03 -2.873333333
## 274 -3.12 -1.956666667
## 275 -3.16 -3.240000000
## 276 -3.19 -3.873333333
## 277 -3.54 -3.416666667
## 278 -3.54 -3.086666667
## 279 -3.59 -4.116666667
## 280 -3.66 -3.013333333
## 281 -3.68 -3.036666667
## 282 -3.75 -3.096666667
## 283 -3.76 -3.570000000
## 284 -3.78 -2.920000000
## 285 -3.80 -3.400000000
## 286 -3.80 -2.273333333
## 287 -3.85 -4.446666667
## 288 -3.89 -2.386666667
## 289 -3.95 -3.916666667
## 290 -4.29 -4.886666667
## 291 -4.42 -2.866666667
## 292 -4.48 -3.626666667
## 293 -4.48 -2.053333333
## 294 -4.53 -5.730000000
## 295 -4.63 -4.386666667
## 296 -4.73 -2.473333333
## 297 -4.84 -4.566666667
## 298 -4.89 -3.410000000
## 299 -4.89 -4.383333333
## 300 -5.26 -6.133333333
## 301 -6.09 -5.020000000
## 302 -6.29 -6.443333333
## 303 -6.29 -6.283333333
## 304 -6.89 -2.343333333
## 305 -6.96 -4.216666667
## 306 -7.00 -6.996666667
## 307 -7.05 -8.136666667
## 308 -8.30 -8.083333333
## 309 -8.66 -7.800000000
## 310 -9.03 -7.596666667
## 311 -10.41 -9.523333333
## 312 -7.89 -6.996666667
## 313 -2.32 -2.183333333
## 314 0.39 -3.086666667
## 315 -2.90 -1.890000000
## 316 -2.47 -5.940000000
##################################
# Reporting the independent evaluation results
# for the test set
##################################
# Compute the independent test-set performance metrics for the KNN model:
# postResample(predicted, observed) returns RMSE, R-squared and MAE.
(KNN_Test_Metrics <- postResample(KNN_Test[,2], KNN_Test[,1]))
## RMSE Rsquared MAE
## 1.1247103 0.7137298 0.8436946
# Extract the RMSE component.
(KNN_Test_RMSE <- KNN_Test_Metrics[1])
## RMSE
## 1.12471
# Extract the R-squared component.
(KNN_Test_Rsquared <- KNN_Test_Metrics[2])
## Rsquared
## 0.7137298
##################################
# Creating a local object
# for the train set
##################################
# Work on a local copy of the pre-processed train set for the CART model.
PMA_PreModelling_Train_CART <- PMA_PreModelling_Train
##################################
# Creating consistent fold assignments
# for the 10-Fold Cross Validation process
##################################
# Fix the RNG state so the fold assignments are reproducible and identical
# to those used by the other models in this document.
set.seed(12345678)
KFold_Indices <- createFolds(PMA_PreModelling_Train_CART$Log_Solubility,
                             k = 10,
                             returnTrain = TRUE)
KFold_Control <- trainControl(method = "cv",
                              index = KFold_Indices)
##################################
# Setting the conditions
# for hyperparameter tuning
##################################
# Candidate complexity-parameter (cp) values for rpart pruning.
CART_Grid <- data.frame(cp = c(0.001, 0.005, 0.010, 0.015, 0.020))
##################################
# Running the classification and regression trees model
# by setting the caret method to 'rpart'
##################################
# Fix the RNG state for reproducibility of the tuning run.
set.seed(12345678)
# Tune a classification and regression tree over CART_Grid with the shared
# 10-fold CV folds; trees need no predictor pre-processing.
CART_Tune <- train(x = PMA_PreModelling_Train_CART[, !names(PMA_PreModelling_Train_CART) %in% c("Log_Solubility")],
                   y = PMA_PreModelling_Train_CART$Log_Solubility,
                   method = "rpart",
                   tuneGrid = CART_Grid,
                   trControl = KFold_Control)
##################################
# Reporting the cross-validation results
# for the train set
##################################
# Print the caret tuning object: cross-validated RMSE, R-squared and MAE for
# each candidate cp; caret selects the smallest-RMSE model (cp = 0.001 here).
CART_Tune
## CART
##
## 951 samples
## 220 predictors
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 857, 855, 855, 856, 857, 856, ...
## Resampling results across tuning parameters:
##
## cp RMSE Rsquared MAE
## 0.001 0.949044 0.7814193 0.7165704
## 0.005 1.062111 0.7304621 0.8158512
## 0.010 1.094310 0.7129754 0.8405203
## 0.015 1.141204 0.6879057 0.8863220
## 0.020 1.159802 0.6767517 0.9117055
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was cp = 0.001.
# Inspect the final fitted rpart tree: node splits, deviance and fitted values.
CART_Tune$finalModel
## n= 951
##
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 951 3979.3030000 -2.71857000
## 2) NumCarbon>=0.4506182 317 1114.2000000 -4.48536300
## 4) SurfaceArea2< -1.012864 74 173.0767000 -6.86783800
## 8) MolWeight>=0.4710593 48 58.8117900 -7.68208300
## 16) HydrophilicFactor>=-0.5793982 16 18.1638400 -8.59187500 *
## 17) HydrophilicFactor< -0.5793982 32 20.7826500 -7.22718800
## 34) FP184=0 13 4.7800770 -7.89307700 *
## 35) FP184=1 19 6.2942530 -6.77157900 *
## 9) MolWeight< 0.4710593 26 23.6894500 -5.36461500
## 18) MolWeight>=0.1178222 12 7.1820670 -6.18333300 *
## 19) MolWeight< 0.1178222 14 1.5692860 -4.66285700 *
## 5) SurfaceArea2>=-1.012864 243 393.1730000 -3.75983500
## 10) HydrophilicFactor< 0.2078952 159 237.7748000 -4.07528300
## 20) MolWeight>=1.101123 60 65.5968900 -4.64950000
## 40) FP077=0 17 18.8918500 -5.39176500 *
## 41) FP077=1 43 33.6358300 -4.35604700
## 82) FP119=0 35 20.8480300 -4.57857100 *
## 83) FP119=1 8 3.4723500 -3.38250000 *
## 21) MolWeight< 1.101123 99 140.4044000 -3.72727300
## 42) FP075=0 55 69.1337300 -4.25290900
## 84) NumRotBonds>=2.179213 8 8.5670000 -5.56500000 *
## 85) NumRotBonds< 2.179213 47 44.4497900 -4.02957400
## 170) MolWeight>=0.4202744 26 20.8555500 -4.41692300
## 340) FP017=0 15 13.0163300 -4.79666700 *
## 341) FP017=1 11 2.7264910 -3.89909100 *
## 171) MolWeight< 0.4202744 21 14.8634000 -3.55000000 *
## 43) FP075=1 44 37.0793000 -3.07022700
## 86) HydrophilicFactor< -0.6757562 14 12.6063200 -3.73357100 *
## 87) HydrophilicFactor>=-0.6757562 30 15.4377900 -2.76066700 *
## 11) HydrophilicFactor>=0.2078952 84 109.6285000 -3.16273800
## 22) NumMultBonds>=1.228149 28 34.5152700 -3.77892900
## 44) FP131=0 19 23.0277700 -4.12263200 *
## 45) FP131=1 9 4.5046000 -3.05333300 *
## 23) NumMultBonds< 1.228149 56 59.1661900 -2.85464300
## 46) FP081=1 24 15.5219800 -3.45916700
## 92) FP125=0 12 4.3290920 -3.90583300 *
## 93) FP125=1 12 6.4046250 -3.01250000 *
## 47) FP081=0 32 28.2953500 -2.40125000
## 94) FP088=0 24 13.3008000 -2.74791700 *
## 95) FP088=1 8 3.4574880 -1.36125000 *
## 3) NumCarbon< 0.4506182 634 1380.8030000 -1.83517400
## 6) MolWeight>=0.364972 104 173.9672000 -3.32846200
## 12) SurfaceArea1< 0.1308092 43 51.7177400 -4.24674400
## 24) MolWeight>=0.8557128 18 13.6410000 -5.09333300 *
## 25) MolWeight< 0.8557128 25 15.8873000 -3.63720000 *
## 13) SurfaceArea1>=0.1308092 61 60.4300200 -2.68114800
## 26) NumOxygen< 1.113753 47 25.8753700 -2.99914900
## 52) FP001=0 7 4.7345710 -3.99571400 *
## 53) FP001=1 40 12.9722000 -2.82475000 *
## 27) NumOxygen>=1.113753 14 13.8457200 -1.61357100 *
## 7) MolWeight< 0.364972 530 929.4181000 -1.54215100
## 14) SurfaceArea2< -1.012864 118 121.2846000 -2.81500000
## 28) NumBonds>=-0.4530519 37 11.1113100 -3.96432400
## 56) NumBonds>=-0.1671013 20 3.4969000 -4.31500000 *
## 57) NumBonds< -0.1671013 17 2.2614470 -3.55176500 *
## 29) NumBonds< -0.4530519 81 38.9726000 -2.29000000
## 58) NumBonds>=-1.383145 51 14.1965000 -2.62686300
## 116) FP172=1 8 0.7938000 -3.34500000 *
## 117) FP172=0 43 8.5093440 -2.49325600 *
## 59) NumBonds< -1.383145 30 9.1503870 -1.71733300 *
## 15) SurfaceArea2>=-1.012864 412 562.2019000 -1.17759700
## 30) MolWeight>=-0.8064806 241 241.0830000 -1.74688800
## 60) NumRotBonds>=1.764021 14 3.0914860 -3.28714300 *
## 61) NumRotBonds< 1.764021 227 202.7297000 -1.65189400
## 122) NumOxygen< 1.113753 205 153.1710000 -1.76263400
## 244) HydrophilicFactor< -0.7050632 24 12.3292600 -2.41125000 *
## 245) HydrophilicFactor>=-0.7050632 181 129.4060000 -1.67663000
## 490) FP065=1 121 70.4388200 -1.86504100
## 980) MolWeight>=-0.6589892 107 57.1190600 -1.96775700
## 1960) NumChlorine>=-0.04017528 20 11.5382200 -2.61700000 *
## 1961) NumChlorine< -0.04017528 87 35.2125100 -1.81850600
## 3922) HydrophilicFactor>=-0.4643903 72 23.8945900 -1.92791700 *
## 3923) HydrophilicFactor< -0.4643903 15 6.3189330 -1.29333300 *
## 981) MolWeight< -0.6589892 14 3.5628000 -1.08000000 *
## 491) FP065=0 60 46.0095300 -1.29666700
## 982) FP102=0 53 30.9962800 -1.45717000
## 1964) FP145=0 44 20.9088800 -1.63431800 *
## 1965) FP145=1 9 1.9560890 -0.59111110 *
## 983) FP102=1 7 3.3102860 -0.08142857 *
## 123) NumOxygen>=1.113753 22 23.6190000 -0.62000000
## 246) FP168=1 9 0.7426222 -1.58444400 *
## 247) FP168=0 13 8.7094310 0.04769231 *
## 31) MolWeight< -0.8064806 171 132.9333000 -0.37526320
## 62) NumCarbon>=-0.8205617 86 36.7958900 -0.89581400
## 124) FP116=0 78 18.6207500 -1.04076900 *
## 125) FP116=1 8 0.5565500 0.51750000 *
## 63) NumCarbon< -0.8205617 85 49.2558300 0.15141180
## 126) MolWeight>=-1.497949 47 24.5774900 -0.10638300 *
## 127) MolWeight< -1.497949 38 17.6915000 0.47026320
## 254) FP063=0 18 5.5220940 -0.01055556 *
## 255) FP063=1 20 4.2628200 0.90300000 *
# Full resampling results table: per-cp fold means and standard deviations.
CART_Tune$results
## cp RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 0.001 0.949044 0.7814193 0.7165704 0.06100448 0.05338094 0.04092924
## 2 0.005 1.062111 0.7304621 0.8158512 0.04537841 0.04743129 0.03111770
## 3 0.010 1.094310 0.7129754 0.8405203 0.04536525 0.05532136 0.04079579
## 4 0.015 1.141204 0.6879057 0.8863220 0.07570284 0.04717980 0.07258210
## 5 0.020 1.159802 0.6767517 0.9117055 0.07943956 0.04864369 0.08872587
# Pull the cross-validated RMSE for the selected cp (bestTune).
(CART_Train_RMSE <- CART_Tune$results[CART_Tune$results$cp == CART_Tune$bestTune$cp,
                                      c("RMSE")])
## [1] 0.949044
# Pull the cross-validated R-squared for the selected cp (bestTune).
(CART_Train_Rsquared <- CART_Tune$results[CART_Tune$results$cp == CART_Tune$bestTune$cp,
                                          c("Rsquared")])
## [1] 0.7814193
##################################
# Identifying and plotting the
# best model predictors
##################################
# Rank predictors by importance for the tuned CART model (scaled to 0-100)
# and plot the top 25.
CART_VarImp <- varImp(CART_Tune, scale = TRUE)
plot(CART_VarImp,
     top = 25,
     scales = list(y = list(cex = .95)),
     main = "Ranked Variable Importance : Classification and Regression Trees",
     xlab = "Scaled Variable Importance Metrics",
     ylab = "Predictors",
     cex = 2,
     origin = 0,
     alpha = 0.45)
##################################
# Independently evaluating the model
# on the test set
##################################
# Score the tuned CART model on the held-out test set, pairing each observed
# Log_Solubility value with its prediction (outcome column excluded from the
# predictor matrix passed to predict).
CART_Test <- data.frame(CART_Observed = PMA_PreModelling_Test$Log_Solubility,
                        CART_Predicted = predict(CART_Tune,
                                                 PMA_PreModelling_Test[, !names(PMA_PreModelling_Test) %in% c("Log_Solubility")]))
CART_Test
## CART_Observed CART_Predicted
## 20 0.93 -0.10638298
## 21 0.85 -0.10638298
## 23 0.81 -1.04076923
## 25 0.74 -0.10638298
## 28 0.61 -1.63431818
## 31 0.58 -0.01055556
## 32 0.57 -0.01055556
## 33 0.56 -0.01055556
## 34 0.52 -0.01055556
## 37 0.45 0.51750000
## 38 0.40 0.51750000
## 42 0.36 -1.92791667
## 49 0.22 -0.10638298
## 54 0.08 -0.10638298
## 55 0.07 -1.04076923
## 58 0.02 -1.29333333
## 60 0.00 -1.04076923
## 61 -0.01 -0.01055556
## 65 -0.07 -0.10638298
## 69 -0.12 -1.29333333
## 73 -0.17 0.04769231
## 86 -0.29 -0.10638298
## 90 -0.38 -1.04076923
## 91 -0.38 -1.04076923
## 93 -0.39 -1.04076923
## 96 -0.42 -1.04076923
## 98 -0.44 -1.04076923
## 100 -0.46 -0.10638298
## 104 -0.48 -5.39176471
## 112 -0.60 -1.04076923
## 115 -0.63 -2.76066667
## 119 -0.66 -0.10638298
## 128 -0.72 -0.10638298
## 130 -0.72 -1.04076923
## 139 -0.80 -1.04076923
## 143 -0.80 -1.63431818
## 145 -0.82 -0.01055556
## 146 -0.82 -0.10638298
## 149 -0.84 -1.63431818
## 150 -0.85 -0.01055556
## 152 -0.85 -0.01055556
## 157 -0.87 -2.61700000
## 161 -0.89 -1.63431818
## 162 -0.90 -0.10638298
## 166 -0.96 -3.28714286
## 167 -0.96 0.90300000
## 173 -0.99 -0.10638298
## 176 -1.01 -0.10638298
## 182 -1.09 -1.71733333
## 187 -1.12 -1.92791667
## 190 -1.14 -0.10638298
## 194 -1.17 -1.71733333
## 195 -1.19 -1.08000000
## 201 -1.22 -1.04076923
## 207 -1.27 -1.63431818
## 208 -1.28 -1.04076923
## 215 -1.32 -1.71733333
## 222 -1.38 -1.63431818
## 224 -1.39 -1.92791667
## 231 -1.42 -2.41125000
## 236 -1.47 -1.63431818
## 237 -1.47 -1.71733333
## 240 -1.50 -0.08142857
## 243 -1.52 -1.04076923
## 248 -1.54 -1.08000000
## 251 -1.55 -2.76066667
## 256 -1.56 -1.92791667
## 258 -1.57 -2.61700000
## 262 -1.60 -1.04076923
## 266 -1.60 -1.63431818
## 272 -1.62 -3.63720000
## 280 -1.64 -2.82475000
## 283 -1.67 -1.63431818
## 286 -1.70 -2.82475000
## 287 -1.70 -1.08000000
## 289 -1.71 -1.71733333
## 290 -1.71 -2.82475000
## 298 -1.75 -1.63431818
## 305 -1.78 -1.92791667
## 306 -1.78 -1.61357143
## 312 -1.82 -1.92791667
## 320 -1.87 -1.04076923
## 325 -1.89 -3.28714286
## 332 -1.92 -1.63431818
## 333 -1.92 -2.41125000
## 335 -1.92 -1.63431818
## 339 -1.94 -2.41125000
## 346 -1.99 -1.92791667
## 347 -2.00 -1.71733333
## 350 -2.05 -2.41125000
## 353 -2.06 -1.63431818
## 358 -2.08 -1.92791667
## 365 -2.10 -2.61700000
## 367 -2.11 -1.58444444
## 370 -2.12 -1.92791667
## 379 -2.17 -1.92791667
## 386 -2.21 -2.49325581
## 394 -2.24 -3.99571429
## 396 -2.24 -1.92791667
## 400 -2.29 -1.71733333
## 404 -2.31 -1.71733333
## 405 -2.32 -1.63431818
## 413 -2.35 -1.92791667
## 415 -2.35 -1.92791667
## 417 -2.36 -2.49325581
## 418 -2.36 -0.08142857
## 423 -2.38 -3.34500000
## 434 -2.42 -1.92791667
## 437 -2.43 -2.82475000
## 440 -2.44 -2.82475000
## 449 -2.52 -2.82475000
## 450 -2.53 -2.61700000
## 457 -2.57 -2.82475000
## 467 -2.62 -2.82475000
## 469 -2.62 -1.61357143
## 474 -2.64 -3.73357143
## 475 -2.64 -2.76066667
## 485 -2.70 -3.28714286
## 504 -2.82 -4.12263158
## 511 -2.88 -2.74791667
## 512 -2.89 -2.61700000
## 517 -2.92 -1.63431818
## 519 -2.93 -4.12263158
## 520 -2.96 -2.49325581
## 522 -2.98 -3.73357143
## 527 -3.01 -2.76066667
## 528 -3.01 -3.55176471
## 529 -3.02 -3.99571429
## 537 -3.07 -2.49325581
## 540 -3.09 -3.89909091
## 541 -3.11 -4.31500000
## 547 -3.13 -2.76066667
## 550 -3.14 -2.74791667
## 555 -3.15 -4.12263158
## 564 -3.22 -5.09333333
## 570 -3.26 -3.90583333
## 573 -3.27 -2.82475000
## 575 -3.27 -4.31500000
## 578 -3.30 -3.55176471
## 581 -3.31 -2.61700000
## 585 -3.33 -3.99571429
## 590 -3.37 -4.79666667
## 601 -3.43 -3.01250000
## 602 -3.43 -3.63720000
## 607 -3.48 -3.55000000
## 610 -3.51 -3.55000000
## 618 -3.59 -3.05333333
## 624 -3.61 -3.99571429
## 626 -3.63 -3.28714286
## 627 -3.63 -3.34500000
## 634 -3.68 -1.36125000
## 640 -3.71 -4.57857143
## 642 -3.74 -3.55176471
## 643 -3.75 -4.31500000
## 644 -3.75 -3.55000000
## 645 -3.77 -3.63720000
## 646 -3.77 -4.31500000
## 647 -3.78 -3.05333333
## 652 -3.81 -3.55176471
## 658 -3.95 -2.74791667
## 659 -3.96 -4.66285714
## 660 -3.96 -3.89909091
## 664 -4.00 -3.99571429
## 666 -4.02 -5.09333333
## 667 -4.04 -5.39176471
## 675 -4.12 -4.31500000
## 680 -4.15 -4.57857143
## 681 -4.16 -2.76066667
## 687 -4.17 -4.66285714
## 694 -4.21 -5.39176471
## 697 -4.23 -5.09333333
## 701 -4.25 -4.31500000
## 705 -4.30 -3.01250000
## 707 -4.31 -5.56500000
## 710 -4.35 -3.63720000
## 716 -4.40 -3.89909091
## 719 -4.40 -3.73357143
## 720 -4.43 -4.57857143
## 725 -4.46 -4.57857143
## 727 -4.47 -4.31500000
## 730 -4.51 -5.09333333
## 738 -4.60 -3.55000000
## 745 -4.64 -4.31500000
## 748 -4.69 -5.56500000
## 751 -4.71 -4.57857143
## 756 -4.77 -2.76066667
## 766 -4.95 -4.12263158
## 769 -4.98 -3.63720000
## 783 -5.21 -6.77157895
## 785 -5.22 -4.66285714
## 790 -5.28 -3.89909091
## 793 -5.31 -3.55000000
## 795 -5.35 -4.57857143
## 796 -5.37 -4.57857143
## 797 -5.40 -3.90583333
## 801 -5.43 -5.09333333
## 811 -5.65 -3.63720000
## 812 -5.66 -4.57857143
## 815 -6.70 -3.89909091
## 816 -5.72 -5.09333333
## 817 -6.00 -6.18333333
## 824 -6.25 -6.77157895
## 825 -6.26 -6.77157895
## 826 -6.27 -6.77157895
## 830 -6.35 -4.66285714
## 837 -6.57 -6.18333333
## 838 -6.62 -5.09333333
## 844 -6.96 -6.18333333
## 845 -7.02 -7.89307692
## 847 -7.20 -7.89307692
## 850 -7.28 -6.77157895
## 852 -7.32 -6.77157895
## 853 -7.39 -6.77157895
## 861 -7.82 -8.59187500
## 868 -8.23 -7.89307692
## 874 -8.94 -8.59187500
## 879 1.07 -0.01055556
## 895 0.43 -0.01055556
## 899 0.32 0.90300000
## 903 0.00 -0.01055556
## 917 -0.40 -1.92791667
## 927 -0.52 -0.10638298
## 929 -0.55 -0.10638298
## 931 -0.60 -0.10638298
## 933 -0.62 -1.92791667
## 944 -0.85 -1.04076923
## 947 -0.89 -1.04076923
## 949 -0.93 -1.63431818
## 953 -0.96 -0.10638298
## 958 -1.06 -2.41125000
## 961 -1.10 -1.04076923
## 963 -1.12 -1.63431818
## 964 -1.15 -1.04076923
## 973 -1.28 -1.08000000
## 976 -1.30 -1.71733333
## 977 -1.31 -1.08000000
## 980 -1.35 -3.99571429
## 983 -1.39 -1.29333333
## 984 -1.41 -1.08000000
## 986 -1.41 -1.92791667
## 989 -1.42 -1.04076923
## 991 -1.46 -1.63431818
## 996 -1.50 -1.71733333
## 997 -1.50 -1.29333333
## 999 -1.52 -1.08000000
## 1000 -1.52 -1.08000000
## 1003 -1.59 -1.71733333
## 1008 -1.61 -1.92791667
## 1009 -1.63 -1.63431818
## 1014 -1.71 -1.63431818
## 1015 -1.83 -2.74791667
## 1040 -2.05 -1.58444444
## 1042 -2.06 -2.82475000
## 1043 -2.07 -1.29333333
## 1050 -2.15 -1.63431818
## 1052 -2.16 -1.04076923
## 1056 -1.99 -1.61357143
## 1070 -2.36 -2.82475000
## 1073 -2.38 -2.49325581
## 1074 -2.39 -0.10638298
## 1079 -2.46 -1.63431818
## 1080 -2.49 -2.41125000
## 1085 -2.54 -1.71733333
## 1087 -2.55 -3.55000000
## 1096 -2.63 -4.57857143
## 1099 -2.64 -2.74791667
## 1100 -2.67 -2.41125000
## 1102 -2.68 -2.82475000
## 1107 -2.77 -3.55176471
## 1109 -2.78 -4.12263158
## 1114 -2.82 -2.49325581
## 1118 -2.92 -2.74791667
## 1123 -3.03 -2.82475000
## 1132 -3.12 -3.55000000
## 1134 -3.16 -3.55000000
## 1137 -3.19 -3.34500000
## 1154 -3.54 -3.63720000
## 1155 -3.54 -2.76066667
## 1157 -3.59 -3.90583333
## 1162 -3.66 -4.31500000
## 1164 -3.68 -3.55176471
## 1171 -3.75 -4.57857143
## 1172 -3.76 -3.63720000
## 1175 -3.78 -4.12263158
## 1177 -3.80 -3.28714286
## 1179 -3.80 -5.39176471
## 1183 -3.85 -4.31500000
## 1185 -3.89 -5.39176471
## 1189 -3.95 -3.90583333
## 1211 -4.29 -4.66285714
## 1218 -4.42 -5.39176471
## 1224 -4.48 -3.34500000
## 1225 -4.48 -4.31500000
## 1227 -4.53 -5.39176471
## 1232 -4.63 -4.57857143
## 1235 -4.73 -3.55000000
## 1238 -4.84 -3.90583333
## 1240 -4.89 -5.39176471
## 1241 -4.89 -4.66285714
## 1248 -5.26 -4.66285714
## 1258 -6.09 -5.09333333
## 1261 -6.29 -5.09333333
## 1263 -6.29 -6.77157895
## 1269 -6.89 -5.39176471
## 1270 -6.96 -4.57857143
## 1271 -7.00 -6.77157895
## 1272 -7.05 -6.77157895
## 1280 -8.30 -8.59187500
## 1286 -8.66 -7.89307692
## 1287 -9.03 -7.89307692
## 1289 -10.41 -8.59187500
## 1290 -7.89 -6.77157895
## 1291 -2.32 -1.92791667
## 1294 0.39 -1.29333333
## 1305 -2.90 -4.12263158
## 1308 -2.47 -3.01250000
##################################
# Reporting the independent evaluation results
# for the test set
##################################
# Compute the independent test-set performance metrics for the CART model:
# postResample(predicted, observed) returns RMSE, R-squared and MAE.
(CART_Test_Metrics <- postResample(CART_Test[,2], CART_Test[,1]))
## RMSE Rsquared MAE
## 0.9194665 0.8070706 0.6980661
# Extract the RMSE component.
(CART_Test_RMSE <- CART_Test_Metrics[1])
## RMSE
## 0.9194665
# Extract the R-squared component.
(CART_Test_Rsquared <- CART_Test_Metrics[2])
## Rsquared
## 0.8070706
##################################
# Creating a local object
# for the train set
##################################
# Work on a local copy of the pre-processed train set for the CTREE model.
PMA_PreModelling_Train_CTREE <- PMA_PreModelling_Train
##################################
# Creating consistent fold assignments
# for the 10-Fold Cross Validation process
##################################
# Fix the RNG state so the fold assignments are reproducible and identical
# to those used by the other models in this document.
set.seed(12345678)
KFold_Indices <- createFolds(PMA_PreModelling_Train_CTREE$Log_Solubility,
                             k = 10,
                             returnTrain = TRUE)
KFold_Control <- trainControl(method = "cv",
                              index = KFold_Indices)
##################################
# Setting the conditions
# for hyperparameter tuning
##################################
# Candidate mincriterion values (1 - alpha for the split significance test)
# for ctree: seq(0.75, 0.99, length = 2) yields c(0.75, 0.99); adding 0.95
# and sorting gives the grid 0.75, 0.95, 0.99.
CTREE_Grid <- data.frame(mincriterion = sort(c(0.95, seq(0.75, 0.99, length = 2))))
##################################
# Running the conditional inference trees model
# by setting the caret method to 'ctree'
##################################
# Fix the RNG state for reproducibility of the tuning run.
set.seed(12345678)
# Tune a conditional inference tree over CTREE_Grid with the shared 10-fold
# CV folds; trees need no predictor pre-processing.
CTREE_Tune <- train(x = PMA_PreModelling_Train_CTREE[, !names(PMA_PreModelling_Train_CTREE) %in% c("Log_Solubility")],
                    y = PMA_PreModelling_Train_CTREE$Log_Solubility,
                    method = "ctree",
                    tuneGrid = CTREE_Grid,
                    trControl = KFold_Control)
##################################
# Reporting the cross-validation results
# for the train set
##################################
# Print the caret tuning object: cross-validated RMSE, R-squared and MAE for
# each candidate mincriterion; caret selects the smallest-RMSE model
# (mincriterion = 0.75 here).
CTREE_Tune
## Conditional Inference Tree
##
## 951 samples
## 220 predictors
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 857, 855, 855, 856, 857, 856, ...
## Resampling results across tuning parameters:
##
## mincriterion RMSE Rsquared MAE
## 0.75 0.9570454 0.7794050 0.7258222
## 0.95 0.9994702 0.7596920 0.7568657
## 0.99 1.0355167 0.7433903 0.7830925
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mincriterion = 0.75.
# Display the final fitted conditional inference tree selected by resampling
CTREE_Tune$finalModel
##
## Conditional inference tree with 43 terminal nodes
##
## Response: .outcome
## Inputs: FP001, FP002, FP003, FP004, FP005, FP006, FP007, FP008, FP009, FP010, FP011, FP012, FP013, FP014, FP015, FP016, FP017, FP018, FP019, FP020, FP021, FP022, FP023, FP024, FP025, FP026, FP027, FP028, FP029, FP030, FP031, FP032, FP033, FP034, FP035, FP036, FP037, FP038, FP039, FP040, FP041, FP042, FP043, FP044, FP045, FP046, FP047, FP048, FP049, FP050, FP051, FP052, FP053, FP054, FP055, FP056, FP057, FP058, FP059, FP060, FP061, FP062, FP063, FP064, FP065, FP066, FP067, FP068, FP069, FP070, FP071, FP072, FP073, FP074, FP075, FP076, FP077, FP078, FP079, FP080, FP081, FP082, FP083, FP084, FP085, FP086, FP087, FP088, FP089, FP090, FP091, FP092, FP093, FP094, FP095, FP096, FP097, FP098, FP099, FP100, FP101, FP102, FP103, FP104, FP105, FP106, FP107, FP108, FP109, FP110, FP111, FP112, FP113, FP114, FP115, FP116, FP117, FP118, FP119, FP120, FP121, FP122, FP123, FP124, FP125, FP126, FP127, FP128, FP129, FP130, FP131, FP132, FP133, FP134, FP135, FP136, FP137, FP138, FP139, FP140, FP141, FP142, FP143, FP144, FP145, FP146, FP147, FP148, FP149, FP150, FP151, FP152, FP153, FP155, FP156, FP157, FP158, FP159, FP160, FP161, FP162, FP163, FP164, FP165, FP166, FP167, FP168, FP169, FP170, FP171, FP172, FP173, FP174, FP175, FP176, FP177, FP178, FP179, FP180, FP181, FP182, FP183, FP184, FP185, FP186, FP187, FP188, FP189, FP190, FP191, FP192, FP193, FP194, FP195, FP196, FP197, FP198, FP201, FP202, FP203, FP204, FP205, FP206, FP207, FP208, MolWeight, NumBonds, NumMultBonds, NumRotBonds, NumDblBonds, NumCarbon, NumNitrogen, NumOxygen, NumSulfer, NumChlorine, NumHalogen, NumRings, HydrophilicFactor, SurfaceArea1, SurfaceArea2
## Number of observations: 951
##
## 1) MolWeight <= 0.07100313; criterion = 1, statistic = 411.936
## 2) NumCarbon <= -0.4425175; criterion = 1, statistic = 155.007
## 3) FP072 == {1}; criterion = 1, statistic = 76.589
## 4) NumCarbon <= -0.9551655; criterion = 1, statistic = 60.157
## 5) FP131 == {0}; criterion = 1, statistic = 36.759
## 6) MolWeight <= -1.24085; criterion = 0.889, statistic = 11.986
## 7) FP063 == {1}; criterion = 0.999, statistic = 21.467
## 8)* weights = 14
## 7) FP063 == {0}
## 9) NumCarbon <= -1.612785; criterion = 0.901, statistic = 12.217
## 10)* weights = 8
## 9) NumCarbon > -1.612785
## 11)* weights = 24
## 6) MolWeight > -1.24085
## 12)* weights = 31
## 5) FP131 == {1}
## 13)* weights = 15
## 4) NumCarbon > -0.9551655
## 14) NumCarbon <= -0.685958; criterion = 0.997, statistic = 18.77
## 15) FP147 == {1}; criterion = 0.986, statistic = 15.937
## 16)* weights = 7
## 15) FP147 == {0}
## 17)* weights = 47
## 14) NumCarbon > -0.685958
## 18)* weights = 54
## 3) FP072 == {0}
## 19) FP063 == {1}; criterion = 1, statistic = 46.785
## 20) MolWeight <= -0.4330091; criterion = 1, statistic = 27.515
## 21) MolWeight <= -1.533127; criterion = 0.808, statistic = 10.891
## 22)* weights = 11
## 21) MolWeight > -1.533127
## 23) FP116 == {1}; criterion = 0.941, statistic = 13.221
## 24)* weights = 8
## 23) FP116 == {0}
## 25)* weights = 14
## 20) MolWeight > -0.4330091
## 26)* weights = 12
## 19) FP063 == {0}
## 27) NumBonds <= -1.536946; criterion = 1, statistic = 51.593
## 28)* weights = 26
## 27) NumBonds > -1.536946
## 29) NumBonds <= -0.5036947; criterion = 1, statistic = 27.6
## 30) FP172 == {0}; criterion = 0.994, statistic = 17.469
## 31) NumBonds <= -0.9603748; criterion = 0.999, statistic = 20.422
## 32)* weights = 22
## 31) NumBonds > -0.9603748
## 33)* weights = 21
## 30) FP172 == {1}
## 34)* weights = 7
## 29) NumBonds > -0.5036947
## 35)* weights = 10
## 2) NumCarbon > -0.4425175
## 36) SurfaceArea1 <= -1.033167; criterion = 1, statistic = 59.056
## 37) NumBonds <= -0.1221974; criterion = 1, statistic = 24.262
## 38)* weights = 14
## 37) NumBonds > -0.1221974
## 39)* weights = 26
## 36) SurfaceArea1 > -1.033167
## 40) NumCarbon <= 0.1817764; criterion = 1, statistic = 38.11
## 41) FP059 == {0}; criterion = 1, statistic = 26.239
## 42)* weights = 103
## 41) FP059 == {1}
## 43)* weights = 16
## 40) NumCarbon > 0.1817764
## 44)* weights = 17
## 1) MolWeight > 0.07100313
## 45) FP015 == {1}; criterion = 1, statistic = 143.122
## 46) SurfaceArea1 <= 0.116782; criterion = 1, statistic = 59.802
## 47) MolWeight <= 1.080767; criterion = 1, statistic = 31.733
## 48) NumCarbon <= 1.00853; criterion = 0.988, statistic = 16.207
## 49) FP059 == {0}; criterion = 0.884, statistic = 11.904
## 50) FP077 == {1}; criterion = 0.829, statistic = 11.129
## 51)* weights = 10
## 50) FP077 == {0}
## 52) FP070 == {0}; criterion = 0.914, statistic = 12.499
## 53)* weights = 34
## 52) FP070 == {1}
## 54)* weights = 13
## 49) FP059 == {1}
## 55)* weights = 10
## 48) NumCarbon > 1.00853
## 56) MolWeight <= 0.7928528; criterion = 0.98, statistic = 15.306
## 57)* weights = 7
## 56) MolWeight > 0.7928528
## 58)* weights = 27
## 47) MolWeight > 1.080767
## 59) FP137 == {1}; criterion = 0.966, statistic = 14.274
## 60)* weights = 13
## 59) FP137 == {0}
## 61)* weights = 27
## 46) SurfaceArea1 > 0.116782
## 62) MolWeight <= 1.134223; criterion = 1, statistic = 46.235
## 63) NumOxygen <= 0.824597; criterion = 1, statistic = 32.067
## 64) NumOxygen <= -0.332028; criterion = 0.799, statistic = 10.794
## 65)* weights = 14
## 64) NumOxygen > -0.332028
## 66) NumMultBonds <= 0.9381143; criterion = 0.967, statistic = 14.326
## 67) FP105 == {0}; criterion = 0.829, statistic = 11.124
## 68)* weights = 54
## 67) FP105 == {1}
## 69)* weights = 15
## 66) NumMultBonds > 0.9381143
## 70) FP131 == {1}; criterion = 0.859, statistic = 11.513
## 71)* weights = 15
## 70) FP131 == {0}
## 72)* weights = 19
## 63) NumOxygen > 0.824597
## 73) FP002 == {0}; criterion = 0.985, statistic = 15.814
## 74)* weights = 11
## 73) FP002 == {1}
## 75)* weights = 26
## 62) MolWeight > 1.134223
## 76) HydrophilicFactor <= 2.044025; criterion = 0.999, statistic = 21.327
## 77)* weights = 69
## 76) HydrophilicFactor > 2.044025
## 78)* weights = 8
## 45) FP015 == {0}
## 79) FP070 == {0}; criterion = 1, statistic = 38.978
## 80) FP149 == {0}; criterion = 0.997, statistic = 18.853
## 81)* weights = 16
## 80) FP149 == {1}
## 82)* weights = 18
## 79) FP070 == {1}
## 83) MolWeight <= 1.239164; criterion = 0.954, statistic = 13.687
## 84)* weights = 23
## 83) MolWeight > 1.239164
## 85)* weights = 15
# Full resampling results table (one row per mincriterion candidate)
CTREE_Tune$results
## mincriterion RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 0.75 0.9570454 0.7794050 0.7258222 0.09347278 0.05478381 0.06000684
## 2 0.95 0.9994702 0.7596920 0.7568657 0.10135350 0.04780590 0.07516456
## 3 0.99 1.0355167 0.7433903 0.7830925 0.10465367 0.04950312 0.07684305
# Retrieve the resampled RMSE and R-squared for the best mincriterion value
(CTREE_Train_RMSE <- CTREE_Tune$results[CTREE_Tune$results$mincriterion == CTREE_Tune$bestTune$mincriterion,
                                        c("RMSE")])
## [1] 0.9570454
(CTREE_Train_Rsquared <- CTREE_Tune$results[CTREE_Tune$results$mincriterion == CTREE_Tune$bestTune$mincriterion,
                                            c("Rsquared")])
## [1] 0.779405
##################################
# Identifying and plotting the
# best model predictors
##################################
# Scaled (0-100) variable importance; plot the top 25 predictors
CTREE_VarImp <- varImp(CTREE_Tune, scale = TRUE)
plot(CTREE_VarImp,
     top = 25,
     scales = list(y = list(cex = .95)),
     main = "Ranked Variable Importance : Conditional Inference Trees",
     xlab = "Scaled Variable Importance Metrics",
     ylab = "Predictors",
     cex = 2,
     origin = 0,
     alpha = 0.45)
##################################
# Independently evaluating the model
# on the test set
##################################
# Pair the observed test-set solubility values with the CTREE predictions
# (predictors = all test columns except the response Log_Solubility)
CTREE_Test <- data.frame(CTREE_Observed = PMA_PreModelling_Test$Log_Solubility,
                         CTREE_Predicted = predict(CTREE_Tune,
                                                   PMA_PreModelling_Test[, !names(PMA_PreModelling_Test) %in% c("Log_Solubility")]))
CTREE_Test
## CTREE_Observed CTREE_Predicted
## 1 0.93 0.4825000
## 2 0.85 0.9485714
## 3 0.81 -0.8640426
## 4 0.74 0.2262500
## 5 0.61 -0.8640426
## 6 0.58 0.4825000
## 7 0.57 0.4825000
## 8 0.56 -0.1095833
## 9 0.52 -0.1095833
## 10 0.45 0.2262500
## 11 0.40 0.2262500
## 12 0.36 -4.3207692
## 13 0.22 -0.2038710
## 14 0.08 -0.1095833
## 15 0.07 -0.9721429
## 16 0.02 0.2262500
## 17 0.00 -0.8640426
## 18 -0.01 -0.1095833
## 19 -0.07 -0.2038710
## 20 -0.12 -2.4616667
## 21 -0.17 0.3214286
## 22 -0.29 -0.9721429
## 23 -0.38 -0.8640426
## 24 -0.38 -0.9721429
## 25 -0.39 -0.8640426
## 26 -0.42 -0.9721429
## 27 -0.44 -0.8640426
## 28 -0.46 0.9485714
## 29 -0.48 -4.0410145
## 30 -0.60 -0.8640426
## 31 -0.63 -4.3207692
## 32 -0.66 -0.2038710
## 33 -0.72 -0.2038710
## 34 -0.72 -0.8640426
## 35 -0.80 -0.8640426
## 36 -0.80 -0.8640426
## 37 -0.82 -0.1095833
## 38 -0.82 -0.2038710
## 39 -0.84 -0.8640426
## 40 -0.85 -0.1095833
## 41 -0.85 -0.1095833
## 42 -0.87 -2.4616667
## 43 -0.89 -1.7153398
## 44 -0.90 -0.2038710
## 45 -0.96 -1.7153398
## 46 -0.96 0.7363636
## 47 -0.99 -0.1095833
## 48 -1.01 -0.2038710
## 49 -1.09 -1.5430769
## 50 -1.12 -1.7153398
## 51 -1.14 -1.4840000
## 52 -1.17 -1.5430769
## 53 -1.19 -1.7153398
## 54 -1.22 -1.2851852
## 55 -1.27 -1.7153398
## 56 -1.28 -1.7153398
## 57 -1.32 -1.5430769
## 58 -1.38 -1.7153398
## 59 -1.39 -0.8418182
## 60 -1.42 -3.3047059
## 61 -1.47 -0.2038710
## 62 -1.47 -2.1595455
## 63 -1.50 -2.2320000
## 64 -1.52 -1.2851852
## 65 -1.54 -1.7153398
## 66 -1.55 -6.5542857
## 67 -1.56 -3.3047059
## 68 -1.57 -1.2851852
## 69 -1.60 -1.7153398
## 70 -1.60 -2.3885185
## 71 -1.62 -4.3207692
## 72 -1.64 -0.8418182
## 73 -1.67 -2.8475000
## 74 -1.70 -2.4050000
## 75 -1.70 -1.7153398
## 76 -1.71 -3.1679412
## 77 -1.71 -0.8418182
## 78 -1.75 -1.2851852
## 79 -1.78 -1.7153398
## 80 -1.78 -2.4050000
## 81 -1.82 -1.2851852
## 82 -1.87 -1.2851852
## 83 -1.89 -0.8418182
## 84 -1.92 -1.2851852
## 85 -1.92 -1.7153398
## 86 -1.92 -1.2851852
## 87 -1.94 -3.3047059
## 88 -1.99 -3.3047059
## 89 -2.00 -1.5430769
## 90 -2.05 -2.8475000
## 91 -2.06 -1.7153398
## 92 -2.08 -3.3047059
## 93 -2.10 -1.2851852
## 94 -2.11 -1.7153398
## 95 -2.12 0.2262500
## 96 -2.17 -1.7153398
## 97 -2.21 -2.7528571
## 98 -2.24 -3.1520000
## 99 -2.24 -1.7153398
## 100 -2.29 -2.1595455
## 101 -2.31 -1.5430769
## 102 -2.32 -1.7153398
## 103 -2.35 -3.3047059
## 104 -2.35 -1.7153398
## 105 -2.36 -2.7528571
## 106 -2.36 -4.6220000
## 107 -2.38 -3.3642857
## 108 -2.42 -1.7153398
## 109 -2.43 -3.6873684
## 110 -2.44 -2.4050000
## 111 -2.52 -0.8418182
## 112 -2.53 -3.1679412
## 113 -2.57 -0.8418182
## 114 -2.62 -2.4050000
## 115 -2.62 -2.1587500
## 116 -2.64 -4.2018519
## 117 -2.64 -2.2320000
## 118 -2.70 -3.1520000
## 119 -2.82 -3.6873684
## 120 -2.88 -2.1587500
## 121 -2.89 -3.1679412
## 122 -2.92 -1.7153398
## 123 -2.93 -2.4050000
## 124 -2.96 -2.2320000
## 125 -2.98 -6.5542857
## 126 -3.01 -2.2320000
## 127 -3.01 -3.8330000
## 128 -3.02 -3.1520000
## 129 -3.07 -2.7528571
## 130 -3.09 -0.8418182
## 131 -3.11 -3.3535714
## 132 -3.13 -2.4050000
## 133 -3.14 -2.1587500
## 134 -3.15 -3.6873684
## 135 -3.22 -3.1679412
## 136 -3.26 -3.1520000
## 137 -3.27 -2.3885185
## 138 -3.27 -3.3535714
## 139 -3.30 -3.8330000
## 140 -3.31 -1.2851852
## 141 -3.33 -2.3885185
## 142 -3.37 -2.4050000
## 143 -3.43 -4.0410145
## 144 -3.43 -3.1679412
## 145 -3.48 -3.3047059
## 146 -3.51 -3.3047059
## 147 -3.59 -4.0410145
## 148 -3.61 -4.0410145
## 149 -3.63 -3.3047059
## 150 -3.63 -3.8556250
## 151 -3.68 -2.1587500
## 152 -3.71 -4.4853846
## 153 -3.74 -3.8330000
## 154 -3.75 -4.4373077
## 155 -3.75 -3.8556250
## 156 -3.77 -3.1679412
## 157 -3.77 -4.4373077
## 158 -3.78 -4.0410145
## 159 -3.81 -3.8330000
## 160 -3.95 -3.6873684
## 161 -3.96 -4.4373077
## 162 -3.96 -3.1520000
## 163 -4.00 -3.6328571
## 164 -4.02 -4.3207692
## 165 -4.04 -4.0410145
## 166 -4.12 -4.4373077
## 167 -4.15 -4.4853846
## 168 -4.16 -2.4050000
## 169 -4.17 -4.4373077
## 170 -4.21 -5.9914815
## 171 -4.23 -2.2320000
## 172 -4.25 -4.4373077
## 173 -4.30 -4.0410145
## 174 -4.31 -3.1520000
## 175 -4.35 -3.8556250
## 176 -4.40 -3.1520000
## 177 -4.40 -2.4050000
## 178 -4.43 -5.9914815
## 179 -4.46 -4.0410145
## 180 -4.47 -4.4373077
## 181 -4.51 -5.9914815
## 182 -4.60 -3.3047059
## 183 -4.64 -4.4373077
## 184 -4.69 -4.6220000
## 185 -4.71 -4.0410145
## 186 -4.77 -2.2320000
## 187 -4.95 -3.6873684
## 188 -4.98 -7.2295652
## 189 -5.21 -5.8988889
## 190 -5.22 -4.4373077
## 191 -5.28 -3.1520000
## 192 -5.31 -3.8556250
## 193 -5.35 -4.0410145
## 194 -5.37 -4.0410145
## 195 -5.40 -3.1520000
## 196 -5.43 -7.2295652
## 197 -5.65 -7.2295652
## 198 -5.66 -4.0410145
## 199 -6.70 -4.2018519
## 200 -5.72 -5.9914815
## 201 -6.00 -7.2295652
## 202 -6.25 -5.8988889
## 203 -6.26 -5.8988889
## 204 -6.27 -5.8988889
## 205 -6.35 -4.4373077
## 206 -6.57 -6.5542857
## 207 -6.62 -5.9914815
## 208 -6.96 -6.5542857
## 209 -7.02 -4.2018519
## 210 -7.20 -5.9914815
## 211 -7.28 -7.2295652
## 212 -7.32 -8.6760000
## 213 -7.39 -8.6760000
## 214 -7.82 -8.6760000
## 215 -8.23 -7.2295652
## 216 -8.94 -8.6760000
## 217 1.07 -0.1095833
## 218 0.43 -0.1095833
## 219 0.32 0.7363636
## 220 0.00 -0.1095833
## 221 -0.40 -1.2851852
## 222 -0.52 -0.1095833
## 223 -0.55 -0.1095833
## 224 -0.60 -0.1095833
## 225 -0.62 -2.3885185
## 226 -0.85 -0.9721429
## 227 -0.89 -0.8640426
## 228 -0.93 -1.7153398
## 229 -0.96 -0.2038710
## 230 -1.06 -1.7153398
## 231 -1.10 -0.9721429
## 232 -1.12 -0.2038710
## 233 -1.15 -1.2851852
## 234 -1.28 0.2262500
## 235 -1.30 -1.5430769
## 236 -1.31 -1.7153398
## 237 -1.35 -3.6328571
## 238 -1.39 -1.7153398
## 239 -1.41 -1.7153398
## 240 -1.41 -1.2851852
## 241 -1.42 -0.8640426
## 242 -1.46 -0.9721429
## 243 -1.50 -1.5430769
## 244 -1.50 -0.9721429
## 245 -1.52 -0.9721429
## 246 -1.52 -1.4840000
## 247 -1.59 -2.1595455
## 248 -1.61 -1.7153398
## 249 -1.63 -1.7153398
## 250 -1.71 -1.7153398
## 251 -1.83 -2.4050000
## 252 -2.05 -1.7153398
## 253 -2.06 -0.8418182
## 254 -2.07 -4.3207692
## 255 -2.15 -1.2851852
## 256 -2.16 -0.9721429
## 257 -1.99 -0.8418182
## 258 -2.36 -2.3885185
## 259 -2.38 -2.1595455
## 260 -2.39 -2.1595455
## 261 -2.46 -1.7153398
## 262 -2.49 -3.3047059
## 263 -2.54 -1.5430769
## 264 -2.55 -3.1679412
## 265 -2.63 -4.4853846
## 266 -2.64 -2.3885185
## 267 -2.67 -3.3047059
## 268 -2.68 -2.3885185
## 269 -2.77 -3.3535714
## 270 -2.78 -2.4050000
## 271 -2.82 -3.3535714
## 272 -2.92 -3.8556250
## 273 -3.03 -2.4050000
## 274 -3.12 -3.1679412
## 275 -3.16 -2.4050000
## 276 -3.19 -3.8556250
## 277 -3.54 -3.8556250
## 278 -3.54 -3.1679412
## 279 -3.59 -4.0410145
## 280 -3.66 -4.4373077
## 281 -3.68 -3.8330000
## 282 -3.75 -4.0410145
## 283 -3.76 -3.1679412
## 284 -3.78 -3.6873684
## 285 -3.80 -4.6220000
## 286 -3.80 -4.0410145
## 287 -3.85 -3.8330000
## 288 -3.89 -4.0410145
## 289 -3.95 -4.0410145
## 290 -4.29 -4.4373077
## 291 -4.42 -4.0410145
## 292 -4.48 -3.8556250
## 293 -4.48 -4.4373077
## 294 -4.53 -4.0410145
## 295 -4.63 -4.0410145
## 296 -4.73 -7.2295652
## 297 -4.84 -3.1520000
## 298 -4.89 -3.6873684
## 299 -4.89 -4.4373077
## 300 -5.26 -4.4373077
## 301 -6.09 -5.9914815
## 302 -6.29 -5.9914815
## 303 -6.29 -7.2295652
## 304 -6.89 -5.9914815
## 305 -6.96 -4.0410145
## 306 -7.00 -7.2295652
## 307 -7.05 -8.6760000
## 308 -8.30 -8.6760000
## 309 -8.66 -7.2295652
## 310 -9.03 -7.2295652
## 311 -10.41 -8.6760000
## 312 -7.89 -8.6760000
## 313 -2.32 -1.7153398
## 314 0.39 -4.3207692
## 315 -2.90 -4.0410145
## 316 -2.47 -4.0410145
##################################
# Reporting the independent evaluation results
# for the test set
##################################
# postResample(pred, obs): RMSE, R-squared and MAE of the CTREE predictions
# against the observed test values; outer parentheses print the result.
(CTREE_Test_Metrics <- postResample(CTREE_Test[, 2], CTREE_Test[, 1]))
## RMSE Rsquared MAE
## 1.1160100 0.7310919 0.8027123
# Keep the individual metrics for later cross-model comparison
(CTREE_Test_RMSE <- CTREE_Test_Metrics[1])
## RMSE
## 1.11601
(CTREE_Test_Rsquared <- CTREE_Test_Metrics[2])
## Rsquared
## 0.7310919
##################################
# Creating a local object
# for the train set
##################################
# Work on a copy so the random forest section cannot mutate the shared train set
PMA_PreModelling_Train_RF <- PMA_PreModelling_Train
##################################
# Creating consistent fold assignments
# for the 10-Fold Cross Validation process
##################################
# Same seed as the other sections so all models share identical folds
set.seed(12345678)
KFold_Indices <- createFolds(PMA_PreModelling_Train_RF$Log_Solubility,
                             k = 10,
                             returnTrain = TRUE)
KFold_Control <- trainControl(method = "cv",
                              index = KFold_Indices)
##################################
# Setting the conditions
# for hyperparameter tuning
##################################
# mtry = number of predictors sampled at each split
RF_Grid <- data.frame(mtry = c(25, 75, 125))
##################################
# Running the random forest model
# by setting the caret method to 'rf'
##################################
# ntree = 100 trees; importance = TRUE so varImp() can be used afterwards
set.seed(12345678)
RF_Tune <- train(x = PMA_PreModelling_Train_RF[, !names(PMA_PreModelling_Train_RF) %in% c("Log_Solubility")],
                 y = PMA_PreModelling_Train_RF$Log_Solubility,
                 method = "rf",
                 tuneGrid = RF_Grid,
                 ntree = 100,
                 importance = TRUE,
                 trControl = KFold_Control)
##################################
# Reporting the cross-validation results
# for the train set
##################################
# Printing the train object shows the resampling summary across mtry values
RF_Tune
## Random Forest
##
## 951 samples
## 220 predictors
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 857, 855, 855, 856, 857, 856, ...
## Resampling results across tuning parameters:
##
## mtry RMSE Rsquared MAE
## 25 0.6714148 0.8976040 0.4955195
## 75 0.6541868 0.9009781 0.4772816
## 125 0.6589382 0.8983983 0.4776712
##
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 75.
# Display the final fitted random forest selected by resampling
RF_Tune$finalModel
##
## Call:
## randomForest(x = x, y = y, ntree = 100, mtry = param$mtry, importance = TRUE)
## Type of random forest: regression
## Number of trees: 100
## No. of variables tried at each split: 75
##
## Mean of squared residuals: 0.4364622
## % Var explained: 89.57
# Full resampling results table (one row per mtry candidate)
RF_Tune$results
## mtry RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 25 0.6714148 0.8976040 0.4955195 0.06557018 0.01260342 0.04375357
## 2 75 0.6541868 0.9009781 0.4772816 0.06595932 0.01310389 0.04172285
## 3 125 0.6589382 0.8983983 0.4776712 0.05581432 0.01080426 0.04016860
# Retrieve the resampled RMSE and R-squared for the best mtry value
(RF_Train_RMSE <- RF_Tune$results[RF_Tune$results$mtry == RF_Tune$bestTune$mtry,
                                  c("RMSE")])
## [1] 0.6541868
(RF_Train_Rsquared <- RF_Tune$results[RF_Tune$results$mtry == RF_Tune$bestTune$mtry,
                                      c("Rsquared")])
## [1] 0.9009781
##################################
# Identifying and plotting the
# best model predictors
##################################
# Scaled (0-100) variable importance; plot the top 25 predictors
RF_VarImp <- varImp(RF_Tune, scale = TRUE)
plot(RF_VarImp,
     top = 25,
     scales = list(y = list(cex = .95)),
     main = "Ranked Variable Importance : Random Forest",
     xlab = "Scaled Variable Importance Metrics",
     ylab = "Predictors",
     cex = 2,
     origin = 0,
     alpha = 0.45)
##################################
# Independently evaluating the model
# on the test set
##################################
# Pair the observed test-set solubility values with the random forest predictions
# (predictors = all test columns except the response Log_Solubility)
RF_Test <- data.frame(RF_Observed = PMA_PreModelling_Test$Log_Solubility,
                      RF_Predicted = predict(RF_Tune,
                                             PMA_PreModelling_Test[, !names(PMA_PreModelling_Test) %in% c("Log_Solubility")]))
RF_Test
## RF_Observed RF_Predicted
## 20 0.93 0.20989500
## 21 0.85 0.46575500
## 23 0.81 -0.33998667
## 25 0.74 0.37796167
## 28 0.61 -0.57687667
## 31 0.58 0.42797333
## 32 0.57 0.29741167
## 33 0.56 0.15913667
## 34 0.52 0.05253667
## 37 0.45 -0.16529833
## 38 0.40 0.16764500
## 42 0.36 -2.35196500
## 49 0.22 -0.35109000
## 54 0.08 -0.17439500
## 55 0.07 -0.54914667
## 58 0.02 -1.13201333
## 60 0.00 -0.25148833
## 61 -0.01 -0.01665500
## 65 -0.07 0.05839000
## 69 -0.12 -0.60179333
## 73 -0.17 0.34814667
## 86 -0.29 -0.02392167
## 90 -0.38 -0.63526333
## 91 -0.38 -0.53145667
## 93 -0.39 -0.76127833
## 96 -0.42 -0.43075667
## 98 -0.44 -0.43162333
## 100 -0.46 0.74329833
## 104 -0.48 -2.44920167
## 112 -0.60 -0.51055500
## 115 -0.63 -2.98466167
## 119 -0.66 -0.57326000
## 128 -0.72 -0.67806833
## 130 -0.72 -0.31738000
## 139 -0.80 -0.42072167
## 143 -0.80 -0.70985000
## 145 -0.82 0.11085167
## 146 -0.82 -0.66479333
## 149 -0.84 -0.55452167
## 150 -0.85 -0.44121833
## 152 -0.85 -0.04707333
## 157 -0.87 -2.50876000
## 161 -0.89 -1.56917167
## 162 -0.90 -0.22690000
## 166 -0.96 -1.27440833
## 167 -0.96 0.22710500
## 173 -0.99 -0.61188333
## 176 -1.01 -0.52732381
## 182 -1.09 -1.47098833
## 187 -1.12 -1.00478333
## 190 -1.14 -1.11679333
## 194 -1.17 -1.63073667
## 195 -1.19 -1.36738833
## 201 -1.22 -0.98321500
## 207 -1.27 -1.39956333
## 208 -1.28 -1.53184000
## 215 -1.32 -1.41741333
## 222 -1.38 -1.66779167
## 224 -1.39 -1.70169167
## 231 -1.42 -1.89129333
## 236 -1.47 -0.87659000
## 237 -1.47 -1.57729833
## 240 -1.50 -1.35113500
## 243 -1.52 -1.20394000
## 248 -1.54 -1.40041333
## 251 -1.55 -3.08410500
## 256 -1.56 -2.41555167
## 258 -1.57 -1.68766000
## 262 -1.60 -1.45603000
## 266 -1.60 -2.13415833
## 272 -1.62 -3.11365667
## 280 -1.64 -2.47095000
## 283 -1.67 -1.66484667
## 286 -1.70 -2.71505333
## 287 -1.70 -1.29740333
## 289 -1.71 -2.63375333
## 290 -1.71 -2.15921333
## 298 -1.75 -1.81461667
## 305 -1.78 -1.76615833
## 306 -1.78 -1.65183167
## 312 -1.82 -1.64487167
## 320 -1.87 -1.18677667
## 325 -1.89 -2.12132000
## 332 -1.92 -1.90805667
## 333 -1.92 -2.03452833
## 335 -1.92 -1.50591000
## 339 -1.94 -2.17312643
## 346 -1.99 -2.23364667
## 347 -2.00 -2.15230833
## 350 -2.05 -2.24742333
## 353 -2.06 -1.91868833
## 358 -2.08 -2.29837500
## 365 -2.10 -2.35561667
## 367 -2.11 -1.62317000
## 370 -2.12 -1.71070167
## 379 -2.17 -1.93786000
## 386 -2.21 -2.65126833
## 394 -2.24 -2.84050000
## 396 -2.24 -1.51852333
## 400 -2.29 -1.84330000
## 404 -2.31 -2.34471833
## 405 -2.32 -2.07237000
## 413 -2.35 -2.15067833
## 415 -2.35 -2.11579000
## 417 -2.36 -3.07110500
## 418 -2.36 -2.73504167
## 423 -2.38 -2.43465667
## 434 -2.42 -1.73134000
## 437 -2.43 -2.87439833
## 440 -2.44 -2.69540000
## 449 -2.52 -2.64786000
## 450 -2.53 -2.46411667
## 457 -2.57 -2.59764333
## 467 -2.62 -2.75441667
## 469 -2.62 -2.95589167
## 474 -2.64 -3.66249833
## 475 -2.64 -3.49781833
## 485 -2.70 -3.08082500
## 504 -2.82 -2.92268333
## 511 -2.88 -2.88706333
## 512 -2.89 -3.01256000
## 517 -2.92 -1.66833667
## 519 -2.93 -3.24460667
## 520 -2.96 -2.44246167
## 522 -2.98 -3.68176333
## 527 -3.01 -3.24534000
## 528 -3.01 -3.75289333
## 529 -3.02 -2.94384500
## 537 -3.07 -2.91294833
## 540 -3.09 -3.58601833
## 541 -3.11 -3.65920500
## 547 -3.13 -3.15069667
## 550 -3.14 -2.60735143
## 555 -3.15 -3.09614167
## 564 -3.22 -2.50023333
## 570 -3.26 -3.32189333
## 573 -3.27 -2.95375500
## 575 -3.27 -3.67831500
## 578 -3.30 -3.50198667
## 581 -3.31 -2.33753667
## 585 -3.33 -3.22839167
## 590 -3.37 -3.17885833
## 601 -3.43 -3.73816667
## 602 -3.43 -3.27175667
## 607 -3.48 -2.98412500
## 610 -3.51 -3.41157333
## 618 -3.59 -3.21784000
## 624 -3.61 -3.38225333
## 626 -3.63 -3.20796667
## 627 -3.63 -3.36039167
## 634 -3.68 -1.98414833
## 640 -3.71 -3.82553667
## 642 -3.74 -3.51340667
## 643 -3.75 -4.12928333
## 644 -3.75 -3.94622000
## 645 -3.77 -3.67290833
## 646 -3.77 -4.27068500
## 647 -3.78 -3.94457833
## 652 -3.81 -3.41955333
## 658 -3.95 -4.26720000
## 659 -3.96 -5.03294500
## 660 -3.96 -4.21195667
## 664 -4.00 -3.48207667
## 666 -4.02 -3.81235167
## 667 -4.04 -4.42665167
## 675 -4.12 -4.10467000
## 680 -4.15 -4.31214667
## 681 -4.16 -3.47621500
## 687 -4.17 -5.03863667
## 694 -4.21 -4.49756000
## 697 -4.23 -4.04320500
## 701 -4.25 -4.46614167
## 705 -4.30 -3.93528000
## 707 -4.31 -5.35805667
## 710 -4.35 -4.24843000
## 716 -4.40 -4.29815000
## 719 -4.40 -4.16561500
## 720 -4.43 -4.59730667
## 725 -4.46 -4.48651000
## 727 -4.47 -4.25193000
## 730 -4.51 -4.57426333
## 738 -4.60 -3.95472143
## 745 -4.64 -4.53408167
## 748 -4.69 -4.88521500
## 751 -4.71 -4.09683000
## 756 -4.77 -3.86949333
## 766 -4.95 -3.67954000
## 769 -4.98 -3.78668333
## 783 -5.21 -5.94445333
## 785 -5.22 -5.61153000
## 790 -5.28 -4.17007000
## 793 -5.31 -3.90614000
## 795 -5.35 -4.43664833
## 796 -5.37 -4.43663833
## 797 -5.40 -4.02382333
## 801 -5.43 -5.04274667
## 811 -5.65 -4.67941500
## 812 -5.66 -4.30371167
## 815 -6.70 -4.70443500
## 816 -5.72 -4.60836500
## 817 -6.00 -6.70994000
## 824 -6.25 -6.23801464
## 825 -6.26 -6.03402548
## 826 -6.27 -6.29619048
## 830 -6.35 -5.28174333
## 837 -6.57 -6.50772143
## 838 -6.62 -4.60072667
## 844 -6.96 -6.00886333
## 845 -7.02 -7.52277500
## 847 -7.20 -6.91610464
## 850 -7.28 -7.15971667
## 852 -7.32 -7.70975167
## 853 -7.39 -7.82604106
## 861 -7.82 -8.09600380
## 868 -8.23 -8.19397833
## 874 -8.94 -7.99572143
## 879 1.07 0.31703000
## 895 0.43 0.02934833
## 899 0.32 0.12630833
## 903 0.00 -0.24217000
## 917 -0.40 -1.50730000
## 927 -0.52 -0.22160333
## 929 -0.55 -0.30704333
## 931 -0.60 -0.49359000
## 933 -0.62 -1.97804333
## 944 -0.85 -0.93113500
## 947 -0.89 -0.93667333
## 949 -0.93 -1.50149167
## 953 -0.96 -0.66297833
## 958 -1.06 -1.89618500
## 961 -1.10 -0.97368000
## 963 -1.12 -0.99423833
## 964 -1.15 -0.89360500
## 973 -1.28 -0.82012833
## 976 -1.30 -1.30890167
## 977 -1.31 -1.39726333
## 980 -1.35 -2.89281667
## 983 -1.39 -1.91987167
## 984 -1.41 -1.36738833
## 986 -1.41 -1.57123500
## 989 -1.42 -0.88137548
## 991 -1.46 -1.09400000
## 996 -1.50 -1.54966833
## 997 -1.50 -1.87420833
## 999 -1.52 -1.80561833
## 1000 -1.52 -1.33649167
## 1003 -1.59 -1.86752167
## 1008 -1.61 -1.41786500
## 1009 -1.63 -1.39789500
## 1014 -1.71 -1.78755333
## 1015 -1.83 -2.59385500
## 1040 -2.05 -1.55244833
## 1042 -2.06 -1.99637167
## 1043 -2.07 -3.00424833
## 1050 -2.15 -1.53587000
## 1052 -2.16 -1.09019833
## 1056 -1.99 -1.83613167
## 1070 -2.36 -2.23690500
## 1073 -2.38 -2.88251667
## 1074 -2.39 -1.62589667
## 1079 -2.46 -2.27056667
## 1080 -2.49 -2.28973667
## 1085 -2.54 -2.04383667
## 1087 -2.55 -2.79085167
## 1096 -2.63 -3.20284500
## 1099 -2.64 -2.37778833
## 1100 -2.67 -2.55542500
## 1102 -2.68 -2.52760833
## 1107 -2.77 -3.16981000
## 1109 -2.78 -3.16385000
## 1114 -2.82 -2.72513167
## 1118 -2.92 -3.38231667
## 1123 -3.03 -3.44474500
## 1132 -3.12 -3.94192167
## 1134 -3.16 -3.09815667
## 1137 -3.19 -3.46202833
## 1154 -3.54 -3.75560667
## 1155 -3.54 -3.08713833
## 1157 -3.59 -3.78611500
## 1162 -3.66 -4.07704500
## 1164 -3.68 -3.48136667
## 1171 -3.75 -3.99305167
## 1172 -3.76 -3.94040167
## 1175 -3.78 -3.48154333
## 1177 -3.80 -3.90414000
## 1179 -3.80 -4.31383833
## 1183 -3.85 -4.09734714
## 1185 -3.89 -3.81921500
## 1189 -3.95 -3.98643000
## 1211 -4.29 -4.96257333
## 1218 -4.42 -3.94297667
## 1224 -4.48 -3.53352167
## 1225 -4.48 -4.32658000
## 1227 -4.53 -4.81960000
## 1232 -4.63 -4.45693667
## 1235 -4.73 -4.20299000
## 1238 -4.84 -3.77068333
## 1240 -4.89 -4.36926000
## 1241 -4.89 -5.04212429
## 1248 -5.26 -5.19248000
## 1258 -6.09 -4.59498667
## 1261 -6.29 -5.81051333
## 1263 -6.29 -6.38106667
## 1269 -6.89 -4.82948167
## 1270 -6.96 -5.02815167
## 1271 -7.00 -6.85987806
## 1272 -7.05 -7.89716773
## 1280 -8.30 -8.33209427
## 1286 -8.66 -8.14722381
## 1287 -9.03 -8.23099381
## 1289 -10.41 -9.87519048
## 1290 -7.89 -7.61023417
## 1291 -2.32 -2.04724500
## 1294 0.39 -2.11422667
## 1305 -2.90 -4.30074333
## 1308 -2.47 -3.94169167
##################################
# Reporting the independent evaluation results
# for the test set
##################################
# postResample(pred, obs): RMSE, R-squared and MAE of the random forest
# predictions against the observed test values; outer parentheses print.
(RF_Test_Metrics <- postResample(RF_Test[, 2], RF_Test[, 1]))
## RMSE Rsquared MAE
## 0.6557191 0.9005713 0.4681318
# Keep the individual metrics for later cross-model comparison
(RF_Test_RMSE <- RF_Test_Metrics[1])
## RMSE
## 0.6557191
(RF_Test_Rsquared <- RF_Test_Metrics[2])
## Rsquared
## 0.9005713
##################################
# Transforming factor predictors
# as required by the nature of the model
##################################
# Creating a local object
# for the train and test sets
##################################
# Coerce every column to numeric (via character to safely convert factors)
# because cubist requires numeric predictors
PMA_PreModelling_Train_CUB <- as.data.frame(lapply(PMA_PreModelling_Train, function(x) as.numeric(as.character(x))))
dim(PMA_PreModelling_Train_CUB)
## [1] 951 221
PMA_PreModelling_Test_CUB <- as.data.frame(lapply(PMA_PreModelling_Test, function(x) as.numeric(as.character(x))))
dim(PMA_PreModelling_Test_CUB)
## [1] 316 221
##################################
# Creating consistent fold assignments
# for the 10-Fold Cross Validation process
##################################
# Same seed as the other sections so all models share identical folds
set.seed(12345678)
KFold_Indices <- createFolds(PMA_PreModelling_Train_CUB$Log_Solubility,
                             k = 10,
                             returnTrain = TRUE)
KFold_Control <- trainControl(method = "cv",
                              index = KFold_Indices)
##################################
# Setting the conditions
# for hyperparameter tuning
##################################
# Cross all committee counts with all neighbor counts (14 x 4 = 56 candidates)
CUB_Grid <- expand.grid(committees = c(1:10, 20, 50, 75, 100),
                        neighbors = c(0, 1, 5, 9))
##################################
# Running the cubist model
# by setting the caret method to 'cubist'
##################################
# Predictors = all numeric columns except the response Log_Solubility
set.seed(12345678)
CUB_Tune <- train(x = PMA_PreModelling_Train_CUB[, !names(PMA_PreModelling_Train_CUB) %in% c("Log_Solubility")],
                  y = PMA_PreModelling_Train_CUB$Log_Solubility,
                  method = "cubist",
                  tuneGrid = CUB_Grid,
                  trControl = KFold_Control)
##################################
# Reporting the cross-validation results
# for the train set
##################################
# Printing the train object shows the resampling summary across the tuning grid
CUB_Tune
## Cubist
##
## 951 samples
## 220 predictors
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 857, 855, 855, 856, 857, 856, ...
## Resampling results across tuning parameters:
##
## committees neighbors RMSE Rsquared MAE
## 1 0 0.6752542 0.8901629 0.5189171
## 1 1 0.7008903 0.8837194 0.5159612
## 1 5 0.6188772 0.9080481 0.4604200
## 1 9 0.6135842 0.9097579 0.4582143
## 2 0 0.6328227 0.9039087 0.4764290
## 2 1 0.6700298 0.8931433 0.4966519
## 2 5 0.5930006 0.9154193 0.4397414
## 2 9 0.5903048 0.9161890 0.4379268
## 3 0 0.6192273 0.9079020 0.4696538
## 3 1 0.6554171 0.8972964 0.4841175
## 3 5 0.5774071 0.9196275 0.4266022
## 3 9 0.5727396 0.9209246 0.4243919
## 4 0 0.6135739 0.9100651 0.4631492
## 4 1 0.6604880 0.8963709 0.4880789
## 4 5 0.5764280 0.9205037 0.4269968
## 4 9 0.5733087 0.9212829 0.4258535
## 5 0 0.6094395 0.9106368 0.4583396
## 5 1 0.6618431 0.8951874 0.4868163
## 5 5 0.5752840 0.9202914 0.4236369
## 5 9 0.5691704 0.9218718 0.4200879
## 6 0 0.6063451 0.9122943 0.4539687
## 6 1 0.6599286 0.8961854 0.4851999
## 6 5 0.5718522 0.9216146 0.4218841
## 6 9 0.5676389 0.9226830 0.4200237
## 7 0 0.6067740 0.9116071 0.4552029
## 7 1 0.6613310 0.8955053 0.4858224
## 7 5 0.5740189 0.9205727 0.4224482
## 7 9 0.5683498 0.9220973 0.4190641
## 8 0 0.6054532 0.9122002 0.4543680
## 8 1 0.6629762 0.8952615 0.4880736
## 8 5 0.5738019 0.9208570 0.4240097
## 8 9 0.5693537 0.9219893 0.4215548
## 9 0 0.6019943 0.9130469 0.4511290
## 9 1 0.6566265 0.8967188 0.4826374
## 9 5 0.5703953 0.9215861 0.4218037
## 9 9 0.5656251 0.9228227 0.4182451
## 10 0 0.6028672 0.9128099 0.4539634
## 10 1 0.6536486 0.8978655 0.4816393
## 10 5 0.5704452 0.9216393 0.4214629
## 10 9 0.5669797 0.9225541 0.4190950
## 20 0 0.5993153 0.9139238 0.4516966
## 20 1 0.6471885 0.8998615 0.4774183
## 20 5 0.5683336 0.9224196 0.4208599
## 20 9 0.5639998 0.9235150 0.4172223
## 50 0 0.5926935 0.9157234 0.4458315
## 50 1 0.6388245 0.9022758 0.4700059
## 50 5 0.5625895 0.9239751 0.4162366
## 50 9 0.5581390 0.9250679 0.4125082
## 75 0 0.5907510 0.9160536 0.4435589
## 75 1 0.6381473 0.9023978 0.4693750
## 75 5 0.5628524 0.9237714 0.4158270
## 75 9 0.5578756 0.9250054 0.4117147
## 100 0 0.5904289 0.9161594 0.4436908
## 100 1 0.6384052 0.9022657 0.4692430
## 100 5 0.5614157 0.9241375 0.4152984
## 100 9 0.5564468 0.9253788 0.4115436
##
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were committees = 100 and neighbors = 9.
# Display the final fitted cubist model selected by resampling
CUB_Tune$finalModel
##
## Call:
## cubist.default(x = x, y = y, committees = param$committees)
##
## Number of samples: 951
## Number of predictors: 220
##
## Number of committees: 100
## Number of rules per committee: 3, 4, 5, 3, 5, 5, 3, 9, 4, 4, 3, 4, 3, 10, 3, 6, 3, 5, 3, 8 ...
# Full resampling results table (one row per committees/neighbors combination)
CUB_Tune$results
## committees neighbors RMSE Rsquared MAE RMSESD RsquaredSD
## 1 1 0 0.6752542 0.8901629 0.5189171 0.07075110 0.02148323
## 2 1 1 0.7008903 0.8837194 0.5159612 0.06509564 0.02451798
## 3 1 5 0.6188772 0.9080481 0.4604200 0.06922244 0.01920558
## 4 1 9 0.6135842 0.9097579 0.4582143 0.07173506 0.01815343
## 5 2 0 0.6328227 0.9039087 0.4764290 0.06424339 0.01992966
## 6 2 1 0.6700298 0.8931433 0.4966519 0.07053455 0.02567565
## 7 2 5 0.5930006 0.9154193 0.4397414 0.06449195 0.01859323
## 8 2 9 0.5903048 0.9161890 0.4379268 0.07015247 0.01834661
## 9 3 0 0.6192273 0.9079020 0.4696538 0.06721987 0.02038225
## 10 3 1 0.6554171 0.8972964 0.4841175 0.06030129 0.02312476
## 11 3 5 0.5774071 0.9196275 0.4266022 0.05989280 0.01780228
## 12 3 9 0.5727396 0.9209246 0.4243919 0.06193643 0.01679528
## 13 4 0 0.6135739 0.9100651 0.4631492 0.06840373 0.01813722
## 14 4 1 0.6604880 0.8963709 0.4880789 0.06650423 0.02100972
## 15 4 5 0.5764280 0.9205037 0.4269968 0.06700236 0.01651648
## 16 4 9 0.5733087 0.9212829 0.4258535 0.07068752 0.01589680
## 17 5 0 0.6094395 0.9106368 0.4583396 0.06873004 0.02063372
## 18 5 1 0.6618431 0.8951874 0.4868163 0.06461635 0.02501600
## 19 5 5 0.5752840 0.9202914 0.4236369 0.05596206 0.01670758
## 20 5 9 0.5691704 0.9218718 0.4200879 0.06117837 0.01667682
## 21 6 0 0.6063451 0.9122943 0.4539687 0.06820048 0.01781319
## 22 6 1 0.6599286 0.8961854 0.4851999 0.06298180 0.02238397
## 23 6 5 0.5718522 0.9216146 0.4218841 0.05815585 0.01479684
## 24 6 9 0.5676389 0.9226830 0.4200237 0.06183992 0.01446132
## 25 7 0 0.6067740 0.9116071 0.4552029 0.06873557 0.01943849
## 26 7 1 0.6613310 0.8955053 0.4858224 0.06397915 0.02393074
## 27 7 5 0.5740189 0.9205727 0.4224482 0.05798751 0.01671333
## 28 7 9 0.5683498 0.9220973 0.4190641 0.06196759 0.01617214
## 29 8 0 0.6054532 0.9122002 0.4543680 0.06960940 0.01908694
## 30 8 1 0.6629762 0.8952615 0.4880736 0.06009633 0.02290597
## 31 8 5 0.5738019 0.9208570 0.4240097 0.05749275 0.01559596
## 32 8 9 0.5693537 0.9219893 0.4215548 0.06244961 0.01532364
## 33 9 0 0.6019943 0.9130469 0.4511290 0.07313827 0.02063898
## 34 9 1 0.6566265 0.8967188 0.4826374 0.05992369 0.02372793
## 35 9 5 0.5703953 0.9215861 0.4218037 0.05826366 0.01673550
## 36 9 9 0.5656251 0.9228227 0.4182451 0.06368779 0.01669090
## 37 10 0 0.6028672 0.9128099 0.4539634 0.07098043 0.01989151
## 38 10 1 0.6536486 0.8978655 0.4816393 0.05809324 0.02229468
## 39 10 5 0.5704452 0.9216393 0.4214629 0.05716485 0.01614830
## 40 10 9 0.5669797 0.9225541 0.4190950 0.06262368 0.01607240
## 41 20 0 0.5993153 0.9139238 0.4516966 0.06445217 0.01863460
## 42 20 1 0.6471885 0.8998615 0.4774183 0.04788924 0.02090719
## 43 20 5 0.5683336 0.9224196 0.4208599 0.05092834 0.01534381
## 44 20 9 0.5639998 0.9235150 0.4172223 0.05603083 0.01519094
## 45 50 0 0.5926935 0.9157234 0.4458315 0.06214991 0.01847751
## 46 50 1 0.6388245 0.9022758 0.4700059 0.05140761 0.02147608
## 47 50 5 0.5625895 0.9239751 0.4162366 0.05230705 0.01561716
## 48 50 9 0.5581390 0.9250679 0.4125082 0.05656249 0.01544108
## 49 75 0 0.5907510 0.9160536 0.4435589 0.06153736 0.01910950
## 50 75 1 0.6381473 0.9023978 0.4693750 0.05024654 0.02189054
## 51 75 5 0.5628524 0.9237714 0.4158270 0.05055428 0.01598086
## 52 75 9 0.5578756 0.9250054 0.4117147 0.05525199 0.01587909
## 53 100 0 0.5904289 0.9161594 0.4436908 0.06106853 0.01932156
## 54 100 1 0.6384052 0.9022657 0.4692430 0.04950692 0.02206031
## 55 100 5 0.5614157 0.9241375 0.4152984 0.05035178 0.01601373
## 56 100 9 0.5564468 0.9253788 0.4115436 0.05497151 0.01586072
## MAESD
## 1 0.06061840
## 2 0.04687813
## 3 0.05380609
## 4 0.05544167
## 5 0.05773281
## 6 0.04973829
## 7 0.04967622
## 8 0.05624501
## 9 0.05649767
## 10 0.04504088
## 11 0.04608716
## 12 0.04967174
## 13 0.06164008
## 14 0.05038505
## 15 0.05332448
## 16 0.05819382
## 17 0.05731339
## 18 0.04936554
## 19 0.04627150
## 20 0.05052480
## 21 0.05629940
## 22 0.04890753
## 23 0.04742342
## 24 0.05190857
## 25 0.05724735
## 26 0.04786595
## 27 0.04557568
## 28 0.05153380
## 29 0.05796442
## 30 0.04676440
## 31 0.04675714
## 32 0.05291233
## 33 0.05949880
## 34 0.04730699
## 35 0.04655640
## 36 0.05408456
## 37 0.05908847
## 38 0.04622777
## 39 0.04741228
## 40 0.05429759
## 41 0.05441810
## 42 0.03822151
## 43 0.04281448
## 44 0.05116607
## 45 0.05136633
## 46 0.03915659
## 47 0.04229044
## 48 0.05012352
## 49 0.05130847
## 50 0.03903998
## 51 0.04112365
## 52 0.04913291
## 53 0.05072458
## 54 0.03892660
## 55 0.04077691
## 56 0.04863760
# Cross-validated RMSE for the optimal tuning parameters
# (committees = 100, neighbors = 9, per CUB_Tune$bestTune)
(CUB_Train_RMSE <- CUB_Tune$results[CUB_Tune$results$committees==CUB_Tune$bestTune$committees &
                                    CUB_Tune$results$neighbors==CUB_Tune$bestTune$neighbors,
                                    c("RMSE")])
## [1] 0.5564468

# Cross-validated R-squared for the same optimal tuning parameters
(CUB_Train_Rsquared <- CUB_Tune$results[CUB_Tune$results$committees==CUB_Tune$bestTune$committees &
                                        CUB_Tune$results$neighbors==CUB_Tune$bestTune$neighbors,
                                        c("Rsquared")])
## [1] 0.9253788
##################################
# Identifying and plotting the
# best model predictors
##################################
# Compute scaled (0-100) variable importance for the tuned Cubist model
CUB_VarImp <- varImp(CUB_Tune, scale = TRUE)

# Plot the top 25 predictors ranked by importance
plot(CUB_VarImp,
     top=25,
     scales=list(y=list(cex = .95)),
     main="Ranked Variable Importance : Cubist",
     xlab="Scaled Variable Importance Metrics",
     ylab="Predictors",
     cex=2,
     origin=0,
     alpha=0.45)
##################################
# Independently evaluating the model
# on the test set
##################################
# Apply the tuned Cubist model to the test set, pairing observed and
# predicted log-solubility values; the response column is dropped from
# the predictor matrix before prediction
CUB_Test <- data.frame(CUB_Observed = PMA_PreModelling_Test$Log_Solubility,
                       CUB_Predicted = predict(CUB_Tune,
                                               PMA_PreModelling_Test[,!names(PMA_PreModelling_Test) %in% c("Log_Solubility")]))

CUB_Test
## CUB_Observed CUB_Predicted
## 1 0.93 5.172006e-01
## 2 0.85 4.744695e-01
## 3 0.81 -3.106867e-01
## 4 0.74 5.642811e-01
## 5 0.61 -2.795888e-01
## 6 0.58 1.220973e+00
## 7 0.57 5.271021e-01
## 8 0.56 4.673800e-01
## 9 0.52 3.596828e-01
## 10 0.45 -1.953650e-01
## 11 0.40 9.581463e-02
## 12 0.36 -1.770827e+00
## 13 0.22 6.735242e-02
## 14 0.08 -1.327409e-02
## 15 0.07 -9.621343e-01
## 16 0.02 -3.472218e-01
## 17 0.00 -1.880903e-02
## 18 -0.01 3.657053e-02
## 19 -0.07 7.146210e-01
## 20 -0.12 -4.522400e-01
## 21 -0.17 4.591612e-01
## 22 -0.29 -7.476294e-02
## 23 -0.38 -4.240656e-01
## 24 -0.38 -8.791980e-01
## 25 -0.39 -8.478543e-01
## 26 -0.42 -9.527521e-01
## 27 -0.44 -5.708790e-01
## 28 -0.46 5.170208e-01
## 29 -0.48 -2.082416e+00
## 30 -0.60 -7.150730e-01
## 31 -0.63 -1.928441e+00
## 32 -0.66 -4.997951e-01
## 33 -0.72 -5.738401e-01
## 34 -0.72 -2.128842e-01
## 35 -0.80 -6.117046e-02
## 36 -0.80 -5.603025e-01
## 37 -0.82 5.831223e-01
## 38 -0.82 -7.053283e-01
## 39 -0.84 -4.939408e-01
## 40 -0.85 -7.390255e-01
## 41 -0.85 -2.165750e-01
## 42 -0.87 -1.334413e+00
## 43 -0.89 -1.491920e+00
## 44 -0.90 1.310215e-01
## 45 -0.96 -1.228196e+00
## 46 -0.96 -3.503021e-01
## 47 -0.99 -5.740114e-01
## 48 -1.01 -4.228113e-01
## 49 -1.09 -1.122615e+00
## 50 -1.12 -1.104403e+00
## 51 -1.14 -1.202845e+00
## 52 -1.17 -1.178322e+00
## 53 -1.19 -1.299870e+00
## 54 -1.22 -9.899879e-01
## 55 -1.27 -1.195272e+00
## 56 -1.28 -1.340667e+00
## 57 -1.32 -1.489669e+00
## 58 -1.38 -1.269200e+00
## 59 -1.39 -1.918472e+00
## 60 -1.42 -1.619951e+00
## 61 -1.47 -1.468703e+00
## 62 -1.47 -1.210847e+00
## 63 -1.50 -8.650517e-01
## 64 -1.52 -1.272961e+00
## 65 -1.54 -1.350985e+00
## 66 -1.55 -2.278194e+00
## 67 -1.56 -2.410002e+00
## 68 -1.57 -1.594703e+00
## 69 -1.60 -1.049522e+00
## 70 -1.60 -2.410012e+00
## 71 -1.62 -2.412026e+00
## 72 -1.64 -2.566043e+00
## 73 -1.67 -1.478632e+00
## 74 -1.70 -2.595009e+00
## 75 -1.70 -1.743490e+00
## 76 -1.71 -2.534111e+00
## 77 -1.71 -2.347152e+00
## 78 -1.75 -1.684114e+00
## 79 -1.78 -1.550764e+00
## 80 -1.78 -1.847633e+00
## 81 -1.82 -1.274292e+00
## 82 -1.87 -1.074537e+00
## 83 -1.89 -1.875680e+00
## 84 -1.92 -1.824179e+00
## 85 -1.92 -1.844802e+00
## 86 -1.92 -1.256984e+00
## 87 -1.94 -2.816156e+00
## 88 -1.99 -1.926969e+00
## 89 -2.00 -2.193821e+00
## 90 -2.05 -2.079077e+00
## 91 -2.06 -2.110536e+00
## 92 -2.08 -2.154182e+00
## 93 -2.10 -2.901855e+00
## 94 -2.11 -1.330634e+00
## 95 -2.12 -1.239691e+00
## 96 -2.17 -1.880477e+00
## 97 -2.21 -2.308400e+00
## 98 -2.24 -2.739114e+00
## 99 -2.24 -1.113671e+00
## 100 -2.29 -2.161248e+00
## 101 -2.31 -2.032299e+00
## 102 -2.32 -1.975597e+00
## 103 -2.35 -2.346902e+00
## 104 -2.35 -1.761070e+00
## 105 -2.36 -3.044691e+00
## 106 -2.36 -2.079780e+00
## 107 -2.38 -2.408683e+00
## 108 -2.42 -2.339951e+00
## 109 -2.43 -3.300439e+00
## 110 -2.44 -2.431149e+00
## 111 -2.52 -2.319848e+00
## 112 -2.53 -2.331616e+00
## 113 -2.57 -2.479958e+00
## 114 -2.62 -2.187034e+00
## 115 -2.62 -2.345561e+00
## 116 -2.64 -2.720923e+00
## 117 -2.64 -3.278156e+00
## 118 -2.70 -2.948889e+00
## 119 -2.82 -3.071409e+00
## 120 -2.88 -2.688234e+00
## 121 -2.89 -2.659425e+00
## 122 -2.92 -1.547081e+00
## 123 -2.93 -3.379261e+00
## 124 -2.96 -2.848138e+00
## 125 -2.98 -3.233932e+00
## 126 -3.01 -2.541941e+00
## 127 -3.01 -3.778557e+00
## 128 -3.02 -3.653405e+00
## 129 -3.07 -3.189232e+00
## 130 -3.09 -2.888317e+00
## 131 -3.11 -3.528169e+00
## 132 -3.13 -3.667722e+00
## 133 -3.14 -2.110050e+00
## 134 -3.15 -3.409269e+00
## 135 -3.22 -2.452943e+00
## 136 -3.26 -3.587133e+00
## 137 -3.27 -2.928179e+00
## 138 -3.27 -3.385448e+00
## 139 -3.30 -3.630623e+00
## 140 -3.31 -2.285615e+00
## 141 -3.33 -2.768307e+00
## 142 -3.37 -2.404949e+00
## 143 -3.43 -3.460907e+00
## 144 -3.43 -2.520015e+00
## 145 -3.48 -2.840985e+00
## 146 -3.51 -3.623060e+00
## 147 -3.59 -2.456094e+00
## 148 -3.61 -3.088016e+00
## 149 -3.63 -3.684434e+00
## 150 -3.63 -3.515325e+00
## 151 -3.68 -2.135059e+00
## 152 -3.71 -4.457196e+00
## 153 -3.74 -3.582433e+00
## 154 -3.75 -4.037922e+00
## 155 -3.75 -3.477290e+00
## 156 -3.77 -3.524473e+00
## 157 -3.77 -4.344183e+00
## 158 -3.78 -4.136355e+00
## 159 -3.81 -3.812230e+00
## 160 -3.95 -4.050872e+00
## 161 -3.96 -4.931999e+00
## 162 -3.96 -4.342718e+00
## 163 -4.00 -3.883743e+00
## 164 -4.02 -3.727985e+00
## 165 -4.04 -4.366707e+00
## 166 -4.12 -3.948336e+00
## 167 -4.15 -4.901789e+00
## 168 -4.16 -3.495876e+00
## 169 -4.17 -4.702560e+00
## 170 -4.21 -4.632524e+00
## 171 -4.23 -4.245769e+00
## 172 -4.25 -4.345779e+00
## 173 -4.30 -3.930046e+00
## 174 -4.31 -5.519844e+00
## 175 -4.35 -4.435305e+00
## 176 -4.40 -4.087770e+00
## 177 -4.40 -4.437664e+00
## 178 -4.43 -4.511951e+00
## 179 -4.46 -4.352980e+00
## 180 -4.47 -4.155533e+00
## 181 -4.51 -4.805473e+00
## 182 -4.60 -3.794261e+00
## 183 -4.64 -4.800642e+00
## 184 -4.69 -4.989451e+00
## 185 -4.71 -4.137802e+00
## 186 -4.77 -4.017933e+00
## 187 -4.95 -4.560051e+00
## 188 -4.98 -4.267509e+00
## 189 -5.21 -6.012078e+00
## 190 -5.22 -5.521434e+00
## 191 -5.28 -4.443992e+00
## 192 -5.31 -3.563909e+00
## 193 -5.35 -4.531025e+00
## 194 -5.37 -4.271461e+00
## 195 -5.40 -4.877545e+00
## 196 -5.43 -4.166358e+00
## 197 -5.65 -4.984075e+00
## 198 -5.66 -4.356925e+00
## 199 -6.70 -4.587479e+00
## 200 -5.72 -4.561443e+00
## 201 -6.00 -7.018298e+00
## 202 -6.25 -6.643809e+00
## 203 -6.26 -6.376649e+00
## 204 -6.27 -6.654786e+00
## 205 -6.35 -6.170313e+00
## 206 -6.57 -6.342891e+00
## 207 -6.62 -4.842244e+00
## 208 -6.96 -6.280235e+00
## 209 -7.02 -7.803179e+00
## 210 -7.20 -7.195404e+00
## 211 -7.28 -7.344370e+00
## 212 -7.32 -7.612546e+00
## 213 -7.39 -7.837104e+00
## 214 -7.82 -8.262712e+00
## 215 -8.23 -8.822540e+00
## 216 -8.94 -8.526814e+00
## 217 1.07 1.221488e-01
## 218 0.43 3.174650e-01
## 219 0.32 -1.087638e-01
## 220 0.00 1.497244e-04
## 221 -0.40 -1.316449e+00
## 222 -0.52 -3.207503e-01
## 223 -0.55 -4.866762e-01
## 224 -0.60 -8.038339e-01
## 225 -0.62 -2.058377e+00
## 226 -0.85 -9.838387e-01
## 227 -0.89 -8.581456e-01
## 228 -0.93 -1.231096e+00
## 229 -0.96 -1.979357e-01
## 230 -1.06 -1.416757e+00
## 231 -1.10 -1.161621e+00
## 232 -1.12 -1.132983e+00
## 233 -1.15 -6.601524e-01
## 234 -1.28 -4.816613e-01
## 235 -1.30 -1.264985e+00
## 236 -1.31 -1.320430e+00
## 237 -1.35 -3.245388e+00
## 238 -1.39 -2.065192e+00
## 239 -1.41 -1.299870e+00
## 240 -1.41 -1.277439e+00
## 241 -1.42 -6.929296e-01
## 242 -1.46 -1.827158e+00
## 243 -1.50 -1.628045e+00
## 244 -1.50 -1.952074e+00
## 245 -1.52 -1.947410e+00
## 246 -1.52 -1.354323e+00
## 247 -1.59 -1.757442e+00
## 248 -1.61 -1.333914e+00
## 249 -1.63 -1.059824e+00
## 250 -1.71 -2.249090e+00
## 251 -1.83 -2.417639e+00
## 252 -2.05 -1.511111e+00
## 253 -2.06 -2.567048e+00
## 254 -2.07 -2.790463e+00
## 255 -2.15 -2.824705e+00
## 256 -2.16 -1.367295e+00
## 257 -1.99 -6.977186e-01
## 258 -2.36 -2.102892e+00
## 259 -2.38 -3.211128e+00
## 260 -2.39 -1.688960e+00
## 261 -2.46 -2.103126e+00
## 262 -2.49 -2.239222e+00
## 263 -2.54 -2.531280e+00
## 264 -2.55 -2.618171e+00
## 265 -2.63 -2.682087e+00
## 266 -2.64 -2.131080e+00
## 267 -2.67 -2.637926e+00
## 268 -2.68 -2.204462e+00
## 269 -2.77 -2.946862e+00
## 270 -2.78 -3.149663e+00
## 271 -2.82 -2.917542e+00
## 272 -2.92 -3.519327e+00
## 273 -3.03 -2.857930e+00
## 274 -3.12 -3.571173e+00
## 275 -3.16 -2.835844e+00
## 276 -3.19 -3.610034e+00
## 277 -3.54 -3.522895e+00
## 278 -3.54 -2.603230e+00
## 279 -3.59 -3.528462e+00
## 280 -3.66 -3.734816e+00
## 281 -3.68 -3.620473e+00
## 282 -3.75 -3.780582e+00
## 283 -3.76 -3.823359e+00
## 284 -3.78 -4.155089e+00
## 285 -3.80 -3.935853e+00
## 286 -3.80 -4.474550e+00
## 287 -3.85 -3.945191e+00
## 288 -3.89 -4.292277e+00
## 289 -3.95 -4.025692e+00
## 290 -4.29 -4.995418e+00
## 291 -4.42 -4.714052e+00
## 292 -4.48 -3.679958e+00
## 293 -4.48 -4.167231e+00
## 294 -4.53 -4.984929e+00
## 295 -4.63 -4.420812e+00
## 296 -4.73 -4.404468e+00
## 297 -4.84 -4.030525e+00
## 298 -4.89 -4.271135e+00
## 299 -4.89 -4.997596e+00
## 300 -5.26 -6.030104e+00
## 301 -6.09 -4.678152e+00
## 302 -6.29 -6.102019e+00
## 303 -6.29 -6.583594e+00
## 304 -6.89 -6.388579e+00
## 305 -6.96 -6.376021e+00
## 306 -7.00 -7.061845e+00
## 307 -7.05 -7.878247e+00
## 308 -8.30 -8.766311e+00
## 309 -8.66 -9.350718e+00
## 310 -9.03 -9.559437e+00
## 311 -10.41 -1.000493e+01
## 312 -7.89 -7.574618e+00
## 313 -2.32 -1.948596e+00
## 314 0.39 -2.261666e+00
## 315 -2.90 -4.946440e+00
## 316 -2.47 -5.025486e+00
##################################
# Reporting the independent evaluation results
# for the test set
##################################
# Test-set performance: postResample(pred, obs) returns RMSE, Rsquared, MAE
# (column 2 = predicted, column 1 = observed)
(CUB_Test_Metrics <- postResample(CUB_Test[,2], CUB_Test[,1]))
## RMSE Rsquared MAE
## 0.6380865 0.9071665 0.4665613

(CUB_Test_RMSE <- CUB_Test_Metrics[1])
## RMSE
## 0.6380865

(CUB_Test_Rsquared <- CUB_Test_Metrics[2])
## Rsquared
## 0.9071665
##################################
# Consolidating all evaluation results
# for the train and test sets
# using the r-squared metric
##################################
# Model labels for the 15 candidate algorithms, repeated once for the
# cross-validation results and once for the test-set results
Model <- c('LR','PLR_R','PLR_L','PLR_E','PCR','PLS','AVNN','MARS','SVM_R','SVM_P','KNN','CART','CTREE','RF','CUB',
           'LR','PLR_R','PLR_L','PLR_E','PCR','PLS','AVNN','MARS','SVM_R','SVM_P','KNN','CART','CTREE','RF','CUB')

Set <- c(rep('Cross-Validation',15),rep('Test',15))

# R-squared values gathered from the per-model evaluation sections above,
# ordered to match the Model/Set vectors
R_Squared <- c(LR_Train_Rsquared,PLR_R_Train_Rsquared,PLR_L_Train_Rsquared,PLR_E_Train_Rsquared,PCR_Train_Rsquared,
               PLS_Train_Rsquared,AVNN_Train_Rsquared,MARS_Train_Rsquared,SVM_R_Train_Rsquared,SVM_P_Train_Rsquared,
               KNN_Train_Rsquared,CART_Train_Rsquared,CTREE_Train_Rsquared,RF_Train_Rsquared,CUB_Train_Rsquared,
               LR_Test_Rsquared,PLR_R_Test_Rsquared,PLR_L_Test_Rsquared,PLR_E_Test_Rsquared,PCR_Test_Rsquared,
               PLS_Test_Rsquared,AVNN_Test_Rsquared,MARS_Test_Rsquared,SVM_R_Test_Rsquared,SVM_P_Test_Rsquared,
               KNN_Test_Rsquared,CART_Test_Rsquared,CTREE_Test_Rsquared,RF_Test_Rsquared,CUB_Test_Rsquared)

R_Squared_Summary <- as.data.frame(cbind(Model,Set,R_Squared))

# cbind() coerces everything to character; restore the numeric metric and
# fix the factor level order so plots list models consistently
R_Squared_Summary$R_Squared <- as.numeric(as.character(R_Squared_Summary$R_Squared))
R_Squared_Summary$Set <- factor(R_Squared_Summary$Set,
                                levels = c("Cross-Validation",
                                           "Test"))
R_Squared_Summary$Model <- factor(R_Squared_Summary$Model,
                                  levels = c("LR",
                                             "PLR_R",
                                             "PLR_L",
                                             "PLR_E",
                                             "PCR",
                                             "PLS",
                                             "AVNN",
                                             "MARS",
                                             "SVM_R",
                                             "SVM_P",
                                             "KNN",
                                             "CART",
                                             "CTREE",
                                             "RF",
                                             "CUB"))

print(R_Squared_Summary, row.names=FALSE)
## Model Set R_Squared
## LR Cross-Validation 0.8862948
## PLR_R Cross-Validation 0.8968424
## PLR_L Cross-Validation 0.8976271
## PLR_E Cross-Validation 0.8982164
## PCR Cross-Validation 0.8677154
## PLS Cross-Validation 0.8992123
## AVNN Cross-Validation 0.7480237
## MARS Cross-Validation 0.8823049
## SVM_R Cross-Validation 0.9155098
## SVM_P Cross-Validation 0.9116780
## KNN Cross-Validation 0.7326032
## CART Cross-Validation 0.7814193
## CTREE Cross-Validation 0.7794050
## RF Cross-Validation 0.9009781
## CUB Cross-Validation 0.9253788
## LR Test 0.8643929
## PLR_R Test 0.8751709
## PLR_L Test 0.8746818
## PLR_E Test 0.8761762
## PCR Test 0.8351614
## PLS Test 0.8670618
## AVNN Test 0.7829311
## MARS Test 0.8689100
## SVM_R Test 0.9098906
## SVM_P Test 0.9062256
## KNN Test 0.7137298
## CART Test 0.8070706
## CTREE Test 0.7310919
## RF Test 0.9005713
## CUB Test 0.9071665

# Dot plot comparing cross-validated vs test-set R-squared per model
(R_Squared_Plot <- dotplot(Model ~ R_Squared,
                           data = R_Squared_Summary,
                           groups = Set,
                           main = "Regression Model Performance Comparison",
                           ylab = "Model",
                           xlab = "R-Squared",
                           auto.key = list(adj=1, space="top", columns=2),
                           type = c("p", "h"),
                           origin = 0,
                           alpha = 0.45,
                           pch = 16,
                           cex = 2))
##################################
# Consolidating all evaluation results
# for the train and test sets
# using the rmse metric
##################################
# Model labels for the 15 candidate algorithms, repeated once for the
# cross-validation results and once for the test-set results
Model <- c('LR','PLR_R','PLR_L','PLR_E','PCR','PLS','AVNN','MARS','SVM_R','SVM_P','KNN','CART','CTREE','RF','CUB',
           'LR','PLR_R','PLR_L','PLR_E','PCR','PLS','AVNN','MARS','SVM_R','SVM_P','KNN','CART','CTREE','RF','CUB')

Set <- c(rep('Cross-Validation',15),rep('Test',15))

# RMSE values gathered from the per-model evaluation sections above,
# ordered to match the Model/Set vectors
RMSE <- c(LR_Train_RMSE,PLR_R_Train_RMSE,PLR_L_Train_RMSE,PLR_E_Train_RMSE,PCR_Train_RMSE,
          PLS_Train_RMSE,AVNN_Train_RMSE,MARS_Train_RMSE,SVM_R_Train_RMSE,SVM_P_Train_RMSE,
          KNN_Train_RMSE,CART_Train_RMSE,CTREE_Train_RMSE,RF_Train_RMSE,CUB_Train_RMSE,
          LR_Test_RMSE,PLR_R_Test_RMSE,PLR_L_Test_RMSE,PLR_E_Test_RMSE,PCR_Test_RMSE,
          PLS_Test_RMSE,AVNN_Test_RMSE,MARS_Test_RMSE,SVM_R_Test_RMSE,SVM_P_Test_RMSE,
          KNN_Test_RMSE,CART_Test_RMSE,CTREE_Test_RMSE,RF_Test_RMSE,CUB_Test_RMSE)

RMSE_Summary <- as.data.frame(cbind(Model,Set,RMSE))

# cbind() coerces everything to character; restore the numeric metric and
# fix the factor level order so plots list models consistently
RMSE_Summary$RMSE <- as.numeric(as.character(RMSE_Summary$RMSE))
RMSE_Summary$Set <- factor(RMSE_Summary$Set,
                           levels = c("Cross-Validation",
                                      "Test"))
RMSE_Summary$Model <- factor(RMSE_Summary$Model,
                             levels = c("LR",
                                        "PLR_R",
                                        "PLR_L",
                                        "PLR_E",
                                        "PCR",
                                        "PLS",
                                        "AVNN",
                                        "MARS",
                                        "SVM_R",
                                        "SVM_P",
                                        "KNN",
                                        "CART",
                                        "CTREE",
                                        "RF",
                                        "CUB"))

print(RMSE_Summary, row.names=FALSE)
## Model Set RMSE
## LR Cross-Validation 0.6871912
## PLR_R Cross-Validation 0.6527539
## PLR_L Cross-Validation 0.6489629
## PLR_E Cross-Validation 0.6471642
## PCR Cross-Validation 0.7426083
## PLS Cross-Validation 0.6440406
## AVNN Cross-Validation 1.0608843
## MARS Cross-Validation 0.7034801
## SVM_R Cross-Validation 0.5950500
## SVM_P Cross-Validation 0.6028074
## KNN Cross-Validation 1.0672913
## CART Cross-Validation 0.9490440
## CTREE Cross-Validation 0.9570454
## RF Cross-Validation 0.6541868
## CUB Cross-Validation 0.5564468
## LR Test 0.7725809
## PLR_R Test 0.7414774
## PLR_L Test 0.7389135
## PLR_E Test 0.7351873
## PCR Test 0.8448324
## PLS Test 0.7647343
## AVNN Test 0.9862466
## MARS Test 0.7580420
## SVM_R Test 0.6274210
## SVM_P Test 0.6377764
## KNN Test 1.1247103
## CART Test 0.9194665
## CTREE Test 1.1160100
## RF Test 0.6557191
## CUB Test 0.6380865

# Dot plot comparing cross-validated vs test-set RMSE per model
(RMSE_Plot <- dotplot(Model ~ RMSE,
                      data = RMSE_Summary,
                      groups = Set,
                      main = "Regression Model Performance Comparison",
                      ylab = "Model",
                      xlab = "Root-Mean-Square Error",
                      auto.key = list(adj=1, space="top", columns=2),
                      type = c("p", "h"),
                      origin = 0,
                      alpha = 0.45,
                      pch = 16,
                      cex = 2))