##################################
# Loading R libraries
##################################
library(AppliedPredictiveModeling)
library(caret)
library(rpart)
library(lattice)
library(dplyr)
library(tidyr)
library(moments)
library(skimr)
library(RANN)
library(pls)
library(corrplot)
library(tidyverse)
library(lares)
library(DMwR2)
library(gridExtra)
library(rattle)
library(rpart.plot)
library(RColorBrewer)
library(stats)
library(nnet)
library(elasticnet)
library(earth)
library(party)
library(kernlab)
library(randomForest)
library(Cubist)
library(minerva)
library(CORElearn)
##################################
# Loading source and
# formulating the train and test sets
##################################
# Load the solubility data set; it is expected to provide the objects
# solTrainX, solTrainY, solTestX and solTestY used below.
data(solubility)
# Train set: response (solTrainY) as the first column, followed by the
# predictor columns from solTrainX.
Solubility_Train <- as.data.frame(cbind(solTrainY, solTrainX))
# Test set: built the same way from solTestY and solTestX.
Solubility_Test <- as.data.frame(cbind(solTestY, solTestX))
##################################
# Performing a general exploration of the train set
##################################
# Dimensions of the train set (rows, columns); the recorded output follows.
dim(Solubility_Train)
## [1] 951 229
# Compact per-column structure listing of the train set. The recorded
# output below is cut off by str() ("[list output truncated]").
str(Solubility_Train)
## 'data.frame': 951 obs. of 229 variables:
## $ solTrainY : num -3.97 -3.98 -3.99 -4 -4.06 -4.08 -4.08 -4.1 -4.1 -4.11 ...
## $ FP001 : int 0 0 1 0 0 1 0 1 1 1 ...
## $ FP002 : int 1 1 1 0 0 0 1 0 0 1 ...
## $ FP003 : int 0 0 1 1 1 1 0 1 1 1 ...
## $ FP004 : int 0 1 1 0 1 1 1 1 1 1 ...
## $ FP005 : int 1 1 1 0 1 0 1 0 0 1 ...
## $ FP006 : int 0 1 0 0 1 0 0 0 1 1 ...
## $ FP007 : int 0 1 0 1 0 0 0 1 1 1 ...
## $ FP008 : int 1 1 1 0 0 0 1 0 0 0 ...
## $ FP009 : int 0 0 0 0 1 1 1 0 1 0 ...
## $ FP010 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ FP011 : int 0 1 0 0 0 0 0 0 1 0 ...
## $ FP012 : int 0 0 0 0 0 1 0 1 0 0 ...
## $ FP013 : int 0 0 0 0 1 0 1 0 0 0 ...
## $ FP014 : int 0 0 0 0 0 0 1 0 0 0 ...
## $ FP015 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ FP016 : int 0 1 0 0 1 1 0 1 0 0 ...
## $ FP017 : int 0 0 1 1 0 0 0 0 1 1 ...
## $ FP018 : int 0 1 0 0 0 0 0 0 0 0 ...
## $ FP019 : int 1 0 0 0 1 0 1 0 0 0 ...
## $ FP020 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP021 : int 0 0 0 0 0 1 0 0 1 0 ...
## $ FP022 : int 0 0 0 0 0 0 0 0 0 1 ...
## $ FP023 : int 0 0 0 1 0 0 0 0 1 0 ...
## $ FP024 : int 1 0 0 0 1 0 0 0 0 0 ...
## $ FP025 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ FP026 : int 1 0 0 0 0 0 1 0 0 0 ...
## $ FP027 : int 0 0 0 0 0 0 0 0 0 1 ...
## $ FP028 : int 0 1 0 0 0 0 0 0 1 1 ...
## $ FP029 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP030 : int 0 0 0 0 1 0 0 0 0 0 ...
## $ FP031 : int 0 0 0 0 0 0 0 1 0 0 ...
## $ FP032 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP033 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP034 : int 0 0 0 0 1 0 0 0 0 1 ...
## $ FP035 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ FP036 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP037 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ FP038 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ FP039 : int 1 0 0 0 0 0 0 0 0 0 ...
## $ FP040 : int 1 0 0 0 0 0 0 0 0 0 ...
## $ FP041 : int 0 0 0 1 0 0 0 0 1 0 ...
## $ FP042 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP043 : int 0 1 0 0 0 0 0 0 0 0 ...
## $ FP044 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP045 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ FP046 : int 0 1 0 0 0 0 1 0 0 1 ...
## $ FP047 : int 0 1 1 0 0 0 1 0 0 0 ...
## $ FP048 : int 0 0 0 0 0 0 0 1 0 0 ...
## $ FP049 : int 0 0 0 0 0 0 1 0 0 0 ...
## $ FP050 : int 0 0 0 0 0 0 0 1 0 1 ...
## $ FP051 : int 0 1 0 0 0 0 0 0 0 0 ...
## $ FP052 : int 0 0 0 0 0 0 0 0 0 1 ...
## $ FP053 : int 0 0 0 0 0 0 1 0 0 0 ...
## $ FP054 : int 0 0 0 1 0 0 0 0 1 1 ...
## $ FP055 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP056 : int 1 0 0 0 0 0 0 0 0 0 ...
## $ FP057 : int 0 0 0 0 0 0 1 0 0 0 ...
## $ FP058 : int 0 0 0 0 0 0 0 0 0 1 ...
## $ FP059 : int 0 0 0 0 0 0 0 1 0 0 ...
## $ FP060 : int 0 1 1 0 0 0 0 1 1 0 ...
## $ FP061 : int 0 0 1 0 0 0 0 1 1 0 ...
## $ FP062 : int 0 0 1 0 0 1 0 1 1 1 ...
## $ FP063 : int 1 1 0 0 1 1 1 0 0 1 ...
## $ FP064 : int 0 1 1 0 1 1 0 1 0 0 ...
## $ FP065 : int 1 1 0 0 1 0 1 0 1 1 ...
## $ FP066 : int 1 0 1 1 1 1 1 1 1 1 ...
## $ FP067 : int 1 1 0 0 1 1 1 0 0 1 ...
## $ FP068 : int 0 1 0 0 1 1 1 0 0 1 ...
## $ FP069 : int 1 0 1 1 1 1 0 1 1 0 ...
## $ FP070 : int 1 1 0 1 0 0 1 0 1 0 ...
## $ FP071 : int 0 0 0 0 0 0 1 0 1 1 ...
## $ FP072 : int 0 1 1 0 0 1 0 1 1 1 ...
## $ FP073 : int 0 1 1 0 0 0 0 0 1 0 ...
## $ FP074 : int 0 1 0 0 0 0 0 0 1 0 ...
## $ FP075 : int 0 1 0 0 1 1 1 0 0 1 ...
## $ FP076 : int 1 1 0 0 0 0 1 0 1 1 ...
## $ FP077 : int 0 1 0 1 0 0 0 1 1 1 ...
## $ FP078 : int 0 1 0 0 0 0 0 0 1 0 ...
## $ FP079 : int 1 1 1 1 1 0 1 0 1 1 ...
## $ FP080 : int 0 1 0 0 1 1 1 1 0 0 ...
## $ FP081 : int 0 0 1 1 0 0 0 1 1 1 ...
## $ FP082 : int 1 1 1 0 1 1 1 0 1 1 ...
## $ FP083 : int 0 0 0 0 1 0 0 0 0 1 ...
## $ FP084 : int 1 1 0 0 1 0 1 0 0 0 ...
## $ FP085 : int 0 1 0 0 0 0 1 0 0 0 ...
## $ FP086 : int 0 0 0 1 1 0 0 1 1 1 ...
## $ FP087 : int 1 1 1 1 1 0 1 0 1 1 ...
## $ FP088 : int 0 1 0 0 0 0 0 1 1 0 ...
## $ FP089 : int 1 1 0 0 0 0 1 0 0 0 ...
## $ FP090 : int 0 1 0 1 0 0 0 1 1 1 ...
## $ FP091 : int 1 1 0 0 1 0 1 0 0 1 ...
## $ FP092 : int 0 0 0 0 1 1 1 0 1 0 ...
## $ FP093 : int 0 1 0 1 0 0 0 1 1 1 ...
## $ FP094 : int 0 0 0 0 1 0 0 1 0 0 ...
## $ FP095 : int 0 0 0 0 0 0 0 0 1 1 ...
## $ FP096 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ FP097 : int 1 1 0 0 0 0 1 0 1 0 ...
## $ FP098 : int 0 0 1 0 0 0 0 1 0 0 ...
## [list output truncated]
# Per-column summary statistics of the train set (min, quartiles, median,
# mean, max), as shown in the recorded output below.
summary(Solubility_Train)
## solTrainY FP001 FP002 FP003
## Min. :-11.620 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.: -3.955 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median : -2.510 Median :0.0000 Median :1.0000 Median :0.0000
## Mean : -2.719 Mean :0.4932 Mean :0.5394 Mean :0.4364
## 3rd Qu.: -1.360 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. : 1.580 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP004 FP005 FP006 FP007
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :1.0000 Median :0.0000 Median :0.0000
## Mean :0.5846 Mean :0.5794 Mean :0.4006 Mean :0.3638
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP008 FP009 FP010 FP011
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.326 Mean :0.2797 Mean :0.1788 Mean :0.2145
## 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP012 FP013 FP014 FP015
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:1.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :1.0000
## Mean :0.1767 Mean :0.1661 Mean :0.1609 Mean :0.8601
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP016 FP017 FP018 FP019
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.000
## Mean :0.1462 Mean :0.1441 Mean :0.1314 Mean :0.122
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.000
## FP020 FP021 FP022 FP023
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.000
## Mean :0.1199 Mean :0.1209 Mean :0.1041 Mean :0.123
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.000
## FP024 FP025 FP026 FP027
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.1125 Mean :0.1157 Mean :0.08412 Mean :0.09779
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.00000
## FP028 FP029 FP030 FP031
## Min. :0.0000 Min. :0.000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.000 Median :0.00000 Median :0.00000
## Mean :0.1062 Mean :0.102 Mean :0.09359 Mean :0.08938
## 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.000 Max. :1.00000 Max. :1.00000
## FP032 FP033 FP034 FP035
## Min. :0.00000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.07361 Mean :0.0694 Mean :0.07992 Mean :0.07256
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.0000 Max. :1.00000 Max. :1.00000
## FP036 FP037 FP038 FP039
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.07571 Mean :0.07045 Mean :0.08622 Mean :0.07466
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP040 FP041 FP042 FP043
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.06835 Mean :0.06309 Mean :0.05678 Mean :0.06625
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP044 FP045 FP046 FP047
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.000
## Mean :0.05994 Mean :0.05573 Mean :0.3155 Mean :0.266
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:1.000
## Max. :1.00000 Max. :1.00000 Max. :1.0000 Max. :1.000
## FP048 FP049 FP050 FP051
## Min. :0.0000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.000 Median :0.0000 Median :0.0000
## Mean :0.1241 Mean :0.122 Mean :0.1125 Mean :0.1094
## 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.000 Max. :1.0000 Max. :1.0000
## FP052 FP053 FP054 FP055
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.09148 Mean :0.09359 Mean :0.07571 Mean :0.05363
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP056 FP057 FP058 FP059
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.06519 Mean :0.1199 Mean :0.1136 Mean :0.05468
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## FP060 FP061 FP062 FP063
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.4816 Mean :0.4469 Mean :0.4374 Mean :0.4259
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP064 FP065 FP066 FP067
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :1.0000 Median :1.0000 Median :0.0000
## Mean :0.4164 Mean :0.5931 Mean :0.6099 Mean :0.3796
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP068 FP069 FP070 FP071
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.000
## Mean :0.3617 Mean :0.3617 Mean :0.3554 Mean :0.327
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.000
## FP072 FP073 FP074 FP075
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.6583 Mean :0.3102 Mean :0.3249 Mean :0.3386
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP076 FP077 FP078 FP079
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :1.0000
## Mean :0.3281 Mean :0.3207 Mean :0.3039 Mean :0.6898
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP080 FP081 FP082 FP083
## Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :1.000 Median :0.0000
## Mean :0.3028 Mean :0.2787 Mean :0.714 Mean :0.2734
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.000 Max. :1.0000
## FP084 FP085 FP086 FP087
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.000 Median :0.0000 Median :0.0000 Median :1.0000
## Mean :0.286 Mean :0.2555 Mean :0.2692 Mean :0.7266
## 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP088 FP089 FP090 FP091
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.000
## Mean :0.2629 Mean :0.2471 Mean :0.2492 Mean :0.225
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.000
## FP092 FP093 FP094 FP095
## Min. :0.000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.000 Median :0.000 Median :0.0000 Median :0.0000
## Mean :0.244 Mean :0.244 Mean :0.2313 Mean :0.2198
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.000 Max. :1.000 Max. :1.0000 Max. :1.0000
## FP096 FP097 FP098 FP099
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.2177 Mean :0.2355 Mean :0.2376 Mean :0.2271
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP100 FP101 FP102 FP103
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.2313 Mean :0.2366 Mean :0.2019 Mean :0.2187
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP104 FP105 FP106 FP107
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.2229 Mean :0.2156 Mean :0.1914 Mean :0.2114
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP108 FP109 FP110 FP111
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.205 Mean :0.1767 Mean :0.2061 Mean :0.1966
## 3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP112 FP113 FP114 FP115
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1945 Mean :0.1956 Mean :0.1556 Mean :0.1788
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP116 FP117 FP118 FP119
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.000
## Mean :0.1924 Mean :0.1788 Mean :0.1924 Mean :0.163
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.000
## FP120 FP121 FP122 FP123
## Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.000 Median :0.0000
## Mean :0.1661 Mean :0.1399 Mean :0.164 Mean :0.1672
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.000 Max. :1.0000
## FP124 FP125 FP126 FP127
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1619 Mean :0.1556 Mean :0.1483 Mean :0.1399
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP128 FP129 FP130 FP131
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1483 Mean :0.1388 Mean :0.1052 Mean :0.1262
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP132 FP133 FP134 FP135
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1251 Mean :0.1262 Mean :0.1272 Mean :0.1262
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP136 FP137 FP138 FP139
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.1209 Mean :0.1157 Mean :0.1115 Mean :0.08202
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## FP140 FP141 FP142 FP143
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.1115 Mean :0.1167 Mean :0.1094 Mean :0.08097
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## FP144 FP145 FP146 FP147
## Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.000 Median :0.0000
## Mean :0.1041 Mean :0.1041 Mean :0.103 Mean :0.1052
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.000 Max. :1.0000
## FP148 FP149 FP150 FP151
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.08728 Mean :0.09043 Mean :0.07886 Mean :0.05573
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP152 FP153 FP154 FP155
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.08202 Mean :0.07781 Mean :0.03785 Mean :0.0694
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.0000
## FP156 FP157 FP158 FP159
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.07045 Mean :0.06204 Mean :0.05363 Mean :0.07045
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP160 FP161 FP162 FP163
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.0000
## Mean :0.06835 Mean :0.06625 Mean :0.4953 Mean :0.4763
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.00000 Max. :1.00000 Max. :1.0000 Max. :1.0000
## FP164 FP165 FP166 FP167
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.6278 Mean :0.3491 Mean :0.3312 Mean :0.3281
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP168 FP169 FP170 FP171
## Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :1.0000 Median :0.0000 Median :0.000 Median :0.0000
## Mean :0.6656 Mean :0.1861 Mean :0.184 Mean :0.1693
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.000 Max. :1.0000
## FP172 FP173 FP174 FP175
## Min. :0.0000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.000 Median :0.0000 Median :0.0000
## Mean :0.1514 Mean :0.142 Mean :0.1304 Mean :0.1346
## 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.000 Max. :1.0000 Max. :1.0000
## FP176 FP177 FP178 FP179
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.122 Mean :0.1209 Mean :0.1209 Mean :0.09779
## 3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## FP180 FP181 FP182 FP183
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.1073 Mean :0.09359 Mean :0.09884 Mean :0.07571
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP184 FP185 FP186 FP187
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.08412 Mean :0.08517 Mean :0.07676 Mean :0.07256
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP188 FP189 FP190 FP191
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.06835 Mean :0.07676 Mean :0.07256 Mean :0.07045
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP192 FP193 FP194 FP195
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.06099 Mean :0.06204 Mean :0.05889 Mean :0.06099
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP196 FP197 FP198 FP199
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.05678 Mean :0.05258 Mean :0.05678 Mean :0.04732
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP200 FP201 FP202 FP203
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.0000
## Mean :0.04942 Mean :0.05258 Mean :0.2576 Mean :0.1146
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.00000 Max. :1.0000 Max. :1.0000
## FP204 FP205 FP206 FP207
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.09884 Mean :0.07781 Mean :0.05994 Mean :0.05678
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP208 MolWeight NumAtoms NumNonHAtoms
## Min. :0.0000 Min. : 46.09 Min. : 5.00 Min. : 2.00
## 1st Qu.:0.0000 1st Qu.:122.61 1st Qu.:17.00 1st Qu.: 8.00
## Median :0.0000 Median :179.23 Median :22.00 Median :12.00
## Mean :0.1125 Mean :201.65 Mean :25.51 Mean :13.16
## 3rd Qu.:0.0000 3rd Qu.:264.34 3rd Qu.:31.00 3rd Qu.:17.00
## Max. :1.0000 Max. :665.81 Max. :94.00 Max. :47.00
## NumBonds NumNonHBonds NumMultBonds NumRotBonds
## Min. : 4.00 Min. : 1.00 Min. : 0.000 Min. : 0.000
## 1st Qu.:17.00 1st Qu.: 8.00 1st Qu.: 1.000 1st Qu.: 0.000
## Median :23.00 Median :12.00 Median : 6.000 Median : 2.000
## Mean :25.91 Mean :13.56 Mean : 6.148 Mean : 2.251
## 3rd Qu.:31.50 3rd Qu.:18.00 3rd Qu.:10.000 3rd Qu.: 3.500
## Max. :97.00 Max. :50.00 Max. :25.000 Max. :16.000
## NumDblBonds NumAromaticBonds NumHydrogen NumCarbon
## Min. :0.000 Min. : 0.000 Min. : 0.00 Min. : 1.000
## 1st Qu.:0.000 1st Qu.: 0.000 1st Qu.: 7.00 1st Qu.: 6.000
## Median :1.000 Median : 6.000 Median :11.00 Median : 9.000
## Mean :1.006 Mean : 5.121 Mean :12.35 Mean : 9.893
## 3rd Qu.:2.000 3rd Qu.: 6.000 3rd Qu.:16.00 3rd Qu.:12.000
## Max. :7.000 Max. :25.000 Max. :47.00 Max. :33.000
## NumNitrogen NumOxygen NumSulfer NumChlorine
## Min. :0.0000 Min. : 0.000 Min. :0.000 Min. : 0.0000
## 1st Qu.:0.0000 1st Qu.: 0.000 1st Qu.:0.000 1st Qu.: 0.0000
## Median :0.0000 Median : 1.000 Median :0.000 Median : 0.0000
## Mean :0.8128 Mean : 1.574 Mean :0.164 Mean : 0.5563
## 3rd Qu.:1.0000 3rd Qu.: 2.000 3rd Qu.:0.000 3rd Qu.: 0.0000
## Max. :6.0000 Max. :13.000 Max. :4.000 Max. :10.0000
## NumHalogen NumRings HydrophilicFactor SurfaceArea1
## Min. : 0.0000 Min. :0.000 Min. :-0.98500 Min. : 0.00
## 1st Qu.: 0.0000 1st Qu.:0.000 1st Qu.:-0.76300 1st Qu.: 9.23
## Median : 0.0000 Median :1.000 Median :-0.31400 Median : 29.10
## Mean : 0.6982 Mean :1.402 Mean :-0.02059 Mean : 36.46
## 3rd Qu.: 1.0000 3rd Qu.:2.000 3rd Qu.: 0.31300 3rd Qu.: 53.28
## Max. :10.0000 Max. :7.000 Max. :13.48300 Max. :331.94
## SurfaceArea2
## Min. : 0.00
## 1st Qu.: 10.63
## Median : 33.12
## Mean : 40.23
## 3rd Qu.: 60.66
## Max. :331.94
##################################
# Performing a general exploration of the test set
##################################
# Dimensions of the test set (rows, columns); the recorded output follows.
dim(Solubility_Test)
## [1] 316 229
# Compact per-column structure listing of the test set. The recorded
# output below is cut off by str() ("[list output truncated]").
str(Solubility_Test)
## 'data.frame': 316 obs. of 229 variables:
## $ solTestY : num 0.93 0.85 0.81 0.74 0.61 0.58 0.57 0.56 0.52 0.45 ...
## $ FP001 : int 1 1 0 0 1 1 1 0 1 0 ...
## $ FP002 : int 0 0 1 0 1 0 0 0 0 1 ...
## $ FP003 : int 0 1 0 1 0 0 0 0 1 0 ...
## $ FP004 : int 1 1 0 0 1 1 1 1 1 0 ...
## $ FP005 : int 0 0 1 0 1 0 0 0 0 1 ...
## $ FP006 : int 0 1 0 1 1 0 0 0 0 0 ...
## $ FP007 : int 0 0 0 0 0 0 0 1 1 0 ...
## $ FP008 : int 0 0 0 0 1 0 0 0 0 0 ...
## $ FP009 : int 1 0 0 0 0 0 0 0 0 0 ...
## $ FP010 : int 1 0 1 0 0 0 0 0 0 0 ...
## $ FP011 : int 0 1 0 0 1 0 0 0 0 0 ...
## $ FP012 : int 0 1 0 0 0 1 0 1 0 0 ...
## $ FP013 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP014 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP015 : int 1 1 0 1 1 1 1 1 1 1 ...
## $ FP016 : int 0 1 0 0 0 0 0 1 0 0 ...
## $ FP017 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP018 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP019 : int 0 0 0 0 1 0 0 0 0 1 ...
## $ FP020 : int 0 0 0 0 0 1 0 0 0 0 ...
## $ FP021 : int 1 0 0 0 0 0 0 0 0 0 ...
## $ FP022 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP023 : int 0 0 0 0 0 0 1 0 0 0 ...
## $ FP024 : int 0 0 0 0 1 0 0 0 0 1 ...
## $ FP025 : int 1 0 0 0 0 0 0 0 0 0 ...
## $ FP026 : int 0 0 0 0 0 0 0 0 0 1 ...
## $ FP027 : int 0 0 0 1 0 0 0 0 0 0 ...
## $ FP028 : int 0 0 0 1 0 0 0 0 0 0 ...
## $ FP029 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP030 : int 0 0 0 1 0 0 0 0 0 0 ...
## $ FP031 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP032 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ FP033 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ FP034 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP035 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP036 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP037 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ FP038 : int 1 0 0 0 0 0 0 0 0 0 ...
## $ FP039 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP040 : int 0 0 0 0 1 0 0 0 0 0 ...
## $ FP041 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP042 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP043 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP044 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP045 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP046 : int 0 0 1 0 0 0 0 0 0 1 ...
## $ FP047 : int 0 0 0 0 1 0 0 0 0 0 ...
## $ FP048 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP049 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP050 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP051 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP052 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP053 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP054 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP055 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ FP056 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP057 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP058 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP059 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP060 : int 1 1 1 0 0 1 0 1 0 0 ...
## $ FP061 : int 1 1 1 0 0 1 0 0 0 0 ...
## $ FP062 : int 1 1 0 0 1 1 1 0 1 0 ...
## $ FP063 : int 0 1 0 1 1 0 0 0 0 1 ...
## $ FP064 : int 1 1 0 0 0 0 0 0 1 0 ...
## $ FP065 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ FP066 : int 0 1 0 1 0 1 0 0 1 1 ...
## $ FP067 : int 0 1 0 1 1 0 0 0 0 1 ...
## $ FP068 : int 0 1 0 1 1 0 0 0 0 0 ...
## $ FP069 : int 0 0 0 0 0 0 0 0 1 1 ...
## $ FP070 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP071 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP072 : int 1 1 1 0 1 1 1 1 1 0 ...
## $ FP073 : int 1 0 1 0 0 0 0 0 0 0 ...
## $ FP074 : int 0 0 1 0 0 0 0 0 1 0 ...
## $ FP075 : int 0 1 0 1 0 0 0 1 0 0 ...
## $ FP076 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP077 : int 0 0 0 1 0 0 0 1 0 0 ...
## $ FP078 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ FP079 : int 0 0 1 1 1 0 0 0 0 1 ...
## $ FP080 : int 1 1 0 1 0 0 0 1 0 0 ...
## $ FP081 : int 0 0 0 1 0 0 0 0 1 0 ...
## $ FP082 : int 0 0 1 0 1 0 0 0 0 1 ...
## $ FP083 : int 0 1 0 1 1 0 0 0 0 0 ...
## $ FP084 : int 0 0 0 1 1 0 0 1 0 1 ...
## $ FP085 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP086 : int 0 0 0 1 0 0 0 0 0 0 ...
## $ FP087 : int 0 0 1 1 1 0 0 1 0 1 ...
## $ FP088 : int 1 0 0 0 0 0 0 1 1 0 ...
## $ FP089 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP090 : int 0 0 0 1 0 0 0 1 0 0 ...
## $ FP091 : int 0 0 0 1 1 0 0 0 0 0 ...
## $ FP092 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP093 : int 0 0 0 1 0 0 0 1 0 0 ...
## $ FP094 : int 0 1 0 0 0 0 0 0 1 0 ...
## $ FP095 : int 0 0 1 1 0 0 0 0 0 0 ...
## $ FP096 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP097 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP098 : int 1 1 0 0 0 1 0 0 0 0 ...
## [list output truncated]
# Per-column summary statistics of the test set (min, quartiles, median,
# mean, max), as shown in the recorded output below.
summary(Solubility_Test)
## solTestY FP001 FP002 FP003
## Min. :-10.410 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.: -3.953 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median : -2.480 Median :0.0000 Median :1.0000 Median :0.000
## Mean : -2.797 Mean :0.4684 Mean :0.5854 Mean :0.443
## 3rd Qu.: -1.373 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.000
## Max. : 1.070 Max. :1.0000 Max. :1.0000 Max. :1.000
## FP004 FP005 FP006 FP007
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :1.0000 Median :0.0000 Median :0.0000
## Mean :0.5316 Mean :0.6171 Mean :0.3513 Mean :0.3544
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP008 FP009 FP010 FP011
## Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.000 Median :0.0000
## Mean :0.3608 Mean :0.2627 Mean :0.193 Mean :0.1741
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.000 Max. :1.0000
## FP012 FP013 FP014 FP015
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:1.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :1.0000
## Mean :0.1677 Mean :0.1646 Mean :0.1582 Mean :0.8291
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP016 FP017 FP018 FP019
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.0000
## Mean :0.1424 Mean :0.1487 Mean :0.08544 Mean :0.1139
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.0000
## FP020 FP021 FP022 FP023
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.1076 Mean :0.1076 Mean :0.1171 Mean :0.08544
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## FP024 FP025 FP026 FP027
## Min. :0.0000 Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.00000 Median :0.0000 Median :0.00000
## Mean :0.0981 Mean :0.07911 Mean :0.1171 Mean :0.07911
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.00000 Max. :1.0000 Max. :1.00000
## FP028 FP029 FP030 FP031
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.05696 Mean :0.05063 Mean :0.08228 Mean :0.0981
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.0000
## FP032 FP033 FP034 FP035
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.0000
## Mean :0.1297 Mean :0.1203 Mean :0.06646 Mean :0.0981
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.0000
## FP036 FP037 FP038 FP039
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.06013 Mean :0.09494 Mean :0.03165 Mean :0.06329
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP040 FP041 FP042 FP043
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.05696 Mean :0.06013 Mean :0.06013 Mean :0.0443
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.0000
## FP044 FP045 FP046 FP047
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.0000
## Mean :0.06013 Mean :0.06329 Mean :0.3259 Mean :0.2975
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.00000 Max. :1.00000 Max. :1.0000 Max. :1.0000
## FP048 FP049 FP050 FP051
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.1139 Mean :0.1076 Mean :0.1139 Mean :0.05696
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## FP052 FP053 FP054 FP055
## Min. :0.0000 Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.00000 Median :0.0000 Median :0.00000
## Mean :0.1044 Mean :0.06013 Mean :0.0981 Mean :0.09177
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.00000 Max. :1.0000 Max. :1.00000
## FP056 FP057 FP058 FP059
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.06329 Mean :0.1234 Mean :0.1361 Mean :0.0443
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP060 FP061 FP062 FP063
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.4525 Mean :0.3924 Mean :0.4272 Mean :0.3576
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP064 FP065 FP066 FP067
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :1.0000 Median :1.0000 Median :0.0000
## Mean :0.3892 Mean :0.5981 Mean :0.6171 Mean :0.3259
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP068 FP069 FP070 FP071
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.2911 Mean :0.3734 Mean :0.3323 Mean :0.3449
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP072 FP073 FP074 FP075
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.6456 Mean :0.2911 Mean :0.3259 Mean :0.2563
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP076 FP077 FP078 FP079
## Min. :0.0000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.000 Median :0.0000 Median :1.0000
## Mean :0.3165 Mean :0.307 Mean :0.3101 Mean :0.7278
## 3rd Qu.:1.0000 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.000 Max. :1.0000 Max. :1.0000
## FP080 FP081 FP082 FP083
## Min. :0.0000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.000 Median :1.0000 Median :0.0000
## Mean :0.2627 Mean :0.288 Mean :0.7437 Mean :0.2532
## 3rd Qu.:1.0000 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.000 Max. :1.0000 Max. :1.0000
## FP084 FP085 FP086 FP087
## Min. :0.0000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:1.0000
## Median :0.0000 Median :0.000 Median :0.0000 Median :1.0000
## Mean :0.2247 Mean :0.269 Mean :0.2722 Mean :0.7627
## 3rd Qu.:0.0000 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.000 Max. :1.0000 Max. :1.0000
## FP088 FP089 FP090 FP091
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.000
## Mean :0.2437 Mean :0.2532 Mean :0.2278 Mean :0.231
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.000
## FP092 FP093 FP094 FP095
## Min. :0.0000 Min. :0.0000 Min. :0.00 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.00 Median :0.0000
## Mean :0.2184 Mean :0.2152 Mean :0.25 Mean :0.2057
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.25 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.00 Max. :1.0000
## FP096 FP097 FP098 FP099
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.000
## Mean :0.1867 Mean :0.2089 Mean :0.2025 Mean :0.212
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.000
## FP100 FP101 FP102 FP103
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1804 Mean :0.1772 Mean :0.1456 Mean :0.2184
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP104 FP105 FP106 FP107
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1835 Mean :0.2152 Mean :0.1361 Mean :0.1962
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP108 FP109 FP110 FP111
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1804 Mean :0.1741 Mean :0.1646 Mean :0.1804
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP112 FP113 FP114 FP115
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1772 Mean :0.1646 Mean :0.1772 Mean :0.1582
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP116 FP117 FP118 FP119
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1487 Mean :0.1709 Mean :0.1171 Mean :0.1677
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP120 FP121 FP122 FP123
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1551 Mean :0.1076 Mean :0.1361 Mean :0.1456
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP124 FP125 FP126 FP127
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1329 Mean :0.1203 Mean :0.1139 Mean :0.1487
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP128 FP129 FP130 FP131
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.0000
## Mean :0.1076 Mean :0.1392 Mean :0.08228 Mean :0.1076
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.0000
## FP132 FP133 FP134 FP135
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.1266 Mean :0.1361 Mean :0.08544 Mean :0.06329
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.00000
## FP136 FP137 FP138 FP139
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.1013 Mean :0.08861 Mean :0.08228 Mean :0.06329
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP140 FP141 FP142 FP143
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.08861 Mean :0.06962 Mean :0.09494 Mean :0.0538
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.0000
## FP144 FP145 FP146 FP147
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.09177 Mean :0.06329 Mean :0.09177 Mean :0.06962
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP148 FP149 FP150 FP151
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.07911 Mean :0.08228 Mean :0.06646 Mean :0.03165
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP152 FP153 FP154 FP155
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.0538 Mean :0.03481 Mean :0.03165 Mean :0.06646
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP156 FP157 FP158 FP159
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.04747 Mean :0.05696 Mean :0.07911 Mean :0.03481
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP160 FP161 FP162 FP163
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :1.0000 Median :0.0000
## Mean :0.03481 Mean :0.03481 Mean :0.5316 Mean :0.4525
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.00000 Max. :1.00000 Max. :1.0000 Max. :1.0000
## FP164 FP165 FP166 FP167
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.6551 Mean :0.3196 Mean :0.3386 Mean :0.3006
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP168 FP169 FP170 FP171
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.7152 Mean :0.1867 Mean :0.1551 Mean :0.1297
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP172 FP173 FP174 FP175
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1487 Mean :0.1361 Mean :0.1551 Mean :0.1329
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP176 FP177 FP178 FP179
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1076 Mean :0.1013 Mean :0.1076 Mean :0.1392
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP180 FP181 FP182 FP183
## Min. :0.00000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000 Median :0.00000 Median :0.0000
## Mean :0.06962 Mean :0.1044 Mean :0.07595 Mean :0.1329
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.0000 Max. :1.00000 Max. :1.0000
## FP184 FP185 FP186 FP187
## Min. :0.00000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.09494 Mean :0.0981 Mean :0.06013 Mean :0.06646
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.0000 Max. :1.00000 Max. :1.00000
## FP188 FP189 FP190 FP191
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.00000
## Mean :0.06962 Mean :0.04114 Mean :0.0538 Mean :0.05696
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.0000 Max. :1.00000
## FP192 FP193 FP194 FP195
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.06962 Mean :0.06962 Mean :0.06646 Mean :0.05063
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP196 FP197 FP198 FP199
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.00000
## Mean :0.06962 Mean :0.06329 Mean :0.0443 Mean :0.07278
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.0000 Max. :1.00000
## FP200 FP201 FP202 FP203
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.0000
## Mean :0.06329 Mean :0.04114 Mean :0.2658 Mean :0.1361
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.00000 Max. :1.0000 Max. :1.0000
## FP204 FP205 FP206 FP207
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.09494 Mean :0.07911 Mean :0.05063 Mean :0.0443
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.0000
## FP208 MolWeight NumAtoms NumNonHAtoms NumBonds
## Min. :0.0000 Min. : 56.07 Min. : 5.0 Min. : 3.00 Min. : 4
## 1st Qu.:0.0000 1st Qu.:121.91 1st Qu.:17.0 1st Qu.: 8.00 1st Qu.:16
## Median :0.0000 Median :170.11 Median :22.0 Median :11.00 Median :23
## Mean :0.1361 Mean :194.12 Mean :24.6 Mean :12.71 Mean :25
## 3rd Qu.:0.0000 3rd Qu.:253.82 3rd Qu.:29.0 3rd Qu.:16.00 3rd Qu.:30
## Max. :1.0000 Max. :478.92 Max. :68.0 Max. :33.00 Max. :71
## NumNonHBonds NumMultBonds NumRotBonds NumDblBonds
## Min. : 2.0 Min. : 0.000 Min. : 0.000 Min. :0.0000
## 1st Qu.: 8.0 1st Qu.: 1.000 1st Qu.: 0.000 1st Qu.:0.0000
## Median :12.0 Median : 6.000 Median : 1.000 Median :1.0000
## Mean :13.1 Mean : 6.313 Mean : 1.949 Mean :0.8892
## 3rd Qu.:17.0 3rd Qu.:10.000 3rd Qu.: 3.000 3rd Qu.:1.0000
## Max. :36.0 Max. :27.000 Max. :16.000 Max. :6.0000
## NumAromaticBonds NumHydrogen NumCarbon NumNitrogen
## Min. : 0.000 Min. : 0.0 Min. : 1.000 Min. :0.0000
## 1st Qu.: 0.000 1st Qu.: 7.0 1st Qu.: 6.000 1st Qu.:0.0000
## Median : 6.000 Median :11.0 Median : 8.000 Median :0.0000
## Mean : 5.399 Mean :11.9 Mean : 9.785 Mean :0.7089
## 3rd Qu.:10.000 3rd Qu.:15.0 3rd Qu.:12.000 3rd Qu.:1.0000
## Max. :27.000 Max. :40.0 Max. :24.000 Max. :6.0000
## NumOxygen NumSulfer NumChlorine NumHalogen
## Min. :0.000 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :1.000 Median :0.0000 Median :0.000 Median :0.0000
## Mean :1.389 Mean :0.1013 Mean :0.557 Mean :0.7089
## 3rd Qu.:2.000 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:1.0000
## Max. :9.000 Max. :3.0000 Max. :9.000 Max. :9.0000
## NumRings HydrophilicFactor SurfaceArea1 SurfaceArea2
## Min. :0.000 Min. :-0.9860 Min. : 0.00 Min. : 0.00
## 1st Qu.:1.000 1st Qu.:-0.7670 1st Qu.: 9.23 1st Qu.: 9.23
## Median :1.000 Median :-0.3970 Median : 26.30 Median : 26.30
## Mean :1.399 Mean :-0.1022 Mean : 32.76 Mean : 35.04
## 3rd Qu.:2.000 3rd Qu.: 0.2140 3rd Qu.: 49.55 3rd Qu.: 52.32
## Max. :6.000 Max. : 5.0000 Max. :201.85 Max. :201.85
##################################
# Formulating a data type assessment summary
##################################
# Working copy of the train set used for the data type assessment.
PDA <- Solubility_Train
# One row per column of PDA: its position, its name and its class.
# The outer parentheses make the assignment auto-print the summary.
(PDA.Summary <- data.frame(
  Column.Index = seq_along(PDA),
  Column.Name = names(PDA),
  # class(x)[1L] keeps exactly one label per column, so vapply() always
  # yields a character vector, even for columns carrying several classes.
  Column.Type = vapply(PDA, function(x) class(x)[1L], character(1)),
  row.names = NULL
))
## Column.Index Column.Name Column.Type
## 1 1 solTrainY numeric
## 2 2 FP001 integer
## 3 3 FP002 integer
## 4 4 FP003 integer
## 5 5 FP004 integer
## 6 6 FP005 integer
## 7 7 FP006 integer
## 8 8 FP007 integer
## 9 9 FP008 integer
## 10 10 FP009 integer
## 11 11 FP010 integer
## 12 12 FP011 integer
## 13 13 FP012 integer
## 14 14 FP013 integer
## 15 15 FP014 integer
## 16 16 FP015 integer
## 17 17 FP016 integer
## 18 18 FP017 integer
## 19 19 FP018 integer
## 20 20 FP019 integer
## 21 21 FP020 integer
## 22 22 FP021 integer
## 23 23 FP022 integer
## 24 24 FP023 integer
## 25 25 FP024 integer
## 26 26 FP025 integer
## 27 27 FP026 integer
## 28 28 FP027 integer
## 29 29 FP028 integer
## 30 30 FP029 integer
## 31 31 FP030 integer
## 32 32 FP031 integer
## 33 33 FP032 integer
## 34 34 FP033 integer
## 35 35 FP034 integer
## 36 36 FP035 integer
## 37 37 FP036 integer
## 38 38 FP037 integer
## 39 39 FP038 integer
## 40 40 FP039 integer
## 41 41 FP040 integer
## 42 42 FP041 integer
## 43 43 FP042 integer
## 44 44 FP043 integer
## 45 45 FP044 integer
## 46 46 FP045 integer
## 47 47 FP046 integer
## 48 48 FP047 integer
## 49 49 FP048 integer
## 50 50 FP049 integer
## 51 51 FP050 integer
## 52 52 FP051 integer
## 53 53 FP052 integer
## 54 54 FP053 integer
## 55 55 FP054 integer
## 56 56 FP055 integer
## 57 57 FP056 integer
## 58 58 FP057 integer
## 59 59 FP058 integer
## 60 60 FP059 integer
## 61 61 FP060 integer
## 62 62 FP061 integer
## 63 63 FP062 integer
## 64 64 FP063 integer
## 65 65 FP064 integer
## 66 66 FP065 integer
## 67 67 FP066 integer
## 68 68 FP067 integer
## 69 69 FP068 integer
## 70 70 FP069 integer
## 71 71 FP070 integer
## 72 72 FP071 integer
## 73 73 FP072 integer
## 74 74 FP073 integer
## 75 75 FP074 integer
## 76 76 FP075 integer
## 77 77 FP076 integer
## 78 78 FP077 integer
## 79 79 FP078 integer
## 80 80 FP079 integer
## 81 81 FP080 integer
## 82 82 FP081 integer
## 83 83 FP082 integer
## 84 84 FP083 integer
## 85 85 FP084 integer
## 86 86 FP085 integer
## 87 87 FP086 integer
## 88 88 FP087 integer
## 89 89 FP088 integer
## 90 90 FP089 integer
## 91 91 FP090 integer
## 92 92 FP091 integer
## 93 93 FP092 integer
## 94 94 FP093 integer
## 95 95 FP094 integer
## 96 96 FP095 integer
## 97 97 FP096 integer
## 98 98 FP097 integer
## 99 99 FP098 integer
## 100 100 FP099 integer
## 101 101 FP100 integer
## 102 102 FP101 integer
## 103 103 FP102 integer
## 104 104 FP103 integer
## 105 105 FP104 integer
## 106 106 FP105 integer
## 107 107 FP106 integer
## 108 108 FP107 integer
## 109 109 FP108 integer
## 110 110 FP109 integer
## 111 111 FP110 integer
## 112 112 FP111 integer
## 113 113 FP112 integer
## 114 114 FP113 integer
## 115 115 FP114 integer
## 116 116 FP115 integer
## 117 117 FP116 integer
## 118 118 FP117 integer
## 119 119 FP118 integer
## 120 120 FP119 integer
## 121 121 FP120 integer
## 122 122 FP121 integer
## 123 123 FP122 integer
## 124 124 FP123 integer
## 125 125 FP124 integer
## 126 126 FP125 integer
## 127 127 FP126 integer
## 128 128 FP127 integer
## 129 129 FP128 integer
## 130 130 FP129 integer
## 131 131 FP130 integer
## 132 132 FP131 integer
## 133 133 FP132 integer
## 134 134 FP133 integer
## 135 135 FP134 integer
## 136 136 FP135 integer
## 137 137 FP136 integer
## 138 138 FP137 integer
## 139 139 FP138 integer
## 140 140 FP139 integer
## 141 141 FP140 integer
## 142 142 FP141 integer
## 143 143 FP142 integer
## 144 144 FP143 integer
## 145 145 FP144 integer
## 146 146 FP145 integer
## 147 147 FP146 integer
## 148 148 FP147 integer
## 149 149 FP148 integer
## 150 150 FP149 integer
## 151 151 FP150 integer
## 152 152 FP151 integer
## 153 153 FP152 integer
## 154 154 FP153 integer
## 155 155 FP154 integer
## 156 156 FP155 integer
## 157 157 FP156 integer
## 158 158 FP157 integer
## 159 159 FP158 integer
## 160 160 FP159 integer
## 161 161 FP160 integer
## 162 162 FP161 integer
## 163 163 FP162 integer
## 164 164 FP163 integer
## 165 165 FP164 integer
## 166 166 FP165 integer
## 167 167 FP166 integer
## 168 168 FP167 integer
## 169 169 FP168 integer
## 170 170 FP169 integer
## 171 171 FP170 integer
## 172 172 FP171 integer
## 173 173 FP172 integer
## 174 174 FP173 integer
## 175 175 FP174 integer
## 176 176 FP175 integer
## 177 177 FP176 integer
## 178 178 FP177 integer
## 179 179 FP178 integer
## 180 180 FP179 integer
## 181 181 FP180 integer
## 182 182 FP181 integer
## 183 183 FP182 integer
## 184 184 FP183 integer
## 185 185 FP184 integer
## 186 186 FP185 integer
## 187 187 FP186 integer
## 188 188 FP187 integer
## 189 189 FP188 integer
## 190 190 FP189 integer
## 191 191 FP190 integer
## 192 192 FP191 integer
## 193 193 FP192 integer
## 194 194 FP193 integer
## 195 195 FP194 integer
## 196 196 FP195 integer
## 197 197 FP196 integer
## 198 198 FP197 integer
## 199 199 FP198 integer
## 200 200 FP199 integer
## 201 201 FP200 integer
## 202 202 FP201 integer
## 203 203 FP202 integer
## 204 204 FP203 integer
## 205 205 FP204 integer
## 206 206 FP205 integer
## 207 207 FP206 integer
## 208 208 FP207 integer
## 209 209 FP208 integer
## 210 210 MolWeight numeric
## 211 211 NumAtoms integer
## 212 212 NumNonHAtoms integer
## 213 213 NumBonds integer
## 214 214 NumNonHBonds integer
## 215 215 NumMultBonds integer
## 216 216 NumRotBonds integer
## 217 217 NumDblBonds integer
## 218 218 NumAromaticBonds integer
## 219 219 NumHydrogen integer
## 220 220 NumCarbon integer
## 221 221 NumNitrogen integer
## 222 222 NumOxygen integer
## 223 223 NumSulfer integer
## 224 224 NumChlorine integer
## 225 225 NumHalogen integer
## 226 226 NumRings integer
## 227 227 HydrophilicFactor numeric
## 228 228 SurfaceArea1 numeric
## 229 229 SurfaceArea2 numeric
##################################
# Loading dataset
##################################
# Working copy of the train set used for the data quality assessment.
DQA <- Solubility_Train
##################################
# Formulating an overall data quality assessment summary
##################################
# One row per column of DQA: position, name, class, number of rows,
# number of missing values and the share of non-missing values.
# The outer parentheses make the assignment auto-print the summary.
(DQA.Summary <- data.frame(
  Column.Index = seq_along(DQA),
  Column.Name = names(DQA),
  # class(x)[1L] keeps exactly one label per column, so vapply() always
  # yields a character vector, even for columns carrying several classes.
  Column.Type = vapply(DQA, function(x) class(x)[1L], character(1)),
  # The row count is identical for every column, so it is simply repeated.
  Row.Count = rep(nrow(DQA), ncol(DQA)),
  NA.Count = vapply(DQA, function(x) sum(is.na(x)), integer(1)),
  # Fill rate is kept as text with exactly three decimals (e.g. "1.000").
  Fill.Rate = vapply(
    DQA,
    function(x) format(round(sum(!is.na(x)) / nrow(DQA), 3), nsmall = 3),
    character(1)
  ),
  row.names = NULL
))
## Column.Index Column.Name Column.Type Row.Count NA.Count Fill.Rate
## 1 1 solTrainY numeric 951 0 1.000
## 2 2 FP001 integer 951 0 1.000
## 3 3 FP002 integer 951 0 1.000
## 4 4 FP003 integer 951 0 1.000
## 5 5 FP004 integer 951 0 1.000
## 6 6 FP005 integer 951 0 1.000
## 7 7 FP006 integer 951 0 1.000
## 8 8 FP007 integer 951 0 1.000
## 9 9 FP008 integer 951 0 1.000
## 10 10 FP009 integer 951 0 1.000
## 11 11 FP010 integer 951 0 1.000
## 12 12 FP011 integer 951 0 1.000
## 13 13 FP012 integer 951 0 1.000
## 14 14 FP013 integer 951 0 1.000
## 15 15 FP014 integer 951 0 1.000
## 16 16 FP015 integer 951 0 1.000
## 17 17 FP016 integer 951 0 1.000
## 18 18 FP017 integer 951 0 1.000
## 19 19 FP018 integer 951 0 1.000
## 20 20 FP019 integer 951 0 1.000
## 21 21 FP020 integer 951 0 1.000
## 22 22 FP021 integer 951 0 1.000
## 23 23 FP022 integer 951 0 1.000
## 24 24 FP023 integer 951 0 1.000
## 25 25 FP024 integer 951 0 1.000
## 26 26 FP025 integer 951 0 1.000
## 27 27 FP026 integer 951 0 1.000
## 28 28 FP027 integer 951 0 1.000
## 29 29 FP028 integer 951 0 1.000
## 30 30 FP029 integer 951 0 1.000
## 31 31 FP030 integer 951 0 1.000
## 32 32 FP031 integer 951 0 1.000
## 33 33 FP032 integer 951 0 1.000
## 34 34 FP033 integer 951 0 1.000
## 35 35 FP034 integer 951 0 1.000
## 36 36 FP035 integer 951 0 1.000
## 37 37 FP036 integer 951 0 1.000
## 38 38 FP037 integer 951 0 1.000
## 39 39 FP038 integer 951 0 1.000
## 40 40 FP039 integer 951 0 1.000
## 41 41 FP040 integer 951 0 1.000
## 42 42 FP041 integer 951 0 1.000
## 43 43 FP042 integer 951 0 1.000
## 44 44 FP043 integer 951 0 1.000
## 45 45 FP044 integer 951 0 1.000
## 46 46 FP045 integer 951 0 1.000
## 47 47 FP046 integer 951 0 1.000
## 48 48 FP047 integer 951 0 1.000
## 49 49 FP048 integer 951 0 1.000
## 50 50 FP049 integer 951 0 1.000
## 51 51 FP050 integer 951 0 1.000
## 52 52 FP051 integer 951 0 1.000
## 53 53 FP052 integer 951 0 1.000
## 54 54 FP053 integer 951 0 1.000
## 55 55 FP054 integer 951 0 1.000
## 56 56 FP055 integer 951 0 1.000
## 57 57 FP056 integer 951 0 1.000
## 58 58 FP057 integer 951 0 1.000
## 59 59 FP058 integer 951 0 1.000
## 60 60 FP059 integer 951 0 1.000
## 61 61 FP060 integer 951 0 1.000
## 62 62 FP061 integer 951 0 1.000
## 63 63 FP062 integer 951 0 1.000
## 64 64 FP063 integer 951 0 1.000
## 65 65 FP064 integer 951 0 1.000
## 66 66 FP065 integer 951 0 1.000
## 67 67 FP066 integer 951 0 1.000
## 68 68 FP067 integer 951 0 1.000
## 69 69 FP068 integer 951 0 1.000
## 70 70 FP069 integer 951 0 1.000
## 71 71 FP070 integer 951 0 1.000
## 72 72 FP071 integer 951 0 1.000
## 73 73 FP072 integer 951 0 1.000
## 74 74 FP073 integer 951 0 1.000
## 75 75 FP074 integer 951 0 1.000
## 76 76 FP075 integer 951 0 1.000
## 77 77 FP076 integer 951 0 1.000
## 78 78 FP077 integer 951 0 1.000
## 79 79 FP078 integer 951 0 1.000
## 80 80 FP079 integer 951 0 1.000
## 81 81 FP080 integer 951 0 1.000
## 82 82 FP081 integer 951 0 1.000
## 83 83 FP082 integer 951 0 1.000
## 84 84 FP083 integer 951 0 1.000
## 85 85 FP084 integer 951 0 1.000
## 86 86 FP085 integer 951 0 1.000
## 87 87 FP086 integer 951 0 1.000
## 88 88 FP087 integer 951 0 1.000
## 89 89 FP088 integer 951 0 1.000
## 90 90 FP089 integer 951 0 1.000
## 91 91 FP090 integer 951 0 1.000
## 92 92 FP091 integer 951 0 1.000
## 93 93 FP092 integer 951 0 1.000
## 94 94 FP093 integer 951 0 1.000
## 95 95 FP094 integer 951 0 1.000
## 96 96 FP095 integer 951 0 1.000
## 97 97 FP096 integer 951 0 1.000
## 98 98 FP097 integer 951 0 1.000
## 99 99 FP098 integer 951 0 1.000
## 100 100 FP099 integer 951 0 1.000
## 101 101 FP100 integer 951 0 1.000
## 102 102 FP101 integer 951 0 1.000
## 103 103 FP102 integer 951 0 1.000
## 104 104 FP103 integer 951 0 1.000
## 105 105 FP104 integer 951 0 1.000
## 106 106 FP105 integer 951 0 1.000
## 107 107 FP106 integer 951 0 1.000
## 108 108 FP107 integer 951 0 1.000
## 109 109 FP108 integer 951 0 1.000
## 110 110 FP109 integer 951 0 1.000
## 111 111 FP110 integer 951 0 1.000
## 112 112 FP111 integer 951 0 1.000
## 113 113 FP112 integer 951 0 1.000
## 114 114 FP113 integer 951 0 1.000
## 115 115 FP114 integer 951 0 1.000
## 116 116 FP115 integer 951 0 1.000
## 117 117 FP116 integer 951 0 1.000
## 118 118 FP117 integer 951 0 1.000
## 119 119 FP118 integer 951 0 1.000
## 120 120 FP119 integer 951 0 1.000
## 121 121 FP120 integer 951 0 1.000
## 122 122 FP121 integer 951 0 1.000
## 123 123 FP122 integer 951 0 1.000
## 124 124 FP123 integer 951 0 1.000
## 125 125 FP124 integer 951 0 1.000
## 126 126 FP125 integer 951 0 1.000
## 127 127 FP126 integer 951 0 1.000
## 128 128 FP127 integer 951 0 1.000
## 129 129 FP128 integer 951 0 1.000
## 130 130 FP129 integer 951 0 1.000
## 131 131 FP130 integer 951 0 1.000
## 132 132 FP131 integer 951 0 1.000
## 133 133 FP132 integer 951 0 1.000
## 134 134 FP133 integer 951 0 1.000
## 135 135 FP134 integer 951 0 1.000
## 136 136 FP135 integer 951 0 1.000
## 137 137 FP136 integer 951 0 1.000
## 138 138 FP137 integer 951 0 1.000
## 139 139 FP138 integer 951 0 1.000
## 140 140 FP139 integer 951 0 1.000
## 141 141 FP140 integer 951 0 1.000
## 142 142 FP141 integer 951 0 1.000
## 143 143 FP142 integer 951 0 1.000
## 144 144 FP143 integer 951 0 1.000
## 145 145 FP144 integer 951 0 1.000
## 146 146 FP145 integer 951 0 1.000
## 147 147 FP146 integer 951 0 1.000
## 148 148 FP147 integer 951 0 1.000
## 149 149 FP148 integer 951 0 1.000
## 150 150 FP149 integer 951 0 1.000
## 151 151 FP150 integer 951 0 1.000
## 152 152 FP151 integer 951 0 1.000
## 153 153 FP152 integer 951 0 1.000
## 154 154 FP153 integer 951 0 1.000
## 155 155 FP154 integer 951 0 1.000
## 156 156 FP155 integer 951 0 1.000
## 157 157 FP156 integer 951 0 1.000
## 158 158 FP157 integer 951 0 1.000
## 159 159 FP158 integer 951 0 1.000
## 160 160 FP159 integer 951 0 1.000
## 161 161 FP160 integer 951 0 1.000
## 162 162 FP161 integer 951 0 1.000
## 163 163 FP162 integer 951 0 1.000
## 164 164 FP163 integer 951 0 1.000
## 165 165 FP164 integer 951 0 1.000
## 166 166 FP165 integer 951 0 1.000
## 167 167 FP166 integer 951 0 1.000
## 168 168 FP167 integer 951 0 1.000
## 169 169 FP168 integer 951 0 1.000
## 170 170 FP169 integer 951 0 1.000
## 171 171 FP170 integer 951 0 1.000
## 172 172 FP171 integer 951 0 1.000
## 173 173 FP172 integer 951 0 1.000
## 174 174 FP173 integer 951 0 1.000
## 175 175 FP174 integer 951 0 1.000
## 176 176 FP175 integer 951 0 1.000
## 177 177 FP176 integer 951 0 1.000
## 178 178 FP177 integer 951 0 1.000
## 179 179 FP178 integer 951 0 1.000
## 180 180 FP179 integer 951 0 1.000
## 181 181 FP180 integer 951 0 1.000
## 182 182 FP181 integer 951 0 1.000
## 183 183 FP182 integer 951 0 1.000
## 184 184 FP183 integer 951 0 1.000
## 185 185 FP184 integer 951 0 1.000
## 186 186 FP185 integer 951 0 1.000
## 187 187 FP186 integer 951 0 1.000
## 188 188 FP187 integer 951 0 1.000
## 189 189 FP188 integer 951 0 1.000
## 190 190 FP189 integer 951 0 1.000
## 191 191 FP190 integer 951 0 1.000
## 192 192 FP191 integer 951 0 1.000
## 193 193 FP192 integer 951 0 1.000
## 194 194 FP193 integer 951 0 1.000
## 195 195 FP194 integer 951 0 1.000
## 196 196 FP195 integer 951 0 1.000
## 197 197 FP196 integer 951 0 1.000
## 198 198 FP197 integer 951 0 1.000
## 199 199 FP198 integer 951 0 1.000
## 200 200 FP199 integer 951 0 1.000
## 201 201 FP200 integer 951 0 1.000
## 202 202 FP201 integer 951 0 1.000
## 203 203 FP202 integer 951 0 1.000
## 204 204 FP203 integer 951 0 1.000
## 205 205 FP204 integer 951 0 1.000
## 206 206 FP205 integer 951 0 1.000
## 207 207 FP206 integer 951 0 1.000
## 208 208 FP207 integer 951 0 1.000
## 209 209 FP208 integer 951 0 1.000
## 210 210 MolWeight numeric 951 0 1.000
## 211 211 NumAtoms integer 951 0 1.000
## 212 212 NumNonHAtoms integer 951 0 1.000
## 213 213 NumBonds integer 951 0 1.000
## 214 214 NumNonHBonds integer 951 0 1.000
## 215 215 NumMultBonds integer 951 0 1.000
## 216 216 NumRotBonds integer 951 0 1.000
## 217 217 NumDblBonds integer 951 0 1.000
## 218 218 NumAromaticBonds integer 951 0 1.000
## 219 219 NumHydrogen integer 951 0 1.000
## 220 220 NumCarbon integer 951 0 1.000
## 221 221 NumNitrogen integer 951 0 1.000
## 222 222 NumOxygen integer 951 0 1.000
## 223 223 NumSulfer integer 951 0 1.000
## 224 224 NumChlorine integer 951 0 1.000
## 225 225 NumHalogen integer 951 0 1.000
## 226 226 NumRings integer 951 0 1.000
## 227 227 HydrophilicFactor numeric 951 0 1.000
## 228 228 SurfaceArea1 numeric 951 0 1.000
## 229 229 SurfaceArea2 numeric 951 0 1.000
##################################
# Listing all predictors
##################################
# Keep every column of DQA except the response variable "solTrainY".
# drop = FALSE keeps the result a data frame even if one column remains.
DQA.Predictors <- DQA[, !names(DQA) %in% c("solTrainY"), drop = FALSE]
##################################
# Listing all numeric predictors
##################################
# Keep every predictor whose name does not contain "FP".
# A logical mask is used instead of negative grep() indices: with no "FP"
# match, -integer(0) would select zero columns rather than all of them.
# drop = FALSE keeps a data frame even when a single column remains.
DQA.Predictors.Numeric <- DQA.Predictors[, !grepl("FP", names(DQA.Predictors)), drop = FALSE]

# Report how many numeric predictors were retained.
if (length(names(DQA.Predictors.Numeric)) > 0) {
  print(paste0("There are ",
               length(names(DQA.Predictors.Numeric)),
               " numeric predictor variable(s)."))
} else {
  print("There are no numeric predictor variables.")
}
## [1] "There are 20 numeric predictor variable(s)."
##################################
# Listing all factor predictors
##################################
# Select every predictor whose name contains "FP" and convert each of
# those columns to a factor.
DQA.Predictors.Factor <- as.data.frame(lapply(DQA.Predictors[(grep("FP", names(DQA.Predictors)))], factor))

# Report how many factor predictors were found.
if (length(names(DQA.Predictors.Factor)) > 0) {
  print(paste0("There are ",
               length(names(DQA.Predictors.Factor)),
               " factor predictor variable(s)."))
} else {
  print("There are no factor predictor variables.")
}
## [1] "There are 208 factor predictor variable(s)."
##################################
# Formulating a data quality assessment summary for factor predictors
##################################
if (length(names(DQA.Predictors.Factor)) > 0) {

  ##################################
  # Formulating a function to determine the first mode
  ##################################
  # Returns the most frequent non-missing value(s) of x; ties are all
  # returned, in order of first appearance.
  FirstModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    ux[tab == max(tab)]
  }

  ##################################
  # Formulating a function to determine the second mode
  ##################################
  # Returns the most frequent value(s) of x once every first-mode value has
  # been removed. The sentinel "x" is returned when no second mode exists
  # (nothing is left after removing the first mode) or when a candidate
  # second mode is NA.
  SecondModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    fm <- ux[tab == max(tab)]
    sm <- x[!(x %in% fm)]
    usm <- unique(sm)
    # Guard the empty case explicitly: max() of an empty vector warns and
    # would yield an empty result instead of the sentinel.
    if (length(usm) == 0) {
      return("x")
    }
    tabsm <- tabulate(match(sm, usm))
    second <- usm[tabsm == max(tabsm)]
    if (anyNA(second)) "x" else second
  }

  # Build the per-column summary; the outer parentheses print the result.
  (DQA.Predictors.Factor.Summary <- data.frame(
    Column.Name= names(DQA.Predictors.Factor),
    Column.Type=sapply(DQA.Predictors.Factor, function(x) class(x)),
    Unique.Count=sapply(DQA.Predictors.Factor, function(x) length(unique(x))),
    First.Mode.Value=sapply(DQA.Predictors.Factor, function(x) as.character(FirstModes(x)[1])),
    Second.Mode.Value=sapply(DQA.Predictors.Factor, function(x) as.character(SecondModes(x)[1])),
    First.Mode.Count=sapply(DQA.Predictors.Factor, function(x) sum(na.omit(x) == FirstModes(x)[1])),
    Second.Mode.Count=sapply(DQA.Predictors.Factor, function(x) sum(na.omit(x) == SecondModes(x)[1])),
    Unique.Count.Ratio=sapply(DQA.Predictors.Factor, function(x) format(round((length(unique(x))/nrow(DQA.Predictors.Factor)),3), nsmall=3)),
    First.Second.Mode.Ratio=sapply(DQA.Predictors.Factor, function(x) format(round((sum(na.omit(x) == FirstModes(x)[1])/sum(na.omit(x) == SecondModes(x)[1])),3), nsmall=3)),
    row.names=NULL)
  )

}
## Column.Name Column.Type Unique.Count First.Mode.Value Second.Mode.Value
## 1 FP001 factor 2 0 1
## 2 FP002 factor 2 1 0
## 3 FP003 factor 2 0 1
## 4 FP004 factor 2 1 0
## 5 FP005 factor 2 1 0
## 6 FP006 factor 2 0 1
## 7 FP007 factor 2 0 1
## 8 FP008 factor 2 0 1
## 9 FP009 factor 2 0 1
## 10 FP010 factor 2 0 1
## 11 FP011 factor 2 0 1
## 12 FP012 factor 2 0 1
## 13 FP013 factor 2 0 1
## 14 FP014 factor 2 0 1
## 15 FP015 factor 2 1 0
## 16 FP016 factor 2 0 1
## 17 FP017 factor 2 0 1
## 18 FP018 factor 2 0 1
## 19 FP019 factor 2 0 1
## 20 FP020 factor 2 0 1
## 21 FP021 factor 2 0 1
## 22 FP022 factor 2 0 1
## 23 FP023 factor 2 0 1
## 24 FP024 factor 2 0 1
## 25 FP025 factor 2 0 1
## 26 FP026 factor 2 0 1
## 27 FP027 factor 2 0 1
## 28 FP028 factor 2 0 1
## 29 FP029 factor 2 0 1
## 30 FP030 factor 2 0 1
## 31 FP031 factor 2 0 1
## 32 FP032 factor 2 0 1
## 33 FP033 factor 2 0 1
## 34 FP034 factor 2 0 1
## 35 FP035 factor 2 0 1
## 36 FP036 factor 2 0 1
## 37 FP037 factor 2 0 1
## 38 FP038 factor 2 0 1
## 39 FP039 factor 2 0 1
## 40 FP040 factor 2 0 1
## 41 FP041 factor 2 0 1
## 42 FP042 factor 2 0 1
## 43 FP043 factor 2 0 1
## 44 FP044 factor 2 0 1
## 45 FP045 factor 2 0 1
## 46 FP046 factor 2 0 1
## 47 FP047 factor 2 0 1
## 48 FP048 factor 2 0 1
## 49 FP049 factor 2 0 1
## 50 FP050 factor 2 0 1
## 51 FP051 factor 2 0 1
## 52 FP052 factor 2 0 1
## 53 FP053 factor 2 0 1
## 54 FP054 factor 2 0 1
## 55 FP055 factor 2 0 1
## 56 FP056 factor 2 0 1
## 57 FP057 factor 2 0 1
## 58 FP058 factor 2 0 1
## 59 FP059 factor 2 0 1
## 60 FP060 factor 2 0 1
## 61 FP061 factor 2 0 1
## 62 FP062 factor 2 0 1
## 63 FP063 factor 2 0 1
## 64 FP064 factor 2 0 1
## 65 FP065 factor 2 1 0
## 66 FP066 factor 2 1 0
## 67 FP067 factor 2 0 1
## 68 FP068 factor 2 0 1
## 69 FP069 factor 2 0 1
## 70 FP070 factor 2 0 1
## 71 FP071 factor 2 0 1
## 72 FP072 factor 2 1 0
## 73 FP073 factor 2 0 1
## 74 FP074 factor 2 0 1
## 75 FP075 factor 2 0 1
## 76 FP076 factor 2 0 1
## 77 FP077 factor 2 0 1
## 78 FP078 factor 2 0 1
## 79 FP079 factor 2 1 0
## 80 FP080 factor 2 0 1
## 81 FP081 factor 2 0 1
## 82 FP082 factor 2 1 0
## 83 FP083 factor 2 0 1
## 84 FP084 factor 2 0 1
## 85 FP085 factor 2 0 1
## 86 FP086 factor 2 0 1
## 87 FP087 factor 2 1 0
## 88 FP088 factor 2 0 1
## 89 FP089 factor 2 0 1
## 90 FP090 factor 2 0 1
## 91 FP091 factor 2 0 1
## 92 FP092 factor 2 0 1
## 93 FP093 factor 2 0 1
## 94 FP094 factor 2 0 1
## 95 FP095 factor 2 0 1
## 96 FP096 factor 2 0 1
## 97 FP097 factor 2 0 1
## 98 FP098 factor 2 0 1
## 99 FP099 factor 2 0 1
## 100 FP100 factor 2 0 1
## 101 FP101 factor 2 0 1
## 102 FP102 factor 2 0 1
## 103 FP103 factor 2 0 1
## 104 FP104 factor 2 0 1
## 105 FP105 factor 2 0 1
## 106 FP106 factor 2 0 1
## 107 FP107 factor 2 0 1
## 108 FP108 factor 2 0 1
## 109 FP109 factor 2 0 1
## 110 FP110 factor 2 0 1
## 111 FP111 factor 2 0 1
## 112 FP112 factor 2 0 1
## 113 FP113 factor 2 0 1
## 114 FP114 factor 2 0 1
## 115 FP115 factor 2 0 1
## 116 FP116 factor 2 0 1
## 117 FP117 factor 2 0 1
## 118 FP118 factor 2 0 1
## 119 FP119 factor 2 0 1
## 120 FP120 factor 2 0 1
## 121 FP121 factor 2 0 1
## 122 FP122 factor 2 0 1
## 123 FP123 factor 2 0 1
## 124 FP124 factor 2 0 1
## 125 FP125 factor 2 0 1
## 126 FP126 factor 2 0 1
## 127 FP127 factor 2 0 1
## 128 FP128 factor 2 0 1
## 129 FP129 factor 2 0 1
## 130 FP130 factor 2 0 1
## 131 FP131 factor 2 0 1
## 132 FP132 factor 2 0 1
## 133 FP133 factor 2 0 1
## 134 FP134 factor 2 0 1
## 135 FP135 factor 2 0 1
## 136 FP136 factor 2 0 1
## 137 FP137 factor 2 0 1
## 138 FP138 factor 2 0 1
## 139 FP139 factor 2 0 1
## 140 FP140 factor 2 0 1
## 141 FP141 factor 2 0 1
## 142 FP142 factor 2 0 1
## 143 FP143 factor 2 0 1
## 144 FP144 factor 2 0 1
## 145 FP145 factor 2 0 1
## 146 FP146 factor 2 0 1
## 147 FP147 factor 2 0 1
## 148 FP148 factor 2 0 1
## 149 FP149 factor 2 0 1
## 150 FP150 factor 2 0 1
## 151 FP151 factor 2 0 1
## 152 FP152 factor 2 0 1
## 153 FP153 factor 2 0 1
## 154 FP154 factor 2 0 1
## 155 FP155 factor 2 0 1
## 156 FP156 factor 2 0 1
## 157 FP157 factor 2 0 1
## 158 FP158 factor 2 0 1
## 159 FP159 factor 2 0 1
## 160 FP160 factor 2 0 1
## 161 FP161 factor 2 0 1
## 162 FP162 factor 2 0 1
## 163 FP163 factor 2 0 1
## 164 FP164 factor 2 1 0
## 165 FP165 factor 2 0 1
## 166 FP166 factor 2 0 1
## 167 FP167 factor 2 0 1
## 168 FP168 factor 2 1 0
## 169 FP169 factor 2 0 1
## 170 FP170 factor 2 0 1
## 171 FP171 factor 2 0 1
## 172 FP172 factor 2 0 1
## 173 FP173 factor 2 0 1
## 174 FP174 factor 2 0 1
## 175 FP175 factor 2 0 1
## 176 FP176 factor 2 0 1
## 177 FP177 factor 2 0 1
## 178 FP178 factor 2 0 1
## 179 FP179 factor 2 0 1
## 180 FP180 factor 2 0 1
## 181 FP181 factor 2 0 1
## 182 FP182 factor 2 0 1
## 183 FP183 factor 2 0 1
## 184 FP184 factor 2 0 1
## 185 FP185 factor 2 0 1
## 186 FP186 factor 2 0 1
## 187 FP187 factor 2 0 1
## 188 FP188 factor 2 0 1
## 189 FP189 factor 2 0 1
## 190 FP190 factor 2 0 1
## 191 FP191 factor 2 0 1
## 192 FP192 factor 2 0 1
## 193 FP193 factor 2 0 1
## 194 FP194 factor 2 0 1
## 195 FP195 factor 2 0 1
## 196 FP196 factor 2 0 1
## 197 FP197 factor 2 0 1
## 198 FP198 factor 2 0 1
## 199 FP199 factor 2 0 1
## 200 FP200 factor 2 0 1
## 201 FP201 factor 2 0 1
## 202 FP202 factor 2 0 1
## 203 FP203 factor 2 0 1
## 204 FP204 factor 2 0 1
## 205 FP205 factor 2 0 1
## 206 FP206 factor 2 0 1
## 207 FP207 factor 2 0 1
## 208 FP208 factor 2 0 1
## First.Mode.Count Second.Mode.Count Unique.Count.Ratio
## 1 482 469 0.002
## 2 513 438 0.002
## 3 536 415 0.002
## 4 556 395 0.002
## 5 551 400 0.002
## 6 570 381 0.002
## 7 605 346 0.002
## 8 641 310 0.002
## 9 685 266 0.002
## 10 781 170 0.002
## 11 747 204 0.002
## 12 783 168 0.002
## 13 793 158 0.002
## 14 798 153 0.002
## 15 818 133 0.002
## 16 812 139 0.002
## 17 814 137 0.002
## 18 826 125 0.002
## 19 835 116 0.002
## 20 837 114 0.002
## 21 836 115 0.002
## 22 852 99 0.002
## 23 834 117 0.002
## 24 844 107 0.002
## 25 841 110 0.002
## 26 871 80 0.002
## 27 858 93 0.002
## 28 850 101 0.002
## 29 854 97 0.002
## 30 862 89 0.002
## 31 866 85 0.002
## 32 881 70 0.002
## 33 885 66 0.002
## 34 875 76 0.002
## 35 882 69 0.002
## 36 879 72 0.002
## 37 884 67 0.002
## 38 869 82 0.002
## 39 880 71 0.002
## 40 886 65 0.002
## 41 891 60 0.002
## 42 897 54 0.002
## 43 888 63 0.002
## 44 894 57 0.002
## 45 898 53 0.002
## 46 651 300 0.002
## 47 698 253 0.002
## 48 833 118 0.002
## 49 835 116 0.002
## 50 844 107 0.002
## 51 847 104 0.002
## 52 864 87 0.002
## 53 862 89 0.002
## 54 879 72 0.002
## 55 900 51 0.002
## 56 889 62 0.002
## 57 837 114 0.002
## 58 843 108 0.002
## 59 899 52 0.002
## 60 493 458 0.002
## 61 526 425 0.002
## 62 535 416 0.002
## 63 546 405 0.002
## 64 555 396 0.002
## 65 564 387 0.002
## 66 580 371 0.002
## 67 590 361 0.002
## 68 607 344 0.002
## 69 607 344 0.002
## 70 613 338 0.002
## 71 640 311 0.002
## 72 626 325 0.002
## 73 656 295 0.002
## 74 642 309 0.002
## 75 629 322 0.002
## 76 639 312 0.002
## 77 646 305 0.002
## 78 662 289 0.002
## 79 656 295 0.002
## 80 663 288 0.002
## 81 686 265 0.002
## 82 679 272 0.002
## 83 691 260 0.002
## 84 679 272 0.002
## 85 708 243 0.002
## 86 695 256 0.002
## 87 691 260 0.002
## 88 701 250 0.002
## 89 716 235 0.002
## 90 714 237 0.002
## 91 737 214 0.002
## 92 719 232 0.002
## 93 719 232 0.002
## 94 731 220 0.002
## 95 742 209 0.002
## 96 744 207 0.002
## 97 727 224 0.002
## 98 725 226 0.002
## 99 735 216 0.002
## 100 731 220 0.002
## 101 726 225 0.002
## 102 759 192 0.002
## 103 743 208 0.002
## 104 739 212 0.002
## 105 746 205 0.002
## 106 769 182 0.002
## 107 750 201 0.002
## 108 756 195 0.002
## 109 783 168 0.002
## 110 755 196 0.002
## 111 764 187 0.002
## 112 766 185 0.002
## 113 765 186 0.002
## 114 803 148 0.002
## 115 781 170 0.002
## 116 768 183 0.002
## 117 781 170 0.002
## 118 768 183 0.002
## 119 796 155 0.002
## 120 793 158 0.002
## 121 818 133 0.002
## 122 795 156 0.002
## 123 792 159 0.002
## 124 797 154 0.002
## 125 803 148 0.002
## 126 810 141 0.002
## 127 818 133 0.002
## 128 810 141 0.002
## 129 819 132 0.002
## 130 851 100 0.002
## 131 831 120 0.002
## 132 832 119 0.002
## 133 831 120 0.002
## 134 830 121 0.002
## 135 831 120 0.002
## 136 836 115 0.002
## 137 841 110 0.002
## 138 845 106 0.002
## 139 873 78 0.002
## 140 845 106 0.002
## 141 840 111 0.002
## 142 847 104 0.002
## 143 874 77 0.002
## 144 852 99 0.002
## 145 852 99 0.002
## 146 853 98 0.002
## 147 851 100 0.002
## 148 868 83 0.002
## 149 865 86 0.002
## 150 876 75 0.002
## 151 898 53 0.002
## 152 873 78 0.002
## 153 877 74 0.002
## 154 915 36 0.002
## 155 885 66 0.002
## 156 884 67 0.002
## 157 892 59 0.002
## 158 900 51 0.002
## 159 884 67 0.002
## 160 886 65 0.002
## 161 888 63 0.002
## 162 480 471 0.002
## 163 498 453 0.002
## 164 597 354 0.002
## 165 619 332 0.002
## 166 636 315 0.002
## 167 639 312 0.002
## 168 633 318 0.002
## 169 774 177 0.002
## 170 776 175 0.002
## 171 790 161 0.002
## 172 807 144 0.002
## 173 816 135 0.002
## 174 827 124 0.002
## 175 823 128 0.002
## 176 835 116 0.002
## 177 836 115 0.002
## 178 836 115 0.002
## 179 858 93 0.002
## 180 849 102 0.002
## 181 862 89 0.002
## 182 857 94 0.002
## 183 879 72 0.002
## 184 871 80 0.002
## 185 870 81 0.002
## 186 878 73 0.002
## 187 882 69 0.002
## 188 886 65 0.002
## 189 878 73 0.002
## 190 882 69 0.002
## 191 884 67 0.002
## 192 893 58 0.002
## 193 892 59 0.002
## 194 895 56 0.002
## 195 893 58 0.002
## 196 897 54 0.002
## 197 901 50 0.002
## 198 897 54 0.002
## 199 906 45 0.002
## 200 904 47 0.002
## 201 901 50 0.002
## 202 706 245 0.002
## 203 842 109 0.002
## 204 857 94 0.002
## 205 877 74 0.002
## 206 894 57 0.002
## 207 897 54 0.002
## 208 844 107 0.002
## First.Second.Mode.Ratio
## 1 1.028
## 2 1.171
## 3 1.292
## 4 1.408
## 5 1.378
## 6 1.496
## 7 1.749
## 8 2.068
## 9 2.575
## 10 4.594
## 11 3.662
## 12 4.661
## 13 5.019
## 14 5.216
## 15 6.150
## 16 5.842
## 17 5.942
## 18 6.608
## 19 7.198
## 20 7.342
## 21 7.270
## 22 8.606
## 23 7.128
## 24 7.888
## 25 7.645
## 26 10.887
## 27 9.226
## 28 8.416
## 29 8.804
## 30 9.685
## 31 10.188
## 32 12.586
## 33 13.409
## 34 11.513
## 35 12.783
## 36 12.208
## 37 13.194
## 38 10.598
## 39 12.394
## 40 13.631
## 41 14.850
## 42 16.611
## 43 14.095
## 44 15.684
## 45 16.943
## 46 2.170
## 47 2.759
## 48 7.059
## 49 7.198
## 50 7.888
## 51 8.144
## 52 9.931
## 53 9.685
## 54 12.208
## 55 17.647
## 56 14.339
## 57 7.342
## 58 7.806
## 59 17.288
## 60 1.076
## 61 1.238
## 62 1.286
## 63 1.348
## 64 1.402
## 65 1.457
## 66 1.563
## 67 1.634
## 68 1.765
## 69 1.765
## 70 1.814
## 71 2.058
## 72 1.926
## 73 2.224
## 74 2.078
## 75 1.953
## 76 2.048
## 77 2.118
## 78 2.291
## 79 2.224
## 80 2.302
## 81 2.589
## 82 2.496
## 83 2.658
## 84 2.496
## 85 2.914
## 86 2.715
## 87 2.658
## 88 2.804
## 89 3.047
## 90 3.013
## 91 3.444
## 92 3.099
## 93 3.099
## 94 3.323
## 95 3.550
## 96 3.594
## 97 3.246
## 98 3.208
## 99 3.403
## 100 3.323
## 101 3.227
## 102 3.953
## 103 3.572
## 104 3.486
## 105 3.639
## 106 4.225
## 107 3.731
## 108 3.877
## 109 4.661
## 110 3.852
## 111 4.086
## 112 4.141
## 113 4.113
## 114 5.426
## 115 4.594
## 116 4.197
## 117 4.594
## 118 4.197
## 119 5.135
## 120 5.019
## 121 6.150
## 122 5.096
## 123 4.981
## 124 5.175
## 125 5.426
## 126 5.745
## 127 6.150
## 128 5.745
## 129 6.205
## 130 8.510
## 131 6.925
## 132 6.992
## 133 6.925
## 134 6.860
## 135 6.925
## 136 7.270
## 137 7.645
## 138 7.972
## 139 11.192
## 140 7.972
## 141 7.568
## 142 8.144
## 143 11.351
## 144 8.606
## 145 8.606
## 146 8.704
## 147 8.510
## 148 10.458
## 149 10.058
## 150 11.680
## 151 16.943
## 152 11.192
## 153 11.851
## 154 25.417
## 155 13.409
## 156 13.194
## 157 15.119
## 158 17.647
## 159 13.194
## 160 13.631
## 161 14.095
## 162 1.019
## 163 1.099
## 164 1.686
## 165 1.864
## 166 2.019
## 167 2.048
## 168 1.991
## 169 4.373
## 170 4.434
## 171 4.907
## 172 5.604
## 173 6.044
## 174 6.669
## 175 6.430
## 176 7.198
## 177 7.270
## 178 7.270
## 179 9.226
## 180 8.324
## 181 9.685
## 182 9.117
## 183 12.208
## 184 10.887
## 185 10.741
## 186 12.027
## 187 12.783
## 188 13.631
## 189 12.027
## 190 12.783
## 191 13.194
## 192 15.397
## 193 15.119
## 194 15.982
## 195 15.397
## 196 16.611
## 197 18.020
## 198 16.611
## 199 20.133
## 200 19.234
## 201 18.020
## 202 2.882
## 203 7.725
## 204 9.117
## 205 11.851
## 206 15.684
## 207 16.611
## 208 7.888
##################################
# Formulating a data quality assessment summary for numeric predictors
##################################
if (length(names(DQA.Predictors.Numeric)) > 0) {

  ##################################
  # Formulating a function to determine the first mode
  ##################################
  # Returns the most frequent non-missing value(s) of x; ties are all
  # returned, in order of first appearance.
  FirstModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    ux[tab == max(tab)]
  }

  ##################################
  # Formulating a function to determine the second mode
  ##################################
  # Returns the most frequent non-missing value(s) of x once every
  # first-mode value has been removed. The sentinel 0.00001 is returned
  # when no second mode exists (nothing is left after removing the first
  # mode) or when a candidate second mode is NA.
  SecondModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    fm <- ux[tab == max(tab)]
    sm <- na.omit(x)[!(na.omit(x) %in% fm)]
    usm <- unique(sm)
    # Guard the empty case explicitly: max() of an empty vector warns and
    # would yield an empty result instead of the sentinel.
    if (length(usm) == 0) {
      return(0.00001)
    }
    tabsm <- tabulate(match(sm, usm))
    second <- usm[tabsm == max(tabsm)]
    if (anyNA(second)) 0.00001 else second
  }

  # Build the per-column summary; the outer parentheses print the result.
  (DQA.Predictors.Numeric.Summary <- data.frame(
    Column.Name= names(DQA.Predictors.Numeric),
    Column.Type=sapply(DQA.Predictors.Numeric, function(x) class(x)),
    Unique.Count=sapply(DQA.Predictors.Numeric, function(x) length(unique(x))),
    Unique.Count.Ratio=sapply(DQA.Predictors.Numeric, function(x) format(round((length(unique(x))/nrow(DQA.Predictors.Numeric)),3), nsmall=3)),
    First.Mode.Value=sapply(DQA.Predictors.Numeric, function(x) format(round((FirstModes(x)[1]),3),nsmall=3)),
    Second.Mode.Value=sapply(DQA.Predictors.Numeric, function(x) format(round((SecondModes(x)[1]),3),nsmall=3)),
    First.Mode.Count=sapply(DQA.Predictors.Numeric, function(x) sum(na.omit(x) == FirstModes(x)[1])),
    Second.Mode.Count=sapply(DQA.Predictors.Numeric, function(x) sum(na.omit(x) == SecondModes(x)[1])),
    First.Second.Mode.Ratio=sapply(DQA.Predictors.Numeric, function(x) format(round((sum(na.omit(x) == FirstModes(x)[1])/sum(na.omit(x) == SecondModes(x)[1])),3), nsmall=3)),
    Minimum=sapply(DQA.Predictors.Numeric, function(x) format(round(min(x,na.rm = TRUE),3), nsmall=3)),
    Mean=sapply(DQA.Predictors.Numeric, function(x) format(round(mean(x,na.rm = TRUE),3), nsmall=3)),
    Median=sapply(DQA.Predictors.Numeric, function(x) format(round(median(x,na.rm = TRUE),3), nsmall=3)),
    Maximum=sapply(DQA.Predictors.Numeric, function(x) format(round(max(x,na.rm = TRUE),3), nsmall=3)),
    Skewness=sapply(DQA.Predictors.Numeric, function(x) format(round(skewness(x,na.rm = TRUE),3), nsmall=3)),
    Kurtosis=sapply(DQA.Predictors.Numeric, function(x) format(round(kurtosis(x,na.rm = TRUE),3), nsmall=3)),
    Percentile25th=sapply(DQA.Predictors.Numeric, function(x) format(round(quantile(x,probs=0.25,na.rm = TRUE),3), nsmall=3)),
    Percentile75th=sapply(DQA.Predictors.Numeric, function(x) format(round(quantile(x,probs=0.75,na.rm = TRUE),3), nsmall=3)),
    row.names=NULL)
  )

}
## Column.Name Column.Type Unique.Count Unique.Count.Ratio
## 1 MolWeight numeric 646 0.679
## 2 NumAtoms integer 66 0.069
## 3 NumNonHAtoms integer 36 0.038
## 4 NumBonds integer 72 0.076
## 5 NumNonHBonds integer 39 0.041
## 6 NumMultBonds integer 25 0.026
## 7 NumRotBonds integer 15 0.016
## 8 NumDblBonds integer 8 0.008
## 9 NumAromaticBonds integer 16 0.017
## 10 NumHydrogen integer 41 0.043
## 11 NumCarbon integer 28 0.029
## 12 NumNitrogen integer 7 0.007
## 13 NumOxygen integer 11 0.012
## 14 NumSulfer integer 5 0.005
## 15 NumChlorine integer 11 0.012
## 16 NumHalogen integer 11 0.012
## 17 NumRings integer 8 0.008
## 18 HydrophilicFactor numeric 369 0.388
## 19 SurfaceArea1 numeric 252 0.265
## 20 SurfaceArea2 numeric 287 0.302
## First.Mode.Value Second.Mode.Value First.Mode.Count Second.Mode.Count
## 1 102.200 116.230 16 14
## 2 22.000 24.000 73 51
## 3 8.000 11.000 104 73
## 4 23.000 19.000 69 56
## 5 8.000 7.000 82 66
## 6 0.000 7.000 158 122
## 7 0.000 1.000 272 186
## 8 0.000 1.000 427 268
## 9 0.000 6.000 400 302
## 10 12.000 8.000 83 79
## 11 6.000 7.000 105 97
## 12 0.000 1.000 546 191
## 13 0.000 2.000 325 218
## 14 0.000 1.000 830 96
## 15 0.000 1.000 750 81
## 16 0.000 1.000 685 107
## 17 1.000 0.000 323 260
## 18 -0.828 -0.158 21 20
## 19 0.000 20.230 218 76
## 20 0.000 20.230 211 75
## First.Second.Mode.Ratio Minimum Mean Median Maximum Skewness Kurtosis
## 1 1.143 46.090 201.654 179.230 665.810 0.988 3.945
## 2 1.431 5.000 25.507 22.000 94.000 1.364 5.523
## 3 1.425 2.000 13.161 12.000 47.000 0.993 4.129
## 4 1.232 4.000 25.909 23.000 97.000 1.360 5.408
## 5 1.242 1.000 13.563 12.000 50.000 0.969 3.842
## 6 1.295 0.000 6.148 6.000 25.000 0.670 3.053
## 7 1.462 0.000 2.251 2.000 16.000 1.577 6.437
## 8 1.593 0.000 1.006 1.000 7.000 1.360 4.760
## 9 1.325 0.000 5.121 6.000 25.000 0.796 3.241
## 10 1.051 0.000 12.346 11.000 47.000 1.262 5.261
## 11 1.082 1.000 9.893 9.000 33.000 0.927 3.616
## 12 2.859 0.000 0.813 0.000 6.000 1.554 4.831
## 13 1.491 0.000 1.574 1.000 13.000 1.772 8.494
## 14 8.646 0.000 0.164 0.000 4.000 3.842 21.526
## 15 9.259 0.000 0.556 0.000 10.000 3.178 13.780
## 16 6.402 0.000 0.698 0.000 10.000 2.691 10.808
## 17 1.242 0.000 1.402 1.000 7.000 1.034 3.875
## 18 1.050 -0.985 -0.021 -0.314 13.483 3.404 27.504
## 19 2.868 0.000 36.459 29.100 331.940 1.714 9.714
## 20 2.813 0.000 40.234 33.120 331.940 1.475 7.485
## Percentile25th Percentile75th
## 1 122.605 264.340
## 2 17.000 31.000
## 3 8.000 17.000
## 4 17.000 31.500
## 5 8.000 18.000
## 6 1.000 10.000
## 7 0.000 3.500
## 8 0.000 2.000
## 9 0.000 6.000
## 10 7.000 16.000
## 11 6.000 12.000
## 12 0.000 1.000
## 13 0.000 2.000
## 14 0.000 0.000
## 15 0.000 0.000
## 16 0.000 1.000
## 17 0.000 2.000
## 18 -0.763 0.313
## 19 9.230 53.280
## 20 10.630 60.660
##################################
# Identifying potential data quality issues
##################################
##################################
# Checking for missing observations
##################################
# Report the variables in DQA.Summary that have NA.Count > 0.
# When any exist, the matching summary rows are the last expression of the
# branch, so they are auto-printed at top level.
if ((nrow(DQA.Summary[DQA.Summary$NA.Count>0,]))>0){
  print(paste0("Missing observations noted for ",
               (nrow(DQA.Summary[DQA.Summary$NA.Count>0,])),
               " variable(s) with NA.Count>0 and Fill.Rate<1.0."))
  DQA.Summary[DQA.Summary$NA.Count>0,]
} else {
  print("No missing observations noted.")
}
## [1] "No missing observations noted."
##################################
# Checking for zero or near-zero variance predictors
##################################
# Flag factor predictors whose First.Second.Mode.Ratio exceeds 5.
# The ratio is stored as formatted text in the summary, so it is converted
# back to numeric before comparing. When any predictor is flagged, the
# matching summary rows are the last expression of the branch and are
# auto-printed at top level.
if (length(names(DQA.Predictors.Factor))==0) {
  print("No factor predictors noted.")
} else if (nrow(DQA.Predictors.Factor.Summary[as.numeric(as.character(DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio))>5,])>0){
  print(paste0("Low variance observed for ",
               (nrow(DQA.Predictors.Factor.Summary[as.numeric(as.character(DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio))>5,])),
               " factor variable(s) with First.Second.Mode.Ratio>5."))
  DQA.Predictors.Factor.Summary[as.numeric(as.character(DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio))>5,]
} else {
  print("No low variance factor predictors due to high first-second mode ratio noted.")
}
## [1] "Low variance observed for 124 factor variable(s) with First.Second.Mode.Ratio>5."
## Column.Name Column.Type Unique.Count First.Mode.Value Second.Mode.Value
## 13 FP013 factor 2 0 1
## 14 FP014 factor 2 0 1
## 15 FP015 factor 2 1 0
## 16 FP016 factor 2 0 1
## 17 FP017 factor 2 0 1
## 18 FP018 factor 2 0 1
## 19 FP019 factor 2 0 1
## 20 FP020 factor 2 0 1
## 21 FP021 factor 2 0 1
## 22 FP022 factor 2 0 1
## 23 FP023 factor 2 0 1
## 24 FP024 factor 2 0 1
## 25 FP025 factor 2 0 1
## 26 FP026 factor 2 0 1
## 27 FP027 factor 2 0 1
## 28 FP028 factor 2 0 1
## 29 FP029 factor 2 0 1
## 30 FP030 factor 2 0 1
## 31 FP031 factor 2 0 1
## 32 FP032 factor 2 0 1
## 33 FP033 factor 2 0 1
## 34 FP034 factor 2 0 1
## 35 FP035 factor 2 0 1
## 36 FP036 factor 2 0 1
## 37 FP037 factor 2 0 1
## 38 FP038 factor 2 0 1
## 39 FP039 factor 2 0 1
## 40 FP040 factor 2 0 1
## 41 FP041 factor 2 0 1
## 42 FP042 factor 2 0 1
## 43 FP043 factor 2 0 1
## 44 FP044 factor 2 0 1
## 45 FP045 factor 2 0 1
## 48 FP048 factor 2 0 1
## 49 FP049 factor 2 0 1
## 50 FP050 factor 2 0 1
## 51 FP051 factor 2 0 1
## 52 FP052 factor 2 0 1
## 53 FP053 factor 2 0 1
## 54 FP054 factor 2 0 1
## 55 FP055 factor 2 0 1
## 56 FP056 factor 2 0 1
## 57 FP057 factor 2 0 1
## 58 FP058 factor 2 0 1
## 59 FP059 factor 2 0 1
## 114 FP114 factor 2 0 1
## 119 FP119 factor 2 0 1
## 120 FP120 factor 2 0 1
## 121 FP121 factor 2 0 1
## 122 FP122 factor 2 0 1
## 124 FP124 factor 2 0 1
## 125 FP125 factor 2 0 1
## 126 FP126 factor 2 0 1
## 127 FP127 factor 2 0 1
## 128 FP128 factor 2 0 1
## 129 FP129 factor 2 0 1
## 130 FP130 factor 2 0 1
## 131 FP131 factor 2 0 1
## 132 FP132 factor 2 0 1
## 133 FP133 factor 2 0 1
## 134 FP134 factor 2 0 1
## 135 FP135 factor 2 0 1
## 136 FP136 factor 2 0 1
## 137 FP137 factor 2 0 1
## 138 FP138 factor 2 0 1
## 139 FP139 factor 2 0 1
## 140 FP140 factor 2 0 1
## 141 FP141 factor 2 0 1
## 142 FP142 factor 2 0 1
## 143 FP143 factor 2 0 1
## 144 FP144 factor 2 0 1
## 145 FP145 factor 2 0 1
## 146 FP146 factor 2 0 1
## 147 FP147 factor 2 0 1
## 148 FP148 factor 2 0 1
## 149 FP149 factor 2 0 1
## 150 FP150 factor 2 0 1
## 151 FP151 factor 2 0 1
## 152 FP152 factor 2 0 1
## 153 FP153 factor 2 0 1
## 154 FP154 factor 2 0 1
## 155 FP155 factor 2 0 1
## 156 FP156 factor 2 0 1
## 157 FP157 factor 2 0 1
## 158 FP158 factor 2 0 1
## 159 FP159 factor 2 0 1
## 160 FP160 factor 2 0 1
## 161 FP161 factor 2 0 1
## 172 FP172 factor 2 0 1
## 173 FP173 factor 2 0 1
## 174 FP174 factor 2 0 1
## 175 FP175 factor 2 0 1
## 176 FP176 factor 2 0 1
## 177 FP177 factor 2 0 1
## 178 FP178 factor 2 0 1
## 179 FP179 factor 2 0 1
## 180 FP180 factor 2 0 1
## 181 FP181 factor 2 0 1
## 182 FP182 factor 2 0 1
## 183 FP183 factor 2 0 1
## 184 FP184 factor 2 0 1
## 185 FP185 factor 2 0 1
## 186 FP186 factor 2 0 1
## 187 FP187 factor 2 0 1
## 188 FP188 factor 2 0 1
## 189 FP189 factor 2 0 1
## 190 FP190 factor 2 0 1
## 191 FP191 factor 2 0 1
## 192 FP192 factor 2 0 1
## 193 FP193 factor 2 0 1
## 194 FP194 factor 2 0 1
## 195 FP195 factor 2 0 1
## 196 FP196 factor 2 0 1
## 197 FP197 factor 2 0 1
## 198 FP198 factor 2 0 1
## 199 FP199 factor 2 0 1
## 200 FP200 factor 2 0 1
## 201 FP201 factor 2 0 1
## 203 FP203 factor 2 0 1
## 204 FP204 factor 2 0 1
## 205 FP205 factor 2 0 1
## 206 FP206 factor 2 0 1
## 207 FP207 factor 2 0 1
## 208 FP208 factor 2 0 1
## First.Mode.Count Second.Mode.Count Unique.Count.Ratio
## 13 793 158 0.002
## 14 798 153 0.002
## 15 818 133 0.002
## 16 812 139 0.002
## 17 814 137 0.002
## 18 826 125 0.002
## 19 835 116 0.002
## 20 837 114 0.002
## 21 836 115 0.002
## 22 852 99 0.002
## 23 834 117 0.002
## 24 844 107 0.002
## 25 841 110 0.002
## 26 871 80 0.002
## 27 858 93 0.002
## 28 850 101 0.002
## 29 854 97 0.002
## 30 862 89 0.002
## 31 866 85 0.002
## 32 881 70 0.002
## 33 885 66 0.002
## 34 875 76 0.002
## 35 882 69 0.002
## 36 879 72 0.002
## 37 884 67 0.002
## 38 869 82 0.002
## 39 880 71 0.002
## 40 886 65 0.002
## 41 891 60 0.002
## 42 897 54 0.002
## 43 888 63 0.002
## 44 894 57 0.002
## 45 898 53 0.002
## 48 833 118 0.002
## 49 835 116 0.002
## 50 844 107 0.002
## 51 847 104 0.002
## 52 864 87 0.002
## 53 862 89 0.002
## 54 879 72 0.002
## 55 900 51 0.002
## 56 889 62 0.002
## 57 837 114 0.002
## 58 843 108 0.002
## 59 899 52 0.002
## 114 803 148 0.002
## 119 796 155 0.002
## 120 793 158 0.002
## 121 818 133 0.002
## 122 795 156 0.002
## 124 797 154 0.002
## 125 803 148 0.002
## 126 810 141 0.002
## 127 818 133 0.002
## 128 810 141 0.002
## 129 819 132 0.002
## 130 851 100 0.002
## 131 831 120 0.002
## 132 832 119 0.002
## 133 831 120 0.002
## 134 830 121 0.002
## 135 831 120 0.002
## 136 836 115 0.002
## 137 841 110 0.002
## 138 845 106 0.002
## 139 873 78 0.002
## 140 845 106 0.002
## 141 840 111 0.002
## 142 847 104 0.002
## 143 874 77 0.002
## 144 852 99 0.002
## 145 852 99 0.002
## 146 853 98 0.002
## 147 851 100 0.002
## 148 868 83 0.002
## 149 865 86 0.002
## 150 876 75 0.002
## 151 898 53 0.002
## 152 873 78 0.002
## 153 877 74 0.002
## 154 915 36 0.002
## 155 885 66 0.002
## 156 884 67 0.002
## 157 892 59 0.002
## 158 900 51 0.002
## 159 884 67 0.002
## 160 886 65 0.002
## 161 888 63 0.002
## 172 807 144 0.002
## 173 816 135 0.002
## 174 827 124 0.002
## 175 823 128 0.002
## 176 835 116 0.002
## 177 836 115 0.002
## 178 836 115 0.002
## 179 858 93 0.002
## 180 849 102 0.002
## 181 862 89 0.002
## 182 857 94 0.002
## 183 879 72 0.002
## 184 871 80 0.002
## 185 870 81 0.002
## 186 878 73 0.002
## 187 882 69 0.002
## 188 886 65 0.002
## 189 878 73 0.002
## 190 882 69 0.002
## 191 884 67 0.002
## 192 893 58 0.002
## 193 892 59 0.002
## 194 895 56 0.002
## 195 893 58 0.002
## 196 897 54 0.002
## 197 901 50 0.002
## 198 897 54 0.002
## 199 906 45 0.002
## 200 904 47 0.002
## 201 901 50 0.002
## 203 842 109 0.002
## 204 857 94 0.002
## 205 877 74 0.002
## 206 894 57 0.002
## 207 897 54 0.002
## 208 844 107 0.002
## First.Second.Mode.Ratio
## 13 5.019
## 14 5.216
## 15 6.150
## 16 5.842
## 17 5.942
## 18 6.608
## 19 7.198
## 20 7.342
## 21 7.270
## 22 8.606
## 23 7.128
## 24 7.888
## 25 7.645
## 26 10.887
## 27 9.226
## 28 8.416
## 29 8.804
## 30 9.685
## 31 10.188
## 32 12.586
## 33 13.409
## 34 11.513
## 35 12.783
## 36 12.208
## 37 13.194
## 38 10.598
## 39 12.394
## 40 13.631
## 41 14.850
## 42 16.611
## 43 14.095
## 44 15.684
## 45 16.943
## 48 7.059
## 49 7.198
## 50 7.888
## 51 8.144
## 52 9.931
## 53 9.685
## 54 12.208
## 55 17.647
## 56 14.339
## 57 7.342
## 58 7.806
## 59 17.288
## 114 5.426
## 119 5.135
## 120 5.019
## 121 6.150
## 122 5.096
## 124 5.175
## 125 5.426
## 126 5.745
## 127 6.150
## 128 5.745
## 129 6.205
## 130 8.510
## 131 6.925
## 132 6.992
## 133 6.925
## 134 6.860
## 135 6.925
## 136 7.270
## 137 7.645
## 138 7.972
## 139 11.192
## 140 7.972
## 141 7.568
## 142 8.144
## 143 11.351
## 144 8.606
## 145 8.606
## 146 8.704
## 147 8.510
## 148 10.458
## 149 10.058
## 150 11.680
## 151 16.943
## 152 11.192
## 153 11.851
## 154 25.417
## 155 13.409
## 156 13.194
## 157 15.119
## 158 17.647
## 159 13.194
## 160 13.631
## 161 14.095
## 172 5.604
## 173 6.044
## 174 6.669
## 175 6.430
## 176 7.198
## 177 7.270
## 178 7.270
## 179 9.226
## 180 8.324
## 181 9.685
## 182 9.117
## 183 12.208
## 184 10.887
## 185 10.741
## 186 12.027
## 187 12.783
## 188 13.631
## 189 12.027
## 190 12.783
## 191 13.194
## 192 15.397
## 193 15.119
## 194 15.982
## 195 15.397
## 196 16.611
## 197 18.020
## 198 16.611
## 199 20.133
## 200 19.234
## 201 18.020
## 203 7.725
## 204 9.117
## 205 11.851
## 206 15.684
## 207 16.611
## 208 7.888
# Flag numeric predictors whose most frequent value dominates the second most
# frequent one (First.Second.Mode.Ratio > 5) as low-variance candidates.
# The summary lookup stays inside the `else` branch so it is only evaluated
# when numeric predictors exist, exactly as in the original if / else-if chain.
if (length(names(DQA.Predictors.Numeric)) == 0) {
  print("No numeric predictors noted.")
} else {
  # The ratio column is converted via character before the numeric comparison.
  DQA.Predictors.Numeric.HighModeRatio <- DQA.Predictors.Numeric.Summary[
    as.numeric(as.character(DQA.Predictors.Numeric.Summary$First.Second.Mode.Ratio)) > 5, ]
  if (nrow(DQA.Predictors.Numeric.HighModeRatio) > 0) {
    print(paste0("Low variance observed for ",
                 nrow(DQA.Predictors.Numeric.HighModeRatio),
                 " numeric variable(s) with First.Second.Mode.Ratio>5."))
    # Last expression of the branch: auto-printed when run at top level.
    DQA.Predictors.Numeric.HighModeRatio
  } else {
    print("No low variance numeric predictors due to high first-second mode ratio noted.")
  }
}
## [1] "Low variance observed for 3 numeric variable(s) with First.Second.Mode.Ratio>5."
## Column.Name Column.Type Unique.Count Unique.Count.Ratio First.Mode.Value
## 14 NumSulfer integer 5 0.005 0.000
## 15 NumChlorine integer 11 0.012 0.000
## 16 NumHalogen integer 11 0.012 0.000
## Second.Mode.Value First.Mode.Count Second.Mode.Count First.Second.Mode.Ratio
## 14 1.000 830 96 8.646
## 15 1.000 750 81 9.259
## 16 1.000 685 107 6.402
## Minimum Mean Median Maximum Skewness Kurtosis Percentile25th Percentile75th
## 14 0.000 0.164 0.000 4.000 3.842 21.526 0.000 0.000
## 15 0.000 0.556 0.000 10.000 3.178 13.780 0.000 0.000
## 16 0.000 0.698 0.000 10.000 2.691 10.808 0.000 1.000
# Flag numeric predictors with very few distinct values relative to the number
# of observations (Unique.Count.Ratio < 0.01) as low-variance candidates.
# The summary lookup stays inside the `else` branch so it is only evaluated
# when numeric predictors exist, exactly as in the original if / else-if chain.
if (length(names(DQA.Predictors.Numeric)) == 0) {
  print("No numeric predictors noted.")
} else {
  # The ratio column is converted via character before the numeric comparison.
  DQA.Predictors.Numeric.LowUniqueRatio <- DQA.Predictors.Numeric.Summary[
    as.numeric(as.character(DQA.Predictors.Numeric.Summary$Unique.Count.Ratio)) < 0.01, ]
  if (nrow(DQA.Predictors.Numeric.LowUniqueRatio) > 0) {
    print(paste0("Low variance observed for ",
                 nrow(DQA.Predictors.Numeric.LowUniqueRatio),
                 " numeric variable(s) with Unique.Count.Ratio<0.01."))
    # Last expression of the branch: auto-printed when run at top level.
    DQA.Predictors.Numeric.LowUniqueRatio
  } else {
    print("No low variance numeric predictors due to low unique count ratio noted.")
  }
}
## [1] "Low variance observed for 4 numeric variable(s) with Unique.Count.Ratio<0.01."
## Column.Name Column.Type Unique.Count Unique.Count.Ratio First.Mode.Value
## 8 NumDblBonds integer 8 0.008 0.000
## 12 NumNitrogen integer 7 0.007 0.000
## 14 NumSulfer integer 5 0.005 0.000
## 17 NumRings integer 8 0.008 1.000
## Second.Mode.Value First.Mode.Count Second.Mode.Count First.Second.Mode.Ratio
## 8 1.000 427 268 1.593
## 12 1.000 546 191 2.859
## 14 1.000 830 96 8.646
## 17 0.000 323 260 1.242
## Minimum Mean Median Maximum Skewness Kurtosis Percentile25th Percentile75th
## 8 0.000 1.006 1.000 7.000 1.360 4.760 0.000 2.000
## 12 0.000 0.813 0.000 6.000 1.554 4.831 0.000 1.000
## 14 0.000 0.164 0.000 4.000 3.842 21.526 0.000 0.000
## 17 0.000 1.402 1.000 7.000 1.034 3.875 0.000 2.000
##################################
# Checking for skewed predictors
##################################
# Flag numeric predictors whose skewness lies outside [-3, 3].
# abs(x) > 3 is the same test as (x > 3 | x < -3), including for NA values.
if (length(names(DQA.Predictors.Numeric)) == 0) {
  print("No numeric predictors noted.")
} else {
  # The skewness column is converted via character before the comparison.
  DQA.Predictors.Numeric.Skewed <- DQA.Predictors.Numeric.Summary[
    abs(as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))) > 3, ]
  if (nrow(DQA.Predictors.Numeric.Skewed) > 0) {
    print(paste0("High skewness observed for ",
                 nrow(DQA.Predictors.Numeric.Skewed),
                 " numeric variable(s) with Skewness>3 or Skewness<(-3)."))
    # Last expression of the branch: auto-printed when run at top level.
    DQA.Predictors.Numeric.Skewed
  } else {
    print("No skewed numeric predictors noted.")
  }
}
## [1] "High skewness observed for 3 numeric variable(s) with Skewness>3 or Skewness<(-3)."
## Column.Name Column.Type Unique.Count Unique.Count.Ratio
## 14 NumSulfer integer 5 0.005
## 15 NumChlorine integer 11 0.012
## 18 HydrophilicFactor numeric 369 0.388
## First.Mode.Value Second.Mode.Value First.Mode.Count Second.Mode.Count
## 14 0.000 1.000 830 96
## 15 0.000 1.000 750 81
## 18 -0.828 -0.158 21 20
## First.Second.Mode.Ratio Minimum Mean Median Maximum Skewness Kurtosis
## 14 8.646 0.000 0.164 0.000 4.000 3.842 21.526
## 15 9.259 0.000 0.556 0.000 10.000 3.178 13.780
## 18 1.050 -0.985 -0.021 -0.314 13.483 3.404 27.504
## Percentile25th Percentile75th
## 14 0.000 0.000
## 15 0.000 0.000
## 18 -0.763 0.313
##################################
# Loading dataset
##################################
# Working copy of the training set used by the steps below.
DPA <- Solubility_Train

##################################
# Listing all predictors
##################################
# Everything except the response column; drop = FALSE keeps a data frame even
# if only a single predictor remains.
DPA.Predictors <- DPA[, !names(DPA) %in% c("solTrainY"), drop = FALSE]

##################################
# Listing all numeric predictors
##################################
# Exclude the columns whose name contains "FP". A logical mask is used instead
# of -(grep(...)) because negative indexing with an empty match set would
# select zero columns rather than all of them.
DPA.Predictors.Numeric <- DPA.Predictors[, !grepl("FP", names(DPA.Predictors)), drop = FALSE]
##################################
# Identifying outliers for the numeric predictors
##################################
# Count the outliers reported by boxplot.stats() for every numeric predictor,
# draw one annotated horizontal boxplot per predictor, and report how many
# predictors have at least one outlier.
# Preallocated (one slot per predictor) instead of growing with append().
OutlierCountList <- integer(ncol(DPA.Predictors.Numeric))

# seq_len() keeps the loop from running when there are no numeric predictors.
for (i in seq_len(ncol(DPA.Predictors.Numeric))) {
  Outliers <- boxplot.stats(DPA.Predictors.Numeric[[i]])$out
  OutlierCount <- length(Outliers)
  OutlierCountList[i] <- OutlierCount
  # Row positions holding an outlying value (kept for the last predictor only,
  # since it is overwritten on every iteration).
  OutlierIndices <- which(DPA.Predictors.Numeric[[i]] %in% c(Outliers))
  boxplot(DPA.Predictors.Numeric[[i]],
          ylab = names(DPA.Predictors.Numeric)[i],
          main = names(DPA.Predictors.Numeric)[i],
          horizontal = TRUE)
  mtext(paste0(OutlierCount, " Outlier(s) Detected"))
}

# Built directly as a data frame, so the counts never pass through a character
# matrix and need no conversion back to numeric.
OutlierCountSummary <- data.frame(NumericPredictors = names(DPA.Predictors.Numeric),
                                  OutlierCount = as.numeric(OutlierCountList))
NumericPredictorWithOutlierCount <- sum(OutlierCountSummary$OutlierCount > 0)
print(paste0(NumericPredictorWithOutlierCount, " numeric variable(s) were noted with outlier(s)." ))
## [1] "20 numeric variable(s) were noted with outlier(s)."
##################################
# Gathering descriptive statistics
##################################
# Summarise the numeric predictors with skim(); the outer parentheses make the
# assignment print its value as well as store it.
(DPA_Skimmed <- skim(DPA.Predictors.Numeric))
Name | DPA.Predictors.Numeric |
Number of rows | 951 |
Number of columns | 20 |
_______________________ | |
Column type frequency: | |
numeric | 20 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
MolWeight | 0 | 1 | 201.65 | 97.91 | 46.09 | 122.60 | 179.23 | 264.34 | 665.81 | ▇▆▂▁▁ |
NumAtoms | 0 | 1 | 25.51 | 12.61 | 5.00 | 17.00 | 22.00 | 31.00 | 94.00 | ▇▆▂▁▁ |
NumNonHAtoms | 0 | 1 | 13.16 | 6.50 | 2.00 | 8.00 | 12.00 | 17.00 | 47.00 | ▇▆▂▁▁ |
NumBonds | 0 | 1 | 25.91 | 13.48 | 4.00 | 17.00 | 23.00 | 31.50 | 97.00 | ▇▇▂▁▁ |
NumNonHBonds | 0 | 1 | 13.56 | 7.57 | 1.00 | 8.00 | 12.00 | 18.00 | 50.00 | ▇▇▂▁▁ |
NumMultBonds | 0 | 1 | 6.15 | 5.17 | 0.00 | 1.00 | 6.00 | 10.00 | 25.00 | ▇▆▃▁▁ |
NumRotBonds | 0 | 1 | 2.25 | 2.41 | 0.00 | 0.00 | 2.00 | 3.50 | 16.00 | ▇▂▁▁▁ |
NumDblBonds | 0 | 1 | 1.01 | 1.21 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▂▁▁▁ |
NumAromaticBonds | 0 | 1 | 5.12 | 5.26 | 0.00 | 0.00 | 6.00 | 6.00 | 25.00 | ▇▆▃▁▁ |
NumHydrogen | 0 | 1 | 12.35 | 7.32 | 0.00 | 7.00 | 11.00 | 16.00 | 47.00 | ▇▇▂▁▁ |
NumCarbon | 0 | 1 | 9.89 | 5.29 | 1.00 | 6.00 | 9.00 | 12.00 | 33.00 | ▇▇▃▁▁ |
NumNitrogen | 0 | 1 | 0.81 | 1.19 | 0.00 | 0.00 | 0.00 | 1.00 | 6.00 | ▇▂▁▁▁ |
NumOxygen | 0 | 1 | 1.57 | 1.73 | 0.00 | 0.00 | 1.00 | 2.00 | 13.00 | ▇▂▁▁▁ |
NumSulfer | 0 | 1 | 0.16 | 0.49 | 0.00 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
NumChlorine | 0 | 1 | 0.56 | 1.40 | 0.00 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
NumHalogen | 0 | 1 | 0.70 | 1.47 | 0.00 | 0.00 | 0.00 | 1.00 | 10.00 | ▇▁▁▁▁ |
NumRings | 0 | 1 | 1.40 | 1.30 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▃▂▁▁ |
HydrophilicFactor | 0 | 1 | -0.02 | 1.13 | -0.98 | -0.76 | -0.31 | 0.31 | 13.48 | ▇▁▁▁▁ |
SurfaceArea1 | 0 | 1 | 36.46 | 35.29 | 0.00 | 9.23 | 29.10 | 53.28 | 331.94 | ▇▂▁▁▁ |
SurfaceArea2 | 0 | 1 | 40.23 | 38.12 | 0.00 | 10.63 | 33.12 | 60.66 | 331.94 | ▇▂▁▁▁ |
###################################
# Verifying the data dimensions
###################################
# Print the dimensions (rows, columns) of the numeric-predictor data frame.
dim(DPA.Predictors.Numeric)
## [1] 951 20
##################################
# Loading dataset
##################################
# Reset the working copy to the full training set (response plus predictors).
DPA <- Solubility_Train

##################################
# Gathering descriptive statistics
##################################
# The outer parentheses make the assignment print its value as well as store it.
(DPA_Skimmed <- skim(DPA))
Name | DPA |
Number of rows | 951 |
Number of columns | 229 |
_______________________ | |
Column type frequency: | |
numeric | 229 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
solTrainY | 0 | 1 | -2.72 | 2.05 | -11.62 | -3.96 | -2.51 | -1.36 | 1.58 | ▁▁▃▇▃ |
FP001 | 0 | 1 | 0.49 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP002 | 0 | 1 | 0.54 | 0.50 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP003 | 0 | 1 | 0.44 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP004 | 0 | 1 | 0.58 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP005 | 0 | 1 | 0.58 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP006 | 0 | 1 | 0.40 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP007 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP008 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP009 | 0 | 1 | 0.28 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP010 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP011 | 0 | 1 | 0.21 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP012 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP013 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP014 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP015 | 0 | 1 | 0.86 | 0.35 | 0.00 | 1.00 | 1.00 | 1.00 | 1.00 | ▁▁▁▁▇ |
FP016 | 0 | 1 | 0.15 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP017 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP018 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP019 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP020 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP021 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP022 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP023 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP024 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP025 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP026 | 0 | 1 | 0.08 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP027 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP028 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP029 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP030 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP031 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP032 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP033 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP034 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP035 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP036 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP037 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP038 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP039 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP040 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP041 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP042 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP043 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP044 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP045 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP046 | 0 | 1 | 0.32 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP047 | 0 | 1 | 0.27 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP048 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP049 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP050 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP051 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP052 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP053 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP054 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP055 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP056 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP057 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP058 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP059 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP060 | 0 | 1 | 0.48 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP061 | 0 | 1 | 0.45 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP062 | 0 | 1 | 0.44 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP063 | 0 | 1 | 0.43 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP064 | 0 | 1 | 0.42 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP065 | 0 | 1 | 0.59 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP066 | 0 | 1 | 0.61 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP067 | 0 | 1 | 0.38 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP068 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP069 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP070 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP071 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP072 | 0 | 1 | 0.66 | 0.47 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP073 | 0 | 1 | 0.31 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP074 | 0 | 1 | 0.32 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP075 | 0 | 1 | 0.34 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP076 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP077 | 0 | 1 | 0.32 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP078 | 0 | 1 | 0.30 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP079 | 0 | 1 | 0.69 | 0.46 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP080 | 0 | 1 | 0.30 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP081 | 0 | 1 | 0.28 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP082 | 0 | 1 | 0.71 | 0.45 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP083 | 0 | 1 | 0.27 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP084 | 0 | 1 | 0.29 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP085 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP086 | 0 | 1 | 0.27 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP087 | 0 | 1 | 0.73 | 0.45 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP088 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP089 | 0 | 1 | 0.25 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP090 | 0 | 1 | 0.25 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP091 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP092 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP093 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP094 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP095 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP096 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP097 | 0 | 1 | 0.24 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP098 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP099 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP100 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP101 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP102 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP103 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP104 | 0 | 1 | 0.22 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP105 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP106 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP107 | 0 | 1 | 0.21 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP108 | 0 | 1 | 0.21 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP109 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP110 | 0 | 1 | 0.21 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP111 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP112 | 0 | 1 | 0.19 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP113 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP114 | 0 | 1 | 0.16 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP115 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP116 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP117 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP118 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP119 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP120 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP121 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP122 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP123 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP124 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP125 | 0 | 1 | 0.16 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP126 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP127 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP128 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP129 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP130 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP131 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP132 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP133 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP134 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP135 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP136 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP137 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP138 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP139 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP140 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP141 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP142 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP143 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP144 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP145 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP146 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP147 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP148 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP149 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP150 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP151 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP152 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP153 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP154 | 0 | 1 | 0.04 | 0.19 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP155 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP156 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP157 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP158 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP159 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP160 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP161 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP162 | 0 | 1 | 0.50 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP163 | 0 | 1 | 0.48 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP164 | 0 | 1 | 0.63 | 0.48 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP165 | 0 | 1 | 0.35 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP166 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP167 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP168 | 0 | 1 | 0.67 | 0.47 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP169 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP170 | 0 | 1 | 0.18 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP171 | 0 | 1 | 0.17 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP172 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP173 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP174 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP175 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP176 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP177 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP178 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP179 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP180 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP181 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP182 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP183 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP184 | 0 | 1 | 0.08 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP185 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP186 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP187 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP188 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP189 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP190 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP191 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP192 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP193 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP194 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP195 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP196 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP197 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP198 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP199 | 0 | 1 | 0.05 | 0.21 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP200 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP201 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP202 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP203 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP204 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP205 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP206 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP207 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP208 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
MolWeight | 0 | 1 | 201.65 | 97.91 | 46.09 | 122.60 | 179.23 | 264.34 | 665.81 | ▇▆▂▁▁ |
NumAtoms | 0 | 1 | 25.51 | 12.61 | 5.00 | 17.00 | 22.00 | 31.00 | 94.00 | ▇▆▂▁▁ |
NumNonHAtoms | 0 | 1 | 13.16 | 6.50 | 2.00 | 8.00 | 12.00 | 17.00 | 47.00 | ▇▆▂▁▁ |
NumBonds | 0 | 1 | 25.91 | 13.48 | 4.00 | 17.00 | 23.00 | 31.50 | 97.00 | ▇▇▂▁▁ |
NumNonHBonds | 0 | 1 | 13.56 | 7.57 | 1.00 | 8.00 | 12.00 | 18.00 | 50.00 | ▇▇▂▁▁ |
NumMultBonds | 0 | 1 | 6.15 | 5.17 | 0.00 | 1.00 | 6.00 | 10.00 | 25.00 | ▇▆▃▁▁ |
NumRotBonds | 0 | 1 | 2.25 | 2.41 | 0.00 | 0.00 | 2.00 | 3.50 | 16.00 | ▇▂▁▁▁ |
NumDblBonds | 0 | 1 | 1.01 | 1.21 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▂▁▁▁ |
NumAromaticBonds | 0 | 1 | 5.12 | 5.26 | 0.00 | 0.00 | 6.00 | 6.00 | 25.00 | ▇▆▃▁▁ |
NumHydrogen | 0 | 1 | 12.35 | 7.32 | 0.00 | 7.00 | 11.00 | 16.00 | 47.00 | ▇▇▂▁▁ |
NumCarbon | 0 | 1 | 9.89 | 5.29 | 1.00 | 6.00 | 9.00 | 12.00 | 33.00 | ▇▇▃▁▁ |
NumNitrogen | 0 | 1 | 0.81 | 1.19 | 0.00 | 0.00 | 0.00 | 1.00 | 6.00 | ▇▂▁▁▁ |
NumOxygen | 0 | 1 | 1.57 | 1.73 | 0.00 | 0.00 | 1.00 | 2.00 | 13.00 | ▇▂▁▁▁ |
NumSulfer | 0 | 1 | 0.16 | 0.49 | 0.00 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
NumChlorine | 0 | 1 | 0.56 | 1.40 | 0.00 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
NumHalogen | 0 | 1 | 0.70 | 1.47 | 0.00 | 0.00 | 0.00 | 1.00 | 10.00 | ▇▁▁▁▁ |
NumRings | 0 | 1 | 1.40 | 1.30 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▃▂▁▁ |
HydrophilicFactor | 0 | 1 | -0.02 | 1.13 | -0.98 | -0.76 | -0.31 | 0.31 | 13.48 | ▇▁▁▁▁ |
SurfaceArea1 | 0 | 1 | 36.46 | 35.29 | 0.00 | 9.23 | 29.10 | 53.28 | 331.94 | ▇▂▁▁▁ |
SurfaceArea2 | 0 | 1 | 40.23 | 38.12 | 0.00 | 10.63 | 33.12 | 60.66 | 331.94 | ▇▂▁▁▁ |
##################################
# Identifying columns with low variance
###################################
# Compute near-zero-variance metrics for every column of DPA.
# saveMetrics = TRUE returns one row of metrics per column (freqRatio,
# percentUnique, zeroVar, nzv) rather than just the flagged positions.
DPA_LowVariance <- nearZeroVar(DPA,
                               freqCut = 95/5,
                               uniqueCut = 10,
                               saveMetrics = TRUE)

# Show only the columns flagged as near-zero variance.
(DPA_LowVariance[DPA_LowVariance$nzv,])
## freqRatio percentUnique zeroVar nzv
## FP154 25.41667 0.2103049 FALSE TRUE
## FP199 20.13333 0.2103049 FALSE TRUE
## FP200 19.23404 0.2103049 FALSE TRUE
# Report the columns flagged as near-zero variance in DPA_LowVariance, then
# build and summarise a copy of DPA without them.
# The flagged rows are looked up once instead of re-subsetting at every use.
DPA_LowVarianceFlagged <- DPA_LowVariance[DPA_LowVariance$nzv,]

if (nrow(DPA_LowVarianceFlagged) == 0) {

  print("No low variance predictors noted.")

} else {

  # NOTE(review): this message quotes First.Second.Mode.Ratio>4, while the
  # nearZeroVar() call that builds DPA_LowVariance uses freqCut = 95/5 (19).
  # The text is left unchanged so it still matches the recorded output.
  print(paste0("Low variance observed for ",
               nrow(DPA_LowVarianceFlagged),
               " numeric variable(s) with First.Second.Mode.Ratio>4 and Unique.Count.Ratio<0.10."))

  DPA_LowVarianceForRemoval <- nrow(DPA_LowVarianceFlagged)

  print(paste0("Low variance can be resolved by removing ",
               nrow(DPA_LowVarianceFlagged),
               " numeric variable(s)."))

  for (j in seq_len(DPA_LowVarianceForRemoval)) {
    DPA_LowVarianceRemovedVariable <- rownames(DPA_LowVarianceFlagged)[j]
    print(paste0("Variable ",
                 j,
                 " for removal: ",
                 DPA_LowVarianceRemovedVariable))
  }

  # NOTE(review): this pipeline's result is neither assigned nor printed.
  # Only the last expression of a braced block is auto-printed, so the
  # filtered summary is computed and discarded; wrap it in print() if wanted.
  DPA %>%
    skim() %>%
    dplyr::filter(skim_variable %in% rownames(DPA_LowVarianceFlagged))

  ##################################
  # Filtering out columns with low variance
  #################################
  DPA_ExcludedLowVariance <- DPA[,!names(DPA) %in% rownames(DPA_LowVarianceFlagged)]

  ##################################
  # Gathering descriptive statistics
  ##################################
  # Last expression of the branch: stored and auto-printed at top level.
  (DPA_ExcludedLowVariance_Skimmed <- skim(DPA_ExcludedLowVariance))
}
## [1] "Low variance observed for 3 numeric variable(s) with First.Second.Mode.Ratio>4 and Unique.Count.Ratio<0.10."
## [1] "Low variance can be resolved by removing 3 numeric variable(s)."
## [1] "Variable 1 for removal: FP154"
## [1] "Variable 2 for removal: FP199"
## [1] "Variable 3 for removal: FP200"
Name | DPA_ExcludedLowVariance |
Number of rows | 951 |
Number of columns | 226 |
_______________________ | |
Column type frequency: | |
numeric | 226 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
solTrainY | 0 | 1 | -2.72 | 2.05 | -11.62 | -3.96 | -2.51 | -1.36 | 1.58 | ▁▁▃▇▃ |
FP001 | 0 | 1 | 0.49 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP002 | 0 | 1 | 0.54 | 0.50 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP003 | 0 | 1 | 0.44 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP004 | 0 | 1 | 0.58 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP005 | 0 | 1 | 0.58 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP006 | 0 | 1 | 0.40 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP007 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP008 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP009 | 0 | 1 | 0.28 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP010 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP011 | 0 | 1 | 0.21 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP012 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP013 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP014 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP015 | 0 | 1 | 0.86 | 0.35 | 0.00 | 1.00 | 1.00 | 1.00 | 1.00 | ▁▁▁▁▇ |
FP016 | 0 | 1 | 0.15 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP017 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP018 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP019 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP020 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP021 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP022 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP023 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP024 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP025 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP026 | 0 | 1 | 0.08 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP027 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP028 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP029 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP030 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP031 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP032 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP033 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP034 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP035 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP036 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP037 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP038 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP039 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP040 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP041 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP042 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP043 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP044 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP045 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP046 | 0 | 1 | 0.32 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP047 | 0 | 1 | 0.27 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP048 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP049 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP050 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP051 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP052 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP053 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP054 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP055 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP056 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP057 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP058 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP059 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP060 | 0 | 1 | 0.48 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP061 | 0 | 1 | 0.45 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP062 | 0 | 1 | 0.44 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP063 | 0 | 1 | 0.43 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP064 | 0 | 1 | 0.42 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP065 | 0 | 1 | 0.59 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP066 | 0 | 1 | 0.61 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP067 | 0 | 1 | 0.38 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP068 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP069 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP070 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP071 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP072 | 0 | 1 | 0.66 | 0.47 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP073 | 0 | 1 | 0.31 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP074 | 0 | 1 | 0.32 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP075 | 0 | 1 | 0.34 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP076 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP077 | 0 | 1 | 0.32 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP078 | 0 | 1 | 0.30 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP079 | 0 | 1 | 0.69 | 0.46 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP080 | 0 | 1 | 0.30 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP081 | 0 | 1 | 0.28 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP082 | 0 | 1 | 0.71 | 0.45 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP083 | 0 | 1 | 0.27 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP084 | 0 | 1 | 0.29 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP085 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP086 | 0 | 1 | 0.27 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP087 | 0 | 1 | 0.73 | 0.45 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP088 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP089 | 0 | 1 | 0.25 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP090 | 0 | 1 | 0.25 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP091 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP092 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP093 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP094 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP095 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP096 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP097 | 0 | 1 | 0.24 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP098 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP099 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP100 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP101 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP102 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP103 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP104 | 0 | 1 | 0.22 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP105 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP106 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP107 | 0 | 1 | 0.21 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP108 | 0 | 1 | 0.21 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP109 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP110 | 0 | 1 | 0.21 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP111 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP112 | 0 | 1 | 0.19 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP113 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP114 | 0 | 1 | 0.16 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP115 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP116 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP117 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP118 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP119 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP120 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP121 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP122 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP123 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP124 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP125 | 0 | 1 | 0.16 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP126 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP127 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP128 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP129 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP130 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP131 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP132 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP133 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP134 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP135 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP136 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP137 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP138 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP139 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP140 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP141 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP142 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP143 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP144 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP145 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP146 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP147 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP148 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP149 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP150 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP151 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP152 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP153 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP155 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP156 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP157 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP158 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP159 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP160 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP161 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP162 | 0 | 1 | 0.50 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP163 | 0 | 1 | 0.48 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP164 | 0 | 1 | 0.63 | 0.48 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP165 | 0 | 1 | 0.35 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP166 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP167 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP168 | 0 | 1 | 0.67 | 0.47 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP169 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP170 | 0 | 1 | 0.18 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP171 | 0 | 1 | 0.17 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP172 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP173 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP174 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP175 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP176 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP177 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP178 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP179 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP180 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP181 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP182 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP183 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP184 | 0 | 1 | 0.08 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP185 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP186 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP187 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP188 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP189 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP190 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP191 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP192 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP193 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP194 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP195 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP196 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP197 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP198 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP201 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP202 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP203 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP204 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP205 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP206 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP207 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP208 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
MolWeight | 0 | 1 | 201.65 | 97.91 | 46.09 | 122.60 | 179.23 | 264.34 | 665.81 | ▇▆▂▁▁ |
NumAtoms | 0 | 1 | 25.51 | 12.61 | 5.00 | 17.00 | 22.00 | 31.00 | 94.00 | ▇▆▂▁▁ |
NumNonHAtoms | 0 | 1 | 13.16 | 6.50 | 2.00 | 8.00 | 12.00 | 17.00 | 47.00 | ▇▆▂▁▁ |
NumBonds | 0 | 1 | 25.91 | 13.48 | 4.00 | 17.00 | 23.00 | 31.50 | 97.00 | ▇▇▂▁▁ |
NumNonHBonds | 0 | 1 | 13.56 | 7.57 | 1.00 | 8.00 | 12.00 | 18.00 | 50.00 | ▇▇▂▁▁ |
NumMultBonds | 0 | 1 | 6.15 | 5.17 | 0.00 | 1.00 | 6.00 | 10.00 | 25.00 | ▇▆▃▁▁ |
NumRotBonds | 0 | 1 | 2.25 | 2.41 | 0.00 | 0.00 | 2.00 | 3.50 | 16.00 | ▇▂▁▁▁ |
NumDblBonds | 0 | 1 | 1.01 | 1.21 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▂▁▁▁ |
NumAromaticBonds | 0 | 1 | 5.12 | 5.26 | 0.00 | 0.00 | 6.00 | 6.00 | 25.00 | ▇▆▃▁▁ |
NumHydrogen | 0 | 1 | 12.35 | 7.32 | 0.00 | 7.00 | 11.00 | 16.00 | 47.00 | ▇▇▂▁▁ |
NumCarbon | 0 | 1 | 9.89 | 5.29 | 1.00 | 6.00 | 9.00 | 12.00 | 33.00 | ▇▇▃▁▁ |
NumNitrogen | 0 | 1 | 0.81 | 1.19 | 0.00 | 0.00 | 0.00 | 1.00 | 6.00 | ▇▂▁▁▁ |
NumOxygen | 0 | 1 | 1.57 | 1.73 | 0.00 | 0.00 | 1.00 | 2.00 | 13.00 | ▇▂▁▁▁ |
NumSulfer | 0 | 1 | 0.16 | 0.49 | 0.00 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
NumChlorine | 0 | 1 | 0.56 | 1.40 | 0.00 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
NumHalogen | 0 | 1 | 0.70 | 1.47 | 0.00 | 0.00 | 0.00 | 1.00 | 10.00 | ▇▁▁▁▁ |
NumRings | 0 | 1 | 1.40 | 1.30 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▃▂▁▁ |
HydrophilicFactor | 0 | 1 | -0.02 | 1.13 | -0.98 | -0.76 | -0.31 | 0.31 | 13.48 | ▇▁▁▁▁ |
SurfaceArea1 | 0 | 1 | 36.46 | 35.29 | 0.00 | 9.23 | 29.10 | 53.28 | 331.94 | ▇▂▁▁▁ |
SurfaceArea2 | 0 | 1 | 40.23 | 38.12 | 0.00 | 10.63 | 33.12 | 60.66 | 331.94 | ▇▂▁▁▁ |
###################################
# Verifying the data dimensions
# dim() reports (number of rows, number of columns) of the
# DPA_ExcludedLowVariance data frame; the rendered result is shown below.
###################################
dim(DPA_ExcludedLowVariance)
## [1] 951 226
##################################
# Loading dataset
##################################
# Start again from the full training set (response + all predictors).
DPA <- Solubility_Train
##################################
# Listing all predictors
##################################
# Every column except the response solTrainY is treated as a predictor.
# drop = FALSE guarantees a data frame even if a single column remains.
DPA.Predictors <- DPA[, !names(DPA) %in% c("solTrainY"), drop = FALSE]
##################################
# Listing all numeric predictors
##################################
# Columns whose names contain "FP" are excluded from this subset.
# A logical mask is used instead of negative grep() positions:
# -integer(0) would silently select zero columns when nothing matches.
DPA.Predictors.Numeric <- DPA.Predictors[,
  !grepl("FP", names(DPA.Predictors), fixed = TRUE),
  drop = FALSE
]
##################################
# Visualizing pairwise correlation between predictors
##################################
# Significance test for every pairwise Pearson correlation; the resulting
# p-value matrix ($p) is passed to corrplot() below.
DPA_CorrelationTest <- cor.mtest(DPA.Predictors.Numeric,
                                 method = "pearson",
                                 conf.level = .95)
# Upper-triangle circle plot of the Pearson correlation matrix.
# Cells whose p-value exceeds sig.level are left blank (insig = "blank").
corrplot(cor(DPA.Predictors.Numeric,
             method = "pearson",
             use = "pairwise.complete.obs"),
         method = "circle",
         type = "upper",
         order = "original",
         tl.col = "black",
         tl.cex = 0.75,
         tl.srt = 90,
         sig.level = 0.05,
         p.mat = DPA_CorrelationTest$p,
         insig = "blank")
##################################
# Identifying the highly correlated variables
##################################
# Pearson correlation matrix of the numeric predictors.
DPA_Correlation <- cor(DPA.Predictors.Numeric,
                       method = "pearson",
                       use = "pairwise.complete.obs")
# Count each unordered pair once (upper triangle only) whose absolute
# correlation exceeds 0.95. na.rm = TRUE keeps the count usable in the
# if() below even when a correlation could not be computed.
(DPA_HighlyCorrelatedCount <- sum(
  abs(DPA_Correlation[upper.tri(DPA_Correlation)]) > 0.95,
  na.rm = TRUE
))
## [1] 3
if (DPA_HighlyCorrelatedCount == 0) {
  print("No highly correlated predictors noted.")
} else {
  print(paste0("High correlation observed for ",
               (DPA_HighlyCorrelatedCount),
               " pairs of numeric variable(s) with Correlation.Coefficient>0.95."))
  # Tabulate the top-ranked correlated pairs, as many as were counted above.
  # The outer parentheses make this the visible value of the if/else.
  (DPA_HighlyCorrelatedPairs <- corr_cross(DPA.Predictors.Numeric,
                                           max_pvalue = 0.05,
                                           top = DPA_HighlyCorrelatedCount,
                                           rm.na = TRUE,
                                           grid = FALSE
  ))
}
## [1] "High correlation observed for 3 pairs of numeric variable(s) with Correlation.Coefficient>0.95."
if (DPA_HighlyCorrelatedCount > 0) {
  # findCorrelation() returns column POSITIONS within DPA_Correlation,
  # i.e. within the numeric predictors only.
  DPA_HighlyCorrelated <- findCorrelation(DPA_Correlation, cutoff = 0.95)
  DPA_HighlyCorrelatedForRemoval <- length(DPA_HighlyCorrelated)
  print(paste0("High correlation can be resolved by removing ",
               (DPA_HighlyCorrelatedForRemoval),
               " numeric variable(s)."))
  # Translate positions to column names once; the names are what identify
  # the same variables inside the full DPA data frame.
  DPA_HighlyCorrelatedNames <- colnames(DPA_Correlation)[DPA_HighlyCorrelated]
  # seq_along() avoids the 1:0 trap if nothing is flagged for removal.
  for (j in seq_along(DPA_HighlyCorrelatedNames)) {
    DPA_HighlyCorrelatedRemovedVariable <- DPA_HighlyCorrelatedNames[j]
    print(paste0("Variable ",
                 j, " for removal: ",
                 DPA_HighlyCorrelatedRemovedVariable))
  }
  ##################################
  # Filtering out columns with high correlation
  ##################################
  # Drop by NAME, not by position: DPA also contains solTrainY and the FP*
  # columns, so positions from the numeric-only correlation matrix point at
  # different columns in DPA (negative positional indexing removed
  # FP002/FP003/FP008 instead of the variables reported above).
  # NOTE(review): any summary table rendered from the positional version is
  # stale and should be regenerated.
  DPA_ExcludedHighCorrelation <- DPA[,
    !names(DPA) %in% DPA_HighlyCorrelatedNames,
    drop = FALSE
  ]
  ##################################
  # Gathering descriptive statistics
  ##################################
  # Kept as the last expression (in parentheses) so the summary is printed
  # as the visible value of the if block.
  (DPA_ExcludedHighCorrelation_Skimmed <- skim(DPA_ExcludedHighCorrelation))
}
## [1] "High correlation can be resolved by removing 3 numeric variable(s)."
## [1] "Variable 1 for removal: NumNonHAtoms"
## [1] "Variable 2 for removal: NumBonds"
## [1] "Variable 3 for removal: NumAromaticBonds"
Name | DPA_ExcludedHighCorrelati… |
Number of rows | 951 |
Number of columns | 226 |
_______________________ | |
Column type frequency: | |
numeric | 226 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
solTrainY | 0 | 1 | -2.72 | 2.05 | -11.62 | -3.96 | -2.51 | -1.36 | 1.58 | ▁▁▃▇▃ |
FP001 | 0 | 1 | 0.49 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP004 | 0 | 1 | 0.58 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP005 | 0 | 1 | 0.58 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP006 | 0 | 1 | 0.40 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP007 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP009 | 0 | 1 | 0.28 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP010 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP011 | 0 | 1 | 0.21 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP012 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP013 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP014 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP015 | 0 | 1 | 0.86 | 0.35 | 0.00 | 1.00 | 1.00 | 1.00 | 1.00 | ▁▁▁▁▇ |
FP016 | 0 | 1 | 0.15 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP017 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP018 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP019 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP020 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP021 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP022 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP023 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP024 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP025 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP026 | 0 | 1 | 0.08 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP027 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP028 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP029 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP030 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP031 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP032 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP033 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP034 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP035 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP036 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP037 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP038 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP039 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP040 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP041 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP042 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP043 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP044 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP045 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP046 | 0 | 1 | 0.32 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP047 | 0 | 1 | 0.27 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP048 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP049 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP050 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP051 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP052 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP053 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP054 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP055 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP056 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP057 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP058 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP059 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP060 | 0 | 1 | 0.48 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP061 | 0 | 1 | 0.45 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP062 | 0 | 1 | 0.44 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP063 | 0 | 1 | 0.43 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP064 | 0 | 1 | 0.42 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP065 | 0 | 1 | 0.59 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP066 | 0 | 1 | 0.61 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP067 | 0 | 1 | 0.38 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP068 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP069 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP070 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP071 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP072 | 0 | 1 | 0.66 | 0.47 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP073 | 0 | 1 | 0.31 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP074 | 0 | 1 | 0.32 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP075 | 0 | 1 | 0.34 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP076 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP077 | 0 | 1 | 0.32 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP078 | 0 | 1 | 0.30 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP079 | 0 | 1 | 0.69 | 0.46 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP080 | 0 | 1 | 0.30 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP081 | 0 | 1 | 0.28 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP082 | 0 | 1 | 0.71 | 0.45 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP083 | 0 | 1 | 0.27 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP084 | 0 | 1 | 0.29 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP085 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP086 | 0 | 1 | 0.27 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP087 | 0 | 1 | 0.73 | 0.45 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP088 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP089 | 0 | 1 | 0.25 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP090 | 0 | 1 | 0.25 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP091 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP092 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP093 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP094 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP095 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP096 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP097 | 0 | 1 | 0.24 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP098 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP099 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP100 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP101 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP102 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP103 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP104 | 0 | 1 | 0.22 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP105 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP106 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP107 | 0 | 1 | 0.21 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP108 | 0 | 1 | 0.21 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP109 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP110 | 0 | 1 | 0.21 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP111 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP112 | 0 | 1 | 0.19 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP113 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP114 | 0 | 1 | 0.16 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP115 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP116 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP117 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP118 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP119 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP120 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP121 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP122 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP123 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP124 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP125 | 0 | 1 | 0.16 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP126 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP127 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP128 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP129 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP130 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP131 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP132 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP133 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP134 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP135 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP136 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP137 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP138 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP139 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP140 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP141 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP142 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP143 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP144 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP145 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP146 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP147 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP148 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP149 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP150 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP151 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP152 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP153 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP154 | 0 | 1 | 0.04 | 0.19 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP155 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP156 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP157 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP158 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP159 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP160 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP161 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP162 | 0 | 1 | 0.50 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP163 | 0 | 1 | 0.48 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP164 | 0 | 1 | 0.63 | 0.48 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP165 | 0 | 1 | 0.35 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP166 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP167 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP168 | 0 | 1 | 0.67 | 0.47 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP169 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP170 | 0 | 1 | 0.18 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP171 | 0 | 1 | 0.17 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP172 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP173 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP174 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP175 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP176 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP177 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP178 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP179 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP180 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP181 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP182 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP183 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP184 | 0 | 1 | 0.08 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP185 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP186 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP187 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP188 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP189 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP190 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP191 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP192 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP193 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP194 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP195 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP196 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP197 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP198 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP199 | 0 | 1 | 0.05 | 0.21 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP200 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP201 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP202 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP203 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP204 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP205 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP206 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP207 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP208 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
MolWeight | 0 | 1 | 201.65 | 97.91 | 46.09 | 122.60 | 179.23 | 264.34 | 665.81 | ▇▆▂▁▁ |
NumAtoms | 0 | 1 | 25.51 | 12.61 | 5.00 | 17.00 | 22.00 | 31.00 | 94.00 | ▇▆▂▁▁ |
NumNonHAtoms | 0 | 1 | 13.16 | 6.50 | 2.00 | 8.00 | 12.00 | 17.00 | 47.00 | ▇▆▂▁▁ |
NumBonds | 0 | 1 | 25.91 | 13.48 | 4.00 | 17.00 | 23.00 | 31.50 | 97.00 | ▇▇▂▁▁ |
NumNonHBonds | 0 | 1 | 13.56 | 7.57 | 1.00 | 8.00 | 12.00 | 18.00 | 50.00 | ▇▇▂▁▁ |
NumMultBonds | 0 | 1 | 6.15 | 5.17 | 0.00 | 1.00 | 6.00 | 10.00 | 25.00 | ▇▆▃▁▁ |
NumRotBonds | 0 | 1 | 2.25 | 2.41 | 0.00 | 0.00 | 2.00 | 3.50 | 16.00 | ▇▂▁▁▁ |
NumDblBonds | 0 | 1 | 1.01 | 1.21 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▂▁▁▁ |
NumAromaticBonds | 0 | 1 | 5.12 | 5.26 | 0.00 | 0.00 | 6.00 | 6.00 | 25.00 | ▇▆▃▁▁ |
NumHydrogen | 0 | 1 | 12.35 | 7.32 | 0.00 | 7.00 | 11.00 | 16.00 | 47.00 | ▇▇▂▁▁ |
NumCarbon | 0 | 1 | 9.89 | 5.29 | 1.00 | 6.00 | 9.00 | 12.00 | 33.00 | ▇▇▃▁▁ |
NumNitrogen | 0 | 1 | 0.81 | 1.19 | 0.00 | 0.00 | 0.00 | 1.00 | 6.00 | ▇▂▁▁▁ |
NumOxygen | 0 | 1 | 1.57 | 1.73 | 0.00 | 0.00 | 1.00 | 2.00 | 13.00 | ▇▂▁▁▁ |
NumSulfer | 0 | 1 | 0.16 | 0.49 | 0.00 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
NumChlorine | 0 | 1 | 0.56 | 1.40 | 0.00 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
NumHalogen | 0 | 1 | 0.70 | 1.47 | 0.00 | 0.00 | 0.00 | 1.00 | 10.00 | ▇▁▁▁▁ |
NumRings | 0 | 1 | 1.40 | 1.30 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▃▂▁▁ |
HydrophilicFactor | 0 | 1 | -0.02 | 1.13 | -0.98 | -0.76 | -0.31 | 0.31 | 13.48 | ▇▁▁▁▁ |
SurfaceArea1 | 0 | 1 | 36.46 | 35.29 | 0.00 | 9.23 | 29.10 | 53.28 | 331.94 | ▇▂▁▁▁ |
SurfaceArea2 | 0 | 1 | 40.23 | 38.12 | 0.00 | 10.63 | 33.12 | 60.66 | 331.94 | ▇▂▁▁▁ |
###################################
# Verifying the data dimensions
###################################
dim(DPA_ExcludedHighCorrelation)
## [1] 951 226
##################################
# Loading dataset
##################################
DPA <- Solubility_Train

##################################
# Listing all predictors
# (every column except the response solTrainY)
##################################
DPA.Predictors <- DPA[, !names(DPA) %in% c("solTrainY"), drop = FALSE]

##################################
# Listing all numeric predictors
# (vapply guarantees a logical mask regardless of the input shape)
##################################
DPA.Predictors.Numeric <- DPA.Predictors[
  , vapply(DPA.Predictors, is.numeric, logical(1)),
  drop = FALSE
]

##################################
# Identifying the linearly dependent variables
##################################
DPA_LinearlyDependent <- findLinearCombos(DPA.Predictors.Numeric)

# Number of linearly dependent subsets; the outer parentheses print the value
(DPA_LinearlyDependentCount <- length(DPA_LinearlyDependent$linearCombos))
## [1] 2

if (DPA_LinearlyDependentCount == 0) {
  print("No linearly dependent predictors noted.")
} else {
  print(paste0("Linear dependency observed for ",
               (DPA_LinearlyDependentCount),
               " subset(s) of numeric variable(s)."))

  # linearCombos holds column positions within DPA.Predictors.Numeric,
  # so the names are looked up against that same data frame.
  for (i in seq_len(DPA_LinearlyDependentCount)) {
    DPA_LinearlyDependentSubset <-
      colnames(DPA.Predictors.Numeric)[DPA_LinearlyDependent$linearCombos[[i]]]

    # paste0 is vectorised: one message is printed per variable in the subset
    print(paste0("Linear dependent variable(s) for subset ",
                 i, " include: ",
                 DPA_LinearlyDependentSubset))
  }
}
## [1] "Linear dependency observed for 2 subset(s) of numeric variable(s)."
## [1] "Linear dependent variable(s) for subset 1 include: NumNonHBonds"
## [2] "Linear dependent variable(s) for subset 1 include: NumAtoms"
## [3] "Linear dependent variable(s) for subset 1 include: NumNonHAtoms"
## [4] "Linear dependent variable(s) for subset 1 include: NumBonds"
## [1] "Linear dependent variable(s) for subset 2 include: NumHydrogen"
## [2] "Linear dependent variable(s) for subset 2 include: NumAtoms"
## [3] "Linear dependent variable(s) for subset 2 include: NumNonHAtoms"
##################################
# Identifying the linearly dependent variables for removal
##################################
if (DPA_LinearlyDependentCount > 0) {
  DPA_LinearlyDependent <- findLinearCombos(DPA.Predictors.Numeric)

  DPA_LinearlyDependentForRemoval <- length(DPA_LinearlyDependent$remove)

  print(paste0("Linear dependency can be resolved by removing ",
               (DPA_LinearlyDependentForRemoval), " numeric variable(s)."))

  for (j in seq_len(DPA_LinearlyDependentForRemoval)) {
    DPA_LinearlyDependentRemovedVariable <-
      colnames(DPA.Predictors.Numeric)[DPA_LinearlyDependent$remove[j]]

    print(paste0("Variable ",
                 j, " for removal: ",
                 DPA_LinearlyDependentRemovedVariable))
  }

  ##################################
  # Filtering out columns with linear dependency
  ##################################
  # The positions in $remove refer to DPA.Predictors.Numeric, which does not
  # contain the response column. DPA does (solTrainY), so reusing the positions
  # as a negative index on DPA would drop the wrong columns. Translate the
  # positions to column names first and filter DPA by name instead.
  DPA_LinearlyDependentRemovedNames <-
    colnames(DPA.Predictors.Numeric)[DPA_LinearlyDependent$remove]

  DPA_ExcludedLinearlyDependent <- DPA[
    , !names(DPA) %in% DPA_LinearlyDependentRemovedNames,
    drop = FALSE
  ]

  ##################################
  # Gathering descriptive statistics
  ##################################
  # Kept as the last expression of the if-block so the summary is printed
  (DPA_ExcludedLinearlyDependent_Skimmed <- skim(DPA_ExcludedLinearlyDependent))
}
## [1] "Linear dependency can be resolved by removing 2 numeric variable(s)."
## [1] "Variable 1 for removal: NumNonHBonds"
## [1] "Variable 2 for removal: NumHydrogen"
Name | DPA_ExcludedLinearlyDepen… |
Number of rows | 951 |
Number of columns | 227 |
_______________________ | |
Column type frequency: | |
numeric | 227 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
solTrainY | 0 | 1 | -2.72 | 2.05 | -11.62 | -3.96 | -2.51 | -1.36 | 1.58 | ▁▁▃▇▃ |
FP001 | 0 | 1 | 0.49 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP002 | 0 | 1 | 0.54 | 0.50 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP003 | 0 | 1 | 0.44 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP004 | 0 | 1 | 0.58 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP005 | 0 | 1 | 0.58 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP006 | 0 | 1 | 0.40 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP007 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP008 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP009 | 0 | 1 | 0.28 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP010 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP011 | 0 | 1 | 0.21 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP012 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP013 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP014 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP015 | 0 | 1 | 0.86 | 0.35 | 0.00 | 1.00 | 1.00 | 1.00 | 1.00 | ▁▁▁▁▇ |
FP016 | 0 | 1 | 0.15 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP017 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP018 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP019 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP020 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP021 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP022 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP023 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP024 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP025 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP026 | 0 | 1 | 0.08 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP027 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP028 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP029 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP030 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP031 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP032 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP033 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP034 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP035 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP036 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP037 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP038 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP039 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP040 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP041 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP042 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP043 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP044 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP045 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP046 | 0 | 1 | 0.32 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP047 | 0 | 1 | 0.27 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP048 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP049 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP050 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP051 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP052 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP053 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP054 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP055 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP056 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP057 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP058 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP059 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP060 | 0 | 1 | 0.48 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP061 | 0 | 1 | 0.45 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP062 | 0 | 1 | 0.44 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP063 | 0 | 1 | 0.43 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP064 | 0 | 1 | 0.42 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP065 | 0 | 1 | 0.59 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP066 | 0 | 1 | 0.61 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP067 | 0 | 1 | 0.38 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP068 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP069 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP070 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP071 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP072 | 0 | 1 | 0.66 | 0.47 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP073 | 0 | 1 | 0.31 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP074 | 0 | 1 | 0.32 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP075 | 0 | 1 | 0.34 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP076 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP077 | 0 | 1 | 0.32 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP078 | 0 | 1 | 0.30 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP079 | 0 | 1 | 0.69 | 0.46 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP080 | 0 | 1 | 0.30 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP081 | 0 | 1 | 0.28 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP082 | 0 | 1 | 0.71 | 0.45 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP083 | 0 | 1 | 0.27 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP084 | 0 | 1 | 0.29 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP085 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP086 | 0 | 1 | 0.27 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP087 | 0 | 1 | 0.73 | 0.45 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP088 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP089 | 0 | 1 | 0.25 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP090 | 0 | 1 | 0.25 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP091 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP092 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP093 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP094 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP095 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP096 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP097 | 0 | 1 | 0.24 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP098 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP099 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP100 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP101 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP102 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP103 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP104 | 0 | 1 | 0.22 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP105 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP106 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP107 | 0 | 1 | 0.21 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP108 | 0 | 1 | 0.21 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP109 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP110 | 0 | 1 | 0.21 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP111 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP112 | 0 | 1 | 0.19 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP113 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP114 | 0 | 1 | 0.16 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP115 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP116 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP117 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP118 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP119 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP120 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP121 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP122 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP123 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP124 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP125 | 0 | 1 | 0.16 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP126 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP127 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP128 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP129 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP130 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP131 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP132 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP133 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP134 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP135 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP136 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP137 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP138 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP139 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP140 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP141 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP142 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP143 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP144 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP145 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP146 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP147 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP148 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP149 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP150 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP151 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP152 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP153 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP154 | 0 | 1 | 0.04 | 0.19 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP155 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP156 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP157 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP158 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP159 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP160 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP161 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP162 | 0 | 1 | 0.50 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP163 | 0 | 1 | 0.48 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP164 | 0 | 1 | 0.63 | 0.48 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP165 | 0 | 1 | 0.35 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP166 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP167 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP168 | 0 | 1 | 0.67 | 0.47 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP169 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP170 | 0 | 1 | 0.18 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP171 | 0 | 1 | 0.17 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP172 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP173 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP174 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP175 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP176 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP177 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP178 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP179 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP180 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP181 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP182 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP183 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP184 | 0 | 1 | 0.08 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP185 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP186 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP187 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP188 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP189 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP190 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP191 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP192 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP193 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP194 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP195 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP196 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP197 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP198 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP199 | 0 | 1 | 0.05 | 0.21 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP200 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP201 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP202 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP203 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP204 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP205 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP206 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP207 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP208 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
MolWeight | 0 | 1 | 201.65 | 97.91 | 46.09 | 122.60 | 179.23 | 264.34 | 665.81 | ▇▆▂▁▁ |
NumAtoms | 0 | 1 | 25.51 | 12.61 | 5.00 | 17.00 | 22.00 | 31.00 | 94.00 | ▇▆▂▁▁ |
NumNonHAtoms | 0 | 1 | 13.16 | 6.50 | 2.00 | 8.00 | 12.00 | 17.00 | 47.00 | ▇▆▂▁▁ |
NumNonHBonds | 0 | 1 | 13.56 | 7.57 | 1.00 | 8.00 | 12.00 | 18.00 | 50.00 | ▇▇▂▁▁ |
NumMultBonds | 0 | 1 | 6.15 | 5.17 | 0.00 | 1.00 | 6.00 | 10.00 | 25.00 | ▇▆▃▁▁ |
NumRotBonds | 0 | 1 | 2.25 | 2.41 | 0.00 | 0.00 | 2.00 | 3.50 | 16.00 | ▇▂▁▁▁ |
NumDblBonds | 0 | 1 | 1.01 | 1.21 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▂▁▁▁ |
NumHydrogen | 0 | 1 | 12.35 | 7.32 | 0.00 | 7.00 | 11.00 | 16.00 | 47.00 | ▇▇▂▁▁ |
NumCarbon | 0 | 1 | 9.89 | 5.29 | 1.00 | 6.00 | 9.00 | 12.00 | 33.00 | ▇▇▃▁▁ |
NumNitrogen | 0 | 1 | 0.81 | 1.19 | 0.00 | 0.00 | 0.00 | 1.00 | 6.00 | ▇▂▁▁▁ |
NumOxygen | 0 | 1 | 1.57 | 1.73 | 0.00 | 0.00 | 1.00 | 2.00 | 13.00 | ▇▂▁▁▁ |
NumSulfer | 0 | 1 | 0.16 | 0.49 | 0.00 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
NumChlorine | 0 | 1 | 0.56 | 1.40 | 0.00 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
NumHalogen | 0 | 1 | 0.70 | 1.47 | 0.00 | 0.00 | 0.00 | 1.00 | 10.00 | ▇▁▁▁▁ |
NumRings | 0 | 1 | 1.40 | 1.30 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▃▂▁▁ |
HydrophilicFactor | 0 | 1 | -0.02 | 1.13 | -0.98 | -0.76 | -0.31 | 0.31 | 13.48 | ▇▁▁▁▁ |
SurfaceArea1 | 0 | 1 | 36.46 | 35.29 | 0.00 | 9.23 | 29.10 | 53.28 | 331.94 | ▇▂▁▁▁ |
SurfaceArea2 | 0 | 1 | 40.23 | 38.12 | 0.00 | 10.63 | 33.12 | 60.66 | 331.94 | ▇▂▁▁▁ |
###################################
# Verifying the data dimensions
###################################
dim(DPA_ExcludedLinearlyDependent)
## [1] 951 227
##################################
# Loading dataset
##################################
DPA <- Solubility_Train

##################################
# Listing all predictors
# (every column except the response solTrainY)
##################################
DPA.Predictors <- DPA[, !names(DPA) %in% c("solTrainY"), drop = FALSE]

##################################
# Listing all numeric predictors
# (columns whose name does not contain "FP")
##################################
# A logical mask is used instead of -(grep(...)): a negative index built from
# an empty grep() result would silently drop every column.
DPA.Predictors.Numeric <- DPA.Predictors[
  , !grepl("FP", names(DPA.Predictors)),
  drop = FALSE
]

##################################
# Applying a Box-Cox transformation
##################################
# preProcess() estimates the transformation; predict() applies it.
# NOTE(review): Box-Cox presumably only affects strictly positive predictors,
# leaving the others unchanged - confirm against the caret documentation.
DPA_BoxCox <- preProcess(DPA.Predictors.Numeric, method = c("BoxCox"))
DPA_BoxCoxTransformed <- predict(DPA_BoxCox, DPA.Predictors.Numeric)

##################################
# Gathering descriptive statistics
##################################
# The outer parentheses print the summary as well as storing it
(DPA_BoxCoxTransformedSkimmed <- skim(DPA_BoxCoxTransformed))
Name | DPA_BoxCoxTransformed |
Number of rows | 951 |
Number of columns | 20 |
_______________________ | |
Column type frequency: | |
numeric | 20 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
MolWeight | 0 | 1 | 5.19 | 0.48 | 3.83 | 4.81 | 5.19 | 5.58 | 6.50 | ▁▆▇▆▁ |
NumAtoms | 0 | 1 | 3.13 | 0.48 | 1.61 | 2.83 | 3.09 | 3.43 | 4.54 | ▁▃▇▃▁ |
NumNonHAtoms | 0 | 1 | 2.46 | 0.50 | 0.69 | 2.08 | 2.48 | 2.83 | 3.85 | ▁▃▇▇▁ |
NumBonds | 0 | 1 | 4.39 | 0.96 | 1.60 | 3.81 | 4.36 | 4.97 | 7.48 | ▁▅▇▃▁ |
NumNonHBonds | 0 | 1 | 3.21 | 0.95 | 0.00 | 2.58 | 3.22 | 3.91 | 5.93 | ▁▃▇▆▁ |
NumMultBonds | 0 | 1 | 6.15 | 5.17 | 0.00 | 1.00 | 6.00 | 10.00 | 25.00 | ▇▆▃▁▁ |
NumRotBonds | 0 | 1 | 2.25 | 2.41 | 0.00 | 0.00 | 2.00 | 3.50 | 16.00 | ▇▂▁▁▁ |
NumDblBonds | 0 | 1 | 1.01 | 1.21 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▂▁▁▁ |
NumAromaticBonds | 0 | 1 | 5.12 | 5.26 | 0.00 | 0.00 | 6.00 | 6.00 | 25.00 | ▇▆▃▁▁ |
NumHydrogen | 0 | 1 | 12.35 | 7.32 | 0.00 | 7.00 | 11.00 | 16.00 | 47.00 | ▇▇▂▁▁ |
NumCarbon | 0 | 1 | 3.54 | 1.34 | 0.00 | 2.62 | 3.52 | 4.25 | 7.62 | ▂▇▇▃▁ |
NumNitrogen | 0 | 1 | 0.81 | 1.19 | 0.00 | 0.00 | 0.00 | 1.00 | 6.00 | ▇▂▁▁▁ |
NumOxygen | 0 | 1 | 1.57 | 1.73 | 0.00 | 0.00 | 1.00 | 2.00 | 13.00 | ▇▂▁▁▁ |
NumSulfer | 0 | 1 | 0.16 | 0.49 | 0.00 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
NumChlorine | 0 | 1 | 0.56 | 1.40 | 0.00 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
NumHalogen | 0 | 1 | 0.70 | 1.47 | 0.00 | 0.00 | 0.00 | 1.00 | 10.00 | ▇▁▁▁▁ |
NumRings | 0 | 1 | 1.40 | 1.30 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▃▂▁▁ |
HydrophilicFactor | 0 | 1 | -0.02 | 1.13 | -0.98 | -0.76 | -0.31 | 0.31 | 13.48 | ▇▁▁▁▁ |
SurfaceArea1 | 0 | 1 | 36.46 | 35.29 | 0.00 | 9.23 | 29.10 | 53.28 | 331.94 | ▇▂▁▁▁ |
SurfaceArea2 | 0 | 1 | 40.23 | 38.12 | 0.00 | 10.63 | 33.12 | 60.66 | 331.94 | ▇▂▁▁▁ |
###################################
# Verifying the data dimensions
###################################
dim(DPA_BoxCoxTransformed)
## [1] 951 20
##################################
# Loading dataset
##################################
DPA <- Solubility_Train

##################################
# Listing all predictors
# (every column except the response solTrainY)
##################################
DPA.Predictors <- DPA[, !names(DPA) %in% c("solTrainY"), drop = FALSE]

##################################
# Listing all numeric predictors
# (columns whose name does not contain "FP")
##################################
# A logical mask is used instead of -(grep(...)): a negative index built from
# an empty grep() result would silently drop every column.
DPA.Predictors.Numeric <- DPA.Predictors[
  , !grepl("FP", names(DPA.Predictors)),
  drop = FALSE
]

##################################
# Applying a Box-Cox transformation
##################################
DPA_BoxCox <- preProcess(DPA.Predictors.Numeric, method = c("BoxCox"))
DPA_BoxCoxTransformed <- predict(DPA_BoxCox, DPA.Predictors.Numeric)

##################################
# Applying a center and scale data transformation
##################################
# Centering and scaling are estimated on the Box-Cox output, then applied to
# that same data frame.
DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaled <-
  preProcess(DPA_BoxCoxTransformed, method = c("center", "scale"))

DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed <-
  predict(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaled,
          DPA_BoxCoxTransformed)

##################################
# Gathering descriptive statistics
##################################
# The outer parentheses print the summary as well as storing it
(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformedSkimmed <-
  skim(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed))
Name | DPA.Predictors.Numeric_Bo… |
Number of rows | 951 |
Number of columns | 20 |
_______________________ | |
Column type frequency: | |
numeric | 20 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
MolWeight | 0 | 1 | 0 | 1 | -2.84 | -0.80 | -0.01 | 0.80 | 2.72 | ▁▆▇▆▁ |
NumAtoms | 0 | 1 | 0 | 1 | -3.16 | -0.61 | -0.07 | 0.64 | 2.95 | ▁▃▇▃▁ |
NumNonHAtoms | 0 | 1 | 0 | 1 | -3.53 | -0.76 | 0.06 | 0.75 | 2.79 | ▁▃▇▇▁ |
NumBonds | 0 | 1 | 0 | 1 | -2.92 | -0.61 | -0.04 | 0.60 | 3.23 | ▁▅▇▃▁ |
NumNonHBonds | 0 | 1 | 0 | 1 | -3.38 | -0.67 | 0.01 | 0.74 | 2.86 | ▁▃▇▆▁ |
NumMultBonds | 0 | 1 | 0 | 1 | -1.19 | -1.00 | -0.03 | 0.74 | 3.65 | ▇▇▃▁▁ |
NumRotBonds | 0 | 1 | 0 | 1 | -0.93 | -0.93 | -0.10 | 0.52 | 5.71 | ▇▂▁▁▁ |
NumDblBonds | 0 | 1 | 0 | 1 | -0.83 | -0.83 | -0.01 | 0.82 | 4.95 | ▇▂▁▁▁ |
NumAromaticBonds | 0 | 1 | 0 | 1 | -0.97 | -0.97 | 0.17 | 0.17 | 3.78 | ▇▆▃▁▁ |
NumHydrogen | 0 | 1 | 0 | 1 | -1.69 | -0.73 | -0.18 | 0.50 | 4.74 | ▇▇▂▁▁ |
NumCarbon | 0 | 1 | 0 | 1 | -2.64 | -0.69 | -0.01 | 0.54 | 3.06 | ▂▇▇▃▁ |
NumNitrogen | 0 | 1 | 0 | 1 | -0.69 | -0.69 | -0.69 | 0.16 | 4.37 | ▇▂▁▁▁ |
NumOxygen | 0 | 1 | 0 | 1 | -0.91 | -0.91 | -0.33 | 0.25 | 6.61 | ▇▂▁▁▁ |
NumSulfer | 0 | 1 | 0 | 1 | -0.34 | -0.34 | -0.34 | -0.34 | 7.86 | ▇▁▁▁▁ |
NumChlorine | 0 | 1 | 0 | 1 | -0.40 | -0.40 | -0.40 | -0.40 | 6.74 | ▇▁▁▁▁ |
NumHalogen | 0 | 1 | 0 | 1 | -0.47 | -0.47 | -0.47 | 0.20 | 6.32 | ▇▁▁▁▁ |
NumRings | 0 | 1 | 0 | 1 | -1.08 | -1.08 | -0.31 | 0.46 | 4.31 | ▇▃▂▁▁ |
HydrophilicFactor | 0 | 1 | 0 | 1 | -0.86 | -0.66 | -0.26 | 0.30 | 11.99 | ▇▁▁▁▁ |
SurfaceArea1 | 0 | 1 | 0 | 1 | -1.03 | -0.77 | -0.21 | 0.48 | 8.37 | ▇▂▁▁▁ |
SurfaceArea2 | 0 | 1 | 0 | 1 | -1.06 | -0.78 | -0.19 | 0.54 | 7.65 | ▇▂▁▁▁ |
###################################
# Verifying the data dimensions
###################################
dim(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed)
## [1] 951 20
##################################
# Creating the pre-modelling
# train set
##################################
# Response variable
Log_Solubility <- DPA$solTrainY

# Fingerprint predictors (column names containing "FP"), converted to factors
PMA.Predictors.Factor <- DPA.Predictors[
  , grepl("FP", names(DPA.Predictors)),
  drop = FALSE
]
PMA.Predictors.Factor <- as.data.frame(lapply(PMA.Predictors.Factor, factor))

# Numeric predictors after Box-Cox, centering and scaling
PMA.Predictors.Numeric <-
  DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed

# Response first, then the factor predictors, then the numeric predictors
PMA_BoxCoxTransformed_CenteredScaledTransformed <-
  cbind(Log_Solubility, PMA.Predictors.Factor, PMA.Predictors.Numeric)

##################################
# Filtering out columns noted with data quality issues including
# zero and near-zero variance,
# high correlation and linear dependencies
# to create the pre-modelling dataset
##################################
# Columns are excluded by name, so the result does not depend on column order
PMA_BoxCoxTransformed_CenteredScaledTransformed_ExcludedLowVariance_ExcludedLinearlyDependent_ExcludedHighCorrelation <-
  PMA_BoxCoxTransformed_CenteredScaledTransformed[
    , !names(PMA_BoxCoxTransformed_CenteredScaledTransformed) %in%
      c("FP154", "FP199", "FP200",
        "NumNonHBonds", "NumHydrogen", "NumNonHAtoms",
        "NumAromaticBonds", "NumAtoms"),
    drop = FALSE
  ]

PMA_PreModelling_Train <-
  PMA_BoxCoxTransformed_CenteredScaledTransformed_ExcludedLowVariance_ExcludedLinearlyDependent_ExcludedHighCorrelation

##################################
# Gathering descriptive statistics
##################################
# The outer parentheses print the summary as well as storing it
(PMA_PreModelling_Train_Skimmed <- skim(PMA_PreModelling_Train))
Name | PMA_PreModelling_Train |
Number of rows | 951 |
Number of columns | 221 |
_______________________ | |
Column type frequency: | |
factor | 205 |
numeric | 16 |
________________________ | |
Group variables | None |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
FP001 | 0 | 1 | FALSE | 2 | 0: 482, 1: 469 |
FP002 | 0 | 1 | FALSE | 2 | 1: 513, 0: 438 |
FP003 | 0 | 1 | FALSE | 2 | 0: 536, 1: 415 |
FP004 | 0 | 1 | FALSE | 2 | 1: 556, 0: 395 |
FP005 | 0 | 1 | FALSE | 2 | 1: 551, 0: 400 |
FP006 | 0 | 1 | FALSE | 2 | 0: 570, 1: 381 |
FP007 | 0 | 1 | FALSE | 2 | 0: 605, 1: 346 |
FP008 | 0 | 1 | FALSE | 2 | 0: 641, 1: 310 |
FP009 | 0 | 1 | FALSE | 2 | 0: 685, 1: 266 |
FP010 | 0 | 1 | FALSE | 2 | 0: 781, 1: 170 |
FP011 | 0 | 1 | FALSE | 2 | 0: 747, 1: 204 |
FP012 | 0 | 1 | FALSE | 2 | 0: 783, 1: 168 |
FP013 | 0 | 1 | FALSE | 2 | 0: 793, 1: 158 |
FP014 | 0 | 1 | FALSE | 2 | 0: 798, 1: 153 |
FP015 | 0 | 1 | FALSE | 2 | 1: 818, 0: 133 |
FP016 | 0 | 1 | FALSE | 2 | 0: 812, 1: 139 |
FP017 | 0 | 1 | FALSE | 2 | 0: 814, 1: 137 |
FP018 | 0 | 1 | FALSE | 2 | 0: 826, 1: 125 |
FP019 | 0 | 1 | FALSE | 2 | 0: 835, 1: 116 |
FP020 | 0 | 1 | FALSE | 2 | 0: 837, 1: 114 |
FP021 | 0 | 1 | FALSE | 2 | 0: 836, 1: 115 |
FP022 | 0 | 1 | FALSE | 2 | 0: 852, 1: 99 |
FP023 | 0 | 1 | FALSE | 2 | 0: 834, 1: 117 |
FP024 | 0 | 1 | FALSE | 2 | 0: 844, 1: 107 |
FP025 | 0 | 1 | FALSE | 2 | 0: 841, 1: 110 |
FP026 | 0 | 1 | FALSE | 2 | 0: 871, 1: 80 |
FP027 | 0 | 1 | FALSE | 2 | 0: 858, 1: 93 |
FP028 | 0 | 1 | FALSE | 2 | 0: 850, 1: 101 |
FP029 | 0 | 1 | FALSE | 2 | 0: 854, 1: 97 |
FP030 | 0 | 1 | FALSE | 2 | 0: 862, 1: 89 |
FP031 | 0 | 1 | FALSE | 2 | 0: 866, 1: 85 |
FP032 | 0 | 1 | FALSE | 2 | 0: 881, 1: 70 |
FP033 | 0 | 1 | FALSE | 2 | 0: 885, 1: 66 |
FP034 | 0 | 1 | FALSE | 2 | 0: 875, 1: 76 |
FP035 | 0 | 1 | FALSE | 2 | 0: 882, 1: 69 |
FP036 | 0 | 1 | FALSE | 2 | 0: 879, 1: 72 |
FP037 | 0 | 1 | FALSE | 2 | 0: 884, 1: 67 |
FP038 | 0 | 1 | FALSE | 2 | 0: 869, 1: 82 |
FP039 | 0 | 1 | FALSE | 2 | 0: 880, 1: 71 |
FP040 | 0 | 1 | FALSE | 2 | 0: 886, 1: 65 |
FP041 | 0 | 1 | FALSE | 2 | 0: 891, 1: 60 |
FP042 | 0 | 1 | FALSE | 2 | 0: 897, 1: 54 |
FP043 | 0 | 1 | FALSE | 2 | 0: 888, 1: 63 |
FP044 | 0 | 1 | FALSE | 2 | 0: 894, 1: 57 |
FP045 | 0 | 1 | FALSE | 2 | 0: 898, 1: 53 |
FP046 | 0 | 1 | FALSE | 2 | 0: 651, 1: 300 |
FP047 | 0 | 1 | FALSE | 2 | 0: 698, 1: 253 |
FP048 | 0 | 1 | FALSE | 2 | 0: 833, 1: 118 |
FP049 | 0 | 1 | FALSE | 2 | 0: 835, 1: 116 |
FP050 | 0 | 1 | FALSE | 2 | 0: 844, 1: 107 |
FP051 | 0 | 1 | FALSE | 2 | 0: 847, 1: 104 |
FP052 | 0 | 1 | FALSE | 2 | 0: 864, 1: 87 |
FP053 | 0 | 1 | FALSE | 2 | 0: 862, 1: 89 |
FP054 | 0 | 1 | FALSE | 2 | 0: 879, 1: 72 |
FP055 | 0 | 1 | FALSE | 2 | 0: 900, 1: 51 |
FP056 | 0 | 1 | FALSE | 2 | 0: 889, 1: 62 |
FP057 | 0 | 1 | FALSE | 2 | 0: 837, 1: 114 |
FP058 | 0 | 1 | FALSE | 2 | 0: 843, 1: 108 |
FP059 | 0 | 1 | FALSE | 2 | 0: 899, 1: 52 |
FP060 | 0 | 1 | FALSE | 2 | 0: 493, 1: 458 |
FP061 | 0 | 1 | FALSE | 2 | 0: 526, 1: 425 |
FP062 | 0 | 1 | FALSE | 2 | 0: 535, 1: 416 |
FP063 | 0 | 1 | FALSE | 2 | 0: 546, 1: 405 |
FP064 | 0 | 1 | FALSE | 2 | 0: 555, 1: 396 |
FP065 | 0 | 1 | FALSE | 2 | 1: 564, 0: 387 |
FP066 | 0 | 1 | FALSE | 2 | 1: 580, 0: 371 |
FP067 | 0 | 1 | FALSE | 2 | 0: 590, 1: 361 |
FP068 | 0 | 1 | FALSE | 2 | 0: 607, 1: 344 |
FP069 | 0 | 1 | FALSE | 2 | 0: 607, 1: 344 |
FP070 | 0 | 1 | FALSE | 2 | 0: 613, 1: 338 |
FP071 | 0 | 1 | FALSE | 2 | 0: 640, 1: 311 |
FP072 | 0 | 1 | FALSE | 2 | 1: 626, 0: 325 |
FP073 | 0 | 1 | FALSE | 2 | 0: 656, 1: 295 |
FP074 | 0 | 1 | FALSE | 2 | 0: 642, 1: 309 |
FP075 | 0 | 1 | FALSE | 2 | 0: 629, 1: 322 |
FP076 | 0 | 1 | FALSE | 2 | 0: 639, 1: 312 |
FP077 | 0 | 1 | FALSE | 2 | 0: 646, 1: 305 |
FP078 | 0 | 1 | FALSE | 2 | 0: 662, 1: 289 |
FP079 | 0 | 1 | FALSE | 2 | 1: 656, 0: 295 |
FP080 | 0 | 1 | FALSE | 2 | 0: 663, 1: 288 |
FP081 | 0 | 1 | FALSE | 2 | 0: 686, 1: 265 |
FP082 | 0 | 1 | FALSE | 2 | 1: 679, 0: 272 |
FP083 | 0 | 1 | FALSE | 2 | 0: 691, 1: 260 |
FP084 | 0 | 1 | FALSE | 2 | 0: 679, 1: 272 |
FP085 | 0 | 1 | FALSE | 2 | 0: 708, 1: 243 |
FP086 | 0 | 1 | FALSE | 2 | 0: 695, 1: 256 |
FP087 | 0 | 1 | FALSE | 2 | 1: 691, 0: 260 |
FP088 | 0 | 1 | FALSE | 2 | 0: 701, 1: 250 |
FP089 | 0 | 1 | FALSE | 2 | 0: 716, 1: 235 |
FP090 | 0 | 1 | FALSE | 2 | 0: 714, 1: 237 |
FP091 | 0 | 1 | FALSE | 2 | 0: 737, 1: 214 |
FP092 | 0 | 1 | FALSE | 2 | 0: 719, 1: 232 |
FP093 | 0 | 1 | FALSE | 2 | 0: 719, 1: 232 |
FP094 | 0 | 1 | FALSE | 2 | 0: 731, 1: 220 |
FP095 | 0 | 1 | FALSE | 2 | 0: 742, 1: 209 |
FP096 | 0 | 1 | FALSE | 2 | 0: 744, 1: 207 |
FP097 | 0 | 1 | FALSE | 2 | 0: 727, 1: 224 |
FP098 | 0 | 1 | FALSE | 2 | 0: 725, 1: 226 |
FP099 | 0 | 1 | FALSE | 2 | 0: 735, 1: 216 |
FP100 | 0 | 1 | FALSE | 2 | 0: 731, 1: 220 |
FP101 | 0 | 1 | FALSE | 2 | 0: 726, 1: 225 |
FP102 | 0 | 1 | FALSE | 2 | 0: 759, 1: 192 |
FP103 | 0 | 1 | FALSE | 2 | 0: 743, 1: 208 |
FP104 | 0 | 1 | FALSE | 2 | 0: 739, 1: 212 |
FP105 | 0 | 1 | FALSE | 2 | 0: 746, 1: 205 |
FP106 | 0 | 1 | FALSE | 2 | 0: 769, 1: 182 |
FP107 | 0 | 1 | FALSE | 2 | 0: 750, 1: 201 |
FP108 | 0 | 1 | FALSE | 2 | 0: 756, 1: 195 |
FP109 | 0 | 1 | FALSE | 2 | 0: 783, 1: 168 |
FP110 | 0 | 1 | FALSE | 2 | 0: 755, 1: 196 |
FP111 | 0 | 1 | FALSE | 2 | 0: 764, 1: 187 |
FP112 | 0 | 1 | FALSE | 2 | 0: 766, 1: 185 |
FP113 | 0 | 1 | FALSE | 2 | 0: 765, 1: 186 |
FP114 | 0 | 1 | FALSE | 2 | 0: 803, 1: 148 |
FP115 | 0 | 1 | FALSE | 2 | 0: 781, 1: 170 |
FP116 | 0 | 1 | FALSE | 2 | 0: 768, 1: 183 |
FP117 | 0 | 1 | FALSE | 2 | 0: 781, 1: 170 |
FP118 | 0 | 1 | FALSE | 2 | 0: 768, 1: 183 |
FP119 | 0 | 1 | FALSE | 2 | 0: 796, 1: 155 |
FP120 | 0 | 1 | FALSE | 2 | 0: 793, 1: 158 |
FP121 | 0 | 1 | FALSE | 2 | 0: 818, 1: 133 |
FP122 | 0 | 1 | FALSE | 2 | 0: 795, 1: 156 |
FP123 | 0 | 1 | FALSE | 2 | 0: 792, 1: 159 |
FP124 | 0 | 1 | FALSE | 2 | 0: 797, 1: 154 |
FP125 | 0 | 1 | FALSE | 2 | 0: 803, 1: 148 |
FP126 | 0 | 1 | FALSE | 2 | 0: 810, 1: 141 |
FP127 | 0 | 1 | FALSE | 2 | 0: 818, 1: 133 |
FP128 | 0 | 1 | FALSE | 2 | 0: 810, 1: 141 |
FP129 | 0 | 1 | FALSE | 2 | 0: 819, 1: 132 |
FP130 | 0 | 1 | FALSE | 2 | 0: 851, 1: 100 |
FP131 | 0 | 1 | FALSE | 2 | 0: 831, 1: 120 |
FP132 | 0 | 1 | FALSE | 2 | 0: 832, 1: 119 |
FP133 | 0 | 1 | FALSE | 2 | 0: 831, 1: 120 |
FP134 | 0 | 1 | FALSE | 2 | 0: 830, 1: 121 |
FP135 | 0 | 1 | FALSE | 2 | 0: 831, 1: 120 |
FP136 | 0 | 1 | FALSE | 2 | 0: 836, 1: 115 |
FP137 | 0 | 1 | FALSE | 2 | 0: 841, 1: 110 |
FP138 | 0 | 1 | FALSE | 2 | 0: 845, 1: 106 |
FP139 | 0 | 1 | FALSE | 2 | 0: 873, 1: 78 |
FP140 | 0 | 1 | FALSE | 2 | 0: 845, 1: 106 |
FP141 | 0 | 1 | FALSE | 2 | 0: 840, 1: 111 |
FP142 | 0 | 1 | FALSE | 2 | 0: 847, 1: 104 |
FP143 | 0 | 1 | FALSE | 2 | 0: 874, 1: 77 |
FP144 | 0 | 1 | FALSE | 2 | 0: 852, 1: 99 |
FP145 | 0 | 1 | FALSE | 2 | 0: 852, 1: 99 |
FP146 | 0 | 1 | FALSE | 2 | 0: 853, 1: 98 |
FP147 | 0 | 1 | FALSE | 2 | 0: 851, 1: 100 |
FP148 | 0 | 1 | FALSE | 2 | 0: 868, 1: 83 |
FP149 | 0 | 1 | FALSE | 2 | 0: 865, 1: 86 |
FP150 | 0 | 1 | FALSE | 2 | 0: 876, 1: 75 |
FP151 | 0 | 1 | FALSE | 2 | 0: 898, 1: 53 |
FP152 | 0 | 1 | FALSE | 2 | 0: 873, 1: 78 |
FP153 | 0 | 1 | FALSE | 2 | 0: 877, 1: 74 |
FP155 | 0 | 1 | FALSE | 2 | 0: 885, 1: 66 |
FP156 | 0 | 1 | FALSE | 2 | 0: 884, 1: 67 |
FP157 | 0 | 1 | FALSE | 2 | 0: 892, 1: 59 |
FP158 | 0 | 1 | FALSE | 2 | 0: 900, 1: 51 |
FP159 | 0 | 1 | FALSE | 2 | 0: 884, 1: 67 |
FP160 | 0 | 1 | FALSE | 2 | 0: 886, 1: 65 |
FP161 | 0 | 1 | FALSE | 2 | 0: 888, 1: 63 |
FP162 | 0 | 1 | FALSE | 2 | 0: 480, 1: 471 |
FP163 | 0 | 1 | FALSE | 2 | 0: 498, 1: 453 |
FP164 | 0 | 1 | FALSE | 2 | 1: 597, 0: 354 |
FP165 | 0 | 1 | FALSE | 2 | 0: 619, 1: 332 |
FP166 | 0 | 1 | FALSE | 2 | 0: 636, 1: 315 |
FP167 | 0 | 1 | FALSE | 2 | 0: 639, 1: 312 |
FP168 | 0 | 1 | FALSE | 2 | 1: 633, 0: 318 |
FP169 | 0 | 1 | FALSE | 2 | 0: 774, 1: 177 |
FP170 | 0 | 1 | FALSE | 2 | 0: 776, 1: 175 |
FP171 | 0 | 1 | FALSE | 2 | 0: 790, 1: 161 |
FP172 | 0 | 1 | FALSE | 2 | 0: 807, 1: 144 |
FP173 | 0 | 1 | FALSE | 2 | 0: 816, 1: 135 |
FP174 | 0 | 1 | FALSE | 2 | 0: 827, 1: 124 |
FP175 | 0 | 1 | FALSE | 2 | 0: 823, 1: 128 |
FP176 | 0 | 1 | FALSE | 2 | 0: 835, 1: 116 |
FP177 | 0 | 1 | FALSE | 2 | 0: 836, 1: 115 |
FP178 | 0 | 1 | FALSE | 2 | 0: 836, 1: 115 |
FP179 | 0 | 1 | FALSE | 2 | 0: 858, 1: 93 |
FP180 | 0 | 1 | FALSE | 2 | 0: 849, 1: 102 |
FP181 | 0 | 1 | FALSE | 2 | 0: 862, 1: 89 |
FP182 | 0 | 1 | FALSE | 2 | 0: 857, 1: 94 |
FP183 | 0 | 1 | FALSE | 2 | 0: 879, 1: 72 |
FP184 | 0 | 1 | FALSE | 2 | 0: 871, 1: 80 |
FP185 | 0 | 1 | FALSE | 2 | 0: 870, 1: 81 |
FP186 | 0 | 1 | FALSE | 2 | 0: 878, 1: 73 |
FP187 | 0 | 1 | FALSE | 2 | 0: 882, 1: 69 |
FP188 | 0 | 1 | FALSE | 2 | 0: 886, 1: 65 |
FP189 | 0 | 1 | FALSE | 2 | 0: 878, 1: 73 |
FP190 | 0 | 1 | FALSE | 2 | 0: 882, 1: 69 |
FP191 | 0 | 1 | FALSE | 2 | 0: 884, 1: 67 |
FP192 | 0 | 1 | FALSE | 2 | 0: 893, 1: 58 |
FP193 | 0 | 1 | FALSE | 2 | 0: 892, 1: 59 |
FP194 | 0 | 1 | FALSE | 2 | 0: 895, 1: 56 |
FP195 | 0 | 1 | FALSE | 2 | 0: 893, 1: 58 |
FP196 | 0 | 1 | FALSE | 2 | 0: 897, 1: 54 |
FP197 | 0 | 1 | FALSE | 2 | 0: 901, 1: 50 |
FP198 | 0 | 1 | FALSE | 2 | 0: 897, 1: 54 |
FP201 | 0 | 1 | FALSE | 2 | 0: 901, 1: 50 |
FP202 | 0 | 1 | FALSE | 2 | 0: 706, 1: 245 |
FP203 | 0 | 1 | FALSE | 2 | 0: 842, 1: 109 |
FP204 | 0 | 1 | FALSE | 2 | 0: 857, 1: 94 |
FP205 | 0 | 1 | FALSE | 2 | 0: 877, 1: 74 |
FP206 | 0 | 1 | FALSE | 2 | 0: 894, 1: 57 |
FP207 | 0 | 1 | FALSE | 2 | 0: 897, 1: 54 |
FP208 | 0 | 1 | FALSE | 2 | 0: 844, 1: 107 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
Log_Solubility | 0 | 1 | -2.72 | 2.05 | -11.62 | -3.96 | -2.51 | -1.36 | 1.58 | ▁▁▃▇▃ |
MolWeight | 0 | 1 | 0.00 | 1.00 | -2.84 | -0.80 | -0.01 | 0.80 | 2.72 | ▁▆▇▆▁ |
NumBonds | 0 | 1 | 0.00 | 1.00 | -2.92 | -0.61 | -0.04 | 0.60 | 3.23 | ▁▅▇▃▁ |
NumMultBonds | 0 | 1 | 0.00 | 1.00 | -1.19 | -1.00 | -0.03 | 0.74 | 3.65 | ▇▇▃▁▁ |
NumRotBonds | 0 | 1 | 0.00 | 1.00 | -0.93 | -0.93 | -0.10 | 0.52 | 5.71 | ▇▂▁▁▁ |
NumDblBonds | 0 | 1 | 0.00 | 1.00 | -0.83 | -0.83 | -0.01 | 0.82 | 4.95 | ▇▂▁▁▁ |
NumCarbon | 0 | 1 | 0.00 | 1.00 | -2.64 | -0.69 | -0.01 | 0.54 | 3.06 | ▂▇▇▃▁ |
NumNitrogen | 0 | 1 | 0.00 | 1.00 | -0.69 | -0.69 | -0.69 | 0.16 | 4.37 | ▇▂▁▁▁ |
NumOxygen | 0 | 1 | 0.00 | 1.00 | -0.91 | -0.91 | -0.33 | 0.25 | 6.61 | ▇▂▁▁▁ |
NumSulfer | 0 | 1 | 0.00 | 1.00 | -0.34 | -0.34 | -0.34 | -0.34 | 7.86 | ▇▁▁▁▁ |
NumChlorine | 0 | 1 | 0.00 | 1.00 | -0.40 | -0.40 | -0.40 | -0.40 | 6.74 | ▇▁▁▁▁ |
NumHalogen | 0 | 1 | 0.00 | 1.00 | -0.47 | -0.47 | -0.47 | 0.20 | 6.32 | ▇▁▁▁▁ |
NumRings | 0 | 1 | 0.00 | 1.00 | -1.08 | -1.08 | -0.31 | 0.46 | 4.31 | ▇▃▂▁▁ |
HydrophilicFactor | 0 | 1 | 0.00 | 1.00 | -0.86 | -0.66 | -0.26 | 0.30 | 11.99 | ▇▁▁▁▁ |
SurfaceArea1 | 0 | 1 | 0.00 | 1.00 | -1.03 | -0.77 | -0.21 | 0.48 | 8.37 | ▇▂▁▁▁ |
SurfaceArea2 | 0 | 1 | 0.00 | 1.00 | -1.06 | -0.78 | -0.19 | 0.54 | 7.65 | ▇▂▁▁▁ |
###################################
# Verifying the data dimensions
# for the train set
###################################
# Report rows x columns of the pre-modelling train set
# (the recorded output below shows 951 x 221).
dim(PMA_PreModelling_Train)
## [1] 951 221
##################################
# Formulating the test set
##################################
# Separate the test-set predictors from the response, then Box-Cox transform
# and center/scale the numeric (non-fingerprint) predictors.
# NOTE(review): both preProcess() objects below are estimated from the test
# set itself, so the test data are transformed with their own Box-Cox
# lambdas, means and SDs rather than those learned on the train set. If these
# data are used to score models fitted on PMA_PreModelling_Train, consider
# applying the train-fitted preProcess objects here instead - TODO confirm.
DPA_Test <- Solubility_Test
DPA_Test.Predictors <- DPA_Test[,!names(DPA_Test) %in% c("solTestY")]
# Logical mask instead of -(grep(...)): a negative index built from an empty
# grep() result would select zero columns rather than all of them.
DPA_Test.Predictors.Numeric <- DPA_Test.Predictors[,!grepl("FP", names(DPA_Test.Predictors))]
DPA_Test_BoxCox <- preProcess(DPA_Test.Predictors.Numeric, method = c("BoxCox"))
DPA_Test_BoxCoxTransformed <- predict(DPA_Test_BoxCox, DPA_Test.Predictors.Numeric)
DPA_Test.Predictors.Numeric_BoxCoxTransformed_CenteredScaled <- preProcess(DPA_Test_BoxCoxTransformed, method = c("center","scale"))
DPA_Test.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed <- predict(DPA_Test.Predictors.Numeric_BoxCoxTransformed_CenteredScaled, DPA_Test_BoxCoxTransformed)
##################################
# Creating the pre-modelling
# test set
##################################
# Assemble the pre-modelling test set: response, fingerprint columns recoded
# as factors, and the transformed numeric predictors; then drop the same
# eight columns that were excluded from the train set.
Log_Solubility <- DPA_Test$solTestY
PMA_Test.Predictors.Factor <- DPA_Test.Predictors[,(grep("FP", names(DPA_Test.Predictors)))]
PMA_Test.Predictors.Factor <- as.data.frame(lapply(PMA_Test.Predictors.Factor,factor))
PMA_Test.Predictors.Numeric <- DPA_Test.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed
PMA_Test_BoxCoxTransformed_CenteredScaledTransformed <- cbind(Log_Solubility,PMA_Test.Predictors.Factor,PMA_Test.Predictors.Numeric)
PMA_Test_BoxCoxTransformed_CenteredScaledTransformed_ExcludedLowVariance_ExcludedLinearlyDependent_ExcludedHighCorrelation <- PMA_Test_BoxCoxTransformed_CenteredScaledTransformed[,!names(PMA_Test_BoxCoxTransformed_CenteredScaledTransformed) %in% c("FP154","FP199","FP200","NumNonHBonds","NumHydrogen","NumNonHAtoms","NumAromaticBonds","NumAtoms")]
PMA_PreModelling_Test <- PMA_Test_BoxCoxTransformed_CenteredScaledTransformed_ExcludedLowVariance_ExcludedLinearlyDependent_ExcludedHighCorrelation
##################################
# Gathering descriptive statistics
##################################
# The outer parentheses make the assignment print its value, which is what
# produces the skim summary recorded below.
(PMA_PreModelling_Test_Skimmed <- skim(PMA_PreModelling_Test))
Name | PMA_PreModelling_Test |
Number of rows | 316 |
Number of columns | 221 |
_______________________ | |
Column type frequency: | |
factor | 205 |
numeric | 16 |
________________________ | |
Group variables | None |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
FP001 | 0 | 1 | FALSE | 2 | 0: 168, 1: 148 |
FP002 | 0 | 1 | FALSE | 2 | 1: 185, 0: 131 |
FP003 | 0 | 1 | FALSE | 2 | 0: 176, 1: 140 |
FP004 | 0 | 1 | FALSE | 2 | 1: 168, 0: 148 |
FP005 | 0 | 1 | FALSE | 2 | 1: 195, 0: 121 |
FP006 | 0 | 1 | FALSE | 2 | 0: 205, 1: 111 |
FP007 | 0 | 1 | FALSE | 2 | 0: 204, 1: 112 |
FP008 | 0 | 1 | FALSE | 2 | 0: 202, 1: 114 |
FP009 | 0 | 1 | FALSE | 2 | 0: 233, 1: 83 |
FP010 | 0 | 1 | FALSE | 2 | 0: 255, 1: 61 |
FP011 | 0 | 1 | FALSE | 2 | 0: 261, 1: 55 |
FP012 | 0 | 1 | FALSE | 2 | 0: 263, 1: 53 |
FP013 | 0 | 1 | FALSE | 2 | 0: 264, 1: 52 |
FP014 | 0 | 1 | FALSE | 2 | 0: 266, 1: 50 |
FP015 | 0 | 1 | FALSE | 2 | 1: 262, 0: 54 |
FP016 | 0 | 1 | FALSE | 2 | 0: 271, 1: 45 |
FP017 | 0 | 1 | FALSE | 2 | 0: 269, 1: 47 |
FP018 | 0 | 1 | FALSE | 2 | 0: 289, 1: 27 |
FP019 | 0 | 1 | FALSE | 2 | 0: 280, 1: 36 |
FP020 | 0 | 1 | FALSE | 2 | 0: 282, 1: 34 |
FP021 | 0 | 1 | FALSE | 2 | 0: 282, 1: 34 |
FP022 | 0 | 1 | FALSE | 2 | 0: 279, 1: 37 |
FP023 | 0 | 1 | FALSE | 2 | 0: 289, 1: 27 |
FP024 | 0 | 1 | FALSE | 2 | 0: 285, 1: 31 |
FP025 | 0 | 1 | FALSE | 2 | 0: 291, 1: 25 |
FP026 | 0 | 1 | FALSE | 2 | 0: 279, 1: 37 |
FP027 | 0 | 1 | FALSE | 2 | 0: 291, 1: 25 |
FP028 | 0 | 1 | FALSE | 2 | 0: 298, 1: 18 |
FP029 | 0 | 1 | FALSE | 2 | 0: 300, 1: 16 |
FP030 | 0 | 1 | FALSE | 2 | 0: 290, 1: 26 |
FP031 | 0 | 1 | FALSE | 2 | 0: 285, 1: 31 |
FP032 | 0 | 1 | FALSE | 2 | 0: 275, 1: 41 |
FP033 | 0 | 1 | FALSE | 2 | 0: 278, 1: 38 |
FP034 | 0 | 1 | FALSE | 2 | 0: 295, 1: 21 |
FP035 | 0 | 1 | FALSE | 2 | 0: 285, 1: 31 |
FP036 | 0 | 1 | FALSE | 2 | 0: 297, 1: 19 |
FP037 | 0 | 1 | FALSE | 2 | 0: 286, 1: 30 |
FP038 | 0 | 1 | FALSE | 2 | 0: 306, 1: 10 |
FP039 | 0 | 1 | FALSE | 2 | 0: 296, 1: 20 |
FP040 | 0 | 1 | FALSE | 2 | 0: 298, 1: 18 |
FP041 | 0 | 1 | FALSE | 2 | 0: 297, 1: 19 |
FP042 | 0 | 1 | FALSE | 2 | 0: 297, 1: 19 |
FP043 | 0 | 1 | FALSE | 2 | 0: 302, 1: 14 |
FP044 | 0 | 1 | FALSE | 2 | 0: 297, 1: 19 |
FP045 | 0 | 1 | FALSE | 2 | 0: 296, 1: 20 |
FP046 | 0 | 1 | FALSE | 2 | 0: 213, 1: 103 |
FP047 | 0 | 1 | FALSE | 2 | 0: 222, 1: 94 |
FP048 | 0 | 1 | FALSE | 2 | 0: 280, 1: 36 |
FP049 | 0 | 1 | FALSE | 2 | 0: 282, 1: 34 |
FP050 | 0 | 1 | FALSE | 2 | 0: 280, 1: 36 |
FP051 | 0 | 1 | FALSE | 2 | 0: 298, 1: 18 |
FP052 | 0 | 1 | FALSE | 2 | 0: 283, 1: 33 |
FP053 | 0 | 1 | FALSE | 2 | 0: 297, 1: 19 |
FP054 | 0 | 1 | FALSE | 2 | 0: 285, 1: 31 |
FP055 | 0 | 1 | FALSE | 2 | 0: 287, 1: 29 |
FP056 | 0 | 1 | FALSE | 2 | 0: 296, 1: 20 |
FP057 | 0 | 1 | FALSE | 2 | 0: 277, 1: 39 |
FP058 | 0 | 1 | FALSE | 2 | 0: 273, 1: 43 |
FP059 | 0 | 1 | FALSE | 2 | 0: 302, 1: 14 |
FP060 | 0 | 1 | FALSE | 2 | 0: 173, 1: 143 |
FP061 | 0 | 1 | FALSE | 2 | 0: 192, 1: 124 |
FP062 | 0 | 1 | FALSE | 2 | 0: 181, 1: 135 |
FP063 | 0 | 1 | FALSE | 2 | 0: 203, 1: 113 |
FP064 | 0 | 1 | FALSE | 2 | 0: 193, 1: 123 |
FP065 | 0 | 1 | FALSE | 2 | 1: 189, 0: 127 |
FP066 | 0 | 1 | FALSE | 2 | 1: 195, 0: 121 |
FP067 | 0 | 1 | FALSE | 2 | 0: 213, 1: 103 |
FP068 | 0 | 1 | FALSE | 2 | 0: 224, 1: 92 |
FP069 | 0 | 1 | FALSE | 2 | 0: 198, 1: 118 |
FP070 | 0 | 1 | FALSE | 2 | 0: 211, 1: 105 |
FP071 | 0 | 1 | FALSE | 2 | 0: 207, 1: 109 |
FP072 | 0 | 1 | FALSE | 2 | 1: 204, 0: 112 |
FP073 | 0 | 1 | FALSE | 2 | 0: 224, 1: 92 |
FP074 | 0 | 1 | FALSE | 2 | 0: 213, 1: 103 |
FP075 | 0 | 1 | FALSE | 2 | 0: 235, 1: 81 |
FP076 | 0 | 1 | FALSE | 2 | 0: 216, 1: 100 |
FP077 | 0 | 1 | FALSE | 2 | 0: 219, 1: 97 |
FP078 | 0 | 1 | FALSE | 2 | 0: 218, 1: 98 |
FP079 | 0 | 1 | FALSE | 2 | 1: 230, 0: 86 |
FP080 | 0 | 1 | FALSE | 2 | 0: 233, 1: 83 |
FP081 | 0 | 1 | FALSE | 2 | 0: 225, 1: 91 |
FP082 | 0 | 1 | FALSE | 2 | 1: 235, 0: 81 |
FP083 | 0 | 1 | FALSE | 2 | 0: 236, 1: 80 |
FP084 | 0 | 1 | FALSE | 2 | 0: 245, 1: 71 |
FP085 | 0 | 1 | FALSE | 2 | 0: 231, 1: 85 |
FP086 | 0 | 1 | FALSE | 2 | 0: 230, 1: 86 |
FP087 | 0 | 1 | FALSE | 2 | 1: 241, 0: 75 |
FP088 | 0 | 1 | FALSE | 2 | 0: 239, 1: 77 |
FP089 | 0 | 1 | FALSE | 2 | 0: 236, 1: 80 |
FP090 | 0 | 1 | FALSE | 2 | 0: 244, 1: 72 |
FP091 | 0 | 1 | FALSE | 2 | 0: 243, 1: 73 |
FP092 | 0 | 1 | FALSE | 2 | 0: 247, 1: 69 |
FP093 | 0 | 1 | FALSE | 2 | 0: 248, 1: 68 |
FP094 | 0 | 1 | FALSE | 2 | 0: 237, 1: 79 |
FP095 | 0 | 1 | FALSE | 2 | 0: 251, 1: 65 |
FP096 | 0 | 1 | FALSE | 2 | 0: 257, 1: 59 |
FP097 | 0 | 1 | FALSE | 2 | 0: 250, 1: 66 |
FP098 | 0 | 1 | FALSE | 2 | 0: 252, 1: 64 |
FP099 | 0 | 1 | FALSE | 2 | 0: 249, 1: 67 |
FP100 | 0 | 1 | FALSE | 2 | 0: 259, 1: 57 |
FP101 | 0 | 1 | FALSE | 2 | 0: 260, 1: 56 |
FP102 | 0 | 1 | FALSE | 2 | 0: 270, 1: 46 |
FP103 | 0 | 1 | FALSE | 2 | 0: 247, 1: 69 |
FP104 | 0 | 1 | FALSE | 2 | 0: 258, 1: 58 |
FP105 | 0 | 1 | FALSE | 2 | 0: 248, 1: 68 |
FP106 | 0 | 1 | FALSE | 2 | 0: 273, 1: 43 |
FP107 | 0 | 1 | FALSE | 2 | 0: 254, 1: 62 |
FP108 | 0 | 1 | FALSE | 2 | 0: 259, 1: 57 |
FP109 | 0 | 1 | FALSE | 2 | 0: 261, 1: 55 |
FP110 | 0 | 1 | FALSE | 2 | 0: 264, 1: 52 |
FP111 | 0 | 1 | FALSE | 2 | 0: 259, 1: 57 |
FP112 | 0 | 1 | FALSE | 2 | 0: 260, 1: 56 |
FP113 | 0 | 1 | FALSE | 2 | 0: 264, 1: 52 |
FP114 | 0 | 1 | FALSE | 2 | 0: 260, 1: 56 |
FP115 | 0 | 1 | FALSE | 2 | 0: 266, 1: 50 |
FP116 | 0 | 1 | FALSE | 2 | 0: 269, 1: 47 |
FP117 | 0 | 1 | FALSE | 2 | 0: 262, 1: 54 |
FP118 | 0 | 1 | FALSE | 2 | 0: 279, 1: 37 |
FP119 | 0 | 1 | FALSE | 2 | 0: 263, 1: 53 |
FP120 | 0 | 1 | FALSE | 2 | 0: 267, 1: 49 |
FP121 | 0 | 1 | FALSE | 2 | 0: 282, 1: 34 |
FP122 | 0 | 1 | FALSE | 2 | 0: 273, 1: 43 |
FP123 | 0 | 1 | FALSE | 2 | 0: 270, 1: 46 |
FP124 | 0 | 1 | FALSE | 2 | 0: 274, 1: 42 |
FP125 | 0 | 1 | FALSE | 2 | 0: 278, 1: 38 |
FP126 | 0 | 1 | FALSE | 2 | 0: 280, 1: 36 |
FP127 | 0 | 1 | FALSE | 2 | 0: 269, 1: 47 |
FP128 | 0 | 1 | FALSE | 2 | 0: 282, 1: 34 |
FP129 | 0 | 1 | FALSE | 2 | 0: 272, 1: 44 |
FP130 | 0 | 1 | FALSE | 2 | 0: 290, 1: 26 |
FP131 | 0 | 1 | FALSE | 2 | 0: 282, 1: 34 |
FP132 | 0 | 1 | FALSE | 2 | 0: 276, 1: 40 |
FP133 | 0 | 1 | FALSE | 2 | 0: 273, 1: 43 |
FP134 | 0 | 1 | FALSE | 2 | 0: 289, 1: 27 |
FP135 | 0 | 1 | FALSE | 2 | 0: 296, 1: 20 |
FP136 | 0 | 1 | FALSE | 2 | 0: 284, 1: 32 |
FP137 | 0 | 1 | FALSE | 2 | 0: 288, 1: 28 |
FP138 | 0 | 1 | FALSE | 2 | 0: 290, 1: 26 |
FP139 | 0 | 1 | FALSE | 2 | 0: 296, 1: 20 |
FP140 | 0 | 1 | FALSE | 2 | 0: 288, 1: 28 |
FP141 | 0 | 1 | FALSE | 2 | 0: 294, 1: 22 |
FP142 | 0 | 1 | FALSE | 2 | 0: 286, 1: 30 |
FP143 | 0 | 1 | FALSE | 2 | 0: 299, 1: 17 |
FP144 | 0 | 1 | FALSE | 2 | 0: 287, 1: 29 |
FP145 | 0 | 1 | FALSE | 2 | 0: 296, 1: 20 |
FP146 | 0 | 1 | FALSE | 2 | 0: 287, 1: 29 |
FP147 | 0 | 1 | FALSE | 2 | 0: 294, 1: 22 |
FP148 | 0 | 1 | FALSE | 2 | 0: 291, 1: 25 |
FP149 | 0 | 1 | FALSE | 2 | 0: 290, 1: 26 |
FP150 | 0 | 1 | FALSE | 2 | 0: 295, 1: 21 |
FP151 | 0 | 1 | FALSE | 2 | 0: 306, 1: 10 |
FP152 | 0 | 1 | FALSE | 2 | 0: 299, 1: 17 |
FP153 | 0 | 1 | FALSE | 2 | 0: 305, 1: 11 |
FP155 | 0 | 1 | FALSE | 2 | 0: 295, 1: 21 |
FP156 | 0 | 1 | FALSE | 2 | 0: 301, 1: 15 |
FP157 | 0 | 1 | FALSE | 2 | 0: 298, 1: 18 |
FP158 | 0 | 1 | FALSE | 2 | 0: 291, 1: 25 |
FP159 | 0 | 1 | FALSE | 2 | 0: 305, 1: 11 |
FP160 | 0 | 1 | FALSE | 2 | 0: 305, 1: 11 |
FP161 | 0 | 1 | FALSE | 2 | 0: 305, 1: 11 |
FP162 | 0 | 1 | FALSE | 2 | 1: 168, 0: 148 |
FP163 | 0 | 1 | FALSE | 2 | 0: 173, 1: 143 |
FP164 | 0 | 1 | FALSE | 2 | 1: 207, 0: 109 |
FP165 | 0 | 1 | FALSE | 2 | 0: 215, 1: 101 |
FP166 | 0 | 1 | FALSE | 2 | 0: 209, 1: 107 |
FP167 | 0 | 1 | FALSE | 2 | 0: 221, 1: 95 |
FP168 | 0 | 1 | FALSE | 2 | 1: 226, 0: 90 |
FP169 | 0 | 1 | FALSE | 2 | 0: 257, 1: 59 |
FP170 | 0 | 1 | FALSE | 2 | 0: 267, 1: 49 |
FP171 | 0 | 1 | FALSE | 2 | 0: 275, 1: 41 |
FP172 | 0 | 1 | FALSE | 2 | 0: 269, 1: 47 |
FP173 | 0 | 1 | FALSE | 2 | 0: 273, 1: 43 |
FP174 | 0 | 1 | FALSE | 2 | 0: 267, 1: 49 |
FP175 | 0 | 1 | FALSE | 2 | 0: 274, 1: 42 |
FP176 | 0 | 1 | FALSE | 2 | 0: 282, 1: 34 |
FP177 | 0 | 1 | FALSE | 2 | 0: 284, 1: 32 |
FP178 | 0 | 1 | FALSE | 2 | 0: 282, 1: 34 |
FP179 | 0 | 1 | FALSE | 2 | 0: 272, 1: 44 |
FP180 | 0 | 1 | FALSE | 2 | 0: 294, 1: 22 |
FP181 | 0 | 1 | FALSE | 2 | 0: 283, 1: 33 |
FP182 | 0 | 1 | FALSE | 2 | 0: 292, 1: 24 |
FP183 | 0 | 1 | FALSE | 2 | 0: 274, 1: 42 |
FP184 | 0 | 1 | FALSE | 2 | 0: 286, 1: 30 |
FP185 | 0 | 1 | FALSE | 2 | 0: 285, 1: 31 |
FP186 | 0 | 1 | FALSE | 2 | 0: 297, 1: 19 |
FP187 | 0 | 1 | FALSE | 2 | 0: 295, 1: 21 |
FP188 | 0 | 1 | FALSE | 2 | 0: 294, 1: 22 |
FP189 | 0 | 1 | FALSE | 2 | 0: 303, 1: 13 |
FP190 | 0 | 1 | FALSE | 2 | 0: 299, 1: 17 |
FP191 | 0 | 1 | FALSE | 2 | 0: 298, 1: 18 |
FP192 | 0 | 1 | FALSE | 2 | 0: 294, 1: 22 |
FP193 | 0 | 1 | FALSE | 2 | 0: 294, 1: 22 |
FP194 | 0 | 1 | FALSE | 2 | 0: 295, 1: 21 |
FP195 | 0 | 1 | FALSE | 2 | 0: 300, 1: 16 |
FP196 | 0 | 1 | FALSE | 2 | 0: 294, 1: 22 |
FP197 | 0 | 1 | FALSE | 2 | 0: 296, 1: 20 |
FP198 | 0 | 1 | FALSE | 2 | 0: 302, 1: 14 |
FP201 | 0 | 1 | FALSE | 2 | 0: 303, 1: 13 |
FP202 | 0 | 1 | FALSE | 2 | 0: 232, 1: 84 |
FP203 | 0 | 1 | FALSE | 2 | 0: 273, 1: 43 |
FP204 | 0 | 1 | FALSE | 2 | 0: 286, 1: 30 |
FP205 | 0 | 1 | FALSE | 2 | 0: 291, 1: 25 |
FP206 | 0 | 1 | FALSE | 2 | 0: 300, 1: 16 |
FP207 | 0 | 1 | FALSE | 2 | 0: 302, 1: 14 |
FP208 | 0 | 1 | FALSE | 2 | 0: 273, 1: 43 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
Log_Solubility | 0 | 1 | -2.8 | 2.08 | -10.41 | -3.95 | -2.48 | -1.37 | 1.07 | ▁▁▅▇▅ |
MolWeight | 0 | 1 | 0.0 | 1.00 | -2.46 | -0.78 | -0.06 | 0.81 | 2.18 | ▁▇▇▇▃ |
NumBonds | 0 | 1 | 0.0 | 1.00 | -2.92 | -0.67 | 0.03 | 0.57 | 2.55 | ▁▂▇▃▂ |
NumMultBonds | 0 | 1 | 0.0 | 1.00 | -1.24 | -1.04 | -0.06 | 0.72 | 4.06 | ▇▇▅▁▁ |
NumRotBonds | 0 | 1 | 0.0 | 1.00 | -0.82 | -0.82 | -0.40 | 0.44 | 5.94 | ▇▁▁▁▁ |
NumDblBonds | 0 | 1 | 0.0 | 1.00 | -0.76 | -0.76 | 0.09 | 0.09 | 4.35 | ▇▁▁▁▁ |
NumCarbon | 0 | 1 | 0.0 | 1.00 | -2.71 | -0.70 | -0.21 | 0.56 | 2.23 | ▁▂▇▅▂ |
NumNitrogen | 0 | 1 | 0.0 | 1.00 | -0.63 | -0.63 | -0.63 | 0.26 | 4.71 | ▇▂▁▁▁ |
NumOxygen | 0 | 1 | 0.0 | 1.00 | -0.92 | -0.92 | -0.26 | 0.40 | 5.02 | ▇▃▁▁▁ |
NumSulfer | 0 | 1 | 0.0 | 1.00 | -0.28 | -0.28 | -0.28 | -0.28 | 8.06 | ▇▁▁▁▁ |
NumChlorine | 0 | 1 | 0.0 | 1.00 | -0.40 | -0.40 | -0.40 | -0.40 | 6.02 | ▇▁▁▁▁ |
NumHalogen | 0 | 1 | 0.0 | 1.00 | -0.48 | -0.48 | -0.48 | 0.20 | 5.57 | ▇▁▁▁▁ |
NumRings | 0 | 1 | 0.0 | 1.00 | -1.14 | -0.32 | -0.32 | 0.49 | 3.74 | ▇▃▁▁▁ |
HydrophilicFactor | 0 | 1 | 0.0 | 1.00 | -0.90 | -0.68 | -0.30 | 0.32 | 5.19 | ▇▂▁▁▁ |
SurfaceArea1 | 0 | 1 | 0.0 | 1.00 | -1.04 | -0.75 | -0.21 | 0.53 | 5.37 | ▇▃▁▁▁ |
SurfaceArea2 | 0 | 1 | 0.0 | 1.00 | -1.05 | -0.77 | -0.26 | 0.52 | 5.00 | ▇▃▁▁▁ |
###################################
# Verifying the data dimensions
# for the test set
###################################
# Report rows x columns of the pre-modelling test set
# (the recorded output below shows 316 x 221).
dim(PMA_PreModelling_Test)
## [1] 316 221
##################################
# Loading dataset
##################################
# Exploratory analysis works on a copy of the pre-modelling train set.
EDA <- PMA_PreModelling_Train
##################################
# Listing all predictors
##################################
# Every column except the response.
EDA.Predictors <- EDA[,!names(EDA) %in% c("Log_Solubility")]
##################################
# Listing all numeric predictors
##################################
# Select the numeric predictor columns; vapply() guarantees a logical mask
# with one entry per column regardless of input shape.
EDA.Predictors.Numeric <- EDA.Predictors[,vapply(EDA.Predictors, is.numeric, logical(1))]
ncol(EDA.Predictors.Numeric)
## [1] 15
names(EDA.Predictors.Numeric)
## [1] "MolWeight" "NumBonds" "NumMultBonds"
## [4] "NumRotBonds" "NumDblBonds" "NumCarbon"
## [7] "NumNitrogen" "NumOxygen" "NumSulfer"
## [10] "NumChlorine" "NumHalogen" "NumRings"
## [13] "HydrophilicFactor" "SurfaceArea1" "SurfaceArea2"
##################################
# Listing all factor predictors
##################################
# Select the factor predictor columns; vapply() guarantees a logical mask
# with one entry per column regardless of input shape.
EDA.Predictors.Factor <- EDA.Predictors[,vapply(EDA.Predictors, is.factor, logical(1))]
ncol(EDA.Predictors.Factor)
## [1] 205
names(EDA.Predictors.Factor)
## [1] "FP001" "FP002" "FP003" "FP004" "FP005" "FP006" "FP007" "FP008" "FP009"
## [10] "FP010" "FP011" "FP012" "FP013" "FP014" "FP015" "FP016" "FP017" "FP018"
## [19] "FP019" "FP020" "FP021" "FP022" "FP023" "FP024" "FP025" "FP026" "FP027"
## [28] "FP028" "FP029" "FP030" "FP031" "FP032" "FP033" "FP034" "FP035" "FP036"
## [37] "FP037" "FP038" "FP039" "FP040" "FP041" "FP042" "FP043" "FP044" "FP045"
## [46] "FP046" "FP047" "FP048" "FP049" "FP050" "FP051" "FP052" "FP053" "FP054"
## [55] "FP055" "FP056" "FP057" "FP058" "FP059" "FP060" "FP061" "FP062" "FP063"
## [64] "FP064" "FP065" "FP066" "FP067" "FP068" "FP069" "FP070" "FP071" "FP072"
## [73] "FP073" "FP074" "FP075" "FP076" "FP077" "FP078" "FP079" "FP080" "FP081"
## [82] "FP082" "FP083" "FP084" "FP085" "FP086" "FP087" "FP088" "FP089" "FP090"
## [91] "FP091" "FP092" "FP093" "FP094" "FP095" "FP096" "FP097" "FP098" "FP099"
## [100] "FP100" "FP101" "FP102" "FP103" "FP104" "FP105" "FP106" "FP107" "FP108"
## [109] "FP109" "FP110" "FP111" "FP112" "FP113" "FP114" "FP115" "FP116" "FP117"
## [118] "FP118" "FP119" "FP120" "FP121" "FP122" "FP123" "FP124" "FP125" "FP126"
## [127] "FP127" "FP128" "FP129" "FP130" "FP131" "FP132" "FP133" "FP134" "FP135"
## [136] "FP136" "FP137" "FP138" "FP139" "FP140" "FP141" "FP142" "FP143" "FP144"
## [145] "FP145" "FP146" "FP147" "FP148" "FP149" "FP150" "FP151" "FP152" "FP153"
## [154] "FP155" "FP156" "FP157" "FP158" "FP159" "FP160" "FP161" "FP162" "FP163"
## [163] "FP164" "FP165" "FP166" "FP167" "FP168" "FP169" "FP170" "FP171" "FP172"
## [172] "FP173" "FP174" "FP175" "FP176" "FP177" "FP178" "FP179" "FP180" "FP181"
## [181] "FP182" "FP183" "FP184" "FP185" "FP186" "FP187" "FP188" "FP189" "FP190"
## [190] "FP191" "FP192" "FP193" "FP194" "FP195" "FP196" "FP197" "FP198" "FP201"
## [199] "FP202" "FP203" "FP204" "FP205" "FP206" "FP207" "FP208"
##################################
# Formulating the scatter plots
##################################
# One scatter panel per numeric predictor against the response, each drawn
# with a background grid, the points and a smoother; axis labels are blanked.
featurePlot(
  x = EDA.Predictors.Numeric,
  y = EDA[["Log_Solubility"]],
  between = list(x = 1, y = 1),
  type = c("g", "p", "smooth"),
  labels = c("", "")
)
##################################
# Restructuring the dataset
# for boxplot analysis
##################################
# Re-read the train response (Log_Solubility was last assigned from the test
# set above) and bind it in front of the factor predictors.
Log_Solubility <- DPA$solTrainY
EDA.Boxplot.Source <- cbind(Log_Solubility,
                            EDA.Predictors.Factor)
# Long format for the first 40 fingerprints: one row per (compound,
# fingerprint) pair, with the fingerprint name in Descriptor and its value in
# Structure.
EDA.Boxplot.Gathered.Group1 <- gather(EDA.Boxplot.Source,
                                      'FP001','FP002','FP003','FP004','FP005',
                                      'FP006','FP007','FP008','FP009','FP010',
                                      'FP011','FP012','FP013','FP014','FP015',
                                      'FP016','FP017','FP018','FP019','FP020',
                                      'FP021','FP022','FP023','FP024','FP025',
                                      'FP026','FP027','FP028','FP029','FP030',
                                      'FP031','FP032','FP033','FP034','FP035',
                                      'FP036','FP037','FP038','FP039','FP040',
                                      key="Descriptor",
                                      value="Structure")
# Long format for fingerprints FP041-FP080: fingerprint name in Descriptor,
# its value in Structure.
EDA.Boxplot.Gathered.Group2 <- gather(EDA.Boxplot.Source,
                                      'FP041','FP042','FP043','FP044','FP045',
                                      'FP046','FP047','FP048','FP049','FP050',
                                      'FP051','FP052','FP053','FP054','FP055',
                                      'FP056','FP057','FP058','FP059','FP060',
                                      'FP061','FP062','FP063','FP064','FP065',
                                      'FP066','FP067','FP068','FP069','FP070',
                                      'FP071','FP072','FP073','FP074','FP075',
                                      'FP076','FP077','FP078','FP079','FP080',
                                      key="Descriptor",
                                      value="Structure")
# Long format for fingerprints FP081-FP120: fingerprint name in Descriptor,
# its value in Structure.
EDA.Boxplot.Gathered.Group3 <- gather(EDA.Boxplot.Source,
                                      'FP081','FP082','FP083','FP084','FP085',
                                      'FP086','FP087','FP088','FP089','FP090',
                                      'FP091','FP092','FP093','FP094','FP095',
                                      'FP096','FP097','FP098','FP099','FP100',
                                      'FP101','FP102','FP103','FP104','FP105',
                                      'FP106','FP107','FP108','FP109','FP110',
                                      'FP111','FP112','FP113','FP114','FP115',
                                      'FP116','FP117','FP118','FP119','FP120',
                                      key="Descriptor",
                                      value="Structure")
# Long format for fingerprints FP121-FP161 (FP154 is absent from the list):
# fingerprint name in Descriptor, its value in Structure.
EDA.Boxplot.Gathered.Group4 <- gather(EDA.Boxplot.Source,
                                      'FP121','FP122','FP123','FP124','FP125',
                                      'FP126','FP127','FP128','FP129','FP130',
                                      'FP131','FP132','FP133','FP134','FP135',
                                      'FP136','FP137','FP138','FP139','FP140',
                                      'FP141','FP142','FP143','FP144','FP145',
                                      'FP146','FP147','FP148','FP149','FP150',
                                      'FP151','FP152','FP153','FP155','FP156',
                                      'FP157','FP158','FP159','FP160','FP161',
                                      key="Descriptor",
                                      value="Structure")
# Long format for the remaining 45 fingerprints, FP162-FP208 (FP199 and FP200
# are absent from the list): fingerprint name in Descriptor, its value in
# Structure.
EDA.Boxplot.Gathered.Group5 <- gather(EDA.Boxplot.Source,
                                      'FP162','FP163','FP164','FP165','FP166',
                                      'FP167','FP168','FP169','FP170','FP171',
                                      'FP172','FP173','FP174','FP175','FP176',
                                      'FP177','FP178','FP179','FP180','FP181',
                                      'FP182','FP183','FP184','FP185','FP186',
                                      'FP187','FP188','FP189','FP190','FP191',
                                      'FP192','FP193','FP194','FP195','FP196',
                                      'FP197','FP198','FP201','FP202','FP203',
                                      'FP204','FP205','FP206','FP207','FP208',
                                      key="Descriptor",
                                      value="Structure")
# Box plots of the response by fingerprint value, one panel per fingerprint
# on a 9 x 5 panel layout. The groups are plotted from Group5 down to Group1.
bwplot(
  Log_Solubility ~ Structure | Descriptor,
  data = EDA.Boxplot.Gathered.Group5,
  xlab = "Structure",
  ylab = "Log Solubility",
  layout = c(9, 5)
)
bwplot(
  Log_Solubility ~ Structure | Descriptor,
  data = EDA.Boxplot.Gathered.Group4,
  xlab = "Structure",
  ylab = "Log Solubility",
  layout = c(9, 5)
)
bwplot(
  Log_Solubility ~ Structure | Descriptor,
  data = EDA.Boxplot.Gathered.Group3,
  xlab = "Structure",
  ylab = "Log Solubility",
  layout = c(9, 5)
)
bwplot(
  Log_Solubility ~ Structure | Descriptor,
  data = EDA.Boxplot.Gathered.Group2,
  xlab = "Structure",
  ylab = "Log Solubility",
  layout = c(9, 5)
)
bwplot(
  Log_Solubility ~ Structure | Descriptor,
  data = EDA.Boxplot.Gathered.Group1,
  xlab = "Structure",
  ylab = "Log Solubility",
  layout = c(9, 5)
)
##################################
# Filtering in the numeric predictors
# with the numeric response variable
##################################
# Keep only the columns whose names do not contain "FP": the response plus
# the numeric descriptors.
PMA_PreModelling_Train_Numeric <- PMA_PreModelling_Train[,!grepl("FP", names(PMA_PreModelling_Train))]
dim(PMA_PreModelling_Train_Numeric)
## [1] 951 16
str(PMA_PreModelling_Train_Numeric)
## 'data.frame': 951 obs. of 16 variables:
## $ Log_Solubility : num -3.97 -3.98 -3.99 -4 -4.06 -4.08 -4.08 -4.1 -4.1 -4.11 ...
## $ MolWeight : num 0.304 1.475 0.284 -0.579 0.508 ...
## $ NumBonds : num 0.498 1.698 0.697 0.207 0.566 ...
## $ NumMultBonds : num 1.9049 1.3248 0.1647 -0.8021 -0.0287 ...
## $ NumRotBonds : num -0.935 0.726 0.726 -0.52 1.141 ...
## $ NumDblBonds : num -0.83134 -0.83134 -0.00521 0.82092 -0.83134 ...
## $ NumCarbon : num 0.858 1.804 0.701 0.182 -0.012 ...
## $ NumNitrogen : num 1.001 1.844 -0.685 -0.685 3.53 ...
## $ NumOxygen : num -0.91 -0.332 0.246 -0.91 -0.91 ...
## $ NumSulfer : num -0.336 1.712 -0.336 -0.336 -0.336 ...
## $ NumChlorine : num -0.397 -0.397 -0.397 -0.397 0.317 ...
## $ NumHalogen : num -0.474 -0.474 -0.474 -0.474 0.205 ...
## $ NumRings : num 1.231 2.001 -0.309 -0.309 -0.309 ...
## $ HydrophilicFactor: num -0.742 -0.31 -0.275 -0.834 -0.043 ...
## $ SurfaceArea1 : num -0.3026 0.4458 0.0238 -1.0332 0.4954 ...
## $ SurfaceArea2 : num -0.379 1.054 -0.077 -1.055 0.36 ...
# Per-column min, quartiles, median, mean and max for the response and the
# numeric predictors (recorded output follows).
summary(PMA_PreModelling_Train_Numeric)
## Log_Solubility MolWeight NumBonds NumMultBonds
## Min. :-11.620 Min. :-2.835229 Min. :-2.9239 Min. :-1.18881
## 1st Qu.: -3.955 1st Qu.:-0.798923 1st Qu.:-0.6096 1st Qu.:-0.99545
## Median : -2.510 Median :-0.008626 Median :-0.0356 Median :-0.02867
## Mean : -2.719 Mean : 0.000000 Mean : 0.0000 Mean : 0.00000
## 3rd Qu.: -1.360 3rd Qu.: 0.800109 3rd Qu.: 0.5994 3rd Qu.: 0.74476
## Max. : 1.580 Max. : 2.722778 Max. : 3.2279 Max. : 3.64511
## NumRotBonds NumDblBonds NumCarbon NumNitrogen
## Min. :-0.9347 Min. :-0.831342 Min. :-2.64433 Min. :-0.6852
## 1st Qu.:-0.9347 1st Qu.:-0.831342 1st Qu.:-0.68596 1st Qu.:-0.6852
## Median :-0.1043 Median :-0.005212 Median :-0.01199 Median :-0.6852
## Mean : 0.0000 Mean : 0.000000 Mean : 0.00000 Mean : 0.0000
## 3rd Qu.: 0.5184 3rd Qu.: 0.820917 3rd Qu.: 0.53700 3rd Qu.: 0.1578
## Max. : 5.7083 Max. : 4.951564 Max. : 3.05604 Max. : 4.3730
## NumOxygen NumSulfer NumChlorine NumHalogen
## Min. :-0.9103 Min. :-0.336 Min. :-0.3972 Min. :-0.4741
## 1st Qu.:-0.9103 1st Qu.:-0.336 1st Qu.:-0.3972 1st Qu.:-0.4741
## Median :-0.3320 Median :-0.336 Median :-0.3972 Median :-0.4741
## Mean : 0.0000 Mean : 0.000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.2463 3rd Qu.:-0.336 3rd Qu.:-0.3972 3rd Qu.: 0.2049
## Max. : 6.6077 Max. : 7.858 Max. : 6.7442 Max. : 6.3162
## NumRings HydrophilicFactor SurfaceArea1 SurfaceArea2
## Min. :-1.0792 Min. :-0.8565 Min. :-1.0332 Min. :-1.0554
## 1st Qu.:-1.0792 1st Qu.:-0.6593 1st Qu.:-0.7716 1st Qu.:-0.7765
## Median :-0.3093 Median :-0.2606 Median :-0.2085 Median :-0.1866
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.4607 3rd Qu.: 0.2963 3rd Qu.: 0.4767 3rd Qu.: 0.5358
## Max. : 4.3103 Max. :11.9924 Max. : 8.3733 Max. : 7.6515
##################################
# Obtaining the LOWESS pseudo-R-Squared
##################################
# Score each numeric predictor (columns 2..n) against the response in column
# 1; nonpara = TRUE asks caret for the smoother-based (nonparametric) fit
# rather than a linear one.
LOWESS_PR <- filterVarImp(x = PMA_PreModelling_Train_Numeric[, 2:ncol(PMA_PreModelling_Train_Numeric)],
                          y = PMA_PreModelling_Train_Numeric$Log_Solubility,
                          nonpara = TRUE)
##################################
# Formulating the summary table
##################################
# Turn the importance scores into a plotting table: predictor name taken from
# the row names, score column renamed to LOWESS_PR, plus a constant Metric
# label used as the panel title in the dot plot.
LOWESS_PR_Summary <- LOWESS_PR
LOWESS_PR_Summary$Predictor <- rownames(LOWESS_PR)
names(LOWESS_PR_Summary)[1] <- "LOWESS_PR"
LOWESS_PR_Summary$Metric <- rep("LOWESS_PR",nrow(LOWESS_PR))
LOWESS_PR_Summary
## LOWESS_PR Predictor Metric
## MolWeight 4.443734e-01 MolWeight LOWESS_PR
## NumBonds 2.093744e-01 NumBonds LOWESS_PR
## NumMultBonds 2.758859e-01 NumMultBonds LOWESS_PR
## NumRotBonds 2.230342e-02 NumRotBonds LOWESS_PR
## NumDblBonds 1.530295e-06 NumDblBonds LOWESS_PR
## NumCarbon 3.657997e-01 NumCarbon LOWESS_PR
## NumNitrogen 1.045101e-02 NumNitrogen LOWESS_PR
## NumOxygen 1.710199e-02 NumOxygen LOWESS_PR
## NumSulfer 8.357325e-03 NumSulfer LOWESS_PR
## NumChlorine 2.540713e-01 NumChlorine LOWESS_PR
## NumHalogen 2.541532e-01 NumHalogen LOWESS_PR
## NumRings 2.384330e-01 NumRings LOWESS_PR
## HydrophilicFactor 1.505216e-01 HydrophilicFactor LOWESS_PR
## SurfaceArea1 1.941711e-01 SurfaceArea1 LOWESS_PR
## SurfaceArea2 2.080820e-01 SurfaceArea2 LOWESS_PR
##################################
# Exploring predictor performance
##################################
# Dot plot of the LOWESS pseudo-R-squared per predictor, drawn as points with
# drop lines from the origin. Both callbacks reorder the predictor factor by
# its score so the rows appear ranked.
dotplot(
  Predictor ~ LOWESS_PR | Metric,
  data = LOWESS_PR_Summary,
  origin = 0,
  type = c("p", "h"),
  pch = 16,
  cex = 2,
  alpha = 0.45,
  prepanel = function(x, y) {
    ranked <- reorder(y, x)
    list(ylim = levels(ranked))
  },
  panel = function(x, y, ...) {
    ranked <- reorder(y, x)
    panel.dotplot(x, ranked, ...)
  }
)
##################################
# Obtaining the Pearson correlation coefficient
##################################
# Absolute Pearson correlation of every remaining column with column 1:
# [-1, 1] keeps the first column of the correlation matrix and drops its
# own (self-correlation) row.
# NOTE(review): assumes column 1 is the response (Log_Solubility) - confirm.
PCC <- abs(cor(PMA_PreModelling_Train_Numeric, method = "pearson")[-1, 1])
##################################
# Formulating the summary table
##################################
# One row per predictor; Metric is a constant label used for the panel strip
PCC_Summary <- data.frame(Predictor = names(PMA_PreModelling_Train_Numeric)[2:ncol(PMA_PreModelling_Train_Numeric)],
                          PCC = PCC,
                          Metric = rep("PCC", length(PCC)))
PCC_Summary
## Predictor PCC Metric
## MolWeight MolWeight 0.658495868 PCC
## NumBonds NumBonds 0.457574429 PCC
## NumMultBonds NumMultBonds 0.525248387 PCC
## NumRotBonds NumRotBonds 0.149343282 PCC
## NumDblBonds NumDblBonds 0.001237051 PCC
## NumCarbon NumCarbon 0.604813811 PCC
## NumNitrogen NumNitrogen 0.102230176 PCC
## NumOxygen NumOxygen 0.130774566 PCC
## NumSulfer NumSulfer 0.091418407 PCC
## NumChlorine NumChlorine 0.504054819 PCC
## NumHalogen NumHalogen 0.504136055 PCC
## NumRings NumRings 0.488295986 PCC
## HydrophilicFactor HydrophilicFactor 0.309022159 PCC
## SurfaceArea1 SurfaceArea1 0.193769382 PCC
## SurfaceArea2 SurfaceArea2 0.143941883 PCC
##################################
# Exploring predictor performance
##################################
# Dot plot of the absolute Pearson correlation per predictor, with
# predictors ordered on the y-axis by their score.
dotplot(
  Predictor ~ PCC | Metric,
  data = PCC_Summary,
  pch = 16,
  cex = 2,
  alpha = 0.45,
  type = c("p", "h"),
  origin = 0,
  # Axis limits follow the score-ordered predictor levels
  prepanel = function(x, y) {
    list(ylim = levels(reorder(y, x)))
  },
  # Draw the points using the same score-based ordering
  panel = function(x, y, ...) {
    panel.dotplot(x, reorder(y, x), ...)
  }
)
##################################
# Obtaining the Spearman correlation coefficient
##################################
# Absolute Spearman rank correlation of every remaining column with
# column 1: [-1, 1] keeps the first column of the correlation matrix and
# drops its own (self-correlation) row.
# NOTE(review): assumes column 1 is the response (Log_Solubility) - confirm.
SRCC <- abs(cor(PMA_PreModelling_Train_Numeric, method = "spearman")[-1, 1])
##################################
# Formulating the summary table
##################################
# One row per predictor; Metric is a constant label used for the panel strip
SRCC_Summary <- data.frame(Predictor = names(PMA_PreModelling_Train_Numeric)[2:ncol(PMA_PreModelling_Train_Numeric)],
                           SRCC = SRCC,
                           Metric = rep("SRCC", length(SRCC)))
SRCC_Summary
## Predictor SRCC Metric
## MolWeight MolWeight 0.68529880 SRCC
## NumBonds NumBonds 0.54839850 SRCC
## NumMultBonds NumMultBonds 0.47971353 SRCC
## NumRotBonds NumRotBonds 0.14976036 SRCC
## NumDblBonds NumDblBonds 0.02042731 SRCC
## NumCarbon NumCarbon 0.67359114 SRCC
## NumNitrogen NumNitrogen 0.10078218 SRCC
## NumOxygen NumOxygen 0.14954994 SRCC
## NumSulfer NumSulfer 0.12090249 SRCC
## NumChlorine NumChlorine 0.35707519 SRCC
## NumHalogen NumHalogen 0.38111965 SRCC
## NumRings NumRings 0.50941815 SRCC
## HydrophilicFactor HydrophilicFactor 0.36469127 SRCC
## SurfaceArea1 SurfaceArea1 0.19339720 SRCC
## SurfaceArea2 SurfaceArea2 0.14057885 SRCC
##################################
# Exploring predictor performance
##################################
# Dot plot of the absolute Spearman correlation per predictor, with
# predictors ordered on the y-axis by their score.
dotplot(
  Predictor ~ SRCC | Metric,
  data = SRCC_Summary,
  pch = 16,
  cex = 2,
  alpha = 0.45,
  type = c("p", "h"),
  origin = 0,
  # Axis limits follow the score-ordered predictor levels
  prepanel = function(x, y) {
    list(ylim = levels(reorder(y, x)))
  },
  # Draw the points using the same score-based ordering
  panel = function(x, y, ...) {
    panel.dotplot(x, reorder(y, x), ...)
  }
)
##################################
# Obtaining the maximal information coefficient
##################################
# mine() is given the predictor columns as x and the response as y; only
# the $MIC component of its result is kept.
MIC <- mine(x = PMA_PreModelling_Train_Numeric[, 2:ncol(PMA_PreModelling_Train_Numeric)],
            y = PMA_PreModelling_Train_Numeric$Log_Solubility)$MIC
##################################
# Formulating the summary table
##################################
# MIC is indexed as a matrix here: its first column supplies the
# per-predictor scores. Metric is a constant label used for the panel strip.
MIC_Summary <- data.frame(Predictor = names(PMA_PreModelling_Train_Numeric)[2:ncol(PMA_PreModelling_Train_Numeric)],
                          MIC = MIC[, 1],
                          Metric = rep("MIC", length(MIC)))
MIC_Summary
## Predictor MIC Metric
## 1 MolWeight 0.4679277 MIC
## 2 NumBonds 0.3268683 MIC
## 3 NumMultBonds 0.2792600 MIC
## 4 NumRotBonds 0.1754215 MIC
## 5 NumDblBonds 0.1688472 MIC
## 6 NumCarbon 0.4434121 MIC
## 7 NumNitrogen 0.1535738 MIC
## 8 NumOxygen 0.1527421 MIC
## 9 NumSulfer 0.1297052 MIC
## 10 NumChlorine 0.2011708 MIC
## 11 NumHalogen 0.2017841 MIC
## 12 NumRings 0.3161828 MIC
## 13 HydrophilicFactor 0.3208456 MIC
## 14 SurfaceArea1 0.2054896 MIC
## 15 SurfaceArea2 0.2274047 MIC
##################################
# Exploring predictor performance
##################################
# Dot plot of the maximal information coefficient per predictor, with
# predictors ordered on the y-axis by their score.
dotplot(
  Predictor ~ MIC | Metric,
  data = MIC_Summary,
  pch = 16,
  cex = 2,
  alpha = 0.45,
  type = c("p", "h"),
  origin = 0,
  # Axis limits follow the score-ordered predictor levels
  prepanel = function(x, y) {
    list(ylim = levels(reorder(y, x)))
  },
  # Draw the points using the same score-based ordering
  panel = function(x, y, ...) {
    panel.dotplot(x, reorder(y, x), ...)
  }
)
##################################
# Obtaining the relief values
##################################
# Attribute evaluation of every predictor against Log_Solubility using the
# "RReliefFequalK" estimator; the result is a vector named by predictor.
RV <- attrEval(Log_Solubility ~ .,
               data = PMA_PreModelling_Train_Numeric,
               estimator = "RReliefFequalK")
##################################
# Formulating the summary table
##################################
# One row per predictor; Metric is a constant label used for the panel strip
RV_Summary <- data.frame(Predictor = names(RV),
                         RV = RV,
                         Metric = rep("RV", length(RV)))
RV_Summary
## Predictor RV Metric
## MolWeight MolWeight 0.10334945 RV
## NumBonds NumBonds 0.03989134 RV
## NumMultBonds NumMultBonds 0.06807077 RV
## NumRotBonds NumRotBonds 0.06121663 RV
## NumDblBonds NumDblBonds 0.02974081 RV
## NumCarbon NumCarbon 0.06435924 RV
## NumNitrogen NumNitrogen 0.11583417 RV
## NumOxygen NumOxygen 0.10808031 RV
## NumSulfer NumSulfer 0.05065068 RV
## NumChlorine NumChlorine 0.02781883 RV
## NumHalogen NumHalogen 0.06144457 RV
## NumRings NumRings 0.05518173 RV
## HydrophilicFactor HydrophilicFactor 0.04195617 RV
## SurfaceArea1 SurfaceArea1 0.05366733 RV
## SurfaceArea2 SurfaceArea2 0.06623467 RV
##################################
# Exploring predictor performance
##################################
# Dot plot of the relief value per predictor, with predictors ordered on
# the y-axis by their score.
dotplot(
  Predictor ~ RV | Metric,
  data = RV_Summary,
  pch = 16,
  cex = 2,
  alpha = 0.45,
  type = c("p", "h"),
  origin = 0,
  # Axis limits follow the score-ordered predictor levels
  prepanel = function(x, y) {
    list(ylim = levels(reorder(y, x)))
  },
  # Draw the points using the same score-based ordering
  panel = function(x, y, ...) {
    panel.dotplot(x, reorder(y, x), ...)
  }
)
##################################
# Filtering in the factor predictors
# with the numeric response variable
##################################
# Keep every column whose name contains "FP" (the fingerprint predictors),
# then append the numeric response as the last column.
PMA_PreModelling_Train_Factor <- PMA_PreModelling_Train[, grepl("FP", names(PMA_PreModelling_Train))]
PMA_PreModelling_Train_Factor$Log_Solubility <- PMA_PreModelling_Train$Log_Solubility
dim(PMA_PreModelling_Train_Factor)
## [1] 951 206
str(PMA_PreModelling_Train_Factor)
## 'data.frame': 951 obs. of 206 variables:
## $ FP001 : Factor w/ 2 levels "0","1": 1 1 2 1 1 2 1 2 2 2 ...
## $ FP002 : Factor w/ 2 levels "0","1": 2 2 2 1 1 1 2 1 1 2 ...
## $ FP003 : Factor w/ 2 levels "0","1": 1 1 2 2 2 2 1 2 2 2 ...
## $ FP004 : Factor w/ 2 levels "0","1": 1 2 2 1 2 2 2 2 2 2 ...
## $ FP005 : Factor w/ 2 levels "0","1": 2 2 2 1 2 1 2 1 1 2 ...
## $ FP006 : Factor w/ 2 levels "0","1": 1 2 1 1 2 1 1 1 2 2 ...
## $ FP007 : Factor w/ 2 levels "0","1": 1 2 1 2 1 1 1 2 2 2 ...
## $ FP008 : Factor w/ 2 levels "0","1": 2 2 2 1 1 1 2 1 1 1 ...
## $ FP009 : Factor w/ 2 levels "0","1": 1 1 1 1 2 2 2 1 2 1 ...
## $ FP010 : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 1 1 1 ...
## $ FP011 : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 2 1 ...
## $ FP012 : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 2 1 1 ...
## $ FP013 : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 2 1 1 1 ...
## $ FP014 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 2 1 1 1 ...
## $ FP015 : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
## $ FP016 : Factor w/ 2 levels "0","1": 1 2 1 1 2 2 1 2 1 1 ...
## $ FP017 : Factor w/ 2 levels "0","1": 1 1 2 2 1 1 1 1 2 2 ...
## $ FP018 : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
## $ FP019 : Factor w/ 2 levels "0","1": 2 1 1 1 2 1 2 1 1 1 ...
## $ FP020 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ FP021 : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 2 1 ...
## $ FP022 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
## $ FP023 : Factor w/ 2 levels "0","1": 1 1 1 2 1 1 1 1 2 1 ...
## $ FP024 : Factor w/ 2 levels "0","1": 2 1 1 1 2 1 1 1 1 1 ...
## $ FP025 : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 1 1 1 ...
## $ FP026 : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 2 1 1 1 ...
## $ FP027 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
## $ FP028 : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 2 2 ...
## $ FP029 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ FP030 : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 1 1 1 1 ...
## $ FP031 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 2 1 1 ...
## $ FP032 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ FP033 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ FP034 : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 1 1 1 2 ...
## $ FP035 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
## $ FP036 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ FP037 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
## $ FP038 : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 1 1 1 ...
## $ FP039 : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 1 1 1 1 ...
## $ FP040 : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 1 1 1 1 ...
## $ FP041 : Factor w/ 2 levels "0","1": 1 1 1 2 1 1 1 1 2 1 ...
## $ FP042 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ FP043 : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
## $ FP044 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ FP045 : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 1 1 1 ...
## $ FP046 : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 2 1 1 2 ...
## $ FP047 : Factor w/ 2 levels "0","1": 1 2 2 1 1 1 2 1 1 1 ...
## $ FP048 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 2 1 1 ...
## $ FP049 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 2 1 1 1 ...
## $ FP050 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 2 1 2 ...
## $ FP051 : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
## $ FP052 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
## $ FP053 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 2 1 1 1 ...
## $ FP054 : Factor w/ 2 levels "0","1": 1 1 1 2 1 1 1 1 2 2 ...
## $ FP055 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ FP056 : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 1 1 1 1 ...
## $ FP057 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 2 1 1 1 ...
## $ FP058 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
## $ FP059 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 2 1 1 ...
## $ FP060 : Factor w/ 2 levels "0","1": 1 2 2 1 1 1 1 2 2 1 ...
## $ FP061 : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 2 2 1 ...
## $ FP062 : Factor w/ 2 levels "0","1": 1 1 2 1 1 2 1 2 2 2 ...
## $ FP063 : Factor w/ 2 levels "0","1": 2 2 1 1 2 2 2 1 1 2 ...
## $ FP064 : Factor w/ 2 levels "0","1": 1 2 2 1 2 2 1 2 1 1 ...
## $ FP065 : Factor w/ 2 levels "0","1": 2 2 1 1 2 1 2 1 2 2 ...
## $ FP066 : Factor w/ 2 levels "0","1": 2 1 2 2 2 2 2 2 2 2 ...
## $ FP067 : Factor w/ 2 levels "0","1": 2 2 1 1 2 2 2 1 1 2 ...
## $ FP068 : Factor w/ 2 levels "0","1": 1 2 1 1 2 2 2 1 1 2 ...
## $ FP069 : Factor w/ 2 levels "0","1": 2 1 2 2 2 2 1 2 2 1 ...
## $ FP070 : Factor w/ 2 levels "0","1": 2 2 1 2 1 1 2 1 2 1 ...
## $ FP071 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 2 1 2 2 ...
## $ FP072 : Factor w/ 2 levels "0","1": 1 2 2 1 1 2 1 2 2 2 ...
## $ FP073 : Factor w/ 2 levels "0","1": 1 2 2 1 1 1 1 1 2 1 ...
## $ FP074 : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 2 1 ...
## $ FP075 : Factor w/ 2 levels "0","1": 1 2 1 1 2 2 2 1 1 2 ...
## $ FP076 : Factor w/ 2 levels "0","1": 2 2 1 1 1 1 2 1 2 2 ...
## $ FP077 : Factor w/ 2 levels "0","1": 1 2 1 2 1 1 1 2 2 2 ...
## $ FP078 : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 2 1 ...
## $ FP079 : Factor w/ 2 levels "0","1": 2 2 2 2 2 1 2 1 2 2 ...
## $ FP080 : Factor w/ 2 levels "0","1": 1 2 1 1 2 2 2 2 1 1 ...
## $ FP081 : Factor w/ 2 levels "0","1": 1 1 2 2 1 1 1 2 2 2 ...
## $ FP082 : Factor w/ 2 levels "0","1": 2 2 2 1 2 2 2 1 2 2 ...
## $ FP083 : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 1 1 1 2 ...
## $ FP084 : Factor w/ 2 levels "0","1": 2 2 1 1 2 1 2 1 1 1 ...
## $ FP085 : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 2 1 1 1 ...
## $ FP086 : Factor w/ 2 levels "0","1": 1 1 1 2 2 1 1 2 2 2 ...
## $ FP087 : Factor w/ 2 levels "0","1": 2 2 2 2 2 1 2 1 2 2 ...
## $ FP088 : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 2 2 1 ...
## $ FP089 : Factor w/ 2 levels "0","1": 2 2 1 1 1 1 2 1 1 1 ...
## $ FP090 : Factor w/ 2 levels "0","1": 1 2 1 2 1 1 1 2 2 2 ...
## $ FP091 : Factor w/ 2 levels "0","1": 2 2 1 1 2 1 2 1 1 2 ...
## $ FP092 : Factor w/ 2 levels "0","1": 1 1 1 1 2 2 2 1 2 1 ...
## $ FP093 : Factor w/ 2 levels "0","1": 1 2 1 2 1 1 1 2 2 2 ...
## $ FP094 : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 1 2 1 1 ...
## $ FP095 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 2 ...
## $ FP096 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
## $ FP097 : Factor w/ 2 levels "0","1": 2 2 1 1 1 1 2 1 2 1 ...
## $ FP098 : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 2 1 1 ...
## $ FP099 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
## [list output truncated]
# Per-column summary: level counts for the factor predictors and a
# five-number summary plus mean for the numeric Log_Solubility column
summary(PMA_PreModelling_Train_Factor)
## FP001 FP002 FP003 FP004 FP005 FP006 FP007 FP008 FP009
## 0:482 0:438 0:536 0:395 0:400 0:570 0:605 0:641 0:685
## 1:469 1:513 1:415 1:556 1:551 1:381 1:346 1:310 1:266
##
##
##
##
## FP010 FP011 FP012 FP013 FP014 FP015 FP016 FP017 FP018
## 0:781 0:747 0:783 0:793 0:798 0:133 0:812 0:814 0:826
## 1:170 1:204 1:168 1:158 1:153 1:818 1:139 1:137 1:125
##
##
##
##
## FP019 FP020 FP021 FP022 FP023 FP024 FP025 FP026 FP027
## 0:835 0:837 0:836 0:852 0:834 0:844 0:841 0:871 0:858
## 1:116 1:114 1:115 1: 99 1:117 1:107 1:110 1: 80 1: 93
##
##
##
##
## FP028 FP029 FP030 FP031 FP032 FP033 FP034 FP035 FP036
## 0:850 0:854 0:862 0:866 0:881 0:885 0:875 0:882 0:879
## 1:101 1: 97 1: 89 1: 85 1: 70 1: 66 1: 76 1: 69 1: 72
##
##
##
##
## FP037 FP038 FP039 FP040 FP041 FP042 FP043 FP044 FP045
## 0:884 0:869 0:880 0:886 0:891 0:897 0:888 0:894 0:898
## 1: 67 1: 82 1: 71 1: 65 1: 60 1: 54 1: 63 1: 57 1: 53
##
##
##
##
## FP046 FP047 FP048 FP049 FP050 FP051 FP052 FP053 FP054
## 0:651 0:698 0:833 0:835 0:844 0:847 0:864 0:862 0:879
## 1:300 1:253 1:118 1:116 1:107 1:104 1: 87 1: 89 1: 72
##
##
##
##
## FP055 FP056 FP057 FP058 FP059 FP060 FP061 FP062 FP063
## 0:900 0:889 0:837 0:843 0:899 0:493 0:526 0:535 0:546
## 1: 51 1: 62 1:114 1:108 1: 52 1:458 1:425 1:416 1:405
##
##
##
##
## FP064 FP065 FP066 FP067 FP068 FP069 FP070 FP071 FP072
## 0:555 0:387 0:371 0:590 0:607 0:607 0:613 0:640 0:325
## 1:396 1:564 1:580 1:361 1:344 1:344 1:338 1:311 1:626
##
##
##
##
## FP073 FP074 FP075 FP076 FP077 FP078 FP079 FP080 FP081
## 0:656 0:642 0:629 0:639 0:646 0:662 0:295 0:663 0:686
## 1:295 1:309 1:322 1:312 1:305 1:289 1:656 1:288 1:265
##
##
##
##
## FP082 FP083 FP084 FP085 FP086 FP087 FP088 FP089 FP090
## 0:272 0:691 0:679 0:708 0:695 0:260 0:701 0:716 0:714
## 1:679 1:260 1:272 1:243 1:256 1:691 1:250 1:235 1:237
##
##
##
##
## FP091 FP092 FP093 FP094 FP095 FP096 FP097 FP098 FP099
## 0:737 0:719 0:719 0:731 0:742 0:744 0:727 0:725 0:735
## 1:214 1:232 1:232 1:220 1:209 1:207 1:224 1:226 1:216
##
##
##
##
## FP100 FP101 FP102 FP103 FP104 FP105 FP106 FP107 FP108
## 0:731 0:726 0:759 0:743 0:739 0:746 0:769 0:750 0:756
## 1:220 1:225 1:192 1:208 1:212 1:205 1:182 1:201 1:195
##
##
##
##
## FP109 FP110 FP111 FP112 FP113 FP114 FP115 FP116 FP117
## 0:783 0:755 0:764 0:766 0:765 0:803 0:781 0:768 0:781
## 1:168 1:196 1:187 1:185 1:186 1:148 1:170 1:183 1:170
##
##
##
##
## FP118 FP119 FP120 FP121 FP122 FP123 FP124 FP125 FP126
## 0:768 0:796 0:793 0:818 0:795 0:792 0:797 0:803 0:810
## 1:183 1:155 1:158 1:133 1:156 1:159 1:154 1:148 1:141
##
##
##
##
## FP127 FP128 FP129 FP130 FP131 FP132 FP133 FP134 FP135
## 0:818 0:810 0:819 0:851 0:831 0:832 0:831 0:830 0:831
## 1:133 1:141 1:132 1:100 1:120 1:119 1:120 1:121 1:120
##
##
##
##
## FP136 FP137 FP138 FP139 FP140 FP141 FP142 FP143 FP144
## 0:836 0:841 0:845 0:873 0:845 0:840 0:847 0:874 0:852
## 1:115 1:110 1:106 1: 78 1:106 1:111 1:104 1: 77 1: 99
##
##
##
##
## FP145 FP146 FP147 FP148 FP149 FP150 FP151 FP152 FP153
## 0:852 0:853 0:851 0:868 0:865 0:876 0:898 0:873 0:877
## 1: 99 1: 98 1:100 1: 83 1: 86 1: 75 1: 53 1: 78 1: 74
##
##
##
##
## FP155 FP156 FP157 FP158 FP159 FP160 FP161 FP162 FP163
## 0:885 0:884 0:892 0:900 0:884 0:886 0:888 0:480 0:498
## 1: 66 1: 67 1: 59 1: 51 1: 67 1: 65 1: 63 1:471 1:453
##
##
##
##
## FP164 FP165 FP166 FP167 FP168 FP169 FP170 FP171 FP172
## 0:354 0:619 0:636 0:639 0:318 0:774 0:776 0:790 0:807
## 1:597 1:332 1:315 1:312 1:633 1:177 1:175 1:161 1:144
##
##
##
##
## FP173 FP174 FP175 FP176 FP177 FP178 FP179 FP180 FP181
## 0:816 0:827 0:823 0:835 0:836 0:836 0:858 0:849 0:862
## 1:135 1:124 1:128 1:116 1:115 1:115 1: 93 1:102 1: 89
##
##
##
##
## FP182 FP183 FP184 FP185 FP186 FP187 FP188 FP189 FP190
## 0:857 0:879 0:871 0:870 0:878 0:882 0:886 0:878 0:882
## 1: 94 1: 72 1: 80 1: 81 1: 73 1: 69 1: 65 1: 73 1: 69
##
##
##
##
## FP191 FP192 FP193 FP194 FP195 FP196 FP197 FP198 FP201
## 0:884 0:893 0:892 0:895 0:893 0:897 0:901 0:897 0:901
## 1: 67 1: 58 1: 59 1: 56 1: 58 1: 54 1: 50 1: 54 1: 50
##
##
##
##
## FP202 FP203 FP204 FP205 FP206 FP207 FP208 Log_Solubility
## 0:706 0:842 0:857 0:877 0:894 0:897 0:844 Min. :-11.620
## 1:245 1:109 1: 94 1: 74 1: 57 1: 54 1:107 1st Qu.: -3.955
## Median : -2.510
## Mean : -2.719
## 3rd Qu.: -1.360
## Max. : 1.580
##################################
# Obtaining the t-test statistics
##################################
# For each predictor column (all columns except the last, which holds the
# response), run a two-sample t-test of Log_Solubility split by the
# predictor's level. The kept elements (statistic, p-value, two group
# means) are flattened to a numeric vector, so VP_T holds one column per
# predictor.
VP_T <- apply(PMA_PreModelling_Train_Factor[, 1:(ncol(PMA_PreModelling_Train_Factor)-1)],
              2,
              function(x, y){
                tStats <- t.test(y ~ x)[c("statistic", "p.value", "estimate")]
                unlist(tStats)},
              y = PMA_PreModelling_Train_Factor$Log_Solubility)
##################################
# Formulating the summary table
##################################
# Transpose so that each predictor becomes a row and each statistic a column
VP_T_Summary <- as.data.frame(t(VP_T))
names(VP_T_Summary) <- c("t.Statistic", "t.Test_P.Value", "Mean0", "Mean1")
VP_T_Summary$MeanDifference <- VP_T_Summary$Mean1 - VP_T_Summary$Mean0
VP_T_Summary$Predictor <- names(PMA_PreModelling_Train_Factor[,-ncol(PMA_PreModelling_Train_Factor)])
VP_T_Summary$Metric <- rep("VP_T", nrow(VP_T_Summary))
# -log10 of the p-value: larger values correspond to smaller p-values
VP_T_Summary$NegativeLog10_t.Test_P.Value <- -log10(VP_T_Summary$t.Test_P.Value)
VP_T_Summary$AbsoluteMeanDifference <- abs(VP_T_Summary$Mean1 - VP_T_Summary$Mean0)
# Label five selected predictors by their own name; every other row is
# grouped as "Others" (equivalent to the original nested ifelse chain).
VP_T_Summary$Group <- ifelse(rownames(VP_T_Summary) %in% c("FP076", "FP089", "FP065", "FP044", "FP193"),
                             rownames(VP_T_Summary),
                             "Others")
VP_T_Summary
## t.Statistic t.Test_P.Value Mean0 Mean1 MeanDifference Predictor
## FP001 -4.02204024 6.287404e-05 -2.978465 -2.451471 0.526993515 FP001
## FP002 10.28672686 1.351580e-23 -2.021347 -3.313860 -1.292512617 FP002
## FP003 -2.03644225 4.198619e-02 -2.832164 -2.571855 0.260308757 FP003
## FP004 -4.94895770 9.551772e-07 -3.128380 -2.427428 0.700951689 FP004
## FP005 10.28247538 1.576549e-23 -1.969000 -3.262722 -1.293722323 FP005
## FP006 -7.87583806 9.287835e-15 -3.109421 -2.133832 0.975589032 FP006
## FP007 -0.88733923 3.751398e-01 -2.759967 -2.646185 0.113781971 FP007
## FP008 3.32843788 9.119521e-04 -2.582652 -2.999613 -0.416960797 FP008
## FP009 11.49360533 7.467714e-27 -2.249591 -3.926278 -1.676686955 FP009
## FP010 -4.11392307 4.973603e-05 -2.824302 -2.232824 0.591478647 FP010
## FP011 -7.01680213 1.067782e-11 -2.934645 -1.927353 1.007292306 FP011
## FP012 -1.89255407 5.953582e-02 -2.773755 -2.461369 0.312385742 FP012
## FP013 11.73267872 1.088092e-24 -2.365485 -4.490696 -2.125210704 FP013
## FP014 11.47456176 1.157457e-23 -2.375401 -4.508431 -2.133030370 FP014
## FP015 -7.73718733 1.432769e-12 -4.404286 -2.444487 1.959799162 FP015
## FP016 -0.61719794 5.377695e-01 -2.733559 -2.631007 0.102551919 FP016
## FP017 2.73915987 6.681864e-03 -2.654607 -3.098613 -0.444006259 FP017
## FP018 4.26743510 2.806561e-05 -2.643402 -3.215280 -0.571878063 FP018
## FP019 -2.31045847 2.207143e-02 -2.766910 -2.370603 0.396306731 FP019
## FP020 -3.44119896 7.251032e-04 -2.785806 -2.224912 0.560894171 FP020
## FP021 3.35165112 1.009498e-03 -2.642392 -3.272348 -0.629955482 FP021
## FP022 -0.66772403 5.051252e-01 -2.728040 -2.637071 0.090969199 FP022
## FP023 2.18958532 2.989162e-02 -2.673106 -3.042650 -0.369544057 FP023
## FP024 -2.43189276 1.617811e-02 -2.766457 -2.340841 0.425616224 FP024
## FP025 -2.68651403 7.981132e-03 -2.771677 -2.312545 0.459131121 FP025
## FP026 0.58596455 5.591541e-01 -2.709082 -2.821875 -0.112793485 FP026
## FP027 -4.46177875 1.714807e-05 -2.793800 -2.024516 0.769283405 FP027
## FP028 -3.36478123 1.011310e-03 -2.791941 -2.101089 0.690852068 FP028
## FP029 1.50309317 1.346711e-01 -2.696475 -2.913093 -0.216617374 FP029
## FP030 -4.18564626 5.684141e-05 -2.799582 -1.933933 0.865649782 FP030
## FP031 -0.19030898 8.494207e-01 -2.721986 -2.683765 0.038221437 FP031
## FP032 -2.86824205 5.100440e-03 -2.757832 -2.224429 0.533403438 FP032
## FP033 -2.48343886 1.492327e-02 -2.751062 -2.282879 0.468183359 FP033
## FP034 0.81786492 4.147985e-01 -2.709737 -2.820263 -0.110526015 FP034
## FP035 4.17698556 6.851675e-05 -2.659660 -3.471594 -0.811934339 FP035
## FP036 -5.31186085 6.344823e-07 -2.787224 -1.880417 0.906807452 FP036
## FP037 1.37213471 1.734895e-01 -2.700271 -2.960000 -0.259728507 FP037
## FP038 -2.55044552 1.224045e-02 -2.764833 -2.228293 0.536540459 FP038
## FP039 6.83856010 1.396591e-09 -2.588330 -4.332817 -1.744487356 FP039
## FP040 -4.96957478 3.640553e-06 -2.788036 -1.771692 1.016343810 FP040
## FP041 3.86443922 2.274448e-04 -2.672424 -3.403833 -0.731409091 FP041
## FP042 -1.10149897 2.742144e-01 -2.729509 -2.536852 0.192657624 FP042
## FP043 -0.18525729 8.535189e-01 -2.721284 -2.680317 0.040966323 FP043
## FP044 15.19844350 1.458342e-22 -2.472237 -6.582105 -4.109868127 FP044
## FP045 3.26197779 1.781037e-03 -2.678118 -3.403962 -0.725844224 FP045
## FP046 7.19096539 1.949765e-12 -2.405146 -3.398700 -0.993554071 FP046
## FP047 3.08813847 2.106659e-03 -2.611605 -3.013676 -0.402071305 FP047
## FP048 0.78156187 4.354510e-01 -2.703337 -2.826102 -0.122764360 FP048
## FP049 9.32620107 1.541509e-16 -2.494036 -4.334828 -1.840791658 FP049
## FP050 1.78989997 7.537562e-02 -2.684810 -2.984860 -0.300049387 FP050
## FP051 3.85923300 1.590148e-04 -2.656482 -3.224231 -0.567749069 FP051
## FP052 -1.37622794 1.707261e-01 -2.736296 -2.542529 0.193767561 FP052
## FP053 7.79872544 3.863769e-12 -2.565418 -4.201910 -1.636492479 FP053
## FP054 4.71268264 7.815108e-06 -2.656678 -3.474167 -0.817488623 FP054
## FP055 -2.15047129 3.539774e-02 -2.743122 -2.285294 0.457828105 FP055
## FP056 6.56517336 8.289424e-09 -2.598841 -4.435323 -1.836481186 FP056
## FP057 1.55970276 1.207241e-01 -2.686667 -2.952807 -0.266140351 FP057
## FP058 1.31266618 1.913070e-01 -2.691483 -2.930000 -0.238517200 FP058
## FP059 5.30327181 1.388228e-06 -2.662258 -3.692115 -1.029857320 FP059
## FP060 -6.34967826 3.396521e-10 -3.112819 -2.294192 0.818627333 FP060
## FP061 -3.23528852 1.258017e-03 -2.903859 -2.489247 0.414612257 FP061
## FP062 -4.68040368 3.284921e-06 -2.978056 -2.384856 0.593200306 FP062
## FP063 -5.90647947 4.865776e-09 -3.037509 -2.288593 0.748916565 FP063
## FP064 -3.19849081 1.427257e-03 -2.887640 -2.481616 0.406023478 FP064
## FP065 13.67947483 7.369864e-39 -1.740827 -3.389468 -1.648641212 FP065
## FP066 -3.50425986 4.936856e-04 -3.034043 -2.516776 0.517267265 FP066
## FP067 -3.71025855 2.192910e-04 -2.894797 -2.430554 0.464242594 FP067
## FP068 -4.50468714 7.534223e-06 -2.923921 -2.356221 0.567699992 FP068
## FP069 -1.39582672 1.631126e-01 -2.782438 -2.605872 0.176566128 FP069
## FP070 11.33500604 6.532630e-27 -2.155840 -3.739142 -1.583301881 FP070
## FP071 9.16039412 1.012284e-18 -2.295828 -3.588521 -1.292692775 FP071
## FP072 -9.86673490 4.502526e-21 -3.674277 -2.222396 1.451880757 FP072
## FP073 -6.31556184 4.773987e-10 -2.972104 -2.154780 0.817323998 FP073
## FP074 -3.16365915 1.617158e-03 -2.849299 -2.446958 0.402341137 FP074
## FP075 -4.83159241 1.618286e-06 -2.926916 -2.311584 0.615331888 FP075
## FP076 18.19671006 2.170836e-57 -1.949953 -4.292756 -2.342803359 FP076
## FP077 -0.24434665 8.070283e-01 -2.728715 -2.697082 0.031633203 FP077
## FP078 -0.49694487 6.193690e-01 -2.737523 -2.675156 0.062366949 FP078
## FP079 12.46647477 2.609452e-32 -1.649763 -3.199207 -1.549444605 FP079
## FP080 -4.44534892 1.029202e-05 -2.896848 -2.308160 0.588687940 FP080
## FP081 0.11125946 9.114457e-01 -2.714519 -2.729057 -0.014537653 FP081
## FP082 12.55490234 3.329065e-32 -1.573824 -3.177143 -1.603319328 FP082
## FP083 -6.28835488 5.760827e-10 -2.932735 -2.149385 0.783350551 FP083
## FP084 -3.43524930 6.332047e-04 -2.851414 -2.386949 0.464465314 FP084
## FP085 10.47209331 1.134762e-22 -2.307585 -3.916008 -1.608423485 FP085
## FP086 1.02088695 3.077271e-01 -2.682101 -2.817578 -0.135477406 FP086
## FP087 11.07193302 5.850147e-26 -1.684808 -3.107540 -1.422732105 FP087
## FP088 -4.82078133 1.873320e-06 -2.891398 -2.233960 0.657438003 FP088
## FP089 15.68684642 7.559612e-42 -2.131606 -4.506936 -2.375330025 FP089
## FP090 0.72850761 4.666345e-01 -2.693950 -2.792743 -0.098793036 FP090
## FP091 -1.97821299 4.847758e-02 -2.777626 -2.515187 0.262438593 FP091
## FP092 12.71461669 9.160201e-31 -2.250250 -4.169957 -1.919706549 FP092
## FP093 2.40580805 1.652056e-02 -2.636787 -2.972026 -0.335238658 FP093
## FP094 -1.08529331 2.783195e-01 -2.751874 -2.607909 0.143965054 FP094
## FP095 -4.83150303 1.885749e-06 -2.863571 -2.203780 0.659791524 FP095
## FP096 -0.05816460 9.536450e-01 -2.720323 -2.712271 0.008052049 FP096
## FP097 9.06740092 4.508890e-18 -2.420977 -3.684420 -1.263443027 FP097
## FP098 -3.09495737 2.088014e-03 -2.820538 -2.391460 0.429077754 FP098
## FP099 4.51553294 8.153915e-06 -2.575959 -3.203843 -0.627883409 FP099
## FP100 -4.26730797 2.354655e-05 -2.846430 -2.293727 0.552702276 FP100
## FP101 -3.33565277 9.211008e-04 -2.828760 -2.363022 0.465738108 FP101
## FP102 1.25032500 2.119440e-01 -2.683373 -2.857708 -0.174335474 FP102
## FP103 2.51185846 1.236590e-02 -2.644038 -2.984808 -0.340770007 FP103
## FP104 1.23433987 2.176989e-01 -2.681746 -2.846934 -0.165188360 FP104
## FP105 2.56644125 1.063908e-02 -2.640201 -3.003756 -0.363555025 FP105
## FP106 2.42187970 1.595574e-02 -2.652367 -2.998297 -0.345929993 FP106
## FP107 10.92623859 2.395320e-23 -2.328707 -4.173284 -1.844576915 FP107
## FP108 -0.88386799 3.773218e-01 -2.744087 -2.619641 0.124446276 FP108
## FP109 1.72666429 8.493856e-02 -2.681392 -2.891845 -0.210453156 FP109
## FP110 -4.30633122 2.083157e-05 -2.839272 -2.253622 0.585649074 FP110
## FP111 0.07891212 9.371465e-01 -2.716361 -2.727594 -0.011232326 FP111
## FP112 13.31169435 4.090297e-31 -2.293512 -4.478541 -2.185028791 FP112
## FP113 -4.25438885 2.743420e-05 -2.842824 -2.207527 0.635296648 FP113
## FP114 0.38442341 7.009005e-01 -2.711034 -2.759459 -0.048425836 FP114
## FP115 -0.49398272 6.216320e-01 -2.730653 -2.663059 0.067594185 FP115
## FP116 -3.39726200 7.657795e-04 -2.815911 -2.310055 0.505856814 FP116
## FP117 3.16005628 1.769096e-03 -2.623060 -3.157353 -0.534292762 FP117
## FP118 -3.88255786 1.272871e-04 -2.835755 -2.226776 0.608979252 FP118
## FP119 -0.71996857 4.720764e-01 -2.734485 -2.636839 0.097646215 FP119
## FP120 -3.25854728 1.280523e-03 -2.807793 -2.270759 0.537033697 FP120
## FP121 0.62156119 5.349141e-01 -2.704487 -2.805188 -0.100701417 FP121
## FP122 -2.44169102 1.530759e-02 -2.781836 -2.396154 0.385682632 FP122
## FP123 3.52755166 4.929055e-04 -2.628914 -3.165157 -0.536243091 FP123
## FP124 -3.58983366 3.953044e-04 -2.806888 -2.261494 0.545394825 FP124
## FP125 -2.91655379 3.853055e-03 -2.786364 -2.350743 0.435620393 FP125
## FP126 -1.44180023 1.505173e-01 -2.748395 -2.547234 0.201161019 FP126
## FP127 -2.66597987 8.213408e-03 -2.773386 -2.381429 0.391957737 FP127
## FP128 -3.37747584 8.536233e-04 -2.794086 -2.284752 0.509334647 FP128
## FP129 3.28855844 1.192299e-03 -2.642100 -3.193030 -0.550930181 FP129
## FP130 1.02990587 3.048783e-01 -2.698555 -2.888900 -0.190345358 FP130
## FP131 -0.49682548 6.198471e-01 -2.727954 -2.653583 0.074370939 FP131
## FP132 -5.89680424 1.633112e-08 -2.832055 -1.925126 0.906929238 FP132
## FP133 -1.83896087 6.756107e-02 -2.757100 -2.451750 0.305349880 FP133
## FP134 3.16620016 1.761695e-03 -2.661506 -3.110000 -0.448493976 FP134
## FP135 -2.94236705 3.709259e-03 -2.783827 -2.266667 0.517160048 FP135
## FP136 -2.02006233 4.501990e-02 -2.761938 -2.403304 0.358633451 FP136
## FP137 -0.07855180 9.374873e-01 -2.720131 -2.706636 0.013494433 FP137
## FP138 -1.44829927 1.496787e-01 -2.748083 -2.483302 0.264780953 FP138
## FP139 -0.22212826 8.246439e-01 -2.721936 -2.680897 0.041038417 FP139
## FP140 -1.86990507 6.355486e-02 -2.758036 -2.403962 0.354073239 FP140
## FP141 4.15441700 4.792655e-05 -2.650655 -3.232523 -0.581867761 FP141
## FP142 -2.92307611 4.047862e-03 -2.779233 -2.224519 0.554713355 FP142
## FP143 0.83414756 4.061300e-01 -2.705904 -2.862338 -0.156433772 FP143
## FP144 -4.98991305 1.904653e-06 -2.819214 -1.852424 0.966789373 FP144
## FP145 -3.99831545 1.002597e-04 -2.787077 -2.128990 0.658087566 FP145
## FP146 6.08904552 1.064009e-08 -2.608687 -3.675000 -1.066313013 FP146
## FP147 -2.98364059 3.376138e-03 -2.776357 -2.226800 0.549557227 FP147
## FP148 -4.00444775 1.101041e-04 -2.780300 -2.073012 0.707287491 FP148
## FP149 9.67498002 8.530838e-16 -2.479225 -5.125930 -2.646704799 FP149
## FP150 -1.59224059 1.145443e-01 -2.742808 -2.435467 0.307341553 FP150
## FP151 -1.68674372 9.608846e-02 -2.736013 -2.423019 0.312994495 FP151
## FP152 2.02103329 4.549820e-02 -2.692325 -3.012308 -0.319982377 FP152
## FP153 0.83775227 4.044086e-01 -2.703900 -2.892432 -0.188532775 FP153
## FP155 4.93743429 3.813516e-06 -2.653412 -3.592273 -0.938860298 FP155
## FP156 2.70254904 8.178498e-03 -2.685045 -3.160896 -0.475850274 FP156
## FP157 -1.19798365 2.351567e-01 -2.738105 -2.423220 0.314885042 FP157
## FP158 -3.18371959 2.293303e-03 -2.757078 -2.039020 0.718058170 FP158
## FP159 2.90626659 4.444806e-03 -2.687590 -3.127313 -0.439722935 FP159
## FP160 0.72930617 4.673596e-01 -2.711400 -2.816308 -0.104908144 FP160
## FP161 -8.02084404 8.158474e-12 -2.826779 -1.193333 1.633445946 FP161
## FP162 9.05654884 7.502729e-19 -2.147208 -3.300849 -1.153640924 FP162
## FP163 -4.73411111 2.565152e-06 -3.009759 -2.398455 0.611304290 FP163
## FP164 11.15556043 6.131703e-27 -1.830706 -3.245042 -1.414335661 FP164
## FP165 -3.26163144 1.150990e-03 -2.862294 -2.450602 0.411691613 FP165
## FP166 6.01599552 3.059094e-09 -2.441541 -3.277905 -0.836363881 FP166
## FP167 -3.77468033 1.718080e-04 -2.874742 -2.398718 0.476023835 FP167
## FP168 12.78784085 6.302482e-34 -1.659686 -3.250521 -1.590835792 FP168
## FP169 10.79840624 1.952902e-22 -2.370413 -4.241017 -1.870603512 FP169
## FP170 1.45059296 1.480425e-01 -2.674961 -2.911943 -0.236981517 FP170
## FP171 -3.56151646 4.354270e-04 -2.810722 -2.266398 0.544324003 FP171
## FP172 13.04070659 8.112523e-28 -2.345390 -4.809931 -2.464540221 FP172
## FP173 2.68918003 7.770466e-03 -2.653554 -3.111556 -0.458001634 FP173
## FP174 0.94721964 3.446525e-01 -2.699492 -2.845806 -0.146314311 FP174
## FP175 0.01020115 9.918704e-01 -2.718360 -2.719922 -0.001562215 FP175
## FP176 -2.29447613 2.298911e-02 -2.766395 -2.374310 0.392084865 FP176
## FP177 -1.08253877 2.802959e-01 -2.737548 -2.580609 0.156939151 FP177
## FP178 3.27582610 1.258481e-03 -2.656782 -3.167739 -0.510956834 FP178
## FP179 0.85670987 3.931634e-01 -2.703846 -2.854409 -0.150562448 FP179
## FP180 -2.83913345 5.188161e-03 -2.773274 -2.263235 0.510039146 FP180
## FP181 6.24259165 6.005980e-09 -2.617726 -3.695281 -1.077554681 FP181
## FP182 -2.11887211 3.595632e-02 -2.755239 -2.384255 0.370983887 FP182
## FP183 -2.62186301 1.015591e-02 -2.755210 -2.271250 0.483960466 FP183
## FP184 10.24979020 9.572172e-17 -2.493318 -5.171000 -2.677681975 FP184
## FP185 3.21519455 1.718715e-03 -2.667230 -3.270000 -0.602770115 FP185
## FP186 -2.10893733 3.756740e-02 -2.749818 -2.342740 0.407078042 FP186
## FP187 -0.14233858 8.871705e-01 -2.721122 -2.685942 0.035180420 FP187
## FP188 -2.76497219 7.083803e-03 -2.760011 -2.153692 0.606318979 FP188
## FP189 0.29230393 7.707177e-01 -2.713884 -2.774932 -0.061047680 FP189
## FP190 8.23796541 2.799252e-12 -2.574785 -4.556522 -1.981737159 FP190
## FP191 -1.62000293 1.089976e-01 -2.742364 -2.404627 0.337737388 FP191
## FP192 0.55100083 5.833593e-01 -2.711377 -2.829310 -0.117932965 FP192
## FP193 11.06173597 1.595927e-16 -2.525146 -5.642881 -3.117735616 FP193
## FP194 -1.03294441 3.047671e-01 -2.728916 -2.553214 0.175701915 FP194
## FP195 -5.88072667 1.035398e-07 -2.786495 -1.672759 1.113736340 FP195
## FP196 6.42707826 1.269199e-08 -2.651126 -3.838889 -1.187762913 FP196
## FP197 3.82944792 3.167065e-04 -2.670555 -3.583800 -0.913245061 FP197
## FP198 -3.87872401 2.598433e-04 -2.776165 -1.761852 1.014313143 FP198
## FP201 -3.92629512 2.100852e-04 -2.757414 -2.018600 0.738813984 FP201
## FP202 5.92935333 6.082278e-09 -2.496969 -3.357143 -0.860174019 FP202
## FP203 1.09341446 2.759667e-01 -2.695582 -2.896147 -0.200564841 FP203
## FP204 2.86078975 4.868444e-03 -2.672159 -3.141702 -0.469543435 FP204
## FP205 5.61427744 2.488511e-07 -2.605564 -4.057838 -1.452273414 FP205
## FP206 3.58353985 6.162975e-04 -2.674519 -3.409474 -0.734954669 FP206
## FP207 8.34894566 1.153650e-11 -2.595151 -4.768704 -2.173553202 FP207
## FP208 1.37823055 1.702203e-01 -2.690237 -2.942056 -0.251819108 FP208
## Metric NegativeLog10_t.Test_P.Value AbsoluteMeanDifference Group
## FP001 VP_T 4.201528604 0.526993515 Others
## FP002 VP_T 22.869158221 1.292512617 Others
## FP003 VP_T 1.376893575 0.260308757 Others
## FP004 VP_T 6.019916058 0.700951689 Others
## FP005 VP_T 22.802292476 1.293722323 Others
## FP006 VP_T 14.032085507 0.975589032 Others
## FP007 VP_T 0.425806855 0.113781971 Others
## FP008 VP_T 3.040027990 0.416960797 Others
## FP009 VP_T 26.126812307 1.676686955 Others
## FP010 VP_T 4.303328923 0.591478647 Others
## FP011 VP_T 10.971517457 1.007292306 Others
## FP012 VP_T 1.225221683 0.312385742 Others
## FP013 VP_T 23.963334377 2.125210704 Others
## FP014 VP_T 22.936495310 2.133030370 Others
## FP015 VP_T 11.843823827 1.959799162 Others
## FP016 VP_T 0.269403810 0.102551919 Others
## FP017 VP_T 2.175102341 0.444006259 Others
## FP018 VP_T 4.551825465 0.571878063 Others
## FP019 VP_T 1.656169620 0.396306731 Others
## FP020 VP_T 3.139600190 0.560894171 Others
## FP021 VP_T 2.995894414 0.629955482 Others
## FP022 VP_T 0.296600971 0.090969199 Others
## FP023 VP_T 1.524450486 0.369544057 Others
## FP024 VP_T 1.791072101 0.425616224 Others
## FP025 VP_T 2.097935479 0.459131121 Others
## FP026 VP_T 0.252468464 0.112793485 Others
## FP027 VP_T 4.765784712 0.769283405 Others
## FP028 VP_T 2.995115516 0.690852068 Others
## FP029 VP_T 0.870725522 0.216617374 Others
## FP030 VP_T 4.245335156 0.865649782 Others
## FP031 VP_T 0.070877154 0.038221437 Others
## FP032 VP_T 2.292392384 0.533403438 Others
## FP033 VP_T 1.826136061 0.468183359 Others
## FP034 VP_T 0.382162790 0.110526015 Others
## FP035 VP_T 4.164203229 0.811934339 Others
## FP036 VP_T 6.197580475 0.906807452 Others
## FP037 VP_T 0.760726840 0.259728507 Others
## FP038 VP_T 1.912202707 0.536540459 Others
## FP039 VP_T 8.854930829 1.744487356 Others
## FP040 VP_T 5.438832659 1.016343810 Others
## FP041 VP_T 3.643123974 0.731409091 Others
## FP042 VP_T 0.561909802 0.192657624 Others
## FP043 VP_T 0.068786845 0.040966323 Others
## FP044 VP_T 21.836140499 4.109868127 FP044
## FP045 VP_T 2.749327017 0.725844224 Others
## FP046 VP_T 11.710017638 0.993554071 Others
## FP047 VP_T 2.676405787 0.402071305 Others
## FP048 VP_T 0.361060666 0.122764360 Others
## FP049 VP_T 15.812053987 1.840791658 Others
## FP050 VP_T 1.122769076 0.300049387 Others
## FP051 VP_T 3.798562513 0.567749069 Others
## FP052 VP_T 0.767700036 0.193767561 Others
## FP053 VP_T 11.412988867 1.636492479 Others
## FP054 VP_T 5.107065013 0.817488623 Others
## FP055 VP_T 1.451024435 0.457828105 Others
## FP056 VP_T 8.081475624 1.836481186 Others
## FP057 VP_T 0.918205855 0.266140351 Others
## FP058 VP_T 0.718269116 0.238517200 Others
## FP059 VP_T 5.857539068 1.029857320 Others
## FP060 VP_T 9.468965656 0.818627333 Others
## FP061 VP_T 2.900313656 0.414612257 Others
## FP062 VP_T 5.483475029 0.593200306 Others
## FP063 VP_T 8.312847852 0.748916565 Others
## FP064 VP_T 2.845497682 0.406023478 Others
## FP065 VP_T 38.132540555 1.648641212 FP065
## FP066 VP_T 3.306549572 0.517267265 Others
## FP067 VP_T 3.658979139 0.464242594 Others
## FP068 VP_T 5.122961557 0.567699992 Others
## FP069 VP_T 0.787512584 0.176566128 Others
## FP070 VP_T 26.184911955 1.583301881 Others
## FP071 VP_T 17.994697417 1.292692775 Others
## FP072 VP_T 20.346543773 1.451880757 Others
## FP073 VP_T 9.321118800 0.817323998 Others
## FP074 VP_T 2.791247434 0.402341137 Others
## FP075 VP_T 5.790944794 0.615331888 Others
## FP076 VP_T 56.663372962 2.342803359 FP076
## FP077 VP_T 0.093111212 0.031633203 Others
## FP078 VP_T 0.208050512 0.062366949 Others
## FP079 VP_T 31.583450764 1.549444605 Others
## FP080 VP_T 4.987499431 0.588687940 Others
## FP081 VP_T 0.040269209 0.014537653 Others
## FP082 VP_T 31.477677785 1.603319328 Others
## FP083 VP_T 9.239515137 0.783350551 Others
## FP084 VP_T 3.198455901 0.464465314 Others
## FP085 VP_T 21.945095151 1.608423485 Others
## FP086 VP_T 0.511834307 0.135477406 Others
## FP087 VP_T 25.232833199 1.422732105 Others
## FP088 VP_T 5.727388075 0.657438003 Others
## FP089 VP_T 41.121500514 2.375330025 FP089
## FP090 VP_T 0.331023127 0.098793036 Others
## FP091 VP_T 1.314459073 0.262438593 Others
## FP092 VP_T 30.038095004 1.919706549 Others
## FP093 VP_T 1.781975360 0.335238658 Others
## FP094 VP_T 0.555456425 0.143965054 Others
## FP095 VP_T 5.724516186 0.659791524 Others
## FP096 VP_T 0.020613278 0.008052049 Others
## FP097 VP_T 17.345930392 1.263443027 Others
## FP098 VP_T 2.680266578 0.429077754 Others
## FP099 VP_T 5.088633828 0.627883409 Others
## FP100 VP_T 4.628072635 0.552702276 Others
## FP101 VP_T 3.035692821 0.465738108 Others
## FP102 VP_T 0.673778966 0.174335474 Others
## FP103 VP_T 1.907774119 0.340770007 Others
## FP104 VP_T 0.662143682 0.165188360 Others
## FP105 VP_T 1.973096120 0.363555025 Others
## FP106 VP_T 1.797083057 0.345929993 Others
## FP107 VP_T 22.620636474 1.844576915 Others
## FP108 VP_T 0.423288091 0.124446276 Others
## FP109 VP_T 1.070895104 0.210453156 Others
## FP110 VP_T 4.681277976 0.585649074 Others
## FP111 VP_T 0.028192507 0.011232326 Others
## FP112 VP_T 30.388245112 2.185028791 Others
## FP113 VP_T 4.561707729 0.635296648 Others
## FP114 VP_T 0.154343648 0.048425836 Others
## FP115 VP_T 0.206466608 0.067594185 Others
## FP116 VP_T 3.115896288 0.505856814 Others
## FP117 VP_T 2.752248553 0.534292762 Others
## FP118 VP_T 3.895215544 0.608979252 Others
## FP119 VP_T 0.325987674 0.097646215 Others
## FP120 VP_T 2.892612706 0.537033697 Others
## FP121 VP_T 0.271715970 0.100701417 Others
## FP122 VP_T 1.815093207 0.385682632 Others
## FP123 VP_T 3.307236303 0.536243091 Others
## FP124 VP_T 3.403068381 0.545394825 Others
## FP125 VP_T 2.414194812 0.435620393 Others
## FP126 VP_T 0.822413480 0.201161019 Others
## FP127 VP_T 2.085476582 0.391957737 Others
## FP128 VP_T 3.068733732 0.509334647 Others
## FP129 VP_T 2.923614965 0.550930181 Others
## FP130 VP_T 0.515873515 0.190345358 Others
## FP131 VP_T 0.207715419 0.074370939 Others
## FP132 VP_T 7.786984099 0.906929238 Others
## FP133 VP_T 1.170303502 0.305349880 Others
## FP134 VP_T 2.754069157 0.448493976 Others
## FP135 VP_T 2.430712786 0.517160048 Others
## FP136 VP_T 1.346595486 0.358633451 Others
## FP137 VP_T 0.028034596 0.013494433 Others
## FP138 VP_T 0.824839912 0.264780953 Others
## FP139 VP_T 0.083733572 0.041038417 Others
## FP140 VP_T 1.196851246 0.354073239 Others
## FP141 VP_T 4.319423830 0.581867761 Others
## FP142 VP_T 2.392774312 0.554713355 Others
## FP143 VP_T 0.391334955 0.156433772 Others
## FP144 VP_T 5.720184136 0.966789373 Others
## FP145 VP_T 3.998873639 0.658087566 Others
## FP146 VP_T 7.973054883 1.066313013 Others
## FP147 VP_T 2.471579818 0.549557227 Others
## FP148 VP_T 3.958196591 0.707287491 Others
## FP149 VP_T 15.069008282 2.646704799 Others
## FP150 VP_T 0.941026543 0.307341553 Others
## FP151 VP_T 1.017328768 0.312994495 Others
## FP152 VP_T 1.342005745 0.319982377 Others
## FP153 VP_T 0.393179623 0.188532775 Others
## FP155 VP_T 5.418674482 0.938860298 Others
## FP156 VP_T 2.087326470 0.475850274 Others
## FP157 VP_T 0.628642607 0.314885042 Others
## FP158 VP_T 2.639538515 0.718058170 Others
## FP159 VP_T 2.352147186 0.439722935 Others
## FP160 VP_T 0.330348826 0.104908144 Others
## FP161 VP_T 11.088391050 1.633445946 Others
## FP162 VP_T 18.124780712 1.153640924 Others
## FP163 VP_T 5.590886912 0.611304290 Others
## FP164 VP_T 26.212418905 1.414335661 Others
## FP165 VP_T 2.938928297 0.411691613 Others
## FP166 VP_T 8.514407198 0.836363881 Others
## FP167 VP_T 3.764956674 0.476023835 Others
## FP168 VP_T 33.200488373 1.590835792 Others
## FP169 VP_T 21.709319649 1.870603512 Others
## FP170 VP_T 0.829613712 0.236981517 Others
## FP171 VP_T 3.361084630 0.544324003 Others
## FP172 VP_T 27.090844062 2.464540221 Others
## FP173 VP_T 2.109552932 0.458001634 Others
## FP174 VP_T 0.462618628 0.146314311 Others
## FP175 VP_T 0.003545084 0.001562215 Others
## FP176 VP_T 1.638477843 0.392084865 Others
## FP177 VP_T 0.552383284 0.156939151 Others
## FP178 VP_T 2.900153450 0.510956834 Others
## FP179 VP_T 0.405426904 0.150562448 Others
## FP180 VP_T 2.284986523 0.510039146 Others
## FP181 VP_T 8.221416123 1.077554681 Others
## FP182 VP_T 1.444224705 0.370983887 Others
## FP183 VP_T 1.993281105 0.483960466 Others
## FP184 VP_T 16.018989514 2.677681975 Others
## FP185 VP_T 2.764796188 0.602770115 Others
## FP186 VP_T 1.425188897 0.407078042 Others
## FP187 VP_T 0.051992920 0.035180420 Others
## FP188 VP_T 2.149733499 0.606318979 Others
## FP189 VP_T 0.113104683 0.061047680 Others
## FP190 VP_T 11.552957932 1.981737159 Others
## FP191 VP_T 0.962583182 0.337737388 Others
## FP192 VP_T 0.234063851 0.117932965 Others
## FP193 VP_T 15.796986918 3.117735616 FP193
## FP194 VP_T 0.516031959 0.175701915 Others
## FP195 VP_T 6.984892592 1.113736340 Others
## FP196 VP_T 7.896470322 1.187762913 Others
## FP197 VP_T 3.499343085 0.913245061 Others
## FP198 VP_T 3.585288501 1.014313143 Others
## FP201 VP_T 3.677604573 0.738813984 Others
## FP202 VP_T 8.215933727 0.860174019 Others
## FP203 VP_T 0.559143393 0.200564841 Others
## FP204 VP_T 2.312609803 0.469543435 Others
## FP205 VP_T 6.604060360 1.452273414 Others
## FP206 VP_T 3.210209626 0.734954669 Others
## FP207 VP_T 10.937926026 2.173553202 Others
## FP208 VP_T 0.768988609 0.251819108 Others
##################################
# Selecting the best-performing
# predictors based on the metrics
##################################
# Sort the factor-predictor summary by -log10(t-test p-value), largest
# (most significant) first.
VP_T_Summary_Top15_TTestPValue <- VP_T_Summary[
  order(VP_T_Summary$NegativeLog10_t.Test_P.Value, decreasing = TRUE),
]
# Keep the 15 top-ranked rows. head() returns fewer rows instead of padding
# with NA rows if the table is shorter than 15. The outer parentheses print
# the assigned value.
(VP_T_Summary_Top15_TTestPValue <- head(VP_T_Summary_Top15_TTestPValue, 15))
## t.Statistic t.Test_P.Value Mean0 Mean1 MeanDifference Predictor
## FP076 18.19671 2.170836e-57 -1.949953 -4.292756 -2.342803 FP076
## FP089 15.68685 7.559612e-42 -2.131606 -4.506936 -2.375330 FP089
## FP065 13.67947 7.369864e-39 -1.740827 -3.389468 -1.648641 FP065
## FP168 12.78784 6.302482e-34 -1.659686 -3.250521 -1.590836 FP168
## FP079 12.46647 2.609452e-32 -1.649763 -3.199207 -1.549445 FP079
## FP082 12.55490 3.329065e-32 -1.573824 -3.177143 -1.603319 FP082
## FP112 13.31169 4.090297e-31 -2.293512 -4.478541 -2.185029 FP112
## FP092 12.71462 9.160201e-31 -2.250250 -4.169957 -1.919707 FP092
## FP172 13.04071 8.112523e-28 -2.345390 -4.809931 -2.464540 FP172
## FP164 11.15556 6.131703e-27 -1.830706 -3.245042 -1.414336 FP164
## FP070 11.33501 6.532630e-27 -2.155840 -3.739142 -1.583302 FP070
## FP009 11.49361 7.467714e-27 -2.249591 -3.926278 -1.676687 FP009
## FP087 11.07193 5.850147e-26 -1.684808 -3.107540 -1.422732 FP087
## FP013 11.73268 1.088092e-24 -2.365485 -4.490696 -2.125211 FP013
## FP014 11.47456 1.157457e-23 -2.375401 -4.508431 -2.133030 FP014
## Metric NegativeLog10_t.Test_P.Value AbsoluteMeanDifference Group
## FP076 VP_T 56.66337 2.342803 FP076
## FP089 VP_T 41.12150 2.375330 FP089
## FP065 VP_T 38.13254 1.648641 FP065
## FP168 VP_T 33.20049 1.590836 Others
## FP079 VP_T 31.58345 1.549445 Others
## FP082 VP_T 31.47768 1.603319 Others
## FP112 VP_T 30.38825 2.185029 Others
## FP092 VP_T 30.03810 1.919707 Others
## FP172 VP_T 27.09084 2.464540 Others
## FP164 VP_T 26.21242 1.414336 Others
## FP070 VP_T 26.18491 1.583302 Others
## FP009 VP_T 26.12681 1.676687 Others
## FP087 VP_T 25.23283 1.422732 Others
## FP013 VP_T 23.96333 2.125211 Others
## FP014 VP_T 22.93650 2.133030 Others
# Sort the factor-predictor summary by the absolute difference between the
# two group means, largest first.
VP_T_Summary_Top15_AbsoluteMeanDifference <- VP_T_Summary[
  order(VP_T_Summary$AbsoluteMeanDifference, decreasing = TRUE),
]
# Keep the 15 top-ranked rows. head() returns fewer rows instead of padding
# with NA rows if the table is shorter than 15. The outer parentheses print
# the assigned value.
(VP_T_Summary_Top15_AbsoluteMeanDifference <-
  head(VP_T_Summary_Top15_AbsoluteMeanDifference, 15))
## t.Statistic t.Test_P.Value Mean0 Mean1 MeanDifference Predictor
## FP044 15.198443 1.458342e-22 -2.472237 -6.582105 -4.109868 FP044
## FP193 11.061736 1.595927e-16 -2.525146 -5.642881 -3.117736 FP193
## FP184 10.249790 9.572172e-17 -2.493318 -5.171000 -2.677682 FP184
## FP149 9.674980 8.530838e-16 -2.479225 -5.125930 -2.646705 FP149
## FP172 13.040707 8.112523e-28 -2.345390 -4.809931 -2.464540 FP172
## FP089 15.686846 7.559612e-42 -2.131606 -4.506936 -2.375330 FP089
## FP076 18.196710 2.170836e-57 -1.949953 -4.292756 -2.342803 FP076
## FP112 13.311694 4.090297e-31 -2.293512 -4.478541 -2.185029 FP112
## FP207 8.348946 1.153650e-11 -2.595151 -4.768704 -2.173553 FP207
## FP014 11.474562 1.157457e-23 -2.375401 -4.508431 -2.133030 FP014
## FP013 11.732679 1.088092e-24 -2.365485 -4.490696 -2.125211 FP013
## FP190 8.237965 2.799252e-12 -2.574785 -4.556522 -1.981737 FP190
## FP015 -7.737187 1.432769e-12 -4.404286 -2.444487 1.959799 FP015
## FP092 12.714617 9.160201e-31 -2.250250 -4.169957 -1.919707 FP092
## FP169 10.798406 1.952902e-22 -2.370413 -4.241017 -1.870604 FP169
## Metric NegativeLog10_t.Test_P.Value AbsoluteMeanDifference Group
## FP044 VP_T 21.83614 4.109868 FP044
## FP193 VP_T 15.79699 3.117736 FP193
## FP184 VP_T 16.01899 2.677682 Others
## FP149 VP_T 15.06901 2.646705 Others
## FP172 VP_T 27.09084 2.464540 Others
## FP089 VP_T 41.12150 2.375330 FP089
## FP076 VP_T 56.66337 2.342803 FP076
## FP112 VP_T 30.38825 2.185029 Others
## FP207 VP_T 10.93793 2.173553 Others
## FP014 VP_T 22.93650 2.133030 Others
## FP013 VP_T 23.96333 2.125211 Others
## FP190 VP_T 11.55296 1.981737 Others
## FP015 VP_T 11.84382 1.959799 Others
## FP092 VP_T 30.03810 1.919707 Others
## FP169 VP_T 21.70932 1.870604 Others
##################################
# Exploring predictor performance
##################################
# Dot plot of -log10(t-test p-value) for the predictors held in
# VP_T_Summary_Top15_TTestPValue, conditioned on Metric. Both the prepanel
# and the panel function reorder the y levels by x, so the axis limits and
# the drawn rows follow the same score-based ordering.
dotplot(
  Predictor ~ NegativeLog10_t.Test_P.Value | Metric,
  VP_T_Summary_Top15_TTestPValue,
  xlab = "-Log10(T-Test P-Value)",
  origin = 0,
  type = c("p", "h"),
  pch = 16,
  cex = 2,
  alpha = 0.45,
  prepanel = function(x, y) list(ylim = levels(reorder(y, x))),
  panel = function(x, y, ...) panel.dotplot(x, reorder(y, x), ...)
)
# Dot plot of the absolute mean difference for the predictors held in
# VP_T_Summary_Top15_AbsoluteMeanDifference, conditioned on Metric. Both the
# prepanel and the panel function reorder the y levels by x, so the axis
# limits and the drawn rows follow the same score-based ordering.
dotplot(
  Predictor ~ AbsoluteMeanDifference | Metric,
  VP_T_Summary_Top15_AbsoluteMeanDifference,
  xlab = "Absolute (Mean With Structure - Mean Without Structure)",
  origin = 0,
  type = c("p", "h"),
  pch = 16,
  cex = 2,
  alpha = 0.45,
  prepanel = function(x, y) list(ylim = levels(reorder(y, x))),
  panel = function(x, y, ...) panel.dotplot(x, reorder(y, x), ...)
)
##################################
# Consolidating all performance metrics
# for the numeric predictors
##################################
# Bind the score column of each per-metric summary into one matrix,
# one column per metric.
NumericPredictor_Metrics <- cbind(
  LOWESS_PR_Summary$LOWESS_PR,
  PCC_Summary$PCC,
  SRCC_Summary$SRCC,
  MIC_Summary$MIC,
  RV_Summary$RV
)
colnames(NumericPredictor_Metrics) <- c(
  "LOWESS_PR",
  "PCC",
  "SRCC",
  "MIC",
  "RV"
)
# Label rows with every column name of the numeric training set except the
# first (presumably the response column -- confirm against where
# PMA_PreModelling_Train_Numeric is built).
rownames(NumericPredictor_Metrics) <- names(PMA_PreModelling_Train_Numeric)[-1]
NumericPredictor_Metrics <- as.data.frame(NumericPredictor_Metrics)
# Tag the predictors to highlight in later plots with their own name;
# every other predictor falls into the "Others" group.
NumericPredictor_Metrics$Group <- ifelse(
  rownames(NumericPredictor_Metrics) %in%
    c("MolWeight", "NumCarbon", "NumMultBonds", "NumHalogen", "NumRings"),
  rownames(NumericPredictor_Metrics),
  "Others"
)
NumericPredictor_Metrics
## LOWESS_PR PCC SRCC MIC RV
## MolWeight 4.443734e-01 0.658495868 0.68529880 0.4679277 0.10334945
## NumBonds 2.093744e-01 0.457574429 0.54839850 0.3268683 0.03989134
## NumMultBonds 2.758859e-01 0.525248387 0.47971353 0.2792600 0.06807077
## NumRotBonds 2.230342e-02 0.149343282 0.14976036 0.1754215 0.06121663
## NumDblBonds 1.530295e-06 0.001237051 0.02042731 0.1688472 0.02974081
## NumCarbon 3.657997e-01 0.604813811 0.67359114 0.4434121 0.06435924
## NumNitrogen 1.045101e-02 0.102230176 0.10078218 0.1535738 0.11583417
## NumOxygen 1.710199e-02 0.130774566 0.14954994 0.1527421 0.10808031
## NumSulfer 8.357325e-03 0.091418407 0.12090249 0.1297052 0.05065068
## NumChlorine 2.540713e-01 0.504054819 0.35707519 0.2011708 0.02781883
## NumHalogen 2.541532e-01 0.504136055 0.38111965 0.2017841 0.06144457
## NumRings 2.384330e-01 0.488295986 0.50941815 0.3161828 0.05518173
## HydrophilicFactor 1.505216e-01 0.309022159 0.36469127 0.3208456 0.04195617
## SurfaceArea1 1.941711e-01 0.193769382 0.19339720 0.2054896 0.05366733
## SurfaceArea2 2.080820e-01 0.143941883 0.14057885 0.2274047 0.06623467
## Group
## MolWeight MolWeight
## NumBonds Others
## NumMultBonds NumMultBonds
## NumRotBonds Others
## NumDblBonds Others
## NumCarbon NumCarbon
## NumNitrogen Others
## NumOxygen Others
## NumSulfer Others
## NumChlorine Others
## NumHalogen NumHalogen
## NumRings NumRings
## HydrophilicFactor Others
## SurfaceArea1 Others
## SurfaceArea2 Others
# Scatterplot matrix of the first five columns of NumericPredictor_Metrics
# (the importance metrics), with points distinguished by the Group column.
splom(
  ~ NumericPredictor_Metrics[, 1:5],
  groups = NumericPredictor_Metrics$Group,
  varnames = c("LOWESS_PR", "PCC", "SRCC", "MIC", "RV"),
  main = "Feature Importance Comparison for Numeric Predictors",
  xlab = "Scatterplot Matrix of Feature Importance Metrics",
  auto.key = list(points = TRUE, space = "top", columns = 2),
  pch = 16,
  cex = 2,
  alpha = 0.45
)
##################################
# Consolidating all performance metrics
# for the factor predictors
##################################
# Scatter plot of significance (-log10 of the t-test p-value) against the
# signed mean difference for every row of VP_T_Summary, with points
# distinguished by the Group column.
TTEST_P.Value_Plot <- xyplot(
  NegativeLog10_t.Test_P.Value ~ MeanDifference,
  groups = VP_T_Summary$Group,
  data = VP_T_Summary,
  xlab = "Mean With Structure - Mean Without Structure",
  ylab = "-Log10(T-Test P-Value)",
  type = "p",
  pch = 16,
  cex = 2,
  alpha = 0.45,
  auto.key = list(points = TRUE, space = "top", columns = 2),
  # This section summarizes the factor predictors; the original title said
  # "Numeric Predictors".
  main = "Feature Importance Comparison for Factor Predictors"
)
# NOTE(review): only one plot is arranged, so ncol = 2 leaves the second
# column empty; kept as in the original layout -- confirm whether intended.
grid.arrange(TTEST_P.Value_Plot,
             ncol = 2)