##################################
# Loading R libraries
##################################
library(AppliedPredictiveModeling)
library(caret)
library(rpart)
library(lattice)
library(dplyr)
library(tidyr)
library(moments)
library(skimr)
library(RANN)
library(pls)
library(corrplot)
library(tidyverse)
library(lares)
library(DMwR2)
library(gridExtra)
library(rattle)
library(rpart.plot)
library(RColorBrewer)
library(stats)
library(nnet)
library(elasticnet)
library(earth)
library(party)
library(kernlab)
library(randomForest)
library(Cubist)
library(pROC)
library(mda)
library(klaR)
library(pamr)
library(minerva)
library(CORElearn)
##################################
# Loading source and
# formulating the train and test sets
##################################
# Load the solubility data; the statements below expect this call to make
# solTrainX / solTrainY and solTestX / solTestY available in the workspace.
data(solubility)
# Bind each response vector in front of its predictor table so that every
# partition is a single data frame with the response as its first column.
Solubility_Train <- as.data.frame(cbind(solTrainY, solTrainX))
Solubility_Test <- as.data.frame(cbind(solTestY, solTestX))
##################################
# Applying dichotomization and
# defining the response variable
##################################
# Dichotomize the numeric response into the two-level factor
# Log_Solubility_Class. An observation is "Low" when its response is strictly
# below the mean of the TRAINING response and "High" otherwise.
# Levels are fixed to c("Low", "High") so both sets share the same ordering.
Solubility_Train$Log_Solubility_Class <- factor(
  ifelse(Solubility_Train$solTrainY < mean(Solubility_Train$solTrainY),
         "Low", "High"),
  levels = c("Low", "High")
)
# The test set is cut at the training-set mean as well (not at its own mean).
Solubility_Test$Log_Solubility_Class <- factor(
  ifelse(Solubility_Test$solTestY < mean(Solubility_Train$solTrainY),
         "Low", "High"),
  levels = c("Low", "High")
)
# Drop the original numeric responses only after both class columns exist,
# because the test-set cut-off above still reads Solubility_Train$solTrainY.
Solubility_Train$solTrainY <- NULL
Solubility_Test$solTestY <- NULL
##################################
# Performing a general exploration of the train set
##################################
# Print the dimensions (rows, columns) of the training data frame.
dim(Solubility_Train)
## [1] 951 229
# Print a compact per-column listing (type and leading values) of the
# training data frame; the recorded listing below is truncated after FP099.
str(Solubility_Train)
## 'data.frame': 951 obs. of 229 variables:
## $ FP001 : int 0 0 1 0 0 1 0 1 1 1 ...
## $ FP002 : int 1 1 1 0 0 0 1 0 0 1 ...
## $ FP003 : int 0 0 1 1 1 1 0 1 1 1 ...
## $ FP004 : int 0 1 1 0 1 1 1 1 1 1 ...
## $ FP005 : int 1 1 1 0 1 0 1 0 0 1 ...
## $ FP006 : int 0 1 0 0 1 0 0 0 1 1 ...
## $ FP007 : int 0 1 0 1 0 0 0 1 1 1 ...
## $ FP008 : int 1 1 1 0 0 0 1 0 0 0 ...
## $ FP009 : int 0 0 0 0 1 1 1 0 1 0 ...
## $ FP010 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ FP011 : int 0 1 0 0 0 0 0 0 1 0 ...
## $ FP012 : int 0 0 0 0 0 1 0 1 0 0 ...
## $ FP013 : int 0 0 0 0 1 0 1 0 0 0 ...
## $ FP014 : int 0 0 0 0 0 0 1 0 0 0 ...
## $ FP015 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ FP016 : int 0 1 0 0 1 1 0 1 0 0 ...
## $ FP017 : int 0 0 1 1 0 0 0 0 1 1 ...
## $ FP018 : int 0 1 0 0 0 0 0 0 0 0 ...
## $ FP019 : int 1 0 0 0 1 0 1 0 0 0 ...
## $ FP020 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP021 : int 0 0 0 0 0 1 0 0 1 0 ...
## $ FP022 : int 0 0 0 0 0 0 0 0 0 1 ...
## $ FP023 : int 0 0 0 1 0 0 0 0 1 0 ...
## $ FP024 : int 1 0 0 0 1 0 0 0 0 0 ...
## $ FP025 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ FP026 : int 1 0 0 0 0 0 1 0 0 0 ...
## $ FP027 : int 0 0 0 0 0 0 0 0 0 1 ...
## $ FP028 : int 0 1 0 0 0 0 0 0 1 1 ...
## $ FP029 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP030 : int 0 0 0 0 1 0 0 0 0 0 ...
## $ FP031 : int 0 0 0 0 0 0 0 1 0 0 ...
## $ FP032 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP033 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP034 : int 0 0 0 0 1 0 0 0 0 1 ...
## $ FP035 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ FP036 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP037 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ FP038 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ FP039 : int 1 0 0 0 0 0 0 0 0 0 ...
## $ FP040 : int 1 0 0 0 0 0 0 0 0 0 ...
## $ FP041 : int 0 0 0 1 0 0 0 0 1 0 ...
## $ FP042 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP043 : int 0 1 0 0 0 0 0 0 0 0 ...
## $ FP044 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP045 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ FP046 : int 0 1 0 0 0 0 1 0 0 1 ...
## $ FP047 : int 0 1 1 0 0 0 1 0 0 0 ...
## $ FP048 : int 0 0 0 0 0 0 0 1 0 0 ...
## $ FP049 : int 0 0 0 0 0 0 1 0 0 0 ...
## $ FP050 : int 0 0 0 0 0 0 0 1 0 1 ...
## $ FP051 : int 0 1 0 0 0 0 0 0 0 0 ...
## $ FP052 : int 0 0 0 0 0 0 0 0 0 1 ...
## $ FP053 : int 0 0 0 0 0 0 1 0 0 0 ...
## $ FP054 : int 0 0 0 1 0 0 0 0 1 1 ...
## $ FP055 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP056 : int 1 0 0 0 0 0 0 0 0 0 ...
## $ FP057 : int 0 0 0 0 0 0 1 0 0 0 ...
## $ FP058 : int 0 0 0 0 0 0 0 0 0 1 ...
## $ FP059 : int 0 0 0 0 0 0 0 1 0 0 ...
## $ FP060 : int 0 1 1 0 0 0 0 1 1 0 ...
## $ FP061 : int 0 0 1 0 0 0 0 1 1 0 ...
## $ FP062 : int 0 0 1 0 0 1 0 1 1 1 ...
## $ FP063 : int 1 1 0 0 1 1 1 0 0 1 ...
## $ FP064 : int 0 1 1 0 1 1 0 1 0 0 ...
## $ FP065 : int 1 1 0 0 1 0 1 0 1 1 ...
## $ FP066 : int 1 0 1 1 1 1 1 1 1 1 ...
## $ FP067 : int 1 1 0 0 1 1 1 0 0 1 ...
## $ FP068 : int 0 1 0 0 1 1 1 0 0 1 ...
## $ FP069 : int 1 0 1 1 1 1 0 1 1 0 ...
## $ FP070 : int 1 1 0 1 0 0 1 0 1 0 ...
## $ FP071 : int 0 0 0 0 0 0 1 0 1 1 ...
## $ FP072 : int 0 1 1 0 0 1 0 1 1 1 ...
## $ FP073 : int 0 1 1 0 0 0 0 0 1 0 ...
## $ FP074 : int 0 1 0 0 0 0 0 0 1 0 ...
## $ FP075 : int 0 1 0 0 1 1 1 0 0 1 ...
## $ FP076 : int 1 1 0 0 0 0 1 0 1 1 ...
## $ FP077 : int 0 1 0 1 0 0 0 1 1 1 ...
## $ FP078 : int 0 1 0 0 0 0 0 0 1 0 ...
## $ FP079 : int 1 1 1 1 1 0 1 0 1 1 ...
## $ FP080 : int 0 1 0 0 1 1 1 1 0 0 ...
## $ FP081 : int 0 0 1 1 0 0 0 1 1 1 ...
## $ FP082 : int 1 1 1 0 1 1 1 0 1 1 ...
## $ FP083 : int 0 0 0 0 1 0 0 0 0 1 ...
## $ FP084 : int 1 1 0 0 1 0 1 0 0 0 ...
## $ FP085 : int 0 1 0 0 0 0 1 0 0 0 ...
## $ FP086 : int 0 0 0 1 1 0 0 1 1 1 ...
## $ FP087 : int 1 1 1 1 1 0 1 0 1 1 ...
## $ FP088 : int 0 1 0 0 0 0 0 1 1 0 ...
## $ FP089 : int 1 1 0 0 0 0 1 0 0 0 ...
## $ FP090 : int 0 1 0 1 0 0 0 1 1 1 ...
## $ FP091 : int 1 1 0 0 1 0 1 0 0 1 ...
## $ FP092 : int 0 0 0 0 1 1 1 0 1 0 ...
## $ FP093 : int 0 1 0 1 0 0 0 1 1 1 ...
## $ FP094 : int 0 0 0 0 1 0 0 1 0 0 ...
## $ FP095 : int 0 0 0 0 0 0 0 0 1 1 ...
## $ FP096 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ FP097 : int 1 1 0 0 0 0 1 0 1 0 ...
## $ FP098 : int 0 0 1 0 0 0 0 1 0 0 ...
## $ FP099 : int 0 0 0 0 0 0 0 0 1 0 ...
## [list output truncated]
# Print per-column summaries of the training data frame: Min/quartiles/Mean/Max
# for the numeric columns and level counts for Log_Solubility_Class.
summary(Solubility_Train)
## FP001 FP002 FP003 FP004
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :1.0000 Median :0.0000 Median :1.0000
## Mean :0.4932 Mean :0.5394 Mean :0.4364 Mean :0.5846
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP005 FP006 FP007 FP008
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :1.0000 Median :0.0000 Median :0.0000 Median :0.000
## Mean :0.5794 Mean :0.4006 Mean :0.3638 Mean :0.326
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.000
## FP009 FP010 FP011 FP012
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.2797 Mean :0.1788 Mean :0.2145 Mean :0.1767
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP013 FP014 FP015 FP016
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:1.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :1.0000 Median :0.0000
## Mean :0.1661 Mean :0.1609 Mean :0.8601 Mean :0.1462
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP017 FP018 FP019 FP020
## Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.000 Median :0.0000
## Mean :0.1441 Mean :0.1314 Mean :0.122 Mean :0.1199
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.000 Max. :1.0000
## FP021 FP022 FP023 FP024
## Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.000 Median :0.0000
## Mean :0.1209 Mean :0.1041 Mean :0.123 Mean :0.1125
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.000 Max. :1.0000
## FP025 FP026 FP027 FP028
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.1157 Mean :0.08412 Mean :0.09779 Mean :0.1062
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.00000 Max. :1.00000 Max. :1.0000
## FP029 FP030 FP031 FP032
## Min. :0.000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.102 Mean :0.09359 Mean :0.08938 Mean :0.07361
## 3rd Qu.:0.000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP033 FP034 FP035 FP036
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.0694 Mean :0.07992 Mean :0.07256 Mean :0.07571
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP037 FP038 FP039 FP040
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.07045 Mean :0.08622 Mean :0.07466 Mean :0.06835
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP041 FP042 FP043 FP044
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.06309 Mean :0.05678 Mean :0.06625 Mean :0.05994
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP045 FP046 FP047 FP048
## Min. :0.00000 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000 Median :0.000 Median :0.0000
## Mean :0.05573 Mean :0.3155 Mean :0.266 Mean :0.1241
## 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:1.000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.0000 Max. :1.000 Max. :1.0000
## FP049 FP050 FP051 FP052
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.122 Mean :0.1125 Mean :0.1094 Mean :0.09148
## 3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## FP053 FP054 FP055 FP056
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.09359 Mean :0.07571 Mean :0.05363 Mean :0.06519
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP057 FP058 FP059 FP060
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.0000
## Mean :0.1199 Mean :0.1136 Mean :0.05468 Mean :0.4816
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.0000
## FP061 FP062 FP063 FP064
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.4469 Mean :0.4374 Mean :0.4259 Mean :0.4164
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP065 FP066 FP067 FP068
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :1.0000 Median :0.0000 Median :0.0000
## Mean :0.5931 Mean :0.6099 Mean :0.3796 Mean :0.3617
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP069 FP070 FP071 FP072
## Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.000 Median :1.0000
## Mean :0.3617 Mean :0.3554 Mean :0.327 Mean :0.6583
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.000 Max. :1.0000
## FP073 FP074 FP075 FP076
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.3102 Mean :0.3249 Mean :0.3386 Mean :0.3281
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP077 FP078 FP079 FP080
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :1.0000 Median :0.0000
## Mean :0.3207 Mean :0.3039 Mean :0.6898 Mean :0.3028
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP081 FP082 FP083 FP084
## Min. :0.0000 Min. :0.000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :1.000 Median :0.0000 Median :0.000
## Mean :0.2787 Mean :0.714 Mean :0.2734 Mean :0.286
## 3rd Qu.:1.0000 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:1.000
## Max. :1.0000 Max. :1.000 Max. :1.0000 Max. :1.000
## FP085 FP086 FP087 FP088
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :1.0000 Median :0.0000
## Mean :0.2555 Mean :0.2692 Mean :0.7266 Mean :0.2629
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP089 FP090 FP091 FP092
## Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median :0.000 Median :0.000
## Mean :0.2471 Mean :0.2492 Mean :0.225 Mean :0.244
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.0000 Max. :1.0000 Max. :1.000 Max. :1.000
## FP093 FP094 FP095 FP096
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.244 Mean :0.2313 Mean :0.2198 Mean :0.2177
## 3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP097 FP098 FP099 FP100
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.2355 Mean :0.2376 Mean :0.2271 Mean :0.2313
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP101 FP102 FP103 FP104
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.2366 Mean :0.2019 Mean :0.2187 Mean :0.2229
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP105 FP106 FP107 FP108
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.000
## Mean :0.2156 Mean :0.1914 Mean :0.2114 Mean :0.205
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.000
## FP109 FP110 FP111 FP112
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1767 Mean :0.2061 Mean :0.1966 Mean :0.1945
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP113 FP114 FP115 FP116
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1956 Mean :0.1556 Mean :0.1788 Mean :0.1924
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP117 FP118 FP119 FP120
## Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.000 Median :0.0000
## Mean :0.1788 Mean :0.1924 Mean :0.163 Mean :0.1661
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.000 Max. :1.0000
## FP121 FP122 FP123 FP124
## Min. :0.0000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.000 Median :0.0000 Median :0.0000
## Mean :0.1399 Mean :0.164 Mean :0.1672 Mean :0.1619
## 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.000 Max. :1.0000 Max. :1.0000
## FP125 FP126 FP127 FP128
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1556 Mean :0.1483 Mean :0.1399 Mean :0.1483
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP129 FP130 FP131 FP132
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1388 Mean :0.1052 Mean :0.1262 Mean :0.1251
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP133 FP134 FP135 FP136
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1262 Mean :0.1272 Mean :0.1262 Mean :0.1209
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP137 FP138 FP139 FP140
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.0000
## Mean :0.1157 Mean :0.1115 Mean :0.08202 Mean :0.1115
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.0000
## FP141 FP142 FP143 FP144
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.0000
## Mean :0.1167 Mean :0.1094 Mean :0.08097 Mean :0.1041
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.0000
## FP145 FP146 FP147 FP148
## Min. :0.0000 Min. :0.000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.000 Median :0.0000 Median :0.00000
## Mean :0.1041 Mean :0.103 Mean :0.1052 Mean :0.08728
## 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.000 Max. :1.0000 Max. :1.00000
## FP149 FP150 FP151 FP152
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.09043 Mean :0.07886 Mean :0.05573 Mean :0.08202
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP153 FP154 FP155 FP156
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.00000
## Mean :0.07781 Mean :0.03785 Mean :0.0694 Mean :0.07045
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.0000 Max. :1.00000
## FP157 FP158 FP159 FP160
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.06204 Mean :0.05363 Mean :0.07045 Mean :0.06835
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP161 FP162 FP163 FP164
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000 Median :0.0000 Median :1.0000
## Mean :0.06625 Mean :0.4953 Mean :0.4763 Mean :0.6278
## 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.00000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP165 FP166 FP167 FP168
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :1.0000
## Mean :0.3491 Mean :0.3312 Mean :0.3281 Mean :0.6656
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP169 FP170 FP171 FP172
## Min. :0.0000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.000 Median :0.0000 Median :0.0000
## Mean :0.1861 Mean :0.184 Mean :0.1693 Mean :0.1514
## 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.000 Max. :1.0000 Max. :1.0000
## FP173 FP174 FP175 FP176
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.000 Median :0.0000 Median :0.0000 Median :0.000
## Mean :0.142 Mean :0.1304 Mean :0.1346 Mean :0.122
## 3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.000
## Max. :1.000 Max. :1.0000 Max. :1.0000 Max. :1.000
## FP177 FP178 FP179 FP180
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.0000
## Mean :0.1209 Mean :0.1209 Mean :0.09779 Mean :0.1073
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.0000
## FP181 FP182 FP183 FP184
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.09359 Mean :0.09884 Mean :0.07571 Mean :0.08412
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP185 FP186 FP187 FP188
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.08517 Mean :0.07676 Mean :0.07256 Mean :0.06835
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP189 FP190 FP191 FP192
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.07676 Mean :0.07256 Mean :0.07045 Mean :0.06099
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP193 FP194 FP195 FP196
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.06204 Mean :0.05889 Mean :0.06099 Mean :0.05678
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP197 FP198 FP199 FP200
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.05258 Mean :0.05678 Mean :0.04732 Mean :0.04942
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP201 FP202 FP203 FP204
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.05258 Mean :0.2576 Mean :0.1146 Mean :0.09884
## 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## FP205 FP206 FP207 FP208
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.07781 Mean :0.05994 Mean :0.05678 Mean :0.1125
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.0000
## MolWeight NumAtoms NumNonHAtoms NumBonds
## Min. : 46.09 Min. : 5.00 Min. : 2.00 Min. : 4.00
## 1st Qu.:122.61 1st Qu.:17.00 1st Qu.: 8.00 1st Qu.:17.00
## Median :179.23 Median :22.00 Median :12.00 Median :23.00
## Mean :201.65 Mean :25.51 Mean :13.16 Mean :25.91
## 3rd Qu.:264.34 3rd Qu.:31.00 3rd Qu.:17.00 3rd Qu.:31.50
## Max. :665.81 Max. :94.00 Max. :47.00 Max. :97.00
## NumNonHBonds NumMultBonds NumRotBonds NumDblBonds
## Min. : 1.00 Min. : 0.000 Min. : 0.000 Min. :0.000
## 1st Qu.: 8.00 1st Qu.: 1.000 1st Qu.: 0.000 1st Qu.:0.000
## Median :12.00 Median : 6.000 Median : 2.000 Median :1.000
## Mean :13.56 Mean : 6.148 Mean : 2.251 Mean :1.006
## 3rd Qu.:18.00 3rd Qu.:10.000 3rd Qu.: 3.500 3rd Qu.:2.000
## Max. :50.00 Max. :25.000 Max. :16.000 Max. :7.000
## NumAromaticBonds NumHydrogen NumCarbon NumNitrogen
## Min. : 0.000 Min. : 0.00 Min. : 1.000 Min. :0.0000
## 1st Qu.: 0.000 1st Qu.: 7.00 1st Qu.: 6.000 1st Qu.:0.0000
## Median : 6.000 Median :11.00 Median : 9.000 Median :0.0000
## Mean : 5.121 Mean :12.35 Mean : 9.893 Mean :0.8128
## 3rd Qu.: 6.000 3rd Qu.:16.00 3rd Qu.:12.000 3rd Qu.:1.0000
## Max. :25.000 Max. :47.00 Max. :33.000 Max. :6.0000
## NumOxygen NumSulfer NumChlorine NumHalogen
## Min. : 0.000 Min. :0.000 Min. : 0.0000 Min. : 0.0000
## 1st Qu.: 0.000 1st Qu.:0.000 1st Qu.: 0.0000 1st Qu.: 0.0000
## Median : 1.000 Median :0.000 Median : 0.0000 Median : 0.0000
## Mean : 1.574 Mean :0.164 Mean : 0.5563 Mean : 0.6982
## 3rd Qu.: 2.000 3rd Qu.:0.000 3rd Qu.: 0.0000 3rd Qu.: 1.0000
## Max. :13.000 Max. :4.000 Max. :10.0000 Max. :10.0000
## NumRings HydrophilicFactor SurfaceArea1 SurfaceArea2
## Min. :0.000 Min. :-0.98500 Min. : 0.00 Min. : 0.00
## 1st Qu.:0.000 1st Qu.:-0.76300 1st Qu.: 9.23 1st Qu.: 10.63
## Median :1.000 Median :-0.31400 Median : 29.10 Median : 33.12
## Mean :1.402 Mean :-0.02059 Mean : 36.46 Mean : 40.23
## 3rd Qu.:2.000 3rd Qu.: 0.31300 3rd Qu.: 53.28 3rd Qu.: 60.66
## Max. :7.000 Max. :13.48300 Max. :331.94 Max. :331.94
## Log_Solubility_Class
## Low :427
## High:524
##
##
##
##
##################################
# Performing a general exploration of the test set
##################################
# Print the dimensions (rows, columns) of the test data frame.
dim(Solubility_Test)
## [1] 316 229
# Print a compact per-column listing (type and leading values) of the
# test data frame; the recorded listing below is truncated after FP099.
str(Solubility_Test)
## 'data.frame': 316 obs. of 229 variables:
## $ FP001 : int 1 1 0 0 1 1 1 0 1 0 ...
## $ FP002 : int 0 0 1 0 1 0 0 0 0 1 ...
## $ FP003 : int 0 1 0 1 0 0 0 0 1 0 ...
## $ FP004 : int 1 1 0 0 1 1 1 1 1 0 ...
## $ FP005 : int 0 0 1 0 1 0 0 0 0 1 ...
## $ FP006 : int 0 1 0 1 1 0 0 0 0 0 ...
## $ FP007 : int 0 0 0 0 0 0 0 1 1 0 ...
## $ FP008 : int 0 0 0 0 1 0 0 0 0 0 ...
## $ FP009 : int 1 0 0 0 0 0 0 0 0 0 ...
## $ FP010 : int 1 0 1 0 0 0 0 0 0 0 ...
## $ FP011 : int 0 1 0 0 1 0 0 0 0 0 ...
## $ FP012 : int 0 1 0 0 0 1 0 1 0 0 ...
## $ FP013 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP014 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP015 : int 1 1 0 1 1 1 1 1 1 1 ...
## $ FP016 : int 0 1 0 0 0 0 0 1 0 0 ...
## $ FP017 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP018 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP019 : int 0 0 0 0 1 0 0 0 0 1 ...
## $ FP020 : int 0 0 0 0 0 1 0 0 0 0 ...
## $ FP021 : int 1 0 0 0 0 0 0 0 0 0 ...
## $ FP022 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP023 : int 0 0 0 0 0 0 1 0 0 0 ...
## $ FP024 : int 0 0 0 0 1 0 0 0 0 1 ...
## $ FP025 : int 1 0 0 0 0 0 0 0 0 0 ...
## $ FP026 : int 0 0 0 0 0 0 0 0 0 1 ...
## $ FP027 : int 0 0 0 1 0 0 0 0 0 0 ...
## $ FP028 : int 0 0 0 1 0 0 0 0 0 0 ...
## $ FP029 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP030 : int 0 0 0 1 0 0 0 0 0 0 ...
## $ FP031 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP032 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ FP033 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ FP034 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP035 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP036 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP037 : int 0 0 0 0 0 0 0 0 1 0 ...
## $ FP038 : int 1 0 0 0 0 0 0 0 0 0 ...
## $ FP039 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP040 : int 0 0 0 0 1 0 0 0 0 0 ...
## $ FP041 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP042 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP043 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP044 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP045 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP046 : int 0 0 1 0 0 0 0 0 0 1 ...
## $ FP047 : int 0 0 0 0 1 0 0 0 0 0 ...
## $ FP048 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP049 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP050 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP051 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP052 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP053 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP054 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP055 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ FP056 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP057 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP058 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP059 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP060 : int 1 1 1 0 0 1 0 1 0 0 ...
## $ FP061 : int 1 1 1 0 0 1 0 0 0 0 ...
## $ FP062 : int 1 1 0 0 1 1 1 0 1 0 ...
## $ FP063 : int 0 1 0 1 1 0 0 0 0 1 ...
## $ FP064 : int 1 1 0 0 0 0 0 0 1 0 ...
## $ FP065 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ FP066 : int 0 1 0 1 0 1 0 0 1 1 ...
## $ FP067 : int 0 1 0 1 1 0 0 0 0 1 ...
## $ FP068 : int 0 1 0 1 1 0 0 0 0 0 ...
## $ FP069 : int 0 0 0 0 0 0 0 0 1 1 ...
## $ FP070 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP071 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP072 : int 1 1 1 0 1 1 1 1 1 0 ...
## $ FP073 : int 1 0 1 0 0 0 0 0 0 0 ...
## $ FP074 : int 0 0 1 0 0 0 0 0 1 0 ...
## $ FP075 : int 0 1 0 1 0 0 0 1 0 0 ...
## $ FP076 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP077 : int 0 0 0 1 0 0 0 1 0 0 ...
## $ FP078 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ FP079 : int 0 0 1 1 1 0 0 0 0 1 ...
## $ FP080 : int 1 1 0 1 0 0 0 1 0 0 ...
## $ FP081 : int 0 0 0 1 0 0 0 0 1 0 ...
## $ FP082 : int 0 0 1 0 1 0 0 0 0 1 ...
## $ FP083 : int 0 1 0 1 1 0 0 0 0 0 ...
## $ FP084 : int 0 0 0 1 1 0 0 1 0 1 ...
## $ FP085 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP086 : int 0 0 0 1 0 0 0 0 0 0 ...
## $ FP087 : int 0 0 1 1 1 0 0 1 0 1 ...
## $ FP088 : int 1 0 0 0 0 0 0 1 1 0 ...
## $ FP089 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP090 : int 0 0 0 1 0 0 0 1 0 0 ...
## $ FP091 : int 0 0 0 1 1 0 0 0 0 0 ...
## $ FP092 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP093 : int 0 0 0 1 0 0 0 1 0 0 ...
## $ FP094 : int 0 1 0 0 0 0 0 0 1 0 ...
## $ FP095 : int 0 0 1 1 0 0 0 0 0 0 ...
## $ FP096 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP097 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FP098 : int 1 1 0 0 0 1 0 0 0 0 ...
## $ FP099 : int 0 0 0 0 0 0 0 0 0 0 ...
## [list output truncated]
# Display per-column summary statistics for the test set
summary(Solubility_Test)
## FP001 FP002 FP003 FP004
## Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.0000 Median :1.0000 Median :0.000 Median :1.0000
## Mean :0.4684 Mean :0.5854 Mean :0.443 Mean :0.5316
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.000 Max. :1.0000
## FP005 FP006 FP007 FP008
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.6171 Mean :0.3513 Mean :0.3544 Mean :0.3608
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP009 FP010 FP011 FP012
## Min. :0.0000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.000 Median :0.0000 Median :0.0000
## Mean :0.2627 Mean :0.193 Mean :0.1741 Mean :0.1677
## 3rd Qu.:1.0000 3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.000 Max. :1.0000 Max. :1.0000
## FP013 FP014 FP015 FP016
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:1.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :1.0000 Median :0.0000
## Mean :0.1646 Mean :0.1582 Mean :0.8291 Mean :0.1424
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP017 FP018 FP019 FP020
## Min. :0.0000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.00000 Median :0.0000 Median :0.0000
## Mean :0.1487 Mean :0.08544 Mean :0.1139 Mean :0.1076
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.00000 Max. :1.0000 Max. :1.0000
## FP021 FP022 FP023 FP024
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.0000
## Mean :0.1076 Mean :0.1171 Mean :0.08544 Mean :0.0981
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.0000
## FP025 FP026 FP027 FP028
## Min. :0.00000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.07911 Mean :0.1171 Mean :0.07911 Mean :0.05696
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.0000 Max. :1.00000 Max. :1.00000
## FP029 FP030 FP031 FP032
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.0000
## Mean :0.05063 Mean :0.08228 Mean :0.0981 Mean :0.1297
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.00000 Max. :1.0000 Max. :1.0000
## FP033 FP034 FP035 FP036
## Min. :0.0000 Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.00000 Median :0.0000 Median :0.00000
## Mean :0.1203 Mean :0.06646 Mean :0.0981 Mean :0.06013
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.00000 Max. :1.0000 Max. :1.00000
## FP037 FP038 FP039 FP040
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.09494 Mean :0.03165 Mean :0.06329 Mean :0.05696
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP041 FP042 FP043 FP044
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.00000
## Mean :0.06013 Mean :0.06013 Mean :0.0443 Mean :0.06013
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.0000 Max. :1.00000
## FP045 FP046 FP047 FP048
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.06329 Mean :0.3259 Mean :0.2975 Mean :0.1139
## 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP049 FP050 FP051 FP052
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.0000
## Mean :0.1076 Mean :0.1139 Mean :0.05696 Mean :0.1044
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.0000
## FP053 FP054 FP055 FP056
## Min. :0.00000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.06013 Mean :0.0981 Mean :0.09177 Mean :0.06329
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.0000 Max. :1.00000 Max. :1.00000
## FP057 FP058 FP059 FP060
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1234 Mean :0.1361 Mean :0.0443 Mean :0.4525
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP061 FP062 FP063 FP064
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.3924 Mean :0.4272 Mean :0.3576 Mean :0.3892
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP065 FP066 FP067 FP068
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :1.0000 Median :0.0000 Median :0.0000
## Mean :0.5981 Mean :0.6171 Mean :0.3259 Mean :0.2911
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP069 FP070 FP071 FP072
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :1.0000
## Mean :0.3734 Mean :0.3323 Mean :0.3449 Mean :0.6456
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP073 FP074 FP075 FP076
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.2911 Mean :0.3259 Mean :0.2563 Mean :0.3165
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP077 FP078 FP079 FP080
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.000 Median :0.0000 Median :1.0000 Median :0.0000
## Mean :0.307 Mean :0.3101 Mean :0.7278 Mean :0.2627
## 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP081 FP082 FP083 FP084
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.000 Median :1.0000 Median :0.0000 Median :0.0000
## Mean :0.288 Mean :0.7437 Mean :0.2532 Mean :0.2247
## 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP085 FP086 FP087 FP088
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:1.0000 1st Qu.:0.0000
## Median :0.000 Median :0.0000 Median :1.0000 Median :0.0000
## Mean :0.269 Mean :0.2722 Mean :0.7627 Mean :0.2437
## 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP089 FP090 FP091 FP092
## Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.000 Median :0.0000
## Mean :0.2532 Mean :0.2278 Mean :0.231 Mean :0.2184
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.000 Max. :1.0000
## FP093 FP094 FP095 FP096
## Min. :0.0000 Min. :0.00 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.00 Median :0.0000 Median :0.0000
## Mean :0.2152 Mean :0.25 Mean :0.2057 Mean :0.1867
## 3rd Qu.:0.0000 3rd Qu.:0.25 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.00 Max. :1.0000 Max. :1.0000
## FP097 FP098 FP099 FP100
## Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.000 Median :0.0000
## Mean :0.2089 Mean :0.2025 Mean :0.212 Mean :0.1804
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.000 Max. :1.0000
## FP101 FP102 FP103 FP104
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1772 Mean :0.1456 Mean :0.2184 Mean :0.1835
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP105 FP106 FP107 FP108
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.2152 Mean :0.1361 Mean :0.1962 Mean :0.1804
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP109 FP110 FP111 FP112
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1741 Mean :0.1646 Mean :0.1804 Mean :0.1772
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP113 FP114 FP115 FP116
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1646 Mean :0.1772 Mean :0.1582 Mean :0.1487
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP117 FP118 FP119 FP120
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1709 Mean :0.1171 Mean :0.1677 Mean :0.1551
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP121 FP122 FP123 FP124
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1076 Mean :0.1361 Mean :0.1456 Mean :0.1329
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP125 FP126 FP127 FP128
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1203 Mean :0.1139 Mean :0.1487 Mean :0.1076
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP129 FP130 FP131 FP132
## Min. :0.0000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.00000 Median :0.0000 Median :0.0000
## Mean :0.1392 Mean :0.08228 Mean :0.1076 Mean :0.1266
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.00000 Max. :1.0000 Max. :1.0000
## FP133 FP134 FP135 FP136
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.1361 Mean :0.08544 Mean :0.06329 Mean :0.1013
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.00000 Max. :1.00000 Max. :1.0000
## FP137 FP138 FP139 FP140
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.08861 Mean :0.08228 Mean :0.06329 Mean :0.08861
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP141 FP142 FP143 FP144
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.00000
## Mean :0.06962 Mean :0.09494 Mean :0.0538 Mean :0.09177
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.0000 Max. :1.00000
## FP145 FP146 FP147 FP148
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.06329 Mean :0.09177 Mean :0.06962 Mean :0.07911
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP149 FP150 FP151 FP152
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.08228 Mean :0.06646 Mean :0.03165 Mean :0.0538
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.0000
## FP153 FP154 FP155 FP156
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.03481 Mean :0.03165 Mean :0.06646 Mean :0.04747
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP157 FP158 FP159 FP160
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.05696 Mean :0.07911 Mean :0.03481 Mean :0.03481
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP161 FP162 FP163 FP164
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :1.0000 Median :0.0000 Median :1.0000
## Mean :0.03481 Mean :0.5316 Mean :0.4525 Mean :0.6551
## 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.00000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP165 FP166 FP167 FP168
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :1.0000
## Mean :0.3196 Mean :0.3386 Mean :0.3006 Mean :0.7152
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP169 FP170 FP171 FP172
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1867 Mean :0.1551 Mean :0.1297 Mean :0.1487
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP173 FP174 FP175 FP176
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1361 Mean :0.1551 Mean :0.1329 Mean :0.1076
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FP177 FP178 FP179 FP180
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.1013 Mean :0.1076 Mean :0.1392 Mean :0.06962
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## FP181 FP182 FP183 FP184
## Min. :0.0000 Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.00000 Median :0.0000 Median :0.00000
## Mean :0.1044 Mean :0.07595 Mean :0.1329 Mean :0.09494
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.00000 Max. :1.0000 Max. :1.00000
## FP185 FP186 FP187 FP188
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.0981 Mean :0.06013 Mean :0.06646 Mean :0.06962
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP189 FP190 FP191 FP192
## Min. :0.00000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.04114 Mean :0.0538 Mean :0.05696 Mean :0.06962
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.0000 Max. :1.00000 Max. :1.00000
## FP193 FP194 FP195 FP196
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.06962 Mean :0.06646 Mean :0.05063 Mean :0.06962
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :1.00000
## FP197 FP198 FP199 FP200
## Min. :0.00000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.06329 Mean :0.0443 Mean :0.07278 Mean :0.06329
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.0000 Max. :1.00000 Max. :1.00000
## FP201 FP202 FP203 FP204
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.04114 Mean :0.2658 Mean :0.1361 Mean :0.09494
## 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## FP205 FP206 FP207 FP208
## Min. :0.00000 Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.0000 Median :0.0000
## Mean :0.07911 Mean :0.05063 Mean :0.0443 Mean :0.1361
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.00000 Max. :1.0000 Max. :1.0000
## MolWeight NumAtoms NumNonHAtoms NumBonds NumNonHBonds
## Min. : 56.07 Min. : 5.0 Min. : 3.00 Min. : 4 Min. : 2.0
## 1st Qu.:121.91 1st Qu.:17.0 1st Qu.: 8.00 1st Qu.:16 1st Qu.: 8.0
## Median :170.11 Median :22.0 Median :11.00 Median :23 Median :12.0
## Mean :194.12 Mean :24.6 Mean :12.71 Mean :25 Mean :13.1
## 3rd Qu.:253.82 3rd Qu.:29.0 3rd Qu.:16.00 3rd Qu.:30 3rd Qu.:17.0
## Max. :478.92 Max. :68.0 Max. :33.00 Max. :71 Max. :36.0
## NumMultBonds NumRotBonds NumDblBonds NumAromaticBonds
## Min. : 0.000 Min. : 0.000 Min. :0.0000 Min. : 0.000
## 1st Qu.: 1.000 1st Qu.: 0.000 1st Qu.:0.0000 1st Qu.: 0.000
## Median : 6.000 Median : 1.000 Median :1.0000 Median : 6.000
## Mean : 6.313 Mean : 1.949 Mean :0.8892 Mean : 5.399
## 3rd Qu.:10.000 3rd Qu.: 3.000 3rd Qu.:1.0000 3rd Qu.:10.000
## Max. :27.000 Max. :16.000 Max. :6.0000 Max. :27.000
## NumHydrogen NumCarbon NumNitrogen NumOxygen
## Min. : 0.0 Min. : 1.000 Min. :0.0000 Min. :0.000
## 1st Qu.: 7.0 1st Qu.: 6.000 1st Qu.:0.0000 1st Qu.:0.000
## Median :11.0 Median : 8.000 Median :0.0000 Median :1.000
## Mean :11.9 Mean : 9.785 Mean :0.7089 Mean :1.389
## 3rd Qu.:15.0 3rd Qu.:12.000 3rd Qu.:1.0000 3rd Qu.:2.000
## Max. :40.0 Max. :24.000 Max. :6.0000 Max. :9.000
## NumSulfer NumChlorine NumHalogen NumRings
## Min. :0.0000 Min. :0.000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:1.000
## Median :0.0000 Median :0.000 Median :0.0000 Median :1.000
## Mean :0.1013 Mean :0.557 Mean :0.7089 Mean :1.399
## 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:1.0000 3rd Qu.:2.000
## Max. :3.0000 Max. :9.000 Max. :9.0000 Max. :6.000
## HydrophilicFactor SurfaceArea1 SurfaceArea2 Log_Solubility_Class
## Min. :-0.9860 Min. : 0.00 Min. : 0.00 Low :143
## 1st Qu.:-0.7670 1st Qu.: 9.23 1st Qu.: 9.23 High:173
## Median :-0.3970 Median : 26.30 Median : 26.30
## Mean :-0.1022 Mean : 32.76 Mean : 35.04
## 3rd Qu.: 0.2140 3rd Qu.: 49.55 3rd Qu.: 52.32
## Max. : 5.0000 Max. :201.85 Max. :201.85
##################################
# Formulating a data type assessment summary
##################################
# Bind the training set to the name used by the data type assessment.
PDA <- Solubility_Train

# Build one row per column of PDA holding its position, name and class.
# vapply() guarantees a character vector; a column carrying several classes
# is collapsed into a single "a/b" label instead of breaking data.frame().
# The outer parentheses print the result in addition to assigning it.
(PDA.Summary <- data.frame(
  Column.Index = seq_along(PDA),
  Column.Name = names(PDA),
  Column.Type = vapply(PDA, function(x) paste(class(x), collapse = "/"),
                       character(1)),
  row.names = NULL
))
## Column.Index Column.Name Column.Type
## 1 1 FP001 integer
## 2 2 FP002 integer
## 3 3 FP003 integer
## 4 4 FP004 integer
## 5 5 FP005 integer
## 6 6 FP006 integer
## 7 7 FP007 integer
## 8 8 FP008 integer
## 9 9 FP009 integer
## 10 10 FP010 integer
## 11 11 FP011 integer
## 12 12 FP012 integer
## 13 13 FP013 integer
## 14 14 FP014 integer
## 15 15 FP015 integer
## 16 16 FP016 integer
## 17 17 FP017 integer
## 18 18 FP018 integer
## 19 19 FP019 integer
## 20 20 FP020 integer
## 21 21 FP021 integer
## 22 22 FP022 integer
## 23 23 FP023 integer
## 24 24 FP024 integer
## 25 25 FP025 integer
## 26 26 FP026 integer
## 27 27 FP027 integer
## 28 28 FP028 integer
## 29 29 FP029 integer
## 30 30 FP030 integer
## 31 31 FP031 integer
## 32 32 FP032 integer
## 33 33 FP033 integer
## 34 34 FP034 integer
## 35 35 FP035 integer
## 36 36 FP036 integer
## 37 37 FP037 integer
## 38 38 FP038 integer
## 39 39 FP039 integer
## 40 40 FP040 integer
## 41 41 FP041 integer
## 42 42 FP042 integer
## 43 43 FP043 integer
## 44 44 FP044 integer
## 45 45 FP045 integer
## 46 46 FP046 integer
## 47 47 FP047 integer
## 48 48 FP048 integer
## 49 49 FP049 integer
## 50 50 FP050 integer
## 51 51 FP051 integer
## 52 52 FP052 integer
## 53 53 FP053 integer
## 54 54 FP054 integer
## 55 55 FP055 integer
## 56 56 FP056 integer
## 57 57 FP057 integer
## 58 58 FP058 integer
## 59 59 FP059 integer
## 60 60 FP060 integer
## 61 61 FP061 integer
## 62 62 FP062 integer
## 63 63 FP063 integer
## 64 64 FP064 integer
## 65 65 FP065 integer
## 66 66 FP066 integer
## 67 67 FP067 integer
## 68 68 FP068 integer
## 69 69 FP069 integer
## 70 70 FP070 integer
## 71 71 FP071 integer
## 72 72 FP072 integer
## 73 73 FP073 integer
## 74 74 FP074 integer
## 75 75 FP075 integer
## 76 76 FP076 integer
## 77 77 FP077 integer
## 78 78 FP078 integer
## 79 79 FP079 integer
## 80 80 FP080 integer
## 81 81 FP081 integer
## 82 82 FP082 integer
## 83 83 FP083 integer
## 84 84 FP084 integer
## 85 85 FP085 integer
## 86 86 FP086 integer
## 87 87 FP087 integer
## 88 88 FP088 integer
## 89 89 FP089 integer
## 90 90 FP090 integer
## 91 91 FP091 integer
## 92 92 FP092 integer
## 93 93 FP093 integer
## 94 94 FP094 integer
## 95 95 FP095 integer
## 96 96 FP096 integer
## 97 97 FP097 integer
## 98 98 FP098 integer
## 99 99 FP099 integer
## 100 100 FP100 integer
## 101 101 FP101 integer
## 102 102 FP102 integer
## 103 103 FP103 integer
## 104 104 FP104 integer
## 105 105 FP105 integer
## 106 106 FP106 integer
## 107 107 FP107 integer
## 108 108 FP108 integer
## 109 109 FP109 integer
## 110 110 FP110 integer
## 111 111 FP111 integer
## 112 112 FP112 integer
## 113 113 FP113 integer
## 114 114 FP114 integer
## 115 115 FP115 integer
## 116 116 FP116 integer
## 117 117 FP117 integer
## 118 118 FP118 integer
## 119 119 FP119 integer
## 120 120 FP120 integer
## 121 121 FP121 integer
## 122 122 FP122 integer
## 123 123 FP123 integer
## 124 124 FP124 integer
## 125 125 FP125 integer
## 126 126 FP126 integer
## 127 127 FP127 integer
## 128 128 FP128 integer
## 129 129 FP129 integer
## 130 130 FP130 integer
## 131 131 FP131 integer
## 132 132 FP132 integer
## 133 133 FP133 integer
## 134 134 FP134 integer
## 135 135 FP135 integer
## 136 136 FP136 integer
## 137 137 FP137 integer
## 138 138 FP138 integer
## 139 139 FP139 integer
## 140 140 FP140 integer
## 141 141 FP141 integer
## 142 142 FP142 integer
## 143 143 FP143 integer
## 144 144 FP144 integer
## 145 145 FP145 integer
## 146 146 FP146 integer
## 147 147 FP147 integer
## 148 148 FP148 integer
## 149 149 FP149 integer
## 150 150 FP150 integer
## 151 151 FP151 integer
## 152 152 FP152 integer
## 153 153 FP153 integer
## 154 154 FP154 integer
## 155 155 FP155 integer
## 156 156 FP156 integer
## 157 157 FP157 integer
## 158 158 FP158 integer
## 159 159 FP159 integer
## 160 160 FP160 integer
## 161 161 FP161 integer
## 162 162 FP162 integer
## 163 163 FP163 integer
## 164 164 FP164 integer
## 165 165 FP165 integer
## 166 166 FP166 integer
## 167 167 FP167 integer
## 168 168 FP168 integer
## 169 169 FP169 integer
## 170 170 FP170 integer
## 171 171 FP171 integer
## 172 172 FP172 integer
## 173 173 FP173 integer
## 174 174 FP174 integer
## 175 175 FP175 integer
## 176 176 FP176 integer
## 177 177 FP177 integer
## 178 178 FP178 integer
## 179 179 FP179 integer
## 180 180 FP180 integer
## 181 181 FP181 integer
## 182 182 FP182 integer
## 183 183 FP183 integer
## 184 184 FP184 integer
## 185 185 FP185 integer
## 186 186 FP186 integer
## 187 187 FP187 integer
## 188 188 FP188 integer
## 189 189 FP189 integer
## 190 190 FP190 integer
## 191 191 FP191 integer
## 192 192 FP192 integer
## 193 193 FP193 integer
## 194 194 FP194 integer
## 195 195 FP195 integer
## 196 196 FP196 integer
## 197 197 FP197 integer
## 198 198 FP198 integer
## 199 199 FP199 integer
## 200 200 FP200 integer
## 201 201 FP201 integer
## 202 202 FP202 integer
## 203 203 FP203 integer
## 204 204 FP204 integer
## 205 205 FP205 integer
## 206 206 FP206 integer
## 207 207 FP207 integer
## 208 208 FP208 integer
## 209 209 MolWeight numeric
## 210 210 NumAtoms integer
## 211 211 NumNonHAtoms integer
## 212 212 NumBonds integer
## 213 213 NumNonHBonds integer
## 214 214 NumMultBonds integer
## 215 215 NumRotBonds integer
## 216 216 NumDblBonds integer
## 217 217 NumAromaticBonds integer
## 218 218 NumHydrogen integer
## 219 219 NumCarbon integer
## 220 220 NumNitrogen integer
## 221 221 NumOxygen integer
## 222 222 NumSulfer integer
## 223 223 NumChlorine integer
## 224 224 NumHalogen integer
## 225 225 NumRings integer
## 226 226 HydrophilicFactor numeric
## 227 227 SurfaceArea1 numeric
## 228 228 SurfaceArea2 numeric
## 229 229 Log_Solubility_Class factor
##################################
# Loading dataset
##################################
# Bind the training set to the name used by the data quality assessment.
DQA <- Solubility_Train
##################################
# Formulating an overall data quality assessment summary
##################################
# Build one row per column of DQA with its position, name, class, the number
# of rows in DQA, the count of missing values and the fill rate (share of
# non-missing values, kept as text formatted with three decimals).
# The outer parentheses print the result in addition to assigning it.
(DQA.Summary <- data.frame(
  Column.Index = seq_along(DQA),
  Column.Name = names(DQA),
  # Collapse multi-class columns into one label so this stays a plain vector.
  Column.Type = vapply(DQA, function(x) paste(class(x), collapse = "/"),
                       character(1)),
  # Every column shares the same row count, so repeat it once per column.
  Row.Count = rep(nrow(DQA), ncol(DQA)),
  NA.Count = vapply(DQA, function(x) sum(is.na(x)), integer(1)),
  Fill.Rate = vapply(
    DQA,
    function(x) format(round(sum(!is.na(x)) / nrow(DQA), 3), nsmall = 3),
    character(1)
  ),
  row.names = NULL
))
## Column.Index Column.Name Column.Type Row.Count NA.Count Fill.Rate
## 1 1 FP001 integer 951 0 1.000
## 2 2 FP002 integer 951 0 1.000
## 3 3 FP003 integer 951 0 1.000
## 4 4 FP004 integer 951 0 1.000
## 5 5 FP005 integer 951 0 1.000
## 6 6 FP006 integer 951 0 1.000
## 7 7 FP007 integer 951 0 1.000
## 8 8 FP008 integer 951 0 1.000
## 9 9 FP009 integer 951 0 1.000
## 10 10 FP010 integer 951 0 1.000
## 11 11 FP011 integer 951 0 1.000
## 12 12 FP012 integer 951 0 1.000
## 13 13 FP013 integer 951 0 1.000
## 14 14 FP014 integer 951 0 1.000
## 15 15 FP015 integer 951 0 1.000
## 16 16 FP016 integer 951 0 1.000
## 17 17 FP017 integer 951 0 1.000
## 18 18 FP018 integer 951 0 1.000
## 19 19 FP019 integer 951 0 1.000
## 20 20 FP020 integer 951 0 1.000
## 21 21 FP021 integer 951 0 1.000
## 22 22 FP022 integer 951 0 1.000
## 23 23 FP023 integer 951 0 1.000
## 24 24 FP024 integer 951 0 1.000
## 25 25 FP025 integer 951 0 1.000
## 26 26 FP026 integer 951 0 1.000
## 27 27 FP027 integer 951 0 1.000
## 28 28 FP028 integer 951 0 1.000
## 29 29 FP029 integer 951 0 1.000
## 30 30 FP030 integer 951 0 1.000
## 31 31 FP031 integer 951 0 1.000
## 32 32 FP032 integer 951 0 1.000
## 33 33 FP033 integer 951 0 1.000
## 34 34 FP034 integer 951 0 1.000
## 35 35 FP035 integer 951 0 1.000
## 36 36 FP036 integer 951 0 1.000
## 37 37 FP037 integer 951 0 1.000
## 38 38 FP038 integer 951 0 1.000
## 39 39 FP039 integer 951 0 1.000
## 40 40 FP040 integer 951 0 1.000
## 41 41 FP041 integer 951 0 1.000
## 42 42 FP042 integer 951 0 1.000
## 43 43 FP043 integer 951 0 1.000
## 44 44 FP044 integer 951 0 1.000
## 45 45 FP045 integer 951 0 1.000
## 46 46 FP046 integer 951 0 1.000
## 47 47 FP047 integer 951 0 1.000
## 48 48 FP048 integer 951 0 1.000
## 49 49 FP049 integer 951 0 1.000
## 50 50 FP050 integer 951 0 1.000
## 51 51 FP051 integer 951 0 1.000
## 52 52 FP052 integer 951 0 1.000
## 53 53 FP053 integer 951 0 1.000
## 54 54 FP054 integer 951 0 1.000
## 55 55 FP055 integer 951 0 1.000
## 56 56 FP056 integer 951 0 1.000
## 57 57 FP057 integer 951 0 1.000
## 58 58 FP058 integer 951 0 1.000
## 59 59 FP059 integer 951 0 1.000
## 60 60 FP060 integer 951 0 1.000
## 61 61 FP061 integer 951 0 1.000
## 62 62 FP062 integer 951 0 1.000
## 63 63 FP063 integer 951 0 1.000
## 64 64 FP064 integer 951 0 1.000
## 65 65 FP065 integer 951 0 1.000
## 66 66 FP066 integer 951 0 1.000
## 67 67 FP067 integer 951 0 1.000
## 68 68 FP068 integer 951 0 1.000
## 69 69 FP069 integer 951 0 1.000
## 70 70 FP070 integer 951 0 1.000
## 71 71 FP071 integer 951 0 1.000
## 72 72 FP072 integer 951 0 1.000
## 73 73 FP073 integer 951 0 1.000
## 74 74 FP074 integer 951 0 1.000
## 75 75 FP075 integer 951 0 1.000
## 76 76 FP076 integer 951 0 1.000
## 77 77 FP077 integer 951 0 1.000
## 78 78 FP078 integer 951 0 1.000
## 79 79 FP079 integer 951 0 1.000
## 80 80 FP080 integer 951 0 1.000
## 81 81 FP081 integer 951 0 1.000
## 82 82 FP082 integer 951 0 1.000
## 83 83 FP083 integer 951 0 1.000
## 84 84 FP084 integer 951 0 1.000
## 85 85 FP085 integer 951 0 1.000
## 86 86 FP086 integer 951 0 1.000
## 87 87 FP087 integer 951 0 1.000
## 88 88 FP088 integer 951 0 1.000
## 89 89 FP089 integer 951 0 1.000
## 90 90 FP090 integer 951 0 1.000
## 91 91 FP091 integer 951 0 1.000
## 92 92 FP092 integer 951 0 1.000
## 93 93 FP093 integer 951 0 1.000
## 94 94 FP094 integer 951 0 1.000
## 95 95 FP095 integer 951 0 1.000
## 96 96 FP096 integer 951 0 1.000
## 97 97 FP097 integer 951 0 1.000
## 98 98 FP098 integer 951 0 1.000
## 99 99 FP099 integer 951 0 1.000
## 100 100 FP100 integer 951 0 1.000
## 101 101 FP101 integer 951 0 1.000
## 102 102 FP102 integer 951 0 1.000
## 103 103 FP103 integer 951 0 1.000
## 104 104 FP104 integer 951 0 1.000
## 105 105 FP105 integer 951 0 1.000
## 106 106 FP106 integer 951 0 1.000
## 107 107 FP107 integer 951 0 1.000
## 108 108 FP108 integer 951 0 1.000
## 109 109 FP109 integer 951 0 1.000
## 110 110 FP110 integer 951 0 1.000
## 111 111 FP111 integer 951 0 1.000
## 112 112 FP112 integer 951 0 1.000
## 113 113 FP113 integer 951 0 1.000
## 114 114 FP114 integer 951 0 1.000
## 115 115 FP115 integer 951 0 1.000
## 116 116 FP116 integer 951 0 1.000
## 117 117 FP117 integer 951 0 1.000
## 118 118 FP118 integer 951 0 1.000
## 119 119 FP119 integer 951 0 1.000
## 120 120 FP120 integer 951 0 1.000
## 121 121 FP121 integer 951 0 1.000
## 122 122 FP122 integer 951 0 1.000
## 123 123 FP123 integer 951 0 1.000
## 124 124 FP124 integer 951 0 1.000
## 125 125 FP125 integer 951 0 1.000
## 126 126 FP126 integer 951 0 1.000
## 127 127 FP127 integer 951 0 1.000
## 128 128 FP128 integer 951 0 1.000
## 129 129 FP129 integer 951 0 1.000
## 130 130 FP130 integer 951 0 1.000
## 131 131 FP131 integer 951 0 1.000
## 132 132 FP132 integer 951 0 1.000
## 133 133 FP133 integer 951 0 1.000
## 134 134 FP134 integer 951 0 1.000
## 135 135 FP135 integer 951 0 1.000
## 136 136 FP136 integer 951 0 1.000
## 137 137 FP137 integer 951 0 1.000
## 138 138 FP138 integer 951 0 1.000
## 139 139 FP139 integer 951 0 1.000
## 140 140 FP140 integer 951 0 1.000
## 141 141 FP141 integer 951 0 1.000
## 142 142 FP142 integer 951 0 1.000
## 143 143 FP143 integer 951 0 1.000
## 144 144 FP144 integer 951 0 1.000
## 145 145 FP145 integer 951 0 1.000
## 146 146 FP146 integer 951 0 1.000
## 147 147 FP147 integer 951 0 1.000
## 148 148 FP148 integer 951 0 1.000
## 149 149 FP149 integer 951 0 1.000
## 150 150 FP150 integer 951 0 1.000
## 151 151 FP151 integer 951 0 1.000
## 152 152 FP152 integer 951 0 1.000
## 153 153 FP153 integer 951 0 1.000
## 154 154 FP154 integer 951 0 1.000
## 155 155 FP155 integer 951 0 1.000
## 156 156 FP156 integer 951 0 1.000
## 157 157 FP157 integer 951 0 1.000
## 158 158 FP158 integer 951 0 1.000
## 159 159 FP159 integer 951 0 1.000
## 160 160 FP160 integer 951 0 1.000
## 161 161 FP161 integer 951 0 1.000
## 162 162 FP162 integer 951 0 1.000
## 163 163 FP163 integer 951 0 1.000
## 164 164 FP164 integer 951 0 1.000
## 165 165 FP165 integer 951 0 1.000
## 166 166 FP166 integer 951 0 1.000
## 167 167 FP167 integer 951 0 1.000
## 168 168 FP168 integer 951 0 1.000
## 169 169 FP169 integer 951 0 1.000
## 170 170 FP170 integer 951 0 1.000
## 171 171 FP171 integer 951 0 1.000
## 172 172 FP172 integer 951 0 1.000
## 173 173 FP173 integer 951 0 1.000
## 174 174 FP174 integer 951 0 1.000
## 175 175 FP175 integer 951 0 1.000
## 176 176 FP176 integer 951 0 1.000
## 177 177 FP177 integer 951 0 1.000
## 178 178 FP178 integer 951 0 1.000
## 179 179 FP179 integer 951 0 1.000
## 180 180 FP180 integer 951 0 1.000
## 181 181 FP181 integer 951 0 1.000
## 182 182 FP182 integer 951 0 1.000
## 183 183 FP183 integer 951 0 1.000
## 184 184 FP184 integer 951 0 1.000
## 185 185 FP185 integer 951 0 1.000
## 186 186 FP186 integer 951 0 1.000
## 187 187 FP187 integer 951 0 1.000
## 188 188 FP188 integer 951 0 1.000
## 189 189 FP189 integer 951 0 1.000
## 190 190 FP190 integer 951 0 1.000
## 191 191 FP191 integer 951 0 1.000
## 192 192 FP192 integer 951 0 1.000
## 193 193 FP193 integer 951 0 1.000
## 194 194 FP194 integer 951 0 1.000
## 195 195 FP195 integer 951 0 1.000
## 196 196 FP196 integer 951 0 1.000
## 197 197 FP197 integer 951 0 1.000
## 198 198 FP198 integer 951 0 1.000
## 199 199 FP199 integer 951 0 1.000
## 200 200 FP200 integer 951 0 1.000
## 201 201 FP201 integer 951 0 1.000
## 202 202 FP202 integer 951 0 1.000
## 203 203 FP203 integer 951 0 1.000
## 204 204 FP204 integer 951 0 1.000
## 205 205 FP205 integer 951 0 1.000
## 206 206 FP206 integer 951 0 1.000
## 207 207 FP207 integer 951 0 1.000
## 208 208 FP208 integer 951 0 1.000
## 209 209 MolWeight numeric 951 0 1.000
## 210 210 NumAtoms integer 951 0 1.000
## 211 211 NumNonHAtoms integer 951 0 1.000
## 212 212 NumBonds integer 951 0 1.000
## 213 213 NumNonHBonds integer 951 0 1.000
## 214 214 NumMultBonds integer 951 0 1.000
## 215 215 NumRotBonds integer 951 0 1.000
## 216 216 NumDblBonds integer 951 0 1.000
## 217 217 NumAromaticBonds integer 951 0 1.000
## 218 218 NumHydrogen integer 951 0 1.000
## 219 219 NumCarbon integer 951 0 1.000
## 220 220 NumNitrogen integer 951 0 1.000
## 221 221 NumOxygen integer 951 0 1.000
## 222 222 NumSulfer integer 951 0 1.000
## 223 223 NumChlorine integer 951 0 1.000
## 224 224 NumHalogen integer 951 0 1.000
## 225 225 NumRings integer 951 0 1.000
## 226 226 HydrophilicFactor numeric 951 0 1.000
## 227 227 SurfaceArea1 numeric 951 0 1.000
## 228 228 SurfaceArea2 numeric 951 0 1.000
## 229 229 Log_Solubility_Class factor 951 0 1.000
##################################
# Listing all predictors
##################################
# Keep every column of the data quality assessment frame except the
# response variable, leaving only the predictor columns.
DQA.Predictors <- DQA[,!names(DQA) %in% c("Log_Solubility_Class")]

##################################
# Listing all numeric predictors
##################################
# Numeric predictors are taken to be every predictor column whose name
# does not match the pattern "FP".
DQA.Predictors.Numeric <- DQA.Predictors[,-(grep("FP", names(DQA.Predictors)))]

# Report how many numeric predictors were found. The `else` must sit on
# the same line as the closing brace so the top-level statement parses.
if (length(names(DQA.Predictors.Numeric))>0) {
  print(paste0("There are ",
               length(names(DQA.Predictors.Numeric)),
               " numeric predictor variable(s)."))
} else {
  print("There are no numeric predictor variables.")
}
## [1] "There are 20 numeric predictor variable(s)."
##################################
# Listing all factor predictors
##################################
# Factor predictors are the columns whose name matches the pattern "FP";
# each selected column is converted to a factor.
DQA.Predictors.Factor <- as.data.frame(lapply(DQA.Predictors[(grep("FP", names(DQA.Predictors)))],factor))

# Report how many factor predictors were found. The `else` must sit on
# the same line as the closing brace so the top-level statement parses.
if (length(names(DQA.Predictors.Factor))>0) {
  print(paste0("There are ",
               length(names(DQA.Predictors.Factor)),
               " factor predictor variable(s)."))
} else {
  print("There are no factor predictor variables.")
}
## [1] "There are 208 factor predictor variable(s)."
##################################
# Formulating a data quality assessment summary for factor predictors
##################################
if (length(names(DQA.Predictors.Factor))>0) {

  ##################################
  # Formulating a function to determine the first mode
  ##################################
  # Returns the most frequent non-missing value(s) of x. Ties produce
  # more than one value, which is why callers below take element [1].
  FirstModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    ux[tab == max(tab)]
  }

  ##################################
  # Formulating a function to determine the second mode
  ##################################
  # Returns the most frequent value(s) among the observations that are
  # not a first mode. Missing values are not removed before this second
  # tabulation, so NA can be a candidate; when any candidate is NA the
  # placeholder "x" is returned instead.
  SecondModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    fm <- ux[tab == max(tab)]
    # Drop every observation equal to a first mode, then repeat the tally
    sm <- x[!(x %in% fm)]
    usm <- unique(sm)
    tabsm <- tabulate(match(sm, usm))
    second <- usm[tabsm == max(tabsm)]
    # ifelse() evaluates its `yes` branch only if some test element is
    # TRUE, and its `no` branch only if some element is FALSE; return()
    # inside the branch then exits the function with that value.
    ifelse(is.na(second),
           return("x"),
           return(second))
  }

  # One row per factor predictor. The outer parentheses make the
  # assignment's value visible so the summary is printed as well as stored.
  (DQA.Predictors.Factor.Summary <- data.frame(
    Column.Name= names(DQA.Predictors.Factor),
    Column.Type=sapply(DQA.Predictors.Factor, function(x) class(x)),
    Unique.Count=sapply(DQA.Predictors.Factor, function(x) length(unique(x))),
    First.Mode.Value=sapply(DQA.Predictors.Factor, function(x) as.character(FirstModes(x)[1])),
    Second.Mode.Value=sapply(DQA.Predictors.Factor, function(x) as.character(SecondModes(x)[1])),
    First.Mode.Count=sapply(DQA.Predictors.Factor, function(x) sum(na.omit(x) == FirstModes(x)[1])),
    Second.Mode.Count=sapply(DQA.Predictors.Factor, function(x) sum(na.omit(x) == SecondModes(x)[1])),
    Unique.Count.Ratio=sapply(DQA.Predictors.Factor, function(x) format(round((length(unique(x))/nrow(DQA.Predictors.Factor)),3), nsmall=3)),
    First.Second.Mode.Ratio=sapply(DQA.Predictors.Factor, function(x) format(round((sum(na.omit(x) == FirstModes(x)[1])/sum(na.omit(x) == SecondModes(x)[1])),3), nsmall=3)),
    row.names=NULL)
  )

}
## Column.Name Column.Type Unique.Count First.Mode.Value Second.Mode.Value
## 1 FP001 factor 2 0 1
## 2 FP002 factor 2 1 0
## 3 FP003 factor 2 0 1
## 4 FP004 factor 2 1 0
## 5 FP005 factor 2 1 0
## 6 FP006 factor 2 0 1
## 7 FP007 factor 2 0 1
## 8 FP008 factor 2 0 1
## 9 FP009 factor 2 0 1
## 10 FP010 factor 2 0 1
## 11 FP011 factor 2 0 1
## 12 FP012 factor 2 0 1
## 13 FP013 factor 2 0 1
## 14 FP014 factor 2 0 1
## 15 FP015 factor 2 1 0
## 16 FP016 factor 2 0 1
## 17 FP017 factor 2 0 1
## 18 FP018 factor 2 0 1
## 19 FP019 factor 2 0 1
## 20 FP020 factor 2 0 1
## 21 FP021 factor 2 0 1
## 22 FP022 factor 2 0 1
## 23 FP023 factor 2 0 1
## 24 FP024 factor 2 0 1
## 25 FP025 factor 2 0 1
## 26 FP026 factor 2 0 1
## 27 FP027 factor 2 0 1
## 28 FP028 factor 2 0 1
## 29 FP029 factor 2 0 1
## 30 FP030 factor 2 0 1
## 31 FP031 factor 2 0 1
## 32 FP032 factor 2 0 1
## 33 FP033 factor 2 0 1
## 34 FP034 factor 2 0 1
## 35 FP035 factor 2 0 1
## 36 FP036 factor 2 0 1
## 37 FP037 factor 2 0 1
## 38 FP038 factor 2 0 1
## 39 FP039 factor 2 0 1
## 40 FP040 factor 2 0 1
## 41 FP041 factor 2 0 1
## 42 FP042 factor 2 0 1
## 43 FP043 factor 2 0 1
## 44 FP044 factor 2 0 1
## 45 FP045 factor 2 0 1
## 46 FP046 factor 2 0 1
## 47 FP047 factor 2 0 1
## 48 FP048 factor 2 0 1
## 49 FP049 factor 2 0 1
## 50 FP050 factor 2 0 1
## 51 FP051 factor 2 0 1
## 52 FP052 factor 2 0 1
## 53 FP053 factor 2 0 1
## 54 FP054 factor 2 0 1
## 55 FP055 factor 2 0 1
## 56 FP056 factor 2 0 1
## 57 FP057 factor 2 0 1
## 58 FP058 factor 2 0 1
## 59 FP059 factor 2 0 1
## 60 FP060 factor 2 0 1
## 61 FP061 factor 2 0 1
## 62 FP062 factor 2 0 1
## 63 FP063 factor 2 0 1
## 64 FP064 factor 2 0 1
## 65 FP065 factor 2 1 0
## 66 FP066 factor 2 1 0
## 67 FP067 factor 2 0 1
## 68 FP068 factor 2 0 1
## 69 FP069 factor 2 0 1
## 70 FP070 factor 2 0 1
## 71 FP071 factor 2 0 1
## 72 FP072 factor 2 1 0
## 73 FP073 factor 2 0 1
## 74 FP074 factor 2 0 1
## 75 FP075 factor 2 0 1
## 76 FP076 factor 2 0 1
## 77 FP077 factor 2 0 1
## 78 FP078 factor 2 0 1
## 79 FP079 factor 2 1 0
## 80 FP080 factor 2 0 1
## 81 FP081 factor 2 0 1
## 82 FP082 factor 2 1 0
## 83 FP083 factor 2 0 1
## 84 FP084 factor 2 0 1
## 85 FP085 factor 2 0 1
## 86 FP086 factor 2 0 1
## 87 FP087 factor 2 1 0
## 88 FP088 factor 2 0 1
## 89 FP089 factor 2 0 1
## 90 FP090 factor 2 0 1
## 91 FP091 factor 2 0 1
## 92 FP092 factor 2 0 1
## 93 FP093 factor 2 0 1
## 94 FP094 factor 2 0 1
## 95 FP095 factor 2 0 1
## 96 FP096 factor 2 0 1
## 97 FP097 factor 2 0 1
## 98 FP098 factor 2 0 1
## 99 FP099 factor 2 0 1
## 100 FP100 factor 2 0 1
## 101 FP101 factor 2 0 1
## 102 FP102 factor 2 0 1
## 103 FP103 factor 2 0 1
## 104 FP104 factor 2 0 1
## 105 FP105 factor 2 0 1
## 106 FP106 factor 2 0 1
## 107 FP107 factor 2 0 1
## 108 FP108 factor 2 0 1
## 109 FP109 factor 2 0 1
## 110 FP110 factor 2 0 1
## 111 FP111 factor 2 0 1
## 112 FP112 factor 2 0 1
## 113 FP113 factor 2 0 1
## 114 FP114 factor 2 0 1
## 115 FP115 factor 2 0 1
## 116 FP116 factor 2 0 1
## 117 FP117 factor 2 0 1
## 118 FP118 factor 2 0 1
## 119 FP119 factor 2 0 1
## 120 FP120 factor 2 0 1
## 121 FP121 factor 2 0 1
## 122 FP122 factor 2 0 1
## 123 FP123 factor 2 0 1
## 124 FP124 factor 2 0 1
## 125 FP125 factor 2 0 1
## 126 FP126 factor 2 0 1
## 127 FP127 factor 2 0 1
## 128 FP128 factor 2 0 1
## 129 FP129 factor 2 0 1
## 130 FP130 factor 2 0 1
## 131 FP131 factor 2 0 1
## 132 FP132 factor 2 0 1
## 133 FP133 factor 2 0 1
## 134 FP134 factor 2 0 1
## 135 FP135 factor 2 0 1
## 136 FP136 factor 2 0 1
## 137 FP137 factor 2 0 1
## 138 FP138 factor 2 0 1
## 139 FP139 factor 2 0 1
## 140 FP140 factor 2 0 1
## 141 FP141 factor 2 0 1
## 142 FP142 factor 2 0 1
## 143 FP143 factor 2 0 1
## 144 FP144 factor 2 0 1
## 145 FP145 factor 2 0 1
## 146 FP146 factor 2 0 1
## 147 FP147 factor 2 0 1
## 148 FP148 factor 2 0 1
## 149 FP149 factor 2 0 1
## 150 FP150 factor 2 0 1
## 151 FP151 factor 2 0 1
## 152 FP152 factor 2 0 1
## 153 FP153 factor 2 0 1
## 154 FP154 factor 2 0 1
## 155 FP155 factor 2 0 1
## 156 FP156 factor 2 0 1
## 157 FP157 factor 2 0 1
## 158 FP158 factor 2 0 1
## 159 FP159 factor 2 0 1
## 160 FP160 factor 2 0 1
## 161 FP161 factor 2 0 1
## 162 FP162 factor 2 0 1
## 163 FP163 factor 2 0 1
## 164 FP164 factor 2 1 0
## 165 FP165 factor 2 0 1
## 166 FP166 factor 2 0 1
## 167 FP167 factor 2 0 1
## 168 FP168 factor 2 1 0
## 169 FP169 factor 2 0 1
## 170 FP170 factor 2 0 1
## 171 FP171 factor 2 0 1
## 172 FP172 factor 2 0 1
## 173 FP173 factor 2 0 1
## 174 FP174 factor 2 0 1
## 175 FP175 factor 2 0 1
## 176 FP176 factor 2 0 1
## 177 FP177 factor 2 0 1
## 178 FP178 factor 2 0 1
## 179 FP179 factor 2 0 1
## 180 FP180 factor 2 0 1
## 181 FP181 factor 2 0 1
## 182 FP182 factor 2 0 1
## 183 FP183 factor 2 0 1
## 184 FP184 factor 2 0 1
## 185 FP185 factor 2 0 1
## 186 FP186 factor 2 0 1
## 187 FP187 factor 2 0 1
## 188 FP188 factor 2 0 1
## 189 FP189 factor 2 0 1
## 190 FP190 factor 2 0 1
## 191 FP191 factor 2 0 1
## 192 FP192 factor 2 0 1
## 193 FP193 factor 2 0 1
## 194 FP194 factor 2 0 1
## 195 FP195 factor 2 0 1
## 196 FP196 factor 2 0 1
## 197 FP197 factor 2 0 1
## 198 FP198 factor 2 0 1
## 199 FP199 factor 2 0 1
## 200 FP200 factor 2 0 1
## 201 FP201 factor 2 0 1
## 202 FP202 factor 2 0 1
## 203 FP203 factor 2 0 1
## 204 FP204 factor 2 0 1
## 205 FP205 factor 2 0 1
## 206 FP206 factor 2 0 1
## 207 FP207 factor 2 0 1
## 208 FP208 factor 2 0 1
## First.Mode.Count Second.Mode.Count Unique.Count.Ratio
## 1 482 469 0.002
## 2 513 438 0.002
## 3 536 415 0.002
## 4 556 395 0.002
## 5 551 400 0.002
## 6 570 381 0.002
## 7 605 346 0.002
## 8 641 310 0.002
## 9 685 266 0.002
## 10 781 170 0.002
## 11 747 204 0.002
## 12 783 168 0.002
## 13 793 158 0.002
## 14 798 153 0.002
## 15 818 133 0.002
## 16 812 139 0.002
## 17 814 137 0.002
## 18 826 125 0.002
## 19 835 116 0.002
## 20 837 114 0.002
## 21 836 115 0.002
## 22 852 99 0.002
## 23 834 117 0.002
## 24 844 107 0.002
## 25 841 110 0.002
## 26 871 80 0.002
## 27 858 93 0.002
## 28 850 101 0.002
## 29 854 97 0.002
## 30 862 89 0.002
## 31 866 85 0.002
## 32 881 70 0.002
## 33 885 66 0.002
## 34 875 76 0.002
## 35 882 69 0.002
## 36 879 72 0.002
## 37 884 67 0.002
## 38 869 82 0.002
## 39 880 71 0.002
## 40 886 65 0.002
## 41 891 60 0.002
## 42 897 54 0.002
## 43 888 63 0.002
## 44 894 57 0.002
## 45 898 53 0.002
## 46 651 300 0.002
## 47 698 253 0.002
## 48 833 118 0.002
## 49 835 116 0.002
## 50 844 107 0.002
## 51 847 104 0.002
## 52 864 87 0.002
## 53 862 89 0.002
## 54 879 72 0.002
## 55 900 51 0.002
## 56 889 62 0.002
## 57 837 114 0.002
## 58 843 108 0.002
## 59 899 52 0.002
## 60 493 458 0.002
## 61 526 425 0.002
## 62 535 416 0.002
## 63 546 405 0.002
## 64 555 396 0.002
## 65 564 387 0.002
## 66 580 371 0.002
## 67 590 361 0.002
## 68 607 344 0.002
## 69 607 344 0.002
## 70 613 338 0.002
## 71 640 311 0.002
## 72 626 325 0.002
## 73 656 295 0.002
## 74 642 309 0.002
## 75 629 322 0.002
## 76 639 312 0.002
## 77 646 305 0.002
## 78 662 289 0.002
## 79 656 295 0.002
## 80 663 288 0.002
## 81 686 265 0.002
## 82 679 272 0.002
## 83 691 260 0.002
## 84 679 272 0.002
## 85 708 243 0.002
## 86 695 256 0.002
## 87 691 260 0.002
## 88 701 250 0.002
## 89 716 235 0.002
## 90 714 237 0.002
## 91 737 214 0.002
## 92 719 232 0.002
## 93 719 232 0.002
## 94 731 220 0.002
## 95 742 209 0.002
## 96 744 207 0.002
## 97 727 224 0.002
## 98 725 226 0.002
## 99 735 216 0.002
## 100 731 220 0.002
## 101 726 225 0.002
## 102 759 192 0.002
## 103 743 208 0.002
## 104 739 212 0.002
## 105 746 205 0.002
## 106 769 182 0.002
## 107 750 201 0.002
## 108 756 195 0.002
## 109 783 168 0.002
## 110 755 196 0.002
## 111 764 187 0.002
## 112 766 185 0.002
## 113 765 186 0.002
## 114 803 148 0.002
## 115 781 170 0.002
## 116 768 183 0.002
## 117 781 170 0.002
## 118 768 183 0.002
## 119 796 155 0.002
## 120 793 158 0.002
## 121 818 133 0.002
## 122 795 156 0.002
## 123 792 159 0.002
## 124 797 154 0.002
## 125 803 148 0.002
## 126 810 141 0.002
## 127 818 133 0.002
## 128 810 141 0.002
## 129 819 132 0.002
## 130 851 100 0.002
## 131 831 120 0.002
## 132 832 119 0.002
## 133 831 120 0.002
## 134 830 121 0.002
## 135 831 120 0.002
## 136 836 115 0.002
## 137 841 110 0.002
## 138 845 106 0.002
## 139 873 78 0.002
## 140 845 106 0.002
## 141 840 111 0.002
## 142 847 104 0.002
## 143 874 77 0.002
## 144 852 99 0.002
## 145 852 99 0.002
## 146 853 98 0.002
## 147 851 100 0.002
## 148 868 83 0.002
## 149 865 86 0.002
## 150 876 75 0.002
## 151 898 53 0.002
## 152 873 78 0.002
## 153 877 74 0.002
## 154 915 36 0.002
## 155 885 66 0.002
## 156 884 67 0.002
## 157 892 59 0.002
## 158 900 51 0.002
## 159 884 67 0.002
## 160 886 65 0.002
## 161 888 63 0.002
## 162 480 471 0.002
## 163 498 453 0.002
## 164 597 354 0.002
## 165 619 332 0.002
## 166 636 315 0.002
## 167 639 312 0.002
## 168 633 318 0.002
## 169 774 177 0.002
## 170 776 175 0.002
## 171 790 161 0.002
## 172 807 144 0.002
## 173 816 135 0.002
## 174 827 124 0.002
## 175 823 128 0.002
## 176 835 116 0.002
## 177 836 115 0.002
## 178 836 115 0.002
## 179 858 93 0.002
## 180 849 102 0.002
## 181 862 89 0.002
## 182 857 94 0.002
## 183 879 72 0.002
## 184 871 80 0.002
## 185 870 81 0.002
## 186 878 73 0.002
## 187 882 69 0.002
## 188 886 65 0.002
## 189 878 73 0.002
## 190 882 69 0.002
## 191 884 67 0.002
## 192 893 58 0.002
## 193 892 59 0.002
## 194 895 56 0.002
## 195 893 58 0.002
## 196 897 54 0.002
## 197 901 50 0.002
## 198 897 54 0.002
## 199 906 45 0.002
## 200 904 47 0.002
## 201 901 50 0.002
## 202 706 245 0.002
## 203 842 109 0.002
## 204 857 94 0.002
## 205 877 74 0.002
## 206 894 57 0.002
## 207 897 54 0.002
## 208 844 107 0.002
## First.Second.Mode.Ratio
## 1 1.028
## 2 1.171
## 3 1.292
## 4 1.408
## 5 1.378
## 6 1.496
## 7 1.749
## 8 2.068
## 9 2.575
## 10 4.594
## 11 3.662
## 12 4.661
## 13 5.019
## 14 5.216
## 15 6.150
## 16 5.842
## 17 5.942
## 18 6.608
## 19 7.198
## 20 7.342
## 21 7.270
## 22 8.606
## 23 7.128
## 24 7.888
## 25 7.645
## 26 10.887
## 27 9.226
## 28 8.416
## 29 8.804
## 30 9.685
## 31 10.188
## 32 12.586
## 33 13.409
## 34 11.513
## 35 12.783
## 36 12.208
## 37 13.194
## 38 10.598
## 39 12.394
## 40 13.631
## 41 14.850
## 42 16.611
## 43 14.095
## 44 15.684
## 45 16.943
## 46 2.170
## 47 2.759
## 48 7.059
## 49 7.198
## 50 7.888
## 51 8.144
## 52 9.931
## 53 9.685
## 54 12.208
## 55 17.647
## 56 14.339
## 57 7.342
## 58 7.806
## 59 17.288
## 60 1.076
## 61 1.238
## 62 1.286
## 63 1.348
## 64 1.402
## 65 1.457
## 66 1.563
## 67 1.634
## 68 1.765
## 69 1.765
## 70 1.814
## 71 2.058
## 72 1.926
## 73 2.224
## 74 2.078
## 75 1.953
## 76 2.048
## 77 2.118
## 78 2.291
## 79 2.224
## 80 2.302
## 81 2.589
## 82 2.496
## 83 2.658
## 84 2.496
## 85 2.914
## 86 2.715
## 87 2.658
## 88 2.804
## 89 3.047
## 90 3.013
## 91 3.444
## 92 3.099
## 93 3.099
## 94 3.323
## 95 3.550
## 96 3.594
## 97 3.246
## 98 3.208
## 99 3.403
## 100 3.323
## 101 3.227
## 102 3.953
## 103 3.572
## 104 3.486
## 105 3.639
## 106 4.225
## 107 3.731
## 108 3.877
## 109 4.661
## 110 3.852
## 111 4.086
## 112 4.141
## 113 4.113
## 114 5.426
## 115 4.594
## 116 4.197
## 117 4.594
## 118 4.197
## 119 5.135
## 120 5.019
## 121 6.150
## 122 5.096
## 123 4.981
## 124 5.175
## 125 5.426
## 126 5.745
## 127 6.150
## 128 5.745
## 129 6.205
## 130 8.510
## 131 6.925
## 132 6.992
## 133 6.925
## 134 6.860
## 135 6.925
## 136 7.270
## 137 7.645
## 138 7.972
## 139 11.192
## 140 7.972
## 141 7.568
## 142 8.144
## 143 11.351
## 144 8.606
## 145 8.606
## 146 8.704
## 147 8.510
## 148 10.458
## 149 10.058
## 150 11.680
## 151 16.943
## 152 11.192
## 153 11.851
## 154 25.417
## 155 13.409
## 156 13.194
## 157 15.119
## 158 17.647
## 159 13.194
## 160 13.631
## 161 14.095
## 162 1.019
## 163 1.099
## 164 1.686
## 165 1.864
## 166 2.019
## 167 2.048
## 168 1.991
## 169 4.373
## 170 4.434
## 171 4.907
## 172 5.604
## 173 6.044
## 174 6.669
## 175 6.430
## 176 7.198
## 177 7.270
## 178 7.270
## 179 9.226
## 180 8.324
## 181 9.685
## 182 9.117
## 183 12.208
## 184 10.887
## 185 10.741
## 186 12.027
## 187 12.783
## 188 13.631
## 189 12.027
## 190 12.783
## 191 13.194
## 192 15.397
## 193 15.119
## 194 15.982
## 195 15.397
## 196 16.611
## 197 18.020
## 198 16.611
## 199 20.133
## 200 19.234
## 201 18.020
## 202 2.882
## 203 7.725
## 204 9.117
## 205 11.851
## 206 15.684
## 207 16.611
## 208 7.888
##################################
# Formulating a data quality assessment summary for numeric predictors
##################################
if (length(names(DQA.Predictors.Numeric))>0) {

  ##################################
  # Formulating a function to determine the first mode
  ##################################
  # Returns the most frequent non-missing value(s) of x. Ties produce
  # more than one value, which is why callers below take element [1].
  FirstModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    ux[tab == max(tab)]
  }

  ##################################
  # Formulating a function to determine the second mode
  ##################################
  # Returns the most frequent value(s) among the non-missing observations
  # that are not a first mode. When any candidate is NA the numeric
  # placeholder 0.00001 is returned instead.
  SecondModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    fm <- ux[tab == max(tab)]
    # Drop missing values and every observation equal to a first mode,
    # then repeat the tally on what is left
    sm <- na.omit(x)[!(na.omit(x) %in% fm)]
    usm <- unique(sm)
    tabsm <- tabulate(match(sm, usm))
    second <- usm[tabsm == max(tabsm)]
    # ifelse() evaluates its `yes` branch only if some test element is
    # TRUE, and its `no` branch only if some element is FALSE; return()
    # inside the branch then exits the function with that value.
    ifelse(is.na(second),
           return(0.00001),
           return(second))
  }

  # One row per numeric predictor. The outer parentheses make the
  # assignment's value visible so the summary is printed as well as stored.
  # skewness() and kurtosis() are not base R functions; the script loads
  # the moments package at the top of the file.
  (DQA.Predictors.Numeric.Summary <- data.frame(
    Column.Name= names(DQA.Predictors.Numeric),
    Column.Type=sapply(DQA.Predictors.Numeric, function(x) class(x)),
    Unique.Count=sapply(DQA.Predictors.Numeric, function(x) length(unique(x))),
    Unique.Count.Ratio=sapply(DQA.Predictors.Numeric, function(x) format(round((length(unique(x))/nrow(DQA.Predictors.Numeric)),3), nsmall=3)),
    First.Mode.Value=sapply(DQA.Predictors.Numeric, function(x) format(round((FirstModes(x)[1]),3),nsmall=3)),
    Second.Mode.Value=sapply(DQA.Predictors.Numeric, function(x) format(round((SecondModes(x)[1]),3),nsmall=3)),
    First.Mode.Count=sapply(DQA.Predictors.Numeric, function(x) sum(na.omit(x) == FirstModes(x)[1])),
    Second.Mode.Count=sapply(DQA.Predictors.Numeric, function(x) sum(na.omit(x) == SecondModes(x)[1])),
    First.Second.Mode.Ratio=sapply(DQA.Predictors.Numeric, function(x) format(round((sum(na.omit(x) == FirstModes(x)[1])/sum(na.omit(x) == SecondModes(x)[1])),3), nsmall=3)),
    Minimum=sapply(DQA.Predictors.Numeric, function(x) format(round(min(x,na.rm = TRUE),3), nsmall=3)),
    Mean=sapply(DQA.Predictors.Numeric, function(x) format(round(mean(x,na.rm = TRUE),3), nsmall=3)),
    Median=sapply(DQA.Predictors.Numeric, function(x) format(round(median(x,na.rm = TRUE),3), nsmall=3)),
    Maximum=sapply(DQA.Predictors.Numeric, function(x) format(round(max(x,na.rm = TRUE),3), nsmall=3)),
    Skewness=sapply(DQA.Predictors.Numeric, function(x) format(round(skewness(x,na.rm = TRUE),3), nsmall=3)),
    Kurtosis=sapply(DQA.Predictors.Numeric, function(x) format(round(kurtosis(x,na.rm = TRUE),3), nsmall=3)),
    Percentile25th=sapply(DQA.Predictors.Numeric, function(x) format(round(quantile(x,probs=0.25,na.rm = TRUE),3), nsmall=3)),
    Percentile75th=sapply(DQA.Predictors.Numeric, function(x) format(round(quantile(x,probs=0.75,na.rm = TRUE),3), nsmall=3)),
    row.names=NULL)
  )

}
## Column.Name Column.Type Unique.Count Unique.Count.Ratio
## 1 MolWeight numeric 646 0.679
## 2 NumAtoms integer 66 0.069
## 3 NumNonHAtoms integer 36 0.038
## 4 NumBonds integer 72 0.076
## 5 NumNonHBonds integer 39 0.041
## 6 NumMultBonds integer 25 0.026
## 7 NumRotBonds integer 15 0.016
## 8 NumDblBonds integer 8 0.008
## 9 NumAromaticBonds integer 16 0.017
## 10 NumHydrogen integer 41 0.043
## 11 NumCarbon integer 28 0.029
## 12 NumNitrogen integer 7 0.007
## 13 NumOxygen integer 11 0.012
## 14 NumSulfer integer 5 0.005
## 15 NumChlorine integer 11 0.012
## 16 NumHalogen integer 11 0.012
## 17 NumRings integer 8 0.008
## 18 HydrophilicFactor numeric 369 0.388
## 19 SurfaceArea1 numeric 252 0.265
## 20 SurfaceArea2 numeric 287 0.302
## First.Mode.Value Second.Mode.Value First.Mode.Count Second.Mode.Count
## 1 102.200 116.230 16 14
## 2 22.000 24.000 73 51
## 3 8.000 11.000 104 73
## 4 23.000 19.000 69 56
## 5 8.000 7.000 82 66
## 6 0.000 7.000 158 122
## 7 0.000 1.000 272 186
## 8 0.000 1.000 427 268
## 9 0.000 6.000 400 302
## 10 12.000 8.000 83 79
## 11 6.000 7.000 105 97
## 12 0.000 1.000 546 191
## 13 0.000 2.000 325 218
## 14 0.000 1.000 830 96
## 15 0.000 1.000 750 81
## 16 0.000 1.000 685 107
## 17 1.000 0.000 323 260
## 18 -0.828 -0.158 21 20
## 19 0.000 20.230 218 76
## 20 0.000 20.230 211 75
## First.Second.Mode.Ratio Minimum Mean Median Maximum Skewness Kurtosis
## 1 1.143 46.090 201.654 179.230 665.810 0.988 3.945
## 2 1.431 5.000 25.507 22.000 94.000 1.364 5.523
## 3 1.425 2.000 13.161 12.000 47.000 0.993 4.129
## 4 1.232 4.000 25.909 23.000 97.000 1.360 5.408
## 5 1.242 1.000 13.563 12.000 50.000 0.969 3.842
## 6 1.295 0.000 6.148 6.000 25.000 0.670 3.053
## 7 1.462 0.000 2.251 2.000 16.000 1.577 6.437
## 8 1.593 0.000 1.006 1.000 7.000 1.360 4.760
## 9 1.325 0.000 5.121 6.000 25.000 0.796 3.241
## 10 1.051 0.000 12.346 11.000 47.000 1.262 5.261
## 11 1.082 1.000 9.893 9.000 33.000 0.927 3.616
## 12 2.859 0.000 0.813 0.000 6.000 1.554 4.831
## 13 1.491 0.000 1.574 1.000 13.000 1.772 8.494
## 14 8.646 0.000 0.164 0.000 4.000 3.842 21.526
## 15 9.259 0.000 0.556 0.000 10.000 3.178 13.780
## 16 6.402 0.000 0.698 0.000 10.000 2.691 10.808
## 17 1.242 0.000 1.402 1.000 7.000 1.034 3.875
## 18 1.050 -0.985 -0.021 -0.314 13.483 3.404 27.504
## 19 2.868 0.000 36.459 29.100 331.940 1.714 9.714
## 20 2.813 0.000 40.234 33.120 331.940 1.475 7.485
## Percentile25th Percentile75th
## 1 122.605 264.340
## 2 17.000 31.000
## 3 8.000 17.000
## 4 17.000 31.500
## 5 8.000 18.000
## 6 1.000 10.000
## 7 0.000 3.500
## 8 0.000 2.000
## 9 0.000 6.000
## 10 7.000 16.000
## 11 6.000 12.000
## 12 0.000 1.000
## 13 0.000 2.000
## 14 0.000 0.000
## 15 0.000 0.000
## 16 0.000 1.000
## 17 0.000 2.000
## 18 -0.763 0.313
## 19 9.230 53.280
## 20 10.630 60.660
##################################
# Identifying potential data quality issues
##################################

##################################
# Checking for missing observations
##################################
# Report the variables whose NA.Count in the overall summary is positive.
# When any exist, the matching summary rows are the last value of the
# branch, so they are displayed after the message.
if ((nrow(DQA.Summary[DQA.Summary$NA.Count>0,]))>0){
  print(paste0("Missing observations noted for ",
               nrow(DQA.Summary[DQA.Summary$NA.Count>0,]),
               " variable(s) with NA.Count>0 and Fill.Rate<1.0."))
  DQA.Summary[DQA.Summary$NA.Count>0,]
} else {
  print("No missing observations noted.")
}
## [1] "No missing observations noted."
##################################
# Checking for zero or near-zero variance predictors
##################################
# Flag factor predictors whose first-to-second mode count ratio exceeds 5.
# First.Second.Mode.Ratio is stored as formatted text in the summary, so
# it is converted back to numeric before the comparison. When any are
# flagged, the matching summary rows are the last value of the branch,
# so they are displayed after the message.
if (length(names(DQA.Predictors.Factor))==0) {
  print("No factor predictors noted.")
} else if (nrow(DQA.Predictors.Factor.Summary[as.numeric(as.character(DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio))>5,])>0){
  print(paste0("Low variance observed for ",
               nrow(DQA.Predictors.Factor.Summary[as.numeric(as.character(DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio))>5,]),
               " factor variable(s) with First.Second.Mode.Ratio>5."))
  DQA.Predictors.Factor.Summary[as.numeric(as.character(DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio))>5,]
} else {
  print("No low variance factor predictors due to high first-second mode ratio noted.")
}
## [1] "Low variance observed for 124 factor variable(s) with First.Second.Mode.Ratio>5."
## Column.Name Column.Type Unique.Count First.Mode.Value Second.Mode.Value
## 13 FP013 factor 2 0 1
## 14 FP014 factor 2 0 1
## 15 FP015 factor 2 1 0
## 16 FP016 factor 2 0 1
## 17 FP017 factor 2 0 1
## 18 FP018 factor 2 0 1
## 19 FP019 factor 2 0 1
## 20 FP020 factor 2 0 1
## 21 FP021 factor 2 0 1
## 22 FP022 factor 2 0 1
## 23 FP023 factor 2 0 1
## 24 FP024 factor 2 0 1
## 25 FP025 factor 2 0 1
## 26 FP026 factor 2 0 1
## 27 FP027 factor 2 0 1
## 28 FP028 factor 2 0 1
## 29 FP029 factor 2 0 1
## 30 FP030 factor 2 0 1
## 31 FP031 factor 2 0 1
## 32 FP032 factor 2 0 1
## 33 FP033 factor 2 0 1
## 34 FP034 factor 2 0 1
## 35 FP035 factor 2 0 1
## 36 FP036 factor 2 0 1
## 37 FP037 factor 2 0 1
## 38 FP038 factor 2 0 1
## 39 FP039 factor 2 0 1
## 40 FP040 factor 2 0 1
## 41 FP041 factor 2 0 1
## 42 FP042 factor 2 0 1
## 43 FP043 factor 2 0 1
## 44 FP044 factor 2 0 1
## 45 FP045 factor 2 0 1
## 48 FP048 factor 2 0 1
## 49 FP049 factor 2 0 1
## 50 FP050 factor 2 0 1
## 51 FP051 factor 2 0 1
## 52 FP052 factor 2 0 1
## 53 FP053 factor 2 0 1
## 54 FP054 factor 2 0 1
## 55 FP055 factor 2 0 1
## 56 FP056 factor 2 0 1
## 57 FP057 factor 2 0 1
## 58 FP058 factor 2 0 1
## 59 FP059 factor 2 0 1
## 114 FP114 factor 2 0 1
## 119 FP119 factor 2 0 1
## 120 FP120 factor 2 0 1
## 121 FP121 factor 2 0 1
## 122 FP122 factor 2 0 1
## 124 FP124 factor 2 0 1
## 125 FP125 factor 2 0 1
## 126 FP126 factor 2 0 1
## 127 FP127 factor 2 0 1
## 128 FP128 factor 2 0 1
## 129 FP129 factor 2 0 1
## 130 FP130 factor 2 0 1
## 131 FP131 factor 2 0 1
## 132 FP132 factor 2 0 1
## 133 FP133 factor 2 0 1
## 134 FP134 factor 2 0 1
## 135 FP135 factor 2 0 1
## 136 FP136 factor 2 0 1
## 137 FP137 factor 2 0 1
## 138 FP138 factor 2 0 1
## 139 FP139 factor 2 0 1
## 140 FP140 factor 2 0 1
## 141 FP141 factor 2 0 1
## 142 FP142 factor 2 0 1
## 143 FP143 factor 2 0 1
## 144 FP144 factor 2 0 1
## 145 FP145 factor 2 0 1
## 146 FP146 factor 2 0 1
## 147 FP147 factor 2 0 1
## 148 FP148 factor 2 0 1
## 149 FP149 factor 2 0 1
## 150 FP150 factor 2 0 1
## 151 FP151 factor 2 0 1
## 152 FP152 factor 2 0 1
## 153 FP153 factor 2 0 1
## 154 FP154 factor 2 0 1
## 155 FP155 factor 2 0 1
## 156 FP156 factor 2 0 1
## 157 FP157 factor 2 0 1
## 158 FP158 factor 2 0 1
## 159 FP159 factor 2 0 1
## 160 FP160 factor 2 0 1
## 161 FP161 factor 2 0 1
## 172 FP172 factor 2 0 1
## 173 FP173 factor 2 0 1
## 174 FP174 factor 2 0 1
## 175 FP175 factor 2 0 1
## 176 FP176 factor 2 0 1
## 177 FP177 factor 2 0 1
## 178 FP178 factor 2 0 1
## 179 FP179 factor 2 0 1
## 180 FP180 factor 2 0 1
## 181 FP181 factor 2 0 1
## 182 FP182 factor 2 0 1
## 183 FP183 factor 2 0 1
## 184 FP184 factor 2 0 1
## 185 FP185 factor 2 0 1
## 186 FP186 factor 2 0 1
## 187 FP187 factor 2 0 1
## 188 FP188 factor 2 0 1
## 189 FP189 factor 2 0 1
## 190 FP190 factor 2 0 1
## 191 FP191 factor 2 0 1
## 192 FP192 factor 2 0 1
## 193 FP193 factor 2 0 1
## 194 FP194 factor 2 0 1
## 195 FP195 factor 2 0 1
## 196 FP196 factor 2 0 1
## 197 FP197 factor 2 0 1
## 198 FP198 factor 2 0 1
## 199 FP199 factor 2 0 1
## 200 FP200 factor 2 0 1
## 201 FP201 factor 2 0 1
## 203 FP203 factor 2 0 1
## 204 FP204 factor 2 0 1
## 205 FP205 factor 2 0 1
## 206 FP206 factor 2 0 1
## 207 FP207 factor 2 0 1
## 208 FP208 factor 2 0 1
## First.Mode.Count Second.Mode.Count Unique.Count.Ratio
## 13 793 158 0.002
## 14 798 153 0.002
## 15 818 133 0.002
## 16 812 139 0.002
## 17 814 137 0.002
## 18 826 125 0.002
## 19 835 116 0.002
## 20 837 114 0.002
## 21 836 115 0.002
## 22 852 99 0.002
## 23 834 117 0.002
## 24 844 107 0.002
## 25 841 110 0.002
## 26 871 80 0.002
## 27 858 93 0.002
## 28 850 101 0.002
## 29 854 97 0.002
## 30 862 89 0.002
## 31 866 85 0.002
## 32 881 70 0.002
## 33 885 66 0.002
## 34 875 76 0.002
## 35 882 69 0.002
## 36 879 72 0.002
## 37 884 67 0.002
## 38 869 82 0.002
## 39 880 71 0.002
## 40 886 65 0.002
## 41 891 60 0.002
## 42 897 54 0.002
## 43 888 63 0.002
## 44 894 57 0.002
## 45 898 53 0.002
## 48 833 118 0.002
## 49 835 116 0.002
## 50 844 107 0.002
## 51 847 104 0.002
## 52 864 87 0.002
## 53 862 89 0.002
## 54 879 72 0.002
## 55 900 51 0.002
## 56 889 62 0.002
## 57 837 114 0.002
## 58 843 108 0.002
## 59 899 52 0.002
## 114 803 148 0.002
## 119 796 155 0.002
## 120 793 158 0.002
## 121 818 133 0.002
## 122 795 156 0.002
## 124 797 154 0.002
## 125 803 148 0.002
## 126 810 141 0.002
## 127 818 133 0.002
## 128 810 141 0.002
## 129 819 132 0.002
## 130 851 100 0.002
## 131 831 120 0.002
## 132 832 119 0.002
## 133 831 120 0.002
## 134 830 121 0.002
## 135 831 120 0.002
## 136 836 115 0.002
## 137 841 110 0.002
## 138 845 106 0.002
## 139 873 78 0.002
## 140 845 106 0.002
## 141 840 111 0.002
## 142 847 104 0.002
## 143 874 77 0.002
## 144 852 99 0.002
## 145 852 99 0.002
## 146 853 98 0.002
## 147 851 100 0.002
## 148 868 83 0.002
## 149 865 86 0.002
## 150 876 75 0.002
## 151 898 53 0.002
## 152 873 78 0.002
## 153 877 74 0.002
## 154 915 36 0.002
## 155 885 66 0.002
## 156 884 67 0.002
## 157 892 59 0.002
## 158 900 51 0.002
## 159 884 67 0.002
## 160 886 65 0.002
## 161 888 63 0.002
## 172 807 144 0.002
## 173 816 135 0.002
## 174 827 124 0.002
## 175 823 128 0.002
## 176 835 116 0.002
## 177 836 115 0.002
## 178 836 115 0.002
## 179 858 93 0.002
## 180 849 102 0.002
## 181 862 89 0.002
## 182 857 94 0.002
## 183 879 72 0.002
## 184 871 80 0.002
## 185 870 81 0.002
## 186 878 73 0.002
## 187 882 69 0.002
## 188 886 65 0.002
## 189 878 73 0.002
## 190 882 69 0.002
## 191 884 67 0.002
## 192 893 58 0.002
## 193 892 59 0.002
## 194 895 56 0.002
## 195 893 58 0.002
## 196 897 54 0.002
## 197 901 50 0.002
## 198 897 54 0.002
## 199 906 45 0.002
## 200 904 47 0.002
## 201 901 50 0.002
## 203 842 109 0.002
## 204 857 94 0.002
## 205 877 74 0.002
## 206 894 57 0.002
## 207 897 54 0.002
## 208 844 107 0.002
## First.Second.Mode.Ratio
## 13 5.019
## 14 5.216
## 15 6.150
## 16 5.842
## 17 5.942
## 18 6.608
## 19 7.198
## 20 7.342
## 21 7.270
## 22 8.606
## 23 7.128
## 24 7.888
## 25 7.645
## 26 10.887
## 27 9.226
## 28 8.416
## 29 8.804
## 30 9.685
## 31 10.188
## 32 12.586
## 33 13.409
## 34 11.513
## 35 12.783
## 36 12.208
## 37 13.194
## 38 10.598
## 39 12.394
## 40 13.631
## 41 14.850
## 42 16.611
## 43 14.095
## 44 15.684
## 45 16.943
## 48 7.059
## 49 7.198
## 50 7.888
## 51 8.144
## 52 9.931
## 53 9.685
## 54 12.208
## 55 17.647
## 56 14.339
## 57 7.342
## 58 7.806
## 59 17.288
## 114 5.426
## 119 5.135
## 120 5.019
## 121 6.150
## 122 5.096
## 124 5.175
## 125 5.426
## 126 5.745
## 127 6.150
## 128 5.745
## 129 6.205
## 130 8.510
## 131 6.925
## 132 6.992
## 133 6.925
## 134 6.860
## 135 6.925
## 136 7.270
## 137 7.645
## 138 7.972
## 139 11.192
## 140 7.972
## 141 7.568
## 142 8.144
## 143 11.351
## 144 8.606
## 145 8.606
## 146 8.704
## 147 8.510
## 148 10.458
## 149 10.058
## 150 11.680
## 151 16.943
## 152 11.192
## 153 11.851
## 154 25.417
## 155 13.409
## 156 13.194
## 157 15.119
## 158 17.647
## 159 13.194
## 160 13.631
## 161 14.095
## 172 5.604
## 173 6.044
## 174 6.669
## 175 6.430
## 176 7.198
## 177 7.270
## 178 7.270
## 179 9.226
## 180 8.324
## 181 9.685
## 182 9.117
## 183 12.208
## 184 10.887
## 185 10.741
## 186 12.027
## 187 12.783
## 188 13.631
## 189 12.027
## 190 12.783
## 191 13.194
## 192 15.397
## 193 15.119
## 194 15.982
## 195 15.397
## 196 16.611
## 197 18.020
## 198 16.611
## 199 20.133
## 200 19.234
## 201 18.020
## 203 7.725
## 204 9.117
## 205 11.851
## 206 15.684
## 207 16.611
## 208 7.888
##################################
# Checking for numeric predictors with low variance
# due to a high first-second mode ratio (> 5)
##################################
# The summary columns are passed through as.numeric(as.character(...))
# before comparison (presumably they are stored as text/factor - confirm
# against where DQA.Predictors.Numeric.Summary is built).
# This `if` runs at top level, so the value of the branch taken is
# auto-printed: the flagged summary rows are shown after the message.
if (length(names(DQA.Predictors.Numeric)) == 0) {
  print("No numeric predictors noted.")
} else if (nrow(DQA.Predictors.Numeric.Summary[
  as.numeric(as.character(DQA.Predictors.Numeric.Summary$First.Second.Mode.Ratio)) > 5, ]) > 0) {
  print(paste0(
    "Low variance observed for ",
    nrow(DQA.Predictors.Numeric.Summary[
      as.numeric(as.character(DQA.Predictors.Numeric.Summary$First.Second.Mode.Ratio)) > 5, ]),
    " numeric variable(s) with First.Second.Mode.Ratio>5."
  ))
  # Last expression of the branch: the flagged rows themselves
  DQA.Predictors.Numeric.Summary[
    as.numeric(as.character(DQA.Predictors.Numeric.Summary$First.Second.Mode.Ratio)) > 5, ]
} else {
  print("No low variance numeric predictors due to high first-second mode ratio noted.")
}
## [1] "Low variance observed for 3 numeric variable(s) with First.Second.Mode.Ratio>5."
## Column.Name Column.Type Unique.Count Unique.Count.Ratio First.Mode.Value
## 14 NumSulfer integer 5 0.005 0.000
## 15 NumChlorine integer 11 0.012 0.000
## 16 NumHalogen integer 11 0.012 0.000
## Second.Mode.Value First.Mode.Count Second.Mode.Count First.Second.Mode.Ratio
## 14 1.000 830 96 8.646
## 15 1.000 750 81 9.259
## 16 1.000 685 107 6.402
## Minimum Mean Median Maximum Skewness Kurtosis Percentile25th Percentile75th
## 14 0.000 0.164 0.000 4.000 3.842 21.526 0.000 0.000
## 15 0.000 0.556 0.000 10.000 3.178 13.780 0.000 0.000
## 16 0.000 0.698 0.000 10.000 2.691 10.808 0.000 1.000
##################################
# Checking for numeric predictors with low variance
# due to a low unique count ratio (< 0.01)
##################################
# This `if` runs at top level, so the value of the branch taken is
# auto-printed: the flagged summary rows are shown after the message.
if (length(names(DQA.Predictors.Numeric)) == 0) {
  print("No numeric predictors noted.")
} else if (nrow(DQA.Predictors.Numeric.Summary[
  as.numeric(as.character(DQA.Predictors.Numeric.Summary$Unique.Count.Ratio)) < 0.01, ]) > 0) {
  print(paste0(
    "Low variance observed for ",
    nrow(DQA.Predictors.Numeric.Summary[
      as.numeric(as.character(DQA.Predictors.Numeric.Summary$Unique.Count.Ratio)) < 0.01, ]),
    " numeric variable(s) with Unique.Count.Ratio<0.01."
  ))
  # Last expression of the branch: the flagged rows themselves
  DQA.Predictors.Numeric.Summary[
    as.numeric(as.character(DQA.Predictors.Numeric.Summary$Unique.Count.Ratio)) < 0.01, ]
} else {
  print("No low variance numeric predictors due to low unique count ratio noted.")
}
## [1] "Low variance observed for 4 numeric variable(s) with Unique.Count.Ratio<0.01."
## Column.Name Column.Type Unique.Count Unique.Count.Ratio First.Mode.Value
## 8 NumDblBonds integer 8 0.008 0.000
## 12 NumNitrogen integer 7 0.007 0.000
## 14 NumSulfer integer 5 0.005 0.000
## 17 NumRings integer 8 0.008 1.000
## Second.Mode.Value First.Mode.Count Second.Mode.Count First.Second.Mode.Ratio
## 8 1.000 427 268 1.593
## 12 1.000 546 191 2.859
## 14 1.000 830 96 8.646
## 17 0.000 323 260 1.242
## Minimum Mean Median Maximum Skewness Kurtosis Percentile25th Percentile75th
## 8 0.000 1.006 1.000 7.000 1.360 4.760 0.000 2.000
## 12 0.000 0.813 0.000 6.000 1.554 4.831 0.000 1.000
## 14 0.000 0.164 0.000 4.000 3.842 21.526 0.000 0.000
## 17 0.000 1.402 1.000 7.000 1.034 3.875 0.000 2.000
##################################
# Checking for skewed predictors
##################################
# A numeric predictor is flagged when its Skewness is above 3 or below -3.
# This `if` runs at top level, so the value of the branch taken is
# auto-printed: the flagged summary rows are shown after the message.
if (length(names(DQA.Predictors.Numeric)) == 0) {
  print("No numeric predictors noted.")
} else if (nrow(DQA.Predictors.Numeric.Summary[
  as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness)) > 3 |
    as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness)) < (-3), ]) > 0) {
  print(paste0(
    "High skewness observed for ",
    nrow(DQA.Predictors.Numeric.Summary[
      as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness)) > 3 |
        as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness)) < (-3), ]),
    " numeric variable(s) with Skewness>3 or Skewness<(-3)."
  ))
  # Last expression of the branch: the flagged rows themselves
  DQA.Predictors.Numeric.Summary[
    as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness)) > 3 |
      as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness)) < (-3), ]
} else {
  print("No skewed numeric predictors noted.")
}
## [1] "High skewness observed for 3 numeric variable(s) with Skewness>3 or Skewness<(-3)."
## Column.Name Column.Type Unique.Count Unique.Count.Ratio
## 14 NumSulfer integer 5 0.005
## 15 NumChlorine integer 11 0.012
## 18 HydrophilicFactor numeric 369 0.388
## First.Mode.Value Second.Mode.Value First.Mode.Count Second.Mode.Count
## 14 0.000 1.000 830 96
## 15 0.000 1.000 750 81
## 18 -0.828 -0.158 21 20
## First.Second.Mode.Ratio Minimum Mean Median Maximum Skewness Kurtosis
## 14 8.646 0.000 0.164 0.000 4.000 3.842 21.526
## 15 9.259 0.000 0.556 0.000 10.000 3.178 13.780
## 18 1.050 -0.985 -0.021 -0.314 13.483 3.404 27.504
## Percentile25th Percentile75th
## 14 0.000 0.000
## 15 0.000 0.000
## 18 -0.763 0.313
##################################
# Loading dataset
##################################
DPA <- Solubility_Train

##################################
# Listing all predictors
##################################
# Everything except the response column; drop = FALSE keeps a data frame
# even if only a single predictor column remains.
DPA.Predictors <- DPA[, !names(DPA) %in% c("Log_Solubility_Class"), drop = FALSE]

##################################
# Listing all numeric predictors
##################################
# Columns whose name contains "FP" are excluded. A logical mask is used
# rather than -(grep(...)): with no match, grep() returns integer(0) and
# negative indexing by it would select zero columns instead of all of them.
DPA.Predictors.Numeric <- DPA.Predictors[, !grepl("FP", names(DPA.Predictors)), drop = FALSE]
##################################
# Identifying outliers for the numeric predictors
##################################
# One slot per numeric predictor, filled in by the loop below
# (preallocated instead of growing the vector with append()).
OutlierCountList <- numeric(ncol(DPA.Predictors.Numeric))

# seq_len() yields an empty sequence when there are no columns,
# whereas 1:ncol(...) would iterate over c(1, 0).
for (i in seq_len(ncol(DPA.Predictors.Numeric))) {
  # Values flagged by boxplot.stats() as lying beyond the whiskers
  Outliers <- boxplot.stats(DPA.Predictors.Numeric[[i]])$out
  OutlierCount <- length(Outliers)
  OutlierCountList[i] <- OutlierCount
  # Row positions of the flagged values (kept as in the original script;
  # not used further within this section)
  OutlierIndices <- which(DPA.Predictors.Numeric[[i]] %in% c(Outliers))
  boxplot(DPA.Predictors.Numeric[[i]],
          ylab = names(DPA.Predictors.Numeric)[i],
          main = names(DPA.Predictors.Numeric)[i],
          horizontal = TRUE)
  mtext(paste0(OutlierCount, " Outlier(s) Detected"))
}

# Per-predictor outlier counts, built directly as typed columns rather than
# through a character matrix that then has to be converted back to numeric.
OutlierCountSummary <- data.frame(
  NumericPredictors = names(DPA.Predictors.Numeric),
  OutlierCount = as.numeric(OutlierCountList),
  stringsAsFactors = FALSE
)

# Number of numeric predictors with at least one outlier
NumericPredictorWithOutlierCount <- sum(OutlierCountSummary$OutlierCount > 0)
print(paste0(NumericPredictorWithOutlierCount, " numeric variable(s) were noted with outlier(s)." ))
## [1] "20 numeric variable(s) were noted with outlier(s)."
##################################
# Gathering descriptive statistics
##################################
# The outer parentheses make the assignment visible, so the skim summary
# is both stored in DPA_Skimmed and printed.
(DPA_Skimmed <- skim(DPA.Predictors.Numeric))
Name | DPA.Predictors.Numeric |
Number of rows | 951 |
Number of columns | 20 |
_______________________ | |
Column type frequency: | |
numeric | 20 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
MolWeight | 0 | 1 | 201.65 | 97.91 | 46.09 | 122.60 | 179.23 | 264.34 | 665.81 | ▇▆▂▁▁ |
NumAtoms | 0 | 1 | 25.51 | 12.61 | 5.00 | 17.00 | 22.00 | 31.00 | 94.00 | ▇▆▂▁▁ |
NumNonHAtoms | 0 | 1 | 13.16 | 6.50 | 2.00 | 8.00 | 12.00 | 17.00 | 47.00 | ▇▆▂▁▁ |
NumBonds | 0 | 1 | 25.91 | 13.48 | 4.00 | 17.00 | 23.00 | 31.50 | 97.00 | ▇▇▂▁▁ |
NumNonHBonds | 0 | 1 | 13.56 | 7.57 | 1.00 | 8.00 | 12.00 | 18.00 | 50.00 | ▇▇▂▁▁ |
NumMultBonds | 0 | 1 | 6.15 | 5.17 | 0.00 | 1.00 | 6.00 | 10.00 | 25.00 | ▇▆▃▁▁ |
NumRotBonds | 0 | 1 | 2.25 | 2.41 | 0.00 | 0.00 | 2.00 | 3.50 | 16.00 | ▇▂▁▁▁ |
NumDblBonds | 0 | 1 | 1.01 | 1.21 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▂▁▁▁ |
NumAromaticBonds | 0 | 1 | 5.12 | 5.26 | 0.00 | 0.00 | 6.00 | 6.00 | 25.00 | ▇▆▃▁▁ |
NumHydrogen | 0 | 1 | 12.35 | 7.32 | 0.00 | 7.00 | 11.00 | 16.00 | 47.00 | ▇▇▂▁▁ |
NumCarbon | 0 | 1 | 9.89 | 5.29 | 1.00 | 6.00 | 9.00 | 12.00 | 33.00 | ▇▇▃▁▁ |
NumNitrogen | 0 | 1 | 0.81 | 1.19 | 0.00 | 0.00 | 0.00 | 1.00 | 6.00 | ▇▂▁▁▁ |
NumOxygen | 0 | 1 | 1.57 | 1.73 | 0.00 | 0.00 | 1.00 | 2.00 | 13.00 | ▇▂▁▁▁ |
NumSulfer | 0 | 1 | 0.16 | 0.49 | 0.00 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
NumChlorine | 0 | 1 | 0.56 | 1.40 | 0.00 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
NumHalogen | 0 | 1 | 0.70 | 1.47 | 0.00 | 0.00 | 0.00 | 1.00 | 10.00 | ▇▁▁▁▁ |
NumRings | 0 | 1 | 1.40 | 1.30 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▃▂▁▁ |
HydrophilicFactor | 0 | 1 | -0.02 | 1.13 | -0.98 | -0.76 | -0.31 | 0.31 | 13.48 | ▇▁▁▁▁ |
SurfaceArea1 | 0 | 1 | 36.46 | 35.29 | 0.00 | 9.23 | 29.10 | 53.28 | 331.94 | ▇▂▁▁▁ |
SurfaceArea2 | 0 | 1 | 40.23 | 38.12 | 0.00 | 10.63 | 33.12 | 60.66 | 331.94 | ▇▂▁▁▁ |
###################################
# Confirming the dimensions (rows, columns)
# of the numeric predictor set
###################################
print(dim(DPA.Predictors.Numeric))
## [1] 951 20
##################################
# Loading dataset
##################################
DPA <- Solubility_Train

##################################
# Gathering descriptive statistics
##################################
# The outer parentheses make the assignment visible, so the skim summary
# of the full train set is both stored in DPA_Skimmed and printed.
(DPA_Skimmed <- skim(DPA))
Name | DPA |
Number of rows | 951 |
Number of columns | 229 |
_______________________ | |
Column type frequency: | |
factor | 1 |
numeric | 228 |
________________________ | |
Group variables | None |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
Log_Solubility_Class | 0 | 1 | FALSE | 2 | Hig: 524, Low: 427 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
FP001 | 0 | 1 | 0.49 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP002 | 0 | 1 | 0.54 | 0.50 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP003 | 0 | 1 | 0.44 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP004 | 0 | 1 | 0.58 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP005 | 0 | 1 | 0.58 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP006 | 0 | 1 | 0.40 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP007 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP008 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP009 | 0 | 1 | 0.28 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP010 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP011 | 0 | 1 | 0.21 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP012 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP013 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP014 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP015 | 0 | 1 | 0.86 | 0.35 | 0.00 | 1.00 | 1.00 | 1.00 | 1.00 | ▁▁▁▁▇ |
FP016 | 0 | 1 | 0.15 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP017 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP018 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP019 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP020 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP021 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP022 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP023 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP024 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP025 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP026 | 0 | 1 | 0.08 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP027 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP028 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP029 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP030 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP031 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP032 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP033 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP034 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP035 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP036 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP037 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP038 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP039 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP040 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP041 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP042 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP043 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP044 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP045 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP046 | 0 | 1 | 0.32 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP047 | 0 | 1 | 0.27 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP048 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP049 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP050 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP051 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP052 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP053 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP054 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP055 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP056 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP057 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP058 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP059 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP060 | 0 | 1 | 0.48 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP061 | 0 | 1 | 0.45 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP062 | 0 | 1 | 0.44 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP063 | 0 | 1 | 0.43 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP064 | 0 | 1 | 0.42 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP065 | 0 | 1 | 0.59 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP066 | 0 | 1 | 0.61 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP067 | 0 | 1 | 0.38 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP068 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP069 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP070 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP071 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP072 | 0 | 1 | 0.66 | 0.47 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP073 | 0 | 1 | 0.31 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP074 | 0 | 1 | 0.32 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP075 | 0 | 1 | 0.34 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP076 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP077 | 0 | 1 | 0.32 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP078 | 0 | 1 | 0.30 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP079 | 0 | 1 | 0.69 | 0.46 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP080 | 0 | 1 | 0.30 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP081 | 0 | 1 | 0.28 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP082 | 0 | 1 | 0.71 | 0.45 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP083 | 0 | 1 | 0.27 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP084 | 0 | 1 | 0.29 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP085 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP086 | 0 | 1 | 0.27 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP087 | 0 | 1 | 0.73 | 0.45 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP088 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP089 | 0 | 1 | 0.25 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP090 | 0 | 1 | 0.25 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP091 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP092 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP093 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP094 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP095 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP096 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP097 | 0 | 1 | 0.24 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP098 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP099 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP100 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP101 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP102 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP103 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP104 | 0 | 1 | 0.22 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP105 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP106 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP107 | 0 | 1 | 0.21 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP108 | 0 | 1 | 0.21 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP109 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP110 | 0 | 1 | 0.21 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP111 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP112 | 0 | 1 | 0.19 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP113 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP114 | 0 | 1 | 0.16 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP115 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP116 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP117 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP118 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP119 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP120 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP121 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP122 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP123 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP124 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP125 | 0 | 1 | 0.16 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP126 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP127 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP128 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP129 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP130 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP131 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP132 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP133 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP134 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP135 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP136 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP137 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP138 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP139 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP140 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP141 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP142 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP143 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP144 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP145 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP146 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP147 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP148 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP149 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP150 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP151 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP152 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP153 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP154 | 0 | 1 | 0.04 | 0.19 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP155 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP156 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP157 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP158 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP159 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP160 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP161 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP162 | 0 | 1 | 0.50 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP163 | 0 | 1 | 0.48 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP164 | 0 | 1 | 0.63 | 0.48 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP165 | 0 | 1 | 0.35 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP166 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP167 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP168 | 0 | 1 | 0.67 | 0.47 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP169 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP170 | 0 | 1 | 0.18 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP171 | 0 | 1 | 0.17 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP172 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP173 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP174 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP175 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP176 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP177 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP178 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP179 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP180 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP181 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP182 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP183 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP184 | 0 | 1 | 0.08 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP185 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP186 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP187 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP188 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP189 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP190 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP191 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP192 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP193 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP194 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP195 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP196 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP197 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP198 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP199 | 0 | 1 | 0.05 | 0.21 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP200 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP201 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP202 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP203 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP204 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP205 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP206 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP207 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP208 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
MolWeight | 0 | 1 | 201.65 | 97.91 | 46.09 | 122.60 | 179.23 | 264.34 | 665.81 | ▇▆▂▁▁ |
NumAtoms | 0 | 1 | 25.51 | 12.61 | 5.00 | 17.00 | 22.00 | 31.00 | 94.00 | ▇▆▂▁▁ |
NumNonHAtoms | 0 | 1 | 13.16 | 6.50 | 2.00 | 8.00 | 12.00 | 17.00 | 47.00 | ▇▆▂▁▁ |
NumBonds | 0 | 1 | 25.91 | 13.48 | 4.00 | 17.00 | 23.00 | 31.50 | 97.00 | ▇▇▂▁▁ |
NumNonHBonds | 0 | 1 | 13.56 | 7.57 | 1.00 | 8.00 | 12.00 | 18.00 | 50.00 | ▇▇▂▁▁ |
NumMultBonds | 0 | 1 | 6.15 | 5.17 | 0.00 | 1.00 | 6.00 | 10.00 | 25.00 | ▇▆▃▁▁ |
NumRotBonds | 0 | 1 | 2.25 | 2.41 | 0.00 | 0.00 | 2.00 | 3.50 | 16.00 | ▇▂▁▁▁ |
NumDblBonds | 0 | 1 | 1.01 | 1.21 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▂▁▁▁ |
NumAromaticBonds | 0 | 1 | 5.12 | 5.26 | 0.00 | 0.00 | 6.00 | 6.00 | 25.00 | ▇▆▃▁▁ |
NumHydrogen | 0 | 1 | 12.35 | 7.32 | 0.00 | 7.00 | 11.00 | 16.00 | 47.00 | ▇▇▂▁▁ |
NumCarbon | 0 | 1 | 9.89 | 5.29 | 1.00 | 6.00 | 9.00 | 12.00 | 33.00 | ▇▇▃▁▁ |
NumNitrogen | 0 | 1 | 0.81 | 1.19 | 0.00 | 0.00 | 0.00 | 1.00 | 6.00 | ▇▂▁▁▁ |
NumOxygen | 0 | 1 | 1.57 | 1.73 | 0.00 | 0.00 | 1.00 | 2.00 | 13.00 | ▇▂▁▁▁ |
NumSulfer | 0 | 1 | 0.16 | 0.49 | 0.00 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
NumChlorine | 0 | 1 | 0.56 | 1.40 | 0.00 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
NumHalogen | 0 | 1 | 0.70 | 1.47 | 0.00 | 0.00 | 0.00 | 1.00 | 10.00 | ▇▁▁▁▁ |
NumRings | 0 | 1 | 1.40 | 1.30 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▃▂▁▁ |
HydrophilicFactor | 0 | 1 | -0.02 | 1.13 | -0.98 | -0.76 | -0.31 | 0.31 | 13.48 | ▇▁▁▁▁ |
SurfaceArea1 | 0 | 1 | 36.46 | 35.29 | 0.00 | 9.23 | 29.10 | 53.28 | 331.94 | ▇▂▁▁▁ |
SurfaceArea2 | 0 | 1 | 40.23 | 38.12 | 0.00 | 10.63 | 33.12 | 60.66 | 331.94 | ▇▂▁▁▁ |
##################################
# Identifying columns with low variance
###################################
# saveMetrics = TRUE returns the per-column metrics table (freqRatio,
# percentUnique, zeroVar, nzv) instead of column positions.
DPA_LowVariance <- nearZeroVar(DPA,
                               freqCut = 95/5,
                               uniqueCut = 10,
                               saveMetrics = TRUE)

# Show only the columns flagged as near-zero variance
(DPA_LowVariance[DPA_LowVariance$nzv,])
## freqRatio percentUnique zeroVar nzv
## FP154 25.41667 0.2103049 FALSE TRUE
## FP199 20.13333 0.2103049 FALSE TRUE
## FP200 19.23404 0.2103049 FALSE TRUE
# Reports the predictors flagged as near-zero variance in DPA_LowVariance,
# removes them from DPA, and summarises the reduced data set.
# This `if` runs at top level, so the value of the branch taken is
# auto-printed: the final parenthesised skim assignment is displayed.
if ((nrow(DPA_LowVariance[DPA_LowVariance$nzv,])) == 0) {
  print("No low variance predictors noted.")
} else {

  # Number of flagged columns, computed once and reused below
  DPA_LowVarianceForRemoval <- (nrow(DPA_LowVariance[DPA_LowVariance$nzv,]))

  # NOTE(review): the message text says First.Second.Mode.Ratio>4, but the
  # nearZeroVar() call that builds DPA_LowVariance uses freqCut = 95/5 (= 19).
  # The string is left unchanged so it matches the recorded output - confirm.
  print(paste0("Low variance observed for ",
               DPA_LowVarianceForRemoval,
               " numeric variable(s) with First.Second.Mode.Ratio>4 and Unique.Count.Ratio<0.10."))

  print(paste0("Low variance can be resolved by removing ",
               DPA_LowVarianceForRemoval,
               " numeric variable(s)."))

  # List each flagged column by name
  for (j in seq_len(DPA_LowVarianceForRemoval)) {
    DPA_LowVarianceRemovedVariable <- rownames(DPA_LowVariance[DPA_LowVariance$nzv,])[j]
    print(paste0("Variable ",
                 j, " for removal: ",
                 DPA_LowVarianceRemovedVariable))
  }

  # Skim rows for the flagged columns. As a non-final expression inside
  # braces this value is computed but not auto-printed.
  DPA %>%
    skim() %>%
    dplyr::filter(skim_variable %in% rownames(DPA_LowVariance[DPA_LowVariance$nzv,]))

  ##################################
  # Filtering out columns with low variance
  #################################
  # drop = FALSE keeps a data frame even if a single column remains
  DPA_ExcludedLowVariance <- DPA[, !names(DPA) %in% rownames(DPA_LowVariance[DPA_LowVariance$nzv,]), drop = FALSE]

  ##################################
  # Gathering descriptive statistics
  ##################################
  (DPA_ExcludedLowVariance_Skimmed <- skim(DPA_ExcludedLowVariance))
}
## [1] "Low variance observed for 3 numeric variable(s) with First.Second.Mode.Ratio>4 and Unique.Count.Ratio<0.10."
## [1] "Low variance can be resolved by removing 3 numeric variable(s)."
## [1] "Variable 1 for removal: FP154"
## [1] "Variable 2 for removal: FP199"
## [1] "Variable 3 for removal: FP200"
Name | DPA_ExcludedLowVariance |
Number of rows | 951 |
Number of columns | 226 |
_______________________ | |
Column type frequency: | |
factor | 1 |
numeric | 225 |
________________________ | |
Group variables | None |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
Log_Solubility_Class | 0 | 1 | FALSE | 2 | Hig: 524, Low: 427 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
FP001 | 0 | 1 | 0.49 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP002 | 0 | 1 | 0.54 | 0.50 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP003 | 0 | 1 | 0.44 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP004 | 0 | 1 | 0.58 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP005 | 0 | 1 | 0.58 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP006 | 0 | 1 | 0.40 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP007 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP008 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP009 | 0 | 1 | 0.28 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP010 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP011 | 0 | 1 | 0.21 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP012 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP013 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP014 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP015 | 0 | 1 | 0.86 | 0.35 | 0.00 | 1.00 | 1.00 | 1.00 | 1.00 | ▁▁▁▁▇ |
FP016 | 0 | 1 | 0.15 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP017 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP018 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP019 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP020 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP021 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP022 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP023 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP024 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP025 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP026 | 0 | 1 | 0.08 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP027 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP028 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP029 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP030 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP031 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP032 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP033 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP034 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP035 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP036 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP037 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP038 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP039 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP040 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP041 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP042 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP043 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP044 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP045 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP046 | 0 | 1 | 0.32 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP047 | 0 | 1 | 0.27 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP048 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP049 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP050 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP051 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP052 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP053 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP054 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP055 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP056 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP057 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP058 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP059 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP060 | 0 | 1 | 0.48 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP061 | 0 | 1 | 0.45 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP062 | 0 | 1 | 0.44 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP063 | 0 | 1 | 0.43 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP064 | 0 | 1 | 0.42 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP065 | 0 | 1 | 0.59 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP066 | 0 | 1 | 0.61 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP067 | 0 | 1 | 0.38 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP068 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP069 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP070 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP071 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP072 | 0 | 1 | 0.66 | 0.47 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP073 | 0 | 1 | 0.31 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP074 | 0 | 1 | 0.32 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP075 | 0 | 1 | 0.34 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP076 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP077 | 0 | 1 | 0.32 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP078 | 0 | 1 | 0.30 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP079 | 0 | 1 | 0.69 | 0.46 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP080 | 0 | 1 | 0.30 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP081 | 0 | 1 | 0.28 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP082 | 0 | 1 | 0.71 | 0.45 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP083 | 0 | 1 | 0.27 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP084 | 0 | 1 | 0.29 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP085 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP086 | 0 | 1 | 0.27 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP087 | 0 | 1 | 0.73 | 0.45 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP088 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP089 | 0 | 1 | 0.25 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP090 | 0 | 1 | 0.25 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP091 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP092 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP093 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP094 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP095 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP096 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP097 | 0 | 1 | 0.24 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP098 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP099 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP100 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP101 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP102 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP103 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP104 | 0 | 1 | 0.22 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP105 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP106 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP107 | 0 | 1 | 0.21 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP108 | 0 | 1 | 0.21 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP109 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP110 | 0 | 1 | 0.21 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP111 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP112 | 0 | 1 | 0.19 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP113 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP114 | 0 | 1 | 0.16 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP115 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP116 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP117 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP118 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP119 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP120 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP121 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP122 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP123 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP124 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP125 | 0 | 1 | 0.16 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP126 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP127 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP128 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP129 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP130 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP131 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP132 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP133 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP134 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP135 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP136 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP137 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP138 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP139 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP140 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP141 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP142 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP143 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP144 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP145 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP146 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP147 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP148 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP149 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP150 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP151 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP152 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP153 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP155 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP156 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP157 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP158 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP159 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP160 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP161 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP162 | 0 | 1 | 0.50 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP163 | 0 | 1 | 0.48 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP164 | 0 | 1 | 0.63 | 0.48 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP165 | 0 | 1 | 0.35 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP166 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP167 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP168 | 0 | 1 | 0.67 | 0.47 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP169 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP170 | 0 | 1 | 0.18 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP171 | 0 | 1 | 0.17 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP172 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP173 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP174 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP175 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP176 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP177 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP178 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP179 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP180 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP181 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP182 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP183 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP184 | 0 | 1 | 0.08 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP185 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP186 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP187 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP188 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP189 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP190 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP191 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP192 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP193 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP194 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP195 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP196 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP197 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP198 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP201 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP202 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP203 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP204 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP205 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP206 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP207 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP208 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
MolWeight | 0 | 1 | 201.65 | 97.91 | 46.09 | 122.60 | 179.23 | 264.34 | 665.81 | ▇▆▂▁▁ |
NumAtoms | 0 | 1 | 25.51 | 12.61 | 5.00 | 17.00 | 22.00 | 31.00 | 94.00 | ▇▆▂▁▁ |
NumNonHAtoms | 0 | 1 | 13.16 | 6.50 | 2.00 | 8.00 | 12.00 | 17.00 | 47.00 | ▇▆▂▁▁ |
NumBonds | 0 | 1 | 25.91 | 13.48 | 4.00 | 17.00 | 23.00 | 31.50 | 97.00 | ▇▇▂▁▁ |
NumNonHBonds | 0 | 1 | 13.56 | 7.57 | 1.00 | 8.00 | 12.00 | 18.00 | 50.00 | ▇▇▂▁▁ |
NumMultBonds | 0 | 1 | 6.15 | 5.17 | 0.00 | 1.00 | 6.00 | 10.00 | 25.00 | ▇▆▃▁▁ |
NumRotBonds | 0 | 1 | 2.25 | 2.41 | 0.00 | 0.00 | 2.00 | 3.50 | 16.00 | ▇▂▁▁▁ |
NumDblBonds | 0 | 1 | 1.01 | 1.21 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▂▁▁▁ |
NumAromaticBonds | 0 | 1 | 5.12 | 5.26 | 0.00 | 0.00 | 6.00 | 6.00 | 25.00 | ▇▆▃▁▁ |
NumHydrogen | 0 | 1 | 12.35 | 7.32 | 0.00 | 7.00 | 11.00 | 16.00 | 47.00 | ▇▇▂▁▁ |
NumCarbon | 0 | 1 | 9.89 | 5.29 | 1.00 | 6.00 | 9.00 | 12.00 | 33.00 | ▇▇▃▁▁ |
NumNitrogen | 0 | 1 | 0.81 | 1.19 | 0.00 | 0.00 | 0.00 | 1.00 | 6.00 | ▇▂▁▁▁ |
NumOxygen | 0 | 1 | 1.57 | 1.73 | 0.00 | 0.00 | 1.00 | 2.00 | 13.00 | ▇▂▁▁▁ |
NumSulfer | 0 | 1 | 0.16 | 0.49 | 0.00 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
NumChlorine | 0 | 1 | 0.56 | 1.40 | 0.00 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
NumHalogen | 0 | 1 | 0.70 | 1.47 | 0.00 | 0.00 | 0.00 | 1.00 | 10.00 | ▇▁▁▁▁ |
NumRings | 0 | 1 | 1.40 | 1.30 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▃▂▁▁ |
HydrophilicFactor | 0 | 1 | -0.02 | 1.13 | -0.98 | -0.76 | -0.31 | 0.31 | 13.48 | ▇▁▁▁▁ |
SurfaceArea1 | 0 | 1 | 36.46 | 35.29 | 0.00 | 9.23 | 29.10 | 53.28 | 331.94 | ▇▂▁▁▁ |
SurfaceArea2 | 0 | 1 | 40.23 | 38.12 | 0.00 | 10.63 | 33.12 | 60.66 | 331.94 | ▇▂▁▁▁ |
###################################
# Verifying the data dimensions
# (rows, then columns) of the data
# left after the low variance filter
###################################
dim(DPA_ExcludedLowVariance)
## [1] 951 226
##################################
# Loading dataset
##################################
DPA <- Solubility_Train

##################################
# Listing all predictors
# (every column except the response)
##################################
DPA.Predictors <- DPA[, !names(DPA) %in% c("Log_Solubility_Class"), drop = FALSE]

##################################
# Listing all numeric predictors
# (columns whose name does not contain "FP")
##################################
# A logical mask is used instead of negative grep() positions:
# -integer(0) would select zero columns if no name matched "FP".
DPA.Predictors.Numeric <- DPA.Predictors[, !grepl("FP", names(DPA.Predictors)), drop = FALSE]
##################################
# Visualizing pairwise correlation between predictors
##################################
# Significance test results for each pairwise Pearson correlation;
# the p-value matrix ($p) is passed to corrplot() below.
DPA_CorrelationTest <- cor.mtest(DPA.Predictors.Numeric,
                                 method = "pearson",
                                 conf.level = .95)

# Upper-triangle correlation plot in the original column order.
# Cells whose p-value exceeds sig.level are left blank.
corrplot(cor(DPA.Predictors.Numeric,
             method = "pearson",
             use="pairwise.complete.obs"),
         method = "circle",
         type = "upper",
         order = "original",
         tl.col = "black",
         tl.cex = 0.75,
         tl.srt = 90,
         sig.level = 0.05,
         p.mat = DPA_CorrelationTest$p,
         insig = "blank")
##################################
# Identifying the highly correlated variables
##################################
# Pairwise Pearson correlation matrix of the numeric predictors.
DPA_Correlation <- cor(DPA.Predictors.Numeric,
                       method = "pearson",
                       use="pairwise.complete.obs")

# Counting predictor pairs with |r| > 0.95. Only the upper triangle is
# inspected so each pair is counted once and the diagonal is ignored.
# na.rm = TRUE keeps an undefined correlation from turning the count
# into NA, which would break the comparisons made on it afterwards.
(DPA_HighlyCorrelatedCount <- sum(abs(DPA_Correlation[upper.tri(DPA_Correlation)]) > 0.95,
                                  na.rm = TRUE))
## [1] 3
# Reporting whether any highly correlated predictor pairs were found and,
# if so, ranking the top pairs with corr_cross().
if (DPA_HighlyCorrelatedCount == 0) {
  print("No highly correlated predictors noted.")
} else {
  print(paste0("High correlation observed for ",
               (DPA_HighlyCorrelatedCount)," pairs of numeric variable(s) with Correlation.Coefficient>0.95."))

  # The number of pairs requested (top) equals the count found above.
  (DPA_HighlyCorrelatedPairs <- corr_cross(DPA.Predictors.Numeric,
                                           max_pvalue = 0.05,
                                           top = DPA_HighlyCorrelatedCount,
                                           rm.na = TRUE,
                                           grid = FALSE
  ))
}
## [1] "High correlation observed for 3 pairs of numeric variable(s) with Correlation.Coefficient>0.95."
# Resolving high correlation: selecting the numeric predictors to drop,
# reporting them, and building the filtered data set with its summary.
if (DPA_HighlyCorrelatedCount > 0) {
  # findCorrelation() returns column positions within DPA_Correlation
  # (the numeric-predictor subset), NOT positions within DPA.
  DPA_HighlyCorrelated <- findCorrelation(DPA_Correlation, cutoff = 0.95)

  (DPA_HighlyCorrelatedForRemoval <- length(DPA_HighlyCorrelated))

  print(paste0("High correlation can be resolved by removing ",
               (DPA_HighlyCorrelatedForRemoval)," numeric variable(s)."))

  # Translating the positions into variable names so the same variables
  # that are reported here are the ones actually removed from DPA.
  DPA_HighlyCorrelatedVariables <- colnames(DPA_Correlation)[DPA_HighlyCorrelated]

  # seq_along() avoids the 1:0 trap when nothing is flagged for removal.
  for (j in seq_along(DPA_HighlyCorrelatedVariables)) {
    DPA_HighlyCorrelatedRemovedVariable <- DPA_HighlyCorrelatedVariables[j]
    print(paste0("Variable ",
                 j," for removal: ",
                 DPA_HighlyCorrelatedRemovedVariable))
  }

  ##################################
  # Filtering out columns with high correlation
  #################################
  # Columns are dropped by name. Dropping by the raw positions would
  # remove whichever DPA columns happen to sit at those positions
  # instead of the flagged numeric predictors.
  DPA_ExcludedHighCorrelation <- DPA[, !names(DPA) %in% DPA_HighlyCorrelatedVariables,
                                     drop = FALSE]

  ##################################
  # Gathering descriptive statistics
  ##################################
  (DPA_ExcludedHighCorrelation_Skimmed <- skim(DPA_ExcludedHighCorrelation))
}
## [1] "High correlation can be resolved by removing 3 numeric variable(s)."
## [1] "Variable 1 for removal: NumNonHAtoms"
## [1] "Variable 2 for removal: NumBonds"
## [1] "Variable 3 for removal: NumAromaticBonds"
Name | DPA_ExcludedHighCorrelati… |
Number of rows | 951 |
Number of columns | 226 |
_______________________ | |
Column type frequency: | |
factor | 1 |
numeric | 225 |
________________________ | |
Group variables | None |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
Log_Solubility_Class | 0 | 1 | FALSE | 2 | Hig: 524, Low: 427 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
FP001 | 0 | 1 | 0.49 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP002 | 0 | 1 | 0.54 | 0.50 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP005 | 0 | 1 | 0.58 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP006 | 0 | 1 | 0.40 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP007 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP008 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP010 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP011 | 0 | 1 | 0.21 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP012 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP013 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP014 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP015 | 0 | 1 | 0.86 | 0.35 | 0.00 | 1.00 | 1.00 | 1.00 | 1.00 | ▁▁▁▁▇ |
FP016 | 0 | 1 | 0.15 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP017 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP018 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP019 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP020 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP021 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP022 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP023 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP024 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP025 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP026 | 0 | 1 | 0.08 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP027 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP028 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP029 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP030 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP031 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP032 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP033 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP034 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP035 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP036 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP037 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP038 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP039 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP040 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP041 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP042 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP043 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP044 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP045 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP046 | 0 | 1 | 0.32 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP047 | 0 | 1 | 0.27 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP048 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP049 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP050 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP051 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP052 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP053 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP054 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP055 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP056 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP057 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP058 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP059 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP060 | 0 | 1 | 0.48 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP061 | 0 | 1 | 0.45 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP062 | 0 | 1 | 0.44 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP063 | 0 | 1 | 0.43 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP064 | 0 | 1 | 0.42 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP065 | 0 | 1 | 0.59 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP066 | 0 | 1 | 0.61 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP067 | 0 | 1 | 0.38 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP068 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP069 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP070 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP071 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP072 | 0 | 1 | 0.66 | 0.47 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP073 | 0 | 1 | 0.31 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP074 | 0 | 1 | 0.32 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP075 | 0 | 1 | 0.34 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP076 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP077 | 0 | 1 | 0.32 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP078 | 0 | 1 | 0.30 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP079 | 0 | 1 | 0.69 | 0.46 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP080 | 0 | 1 | 0.30 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP081 | 0 | 1 | 0.28 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP082 | 0 | 1 | 0.71 | 0.45 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP083 | 0 | 1 | 0.27 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP084 | 0 | 1 | 0.29 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP085 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP086 | 0 | 1 | 0.27 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP087 | 0 | 1 | 0.73 | 0.45 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP088 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP089 | 0 | 1 | 0.25 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP090 | 0 | 1 | 0.25 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP091 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP092 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP093 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP094 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP095 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP096 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP097 | 0 | 1 | 0.24 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP098 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP099 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP100 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP101 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP102 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP103 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP104 | 0 | 1 | 0.22 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP105 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP106 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP107 | 0 | 1 | 0.21 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP108 | 0 | 1 | 0.21 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP109 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP110 | 0 | 1 | 0.21 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP111 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP112 | 0 | 1 | 0.19 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP113 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP114 | 0 | 1 | 0.16 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP115 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP116 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP117 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP118 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP119 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP120 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP121 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP122 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP123 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP124 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP125 | 0 | 1 | 0.16 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP126 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP127 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP128 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP129 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP130 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP131 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP132 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP133 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP134 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP135 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP136 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP137 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP138 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP139 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP140 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP141 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP142 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP143 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP144 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP145 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP146 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP147 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP148 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP149 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP150 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP151 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP152 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP153 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP154 | 0 | 1 | 0.04 | 0.19 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP155 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP156 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP157 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP158 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP159 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP160 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP161 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP162 | 0 | 1 | 0.50 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP163 | 0 | 1 | 0.48 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP164 | 0 | 1 | 0.63 | 0.48 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP165 | 0 | 1 | 0.35 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP166 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP167 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP168 | 0 | 1 | 0.67 | 0.47 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP169 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP170 | 0 | 1 | 0.18 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP171 | 0 | 1 | 0.17 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP172 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP173 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP174 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP175 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP176 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP177 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP178 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP179 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP180 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP181 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP182 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP183 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP184 | 0 | 1 | 0.08 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP185 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP186 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP187 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP188 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP189 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP190 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP191 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP192 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP193 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP194 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP195 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP196 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP197 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP198 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP199 | 0 | 1 | 0.05 | 0.21 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP200 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP201 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP202 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP203 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP204 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP205 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP206 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP207 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP208 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
MolWeight | 0 | 1 | 201.65 | 97.91 | 46.09 | 122.60 | 179.23 | 264.34 | 665.81 | ▇▆▂▁▁ |
NumAtoms | 0 | 1 | 25.51 | 12.61 | 5.00 | 17.00 | 22.00 | 31.00 | 94.00 | ▇▆▂▁▁ |
NumNonHAtoms | 0 | 1 | 13.16 | 6.50 | 2.00 | 8.00 | 12.00 | 17.00 | 47.00 | ▇▆▂▁▁ |
NumBonds | 0 | 1 | 25.91 | 13.48 | 4.00 | 17.00 | 23.00 | 31.50 | 97.00 | ▇▇▂▁▁ |
NumNonHBonds | 0 | 1 | 13.56 | 7.57 | 1.00 | 8.00 | 12.00 | 18.00 | 50.00 | ▇▇▂▁▁ |
NumMultBonds | 0 | 1 | 6.15 | 5.17 | 0.00 | 1.00 | 6.00 | 10.00 | 25.00 | ▇▆▃▁▁ |
NumRotBonds | 0 | 1 | 2.25 | 2.41 | 0.00 | 0.00 | 2.00 | 3.50 | 16.00 | ▇▂▁▁▁ |
NumDblBonds | 0 | 1 | 1.01 | 1.21 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▂▁▁▁ |
NumAromaticBonds | 0 | 1 | 5.12 | 5.26 | 0.00 | 0.00 | 6.00 | 6.00 | 25.00 | ▇▆▃▁▁ |
NumHydrogen | 0 | 1 | 12.35 | 7.32 | 0.00 | 7.00 | 11.00 | 16.00 | 47.00 | ▇▇▂▁▁ |
NumCarbon | 0 | 1 | 9.89 | 5.29 | 1.00 | 6.00 | 9.00 | 12.00 | 33.00 | ▇▇▃▁▁ |
NumNitrogen | 0 | 1 | 0.81 | 1.19 | 0.00 | 0.00 | 0.00 | 1.00 | 6.00 | ▇▂▁▁▁ |
NumOxygen | 0 | 1 | 1.57 | 1.73 | 0.00 | 0.00 | 1.00 | 2.00 | 13.00 | ▇▂▁▁▁ |
NumSulfer | 0 | 1 | 0.16 | 0.49 | 0.00 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
NumChlorine | 0 | 1 | 0.56 | 1.40 | 0.00 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
NumHalogen | 0 | 1 | 0.70 | 1.47 | 0.00 | 0.00 | 0.00 | 1.00 | 10.00 | ▇▁▁▁▁ |
NumRings | 0 | 1 | 1.40 | 1.30 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▃▂▁▁ |
HydrophilicFactor | 0 | 1 | -0.02 | 1.13 | -0.98 | -0.76 | -0.31 | 0.31 | 13.48 | ▇▁▁▁▁ |
SurfaceArea1 | 0 | 1 | 36.46 | 35.29 | 0.00 | 9.23 | 29.10 | 53.28 | 331.94 | ▇▂▁▁▁ |
SurfaceArea2 | 0 | 1 | 40.23 | 38.12 | 0.00 | 10.63 | 33.12 | 60.66 | 331.94 | ▇▂▁▁▁ |
###################################
# Verifying the data dimensions:
# dim() reports c(rows, columns) of
# DPA_ExcludedHighCorrelation
###################################
dim(DPA_ExcludedHighCorrelation)
## [1] 951 226
##################################
# Loading dataset
##################################
DPA <- Solubility_Train
##################################
# Listing all predictors
# (every column except the response;
# drop = FALSE keeps a data frame even
# if a single predictor remains)
##################################
DPA.Predictors <- DPA[, !names(DPA) %in% c("Log_Solubility_Class"), drop = FALSE]
##################################
# Listing all numeric predictors
# (vapply() always returns a logical
# vector, unlike sapply(), whose return
# type changes on empty input)
##################################
DPA.Predictors.Numeric <- DPA.Predictors[,
                                         vapply(DPA.Predictors, is.numeric, logical(1)),
                                         drop = FALSE]
##################################
# Identifying the linearly dependent variables
# (findLinearCombos() returns a list with
# $linearCombos, one index vector per dependent
# subset, and $remove, the column indices whose
# removal resolves the dependencies)
##################################
DPA_LinearlyDependent <- findLinearCombos(DPA.Predictors.Numeric)
# Wrapped in parentheses so the count is also printed.
(DPA_LinearlyDependentCount <- length(DPA_LinearlyDependent$linearCombos))
## [1] 2
# Reporting the linear-dependency findings: either a single "none found"
# message, or the number of dependent subsets followed by the member
# variables of each subset (one printed character vector per subset).
if (DPA_LinearlyDependentCount == 0) {
  print("No linearly dependent predictors noted.")
} else {
  print(paste0("Linear dependency observed for ",
               (DPA_LinearlyDependentCount)," subset(s) of numeric variable(s)."))
  # seq_len() rather than 1:n so the loop can never run over c(1, 0).
  for (i in seq_len(DPA_LinearlyDependentCount)) {
    # Translate the column indices of subset i into column names.
    DPA_LinearlyDependentSubset <- colnames(DPA.Predictors.Numeric)[DPA_LinearlyDependent$linearCombos[[i]]]
    # paste0() is vectorised over the subset, so one line is printed
    # per member variable.
    print(paste0("Linear dependent variable(s) for subset ",
                 i," include: ",
                 DPA_LinearlyDependentSubset))
  }
}
## [1] "Linear dependency observed for 2 subset(s) of numeric variable(s)."
## [1] "Linear dependent variable(s) for subset 1 include: NumNonHBonds"
## [2] "Linear dependent variable(s) for subset 1 include: NumAtoms"
## [3] "Linear dependent variable(s) for subset 1 include: NumNonHAtoms"
## [4] "Linear dependent variable(s) for subset 1 include: NumBonds"
## [1] "Linear dependent variable(s) for subset 2 include: NumHydrogen"
## [2] "Linear dependent variable(s) for subset 2 include: NumAtoms"
## [3] "Linear dependent variable(s) for subset 2 include: NumNonHAtoms"
##################################
# Identifying the linearly dependent variables for removal
##################################
if (DPA_LinearlyDependentCount > 0) {
  DPA_LinearlyDependent <- findLinearCombos(DPA.Predictors.Numeric)
  DPA_LinearlyDependentForRemoval <- length(DPA_LinearlyDependent$remove)
  print(paste0("Linear dependency can be resolved by removing ",
               (DPA_LinearlyDependentForRemoval)," numeric variable(s)."))
  # seq_len() rather than 1:n so an empty $remove prints nothing
  # instead of iterating over c(1, 0).
  for (j in seq_len(DPA_LinearlyDependentForRemoval)) {
    DPA_LinearlyDependentRemovedVariable <- colnames(DPA.Predictors.Numeric)[DPA_LinearlyDependent$remove[j]]
    print(paste0("Variable ",
                 j," for removal: ",
                 DPA_LinearlyDependentRemovedVariable))
  }
  ##################################
  # Filtering out columns with linear dependency
  ##################################
  # $remove holds positions within DPA.Predictors.Numeric, not within DPA.
  # Dropping by column name keeps the filter correct regardless of column
  # order and leaves DPA intact when $remove is empty (a negative index
  # built from an empty vector would select zero columns).
  DPA_ExcludedLinearlyDependent <- DPA[,
                                       !names(DPA) %in%
                                         colnames(DPA.Predictors.Numeric)[DPA_LinearlyDependent$remove],
                                       drop = FALSE]
  ##################################
  # Gathering descriptive statistics
  ##################################
  # Kept as the last expression of the branch and wrapped in parentheses
  # so the skim summary is printed as well as stored.
  (DPA_ExcludedLinearlyDependent_Skimmed <- skim(DPA_ExcludedLinearlyDependent))
}
## [1] "Linear dependency can be resolved by removing 2 numeric variable(s)."
## [1] "Variable 1 for removal: NumNonHBonds"
## [1] "Variable 2 for removal: NumHydrogen"
Name | DPA_ExcludedLinearlyDepen… |
Number of rows | 951 |
Number of columns | 227 |
_______________________ | |
Column type frequency: | |
factor | 1 |
numeric | 226 |
________________________ | |
Group variables | None |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
Log_Solubility_Class | 0 | 1 | FALSE | 2 | Hig: 524, Low: 427 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
FP001 | 0 | 1 | 0.49 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP002 | 0 | 1 | 0.54 | 0.50 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP003 | 0 | 1 | 0.44 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP004 | 0 | 1 | 0.58 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP005 | 0 | 1 | 0.58 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP006 | 0 | 1 | 0.40 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP007 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP008 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP009 | 0 | 1 | 0.28 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP010 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP011 | 0 | 1 | 0.21 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP012 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP013 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP014 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP015 | 0 | 1 | 0.86 | 0.35 | 0.00 | 1.00 | 1.00 | 1.00 | 1.00 | ▁▁▁▁▇ |
FP016 | 0 | 1 | 0.15 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP017 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP018 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP019 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP020 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP021 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP022 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP023 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP024 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP025 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP026 | 0 | 1 | 0.08 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP027 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP028 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP029 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP030 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP031 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP032 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP033 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP034 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP035 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP036 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP037 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP038 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP039 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP040 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP041 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP042 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP043 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP044 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP045 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP046 | 0 | 1 | 0.32 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP047 | 0 | 1 | 0.27 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP048 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP049 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP050 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP051 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP052 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP053 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP054 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP055 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP056 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP057 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP058 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP059 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP060 | 0 | 1 | 0.48 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP061 | 0 | 1 | 0.45 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP062 | 0 | 1 | 0.44 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP063 | 0 | 1 | 0.43 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP064 | 0 | 1 | 0.42 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
FP065 | 0 | 1 | 0.59 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
FP066 | 0 | 1 | 0.61 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP067 | 0 | 1 | 0.38 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP068 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP069 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP070 | 0 | 1 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP071 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP072 | 0 | 1 | 0.66 | 0.47 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP073 | 0 | 1 | 0.31 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP074 | 0 | 1 | 0.32 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP075 | 0 | 1 | 0.34 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP076 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP077 | 0 | 1 | 0.32 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP078 | 0 | 1 | 0.30 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP079 | 0 | 1 | 0.69 | 0.46 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP080 | 0 | 1 | 0.30 | 0.46 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP081 | 0 | 1 | 0.28 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP082 | 0 | 1 | 0.71 | 0.45 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP083 | 0 | 1 | 0.27 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP084 | 0 | 1 | 0.29 | 0.45 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP085 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP086 | 0 | 1 | 0.27 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP087 | 0 | 1 | 0.73 | 0.45 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
FP088 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP089 | 0 | 1 | 0.25 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP090 | 0 | 1 | 0.25 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP091 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP092 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP093 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP094 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP095 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP096 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP097 | 0 | 1 | 0.24 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP098 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP099 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP100 | 0 | 1 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP101 | 0 | 1 | 0.24 | 0.43 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP102 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP103 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP104 | 0 | 1 | 0.22 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP105 | 0 | 1 | 0.22 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP106 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP107 | 0 | 1 | 0.21 | 0.41 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP108 | 0 | 1 | 0.21 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP109 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP110 | 0 | 1 | 0.21 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP111 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP112 | 0 | 1 | 0.19 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP113 | 0 | 1 | 0.20 | 0.40 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP114 | 0 | 1 | 0.16 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP115 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP116 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP117 | 0 | 1 | 0.18 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP118 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP119 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP120 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP121 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP122 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP123 | 0 | 1 | 0.17 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP124 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP125 | 0 | 1 | 0.16 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP126 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP127 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP128 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP129 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP130 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP131 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP132 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP133 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP134 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP135 | 0 | 1 | 0.13 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP136 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP137 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP138 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP139 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP140 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP141 | 0 | 1 | 0.12 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP142 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP143 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP144 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP145 | 0 | 1 | 0.10 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP146 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP147 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP148 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP149 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP150 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP151 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP152 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP153 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP154 | 0 | 1 | 0.04 | 0.19 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP155 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP156 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP157 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP158 | 0 | 1 | 0.05 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP159 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP160 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP161 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP162 | 0 | 1 | 0.50 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP163 | 0 | 1 | 0.48 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
FP164 | 0 | 1 | 0.63 | 0.48 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP165 | 0 | 1 | 0.35 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
FP166 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP167 | 0 | 1 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP168 | 0 | 1 | 0.67 | 0.47 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▅▁▁▁▇ |
FP169 | 0 | 1 | 0.19 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP170 | 0 | 1 | 0.18 | 0.39 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP171 | 0 | 1 | 0.17 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP172 | 0 | 1 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
FP173 | 0 | 1 | 0.14 | 0.35 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP174 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP175 | 0 | 1 | 0.13 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP176 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP177 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP178 | 0 | 1 | 0.12 | 0.33 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP179 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP180 | 0 | 1 | 0.11 | 0.31 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP181 | 0 | 1 | 0.09 | 0.29 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP182 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP183 | 0 | 1 | 0.08 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP184 | 0 | 1 | 0.08 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP185 | 0 | 1 | 0.09 | 0.28 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP186 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP187 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP188 | 0 | 1 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP189 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP190 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP191 | 0 | 1 | 0.07 | 0.26 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP192 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP193 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP194 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP195 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP196 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP197 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP198 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP199 | 0 | 1 | 0.05 | 0.21 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP200 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP201 | 0 | 1 | 0.05 | 0.22 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP202 | 0 | 1 | 0.26 | 0.44 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▃ |
FP203 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP204 | 0 | 1 | 0.10 | 0.30 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP205 | 0 | 1 | 0.08 | 0.27 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP206 | 0 | 1 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP207 | 0 | 1 | 0.06 | 0.23 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
FP208 | 0 | 1 | 0.11 | 0.32 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
MolWeight | 0 | 1 | 201.65 | 97.91 | 46.09 | 122.60 | 179.23 | 264.34 | 665.81 | ▇▆▂▁▁ |
NumAtoms | 0 | 1 | 25.51 | 12.61 | 5.00 | 17.00 | 22.00 | 31.00 | 94.00 | ▇▆▂▁▁ |
NumNonHAtoms | 0 | 1 | 13.16 | 6.50 | 2.00 | 8.00 | 12.00 | 17.00 | 47.00 | ▇▆▂▁▁ |
NumBonds | 0 | 1 | 25.91 | 13.48 | 4.00 | 17.00 | 23.00 | 31.50 | 97.00 | ▇▇▂▁▁ |
NumMultBonds | 0 | 1 | 6.15 | 5.17 | 0.00 | 1.00 | 6.00 | 10.00 | 25.00 | ▇▆▃▁▁ |
NumRotBonds | 0 | 1 | 2.25 | 2.41 | 0.00 | 0.00 | 2.00 | 3.50 | 16.00 | ▇▂▁▁▁ |
NumDblBonds | 0 | 1 | 1.01 | 1.21 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▂▁▁▁ |
NumAromaticBonds | 0 | 1 | 5.12 | 5.26 | 0.00 | 0.00 | 6.00 | 6.00 | 25.00 | ▇▆▃▁▁ |
NumCarbon | 0 | 1 | 9.89 | 5.29 | 1.00 | 6.00 | 9.00 | 12.00 | 33.00 | ▇▇▃▁▁ |
NumNitrogen | 0 | 1 | 0.81 | 1.19 | 0.00 | 0.00 | 0.00 | 1.00 | 6.00 | ▇▂▁▁▁ |
NumOxygen | 0 | 1 | 1.57 | 1.73 | 0.00 | 0.00 | 1.00 | 2.00 | 13.00 | ▇▂▁▁▁ |
NumSulfer | 0 | 1 | 0.16 | 0.49 | 0.00 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
NumChlorine | 0 | 1 | 0.56 | 1.40 | 0.00 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
NumHalogen | 0 | 1 | 0.70 | 1.47 | 0.00 | 0.00 | 0.00 | 1.00 | 10.00 | ▇▁▁▁▁ |
NumRings | 0 | 1 | 1.40 | 1.30 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▃▂▁▁ |
HydrophilicFactor | 0 | 1 | -0.02 | 1.13 | -0.98 | -0.76 | -0.31 | 0.31 | 13.48 | ▇▁▁▁▁ |
SurfaceArea1 | 0 | 1 | 36.46 | 35.29 | 0.00 | 9.23 | 29.10 | 53.28 | 331.94 | ▇▂▁▁▁ |
SurfaceArea2 | 0 | 1 | 40.23 | 38.12 | 0.00 | 10.63 | 33.12 | 60.66 | 331.94 | ▇▂▁▁▁ |
###################################
# Verifying the data dimensions:
# dim() reports c(rows, columns) of
# DPA_ExcludedLinearlyDependent
###################################
dim(DPA_ExcludedLinearlyDependent)
## [1] 951 227
##################################
# Loading dataset
##################################
DPA <- Solubility_Train
##################################
# Listing all predictors
# (every column except the response)
##################################
DPA.Predictors <- DPA[, !names(DPA) %in% c("Log_Solubility_Class"), drop = FALSE]
##################################
# Listing all numeric predictors
# (every predictor whose name does not
# contain "FP"; a logical mask is used
# because a negative index built from an
# empty grep() result would select no
# columns at all)
##################################
DPA.Predictors.Numeric <- DPA.Predictors[, !grepl("FP", names(DPA.Predictors)), drop = FALSE]
##################################
# Applying a Box-Cox transformation
# (preProcess() estimates the transformation,
# predict() applies it to the same data)
##################################
DPA_BoxCox <- preProcess(DPA.Predictors.Numeric, method = c("BoxCox"))
DPA_BoxCoxTransformed <- predict(DPA_BoxCox, DPA.Predictors.Numeric)
##################################
# Gathering descriptive statistics
##################################
# Wrapped in parentheses so the skim summary is printed as well as stored.
(DPA_BoxCoxTransformedSkimmed <- skim(DPA_BoxCoxTransformed))
Name | DPA_BoxCoxTransformed |
Number of rows | 951 |
Number of columns | 20 |
_______________________ | |
Column type frequency: | |
numeric | 20 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
MolWeight | 0 | 1 | 5.19 | 0.48 | 3.83 | 4.81 | 5.19 | 5.58 | 6.50 | ▁▆▇▆▁ |
NumAtoms | 0 | 1 | 3.13 | 0.48 | 1.61 | 2.83 | 3.09 | 3.43 | 4.54 | ▁▃▇▃▁ |
NumNonHAtoms | 0 | 1 | 2.46 | 0.50 | 0.69 | 2.08 | 2.48 | 2.83 | 3.85 | ▁▃▇▇▁ |
NumBonds | 0 | 1 | 4.39 | 0.96 | 1.60 | 3.81 | 4.36 | 4.97 | 7.48 | ▁▅▇▃▁ |
NumNonHBonds | 0 | 1 | 3.21 | 0.95 | 0.00 | 2.58 | 3.22 | 3.91 | 5.93 | ▁▃▇▆▁ |
NumMultBonds | 0 | 1 | 6.15 | 5.17 | 0.00 | 1.00 | 6.00 | 10.00 | 25.00 | ▇▆▃▁▁ |
NumRotBonds | 0 | 1 | 2.25 | 2.41 | 0.00 | 0.00 | 2.00 | 3.50 | 16.00 | ▇▂▁▁▁ |
NumDblBonds | 0 | 1 | 1.01 | 1.21 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▂▁▁▁ |
NumAromaticBonds | 0 | 1 | 5.12 | 5.26 | 0.00 | 0.00 | 6.00 | 6.00 | 25.00 | ▇▆▃▁▁ |
NumHydrogen | 0 | 1 | 12.35 | 7.32 | 0.00 | 7.00 | 11.00 | 16.00 | 47.00 | ▇▇▂▁▁ |
NumCarbon | 0 | 1 | 3.54 | 1.34 | 0.00 | 2.62 | 3.52 | 4.25 | 7.62 | ▂▇▇▃▁ |
NumNitrogen | 0 | 1 | 0.81 | 1.19 | 0.00 | 0.00 | 0.00 | 1.00 | 6.00 | ▇▂▁▁▁ |
NumOxygen | 0 | 1 | 1.57 | 1.73 | 0.00 | 0.00 | 1.00 | 2.00 | 13.00 | ▇▂▁▁▁ |
NumSulfer | 0 | 1 | 0.16 | 0.49 | 0.00 | 0.00 | 0.00 | 0.00 | 4.00 | ▇▁▁▁▁ |
NumChlorine | 0 | 1 | 0.56 | 1.40 | 0.00 | 0.00 | 0.00 | 0.00 | 10.00 | ▇▁▁▁▁ |
NumHalogen | 0 | 1 | 0.70 | 1.47 | 0.00 | 0.00 | 0.00 | 1.00 | 10.00 | ▇▁▁▁▁ |
NumRings | 0 | 1 | 1.40 | 1.30 | 0.00 | 0.00 | 1.00 | 2.00 | 7.00 | ▇▃▂▁▁ |
HydrophilicFactor | 0 | 1 | -0.02 | 1.13 | -0.98 | -0.76 | -0.31 | 0.31 | 13.48 | ▇▁▁▁▁ |
SurfaceArea1 | 0 | 1 | 36.46 | 35.29 | 0.00 | 9.23 | 29.10 | 53.28 | 331.94 | ▇▂▁▁▁ |
SurfaceArea2 | 0 | 1 | 40.23 | 38.12 | 0.00 | 10.63 | 33.12 | 60.66 | 331.94 | ▇▂▁▁▁ |
###################################
# Verifying the data dimensions:
# dim() reports c(rows, columns) of
# DPA_BoxCoxTransformed
###################################
dim(DPA_BoxCoxTransformed)
## [1] 951 20
##################################
# Loading dataset
##################################
DPA <- Solubility_Train
##################################
# Listing all predictors
# (every column except the response)
##################################
DPA.Predictors <- DPA[, !names(DPA) %in% c("Log_Solubility_Class"), drop = FALSE]
##################################
# Listing all numeric predictors
# (every predictor whose name does not
# contain "FP"; a logical mask is used
# because a negative index built from an
# empty grep() result would select no
# columns at all)
##################################
DPA.Predictors.Numeric <- DPA.Predictors[, !grepl("FP", names(DPA.Predictors)), drop = FALSE]
##################################
# Applying a Box-Cox transformation
##################################
DPA_BoxCox <- preProcess(DPA.Predictors.Numeric, method = c("BoxCox"))
DPA_BoxCoxTransformed <- predict(DPA_BoxCox, DPA.Predictors.Numeric)
##################################
# Applying a center and scale data transformation
# (estimated on, and applied to, the
# Box-Cox-transformed predictors)
##################################
DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaled <-
  preProcess(DPA_BoxCoxTransformed, method = c("center","scale"))
DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed <-
  predict(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaled, DPA_BoxCoxTransformed)
##################################
# Gathering descriptive statistics
##################################
# Wrapped in parentheses so the skim summary is printed as well as stored.
(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformedSkimmed <-
   skim(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed))
Name | DPA.Predictors.Numeric_Bo… |
Number of rows | 951 |
Number of columns | 20 |
_______________________ | |
Column type frequency: | |
numeric | 20 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
MolWeight | 0 | 1 | 0 | 1 | -2.84 | -0.80 | -0.01 | 0.80 | 2.72 | ▁▆▇▆▁ |
NumAtoms | 0 | 1 | 0 | 1 | -3.16 | -0.61 | -0.07 | 0.64 | 2.95 | ▁▃▇▃▁ |
NumNonHAtoms | 0 | 1 | 0 | 1 | -3.53 | -0.76 | 0.06 | 0.75 | 2.79 | ▁▃▇▇▁ |
NumBonds | 0 | 1 | 0 | 1 | -2.92 | -0.61 | -0.04 | 0.60 | 3.23 | ▁▅▇▃▁ |
NumNonHBonds | 0 | 1 | 0 | 1 | -3.38 | -0.67 | 0.01 | 0.74 | 2.86 | ▁▃▇▆▁ |
NumMultBonds | 0 | 1 | 0 | 1 | -1.19 | -1.00 | -0.03 | 0.74 | 3.65 | ▇▇▃▁▁ |
NumRotBonds | 0 | 1 | 0 | 1 | -0.93 | -0.93 | -0.10 | 0.52 | 5.71 | ▇▂▁▁▁ |
NumDblBonds | 0 | 1 | 0 | 1 | -0.83 | -0.83 | -0.01 | 0.82 | 4.95 | ▇▂▁▁▁ |
NumAromaticBonds | 0 | 1 | 0 | 1 | -0.97 | -0.97 | 0.17 | 0.17 | 3.78 | ▇▆▃▁▁ |
NumHydrogen | 0 | 1 | 0 | 1 | -1.69 | -0.73 | -0.18 | 0.50 | 4.74 | ▇▇▂▁▁ |
NumCarbon | 0 | 1 | 0 | 1 | -2.64 | -0.69 | -0.01 | 0.54 | 3.06 | ▂▇▇▃▁ |
NumNitrogen | 0 | 1 | 0 | 1 | -0.69 | -0.69 | -0.69 | 0.16 | 4.37 | ▇▂▁▁▁ |
NumOxygen | 0 | 1 | 0 | 1 | -0.91 | -0.91 | -0.33 | 0.25 | 6.61 | ▇▂▁▁▁ |
NumSulfer | 0 | 1 | 0 | 1 | -0.34 | -0.34 | -0.34 | -0.34 | 7.86 | ▇▁▁▁▁ |
NumChlorine | 0 | 1 | 0 | 1 | -0.40 | -0.40 | -0.40 | -0.40 | 6.74 | ▇▁▁▁▁ |
NumHalogen | 0 | 1 | 0 | 1 | -0.47 | -0.47 | -0.47 | 0.20 | 6.32 | ▇▁▁▁▁ |
NumRings | 0 | 1 | 0 | 1 | -1.08 | -1.08 | -0.31 | 0.46 | 4.31 | ▇▃▂▁▁ |
HydrophilicFactor | 0 | 1 | 0 | 1 | -0.86 | -0.66 | -0.26 | 0.30 | 11.99 | ▇▁▁▁▁ |
SurfaceArea1 | 0 | 1 | 0 | 1 | -1.03 | -0.77 | -0.21 | 0.48 | 8.37 | ▇▂▁▁▁ |
SurfaceArea2 | 0 | 1 | 0 | 1 | -1.06 | -0.78 | -0.19 | 0.54 | 7.65 | ▇▂▁▁▁ |
###################################
# Verifying the data dimensions:
# dim() reports c(rows, columns) of the
# Box-Cox-transformed, centered and scaled
# numeric predictors
###################################
dim(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed)
## [1] 951 20
##################################
# Creating the pre-modelling
# train set
##################################
<- DPA$Log_Solubility_Class
Log_Solubility_Class <- DPA.Predictors[,(grep("FP", names(DPA.Predictors)))]
PMA.Predictors.Factor <- as.data.frame(lapply(PMA.Predictors.Factor,factor))
PMA.Predictors.Factor <- DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed
PMA.Predictors.Numeric <- cbind(Log_Solubility_Class,PMA.Predictors.Factor,PMA.Predictors.Numeric)
PMA_BoxCoxTransformed_CenteredScaledTransformed
##################################
# Filtering out columns noted with data quality issues including
# zero and near-zero variance,
# high correlation and linear dependencies
# to create the pre-modelling dataset
##################################
# Drop the columns flagged earlier for data quality issues, then publish the
# result under the shorter name used by the rest of the analysis.
PMA_BoxCoxTransformed_CenteredScaledTransformed_ExcludedLowVariance_ExcludedLinearlyDependent_ExcludedHighCorrelation <-
  PMA_BoxCoxTransformed_CenteredScaledTransformed[
    ,
    !names(PMA_BoxCoxTransformed_CenteredScaledTransformed) %in%
      c("FP154", "FP199", "FP200", "NumNonHBonds", "NumHydrogen",
        "NumNonHAtoms", "NumAromaticBonds", "NumAtoms")
  ]

PMA_PreModelling_Train <-
  PMA_BoxCoxTransformed_CenteredScaledTransformed_ExcludedLowVariance_ExcludedLinearlyDependent_ExcludedHighCorrelation
##################################
# Gathering descriptive statistics
##################################
# Outer parentheses make the assignment print the skim summary as well.
(PMA_PreModelling_Train_Skimmed <- skim(PMA_PreModelling_Train))
Name | PMA_PreModelling_Train |
Number of rows | 951 |
Number of columns | 221 |
_______________________ | |
Column type frequency: | |
factor | 206 |
numeric | 15 |
________________________ | |
Group variables | None |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
Log_Solubility_Class | 0 | 1 | FALSE | 2 | Hig: 524, Low: 427 |
FP001 | 0 | 1 | FALSE | 2 | 0: 482, 1: 469 |
FP002 | 0 | 1 | FALSE | 2 | 1: 513, 0: 438 |
FP003 | 0 | 1 | FALSE | 2 | 0: 536, 1: 415 |
FP004 | 0 | 1 | FALSE | 2 | 1: 556, 0: 395 |
FP005 | 0 | 1 | FALSE | 2 | 1: 551, 0: 400 |
FP006 | 0 | 1 | FALSE | 2 | 0: 570, 1: 381 |
FP007 | 0 | 1 | FALSE | 2 | 0: 605, 1: 346 |
FP008 | 0 | 1 | FALSE | 2 | 0: 641, 1: 310 |
FP009 | 0 | 1 | FALSE | 2 | 0: 685, 1: 266 |
FP010 | 0 | 1 | FALSE | 2 | 0: 781, 1: 170 |
FP011 | 0 | 1 | FALSE | 2 | 0: 747, 1: 204 |
FP012 | 0 | 1 | FALSE | 2 | 0: 783, 1: 168 |
FP013 | 0 | 1 | FALSE | 2 | 0: 793, 1: 158 |
FP014 | 0 | 1 | FALSE | 2 | 0: 798, 1: 153 |
FP015 | 0 | 1 | FALSE | 2 | 1: 818, 0: 133 |
FP016 | 0 | 1 | FALSE | 2 | 0: 812, 1: 139 |
FP017 | 0 | 1 | FALSE | 2 | 0: 814, 1: 137 |
FP018 | 0 | 1 | FALSE | 2 | 0: 826, 1: 125 |
FP019 | 0 | 1 | FALSE | 2 | 0: 835, 1: 116 |
FP020 | 0 | 1 | FALSE | 2 | 0: 837, 1: 114 |
FP021 | 0 | 1 | FALSE | 2 | 0: 836, 1: 115 |
FP022 | 0 | 1 | FALSE | 2 | 0: 852, 1: 99 |
FP023 | 0 | 1 | FALSE | 2 | 0: 834, 1: 117 |
FP024 | 0 | 1 | FALSE | 2 | 0: 844, 1: 107 |
FP025 | 0 | 1 | FALSE | 2 | 0: 841, 1: 110 |
FP026 | 0 | 1 | FALSE | 2 | 0: 871, 1: 80 |
FP027 | 0 | 1 | FALSE | 2 | 0: 858, 1: 93 |
FP028 | 0 | 1 | FALSE | 2 | 0: 850, 1: 101 |
FP029 | 0 | 1 | FALSE | 2 | 0: 854, 1: 97 |
FP030 | 0 | 1 | FALSE | 2 | 0: 862, 1: 89 |
FP031 | 0 | 1 | FALSE | 2 | 0: 866, 1: 85 |
FP032 | 0 | 1 | FALSE | 2 | 0: 881, 1: 70 |
FP033 | 0 | 1 | FALSE | 2 | 0: 885, 1: 66 |
FP034 | 0 | 1 | FALSE | 2 | 0: 875, 1: 76 |
FP035 | 0 | 1 | FALSE | 2 | 0: 882, 1: 69 |
FP036 | 0 | 1 | FALSE | 2 | 0: 879, 1: 72 |
FP037 | 0 | 1 | FALSE | 2 | 0: 884, 1: 67 |
FP038 | 0 | 1 | FALSE | 2 | 0: 869, 1: 82 |
FP039 | 0 | 1 | FALSE | 2 | 0: 880, 1: 71 |
FP040 | 0 | 1 | FALSE | 2 | 0: 886, 1: 65 |
FP041 | 0 | 1 | FALSE | 2 | 0: 891, 1: 60 |
FP042 | 0 | 1 | FALSE | 2 | 0: 897, 1: 54 |
FP043 | 0 | 1 | FALSE | 2 | 0: 888, 1: 63 |
FP044 | 0 | 1 | FALSE | 2 | 0: 894, 1: 57 |
FP045 | 0 | 1 | FALSE | 2 | 0: 898, 1: 53 |
FP046 | 0 | 1 | FALSE | 2 | 0: 651, 1: 300 |
FP047 | 0 | 1 | FALSE | 2 | 0: 698, 1: 253 |
FP048 | 0 | 1 | FALSE | 2 | 0: 833, 1: 118 |
FP049 | 0 | 1 | FALSE | 2 | 0: 835, 1: 116 |
FP050 | 0 | 1 | FALSE | 2 | 0: 844, 1: 107 |
FP051 | 0 | 1 | FALSE | 2 | 0: 847, 1: 104 |
FP052 | 0 | 1 | FALSE | 2 | 0: 864, 1: 87 |
FP053 | 0 | 1 | FALSE | 2 | 0: 862, 1: 89 |
FP054 | 0 | 1 | FALSE | 2 | 0: 879, 1: 72 |
FP055 | 0 | 1 | FALSE | 2 | 0: 900, 1: 51 |
FP056 | 0 | 1 | FALSE | 2 | 0: 889, 1: 62 |
FP057 | 0 | 1 | FALSE | 2 | 0: 837, 1: 114 |
FP058 | 0 | 1 | FALSE | 2 | 0: 843, 1: 108 |
FP059 | 0 | 1 | FALSE | 2 | 0: 899, 1: 52 |
FP060 | 0 | 1 | FALSE | 2 | 0: 493, 1: 458 |
FP061 | 0 | 1 | FALSE | 2 | 0: 526, 1: 425 |
FP062 | 0 | 1 | FALSE | 2 | 0: 535, 1: 416 |
FP063 | 0 | 1 | FALSE | 2 | 0: 546, 1: 405 |
FP064 | 0 | 1 | FALSE | 2 | 0: 555, 1: 396 |
FP065 | 0 | 1 | FALSE | 2 | 1: 564, 0: 387 |
FP066 | 0 | 1 | FALSE | 2 | 1: 580, 0: 371 |
FP067 | 0 | 1 | FALSE | 2 | 0: 590, 1: 361 |
FP068 | 0 | 1 | FALSE | 2 | 0: 607, 1: 344 |
FP069 | 0 | 1 | FALSE | 2 | 0: 607, 1: 344 |
FP070 | 0 | 1 | FALSE | 2 | 0: 613, 1: 338 |
FP071 | 0 | 1 | FALSE | 2 | 0: 640, 1: 311 |
FP072 | 0 | 1 | FALSE | 2 | 1: 626, 0: 325 |
FP073 | 0 | 1 | FALSE | 2 | 0: 656, 1: 295 |
FP074 | 0 | 1 | FALSE | 2 | 0: 642, 1: 309 |
FP075 | 0 | 1 | FALSE | 2 | 0: 629, 1: 322 |
FP076 | 0 | 1 | FALSE | 2 | 0: 639, 1: 312 |
FP077 | 0 | 1 | FALSE | 2 | 0: 646, 1: 305 |
FP078 | 0 | 1 | FALSE | 2 | 0: 662, 1: 289 |
FP079 | 0 | 1 | FALSE | 2 | 1: 656, 0: 295 |
FP080 | 0 | 1 | FALSE | 2 | 0: 663, 1: 288 |
FP081 | 0 | 1 | FALSE | 2 | 0: 686, 1: 265 |
FP082 | 0 | 1 | FALSE | 2 | 1: 679, 0: 272 |
FP083 | 0 | 1 | FALSE | 2 | 0: 691, 1: 260 |
FP084 | 0 | 1 | FALSE | 2 | 0: 679, 1: 272 |
FP085 | 0 | 1 | FALSE | 2 | 0: 708, 1: 243 |
FP086 | 0 | 1 | FALSE | 2 | 0: 695, 1: 256 |
FP087 | 0 | 1 | FALSE | 2 | 1: 691, 0: 260 |
FP088 | 0 | 1 | FALSE | 2 | 0: 701, 1: 250 |
FP089 | 0 | 1 | FALSE | 2 | 0: 716, 1: 235 |
FP090 | 0 | 1 | FALSE | 2 | 0: 714, 1: 237 |
FP091 | 0 | 1 | FALSE | 2 | 0: 737, 1: 214 |
FP092 | 0 | 1 | FALSE | 2 | 0: 719, 1: 232 |
FP093 | 0 | 1 | FALSE | 2 | 0: 719, 1: 232 |
FP094 | 0 | 1 | FALSE | 2 | 0: 731, 1: 220 |
FP095 | 0 | 1 | FALSE | 2 | 0: 742, 1: 209 |
FP096 | 0 | 1 | FALSE | 2 | 0: 744, 1: 207 |
FP097 | 0 | 1 | FALSE | 2 | 0: 727, 1: 224 |
FP098 | 0 | 1 | FALSE | 2 | 0: 725, 1: 226 |
FP099 | 0 | 1 | FALSE | 2 | 0: 735, 1: 216 |
FP100 | 0 | 1 | FALSE | 2 | 0: 731, 1: 220 |
FP101 | 0 | 1 | FALSE | 2 | 0: 726, 1: 225 |
FP102 | 0 | 1 | FALSE | 2 | 0: 759, 1: 192 |
FP103 | 0 | 1 | FALSE | 2 | 0: 743, 1: 208 |
FP104 | 0 | 1 | FALSE | 2 | 0: 739, 1: 212 |
FP105 | 0 | 1 | FALSE | 2 | 0: 746, 1: 205 |
FP106 | 0 | 1 | FALSE | 2 | 0: 769, 1: 182 |
FP107 | 0 | 1 | FALSE | 2 | 0: 750, 1: 201 |
FP108 | 0 | 1 | FALSE | 2 | 0: 756, 1: 195 |
FP109 | 0 | 1 | FALSE | 2 | 0: 783, 1: 168 |
FP110 | 0 | 1 | FALSE | 2 | 0: 755, 1: 196 |
FP111 | 0 | 1 | FALSE | 2 | 0: 764, 1: 187 |
FP112 | 0 | 1 | FALSE | 2 | 0: 766, 1: 185 |
FP113 | 0 | 1 | FALSE | 2 | 0: 765, 1: 186 |
FP114 | 0 | 1 | FALSE | 2 | 0: 803, 1: 148 |
FP115 | 0 | 1 | FALSE | 2 | 0: 781, 1: 170 |
FP116 | 0 | 1 | FALSE | 2 | 0: 768, 1: 183 |
FP117 | 0 | 1 | FALSE | 2 | 0: 781, 1: 170 |
FP118 | 0 | 1 | FALSE | 2 | 0: 768, 1: 183 |
FP119 | 0 | 1 | FALSE | 2 | 0: 796, 1: 155 |
FP120 | 0 | 1 | FALSE | 2 | 0: 793, 1: 158 |
FP121 | 0 | 1 | FALSE | 2 | 0: 818, 1: 133 |
FP122 | 0 | 1 | FALSE | 2 | 0: 795, 1: 156 |
FP123 | 0 | 1 | FALSE | 2 | 0: 792, 1: 159 |
FP124 | 0 | 1 | FALSE | 2 | 0: 797, 1: 154 |
FP125 | 0 | 1 | FALSE | 2 | 0: 803, 1: 148 |
FP126 | 0 | 1 | FALSE | 2 | 0: 810, 1: 141 |
FP127 | 0 | 1 | FALSE | 2 | 0: 818, 1: 133 |
FP128 | 0 | 1 | FALSE | 2 | 0: 810, 1: 141 |
FP129 | 0 | 1 | FALSE | 2 | 0: 819, 1: 132 |
FP130 | 0 | 1 | FALSE | 2 | 0: 851, 1: 100 |
FP131 | 0 | 1 | FALSE | 2 | 0: 831, 1: 120 |
FP132 | 0 | 1 | FALSE | 2 | 0: 832, 1: 119 |
FP133 | 0 | 1 | FALSE | 2 | 0: 831, 1: 120 |
FP134 | 0 | 1 | FALSE | 2 | 0: 830, 1: 121 |
FP135 | 0 | 1 | FALSE | 2 | 0: 831, 1: 120 |
FP136 | 0 | 1 | FALSE | 2 | 0: 836, 1: 115 |
FP137 | 0 | 1 | FALSE | 2 | 0: 841, 1: 110 |
FP138 | 0 | 1 | FALSE | 2 | 0: 845, 1: 106 |
FP139 | 0 | 1 | FALSE | 2 | 0: 873, 1: 78 |
FP140 | 0 | 1 | FALSE | 2 | 0: 845, 1: 106 |
FP141 | 0 | 1 | FALSE | 2 | 0: 840, 1: 111 |
FP142 | 0 | 1 | FALSE | 2 | 0: 847, 1: 104 |
FP143 | 0 | 1 | FALSE | 2 | 0: 874, 1: 77 |
FP144 | 0 | 1 | FALSE | 2 | 0: 852, 1: 99 |
FP145 | 0 | 1 | FALSE | 2 | 0: 852, 1: 99 |
FP146 | 0 | 1 | FALSE | 2 | 0: 853, 1: 98 |
FP147 | 0 | 1 | FALSE | 2 | 0: 851, 1: 100 |
FP148 | 0 | 1 | FALSE | 2 | 0: 868, 1: 83 |
FP149 | 0 | 1 | FALSE | 2 | 0: 865, 1: 86 |
FP150 | 0 | 1 | FALSE | 2 | 0: 876, 1: 75 |
FP151 | 0 | 1 | FALSE | 2 | 0: 898, 1: 53 |
FP152 | 0 | 1 | FALSE | 2 | 0: 873, 1: 78 |
FP153 | 0 | 1 | FALSE | 2 | 0: 877, 1: 74 |
FP155 | 0 | 1 | FALSE | 2 | 0: 885, 1: 66 |
FP156 | 0 | 1 | FALSE | 2 | 0: 884, 1: 67 |
FP157 | 0 | 1 | FALSE | 2 | 0: 892, 1: 59 |
FP158 | 0 | 1 | FALSE | 2 | 0: 900, 1: 51 |
FP159 | 0 | 1 | FALSE | 2 | 0: 884, 1: 67 |
FP160 | 0 | 1 | FALSE | 2 | 0: 886, 1: 65 |
FP161 | 0 | 1 | FALSE | 2 | 0: 888, 1: 63 |
FP162 | 0 | 1 | FALSE | 2 | 0: 480, 1: 471 |
FP163 | 0 | 1 | FALSE | 2 | 0: 498, 1: 453 |
FP164 | 0 | 1 | FALSE | 2 | 1: 597, 0: 354 |
FP165 | 0 | 1 | FALSE | 2 | 0: 619, 1: 332 |
FP166 | 0 | 1 | FALSE | 2 | 0: 636, 1: 315 |
FP167 | 0 | 1 | FALSE | 2 | 0: 639, 1: 312 |
FP168 | 0 | 1 | FALSE | 2 | 1: 633, 0: 318 |
FP169 | 0 | 1 | FALSE | 2 | 0: 774, 1: 177 |
FP170 | 0 | 1 | FALSE | 2 | 0: 776, 1: 175 |
FP171 | 0 | 1 | FALSE | 2 | 0: 790, 1: 161 |
FP172 | 0 | 1 | FALSE | 2 | 0: 807, 1: 144 |
FP173 | 0 | 1 | FALSE | 2 | 0: 816, 1: 135 |
FP174 | 0 | 1 | FALSE | 2 | 0: 827, 1: 124 |
FP175 | 0 | 1 | FALSE | 2 | 0: 823, 1: 128 |
FP176 | 0 | 1 | FALSE | 2 | 0: 835, 1: 116 |
FP177 | 0 | 1 | FALSE | 2 | 0: 836, 1: 115 |
FP178 | 0 | 1 | FALSE | 2 | 0: 836, 1: 115 |
FP179 | 0 | 1 | FALSE | 2 | 0: 858, 1: 93 |
FP180 | 0 | 1 | FALSE | 2 | 0: 849, 1: 102 |
FP181 | 0 | 1 | FALSE | 2 | 0: 862, 1: 89 |
FP182 | 0 | 1 | FALSE | 2 | 0: 857, 1: 94 |
FP183 | 0 | 1 | FALSE | 2 | 0: 879, 1: 72 |
FP184 | 0 | 1 | FALSE | 2 | 0: 871, 1: 80 |
FP185 | 0 | 1 | FALSE | 2 | 0: 870, 1: 81 |
FP186 | 0 | 1 | FALSE | 2 | 0: 878, 1: 73 |
FP187 | 0 | 1 | FALSE | 2 | 0: 882, 1: 69 |
FP188 | 0 | 1 | FALSE | 2 | 0: 886, 1: 65 |
FP189 | 0 | 1 | FALSE | 2 | 0: 878, 1: 73 |
FP190 | 0 | 1 | FALSE | 2 | 0: 882, 1: 69 |
FP191 | 0 | 1 | FALSE | 2 | 0: 884, 1: 67 |
FP192 | 0 | 1 | FALSE | 2 | 0: 893, 1: 58 |
FP193 | 0 | 1 | FALSE | 2 | 0: 892, 1: 59 |
FP194 | 0 | 1 | FALSE | 2 | 0: 895, 1: 56 |
FP195 | 0 | 1 | FALSE | 2 | 0: 893, 1: 58 |
FP196 | 0 | 1 | FALSE | 2 | 0: 897, 1: 54 |
FP197 | 0 | 1 | FALSE | 2 | 0: 901, 1: 50 |
FP198 | 0 | 1 | FALSE | 2 | 0: 897, 1: 54 |
FP201 | 0 | 1 | FALSE | 2 | 0: 901, 1: 50 |
FP202 | 0 | 1 | FALSE | 2 | 0: 706, 1: 245 |
FP203 | 0 | 1 | FALSE | 2 | 0: 842, 1: 109 |
FP204 | 0 | 1 | FALSE | 2 | 0: 857, 1: 94 |
FP205 | 0 | 1 | FALSE | 2 | 0: 877, 1: 74 |
FP206 | 0 | 1 | FALSE | 2 | 0: 894, 1: 57 |
FP207 | 0 | 1 | FALSE | 2 | 0: 897, 1: 54 |
FP208 | 0 | 1 | FALSE | 2 | 0: 844, 1: 107 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
MolWeight | 0 | 1 | 0 | 1 | -2.84 | -0.80 | -0.01 | 0.80 | 2.72 | ▁▆▇▆▁ |
NumBonds | 0 | 1 | 0 | 1 | -2.92 | -0.61 | -0.04 | 0.60 | 3.23 | ▁▅▇▃▁ |
NumMultBonds | 0 | 1 | 0 | 1 | -1.19 | -1.00 | -0.03 | 0.74 | 3.65 | ▇▇▃▁▁ |
NumRotBonds | 0 | 1 | 0 | 1 | -0.93 | -0.93 | -0.10 | 0.52 | 5.71 | ▇▂▁▁▁ |
NumDblBonds | 0 | 1 | 0 | 1 | -0.83 | -0.83 | -0.01 | 0.82 | 4.95 | ▇▂▁▁▁ |
NumCarbon | 0 | 1 | 0 | 1 | -2.64 | -0.69 | -0.01 | 0.54 | 3.06 | ▂▇▇▃▁ |
NumNitrogen | 0 | 1 | 0 | 1 | -0.69 | -0.69 | -0.69 | 0.16 | 4.37 | ▇▂▁▁▁ |
NumOxygen | 0 | 1 | 0 | 1 | -0.91 | -0.91 | -0.33 | 0.25 | 6.61 | ▇▂▁▁▁ |
NumSulfer | 0 | 1 | 0 | 1 | -0.34 | -0.34 | -0.34 | -0.34 | 7.86 | ▇▁▁▁▁ |
NumChlorine | 0 | 1 | 0 | 1 | -0.40 | -0.40 | -0.40 | -0.40 | 6.74 | ▇▁▁▁▁ |
NumHalogen | 0 | 1 | 0 | 1 | -0.47 | -0.47 | -0.47 | 0.20 | 6.32 | ▇▁▁▁▁ |
NumRings | 0 | 1 | 0 | 1 | -1.08 | -1.08 | -0.31 | 0.46 | 4.31 | ▇▃▂▁▁ |
HydrophilicFactor | 0 | 1 | 0 | 1 | -0.86 | -0.66 | -0.26 | 0.30 | 11.99 | ▇▁▁▁▁ |
SurfaceArea1 | 0 | 1 | 0 | 1 | -1.03 | -0.77 | -0.21 | 0.48 | 8.37 | ▇▂▁▁▁ |
SurfaceArea2 | 0 | 1 | 0 | 1 | -1.06 | -0.78 | -0.19 | 0.54 | 7.65 | ▇▂▁▁▁ |
###################################
# Verifying the data dimensions
# for the train set
###################################
# Report rows x columns of the pre-modelling train set
# (the recorded output below shows 951 rows and 221 columns).
dim(PMA_PreModelling_Train)
## [1] 951 221
##################################
# Formulating the test set
##################################
DPA_Test <- Solubility_Test

# All columns except the response.
DPA_Test.Predictors <- DPA_Test[, !names(DPA_Test) %in% c("Log_Solubility_Class")]

# Numeric predictors = everything that is not a fingerprint ("FP...") column.
# A logical mask is used instead of a negative grep() index because
# x[, -integer(0)] selects no columns at all when nothing matches.
DPA_Test.Predictors.Numeric <-
  DPA_Test.Predictors[, !grepl("FP", names(DPA_Test.Predictors))]

# NOTE(review): the Box-Cox and center/scale parameters below are estimated
# from the test set itself, not reused from the train-set preProcess
# objects. The usual practice is to fit on train and apply via predict() to
# test; confirm whether re-estimating here is intended. Behavior is left
# unchanged so that the recorded outputs still match.
DPA_Test_BoxCox <- preProcess(DPA_Test.Predictors.Numeric, method = c("BoxCox"))
DPA_Test_BoxCoxTransformed <- predict(DPA_Test_BoxCox, DPA_Test.Predictors.Numeric)

DPA_Test.Predictors.Numeric_BoxCoxTransformed_CenteredScaled <-
  preProcess(DPA_Test_BoxCoxTransformed, method = c("center", "scale"))
DPA_Test.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed <-
  predict(DPA_Test.Predictors.Numeric_BoxCoxTransformed_CenteredScaled,
          DPA_Test_BoxCoxTransformed)
##################################
# Creating the pre-modelling
# test set
##################################
# Response variable of the test set.
Log_Solubility_Class <- DPA_Test$Log_Solubility_Class

# Fingerprint ("FP...") predictors, converted column by column to factors.
PMA_Test.Predictors.Factor <-
  DPA_Test.Predictors[, grepl("FP", names(DPA_Test.Predictors))]
PMA_Test.Predictors.Factor <- as.data.frame(lapply(PMA_Test.Predictors.Factor, factor))

# Numeric predictors after Box-Cox, centering and scaling.
PMA_Test.Predictors.Numeric <-
  DPA_Test.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed

# Response first, then factor predictors, then numeric predictors.
PMA_Test_BoxCoxTransformed_CenteredScaledTransformed <- cbind(
  Log_Solubility_Class,
  PMA_Test.Predictors.Factor,
  PMA_Test.Predictors.Numeric
)

# Remove the same columns that were excluded from the train set so both
# sets keep an identical column layout.
PMA_Test_BoxCoxTransformed_CenteredScaledTransformed_ExcludedLowVariance_ExcludedLinearlyDependent_ExcludedHighCorrelation <-
  PMA_Test_BoxCoxTransformed_CenteredScaledTransformed[
    ,
    !names(PMA_Test_BoxCoxTransformed_CenteredScaledTransformed) %in%
      c("FP154", "FP199", "FP200", "NumNonHBonds", "NumHydrogen",
        "NumNonHAtoms", "NumAromaticBonds", "NumAtoms")
  ]

PMA_PreModelling_Test <-
  PMA_Test_BoxCoxTransformed_CenteredScaledTransformed_ExcludedLowVariance_ExcludedLinearlyDependent_ExcludedHighCorrelation
##################################
# Gathering descriptive statistics
##################################
# Outer parentheses make the assignment print the skim summary as well.
(PMA_PreModelling_Test_Skimmed <- skim(PMA_PreModelling_Test))
Name | PMA_PreModelling_Test |
Number of rows | 316 |
Number of columns | 221 |
_______________________ | |
Column type frequency: | |
factor | 206 |
numeric | 15 |
________________________ | |
Group variables | None |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
Log_Solubility_Class | 0 | 1 | FALSE | 2 | Hig: 173, Low: 143 |
FP001 | 0 | 1 | FALSE | 2 | 0: 168, 1: 148 |
FP002 | 0 | 1 | FALSE | 2 | 1: 185, 0: 131 |
FP003 | 0 | 1 | FALSE | 2 | 0: 176, 1: 140 |
FP004 | 0 | 1 | FALSE | 2 | 1: 168, 0: 148 |
FP005 | 0 | 1 | FALSE | 2 | 1: 195, 0: 121 |
FP006 | 0 | 1 | FALSE | 2 | 0: 205, 1: 111 |
FP007 | 0 | 1 | FALSE | 2 | 0: 204, 1: 112 |
FP008 | 0 | 1 | FALSE | 2 | 0: 202, 1: 114 |
FP009 | 0 | 1 | FALSE | 2 | 0: 233, 1: 83 |
FP010 | 0 | 1 | FALSE | 2 | 0: 255, 1: 61 |
FP011 | 0 | 1 | FALSE | 2 | 0: 261, 1: 55 |
FP012 | 0 | 1 | FALSE | 2 | 0: 263, 1: 53 |
FP013 | 0 | 1 | FALSE | 2 | 0: 264, 1: 52 |
FP014 | 0 | 1 | FALSE | 2 | 0: 266, 1: 50 |
FP015 | 0 | 1 | FALSE | 2 | 1: 262, 0: 54 |
FP016 | 0 | 1 | FALSE | 2 | 0: 271, 1: 45 |
FP017 | 0 | 1 | FALSE | 2 | 0: 269, 1: 47 |
FP018 | 0 | 1 | FALSE | 2 | 0: 289, 1: 27 |
FP019 | 0 | 1 | FALSE | 2 | 0: 280, 1: 36 |
FP020 | 0 | 1 | FALSE | 2 | 0: 282, 1: 34 |
FP021 | 0 | 1 | FALSE | 2 | 0: 282, 1: 34 |
FP022 | 0 | 1 | FALSE | 2 | 0: 279, 1: 37 |
FP023 | 0 | 1 | FALSE | 2 | 0: 289, 1: 27 |
FP024 | 0 | 1 | FALSE | 2 | 0: 285, 1: 31 |
FP025 | 0 | 1 | FALSE | 2 | 0: 291, 1: 25 |
FP026 | 0 | 1 | FALSE | 2 | 0: 279, 1: 37 |
FP027 | 0 | 1 | FALSE | 2 | 0: 291, 1: 25 |
FP028 | 0 | 1 | FALSE | 2 | 0: 298, 1: 18 |
FP029 | 0 | 1 | FALSE | 2 | 0: 300, 1: 16 |
FP030 | 0 | 1 | FALSE | 2 | 0: 290, 1: 26 |
FP031 | 0 | 1 | FALSE | 2 | 0: 285, 1: 31 |
FP032 | 0 | 1 | FALSE | 2 | 0: 275, 1: 41 |
FP033 | 0 | 1 | FALSE | 2 | 0: 278, 1: 38 |
FP034 | 0 | 1 | FALSE | 2 | 0: 295, 1: 21 |
FP035 | 0 | 1 | FALSE | 2 | 0: 285, 1: 31 |
FP036 | 0 | 1 | FALSE | 2 | 0: 297, 1: 19 |
FP037 | 0 | 1 | FALSE | 2 | 0: 286, 1: 30 |
FP038 | 0 | 1 | FALSE | 2 | 0: 306, 1: 10 |
FP039 | 0 | 1 | FALSE | 2 | 0: 296, 1: 20 |
FP040 | 0 | 1 | FALSE | 2 | 0: 298, 1: 18 |
FP041 | 0 | 1 | FALSE | 2 | 0: 297, 1: 19 |
FP042 | 0 | 1 | FALSE | 2 | 0: 297, 1: 19 |
FP043 | 0 | 1 | FALSE | 2 | 0: 302, 1: 14 |
FP044 | 0 | 1 | FALSE | 2 | 0: 297, 1: 19 |
FP045 | 0 | 1 | FALSE | 2 | 0: 296, 1: 20 |
FP046 | 0 | 1 | FALSE | 2 | 0: 213, 1: 103 |
FP047 | 0 | 1 | FALSE | 2 | 0: 222, 1: 94 |
FP048 | 0 | 1 | FALSE | 2 | 0: 280, 1: 36 |
FP049 | 0 | 1 | FALSE | 2 | 0: 282, 1: 34 |
FP050 | 0 | 1 | FALSE | 2 | 0: 280, 1: 36 |
FP051 | 0 | 1 | FALSE | 2 | 0: 298, 1: 18 |
FP052 | 0 | 1 | FALSE | 2 | 0: 283, 1: 33 |
FP053 | 0 | 1 | FALSE | 2 | 0: 297, 1: 19 |
FP054 | 0 | 1 | FALSE | 2 | 0: 285, 1: 31 |
FP055 | 0 | 1 | FALSE | 2 | 0: 287, 1: 29 |
FP056 | 0 | 1 | FALSE | 2 | 0: 296, 1: 20 |
FP057 | 0 | 1 | FALSE | 2 | 0: 277, 1: 39 |
FP058 | 0 | 1 | FALSE | 2 | 0: 273, 1: 43 |
FP059 | 0 | 1 | FALSE | 2 | 0: 302, 1: 14 |
FP060 | 0 | 1 | FALSE | 2 | 0: 173, 1: 143 |
FP061 | 0 | 1 | FALSE | 2 | 0: 192, 1: 124 |
FP062 | 0 | 1 | FALSE | 2 | 0: 181, 1: 135 |
FP063 | 0 | 1 | FALSE | 2 | 0: 203, 1: 113 |
FP064 | 0 | 1 | FALSE | 2 | 0: 193, 1: 123 |
FP065 | 0 | 1 | FALSE | 2 | 1: 189, 0: 127 |
FP066 | 0 | 1 | FALSE | 2 | 1: 195, 0: 121 |
FP067 | 0 | 1 | FALSE | 2 | 0: 213, 1: 103 |
FP068 | 0 | 1 | FALSE | 2 | 0: 224, 1: 92 |
FP069 | 0 | 1 | FALSE | 2 | 0: 198, 1: 118 |
FP070 | 0 | 1 | FALSE | 2 | 0: 211, 1: 105 |
FP071 | 0 | 1 | FALSE | 2 | 0: 207, 1: 109 |
FP072 | 0 | 1 | FALSE | 2 | 1: 204, 0: 112 |
FP073 | 0 | 1 | FALSE | 2 | 0: 224, 1: 92 |
FP074 | 0 | 1 | FALSE | 2 | 0: 213, 1: 103 |
FP075 | 0 | 1 | FALSE | 2 | 0: 235, 1: 81 |
FP076 | 0 | 1 | FALSE | 2 | 0: 216, 1: 100 |
FP077 | 0 | 1 | FALSE | 2 | 0: 219, 1: 97 |
FP078 | 0 | 1 | FALSE | 2 | 0: 218, 1: 98 |
FP079 | 0 | 1 | FALSE | 2 | 1: 230, 0: 86 |
FP080 | 0 | 1 | FALSE | 2 | 0: 233, 1: 83 |
FP081 | 0 | 1 | FALSE | 2 | 0: 225, 1: 91 |
FP082 | 0 | 1 | FALSE | 2 | 1: 235, 0: 81 |
FP083 | 0 | 1 | FALSE | 2 | 0: 236, 1: 80 |
FP084 | 0 | 1 | FALSE | 2 | 0: 245, 1: 71 |
FP085 | 0 | 1 | FALSE | 2 | 0: 231, 1: 85 |
FP086 | 0 | 1 | FALSE | 2 | 0: 230, 1: 86 |
FP087 | 0 | 1 | FALSE | 2 | 1: 241, 0: 75 |
FP088 | 0 | 1 | FALSE | 2 | 0: 239, 1: 77 |
FP089 | 0 | 1 | FALSE | 2 | 0: 236, 1: 80 |
FP090 | 0 | 1 | FALSE | 2 | 0: 244, 1: 72 |
FP091 | 0 | 1 | FALSE | 2 | 0: 243, 1: 73 |
FP092 | 0 | 1 | FALSE | 2 | 0: 247, 1: 69 |
FP093 | 0 | 1 | FALSE | 2 | 0: 248, 1: 68 |
FP094 | 0 | 1 | FALSE | 2 | 0: 237, 1: 79 |
FP095 | 0 | 1 | FALSE | 2 | 0: 251, 1: 65 |
FP096 | 0 | 1 | FALSE | 2 | 0: 257, 1: 59 |
FP097 | 0 | 1 | FALSE | 2 | 0: 250, 1: 66 |
FP098 | 0 | 1 | FALSE | 2 | 0: 252, 1: 64 |
FP099 | 0 | 1 | FALSE | 2 | 0: 249, 1: 67 |
FP100 | 0 | 1 | FALSE | 2 | 0: 259, 1: 57 |
FP101 | 0 | 1 | FALSE | 2 | 0: 260, 1: 56 |
FP102 | 0 | 1 | FALSE | 2 | 0: 270, 1: 46 |
FP103 | 0 | 1 | FALSE | 2 | 0: 247, 1: 69 |
FP104 | 0 | 1 | FALSE | 2 | 0: 258, 1: 58 |
FP105 | 0 | 1 | FALSE | 2 | 0: 248, 1: 68 |
FP106 | 0 | 1 | FALSE | 2 | 0: 273, 1: 43 |
FP107 | 0 | 1 | FALSE | 2 | 0: 254, 1: 62 |
FP108 | 0 | 1 | FALSE | 2 | 0: 259, 1: 57 |
FP109 | 0 | 1 | FALSE | 2 | 0: 261, 1: 55 |
FP110 | 0 | 1 | FALSE | 2 | 0: 264, 1: 52 |
FP111 | 0 | 1 | FALSE | 2 | 0: 259, 1: 57 |
FP112 | 0 | 1 | FALSE | 2 | 0: 260, 1: 56 |
FP113 | 0 | 1 | FALSE | 2 | 0: 264, 1: 52 |
FP114 | 0 | 1 | FALSE | 2 | 0: 260, 1: 56 |
FP115 | 0 | 1 | FALSE | 2 | 0: 266, 1: 50 |
FP116 | 0 | 1 | FALSE | 2 | 0: 269, 1: 47 |
FP117 | 0 | 1 | FALSE | 2 | 0: 262, 1: 54 |
FP118 | 0 | 1 | FALSE | 2 | 0: 279, 1: 37 |
FP119 | 0 | 1 | FALSE | 2 | 0: 263, 1: 53 |
FP120 | 0 | 1 | FALSE | 2 | 0: 267, 1: 49 |
FP121 | 0 | 1 | FALSE | 2 | 0: 282, 1: 34 |
FP122 | 0 | 1 | FALSE | 2 | 0: 273, 1: 43 |
FP123 | 0 | 1 | FALSE | 2 | 0: 270, 1: 46 |
FP124 | 0 | 1 | FALSE | 2 | 0: 274, 1: 42 |
FP125 | 0 | 1 | FALSE | 2 | 0: 278, 1: 38 |
FP126 | 0 | 1 | FALSE | 2 | 0: 280, 1: 36 |
FP127 | 0 | 1 | FALSE | 2 | 0: 269, 1: 47 |
FP128 | 0 | 1 | FALSE | 2 | 0: 282, 1: 34 |
FP129 | 0 | 1 | FALSE | 2 | 0: 272, 1: 44 |
FP130 | 0 | 1 | FALSE | 2 | 0: 290, 1: 26 |
FP131 | 0 | 1 | FALSE | 2 | 0: 282, 1: 34 |
FP132 | 0 | 1 | FALSE | 2 | 0: 276, 1: 40 |
FP133 | 0 | 1 | FALSE | 2 | 0: 273, 1: 43 |
FP134 | 0 | 1 | FALSE | 2 | 0: 289, 1: 27 |
FP135 | 0 | 1 | FALSE | 2 | 0: 296, 1: 20 |
FP136 | 0 | 1 | FALSE | 2 | 0: 284, 1: 32 |
FP137 | 0 | 1 | FALSE | 2 | 0: 288, 1: 28 |
FP138 | 0 | 1 | FALSE | 2 | 0: 290, 1: 26 |
FP139 | 0 | 1 | FALSE | 2 | 0: 296, 1: 20 |
FP140 | 0 | 1 | FALSE | 2 | 0: 288, 1: 28 |
FP141 | 0 | 1 | FALSE | 2 | 0: 294, 1: 22 |
FP142 | 0 | 1 | FALSE | 2 | 0: 286, 1: 30 |
FP143 | 0 | 1 | FALSE | 2 | 0: 299, 1: 17 |
FP144 | 0 | 1 | FALSE | 2 | 0: 287, 1: 29 |
FP145 | 0 | 1 | FALSE | 2 | 0: 296, 1: 20 |
FP146 | 0 | 1 | FALSE | 2 | 0: 287, 1: 29 |
FP147 | 0 | 1 | FALSE | 2 | 0: 294, 1: 22 |
FP148 | 0 | 1 | FALSE | 2 | 0: 291, 1: 25 |
FP149 | 0 | 1 | FALSE | 2 | 0: 290, 1: 26 |
FP150 | 0 | 1 | FALSE | 2 | 0: 295, 1: 21 |
FP151 | 0 | 1 | FALSE | 2 | 0: 306, 1: 10 |
FP152 | 0 | 1 | FALSE | 2 | 0: 299, 1: 17 |
FP153 | 0 | 1 | FALSE | 2 | 0: 305, 1: 11 |
FP155 | 0 | 1 | FALSE | 2 | 0: 295, 1: 21 |
FP156 | 0 | 1 | FALSE | 2 | 0: 301, 1: 15 |
FP157 | 0 | 1 | FALSE | 2 | 0: 298, 1: 18 |
FP158 | 0 | 1 | FALSE | 2 | 0: 291, 1: 25 |
FP159 | 0 | 1 | FALSE | 2 | 0: 305, 1: 11 |
FP160 | 0 | 1 | FALSE | 2 | 0: 305, 1: 11 |
FP161 | 0 | 1 | FALSE | 2 | 0: 305, 1: 11 |
FP162 | 0 | 1 | FALSE | 2 | 1: 168, 0: 148 |
FP163 | 0 | 1 | FALSE | 2 | 0: 173, 1: 143 |
FP164 | 0 | 1 | FALSE | 2 | 1: 207, 0: 109 |
FP165 | 0 | 1 | FALSE | 2 | 0: 215, 1: 101 |
FP166 | 0 | 1 | FALSE | 2 | 0: 209, 1: 107 |
FP167 | 0 | 1 | FALSE | 2 | 0: 221, 1: 95 |
FP168 | 0 | 1 | FALSE | 2 | 1: 226, 0: 90 |
FP169 | 0 | 1 | FALSE | 2 | 0: 257, 1: 59 |
FP170 | 0 | 1 | FALSE | 2 | 0: 267, 1: 49 |
FP171 | 0 | 1 | FALSE | 2 | 0: 275, 1: 41 |
FP172 | 0 | 1 | FALSE | 2 | 0: 269, 1: 47 |
FP173 | 0 | 1 | FALSE | 2 | 0: 273, 1: 43 |
FP174 | 0 | 1 | FALSE | 2 | 0: 267, 1: 49 |
FP175 | 0 | 1 | FALSE | 2 | 0: 274, 1: 42 |
FP176 | 0 | 1 | FALSE | 2 | 0: 282, 1: 34 |
FP177 | 0 | 1 | FALSE | 2 | 0: 284, 1: 32 |
FP178 | 0 | 1 | FALSE | 2 | 0: 282, 1: 34 |
FP179 | 0 | 1 | FALSE | 2 | 0: 272, 1: 44 |
FP180 | 0 | 1 | FALSE | 2 | 0: 294, 1: 22 |
FP181 | 0 | 1 | FALSE | 2 | 0: 283, 1: 33 |
FP182 | 0 | 1 | FALSE | 2 | 0: 292, 1: 24 |
FP183 | 0 | 1 | FALSE | 2 | 0: 274, 1: 42 |
FP184 | 0 | 1 | FALSE | 2 | 0: 286, 1: 30 |
FP185 | 0 | 1 | FALSE | 2 | 0: 285, 1: 31 |
FP186 | 0 | 1 | FALSE | 2 | 0: 297, 1: 19 |
FP187 | 0 | 1 | FALSE | 2 | 0: 295, 1: 21 |
FP188 | 0 | 1 | FALSE | 2 | 0: 294, 1: 22 |
FP189 | 0 | 1 | FALSE | 2 | 0: 303, 1: 13 |
FP190 | 0 | 1 | FALSE | 2 | 0: 299, 1: 17 |
FP191 | 0 | 1 | FALSE | 2 | 0: 298, 1: 18 |
FP192 | 0 | 1 | FALSE | 2 | 0: 294, 1: 22 |
FP193 | 0 | 1 | FALSE | 2 | 0: 294, 1: 22 |
FP194 | 0 | 1 | FALSE | 2 | 0: 295, 1: 21 |
FP195 | 0 | 1 | FALSE | 2 | 0: 300, 1: 16 |
FP196 | 0 | 1 | FALSE | 2 | 0: 294, 1: 22 |
FP197 | 0 | 1 | FALSE | 2 | 0: 296, 1: 20 |
FP198 | 0 | 1 | FALSE | 2 | 0: 302, 1: 14 |
FP201 | 0 | 1 | FALSE | 2 | 0: 303, 1: 13 |
FP202 | 0 | 1 | FALSE | 2 | 0: 232, 1: 84 |
FP203 | 0 | 1 | FALSE | 2 | 0: 273, 1: 43 |
FP204 | 0 | 1 | FALSE | 2 | 0: 286, 1: 30 |
FP205 | 0 | 1 | FALSE | 2 | 0: 291, 1: 25 |
FP206 | 0 | 1 | FALSE | 2 | 0: 300, 1: 16 |
FP207 | 0 | 1 | FALSE | 2 | 0: 302, 1: 14 |
FP208 | 0 | 1 | FALSE | 2 | 0: 273, 1: 43 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
MolWeight | 0 | 1 | 0 | 1 | -2.46 | -0.78 | -0.06 | 0.81 | 2.18 | ▁▇▇▇▃ |
NumBonds | 0 | 1 | 0 | 1 | -2.92 | -0.67 | 0.03 | 0.57 | 2.55 | ▁▂▇▃▂ |
NumMultBonds | 0 | 1 | 0 | 1 | -1.24 | -1.04 | -0.06 | 0.72 | 4.06 | ▇▇▅▁▁ |
NumRotBonds | 0 | 1 | 0 | 1 | -0.82 | -0.82 | -0.40 | 0.44 | 5.94 | ▇▁▁▁▁ |
NumDblBonds | 0 | 1 | 0 | 1 | -0.76 | -0.76 | 0.09 | 0.09 | 4.35 | ▇▁▁▁▁ |
NumCarbon | 0 | 1 | 0 | 1 | -2.71 | -0.70 | -0.21 | 0.56 | 2.23 | ▁▂▇▅▂ |
NumNitrogen | 0 | 1 | 0 | 1 | -0.63 | -0.63 | -0.63 | 0.26 | 4.71 | ▇▂▁▁▁ |
NumOxygen | 0 | 1 | 0 | 1 | -0.92 | -0.92 | -0.26 | 0.40 | 5.02 | ▇▃▁▁▁ |
NumSulfer | 0 | 1 | 0 | 1 | -0.28 | -0.28 | -0.28 | -0.28 | 8.06 | ▇▁▁▁▁ |
NumChlorine | 0 | 1 | 0 | 1 | -0.40 | -0.40 | -0.40 | -0.40 | 6.02 | ▇▁▁▁▁ |
NumHalogen | 0 | 1 | 0 | 1 | -0.48 | -0.48 | -0.48 | 0.20 | 5.57 | ▇▁▁▁▁ |
NumRings | 0 | 1 | 0 | 1 | -1.14 | -0.32 | -0.32 | 0.49 | 3.74 | ▇▃▁▁▁ |
HydrophilicFactor | 0 | 1 | 0 | 1 | -0.90 | -0.68 | -0.30 | 0.32 | 5.19 | ▇▂▁▁▁ |
SurfaceArea1 | 0 | 1 | 0 | 1 | -1.04 | -0.75 | -0.21 | 0.53 | 5.37 | ▇▃▁▁▁ |
SurfaceArea2 | 0 | 1 | 0 | 1 | -1.05 | -0.77 | -0.26 | 0.52 | 5.00 | ▇▃▁▁▁ |
###################################
# Verifying the data dimensions
# for the test set
###################################
# Report rows x columns of the pre-modelling test set
# (the recorded output below shows 316 rows and 221 columns).
dim(PMA_PreModelling_Test)
## [1] 316 221
##################################
# Loading dataset
##################################
# The exploratory analysis works on the pre-modelling train set.
EDA <- PMA_PreModelling_Train
##################################
# Listing all predictors
##################################
# Every column except the response.
EDA.Predictors <- EDA[, !names(EDA) %in% c("Log_Solubility_Class")]
##################################
# Listing all numeric predictors
##################################
# vapply() guarantees a logical mask, one value per column.
EDA.Predictors.Numeric <-
  EDA.Predictors[, vapply(EDA.Predictors, is.numeric, logical(1))]
ncol(EDA.Predictors.Numeric)
## [1] 15
# Print the column names of the numeric predictors.
names(EDA.Predictors.Numeric)
## [1] "MolWeight" "NumBonds" "NumMultBonds"
## [4] "NumRotBonds" "NumDblBonds" "NumCarbon"
## [7] "NumNitrogen" "NumOxygen" "NumSulfer"
## [10] "NumChlorine" "NumHalogen" "NumRings"
## [13] "HydrophilicFactor" "SurfaceArea1" "SurfaceArea2"
##################################
# Listing all factor predictors
##################################
# vapply() guarantees a logical mask, one value per column.
EDA.Predictors.Factor <-
  EDA.Predictors[, vapply(EDA.Predictors, is.factor, logical(1))]
ncol(EDA.Predictors.Factor)
## [1] 205
# Print the column names of the factor predictors.
names(EDA.Predictors.Factor)
## [1] "FP001" "FP002" "FP003" "FP004" "FP005" "FP006" "FP007" "FP008" "FP009"
## [10] "FP010" "FP011" "FP012" "FP013" "FP014" "FP015" "FP016" "FP017" "FP018"
## [19] "FP019" "FP020" "FP021" "FP022" "FP023" "FP024" "FP025" "FP026" "FP027"
## [28] "FP028" "FP029" "FP030" "FP031" "FP032" "FP033" "FP034" "FP035" "FP036"
## [37] "FP037" "FP038" "FP039" "FP040" "FP041" "FP042" "FP043" "FP044" "FP045"
## [46] "FP046" "FP047" "FP048" "FP049" "FP050" "FP051" "FP052" "FP053" "FP054"
## [55] "FP055" "FP056" "FP057" "FP058" "FP059" "FP060" "FP061" "FP062" "FP063"
## [64] "FP064" "FP065" "FP066" "FP067" "FP068" "FP069" "FP070" "FP071" "FP072"
## [73] "FP073" "FP074" "FP075" "FP076" "FP077" "FP078" "FP079" "FP080" "FP081"
## [82] "FP082" "FP083" "FP084" "FP085" "FP086" "FP087" "FP088" "FP089" "FP090"
## [91] "FP091" "FP092" "FP093" "FP094" "FP095" "FP096" "FP097" "FP098" "FP099"
## [100] "FP100" "FP101" "FP102" "FP103" "FP104" "FP105" "FP106" "FP107" "FP108"
## [109] "FP109" "FP110" "FP111" "FP112" "FP113" "FP114" "FP115" "FP116" "FP117"
## [118] "FP118" "FP119" "FP120" "FP121" "FP122" "FP123" "FP124" "FP125" "FP126"
## [127] "FP127" "FP128" "FP129" "FP130" "FP131" "FP132" "FP133" "FP134" "FP135"
## [136] "FP136" "FP137" "FP138" "FP139" "FP140" "FP141" "FP142" "FP143" "FP144"
## [145] "FP145" "FP146" "FP147" "FP148" "FP149" "FP150" "FP151" "FP152" "FP153"
## [154] "FP155" "FP156" "FP157" "FP158" "FP159" "FP160" "FP161" "FP162" "FP163"
## [163] "FP164" "FP165" "FP166" "FP167" "FP168" "FP169" "FP170" "FP171" "FP172"
## [172] "FP173" "FP174" "FP175" "FP176" "FP177" "FP178" "FP179" "FP180" "FP181"
## [181] "FP182" "FP183" "FP184" "FP185" "FP186" "FP187" "FP188" "FP189" "FP190"
## [190] "FP191" "FP192" "FP193" "FP194" "FP195" "FP196" "FP197" "FP198" "FP201"
## [199] "FP202" "FP203" "FP204" "FP205" "FP206" "FP207" "FP208"
##################################
# Formulating the box plots
##################################
# One box-plot panel per numeric predictor, split by solubility class.
# Both axes use panel-specific ("free") scales; x tick labels are rotated.
featurePlot(
  x = EDA.Predictors.Numeric,
  y = EDA$Log_Solubility_Class,
  plot = "box",
  scales = list(
    x = list(relation = "free", rot = 90),
    y = list(relation = "free")
  ),
  adjust = 1.5,
  pch = "|"
)
##################################
# Restructuring the dataset
# for barchart analysis
##################################
# Take the response from EDA, the data set this section analyses, so it is
# guaranteed to be row-aligned with EDA.Predictors.Factor.
Log_Solubility_Class <- EDA$Log_Solubility_Class

# Response in column 1, followed by every factor predictor.
EDA.Bar.Source <- as.data.frame(cbind(Log_Solubility_Class,
                                      EDA.Predictors.Factor))
ncol(EDA.Bar.Source)
## [1] 206
##################################
# Creating a function to formulate
# the proportions table
##################################
# Build the proportions table of one factor predictor against the response.
#
# FactorVar: name (character scalar) of a column of the global
#            EDA.Bar.Source data frame.
#
# Returns a data frame with one row per (Log_Solubility_Class, level)
# combination and the columns:
#   Log_Solubility_Class - response level
#   Structure            - level of the predictor
#   Freq                 - share of the response level within that predictor
#                          level (proportions sum to 1 per predictor level)
#   Variable             - FactorVar repeated, used to label the panel
EDA.PropTable.Function <- function(FactorVar) {
  EDA.Bar.Source.FactorVar <- EDA.Bar.Source[, c("Log_Solubility_Class",
                                                 FactorVar)]
  # margin = 2 normalises within each level of the predictor (table columns)
  EDA.Bar.Source.FactorVar.Prop <-
    as.data.frame(prop.table(table(EDA.Bar.Source.FactorVar), 2))
  names(EDA.Bar.Source.FactorVar.Prop)[2] <- "Structure"
  EDA.Bar.Source.FactorVar.Prop$Variable <-
    rep(FactorVar, nrow(EDA.Bar.Source.FactorVar.Prop))
  return(EDA.Bar.Source.FactorVar.Prop)
}
# Proportion tables for barchart group 5: columns 162 to 206 of
# EDA.Bar.Source (45 factor predictors), stacked row-wise in column order.
EDA.Bar.Source.FactorVar.Prop.Group5 <- do.call(
  rbind,
  lapply(names(EDA.Bar.Source)[162:206], EDA.PropTable.Function)
)
# Stacked bar chart for group 5: proportion (column 3) by structure level
# (column 2), one panel per predictor (column 4), bars split by response
# class (column 1). The outer parentheses print the plot after assignment.
(EDA.Barchart.FactorVar <- barchart(EDA.Bar.Source.FactorVar.Prop.Group5[,3] ~
                                      EDA.Bar.Source.FactorVar.Prop.Group5[,2] | EDA.Bar.Source.FactorVar.Prop.Group5[,4],
                                    data=EDA.Bar.Source.FactorVar.Prop.Group5,
                                    groups = EDA.Bar.Source.FactorVar.Prop.Group5[,1],
                                    stack=TRUE,
                                    ylab = "Proportion",
                                    xlab = "Structure",
                                    auto.key = list(adj=1, space="top", columns=2),
                                    layout=(c(9,5))))
# Proportion tables for barchart group 4: columns 122 to 161 of
# EDA.Bar.Source (40 factor predictors), stacked row-wise in column order.
EDA.Bar.Source.FactorVar.Prop.Group4 <- do.call(
  rbind,
  lapply(names(EDA.Bar.Source)[122:161], EDA.PropTable.Function)
)
<- barchart(EDA.Bar.Source.FactorVar.Prop.Group4[,3] ~
(EDA.Barchart.FactorVar 2] | EDA.Bar.Source.FactorVar.Prop.Group4[,4],
EDA.Bar.Source.FactorVar.Prop.Group4[,data=EDA.Bar.Source.FactorVar.Prop.Group4,
groups = EDA.Bar.Source.FactorVar.Prop.Group4[,1],
stack=TRUE,
ylab = "Proportion",
xlab = "Structure",
auto.key = list(adj=1, space="top", columns=2),
layout=(c(9,5))))
# Proportion tables for the predictors held in columns 82 to 121 of
# EDA.Bar.Source, stacked row-wise in column order. The helper is applied
# once per column name instead of through 40 hand-written calls.
# NOTE(review): assumes EDA.PropTable.Function (defined elsewhere) takes a
# single column name and returns a data frame - confirm.
EDA.Bar.Source.FactorVar.Prop.Group3 <- do.call(
  rbind,
  lapply(names(EDA.Bar.Source)[82:121], EDA.PropTable.Function)
)

# Stacked barchart for the Group3 proportion table: column 3 against
# column 2, one panel per value of column 4, bars stacked by column 1.
# The wrapping parentheses print the trellis object as soon as it is assigned.
(EDA.Barchart.FactorVar <- barchart(
  EDA.Bar.Source.FactorVar.Prop.Group3[, 3] ~
    EDA.Bar.Source.FactorVar.Prop.Group3[, 2] |
    EDA.Bar.Source.FactorVar.Prop.Group3[, 4],
  data = EDA.Bar.Source.FactorVar.Prop.Group3,
  groups = EDA.Bar.Source.FactorVar.Prop.Group3[, 1],
  stack = TRUE,
  ylab = "Proportion",
  xlab = "Structure",
  auto.key = list(adj = 1, space = "top", columns = 2),
  layout = c(9, 5)
))
# Proportion tables for the predictors held in columns 42 to 81 of
# EDA.Bar.Source, stacked row-wise in column order. The helper is applied
# once per column name instead of through 40 hand-written calls.
# NOTE(review): assumes EDA.PropTable.Function (defined elsewhere) takes a
# single column name and returns a data frame - confirm.
EDA.Bar.Source.FactorVar.Prop.Group2 <- do.call(
  rbind,
  lapply(names(EDA.Bar.Source)[42:81], EDA.PropTable.Function)
)

# Stacked barchart for the Group2 proportion table: column 3 against
# column 2, one panel per value of column 4, bars stacked by column 1.
# The wrapping parentheses print the trellis object as soon as it is assigned.
(EDA.Barchart.FactorVar <- barchart(
  EDA.Bar.Source.FactorVar.Prop.Group2[, 3] ~
    EDA.Bar.Source.FactorVar.Prop.Group2[, 2] |
    EDA.Bar.Source.FactorVar.Prop.Group2[, 4],
  data = EDA.Bar.Source.FactorVar.Prop.Group2,
  groups = EDA.Bar.Source.FactorVar.Prop.Group2[, 1],
  stack = TRUE,
  ylab = "Proportion",
  xlab = "Structure",
  auto.key = list(adj = 1, space = "top", columns = 2),
  layout = c(9, 5)
))
# Proportion tables for the predictors held in columns 2 to 41 of
# EDA.Bar.Source, stacked row-wise in column order. The helper is applied
# once per column name instead of through 40 hand-written calls.
# NOTE(review): assumes EDA.PropTable.Function (defined elsewhere) takes a
# single column name and returns a data frame - confirm.
EDA.Bar.Source.FactorVar.Prop.Group1 <- do.call(
  rbind,
  lapply(names(EDA.Bar.Source)[2:41], EDA.PropTable.Function)
)

# Stacked barchart for the Group1 proportion table: column 3 against
# column 2, one panel per value of column 4, bars stacked by column 1.
# The wrapping parentheses print the trellis object as soon as it is assigned.
(EDA.Barchart.FactorVar <- barchart(
  EDA.Bar.Source.FactorVar.Prop.Group1[, 3] ~
    EDA.Bar.Source.FactorVar.Prop.Group1[, 2] |
    EDA.Bar.Source.FactorVar.Prop.Group1[, 4],
  data = EDA.Bar.Source.FactorVar.Prop.Group1,
  groups = EDA.Bar.Source.FactorVar.Prop.Group1[, 1],
  stack = TRUE,
  ylab = "Proportion",
  xlab = "Structure",
  auto.key = list(adj = 1, space = "top", columns = 2),
  layout = c(9, 5)
))
##################################
# Filtering in the numeric predictors
# with the factor response variable
##################################
# Keeping every column whose name does not contain "FP"; per the printed
# structure below this leaves the factor response plus 15 numeric predictors.
PMA_PreModelling_Train_Numeric <-
  PMA_PreModelling_Train[, !grepl("FP", names(PMA_PreModelling_Train))]
dim(PMA_PreModelling_Train_Numeric)
## [1] 951 16
str(PMA_PreModelling_Train_Numeric)
## 'data.frame': 951 obs. of 16 variables:
## $ Log_Solubility_Class: Factor w/ 2 levels "Low","High": 1 1 1 1 1 1 1 1 1 1 ...
## $ MolWeight : num 0.304 1.475 0.284 -0.579 0.508 ...
## $ NumBonds : num 0.498 1.698 0.697 0.207 0.566 ...
## $ NumMultBonds : num 1.9049 1.3248 0.1647 -0.8021 -0.0287 ...
## $ NumRotBonds : num -0.935 0.726 0.726 -0.52 1.141 ...
## $ NumDblBonds : num -0.83134 -0.83134 -0.00521 0.82092 -0.83134 ...
## $ NumCarbon : num 0.858 1.804 0.701 0.182 -0.012 ...
## $ NumNitrogen : num 1.001 1.844 -0.685 -0.685 3.53 ...
## $ NumOxygen : num -0.91 -0.332 0.246 -0.91 -0.91 ...
## $ NumSulfer : num -0.336 1.712 -0.336 -0.336 -0.336 ...
## $ NumChlorine : num -0.397 -0.397 -0.397 -0.397 0.317 ...
## $ NumHalogen : num -0.474 -0.474 -0.474 -0.474 0.205 ...
## $ NumRings : num 1.231 2.001 -0.309 -0.309 -0.309 ...
## $ HydrophilicFactor : num -0.742 -0.31 -0.275 -0.834 -0.043 ...
## $ SurfaceArea1 : num -0.3026 0.4458 0.0238 -1.0332 0.4954 ...
## $ SurfaceArea2 : num -0.379 1.054 -0.077 -1.055 0.36 ...
# Summarising every column of the numeric subset (response included)
summary(PMA_PreModelling_Train_Numeric)
## Log_Solubility_Class MolWeight NumBonds NumMultBonds
## Low :427 Min. :-2.835229 Min. :-2.9239 Min. :-1.18881
## High:524 1st Qu.:-0.798923 1st Qu.:-0.6096 1st Qu.:-0.99545
## Median :-0.008626 Median :-0.0356 Median :-0.02867
## Mean : 0.000000 Mean : 0.0000 Mean : 0.00000
## 3rd Qu.: 0.800109 3rd Qu.: 0.5994 3rd Qu.: 0.74476
## Max. : 2.722778 Max. : 3.2279 Max. : 3.64511
## NumRotBonds NumDblBonds NumCarbon NumNitrogen
## Min. :-0.9347 Min. :-0.831342 Min. :-2.64433 Min. :-0.6852
## 1st Qu.:-0.9347 1st Qu.:-0.831342 1st Qu.:-0.68596 1st Qu.:-0.6852
## Median :-0.1043 Median :-0.005212 Median :-0.01199 Median :-0.6852
## Mean : 0.0000 Mean : 0.000000 Mean : 0.00000 Mean : 0.0000
## 3rd Qu.: 0.5184 3rd Qu.: 0.820917 3rd Qu.: 0.53700 3rd Qu.: 0.1578
## Max. : 5.7083 Max. : 4.951564 Max. : 3.05604 Max. : 4.3730
## NumOxygen NumSulfer NumChlorine NumHalogen
## Min. :-0.9103 Min. :-0.336 Min. :-0.3972 Min. :-0.4741
## 1st Qu.:-0.9103 1st Qu.:-0.336 1st Qu.:-0.3972 1st Qu.:-0.4741
## Median :-0.3320 Median :-0.336 Median :-0.3972 Median :-0.4741
## Mean : 0.0000 Mean : 0.000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.2463 3rd Qu.:-0.336 3rd Qu.:-0.3972 3rd Qu.: 0.2049
## Max. : 6.6077 Max. : 7.858 Max. : 6.7442 Max. : 6.3162
## NumRings HydrophilicFactor SurfaceArea1 SurfaceArea2
## Min. :-1.0792 Min. :-0.8565 Min. :-1.0332 Min. :-1.0554
## 1st Qu.:-1.0792 1st Qu.:-0.6593 1st Qu.:-0.7716 1st Qu.:-0.7765
## Median :-0.3093 Median :-0.2606 Median :-0.2085 Median :-0.1866
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.4607 3rd Qu.: 0.2963 3rd Qu.: 0.4767 3rd Qu.: 0.5358
## Max. : 4.3103 Max. :11.9924 Max. : 8.3733 Max. : 7.6515
##################################
# Obtaining the ROC AUC
##################################
# Per-predictor filter importance from caret::filterVarImp; columns 2 onward
# are the predictors, column 1 is the factor response.
AUROC <- filterVarImp(
  x = PMA_PreModelling_Train_Numeric[, 2:ncol(PMA_PreModelling_Train_Numeric)],
  y = PMA_PreModelling_Train_Numeric$Log_Solubility_Class
)
##################################
# Formulating the summary table
##################################
AUROC_Summary <- AUROC
AUROC_Summary$Predictor <- rownames(AUROC)
# The first column is renamed so the plotting formula can refer to it as AUROC
names(AUROC_Summary)[1] <- "AUROC"
AUROC_Summary$Metric <- rep("AUROC", nrow(AUROC))
AUROC_Summary
## AUROC High Predictor Metric
## MolWeight 0.8441930 0.8441930 MolWeight AUROC
## NumBonds 0.7893255 0.7893255 NumBonds AUROC
## NumMultBonds 0.7230098 0.7230098 NumMultBonds AUROC
## NumRotBonds 0.5847695 0.5847695 NumRotBonds AUROC
## NumDblBonds 0.5142035 0.5142035 NumDblBonds AUROC
## NumCarbon 0.8408857 0.8408857 NumCarbon AUROC
## NumNitrogen 0.5285634 0.5285634 NumNitrogen AUROC
## NumOxygen 0.5536988 0.5536988 NumOxygen AUROC
## NumSulfer 0.5317902 0.5317902 NumSulfer AUROC
## NumChlorine 0.6104904 0.6104904 NumChlorine AUROC
## NumHalogen 0.6381755 0.6381755 NumHalogen AUROC
## NumRings 0.7438569 0.7438569 NumRings AUROC
## HydrophilicFactor 0.6699367 0.6699367 HydrophilicFactor AUROC
## SurfaceArea1 0.5726889 0.5726889 SurfaceArea1 AUROC
## SurfaceArea2 0.5479915 0.5479915 SurfaceArea2 AUROC
##################################
# Exploring predictor performance
##################################
# Dot-and-line plot of AUROC per predictor; both the prepanel and the panel
# function reorder the predictors by their AUROC value.
dotplot(
  Predictor ~ AUROC | Metric,
  data = AUROC_Summary,
  prepanel = function(x, y) {
    list(ylim = levels(reorder(y, x)))
  },
  panel = function(x, y, ...) {
    panel.dotplot(x, reorder(y, x), ...)
  },
  origin = 0,
  type = c("p", "h"),
  pch = 16,
  cex = 2,
  alpha = 0.45
)
##################################
# Obtaining the t-test statistics
##################################
# Two-sample t-test of each numeric predictor against the response class.
# Each column yields four values: the t statistic, the p-value and the two
# group means.
ATS <- apply(
  PMA_PreModelling_Train_Numeric[, 2:ncol(PMA_PreModelling_Train_Numeric)],
  2,
  function(x, y) {
    tStats <- t.test(x ~ y)[c("statistic", "p.value", "estimate")]
    unlist(tStats)
  },
  y = PMA_PreModelling_Train_Numeric$Log_Solubility_Class
)
##################################
# Formulating the summary table
##################################
# Transposed so that each predictor becomes a row
ATS_Summary <- as.data.frame(t(ATS))
# NOTE(review): Mean0 / Mean1 presumably follow the response level order
# ("Low", then "High") - confirm.
names(ATS_Summary) <- c("t.Statistic", "t.Test_P.Value", "Mean0", "Mean1")
ATS_Summary$Predictor <- names(PMA_PreModelling_Train_Numeric[, -1])
ATS_Summary$Metric <- rep("ATS", nrow(ATS_Summary))
# ATS is the absolute t statistic
ATS_Summary$ATS <- abs(ATS_Summary$t.Statistic)
ATS_Summary
## t.Statistic t.Test_P.Value Mean0 Mean1
## MolWeight 22.0675750 8.568100e-87 0.64470512 -0.52536085
## NumBonds 16.4907720 2.071672e-53 0.52608866 -0.42870202
## NumMultBonds 13.0920514 2.095787e-35 0.44544021 -0.36298277
## NumRotBonds 5.8690062 6.747824e-09 0.21555838 -0.17565539
## NumDblBonds 1.9672950 4.949190e-02 0.07217700 -0.05881599
## NumCarbon 21.0036355 8.156177e-80 0.62632485 -0.51038304
## NumNitrogen -0.8247139 4.097543e-01 -0.02976857 0.02425798
## NumOxygen -0.9312510 3.519946e-01 -0.03406840 0.02776184
## NumSulfer 3.2021823 1.427379e-03 0.11971764 -0.09755617
## NumChlorine 8.3362925 7.315159e-16 0.31187920 -0.25414584
## NumHalogen 9.1800512 9.409615e-19 0.33850129 -0.27583979
## NumRings 14.3956856 1.979115e-41 0.48410734 -0.39449205
## HydrophilicFactor -7.3321483 5.053677e-13 -0.24599149 0.20045490
## SurfaceArea1 -2.5597702 1.063897e-02 -0.09225436 0.07517674
## SurfaceArea2 -1.1459666 2.521288e-01 -0.04168274 0.03396666
## Predictor Metric ATS
## MolWeight MolWeight ATS 22.0675750
## NumBonds NumBonds ATS 16.4907720
## NumMultBonds NumMultBonds ATS 13.0920514
## NumRotBonds NumRotBonds ATS 5.8690062
## NumDblBonds NumDblBonds ATS 1.9672950
## NumCarbon NumCarbon ATS 21.0036355
## NumNitrogen NumNitrogen ATS 0.8247139
## NumOxygen NumOxygen ATS 0.9312510
## NumSulfer NumSulfer ATS 3.2021823
## NumChlorine NumChlorine ATS 8.3362925
## NumHalogen NumHalogen ATS 9.1800512
## NumRings NumRings ATS 14.3956856
## HydrophilicFactor HydrophilicFactor ATS 7.3321483
## SurfaceArea1 SurfaceArea1 ATS 2.5597702
## SurfaceArea2 SurfaceArea2 ATS 1.1459666
##################################
# Exploring predictor performance
##################################
# Dot-and-line plot of the absolute t statistic per predictor; both the
# prepanel and the panel function reorder the predictors by their ATS value.
dotplot(
  Predictor ~ ATS | Metric,
  data = ATS_Summary,
  prepanel = function(x, y) {
    list(ylim = levels(reorder(y, x)))
  },
  panel = function(x, y, ...) {
    panel.dotplot(x, reorder(y, x), ...)
  },
  origin = 0,
  type = c("p", "h"),
  pch = 16,
  cex = 2,
  alpha = 0.45
)
##################################
# Obtaining the maximal information coefficient
##################################
# MIC between each numeric predictor and the response, with the response
# recoded numerically ("High" -> 1, anything else -> 0) for mine().
MIC <- mine(
  x = PMA_PreModelling_Train_Numeric[, 2:ncol(PMA_PreModelling_Train_Numeric)],
  y = ifelse(PMA_PreModelling_Train_Numeric$Log_Solubility_Class == "High", 1, 0)
)$MIC
##################################
# Formulating the summary table
##################################
MIC_Summary <- data.frame(
  Predictor = names(PMA_PreModelling_Train_Numeric)[2:ncol(PMA_PreModelling_Train_Numeric)],
  MIC = MIC[, 1],
  Metric = rep("MIC", length(MIC))
)
MIC_Summary
## Predictor MIC Metric
## 1 MolWeight 0.46368934 MIC
## 2 NumBonds 0.28498465 MIC
## 3 NumMultBonds 0.19005119 MIC
## 4 NumRotBonds 0.03861351 MIC
## 5 NumDblBonds 0.01133118 MIC
## 6 NumCarbon 0.31544845 MIC
## 7 NumNitrogen 0.01995159 MIC
## 8 NumOxygen 0.07491145 MIC
## 9 NumSulfer 0.01254889 MIC
## 10 NumChlorine 0.07010055 MIC
## 11 NumHalogen 0.08459159 MIC
## 12 NumRings 0.16421351 MIC
## 13 HydrophilicFactor 0.32734124 MIC
## 14 SurfaceArea1 0.19959108 MIC
## 15 SurfaceArea2 0.22267432 MIC
##################################
# Exploring predictor performance
##################################
# Dot-and-line plot of MIC per predictor; both the prepanel and the panel
# function reorder the predictors by their MIC value.
dotplot(
  Predictor ~ MIC | Metric,
  data = MIC_Summary,
  prepanel = function(x, y) {
    list(ylim = levels(reorder(y, x)))
  },
  panel = function(x, y, ...) {
    panel.dotplot(x, reorder(y, x), ...)
  },
  origin = 0,
  type = c("p", "h"),
  pch = 16,
  cex = 2,
  alpha = 0.45
)
# Re-displaying the summary of the numeric subset; PMA_PreModelling_Train_Numeric
# is not reassigned between the earlier summary() call and this one.
summary(PMA_PreModelling_Train_Numeric)
## Log_Solubility_Class MolWeight NumBonds NumMultBonds
## Low :427 Min. :-2.835229 Min. :-2.9239 Min. :-1.18881
## High:524 1st Qu.:-0.798923 1st Qu.:-0.6096 1st Qu.:-0.99545
## Median :-0.008626 Median :-0.0356 Median :-0.02867
## Mean : 0.000000 Mean : 0.0000 Mean : 0.00000
## 3rd Qu.: 0.800109 3rd Qu.: 0.5994 3rd Qu.: 0.74476
## Max. : 2.722778 Max. : 3.2279 Max. : 3.64511
## NumRotBonds NumDblBonds NumCarbon NumNitrogen
## Min. :-0.9347 Min. :-0.831342 Min. :-2.64433 Min. :-0.6852
## 1st Qu.:-0.9347 1st Qu.:-0.831342 1st Qu.:-0.68596 1st Qu.:-0.6852
## Median :-0.1043 Median :-0.005212 Median :-0.01199 Median :-0.6852
## Mean : 0.0000 Mean : 0.000000 Mean : 0.00000 Mean : 0.0000
## 3rd Qu.: 0.5184 3rd Qu.: 0.820917 3rd Qu.: 0.53700 3rd Qu.: 0.1578
## Max. : 5.7083 Max. : 4.951564 Max. : 3.05604 Max. : 4.3730
## NumOxygen NumSulfer NumChlorine NumHalogen
## Min. :-0.9103 Min. :-0.336 Min. :-0.3972 Min. :-0.4741
## 1st Qu.:-0.9103 1st Qu.:-0.336 1st Qu.:-0.3972 1st Qu.:-0.4741
## Median :-0.3320 Median :-0.336 Median :-0.3972 Median :-0.4741
## Mean : 0.0000 Mean : 0.000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.2463 3rd Qu.:-0.336 3rd Qu.:-0.3972 3rd Qu.: 0.2049
## Max. : 6.6077 Max. : 7.858 Max. : 6.7442 Max. : 6.3162
## NumRings HydrophilicFactor SurfaceArea1 SurfaceArea2
## Min. :-1.0792 Min. :-0.8565 Min. :-1.0332 Min. :-1.0554
## 1st Qu.:-1.0792 1st Qu.:-0.6593 1st Qu.:-0.7716 1st Qu.:-0.7765
## Median :-0.3093 Median :-0.2606 Median :-0.2085 Median :-0.1866
## Mean : 0.0000 Mean : 0.0000 Mean : 0.0000 Mean : 0.0000
## 3rd Qu.: 0.4607 3rd Qu.: 0.2963 3rd Qu.: 0.4767 3rd Qu.: 0.5358
## Max. : 4.3103 Max. :11.9924 Max. : 8.3733 Max. : 7.6515
##################################
# Obtaining the relief values
##################################
# Seeding the RNG immediately before the Relief evaluation
set.seed(12345678)
# Attribute evaluation of every predictor against the response with the
# "ReliefFequalK" estimator and 50 Relief iterations.
RV <- attrEval(
  Log_Solubility_Class ~ .,
  data = PMA_PreModelling_Train_Numeric,
  estimator = "ReliefFequalK",
  ReliefIterations = 50
)
##################################
# Formulating the summary table
##################################
RV_Summary <- data.frame(
  Predictor = names(RV),
  RV = RV,
  Metric = rep("RV", length(RV))
)
RV_Summary
## Predictor RV Metric
## MolWeight MolWeight 0.30039075 RV
## NumBonds NumBonds 0.20575373 RV
## NumMultBonds NumMultBonds 0.08333333 RV
## NumRotBonds NumRotBonds 0.08825000 RV
## NumDblBonds NumDblBonds 0.00200000 RV
## NumCarbon NumCarbon 0.28938624 RV
## NumNitrogen NumNitrogen 0.10800000 RV
## NumOxygen NumOxygen 0.07476923 RV
## NumSulfer NumSulfer 0.01600000 RV
## NumChlorine NumChlorine 0.01400000 RV
## NumHalogen NumHalogen 0.08400000 RV
## NumRings NumRings 0.20600000 RV
## HydrophilicFactor HydrophilicFactor 0.04048816 RV
## SurfaceArea1 SurfaceArea1 0.05630307 RV
## SurfaceArea2 SurfaceArea2 0.08079488 RV
##################################
# Exploring predictor performance
##################################
# Dot-and-line plot of the relief value per predictor; both the prepanel and
# the panel function reorder the predictors by their RV value.
dotplot(
  Predictor ~ RV | Metric,
  data = RV_Summary,
  prepanel = function(x, y) {
    list(ylim = levels(reorder(y, x)))
  },
  panel = function(x, y, ...) {
    panel.dotplot(x, reorder(y, x), ...)
  },
  origin = 0,
  type = c("p", "h"),
  pch = 16,
  cex = 2,
  alpha = 0.45
)
##################################
# Filtering in the factor predictors
# with the factor response variable
##################################
# Keeping every column whose name contains "FP" (the fingerprint predictors)
PMA_PreModelling_Train_Factor <-
  PMA_PreModelling_Train[, grepl("FP", names(PMA_PreModelling_Train))]
# Appending the response as the last column. The full column name is spelled
# out: the shortened `$Log_Solubility` only resolved through `$` partial
# matching, which stops working if another column ever shares that prefix.
PMA_PreModelling_Train_Factor$Log_Solubility_Class <-
  PMA_PreModelling_Train$Log_Solubility_Class
dim(PMA_PreModelling_Train_Factor)
## [1] 951 206
str(PMA_PreModelling_Train_Factor)
## 'data.frame': 951 obs. of 206 variables:
## $ FP001 : Factor w/ 2 levels "0","1": 1 1 2 1 1 2 1 2 2 2 ...
## $ FP002 : Factor w/ 2 levels "0","1": 2 2 2 1 1 1 2 1 1 2 ...
## $ FP003 : Factor w/ 2 levels "0","1": 1 1 2 2 2 2 1 2 2 2 ...
## $ FP004 : Factor w/ 2 levels "0","1": 1 2 2 1 2 2 2 2 2 2 ...
## $ FP005 : Factor w/ 2 levels "0","1": 2 2 2 1 2 1 2 1 1 2 ...
## $ FP006 : Factor w/ 2 levels "0","1": 1 2 1 1 2 1 1 1 2 2 ...
## $ FP007 : Factor w/ 2 levels "0","1": 1 2 1 2 1 1 1 2 2 2 ...
## $ FP008 : Factor w/ 2 levels "0","1": 2 2 2 1 1 1 2 1 1 1 ...
## $ FP009 : Factor w/ 2 levels "0","1": 1 1 1 1 2 2 2 1 2 1 ...
## $ FP010 : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 1 1 1 ...
## $ FP011 : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 2 1 ...
## $ FP012 : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 2 1 1 ...
## $ FP013 : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 2 1 1 1 ...
## $ FP014 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 2 1 1 1 ...
## $ FP015 : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
## $ FP016 : Factor w/ 2 levels "0","1": 1 2 1 1 2 2 1 2 1 1 ...
## $ FP017 : Factor w/ 2 levels "0","1": 1 1 2 2 1 1 1 1 2 2 ...
## $ FP018 : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
## $ FP019 : Factor w/ 2 levels "0","1": 2 1 1 1 2 1 2 1 1 1 ...
## $ FP020 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ FP021 : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 2 1 ...
## $ FP022 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
## $ FP023 : Factor w/ 2 levels "0","1": 1 1 1 2 1 1 1 1 2 1 ...
## $ FP024 : Factor w/ 2 levels "0","1": 2 1 1 1 2 1 1 1 1 1 ...
## $ FP025 : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 1 1 1 ...
## $ FP026 : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 2 1 1 1 ...
## $ FP027 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
## $ FP028 : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 2 2 ...
## $ FP029 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ FP030 : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 1 1 1 1 ...
## $ FP031 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 2 1 1 ...
## $ FP032 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ FP033 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ FP034 : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 1 1 1 2 ...
## $ FP035 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
## $ FP036 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ FP037 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
## $ FP038 : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 1 1 1 ...
## $ FP039 : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 1 1 1 1 ...
## $ FP040 : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 1 1 1 1 ...
## $ FP041 : Factor w/ 2 levels "0","1": 1 1 1 2 1 1 1 1 2 1 ...
## $ FP042 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ FP043 : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
## $ FP044 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ FP045 : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 1 1 1 ...
## $ FP046 : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 2 1 1 2 ...
## $ FP047 : Factor w/ 2 levels "0","1": 1 2 2 1 1 1 2 1 1 1 ...
## $ FP048 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 2 1 1 ...
## $ FP049 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 2 1 1 1 ...
## $ FP050 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 2 1 2 ...
## $ FP051 : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
## $ FP052 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
## $ FP053 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 2 1 1 1 ...
## $ FP054 : Factor w/ 2 levels "0","1": 1 1 1 2 1 1 1 1 2 2 ...
## $ FP055 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ FP056 : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 1 1 1 1 ...
## $ FP057 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 2 1 1 1 ...
## $ FP058 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
## $ FP059 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 2 1 1 ...
## $ FP060 : Factor w/ 2 levels "0","1": 1 2 2 1 1 1 1 2 2 1 ...
## $ FP061 : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 2 2 1 ...
## $ FP062 : Factor w/ 2 levels "0","1": 1 1 2 1 1 2 1 2 2 2 ...
## $ FP063 : Factor w/ 2 levels "0","1": 2 2 1 1 2 2 2 1 1 2 ...
## $ FP064 : Factor w/ 2 levels "0","1": 1 2 2 1 2 2 1 2 1 1 ...
## $ FP065 : Factor w/ 2 levels "0","1": 2 2 1 1 2 1 2 1 2 2 ...
## $ FP066 : Factor w/ 2 levels "0","1": 2 1 2 2 2 2 2 2 2 2 ...
## $ FP067 : Factor w/ 2 levels "0","1": 2 2 1 1 2 2 2 1 1 2 ...
## $ FP068 : Factor w/ 2 levels "0","1": 1 2 1 1 2 2 2 1 1 2 ...
## $ FP069 : Factor w/ 2 levels "0","1": 2 1 2 2 2 2 1 2 2 1 ...
## $ FP070 : Factor w/ 2 levels "0","1": 2 2 1 2 1 1 2 1 2 1 ...
## $ FP071 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 2 1 2 2 ...
## $ FP072 : Factor w/ 2 levels "0","1": 1 2 2 1 1 2 1 2 2 2 ...
## $ FP073 : Factor w/ 2 levels "0","1": 1 2 2 1 1 1 1 1 2 1 ...
## $ FP074 : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 2 1 ...
## $ FP075 : Factor w/ 2 levels "0","1": 1 2 1 1 2 2 2 1 1 2 ...
## $ FP076 : Factor w/ 2 levels "0","1": 2 2 1 1 1 1 2 1 2 2 ...
## $ FP077 : Factor w/ 2 levels "0","1": 1 2 1 2 1 1 1 2 2 2 ...
## $ FP078 : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 2 1 ...
## $ FP079 : Factor w/ 2 levels "0","1": 2 2 2 2 2 1 2 1 2 2 ...
## $ FP080 : Factor w/ 2 levels "0","1": 1 2 1 1 2 2 2 2 1 1 ...
## $ FP081 : Factor w/ 2 levels "0","1": 1 1 2 2 1 1 1 2 2 2 ...
## $ FP082 : Factor w/ 2 levels "0","1": 2 2 2 1 2 2 2 1 2 2 ...
## $ FP083 : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 1 1 1 2 ...
## $ FP084 : Factor w/ 2 levels "0","1": 2 2 1 1 2 1 2 1 1 1 ...
## $ FP085 : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 2 1 1 1 ...
## $ FP086 : Factor w/ 2 levels "0","1": 1 1 1 2 2 1 1 2 2 2 ...
## $ FP087 : Factor w/ 2 levels "0","1": 2 2 2 2 2 1 2 1 2 2 ...
## $ FP088 : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 2 2 1 ...
## $ FP089 : Factor w/ 2 levels "0","1": 2 2 1 1 1 1 2 1 1 1 ...
## $ FP090 : Factor w/ 2 levels "0","1": 1 2 1 2 1 1 1 2 2 2 ...
## $ FP091 : Factor w/ 2 levels "0","1": 2 2 1 1 2 1 2 1 1 2 ...
## $ FP092 : Factor w/ 2 levels "0","1": 1 1 1 1 2 2 2 1 2 1 ...
## $ FP093 : Factor w/ 2 levels "0","1": 1 2 1 2 1 1 1 2 2 2 ...
## $ FP094 : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 1 2 1 1 ...
## $ FP095 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 2 ...
## $ FP096 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
## $ FP097 : Factor w/ 2 levels "0","1": 2 2 1 1 1 1 2 1 2 1 ...
## $ FP098 : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 2 1 1 ...
## $ FP099 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
## [list output truncated]
# Summarising every column of the factor subset (response included)
summary(PMA_PreModelling_Train_Factor)
## FP001 FP002 FP003 FP004 FP005 FP006 FP007 FP008 FP009
## 0:482 0:438 0:536 0:395 0:400 0:570 0:605 0:641 0:685
## 1:469 1:513 1:415 1:556 1:551 1:381 1:346 1:310 1:266
## FP010 FP011 FP012 FP013 FP014 FP015 FP016 FP017 FP018
## 0:781 0:747 0:783 0:793 0:798 0:133 0:812 0:814 0:826
## 1:170 1:204 1:168 1:158 1:153 1:818 1:139 1:137 1:125
## FP019 FP020 FP021 FP022 FP023 FP024 FP025 FP026 FP027
## 0:835 0:837 0:836 0:852 0:834 0:844 0:841 0:871 0:858
## 1:116 1:114 1:115 1: 99 1:117 1:107 1:110 1: 80 1: 93
## FP028 FP029 FP030 FP031 FP032 FP033 FP034 FP035 FP036
## 0:850 0:854 0:862 0:866 0:881 0:885 0:875 0:882 0:879
## 1:101 1: 97 1: 89 1: 85 1: 70 1: 66 1: 76 1: 69 1: 72
## FP037 FP038 FP039 FP040 FP041 FP042 FP043 FP044 FP045
## 0:884 0:869 0:880 0:886 0:891 0:897 0:888 0:894 0:898
## 1: 67 1: 82 1: 71 1: 65 1: 60 1: 54 1: 63 1: 57 1: 53
## FP046 FP047 FP048 FP049 FP050 FP051 FP052 FP053 FP054
## 0:651 0:698 0:833 0:835 0:844 0:847 0:864 0:862 0:879
## 1:300 1:253 1:118 1:116 1:107 1:104 1: 87 1: 89 1: 72
## FP055 FP056 FP057 FP058 FP059 FP060 FP061 FP062 FP063
## 0:900 0:889 0:837 0:843 0:899 0:493 0:526 0:535 0:546
## 1: 51 1: 62 1:114 1:108 1: 52 1:458 1:425 1:416 1:405
## FP064 FP065 FP066 FP067 FP068 FP069 FP070 FP071 FP072
## 0:555 0:387 0:371 0:590 0:607 0:607 0:613 0:640 0:325
## 1:396 1:564 1:580 1:361 1:344 1:344 1:338 1:311 1:626
## FP073 FP074 FP075 FP076 FP077 FP078 FP079 FP080 FP081
## 0:656 0:642 0:629 0:639 0:646 0:662 0:295 0:663 0:686
## 1:295 1:309 1:322 1:312 1:305 1:289 1:656 1:288 1:265
## FP082 FP083 FP084 FP085 FP086 FP087 FP088 FP089 FP090
## 0:272 0:691 0:679 0:708 0:695 0:260 0:701 0:716 0:714
## 1:679 1:260 1:272 1:243 1:256 1:691 1:250 1:235 1:237
## FP091 FP092 FP093 FP094 FP095 FP096 FP097 FP098 FP099
## 0:737 0:719 0:719 0:731 0:742 0:744 0:727 0:725 0:735
## 1:214 1:232 1:232 1:220 1:209 1:207 1:224 1:226 1:216
## FP100 FP101 FP102 FP103 FP104 FP105 FP106 FP107 FP108
## 0:731 0:726 0:759 0:743 0:739 0:746 0:769 0:750 0:756
## 1:220 1:225 1:192 1:208 1:212 1:205 1:182 1:201 1:195
## FP109 FP110 FP111 FP112 FP113 FP114 FP115 FP116 FP117
## 0:783 0:755 0:764 0:766 0:765 0:803 0:781 0:768 0:781
## 1:168 1:196 1:187 1:185 1:186 1:148 1:170 1:183 1:170
## FP118 FP119 FP120 FP121 FP122 FP123 FP124 FP125 FP126
## 0:768 0:796 0:793 0:818 0:795 0:792 0:797 0:803 0:810
## 1:183 1:155 1:158 1:133 1:156 1:159 1:154 1:148 1:141
## FP127 FP128 FP129 FP130 FP131 FP132 FP133 FP134 FP135
## 0:818 0:810 0:819 0:851 0:831 0:832 0:831 0:830 0:831
## 1:133 1:141 1:132 1:100 1:120 1:119 1:120 1:121 1:120
## FP136 FP137 FP138 FP139 FP140 FP141 FP142 FP143 FP144
## 0:836 0:841 0:845 0:873 0:845 0:840 0:847 0:874 0:852
## 1:115 1:110 1:106 1: 78 1:106 1:111 1:104 1: 77 1: 99
## FP145 FP146 FP147 FP148 FP149 FP150 FP151 FP152 FP153
## 0:852 0:853 0:851 0:868 0:865 0:876 0:898 0:873 0:877
## 1: 99 1: 98 1:100 1: 83 1: 86 1: 75 1: 53 1: 78 1: 74
## FP155 FP156 FP157 FP158 FP159 FP160 FP161 FP162 FP163
## 0:885 0:884 0:892 0:900 0:884 0:886 0:888 0:480 0:498
## 1: 66 1: 67 1: 59 1: 51 1: 67 1: 65 1: 63 1:471 1:453
## FP164 FP165 FP166 FP167 FP168 FP169 FP170 FP171 FP172
## 0:354 0:619 0:636 0:639 0:318 0:774 0:776 0:790 0:807
## 1:597 1:332 1:315 1:312 1:633 1:177 1:175 1:161 1:144
## FP173 FP174 FP175 FP176 FP177 FP178 FP179 FP180 FP181
## 0:816 0:827 0:823 0:835 0:836 0:836 0:858 0:849 0:862
## 1:135 1:124 1:128 1:116 1:115 1:115 1: 93 1:102 1: 89
## FP182 FP183 FP184 FP185 FP186 FP187 FP188 FP189 FP190
## 0:857 0:879 0:871 0:870 0:878 0:882 0:886 0:878 0:882
## 1: 94 1: 72 1: 80 1: 81 1: 73 1: 69 1: 65 1: 73 1: 69
## FP191 FP192 FP193 FP194 FP195 FP196 FP197 FP198 FP201
## 0:884 0:893 0:892 0:895 0:893 0:897 0:901 0:897 0:901
## 1: 67 1: 58 1: 59 1: 56 1: 58 1: 54 1: 50 1: 54 1: 50
## FP202 FP203 FP204 FP205 FP206 FP207 FP208 Log_Solubility_Class
## 0:706 0:842 0:857 0:877 0:894 0:897 0:844 Low :427
## 1:245 1:109 1: 94 1: 74 1: 57 1: 54 1:107 High:524
##################################
# Obtaining the Fisher exact test statistics
##################################
# Fisher exact test of a predictor against the response.
#   x: predictor values, cross-tabulated against y
#   y: response values
# Returns a named numeric vector of length two: the odds-ratio estimate
# (named "FET_OR.odds ratio", because the name carried by fisher.test's
# estimate is appended to the FET_OR prefix) and the p-value ("FET_P.Value").
VP_F <- function(x, y) {
  tab <- table(x, y)
  fet <- fisher.test(tab)
  c(
    FET_OR = fet$estimate,
    FET_P.Value = fet$p.value
  )
}
##################################
# Formulating the summary table
##################################
# Apply VP_F to every predictor column, using Log_Solubility_Class as the
# response. The response column is excluded by name up front, so it is never
# tested against itself and no row has to be dropped afterwards.
VP_F_Summary <- lapply(
  PMA_PreModelling_Train_Factor[setdiff(names(PMA_PreModelling_Train_Factor),
                                        "Log_Solubility_Class")],
  VP_F,
  y = PMA_PreModelling_Train_Factor$Log_Solubility_Class)

# Stack the per-predictor vectors into one row per predictor; the list names
# become the row names of the resulting data frame.
VP_F_Summary <- as.data.frame(do.call("rbind", VP_F_Summary))
VP_F_Summary$Predictor <- rownames(VP_F_Summary)
VP_F_Summary$Metric <- rep("VP_F", nrow(VP_F_Summary))

# Derived metrics: -log10 of the p-value, and the (absolute) log of the odds
# ratio, which is the first column returned by VP_F.
VP_F_Summary$NegativeLog10_FET_P.Value <- -log10(VP_F_Summary$FET_P.Value)
VP_F_Summary$LogOddsRatio <- log(VP_F_Summary[, 1])
VP_F_Summary$AbsoluteLogOddsRatio <- abs(VP_F_Summary$LogOddsRatio)

# Label five selected predictors with their own name; all others are "Others".
VP_F_Summary$Group <- ifelse(
  rownames(VP_F_Summary) %in% c("FP076", "FP089", "FP112", "FP044", "FP193"),
  rownames(VP_F_Summary),
  "Others")
VP_F_Summary
## FET_OR.odds ratio FET_P.Value Predictor Metric NegativeLog10_FET_P.Value
## FP001 1.19708080 1.715372e-01 FP001 VP_F 7.656418e-01
## FP002 0.35039786 5.181602e-15 FP002 VP_F 1.428554e+01
## FP003 0.97166586 8.438406e-01 FP003 VP_F 7.373956e-02
## FP004 1.26919078 7.435063e-02 FP004 VP_F 1.128715e+00
## FP005 0.35435518 2.273432e-14 FP005 VP_F 1.364332e+01
## FP006 1.84346020 5.972899e-06 FP006 VP_F 5.223815e+00
## FP007 0.88537336 3.789438e-01 FP007 VP_F 4.214252e-01
## FP008 0.57340983 7.203997e-05 FP008 VP_F 4.142426e+00
## FP009 0.26982672 1.890323e-18 FP009 VP_F 1.772346e+01
## FP010 1.57515069 1.057248e-02 FP010 VP_F 1.975823e+00
## FP011 1.90083496 9.542912e-05 FP011 VP_F 4.020319e+00
## FP012 1.10579162 6.082058e-01 FP012 VP_F 2.159494e-01
## FP013 0.14883782 2.727491e-23 FP013 VP_F 2.256424e+01
## FP014 0.15750978 9.678623e-22 FP014 VP_F 2.101419e+01
## FP015 2.86928927 5.561892e-08 FP015 VP_F 7.254777e+00
## FP016 0.85604079 4.075094e-01 FP016 VP_F 3.898624e-01
## FP017 0.49376123 1.883936e-04 FP017 VP_F 3.724934e+00
## FP018 0.47679807 1.545802e-04 FP018 VP_F 3.810846e+00
## FP019 1.04400654 8.428105e-01 FP019 VP_F 7.427005e-02
## FP020 1.28698533 2.293994e-01 FP020 VP_F 6.394076e-01
## FP021 0.74658209 1.614886e-01 FP021 VP_F 7.918582e-01
## FP022 1.02076928 1.000000e+00 FP022 VP_F 4.821637e-17
## FP023 0.54497814 2.790955e-03 FP023 VP_F 2.554247e+00
## FP024 1.09109789 6.815917e-01 FP024 VP_F 1.664757e-01
## FP025 1.30887702 2.213225e-01 FP025 VP_F 6.549744e-01
## FP026 0.64245234 6.095878e-02 FP026 VP_F 1.214964e+00
## FP027 2.01654412 2.893976e-03 FP027 VP_F 2.538505e+00
## FP028 1.21715865 3.977531e-01 FP028 VP_F 4.003864e-01
## FP029 0.85300530 5.182871e-01 FP029 VP_F 2.854296e-01
## FP030 1.42811535 1.453412e-01 FP030 VP_F 8.376114e-01
## FP031 0.81964988 4.241544e-01 FP031 VP_F 3.724760e-01
## FP032 1.72977657 4.512846e-02 FP032 VP_F 1.345550e+00
## FP033 1.56955042 9.624740e-02 FP033 VP_F 1.016611e+00
## FP034 0.71399401 1.858759e-01 FP034 VP_F 7.307769e-01
## FP035 0.28405176 2.035732e-06 FP035 VP_F 5.691279e+00
## FP036 2.60842408 4.826361e-04 FP036 VP_F 3.316380e+00
## FP037 0.60007108 5.538991e-02 FP037 VP_F 1.256569e+00
## FP038 1.37688518 2.016885e-01 FP038 VP_F 6.953188e-01
## FP039 0.27168922 7.256956e-07 FP039 VP_F 6.139246e+00
## FP040 1.91054797 1.975706e-02 FP040 VP_F 1.704278e+00
## FP041 0.29980664 2.191840e-05 FP041 VP_F 4.659191e+00
## FP042 1.29903467 3.997709e-01 FP042 VP_F 3.981888e-01
## FP043 0.63257683 8.858424e-02 FP043 VP_F 1.052644e+00
## FP044 0.05440672 3.102624e-15 FP044 VP_F 1.450827e+01
## FP045 0.39880063 1.656122e-03 FP045 VP_F 2.780908e+00
## FP046 0.47005989 8.986407e-08 FP046 VP_F 7.046414e+00
## FP047 0.65690301 5.010695e-03 FP047 VP_F 2.300102e+00
## FP048 0.82285525 3.247352e-01 FP048 VP_F 4.884706e-01
## FP049 0.19582389 2.675582e-14 FP049 VP_F 1.357258e+01
## FP050 0.60302823 1.734588e-02 FP050 VP_F 1.760804e+00
## FP051 0.51334022 1.655115e-03 FP051 VP_F 2.781172e+00
## FP052 1.23362693 3.684283e-01 FP052 VP_F 4.336471e-01
## FP053 0.23582634 1.209324e-09 FP053 VP_F 8.917457e+00
## FP054 0.30817566 5.987552e-06 FP054 VP_F 5.222751e+00
## FP055 1.67259146 1.108072e-01 FP055 VP_F 9.554319e-01
## FP056 0.26122657 2.079039e-06 FP056 VP_F 5.682137e+00
## FP057 0.79249223 2.698146e-01 FP057 VP_F 5.689346e-01
## FP058 0.93853294 7.590565e-01 FP058 VP_F 1.197259e-01
## FP059 0.28137500 3.906572e-05 FP059 VP_F 4.408204e+00
## FP060 1.63240884 1.999323e-04 FP060 VP_F 3.699117e+00
## FP061 1.04977625 7.431330e-01 FP061 VP_F 1.289334e-01
## FP062 1.29158337 5.673496e-02 FP062 VP_F 1.246149e+00
## FP063 1.36519387 2.100723e-02 FP063 VP_F 1.677631e+00
## FP064 1.18728146 2.091106e-01 FP064 VP_F 6.796239e-01
## FP065 0.24259847 2.350296e-24 FP065 VP_F 2.362888e+01
## FP066 1.00753443 1.000000e+00 FP066 VP_F 4.821637e-17
## FP067 1.15740510 2.832438e-01 FP067 VP_F 5.478396e-01
## FP068 1.12634107 4.156288e-01 FP068 VP_F 3.812944e-01
## FP069 0.85481954 2.496501e-01 FP069 VP_F 6.026683e-01
## FP070 0.27947568 5.654858e-20 FP070 VP_F 1.924758e+01
## FP071 0.32504356 2.346502e-15 FP071 VP_F 1.462958e+01
## FP072 2.69986223 1.052267e-12 FP072 VP_F 1.197787e+01
## FP073 1.74346468 1.063125e-04 FP073 VP_F 3.973416e+00
## FP074 1.20851743 1.862849e-01 FP074 VP_F 7.298225e-01
## FP075 1.32082732 4.603196e-02 FP075 VP_F 1.336941e+00
## FP076 0.12445706 6.187262e-45 FP076 VP_F 4.420850e+01
## FP077 0.76099672 5.088938e-02 FP077 VP_F 1.293373e+00
## FP078 0.91855970 5.709918e-01 FP078 VP_F 2.433701e-01
## FP079 0.22398333 6.263011e-23 FP079 VP_F 2.220322e+01
## FP080 1.25723356 1.186374e-01 FP080 VP_F 9.257783e-01
## FP081 0.72911277 2.948404e-02 FP081 VP_F 1.530413e+00
## FP082 0.22175679 7.799605e-22 FP082 VP_F 2.110793e+01
## FP083 1.79705328 1.022517e-04 FP083 VP_F 3.990329e+00
## FP084 1.13637183 3.874658e-01 FP084 VP_F 4.117666e-01
## FP085 0.27122453 2.014219e-17 FP085 VP_F 1.669589e+01
## FP086 0.64974151 3.320546e-03 FP086 VP_F 2.478791e+00
## FP087 0.25801089 7.989715e-18 FP087 VP_F 1.709747e+01
## FP088 1.57116925 3.024704e-03 FP088 VP_F 2.519317e+00
## FP089 0.14721223 2.145272e-32 FP089 VP_F 3.166852e+01
## FP090 0.64237027 3.317666e-03 FP090 VP_F 2.479167e+00
## FP091 0.97800429 9.377988e-01 FP091 VP_F 2.789033e-02
## FP092 0.20899251 6.412816e-23 FP092 VP_F 2.219295e+01
## FP093 0.49172261 3.399920e-06 FP093 VP_F 5.468531e+00
## FP094 0.99476660 1.000000e+00 FP094 VP_F 0.000000e+00
## FP095 1.21598118 2.378080e-01 FP095 VP_F 6.237736e-01
## FP096 0.79866626 1.559207e-01 FP096 VP_F 8.070962e-01
## FP097 0.27373537 2.585714e-16 FP097 VP_F 1.558742e+01
## FP098 1.28105523 1.254892e-01 FP098 VP_F 9.013938e-01
## FP099 0.47185118 1.845962e-06 FP099 VP_F 5.733777e+00
## FP100 1.36192602 5.326631e-02 FP100 VP_F 1.273547e+00
## FP101 1.15291250 3.587501e-01 FP101 VP_F 4.452080e-01
## FP102 0.54942707 2.496460e-04 FP102 VP_F 3.602675e+00
## FP103 0.58576485 6.976654e-04 FP103 VP_F 3.156353e+00
## FP104 0.63194616 3.717872e-03 FP104 VP_F 2.429706e+00
## FP105 0.48316770 5.646872e-06 FP105 VP_F 5.248192e+00
## FP106 0.65902108 1.285535e-02 FP106 VP_F 1.890916e+00
## FP107 0.27844164 7.142993e-15 FP107 VP_F 1.414612e+01
## FP108 1.01457497 9.358550e-01 FP108 VP_F 2.879143e-02
## FP109 0.61805388 4.837573e-03 FP109 VP_F 2.315372e+00
## FP110 1.37134941 5.366059e-02 FP110 VP_F 1.270345e+00
## FP111 0.89745661 5.128947e-01 FP111 VP_F 2.899718e-01
## FP112 0.13793978 2.701713e-28 FP112 VP_F 2.756836e+01
## FP113 1.44943098 2.675822e-02 FP113 VP_F 1.572543e+00
## FP114 0.73592698 8.832607e-02 FP114 VP_F 1.053911e+00
## FP115 0.87408678 4.445275e-01 FP115 VP_F 3.521014e-01
## FP116 1.15250268 4.091495e-01 FP116 VP_F 3.881180e-01
## FP117 0.60131089 2.891885e-03 FP117 VP_F 2.538819e+00
## FP118 1.28812070 1.371138e-01 FP118 VP_F 8.629190e-01
## FP119 1.11895502 5.380064e-01 FP119 VP_F 2.692126e-01
## FP120 1.32008395 1.363755e-01 FP120 VP_F 8.652636e-01
## FP121 0.54424360 1.353781e-03 FP121 VP_F 2.868452e+00
## FP122 0.97085516 9.298813e-01 FP122 VP_F 3.157251e-02
## FP123 0.47156532 2.445612e-05 FP123 VP_F 4.611612e+00
## FP124 1.17599458 3.773446e-01 FP124 VP_F 4.232618e-01
## FP125 1.15573797 4.720280e-01 FP125 VP_F 3.260322e-01
## FP126 1.01045303 1.000000e+00 FP126 VP_F 0.000000e+00
## FP127 1.41857268 7.424460e-02 FP127 VP_F 1.129335e+00
## FP128 1.37469908 9.866654e-02 FP128 VP_F 1.005830e+00
## FP129 0.51443239 4.682229e-04 FP129 VP_F 3.329547e+00
## FP130 0.66502981 5.625422e-02 FP130 VP_F 1.249845e+00
## FP131 0.95783416 8.447698e-01 FP131 VP_F 7.326163e-02
## FP132 2.04266955 5.495649e-04 FP132 VP_F 3.259981e+00
## FP133 1.11789560 6.238370e-01 FP133 VP_F 2.049288e-01
## FP134 0.57195802 4.523155e-03 FP134 VP_F 2.344559e+00
## FP135 0.95783416 8.447698e-01 FP135 VP_F 7.326163e-02
## FP136 1.06770699 7.650764e-01 FP136 VP_F 1.162952e-01
## FP137 0.59399949 1.088153e-02 FP137 VP_F 1.963310e+00
## FP138 0.64186857 3.794478e-02 FP138 VP_F 1.420848e+00
## FP139 0.80019382 3.452346e-01 FP139 VP_F 4.618857e-01
## FP140 1.39373024 1.212801e-01 FP140 VP_F 9.162104e-01
## FP141 0.49290239 5.407860e-04 FP141 VP_F 3.266975e+00
## FP142 0.82977493 4.037494e-01 FP142 VP_F 3.938882e-01
## FP143 0.73530968 2.319326e-01 FP143 VP_F 6.346383e-01
## FP144 1.63603800 3.234915e-02 FP144 VP_F 1.490137e+00
## FP145 1.34827701 2.000793e-01 FP145 VP_F 6.987978e-01
## FP146 0.27276366 6.880778e-09 FP146 VP_F 8.162362e+00
## FP147 1.14075786 5.955152e-01 FP147 VP_F 2.251071e-01
## FP148 1.25912604 3.564450e-01 FP148 VP_F 4.480075e-01
## FP149 0.17427309 2.945871e-12 FP149 VP_F 1.153079e+01
## FP150 1.10335419 7.180062e-01 FP150 VP_F 1.438718e-01
## FP151 1.15739238 6.709985e-01 FP151 VP_F 1.732785e-01
## FP152 0.67652104 1.220168e-01 FP152 VP_F 9.135804e-01
## FP153 0.59654511 3.829194e-02 FP153 VP_F 1.416893e+00
## FP155 0.23782568 2.509629e-07 FP155 VP_F 6.600391e+00
## FP156 0.56197587 2.973839e-02 FP156 VP_F 1.526682e+00
## FP157 0.89614943 6.876569e-01 FP157 VP_F 1.626282e-01
## FP158 1.83744533 5.907418e-02 FP158 VP_F 1.228602e+00
## FP159 0.60007108 5.538991e-02 FP159 VP_F 1.256569e+00
## FP160 0.94725316 8.973947e-01 FP160 VP_F 4.701649e-02
## FP161 4.68576694 6.710555e-07 FP161 VP_F 6.173242e+00
## FP162 0.39641239 2.689179e-12 FP162 VP_F 1.157038e+01
## FP163 1.30035228 5.018728e-02 FP163 VP_F 1.299406e+00
## FP164 0.29316745 3.185855e-18 FP164 VP_F 1.749677e+01
## FP165 1.14157018 3.392740e-01 FP165 VP_F 4.694494e-01
## FP166 0.52535686 4.568833e-06 FP166 VP_F 5.340195e+00
## FP167 1.16920320 2.674396e-01 FP167 VP_F 5.727742e-01
## FP168 0.24673455 1.869134e-21 FP168 VP_F 2.072836e+01
## FP169 0.19581712 3.513565e-20 FP169 VP_F 1.945425e+01
## FP170 0.57865361 1.348906e-03 FP170 VP_F 2.870018e+00
## FP171 1.60655269 8.996114e-03 FP171 VP_F 2.045945e+00
## FP172 0.12306538 1.375876e-24 FP172 VP_F 2.386142e+01
## FP173 0.45698434 3.551485e-05 FP173 VP_F 4.449590e+00
## FP174 0.65612868 3.299563e-02 FP174 VP_F 1.481544e+00
## FP175 0.94595425 7.753215e-01 FP175 VP_F 1.105182e-01
## FP176 1.00334103 1.000000e+00 FP176 VP_F 0.000000e+00
## FP177 0.87473364 5.488107e-01 FP177 VP_F 2.605775e-01
## FP178 0.49966530 6.352505e-04 FP178 VP_F 3.197055e+00
## FP179 0.67434769 7.911965e-02 FP179 VP_F 1.101716e+00
## FP180 1.42373128 1.140569e-01 FP180 VP_F 9.428783e-01
## FP181 0.26768983 2.106565e-08 FP181 VP_F 7.676425e+00
## FP182 1.22466105 3.835099e-01 FP182 VP_F 4.162234e-01
## FP183 1.58365219 8.415258e-02 FP183 VP_F 1.074933e+00
## FP184 0.13694274 1.118373e-13 FP184 VP_F 1.295141e+01
## FP185 0.37563701 5.699476e-05 FP185 VP_F 4.244165e+00
## FP186 1.18281337 5.414405e-01 FP186 VP_F 2.664493e-01
## FP187 0.93790027 8.029155e-01 FP187 VP_F 9.533016e-02
## FP188 1.77119205 3.859076e-02 FP188 VP_F 1.413517e+00
## FP189 0.57675161 2.735176e-02 FP189 VP_F 1.563015e+00
## FP190 0.18597681 1.732961e-09 FP190 VP_F 8.761211e+00
## FP191 1.14571288 6.130662e-01 FP191 VP_F 2.124926e-01
## FP192 0.51452638 1.988386e-02 FP192 VP_F 1.701499e+00
## FP193 0.08191610 1.128789e-13 FP193 VP_F 1.294739e+01
## FP194 1.18034939 5.822638e-01 FP194 VP_F 2.348802e-01
## FP195 3.72159008 3.063427e-05 FP195 VP_F 4.513792e+00
## FP196 0.14858927 3.201577e-09 FP196 VP_F 8.494636e+00
## FP197 0.16479415 4.266746e-08 FP197 VP_F 7.369903e+00
## FP198 2.00709087 2.367488e-02 FP198 VP_F 1.625712e+00
## FP201 1.78135621 7.872569e-02 FP201 VP_F 1.103884e+00
## FP202 0.52577576 2.020707e-05 FP202 VP_F 4.694497e+00
## FP203 0.95670909 8.384706e-01 FP203 VP_F 7.651215e-02
## FP204 0.54424384 6.162864e-03 FP204 VP_F 2.210217e+00
## FP205 0.31642129 5.109654e-06 FP205 VP_F 5.291608e+00
## FP206 0.45381997 5.634832e-03 FP206 VP_F 2.249119e+00
## FP207 0.14858927 3.201577e-09 FP207 VP_F 8.494636e+00
## FP208 0.92038091 7.570924e-01 FP208 VP_F 1.208511e-01
## LogOddsRatio AbsoluteLogOddsRatio Group
## FP001 0.179885927 0.179885927 Others
## FP002 -1.048686041 1.048686041 Others
## FP003 -0.028743299 0.028743299 Others
## FP004 0.238379513 0.238379513 Others
## FP005 -1.037455546 1.037455546 Others
## FP006 0.611644351 0.611644351 Others
## FP007 -0.121745848 0.121745848 Others
## FP008 -0.556154591 0.556154591 Others
## FP009 -1.309975319 1.309975319 Others
## FP010 0.454350945 0.454350945 Others
## FP011 0.642293242 0.642293242 Others
## FP012 0.100561473 0.100561473 Others
## FP013 -1.904898041 1.904898041 Others
## FP014 -1.848267748 1.848267748 Others
## FP015 1.054064357 1.054064357 Others
## FP016 -0.155437247 0.155437247 Others
## FP017 -0.705703211 0.705703211 Others
## FP018 -0.740662219 0.740662219 Others
## FP019 0.043065754 0.043065754 Others
## FP020 0.252302533 0.252302533 Others
## FP021 -0.292249695 0.292249695 Others
## FP022 0.020556543 0.020556543 Others
## FP023 -0.607009590 0.607009590 Others
## FP024 0.087184424 0.087184424 Others
## FP025 0.269169534 0.269169534 Others
## FP026 -0.442462641 0.442462641 Others
## FP027 0.701385217 0.701385217 Others
## FP028 0.196519164 0.196519164 Others
## FP029 -0.158989520 0.158989520 Others
## FP030 0.356355635 0.356355635 Others
## FP031 -0.198878004 0.198878004 Others
## FP032 0.547992253 0.547992253 Others
## FP033 0.450789220 0.450789220 Others
## FP034 -0.336880708 0.336880708 Others
## FP035 -1.258598799 1.258598799 Others
## FP036 0.958746238 0.958746238 Others
## FP037 -0.510707161 0.510707161 Others
## FP038 0.319823833 0.319823833 Others
## FP039 -1.303096431 1.303096431 Others
## FP040 0.647390098 0.647390098 Others
## FP041 -1.204617559 1.204617559 Others
## FP042 0.261621424 0.261621424 Others
## FP043 -0.457953588 0.457953588 Others
## FP044 -2.911267576 2.911267576 FP044
## FP045 -0.919293663 0.919293663 Others
## FP046 -0.754895161 0.754895161 Others
## FP047 -0.420218893 0.420218893 Others
## FP048 -0.194974974 0.194974974 Others
## FP049 -1.630539536 1.630539536 Others
## FP050 -0.505791274 0.505791274 Others
## FP051 -0.666816450 0.666816450 Others
## FP052 0.209958553 0.209958553 Others
## FP053 -1.444659604 1.444659604 Others
## FP054 -1.177085338 1.177085338 Others
## FP055 0.514374198 0.514374198 Others
## FP056 -1.342367160 1.342367160 Others
## FP057 -0.232572576 0.232572576 Others
## FP058 -0.063437329 0.063437329 Others
## FP059 -1.268066990 1.268066990 Others
## FP060 0.490056742 0.490056742 Others
## FP061 0.048577050 0.048577050 Others
## FP062 0.255868883 0.255868883 Others
## FP063 0.311296446 0.311296446 Others
## FP064 0.171666208 0.171666208 Others
## FP065 -1.416347592 1.416347592 Others
## FP066 0.007506185 0.007506185 Others
## FP067 0.146180513 0.146180513 Others
## FP068 0.118974389 0.118974389 Others
## FP069 -0.156864895 0.156864895 Others
## FP070 -1.274839999 1.274839999 Others
## FP071 -1.123796088 1.123796088 Others
## FP072 0.993200747 0.993200747 Others
## FP073 0.555874326 0.555874326 Others
## FP074 0.189394345 0.189394345 Others
## FP075 0.278258294 0.278258294 Others
## FP076 -2.083794540 2.083794540 FP076
## FP077 -0.273126235 0.273126235 Others
## FP078 -0.084948384 0.084948384 Others
## FP079 -1.496183628 1.496183628 Others
## FP080 0.228913722 0.228913722 Others
## FP081 -0.315926873 0.315926873 Others
## FP082 -1.506174057 1.506174057 Others
## FP083 0.586148259 0.586148259 Others
## FP084 0.127840582 0.127840582 Others
## FP085 -1.304808259 1.304808259 Others
## FP086 -0.431180676 0.431180676 Others
## FP087 -1.354753489 1.354753489 Others
## FP088 0.451820085 0.451820085 Others
## FP089 -1.915879958 1.915879958 FP089
## FP090 -0.442590395 0.442590395 Others
## FP091 -0.022241223 0.022241223 Others
## FP092 -1.565456851 1.565456851 Others
## FP093 -0.709840517 0.709840517 Others
## FP094 -0.005247142 0.005247142 Others
## FP095 0.195551307 0.195551307 Others
## FP096 -0.224812113 0.224812113 Others
## FP097 -1.295593440 1.295593440 Others
## FP098 0.247684138 0.247684138 Others
## FP099 -0.751091647 0.751091647 Others
## FP100 0.308899886 0.308899886 Others
## FP101 0.142291351 0.142291351 Others
## FP102 -0.598879231 0.598879231 Others
## FP103 -0.534836847 0.534836847 Others
## FP104 -0.458951075 0.458951075 Others
## FP105 -0.727391471 0.727391471 Others
## FP106 -0.416999764 0.416999764 Others
## FP107 -1.278546809 1.278546809 Others
## FP108 0.014469780 0.014469780 Others
## FP109 -0.481179637 0.481179637 Others
## FP110 0.315795222 0.315795222 Others
## FP111 -0.108190502 0.108190502 Others
## FP112 -1.980938083 1.980938083 FP112
## FP113 0.371171050 0.371171050 Others
## FP114 -0.306624372 0.306624372 Others
## FP115 -0.134575613 0.134575613 Others
## FP116 0.141935823 0.141935823 Others
## FP117 -0.508643191 0.508643191 Others
## FP118 0.253184331 0.253184331 Others
## FP119 0.112395234 0.112395234 Others
## FP120 0.277695331 0.277695331 Others
## FP121 -0.608358336 0.608358336 Others
## FP122 -0.029577988 0.029577988 Others
## FP123 -0.751697650 0.751697650 Others
## FP124 0.162114244 0.162114244 Others
## FP125 0.144739079 0.144739079 Others
## FP126 0.010398773 0.010398773 Others
## FP127 0.349651214 0.349651214 Others
## FP128 0.318234855 0.318234855 Others
## FP129 -0.664691145 0.664691145 Others
## FP130 -0.407923418 0.407923418 Others
## FP131 -0.043080623 0.043080623 Others
## FP132 0.714257556 0.714257556 Others
## FP133 0.111447989 0.111447989 Others
## FP134 -0.558689675 0.558689675 Others
## FP135 -0.043080623 0.043080623 Others
## FP136 0.065513353 0.065513353 Others
## FP137 -0.520876823 0.520876823 Others
## FP138 -0.443371715 0.443371715 Others
## FP139 -0.222901302 0.222901302 Others
## FP140 0.331983776 0.331983776 Others
## FP141 -0.707444112 0.707444112 Others
## FP142 -0.186600779 0.186600779 Others
## FP143 -0.307463530 0.307463530 Others
## FP144 0.492277463 0.492277463 Others
## FP145 0.298827485 0.298827485 Others
## FP146 -1.299149556 1.299149556 Others
## FP147 0.131692828 0.131692828 Others
## FP148 0.230417865 0.230417865 Others
## FP149 -1.747131733 1.747131733 Others
## FP150 0.098354800 0.098354800 Others
## FP151 0.146169523 0.146169523 Others
## FP152 -0.390791731 0.390791731 Others
## FP153 -0.516600423 0.516600423 Others
## FP155 -1.436217309 1.436217309 Others
## FP156 -0.576296365 0.576296365 Others
## FP157 -0.109648111 0.109648111 Others
## FP158 0.608376197 0.608376197 Others
## FP159 -0.510707161 0.510707161 Others
## FP160 -0.054188894 0.054188894 Others
## FP161 1.544529604 1.544529604 Others
## FP162 -0.925300229 0.925300229 Others
## FP163 0.262635216 0.262635216 Others
## FP164 -1.227011339 1.227011339 Others
## FP165 0.132404668 0.132404668 Others
## FP166 -0.643677518 0.643677518 Others
## FP167 0.156322489 0.156322489 Others
## FP168 -1.399442231 1.399442231 Others
## FP169 -1.630574123 1.630574123 Others
## FP170 -0.547051236 0.547051236 Others
## FP171 0.474090698 0.474090698 Others
## FP172 -2.095039482 2.095039482 Others
## FP173 -0.783106166 0.783106166 Others
## FP174 -0.421398357 0.421398357 Others
## FP175 -0.055561071 0.055561071 Others
## FP176 0.003335459 0.003335459 Others
## FP177 -0.133835847 0.133835847 Others
## FP178 -0.693816809 0.693816809 Others
## FP179 -0.394009441 0.394009441 Others
## FP180 0.353281085 0.353281085 Others
## FP181 -1.317926317 1.317926317 Others
## FP182 0.202664115 0.202664115 Others
## FP183 0.459733693 0.459733693 Others
## FP184 -1.988192371 1.988192371 Others
## FP185 -0.979131989 0.979131989 Others
## FP186 0.167895811 0.167895811 Others
## FP187 -0.064111657 0.064111657 Others
## FP188 0.571652797 0.571652797 Others
## FP189 -0.550343597 0.550343597 Others
## FP190 -1.682133264 1.682133264 Others
## FP191 0.136027042 0.136027042 Others
## FP192 -0.664508453 0.664508453 Others
## FP193 -2.502059785 2.502059785 FP193
## FP194 0.165810490 0.165810490 Others
## FP195 1.314151018 1.314151018 Others
## FP196 -1.906569334 1.906569334 Others
## FP197 -1.803058135 1.803058135 Others
## FP198 0.696686345 0.696686345 Others
## FP201 0.577374992 0.577374992 Others
## FP202 -0.642880476 0.642880476 Others
## FP203 -0.044255912 0.044255912 Others
## FP204 -0.608357895 0.608357895 Others
## FP205 -1.150680746 1.150680746 Others
## FP206 -0.790054693 0.790054693 Others
## FP207 -1.906569334 1.906569334 Others
## FP208 -0.082967663 0.082967663 Others
##################################
# Selecting the best-performing
# predictors based on metrics
##################################
# Rank all predictors by -log10(p-value), largest first.
VP_F_Summary_Top15_FETPValue <- VP_F_Summary[order(VP_F_Summary$NegativeLog10_FET_P.Value,
                                                   decreasing = TRUE), ]
# Keep the first 15 rows; head() returns fewer rows (rather than NA-filled
# rows) if the table is shorter. The outer parentheses print the result.
(VP_F_Summary_Top15_FETPValue <- head(VP_F_Summary_Top15_FETPValue, 15))
## FET_OR.odds ratio FET_P.Value Predictor Metric NegativeLog10_FET_P.Value
## FP076 0.1244571 6.187262e-45 FP076 VP_F 44.20850
## FP089 0.1472122 2.145272e-32 FP089 VP_F 31.66852
## FP112 0.1379398 2.701713e-28 FP112 VP_F 27.56836
## FP172 0.1230654 1.375876e-24 FP172 VP_F 23.86142
## FP065 0.2425985 2.350296e-24 FP065 VP_F 23.62888
## FP013 0.1488378 2.727491e-23 FP013 VP_F 22.56424
## FP079 0.2239833 6.263011e-23 FP079 VP_F 22.20322
## FP092 0.2089925 6.412816e-23 FP092 VP_F 22.19295
## FP082 0.2217568 7.799605e-22 FP082 VP_F 21.10793
## FP014 0.1575098 9.678623e-22 FP014 VP_F 21.01419
## FP168 0.2467345 1.869134e-21 FP168 VP_F 20.72836
## FP169 0.1958171 3.513565e-20 FP169 VP_F 19.45425
## FP070 0.2794757 5.654858e-20 FP070 VP_F 19.24758
## FP009 0.2698267 1.890323e-18 FP009 VP_F 17.72346
## FP164 0.2931674 3.185855e-18 FP164 VP_F 17.49677
## LogOddsRatio AbsoluteLogOddsRatio Group
## FP076 -2.083795 2.083795 FP076
## FP089 -1.915880 1.915880 FP089
## FP112 -1.980938 1.980938 FP112
## FP172 -2.095039 2.095039 Others
## FP065 -1.416348 1.416348 Others
## FP013 -1.904898 1.904898 Others
## FP079 -1.496184 1.496184 Others
## FP092 -1.565457 1.565457 Others
## FP082 -1.506174 1.506174 Others
## FP014 -1.848268 1.848268 Others
## FP168 -1.399442 1.399442 Others
## FP169 -1.630574 1.630574 Others
## FP070 -1.274840 1.274840 Others
## FP009 -1.309975 1.309975 Others
## FP164 -1.227011 1.227011 Others
# Rank all predictors by absolute log odds ratio, largest first.
VP_F_Summary_Top15_AbsoluteLogOddsRatio <- VP_F_Summary[order(VP_F_Summary$AbsoluteLogOddsRatio,
                                                              decreasing = TRUE), ]
# Keep the first 15 rows; head() returns fewer rows (rather than NA-filled
# rows) if the table is shorter. The outer parentheses print the result.
(VP_F_Summary_Top15_AbsoluteLogOddsRatio <- head(VP_F_Summary_Top15_AbsoluteLogOddsRatio, 15))
## FET_OR.odds ratio FET_P.Value Predictor Metric NegativeLog10_FET_P.Value
## FP044 0.05440672 3.102624e-15 FP044 VP_F 14.508271
## FP193 0.08191610 1.128789e-13 FP193 VP_F 12.947387
## FP172 0.12306538 1.375876e-24 FP172 VP_F 23.861421
## FP076 0.12445706 6.187262e-45 FP076 VP_F 44.208501
## FP184 0.13694274 1.118373e-13 FP184 VP_F 12.951413
## FP112 0.13793978 2.701713e-28 FP112 VP_F 27.568361
## FP089 0.14721223 2.145272e-32 FP089 VP_F 31.668518
## FP196 0.14858927 3.201577e-09 FP196 VP_F 8.494636
## FP207 0.14858927 3.201577e-09 FP207 VP_F 8.494636
## FP013 0.14883782 2.727491e-23 FP013 VP_F 22.564237
## FP014 0.15750978 9.678623e-22 FP014 VP_F 21.014186
## FP197 0.16479415 4.266746e-08 FP197 VP_F 7.369903
## FP149 0.17427309 2.945871e-12 FP149 VP_F 11.530786
## FP190 0.18597681 1.732961e-09 FP190 VP_F 8.761211
## FP169 0.19581712 3.513565e-20 FP169 VP_F 19.454252
## LogOddsRatio AbsoluteLogOddsRatio Group
## FP044 -2.911268 2.911268 FP044
## FP193 -2.502060 2.502060 FP193
## FP172 -2.095039 2.095039 Others
## FP076 -2.083795 2.083795 FP076
## FP184 -1.988192 1.988192 Others
## FP112 -1.980938 1.980938 FP112
## FP089 -1.915880 1.915880 FP089
## FP196 -1.906569 1.906569 Others
## FP207 -1.906569 1.906569 Others
## FP013 -1.904898 1.904898 Others
## FP014 -1.848268 1.848268 Others
## FP197 -1.803058 1.803058 Others
## FP149 -1.747132 1.747132 Others
## FP190 -1.682133 1.682133 Others
## FP169 -1.630574 1.630574 Others
##################################
# Exploring predictor performance
##################################
# Dot plot of -log10(Fisher exact test p-value) for the 15 top-ranked
# predictors, one panel per Metric value. Points are joined to the origin by
# horizontal segments (type "h" with origin = 0).
dotplot(Predictor ~ NegativeLog10_FET_P.Value | Metric,
        data = VP_F_Summary_Top15_FETPValue,
        xlab = "-Log10(Fisher Exact Test P-Value)",
        origin = 0,
        type = c("p", "h"),
        pch = 16,
        cex = 2,
        alpha = 0.45,
        # Axis limits are the predictor levels in the order given by
        # reorder(y, x), so the axis matches what the panel draws.
        prepanel = function(x, y) {
          ordered_levels <- levels(reorder(y, x))
          list(ylim = ordered_levels)
        },
        # Draw the predictors in that same reordered sequence.
        panel = function(x, y, ...) {
          y_sorted <- reorder(y, x)
          panel.dotplot(x, y_sorted, ...)
        })
# Dot plot of the absolute log odds ratio for the 15 top-ranked predictors,
# one panel per Metric value. Points are joined to the origin by horizontal
# segments (type "h" with origin = 0).
dotplot(Predictor ~ AbsoluteLogOddsRatio | Metric,
        data = VP_F_Summary_Top15_AbsoluteLogOddsRatio,
        xlab = "Absolute Log Odds Ratio",
        origin = 0,
        type = c("p", "h"),
        pch = 16,
        cex = 2,
        alpha = 0.45,
        # Axis limits are the predictor levels in the order given by
        # reorder(y, x), so the axis matches what the panel draws.
        prepanel = function(x, y) {
          ordered_levels <- levels(reorder(y, x))
          list(ylim = ordered_levels)
        },
        # Draw the predictors in that same reordered sequence.
        panel = function(x, y, ...) {
          y_sorted <- reorder(y, x)
          panel.dotplot(x, y_sorted, ...)
        })
##################################
# Obtaining the gain ratio
##################################
# Scores one predictor `x` against the class vector `y`.
#
# Arguments:
#   x  predictor values (one column of the factor predictor set)
#   y  class labels, same length as `x`
#
# Returns a named vector combining the odds-ratio estimate from
# fisher.test() on the x-by-y contingency table (prefix "FET_OR") and the
# value returned by attrEval() with the "GainRatio" estimator (prefix "Gain").
# c() appends the inner names to these prefixes, which is why downstream code
# refers to the gain column as `Gain.x`.
VP_G <- function(x, y){
  tab <- table(x, y)
  fet <- fisher.test(tab)
  out <- c(FET_OR = fet$estimate,
           Gain = attrEval(y ~ x, estimator="GainRatio"))
  out
}
##################################
# Formulating the summary table
##################################
# Apply VP_G to every column of the factor predictor set, using the
# dichotomized solubility class as the response.
VP_G_Summary <- lapply(PMA_PreModelling_Train_Factor,
                       VP_G,
                       y = PMA_PreModelling_Train_Factor$Log_Solubility_Class)

# Stack the per-column named vectors into one row per column.
VP_G_Summary <- as.data.frame(do.call("rbind", VP_G_Summary))

# Rename the gain ratio column from `Gain.x` to `Gain`.
VP_G_Summary$Gain <- VP_G_Summary$Gain.x
VP_G_Summary$Gain.x <- NULL

VP_G_Summary$Predictor <- names(PMA_PreModelling_Train_Factor)

# Drop the last row.
# NOTE(review): presumably this is the Log_Solubility_Class column scored
# against itself - confirm it is the last column of the factor set.
VP_G_Summary <- VP_G_Summary[-nrow(VP_G_Summary),]

VP_G_Summary$Metric <- rep("VP_G", nrow(VP_G_Summary))

# Column 1 holds the Fisher exact test odds ratio estimate.
VP_G_Summary$LogOddsRatio <- log(VP_G_Summary[,1])
VP_G_Summary$AbsoluteLogOddsRatio <- abs(VP_G_Summary$LogOddsRatio)

# Label five selected predictors by name; every other row becomes "Others".
VP_G_Summary$Group <- ifelse(
  rownames(VP_G_Summary) %in% c("FP076","FP089","FP172","FP044","FP193"),
  rownames(VP_G_Summary),
  "Others")

VP_G_Summary
## FET_OR.odds ratio Gain Predictor Metric LogOddsRatio
## FP001 1.19708080 1.444996e-03 FP001 VP_G 0.179885927
## FP002 0.35039786 4.708223e-02 FP002 VP_G -1.048686041
## FP003 0.97166586 3.674512e-05 FP003 VP_G -0.028743299
## FP004 1.26919078 2.522331e-03 FP004 VP_G 0.238379513
## FP005 0.35435518 4.551018e-02 FP005 VP_G -1.037455546
## FP006 1.84346020 1.618232e-02 FP006 VP_G 0.611644351
## FP007 0.88537336 6.497937e-04 FP007 VP_G -0.121745848
## FP008 0.57340983 1.334323e-02 FP008 VP_G -0.556154591
## FP009 0.26982672 6.919817e-02 FP009 VP_G -1.309975319
## FP010 1.57515069 7.731544e-03 FP010 VP_G 0.454350945
## FP011 1.90083496 1.575628e-02 FP011 VP_G 0.642293242
## FP012 1.10579162 3.890917e-04 FP012 VP_G 0.100561473
## FP013 0.14883782 1.170524e-01 FP013 VP_G -1.904898041
## FP014 0.15750978 1.104560e-01 FP014 VP_G -1.848267748
## FP015 2.86928927 3.948765e-02 FP015 VP_G 1.054064357
## FP016 0.85604079 9.035850e-04 FP016 VP_G -0.155437247
## FP017 0.49376123 1.837501e-02 FP017 VP_G -0.705703211
## FP018 0.47679807 1.979839e-02 FP018 VP_G -0.740662219
## FP019 1.04400654 6.620556e-05 FP019 VP_G 0.043065754
## FP020 1.28698533 2.228941e-03 FP020 VP_G 0.252302533
## FP021 0.74658209 3.076260e-03 FP021 VP_G -0.292249695
## FP022 1.02076928 1.460657e-05 FP022 VP_G 0.020556543
## FP023 0.54497814 1.321735e-02 FP023 VP_G -0.607009590
## FP024 1.09109789 2.662594e-04 FP024 VP_G 0.087184424
## FP025 1.30887702 2.513100e-03 FP025 VP_G 0.269169534
## FP026 0.64245234 6.517365e-03 FP026 VP_G -0.442462641
## FP027 2.01654412 1.547364e-02 FP027 VP_G 0.701385217
## FP028 1.21715865 1.323858e-03 FP028 VP_G 0.196519164
## FP029 0.85300530 8.767322e-04 FP029 VP_G -0.158989520
## FP030 1.42811535 4.164198e-03 FP030 VP_G 0.356355635
## FP031 0.81964988 1.335234e-03 FP031 VP_G -0.198878004
## FP032 1.72977657 9.085596e-03 FP032 VP_G 0.547992253
## FP033 1.56955042 6.160663e-03 FP033 VP_G 0.450789220
## FP034 0.71399401 3.744446e-03 FP034 VP_G -0.336880708
## FP035 0.28405176 4.681628e-02 FP035 VP_G -1.258598799
## FP036 2.60842408 2.580646e-02 FP036 VP_G 0.958746238
## FP037 0.60007108 8.341477e-03 FP037 VP_G -0.510707161
## FP038 1.37688518 3.308563e-03 FP038 VP_G 0.319823833
## FP039 0.27168922 5.011275e-02 FP039 VP_G -1.303096431
## FP040 1.91054797 1.225945e-02 FP040 VP_G 0.647390098
## FP041 0.29980664 4.192846e-02 FP041 VP_G -1.204617559
## FP042 1.29903467 2.037164e-03 FP042 VP_G 0.261621424
## FP043 0.63257683 6.634154e-03 FP043 VP_G -0.457953588
## FP044 0.05440672 1.485800e-01 FP044 VP_G -2.911267576
## FP045 0.39880063 2.481611e-02 FP045 VP_G -0.919293663
## FP046 0.47005989 2.432271e-02 FP046 VP_G -0.754895161
## FP047 0.65690301 7.410695e-03 FP047 VP_G -0.420218893
## FP048 0.82285525 1.375230e-03 FP048 VP_G -0.194974974
## FP049 0.19582389 8.320908e-02 FP049 VP_G -1.630539536
## FP050 0.60302823 9.042809e-03 FP050 VP_G -0.505791274
## FP051 0.51334022 1.549500e-02 FP051 VP_G -0.666816450
## FP052 1.23362693 1.461205e-03 FP052 VP_G 0.209958553
## FP053 0.23582634 6.331973e-02 FP053 VP_G -1.444659604
## FP054 0.30817566 4.194124e-02 FP054 VP_G -1.177085338
## FP055 1.67259146 7.509525e-03 FP055 VP_G 0.514374198
## FP056 0.26122657 5.112509e-02 FP056 VP_G -1.342367160
## FP057 0.79249223 1.943775e-03 FP057 VP_G -0.232572576
## FP058 0.93853294 1.423960e-04 FP058 VP_G -0.063437329
## FP059 0.28137500 4.445829e-02 FP059 VP_G -1.268066990
## FP060 1.63240884 1.063821e-02 FP060 VP_G 0.490056742
## FP061 1.04977625 1.049998e-04 FP061 VP_G 0.048577050
## FP062 1.29158337 2.901668e-03 FP062 VP_G 0.255868883
## FP063 1.36519387 4.278415e-03 FP063 VP_G 0.311296446
## FP064 1.18728146 1.303357e-03 FP064 VP_G 0.171666208
## FP065 0.24259847 8.119361e-02 FP065 VP_G -1.416347592
## FP066 1.00753443 2.484327e-06 FP066 VP_G 0.007506185
## FP067 1.15740510 9.366937e-04 FP067 VP_G 0.146180513
## FP068 1.12634107 6.173353e-04 FP068 VP_G 0.118974389
## FP069 0.85481954 1.078357e-03 FP069 VP_G -0.156864895
## FP070 0.27947568 6.832822e-02 FP070 VP_G -1.274839999
## FP071 0.32504356 5.309215e-02 FP071 VP_G -1.123796088
## FP072 2.69986223 4.205633e-02 FP072 VP_G 0.993200747
## FP073 1.74346468 1.286319e-02 FP073 VP_G 0.555874326
## FP074 1.20851743 1.537511e-03 FP074 VP_G 0.189394345
## FP075 1.32082732 3.326825e-03 FP075 VP_G 0.278258294
## FP076 0.12445706 1.651819e-01 FP076 VP_G -2.083794540
## FP077 0.76099672 3.222236e-03 FP077 VP_G -0.273126235
## FP078 0.91855970 3.087306e-04 FP078 VP_G -0.084948384
## FP079 0.22398333 8.315958e-02 FP079 VP_G -1.496183628
## FP080 1.25723356 2.217248e-03 FP080 VP_G 0.228913722
## FP081 0.72911277 4.222848e-03 FP081 VP_G -0.315926873
## FP082 0.22175679 8.202021e-02 FP082 VP_G -1.506174057
## FP083 1.79705328 1.391607e-02 FP083 VP_G 0.586148259
## FP084 1.13637183 6.880795e-04 FP084 VP_G 0.127840582
## FP085 0.27122453 6.744040e-02 FP085 VP_G -1.304808259
## FP086 0.64974151 7.816398e-03 FP086 VP_G -0.431180676
## FP087 0.25801089 6.720425e-02 FP087 VP_G -1.354753489
## FP088 1.57116925 8.298839e-03 FP088 VP_G 0.451820085
## FP089 0.14721223 1.329106e-01 FP089 VP_G -1.915879958
## FP090 0.64237027 8.128373e-03 FP090 VP_G -0.442590395
## FP091 0.97800429 2.005882e-05 FP091 VP_G -0.022241223
## FP092 0.20899251 9.317801e-02 FP092 VP_G -1.565456851
## FP093 0.49172261 2.066734e-02 FP093 VP_G -0.709840517
## FP094 0.99476660 1.122062e-06 FP094 VP_G -0.005247142
## FP095 1.21598118 1.528442e-03 FP095 VP_G 0.195551307
## FP096 0.79866626 2.047708e-03 FP096 VP_G -0.224812113
## FP097 0.27373537 6.540942e-02 FP097 VP_G -1.295593440
## FP098 1.28105523 2.481631e-03 FP098 VP_G 0.247684138
## FP099 0.47185118 2.279089e-02 FP099 VP_G -0.751091647
## FP100 1.36192602 3.823449e-03 FP100 VP_G 0.308899886
## FP101 1.15291250 8.233660e-04 FP101 VP_G 0.142291351
## FP102 0.54942707 1.425853e-02 FP102 VP_G -0.598879231
## FP103 0.58576485 1.157244e-02 FP103 VP_G -0.534836847
## FP104 0.63194616 8.565860e-03 FP104 VP_G -0.458951075
## FP105 0.48316770 2.118709e-02 FP105 VP_G -0.727391471
## FP106 0.65902108 6.873749e-03 FP106 VP_G -0.416999764
## FP107 0.27844164 6.227834e-02 FP107 VP_G -1.278546809
## FP108 1.01457497 8.332361e-06 FP108 VP_G 0.014469780
## FP109 0.61805388 8.997866e-03 FP109 VP_G -0.481179637
## FP110 1.37134941 3.901093e-03 FP110 VP_G 0.315795222
## FP111 0.89745661 4.641219e-04 FP111 VP_G -0.108190502
## FP112 0.13793978 1.307413e-01 FP112 VP_G -1.980938083
## FP113 1.44943098 5.305027e-03 FP113 VP_G 0.371171050
## FP114 0.73592698 3.568467e-03 FP114 VP_G -0.306624372
## FP115 0.87408678 7.052371e-04 FP115 VP_G -0.134575613
## FP116 1.15250268 7.866812e-04 FP116 VP_G 0.141935823
## FP117 0.60131089 1.007118e-02 FP117 VP_G -0.508643191
## FP118 1.28812070 2.484224e-03 FP118 VP_G 0.253184331
## FP119 1.11895502 4.775937e-04 FP119 VP_G 0.112395234
## FP120 1.32008395 2.891538e-03 FP120 VP_G 0.277695331
## FP121 0.54424360 1.364140e-02 FP121 VP_G -0.608358336
## FP122 0.97085516 3.333730e-05 FP122 VP_G -0.029577988
## FP123 0.47156532 2.145802e-02 FP123 VP_G -0.751697650
## FP124 1.17599458 9.889777e-04 FP124 VP_G 0.162114244
## FP125 1.15573797 7.827169e-04 FP125 VP_G 0.144739079
## FP126 1.01045303 4.030737e-06 FP126 VP_G 0.010398773
## FP127 1.41857268 4.385366e-03 FP127 VP_G 0.349651214
## FP128 1.37469908 3.691278e-03 FP128 VP_G 0.318234855
## FP129 0.51443239 1.621123e-02 FP129 VP_G -0.664691145
## FP130 0.66502981 5.813460e-03 FP130 VP_G -0.407923418
## FP131 0.95783416 6.698224e-05 FP131 VP_G -0.043080623
## FP132 2.04266955 1.697683e-02 FP132 VP_G 0.714257556
## FP133 1.11789560 4.449154e-04 FP133 VP_G 0.111447989
## FP134 0.57195802 1.130280e-02 FP134 VP_G -0.558689675
## FP135 0.95783416 6.698224e-05 FP135 VP_G -0.043080623
## FP136 1.06770699 1.526877e-04 FP136 VP_G 0.065513353
## FP137 0.59399949 9.641656e-03 FP137 VP_G -0.520876823
## FP138 0.64186857 6.947789e-03 FP138 VP_G -0.443371715
## FP139 0.80019382 1.647760e-03 FP139 VP_G -0.222901302
## FP140 1.39373024 3.767078e-03 FP140 VP_G 0.331983776
## FP141 0.49290239 1.764194e-02 FP141 VP_G -0.707444112
## FP142 0.82977493 1.226393e-03 FP142 VP_G -0.186600779
## FP143 0.73530968 3.128164e-03 FP143 VP_G -0.307463530
## FP144 1.63603800 7.995099e-03 FP144 VP_G 0.492277463
## FP145 1.34827701 3.017428e-03 FP145 VP_G 0.298827485
## FP146 0.27276366 5.385873e-02 FP146 VP_G -1.299149556
## FP147 1.14075786 5.964217e-04 FP147 VP_G 0.131692828
## FP148 1.25912604 1.738537e-03 FP148 VP_G 0.230417865
## FP149 0.17427309 8.590156e-02 FP149 VP_G -1.747131733
## FP150 1.10335419 3.137235e-04 FP150 VP_G 0.098354800
## FP151 1.15739238 6.409558e-04 FP151 VP_G 0.146169523
## FP152 0.67652104 5.062911e-03 FP152 VP_G -0.390791731
## FP153 0.59654511 8.715026e-03 FP153 VP_G -0.516600423
## FP155 0.23782568 5.826784e-02 FP155 VP_G -1.436217309
## FP156 0.56197587 1.058664e-02 FP156 VP_G -0.576296365
## FP157 0.89614943 3.749301e-04 FP157 VP_G -0.109648111
## FP158 1.83744533 1.033477e-02 FP158 VP_G 0.608376197
## FP159 0.60007108 8.341477e-03 FP159 VP_G -0.510707161
## FP160 0.94725316 9.332815e-05 FP160 VP_G -0.054188894
## FP161 4.68576694 5.515771e-02 FP161 VP_G 1.544529604
## FP162 0.39641239 3.726946e-02 FP162 VP_G -0.925300229
## FP163 1.30035228 3.072756e-03 FP163 VP_G 0.262635216
## FP164 0.29316745 6.099461e-02 FP164 VP_G -1.227011339
## FP165 1.14157018 7.607483e-04 FP165 VP_G 0.132404668
## FP166 0.52535686 1.786823e-02 FP166 VP_G -0.643677518
## FP167 1.16920320 1.050288e-03 FP167 VP_G 0.156322489
## FP168 0.24673455 7.549097e-02 FP168 VP_G -1.399442231
## FP169 0.19581712 9.334163e-02 FP169 VP_G -1.630574123
## FP170 0.57865361 1.170409e-02 FP170 VP_G -0.547051236
## FP171 1.60655269 8.298564e-03 FP171 VP_G 0.474090698
## FP172 0.12306538 1.317752e-01 FP172 VP_G -2.095039482
## FP173 0.45698434 2.243736e-02 FP173 VP_G -0.783106166
## FP174 0.65612868 6.490726e-03 FP174 VP_G -0.421398357
## FP175 0.94595425 1.132239e-04 FP175 VP_G -0.055561071
## FP176 1.00334103 3.981905e-07 FP176 VP_G 0.003335459
## FP177 0.87473364 6.435415e-04 FP177 VP_G -0.133835847
## FP178 0.49966530 1.711553e-02 FP178 VP_G -0.693816809
## FP179 0.67434769 5.342073e-03 FP179 VP_G -0.394009441
## FP180 1.42373128 4.219579e-03 FP180 VP_G 0.353281085
## FP181 0.26768983 5.396589e-02 FP181 VP_G -1.317926317
## FP182 1.22466105 1.385412e-03 FP182 VP_G 0.202664115
## FP183 1.58365219 6.522956e-03 FP183 VP_G 0.459733693
## FP184 0.13694274 1.025154e-01 FP184 VP_G -1.988192371
## FP185 0.37563701 3.068609e-02 FP185 VP_G -0.979131989
## FP186 1.18281337 9.032186e-04 FP186 VP_G 0.167895811
## FP187 0.93790027 1.322768e-04 FP187 VP_G -0.064111657
## FP188 1.77119205 9.687088e-03 FP188 VP_G 0.571652797
## FP189 0.57675161 9.846444e-03 FP189 VP_G -0.550343597
## FP190 0.18597681 7.641501e-02 FP190 VP_G -1.682133264
## FP191 1.14571288 5.837548e-04 FP191 VP_G 0.136027042
## FP192 0.51452638 1.356897e-02 FP192 VP_G -0.664508453
## FP193 0.08191610 1.275872e-01 FP193 VP_G -2.502059785
## FP194 1.18034939 8.328458e-04 FP194 VP_G 0.165810490
## FP195 3.72159008 4.193747e-02 FP195 VP_G 1.314151018
## FP196 0.14858927 8.701108e-02 FP196 VP_G -1.906569334
## FP197 0.16479415 7.859039e-02 FP197 VP_G -1.803058135
## FP198 2.00709087 1.349541e-02 FP198 VP_G 0.696686345
## FP201 1.78135621 9.320368e-03 FP201 VP_G 0.577374992
## FP202 0.52577576 1.716021e-02 FP202 VP_G -0.642880476
## FP203 0.95670909 6.926750e-05 FP203 VP_G -0.044255912
## FP204 0.54424384 1.266393e-02 FP204 VP_G -0.608357895
## FP205 0.31642129 4.050846e-02 FP205 VP_G -1.150680746
## FP206 0.45381997 1.889706e-02 FP206 VP_G -0.790054693
## FP207 0.14858927 8.701108e-02 FP207 VP_G -1.906569334
## FP208 0.92038091 2.431613e-04 FP208 VP_G -0.082967663
## AbsoluteLogOddsRatio Group
## FP001 0.179885927 Others
## FP002 1.048686041 Others
## FP003 0.028743299 Others
## FP004 0.238379513 Others
## FP005 1.037455546 Others
## FP006 0.611644351 Others
## FP007 0.121745848 Others
## FP008 0.556154591 Others
## FP009 1.309975319 Others
## FP010 0.454350945 Others
## FP011 0.642293242 Others
## FP012 0.100561473 Others
## FP013 1.904898041 Others
## FP014 1.848267748 Others
## FP015 1.054064357 Others
## FP016 0.155437247 Others
## FP017 0.705703211 Others
## FP018 0.740662219 Others
## FP019 0.043065754 Others
## FP020 0.252302533 Others
## FP021 0.292249695 Others
## FP022 0.020556543 Others
## FP023 0.607009590 Others
## FP024 0.087184424 Others
## FP025 0.269169534 Others
## FP026 0.442462641 Others
## FP027 0.701385217 Others
## FP028 0.196519164 Others
## FP029 0.158989520 Others
## FP030 0.356355635 Others
## FP031 0.198878004 Others
## FP032 0.547992253 Others
## FP033 0.450789220 Others
## FP034 0.336880708 Others
## FP035 1.258598799 Others
## FP036 0.958746238 Others
## FP037 0.510707161 Others
## FP038 0.319823833 Others
## FP039 1.303096431 Others
## FP040 0.647390098 Others
## FP041 1.204617559 Others
## FP042 0.261621424 Others
## FP043 0.457953588 Others
## FP044 2.911267576 FP044
## FP045 0.919293663 Others
## FP046 0.754895161 Others
## FP047 0.420218893 Others
## FP048 0.194974974 Others
## FP049 1.630539536 Others
## FP050 0.505791274 Others
## FP051 0.666816450 Others
## FP052 0.209958553 Others
## FP053 1.444659604 Others
## FP054 1.177085338 Others
## FP055 0.514374198 Others
## FP056 1.342367160 Others
## FP057 0.232572576 Others
## FP058 0.063437329 Others
## FP059 1.268066990 Others
## FP060 0.490056742 Others
## FP061 0.048577050 Others
## FP062 0.255868883 Others
## FP063 0.311296446 Others
## FP064 0.171666208 Others
## FP065 1.416347592 Others
## FP066 0.007506185 Others
## FP067 0.146180513 Others
## FP068 0.118974389 Others
## FP069 0.156864895 Others
## FP070 1.274839999 Others
## FP071 1.123796088 Others
## FP072 0.993200747 Others
## FP073 0.555874326 Others
## FP074 0.189394345 Others
## FP075 0.278258294 Others
## FP076 2.083794540 FP076
## FP077 0.273126235 Others
## FP078 0.084948384 Others
## FP079 1.496183628 Others
## FP080 0.228913722 Others
## FP081 0.315926873 Others
## FP082 1.506174057 Others
## FP083 0.586148259 Others
## FP084 0.127840582 Others
## FP085 1.304808259 Others
## FP086 0.431180676 Others
## FP087 1.354753489 Others
## FP088 0.451820085 Others
## FP089 1.915879958 FP089
## FP090 0.442590395 Others
## FP091 0.022241223 Others
## FP092 1.565456851 Others
## FP093 0.709840517 Others
## FP094 0.005247142 Others
## FP095 0.195551307 Others
## FP096 0.224812113 Others
## FP097 1.295593440 Others
## FP098 0.247684138 Others
## FP099 0.751091647 Others
## FP100 0.308899886 Others
## FP101 0.142291351 Others
## FP102 0.598879231 Others
## FP103 0.534836847 Others
## FP104 0.458951075 Others
## FP105 0.727391471 Others
## FP106 0.416999764 Others
## FP107 1.278546809 Others
## FP108 0.014469780 Others
## FP109 0.481179637 Others
## FP110 0.315795222 Others
## FP111 0.108190502 Others
## FP112 1.980938083 Others
## FP113 0.371171050 Others
## FP114 0.306624372 Others
## FP115 0.134575613 Others
## FP116 0.141935823 Others
## FP117 0.508643191 Others
## FP118 0.253184331 Others
## FP119 0.112395234 Others
## FP120 0.277695331 Others
## FP121 0.608358336 Others
## FP122 0.029577988 Others
## FP123 0.751697650 Others
## FP124 0.162114244 Others
## FP125 0.144739079 Others
## FP126 0.010398773 Others
## FP127 0.349651214 Others
## FP128 0.318234855 Others
## FP129 0.664691145 Others
## FP130 0.407923418 Others
## FP131 0.043080623 Others
## FP132 0.714257556 Others
## FP133 0.111447989 Others
## FP134 0.558689675 Others
## FP135 0.043080623 Others
## FP136 0.065513353 Others
## FP137 0.520876823 Others
## FP138 0.443371715 Others
## FP139 0.222901302 Others
## FP140 0.331983776 Others
## FP141 0.707444112 Others
## FP142 0.186600779 Others
## FP143 0.307463530 Others
## FP144 0.492277463 Others
## FP145 0.298827485 Others
## FP146 1.299149556 Others
## FP147 0.131692828 Others
## FP148 0.230417865 Others
## FP149 1.747131733 Others
## FP150 0.098354800 Others
## FP151 0.146169523 Others
## FP152 0.390791731 Others
## FP153 0.516600423 Others
## FP155 1.436217309 Others
## FP156 0.576296365 Others
## FP157 0.109648111 Others
## FP158 0.608376197 Others
## FP159 0.510707161 Others
## FP160 0.054188894 Others
## FP161 1.544529604 Others
## FP162 0.925300229 Others
## FP163 0.262635216 Others
## FP164 1.227011339 Others
## FP165 0.132404668 Others
## FP166 0.643677518 Others
## FP167 0.156322489 Others
## FP168 1.399442231 Others
## FP169 1.630574123 Others
## FP170 0.547051236 Others
## FP171 0.474090698 Others
## FP172 2.095039482 FP172
## FP173 0.783106166 Others
## FP174 0.421398357 Others
## FP175 0.055561071 Others
## FP176 0.003335459 Others
## FP177 0.133835847 Others
## FP178 0.693816809 Others
## FP179 0.394009441 Others
## FP180 0.353281085 Others
## FP181 1.317926317 Others
## FP182 0.202664115 Others
## FP183 0.459733693 Others
## FP184 1.988192371 Others
## FP185 0.979131989 Others
## FP186 0.167895811 Others
## FP187 0.064111657 Others
## FP188 0.571652797 Others
## FP189 0.550343597 Others
## FP190 1.682133264 Others
## FP191 0.136027042 Others
## FP192 0.664508453 Others
## FP193 2.502059785 FP193
## FP194 0.165810490 Others
## FP195 1.314151018 Others
## FP196 1.906569334 Others
## FP197 1.803058135 Others
## FP198 0.696686345 Others
## FP201 0.577374992 Others
## FP202 0.642880476 Others
## FP203 0.044255912 Others
## FP204 0.608357895 Others
## FP205 1.150680746 Others
## FP206 0.790054693 Others
## FP207 1.906569334 Others
## FP208 0.082967663 Others
##################################
# Selecting the best-performing
# predictors based on the metrics
##################################
# Sort the summary by gain ratio, largest first, then keep and print the
# first 15 rows. head() is used instead of [1:15,] so that a summary with
# fewer than 15 rows is returned as-is rather than padded with NA rows.
VP_G_Summary_Top15_Gain <- VP_G_Summary[order(VP_G_Summary$Gain,decreasing=TRUE),]
(VP_G_Summary_Top15_Gain <- head(VP_G_Summary_Top15_Gain, 15))
## FET_OR.odds ratio Gain Predictor Metric LogOddsRatio
## FP076 0.12445706 0.16518193 FP076 VP_G -2.083795
## FP044 0.05440672 0.14857995 FP044 VP_G -2.911268
## FP089 0.14721223 0.13291055 FP089 VP_G -1.915880
## FP172 0.12306538 0.13177523 FP172 VP_G -2.095039
## FP112 0.13793978 0.13074132 FP112 VP_G -1.980938
## FP193 0.08191610 0.12758720 FP193 VP_G -2.502060
## FP013 0.14883782 0.11705243 FP013 VP_G -1.904898
## FP014 0.15750978 0.11045599 FP014 VP_G -1.848268
## FP184 0.13694274 0.10251539 FP184 VP_G -1.988192
## FP169 0.19581712 0.09334163 FP169 VP_G -1.630574
## FP092 0.20899251 0.09317801 FP092 VP_G -1.565457
## FP196 0.14858927 0.08701108 FP196 VP_G -1.906569
## FP207 0.14858927 0.08701108 FP207 VP_G -1.906569
## FP149 0.17427309 0.08590156 FP149 VP_G -1.747132
## FP049 0.19582389 0.08320908 FP049 VP_G -1.630540
## AbsoluteLogOddsRatio Group
## FP076 2.083795 FP076
## FP044 2.911268 FP044
## FP089 1.915880 FP089
## FP172 2.095039 FP172
## FP112 1.980938 Others
## FP193 2.502060 FP193
## FP013 1.904898 Others
## FP014 1.848268 Others
## FP184 1.988192 Others
## FP169 1.630574 Others
## FP092 1.565457 Others
## FP196 1.906569 Others
## FP207 1.906569 Others
## FP149 1.747132 Others
## FP049 1.630540 Others
# Sort the summary by absolute log odds ratio, largest first, then keep and
# print the first 15 rows. head() is used instead of [1:15,] so that a summary
# with fewer than 15 rows is returned as-is rather than padded with NA rows.
VP_G_Summary_Top15_AbsoluteLogOddsRatio <- VP_G_Summary[order(VP_G_Summary$AbsoluteLogOddsRatio,decreasing=TRUE),]
(VP_G_Summary_Top15_AbsoluteLogOddsRatio <- head(VP_G_Summary_Top15_AbsoluteLogOddsRatio, 15))
## FET_OR.odds ratio Gain Predictor Metric LogOddsRatio
## FP044 0.05440672 0.14857995 FP044 VP_G -2.911268
## FP193 0.08191610 0.12758720 FP193 VP_G -2.502060
## FP172 0.12306538 0.13177523 FP172 VP_G -2.095039
## FP076 0.12445706 0.16518193 FP076 VP_G -2.083795
## FP184 0.13694274 0.10251539 FP184 VP_G -1.988192
## FP112 0.13793978 0.13074132 FP112 VP_G -1.980938
## FP089 0.14721223 0.13291055 FP089 VP_G -1.915880
## FP196 0.14858927 0.08701108 FP196 VP_G -1.906569
## FP207 0.14858927 0.08701108 FP207 VP_G -1.906569
## FP013 0.14883782 0.11705243 FP013 VP_G -1.904898
## FP014 0.15750978 0.11045599 FP014 VP_G -1.848268
## FP197 0.16479415 0.07859039 FP197 VP_G -1.803058
## FP149 0.17427309 0.08590156 FP149 VP_G -1.747132
## FP190 0.18597681 0.07641501 FP190 VP_G -1.682133
## FP169 0.19581712 0.09334163 FP169 VP_G -1.630574
## AbsoluteLogOddsRatio Group
## FP044 2.911268 FP044
## FP193 2.502060 FP193
## FP172 2.095039 FP172
## FP076 2.083795 FP076
## FP184 1.988192 Others
## FP112 1.980938 Others
## FP089 1.915880 FP089
## FP196 1.906569 Others
## FP207 1.906569 Others
## FP013 1.904898 Others
## FP014 1.848268 Others
## FP197 1.803058 Others
## FP149 1.747132 Others
## FP190 1.682133 Others
## FP169 1.630574 Others
##################################
# Exploring predictor performance
##################################
# Dot plot of the gain ratio for the 15 predictors with the largest gain
# ratio, one panel per Metric value.
dotplot(
  Predictor ~ Gain | Metric,
  data = VP_G_Summary_Top15_Gain,
  xlab = "Gain Ratio",
  origin = 0,
  type = c("p", "h"),
  pch = 16,
  cex = 2,
  alpha = 0.45,
  # Both callbacks reorder the y variable by the plotted x value so that the
  # axis limits and the drawn points use the same ordering.
  prepanel = function(x, y) {
    y_by_x <- reorder(y, x)
    list(ylim = levels(y_by_x))
  },
  panel = function(x, y, ...) {
    y_by_x <- reorder(y, x)
    panel.dotplot(x, y_by_x, ...)
  }
)
# Dot plot of the absolute log odds ratio for the 15 predictors with the
# largest absolute log odds ratio, one panel per Metric value.
dotplot(
  Predictor ~ AbsoluteLogOddsRatio | Metric,
  data = VP_G_Summary_Top15_AbsoluteLogOddsRatio,
  xlab = "Absolute Log Odds Ratio",
  origin = 0,
  type = c("p", "h"),
  pch = 16,
  cex = 2,
  alpha = 0.45,
  # Both callbacks reorder the y variable by the plotted x value so that the
  # axis limits and the drawn points use the same ordering.
  prepanel = function(x, y) {
    y_by_x <- reorder(y, x)
    list(ylim = levels(y_by_x))
  },
  panel = function(x, y, ...) {
    y_by_x <- reorder(y, x)
    panel.dotplot(x, y_by_x, ...)
  }
)
##################################
# Consolidating all performance metrics
# for the numeric predictors
##################################
# Bind the four per-predictor metric columns side by side.
# NOTE(review): this assumes all four summaries list the numeric predictors
# in the same order - confirm where they are built.
NumericPredictor_Metrics <- cbind(AUROC_Summary$AUROC,
                                  ATS_Summary$ATS,
                                  MIC_Summary$MIC,
                                  RV_Summary$RV)

colnames(NumericPredictor_Metrics) <- c("AUROC",
                                        "ATS",
                                        "MIC",
                                        "RV")

# Row names are every column name of the numeric set except the first.
# `[-1]` is equivalent to `[2:ncol(.)]` and stays correct if only one column
# is present.
rownames(NumericPredictor_Metrics) <- names(PMA_PreModelling_Train_Numeric)[-1]

NumericPredictor_Metrics <- as.data.frame(NumericPredictor_Metrics)

# Label five selected predictors by name; every other row becomes "Others".
NumericPredictor_Metrics$Group <- ifelse(
  rownames(NumericPredictor_Metrics) %in%
    c("MolWeight","NumCarbon","NumMultBonds","NumBonds","NumRings"),
  rownames(NumericPredictor_Metrics),
  "Others")

NumericPredictor_Metrics
## AUROC ATS MIC RV Group
## MolWeight 0.8441930 22.0675750 0.46368934 0.30039075 MolWeight
## NumBonds 0.7893255 16.4907720 0.28498465 0.20575373 NumBonds
## NumMultBonds 0.7230098 13.0920514 0.19005119 0.08333333 NumMultBonds
## NumRotBonds 0.5847695 5.8690062 0.03861351 0.08825000 Others
## NumDblBonds 0.5142035 1.9672950 0.01133118 0.00200000 Others
## NumCarbon 0.8408857 21.0036355 0.31544845 0.28938624 NumCarbon
## NumNitrogen 0.5285634 0.8247139 0.01995159 0.10800000 Others
## NumOxygen 0.5536988 0.9312510 0.07491145 0.07476923 Others
## NumSulfer 0.5317902 3.2021823 0.01254889 0.01600000 Others
## NumChlorine 0.6104904 8.3362925 0.07010055 0.01400000 Others
## NumHalogen 0.6381755 9.1800512 0.08459159 0.08400000 Others
## NumRings 0.7438569 14.3956856 0.16421351 0.20600000 NumRings
## HydrophilicFactor 0.6699367 7.3321483 0.32734124 0.04048816 Others
## SurfaceArea1 0.5726889 2.5597702 0.19959108 0.05630307 Others
## SurfaceArea2 0.5479915 1.1459666 0.22267432 0.08079488 Others
# Scatterplot matrix of the first four columns of NumericPredictor_Metrics
# (labelled AUROC, ATS, MIC, RV), with points grouped by the Group column.
splom(
  ~NumericPredictor_Metrics[, 1:4],
  groups = NumericPredictor_Metrics$Group,
  varnames = c("AUROC", "ATS", "MIC", "RV"),
  main = "Feature Importance Comparison for Numeric Predictors",
  xlab = "Scatterplot Matrix of Feature Importance Metrics",
  auto.key = list(points=TRUE, space="top", columns=2),
  pch = 16,
  cex = 2,
  alpha = 0.45
)
##################################
# Consolidating all performance metrics
# for the factor predictors
##################################
# Scatter plot of -log10(Fisher exact test p-value) against the log odds
# ratio for the rows of VP_F_Summary, with points grouped by the Group column.
# The plot object is stored rather than printed.
FET_P.Value_Plot <- xyplot(NegativeLog10_FET_P.Value ~ LogOddsRatio,
                           groups = VP_F_Summary$Group,
                           data = VP_F_Summary,
                           xlab = "Log Odds Ratio",
                           ylab = "-Log10(Fisher Exact Test P-Value)",
                           type = "p",
                           pch = 16,
                           cex = 2,
                           alpha = 0.45,
                           auto.key = list(points=TRUE, space="top", columns=2),
                           main = "Feature Importance Comparison for Factor Predictors")
# Scatter plot of the gain ratio against the log odds ratio for the rows of
# VP_G_Summary, with points grouped by the Group column.
# The plot object is stored rather than printed.
Gain_Plot <- xyplot(Gain ~ LogOddsRatio,
                    groups = VP_G_Summary$Group,
                    data = VP_G_Summary,
                    xlab = "Log Odds Ratio",
                    ylab = "Gain Ratio",
                    type = "p",
                    pch = 16,
                    cex = 2,
                    alpha = 0.45,
                    auto.key = list(points=TRUE, space="top", columns=2),
                    main = "Feature Importance Comparison for Factor Predictors")
# Draw the two stored factor-predictor plots in a two-column layout.
grid.arrange(FET_P.Value_Plot, Gain_Plot, ncol = 2)