##################################
# Loading R libraries
##################################
library(AppliedPredictiveModeling)
library(caret)
library(rpart)
library(lattice)
library(dplyr)
library(tidyr)
library(moments)
library(skimr)
library(RANN)
library(pls)
library(corrplot)
library(tidyverse)
library(lares)
library(DMwR2)
library(gridExtra)
library(rattle)
library(rpart.plot)
library(RColorBrewer)
library(stats)
library(nnet)
library(elasticnet)
library(earth)
library(party)
library(kernlab)
library(randomForest)
library(Cubist)
library(pROC)
library(mda)
library(klaR)
library(pamr)
library(MLmetrics)
##################################
# Loading source and
# formulating the train set
##################################
# Load the AlzheimerDisease data set; the statements below rely on it
# providing the `predictors` data frame and the `diagnosis` outcome.
data(AlzheimerDisease)
# Build the working data set: all predictors plus the outcome as `Class`.
Alzheimer <- predictors
Alzheimer$Class <- diagnosis
##################################
# Decomposing the Genotype factor
# into binary dummy variables
##################################
## Decompose the genotype factor into binary dummy variables:
## E2, E3 and E4 are 1 when the digit "2", "3" or "4" appears in the
## Genotype label and 0 otherwise.
# The chained assignment creates the columns in the order E4, E3, E2.
Alzheimer$E2 <- Alzheimer$E3 <- Alzheimer$E4 <- 0
Alzheimer$E2[grepl("2", Alzheimer$Genotype)] <- 1
Alzheimer$E3[grepl("3", Alzheimer$Genotype)] <- 1
Alzheimer$E4[grepl("4", Alzheimer$Genotype)] <- 1
# Keep a copy of the data set at this stage, with every column still present.
Alzheimer_Original <- Alzheimer
##################################
# Removing baseline predictors
##################################
# Drop the Genotype factor and the age, tau, p_tau, Ab_42 and male columns
# from the working data set; every other column is kept.
# drop = FALSE guarantees the result stays a data frame.
Alzheimer <- Alzheimer[
  ,
  !(names(Alzheimer) %in% c("Genotype", "age", "tau", "p_tau", "Ab_42", "male")),
  drop = FALSE
]
##################################
# Partitioning the data into
# train and test sets
##################################
# Fix the random seed so the split below is reproducible.
set.seed(12345678)
# createDataPartition() returns a list; [[1]] extracts the row indices
# selected for the training sample (p = 0.8 of the rows, sampled on Class).
Alzheimer_Train_Index <- createDataPartition(Alzheimer$Class, p = 0.8)[[1]]
# Rows at the sampled indices form the train set; all remaining rows form
# the test set.
Alzheimer_Train <- Alzheimer[Alzheimer_Train_Index, ]
Alzheimer_Test <- Alzheimer[-Alzheimer_Train_Index, ]
##################################
# Performing a general exploration of the train set
##################################
# Report the number of rows and columns of the train set.
dim(Alzheimer_Train)
## [1] 267 128
# Show the type and the first few values of each train-set column.
str(Alzheimer_Train)
## 'data.frame': 267 obs. of 128 variables:
## $ ACE_CD143_Angiotensin_Converti : num 2.003 1.562 1.521 2.401 0.431 ...
## $ ACTH_Adrenocorticotropic_Hormon : num -1.386 -1.386 -1.715 -0.968 -1.273 ...
## $ AXL : num 1.098 0.683 -0.145 0.191 -0.222 ...
## $ Adiponectin : num -5.36 -5.02 -5.81 -4.78 -5.22 ...
## $ Alpha_1_Antichymotrypsin : num 1.74 1.46 1.19 2.13 1.31 ...
## $ Alpha_1_Antitrypsin : num -12.6 -11.9 -13.6 -11.1 -12.1 ...
## $ Alpha_1_Microglobulin : num -2.58 -3.24 -2.88 -2.34 -2.55 ...
## $ Alpha_2_Macroglobulin : num -72.7 -154.6 -136.5 -144.9 -154.6 ...
## $ Angiopoietin_2_ANG_2 : num 1.0647 0.7419 0.8329 0.9555 -0.0513 ...
## $ Angiotensinogen : num 2.51 2.46 1.98 2.86 2.52 ...
## $ Apolipoprotein_A_IV : num -1.43 -1.66 -1.66 -1.17 -1.39 ...
## $ Apolipoprotein_A1 : num -7.4 -7.05 -7.68 -6.73 -7.4 ...
## $ Apolipoprotein_A2 : num -0.2614 -0.8675 -0.6539 0.0953 -0.2744 ...
## $ Apolipoprotein_B : num -4.62 -6.75 -3.98 -3.38 -2.96 ...
## $ Apolipoprotein_CI : num -1.273 -1.273 -1.715 -0.755 -1.661 ...
## $ Apolipoprotein_CIII : num -2.31 -2.34 -2.75 -1.51 -2.31 ...
## $ Apolipoprotein_D : num 2.08 1.34 1.34 1.63 1.92 ...
## $ Apolipoprotein_E : num 3.755 3.097 2.753 3.067 0.591 ...
## $ Apolipoprotein_H : num -0.1573 -0.5754 -0.3448 0.6626 0.0972 ...
## $ B_Lymphocyte_Chemoattractant_BL : num 2.3 1.67 1.67 2.3 2.48 ...
## $ BMP_6 : num -2.2 -1.73 -2.06 -1.24 -1.88 ...
## $ Beta_2_Microglobulin : num 0.693 0.47 0.336 0.336 -0.545 ...
## $ Betacellulin : int 34 53 49 67 51 41 42 58 32 43 ...
## $ C_Reactive_Protein : num -4.07 -6.65 -8.05 -4.34 -7.56 ...
## $ CD40 : num -0.796 -1.273 -1.242 -0.924 -1.784 ...
## $ CD5L : num 0.0953 -0.6733 0.0953 0.3633 0.4055 ...
## $ Calbindin : num 33.2 25.3 22.2 21.8 13.2 ...
## $ Calcitonin : num 1.39 3.61 2.12 1.31 1.63 ...
## $ CgA : num 398 466 348 443 138 ...
## $ Clusterin_Apo_J : num 3.56 3.04 2.77 3.04 2.56 ...
## $ Complement_3 : num -10.4 -16.1 -16.1 -12.8 -12 ...
## $ Complement_Factor_H : num 3.57 3.6 4.47 7.25 3.57 ...
## $ Connective_Tissue_Growth_Factor : num 0.531 0.588 0.642 0.916 0.993 ...
## $ Cortisol : num 10 12 10 11 13 4.9 13 12 6.8 12 ...
## $ Creatine_Kinase_MB : num -1.71 -1.75 -1.38 -1.63 -1.67 ...
## $ Cystatin_C : num 9.04 9.07 8.95 8.98 7.84 ...
## $ EGF_R : num -0.135 -0.37 -0.733 -0.621 -1.111 ...
## $ EN_RAGE : num -3.69 -3.82 -4.76 -2.36 -3.44 ...
## $ ENA_78 : num -1.35 -1.36 -1.39 -1.34 -1.36 ...
## $ Eotaxin_3 : int 53 62 62 64 57 64 64 64 82 73 ...
## $ FAS : num -0.0834 -0.5276 -0.6349 -0.1278 -0.3285 ...
## $ FSH_Follicle_Stimulation_Hormon : num -0.652 -1.627 -1.563 -0.976 -1.683 ...
## $ Fas_Ligand : num 3.1 2.98 1.36 4.04 2.41 ...
## $ Fatty_Acid_Binding_Protein : num 2.521 2.248 0.906 2.635 0.624 ...
## $ Ferritin : num 3.33 3.93 3.18 2.69 1.85 ...
## $ Fetuin_A : num 1.28 1.19 1.41 2.15 1.48 ...
## $ Fibrinogen : num -7.04 -8.05 -7.2 -6.98 -6.44 ...
## $ GRO_alpha : num 1.38 1.37 1.41 1.4 1.4 ...
## $ Gamma_Interferon_induced_Monokin: num 2.95 2.72 2.76 2.85 2.82 ...
## $ Glutathione_S_Transferase_alpha : num 1.064 0.867 0.889 1.236 1.154 ...
## $ HB_EGF : num 6.56 8.75 7.75 7.25 6.41 ...
## $ HCC_4 : num -3.04 -4.07 -3.65 -3.15 -3.08 ...
## $ Hepatocyte_Growth_Factor_HGF : num 0.5878 0.5306 0.0953 0.5306 0.0953 ...
## $ I_309 : num 3.43 3.14 2.4 3.76 2.71 ...
## $ ICAM_1 : num -0.1908 -0.462 -0.462 0.0972 -0.9351 ...
## $ IGF_BP_2 : num 5.61 5.35 5.18 5.42 5.06 ...
## $ IL_11 : num 5.12 4.94 4.67 7.07 6.1 ...
## $ IL_13 : num 1.28 1.27 1.27 1.31 1.28 ...
## $ IL_16 : num 4.19 2.88 2.62 4.74 2.67 ...
## $ IL_17E : num 5.73 6.71 4.15 4.2 3.64 ...
## $ IL_1alpha : num -6.57 -8.05 -8.18 -6.94 -8.18 ...
## $ IL_3 : num -3.24 -3.91 -4.65 -3 -3.86 ...
## $ IL_4 : num 2.48 2.4 1.82 2.71 1.21 ...
## $ IL_5 : num 1.099 0.693 -0.248 1.163 -0.4 ...
## $ IL_6 : num 0.2694 0.0962 0.1857 -0.072 0.1857 ...
## $ IL_6_Receptor : num 0.6428 0.4312 0.0967 0.0967 -0.5173 ...
## $ IL_7 : num 4.81 3.71 1.01 4.29 2.78 ...
## $ IL_8 : num 1.71 1.68 1.69 1.76 1.71 ...
## $ IP_10_Inducible_Protein_10 : num 6.24 5.69 5.05 6.37 5.48 ...
## $ IgA : num -6.81 -6.38 -6.32 -4.65 -5.81 ...
## $ Insulin : num -0.626 -0.943 -1.447 -0.3 -1.341 ...
## $ Kidney_Injury_Molecule_1_KIM_1 : num -1.2 -1.2 -1.19 -1.16 -1.12 ...
## $ LOX_1 : num 1.705 1.526 1.163 1.361 0.642 ...
## $ Leptin : num -1.529 -1.466 -1.662 -0.915 -1.361 ...
## $ Lipoprotein_a : num -4.27 -4.93 -5.84 -2.94 -4.51 ...
## $ MCP_1 : num 6.74 6.85 6.77 6.72 6.54 ...
## $ MCP_2 : num 1.981 1.809 0.401 2.221 2.334 ...
## $ MIF : num -1.24 -1.9 -2.3 -1.9 -2.04 ...
## $ MIP_1alpha : num 4.97 3.69 4.05 6.45 4.6 ...
## $ MIP_1beta : num 3.26 3.14 2.4 3.53 2.89 ...
## $ MMP_2 : num 4.48 3.78 2.87 3.69 2.92 ...
## $ MMP_3 : num -2.21 -2.47 -2.3 -1.56 -3.04 ...
## $ MMP10 : num -3.27 -3.65 -2.73 -2.62 -3.32 ...
## $ MMP7 : num -3.774 -5.968 -4.03 -0.222 -1.922 ...
## $ Myoglobin : num -1.897 -0.755 -1.386 -1.772 -1.139 ...
## $ NT_proBNP : num 4.55 4.22 4.25 4.47 4.19 ...
## $ NrCAM : num 5 5.21 4.74 5.2 3.26 ...
## $ Osteopontin : num 5.36 6 5.02 5.69 4.74 ...
## $ PAI_1 : num 1.0035 -0.0306 0.4384 0.2523 0.4384 ...
## $ PAPP_A : num -2.9 -2.81 -2.94 -2.94 -2.94 ...
## $ PLGF : num 4.44 4.03 4.51 4.8 4.39 ...
## $ PYY : num 3.22 3.14 2.89 3.66 3.33 ...
## $ Pancreatic_polypeptide : num 0.579 0.336 -0.892 0.262 -0.478 ...
## $ Prolactin : num 0 -0.511 -0.139 0.182 -0.151 ...
## $ Prostatic_Acid_Phosphatase : num -1.62 -1.74 -1.64 -1.7 -1.76 ...
## $ Protein_S : num -1.78 -2.46 -2.26 -1.66 -2.36 ...
## $ Pulmonary_and_Activation_Regulat: num -0.844 -2.303 -1.661 -0.562 -1.171 ...
## $ RANTES : num -6.21 -6.94 -6.65 -6.32 -6.5 ...
## $ Resistin : num -16.5 -16 -16.5 -11.1 -11.3 ...
## [list output truncated]
# Summarise every train-set column: min, quartiles, mean and max for the
# numeric columns, and level counts for the Class factor.
summary(Alzheimer_Train)
## ACE_CD143_Angiotensin_Converti ACTH_Adrenocorticotropic_Hormon
## Min. :-0.6756 Min. :-2.207
## 1st Qu.: 0.9462 1st Qu.:-1.715
## Median : 1.3013 Median :-1.561
## Mean : 1.3198 Mean :-1.538
## 3rd Qu.: 1.7191 3rd Qu.:-1.347
## Max. : 2.8398 Max. :-0.844
## AXL Adiponectin Alpha_1_Antichymotrypsin
## Min. :-0.9230 Min. :-6.725 Min. :0.2624
## 1st Qu.: 0.0000 1st Qu.:-5.669 1st Qu.:1.1314
## Median : 0.2804 Median :-5.185 Median :1.3610
## Mean : 0.3093 Mean :-5.201 Mean :1.3605
## 3rd Qu.: 0.6077 3rd Qu.:-4.780 3rd Qu.:1.5892
## Max. : 1.5214 Max. :-3.507 Max. :2.3026
## Alpha_1_Antitrypsin Alpha_1_Microglobulin Alpha_2_Macroglobulin
## Min. :-17.028 Min. :-4.343 Min. :-289.68
## 1st Qu.:-14.071 1st Qu.:-3.270 1st Qu.:-186.64
## Median :-13.004 Median :-2.937 Median :-160.01
## Mean :-13.052 Mean :-2.932 Mean :-158.61
## 3rd Qu.:-12.096 3rd Qu.:-2.590 3rd Qu.:-134.62
## Max. : -8.192 Max. :-1.772 Max. : -59.46
## Angiopoietin_2_ANG_2 Angiotensinogen Apolipoprotein_A_IV Apolipoprotein_A1
## Min. :-0.5447 Min. :1.752 Min. :-2.9565 Min. :-8.680
## 1st Qu.: 0.4700 1st Qu.:2.119 1st Qu.:-2.1203 1st Qu.:-7.763
## Median : 0.6419 Median :2.320 Median :-1.8326 Median :-7.470
## Mean : 0.6730 Mean :2.318 Mean :-1.8544 Mean :-7.483
## 3rd Qu.: 0.8755 3rd Qu.:2.497 3rd Qu.:-1.6094 3rd Qu.:-7.209
## Max. : 1.5261 Max. :2.881 Max. :-0.7765 Max. :-6.166
## Apolipoprotein_A2 Apolipoprotein_B Apolipoprotein_CI Apolipoprotein_CIII
## Min. :-1.8971 Min. :-9.937 Min. :-3.3242 Min. :-3.689
## 1st Qu.:-0.9676 1st Qu.:-6.630 1st Qu.:-1.8326 1st Qu.:-2.773
## Median :-0.6733 Median :-5.703 Median :-1.6094 Median :-2.526
## Mean :-0.6354 Mean :-5.578 Mean :-1.5833 Mean :-2.494
## 3rd Qu.:-0.3147 3rd Qu.:-4.539 3rd Qu.:-1.3667 3rd Qu.:-2.207
## Max. : 0.9555 Max. :-2.153 Max. :-0.2744 Max. :-1.238
## Apolipoprotein_D Apolipoprotein_E Apolipoprotein_H
## Min. :0.470 Min. :0.5911 Min. :-2.23379
## 1st Qu.:1.209 1st Qu.:2.3344 1st Qu.:-0.59782
## Median :1.411 Median :2.8181 Median :-0.37005
## Mean :1.440 Mean :2.8062 Mean :-0.32122
## 3rd Qu.:1.668 3rd Qu.:3.2863 3rd Qu.:-0.06112
## Max. :2.272 Max. :5.4442 Max. : 0.92696
## B_Lymphocyte_Chemoattractant_BL BMP_6 Beta_2_Microglobulin
## Min. :0.7318 Min. :-2.7612 Min. :-0.54473
## 1st Qu.:1.6731 1st Qu.:-2.1516 1st Qu.:-0.04082
## Median :1.9805 Median :-1.8774 Median : 0.18232
## Mean :2.0175 Mean :-1.9114 Mean : 0.16757
## 3rd Qu.:2.3714 3rd Qu.:-1.6753 3rd Qu.: 0.33647
## Max. :4.0237 Max. :-0.8166 Max. : 0.99325
## Betacellulin C_Reactive_Protein CD40 CD5L
## Min. :10.00 Min. :-8.517 Min. :-1.8644 Min. :-1.23787
## 1st Qu.:42.00 1st Qu.:-6.645 1st Qu.:-1.3761 1st Qu.:-0.35667
## Median :51.00 Median :-5.843 Median :-1.2734 Median :-0.06188
## Mean :51.01 Mean :-5.874 Mean :-1.2584 Mean :-0.05310
## 3rd Qu.:59.00 3rd Qu.:-5.083 3rd Qu.:-1.1238 3rd Qu.: 0.26236
## Max. :82.00 Max. :-2.937 Max. :-0.5475 Max. : 1.16315
## Calbindin Calcitonin CgA Clusterin_Apo_J
## Min. :10.96 Min. :-0.7134 Min. :135.6 Min. :1.872
## 1st Qu.:19.77 1st Qu.: 0.9555 1st Qu.:278.0 1st Qu.:2.708
## Median :22.25 Median : 1.6487 Median :331.5 Median :2.890
## Mean :22.43 Mean : 1.6788 Mean :333.3 Mean :2.882
## 3rd Qu.:24.80 3rd Qu.: 2.2824 3rd Qu.:392.1 3rd Qu.:3.045
## Max. :33.78 Max. : 3.8918 Max. :535.4 Max. :3.584
## Complement_3 Complement_Factor_H Connective_Tissue_Growth_Factor
## Min. :-23.387 Min. :-0.8387 Min. :0.1823
## 1st Qu.:-17.567 1st Qu.: 2.7531 1st Qu.:0.6419
## Median :-15.524 Median : 3.6000 Median :0.7885
## Mean :-15.610 Mean : 3.5541 Mean :0.7739
## 3rd Qu.:-13.882 3rd Qu.: 4.2548 3rd Qu.:0.9163
## Max. : -9.563 Max. : 7.6238 Max. :1.4110
## Cortisol Creatine_Kinase_MB Cystatin_C EGF_R
## Min. : 0.10 Min. :-1.872 Min. :7.432 Min. :-1.36135
## 1st Qu.: 9.80 1st Qu.:-1.724 1st Qu.:8.321 1st Qu.:-0.85727
## Median :12.00 Median :-1.671 Median :8.564 Median :-0.68354
## Mean :11.98 Mean :-1.674 Mean :8.586 Mean :-0.70130
## 3rd Qu.:14.00 3rd Qu.:-1.626 3rd Qu.:8.839 3rd Qu.:-0.54612
## Max. :29.00 Max. :-1.384 Max. :9.694 Max. :-0.06112
## EN_RAGE ENA_78 Eotaxin_3 FAS
## Min. :-8.3774 Min. :-1.405 Min. : 7.00 Min. :-1.5141
## 1st Qu.:-4.1997 1st Qu.:-1.381 1st Qu.: 44.00 1st Qu.:-0.7133
## Median :-3.6497 Median :-1.374 Median : 59.00 Median :-0.5276
## Mean :-3.6353 Mean :-1.372 Mean : 58.17 Mean :-0.5291
## 3rd Qu.:-3.1466 3rd Qu.:-1.364 3rd Qu.: 70.00 3rd Qu.:-0.3147
## Max. :-0.3857 Max. :-1.339 Max. :107.00 Max. : 0.3365
## FSH_Follicle_Stimulation_Hormon Fas_Ligand Fatty_Acid_Binding_Protein
## Min. :-2.11511 Min. :-0.1536 Min. :-1.0441
## 1st Qu.:-1.46606 1st Qu.: 2.3415 1st Qu.: 0.7998
## Median :-1.13570 Median : 3.1015 Median : 1.3865
## Mean :-1.14259 Mean : 2.9680 Mean : 1.3529
## 3rd Qu.:-0.87620 3rd Qu.: 3.6950 3rd Qu.: 1.8847
## Max. : 0.09715 Max. : 7.6328 Max. : 3.7055
## Ferritin Fetuin_A Fibrinogen GRO_alpha
## Min. :0.6077 Min. :0.470 Min. :-8.874 Min. :1.271
## 1st Qu.:2.2895 1st Qu.:1.099 1st Qu.:-7.717 1st Qu.:1.351
## Median :2.7749 Median :1.308 Median :-7.323 Median :1.382
## Mean :2.7646 Mean :1.350 Mean :-7.356 Mean :1.378
## 3rd Qu.:3.2915 3rd Qu.:1.609 3rd Qu.:-7.013 3rd Qu.:1.406
## Max. :4.6333 Max. :2.251 Max. :-5.843 Max. :1.495
## Gamma_Interferon_induced_Monokin Glutathione_S_Transferase_alpha
## Min. :2.393 Min. :0.5238
## 1st Qu.:2.707 1st Qu.:0.8439
## Median :2.783 Median :0.9677
## Mean :2.786 Mean :0.9512
## 3rd Qu.:2.873 3rd Qu.:1.0344
## Max. :3.065 Max. :1.3176
## HB_EGF HCC_4 Hepatocyte_Growth_Factor_HGF I_309
## Min. : 2.103 Min. :-4.510 Min. :-0.6349 Min. :1.758
## 1st Qu.: 5.786 1st Qu.:-3.730 1st Qu.: 0.0000 1st Qu.:2.708
## Median : 6.703 Median :-3.507 Median : 0.1823 Median :2.944
## Mean : 6.833 Mean :-3.500 Mean : 0.1963 Mean :2.958
## 3rd Qu.: 7.865 3rd Qu.:-3.270 3rd Qu.: 0.4055 3rd Qu.:3.219
## Max. :10.695 Max. :-2.120 Max. : 0.8755 Max. :4.143
## ICAM_1 IGF_BP_2 IL_11 IL_13
## Min. :-1.5332 Min. :4.635 Min. :1.755 Min. :1.259
## 1st Qu.:-0.8298 1st Qu.:5.179 1st Qu.:3.706 1st Qu.:1.274
## Median :-0.5903 Median :5.323 Median :4.805 Median :1.283
## Mean :-0.5908 Mean :5.317 Mean :4.725 Mean :1.284
## 3rd Qu.:-0.3828 3rd Qu.:5.453 3rd Qu.:5.776 3rd Qu.:1.290
## Max. : 0.5171 Max. :5.948 Max. :8.491 Max. :1.321
## IL_16 IL_17E IL_1alpha IL_3
## Min. :1.187 Min. :1.052 Min. :-8.517 Min. :-5.915
## 1st Qu.:2.521 1st Qu.:4.149 1st Qu.:-7.824 1st Qu.:-4.269
## Median :2.909 Median :4.749 Median :-7.524 Median :-3.912
## Mean :2.929 Mean :4.855 Mean :-7.514 Mean :-3.941
## 3rd Qu.:3.351 3rd Qu.:5.631 3rd Qu.:-7.264 3rd Qu.:-3.631
## Max. :4.937 Max. :8.952 Max. :-5.952 Max. :-2.453
## IL_4 IL_5 IL_6 IL_6_Receptor
## Min. :0.5306 Min. :-1.4271 Min. :-1.5343 Min. :-0.67562
## 1st Qu.:1.4586 1st Qu.:-0.1221 1st Qu.:-0.4127 1st Qu.:-0.12541
## Median :1.8083 Median : 0.1823 Median :-0.1599 Median : 0.09669
## Mean :1.7732 Mean : 0.1866 Mean :-0.1540 Mean : 0.09492
## 3rd Qu.:2.1459 3rd Qu.: 0.4700 3rd Qu.: 0.1410 3rd Qu.: 0.35404
## Max. :3.0445 Max. : 1.9459 Max. : 1.8138 Max. : 0.83099
## IL_7 IL_8 IP_10_Inducible_Protein_10 IgA
## Min. :0.5598 Min. :1.574 Min. :4.317 Min. :-10.520
## 1st Qu.:2.1548 1st Qu.:1.680 1st Qu.:5.398 1st Qu.: -6.645
## Median :2.7934 Median :1.705 Median :5.753 Median : -6.119
## Mean :2.8392 Mean :1.704 Mean :5.755 Mean : -6.121
## 3rd Qu.:3.7055 3rd Qu.:1.728 3rd Qu.:6.064 3rd Qu.: -5.573
## Max. :5.7056 Max. :1.807 Max. :7.501 Max. : -4.200
## Insulin Kidney_Injury_Molecule_1_KIM_1 LOX_1
## Min. :-2.1692 Min. :-1.256 Min. :0.000
## 1st Qu.:-1.4466 1st Qu.:-1.204 1st Qu.:1.030
## Median :-1.2462 Median :-1.183 Median :1.281
## Mean :-1.2329 Mean :-1.185 Mean :1.283
## 3rd Qu.:-1.0340 3rd Qu.:-1.164 3rd Qu.:1.526
## Max. :-0.1586 Max. :-1.105 Max. :2.272
## Leptin Lipoprotein_a MCP_1 MCP_2
## Min. :-2.1468 Min. :-6.812 Min. :5.826 Min. :0.4006
## 1st Qu.:-1.6996 1st Qu.:-5.308 1st Qu.:6.319 1st Qu.:1.5304
## Median :-1.5047 Median :-4.605 Median :6.494 Median :1.8528
## Mean :-1.5042 Mean :-4.417 Mean :6.497 Mean :1.8691
## 3rd Qu.:-1.3295 3rd Qu.:-3.490 3rd Qu.:6.678 3rd Qu.:2.1821
## Max. :-0.6206 Max. :-1.386 Max. :7.230 Max. :4.0237
## MIF MIP_1alpha MIP_1beta MMP_2
## Min. :-2.847 Min. :0.9346 Min. :1.946 Min. :0.09809
## 1st Qu.:-2.120 1st Qu.:3.3377 1st Qu.:2.565 1st Qu.:2.33214
## Median :-1.897 Median :4.0495 Median :2.833 Median :2.81512
## Mean :-1.864 Mean :4.0489 Mean :2.814 Mean :2.87534
## 3rd Qu.:-1.661 3rd Qu.:4.6857 3rd Qu.:3.045 3rd Qu.:3.55121
## Max. :-0.844 Max. :6.7959 Max. :4.007 Max. :5.35895
## MMP_3 MMP10 MMP7 Myoglobin
## Min. :-4.4228 Min. :-4.934 Min. :-8.3975 Min. :-3.1701
## 1st Qu.:-2.7489 1st Qu.:-3.938 1st Qu.:-4.8199 1st Qu.:-2.0402
## Median :-2.4534 Median :-3.650 Median :-3.7735 Median :-1.4697
## Mean :-2.4455 Mean :-3.635 Mean :-3.7894 Mean :-1.3671
## 3rd Qu.:-2.1203 3rd Qu.:-3.352 3rd Qu.:-2.7140 3rd Qu.:-0.7988
## Max. :-0.5276 Max. :-2.207 Max. :-0.2222 Max. : 1.7750
## NT_proBNP NrCAM Osteopontin PAI_1
## Min. :3.178 Min. :2.639 Min. :4.111 Min. :-0.99085
## 1st Qu.:4.350 1st Qu.:3.998 1st Qu.:4.963 1st Qu.:-0.16655
## Median :4.554 Median :4.394 Median :5.187 Median : 0.09396
## Mean :4.552 Mean :4.362 Mean :5.204 Mean : 0.07743
## 3rd Qu.:4.775 3rd Qu.:4.749 3rd Qu.:5.442 3rd Qu.: 0.32005
## Max. :5.886 Max. :6.011 Max. :6.308 Max. : 1.16611
## PAPP_A PLGF PYY Pancreatic_polypeptide
## Min. :-3.311 Min. :2.485 Min. :2.186 Min. :-2.12026
## 1st Qu.:-2.936 1st Qu.:3.638 1st Qu.:2.833 1st Qu.:-0.52763
## Median :-2.871 Median :3.871 Median :2.996 Median :-0.04082
## Mean :-2.854 Mean :3.912 Mean :3.015 Mean :-0.01323
## 3rd Qu.:-2.749 3rd Qu.:4.205 3rd Qu.:3.178 3rd Qu.: 0.53063
## Max. :-2.520 Max. :5.170 Max. :3.932 Max. : 1.93152
## Prolactin Prostatic_Acid_Phosphatase Protein_S
## Min. :-1.30933 Min. :-1.934 Min. :-3.338
## 1st Qu.:-0.13926 1st Qu.:-1.717 1st Qu.:-2.464
## Median : 0.00000 Median :-1.690 Median :-2.259
## Mean : 0.04495 Mean :-1.685 Mean :-2.240
## 3rd Qu.: 0.25799 3rd Qu.:-1.654 3rd Qu.:-2.000
## Max. : 0.99325 Max. :-1.424 Max. :-1.221
## Pulmonary_and_Activation_Regulat RANTES Resistin
## Min. :-2.5133 Min. :-7.222 Min. :-34.967
## 1st Qu.:-1.8326 1st Qu.:-6.725 1st Qu.:-21.468
## Median :-1.5141 Median :-6.502 Median :-17.466
## Mean :-1.4880 Mean :-6.511 Mean :-17.641
## 3rd Qu.:-1.1712 3rd Qu.:-6.320 3rd Qu.:-13.501
## Max. :-0.2744 Max. :-5.547 Max. : -2.239
## S100b SGOT SHBG SOD
## Min. :0.1874 Min. :-1.3471 Min. :-4.135 Min. :4.317
## 1st Qu.:1.0012 1st Qu.:-0.6349 1st Qu.:-2.813 1st Qu.:5.094
## Median :1.2544 Median :-0.4005 Median :-2.489 Median :5.366
## Mean :1.2505 Mean :-0.4057 Mean :-2.477 Mean :5.336
## 3rd Qu.:1.4996 3rd Qu.:-0.1985 3rd Qu.:-2.120 3rd Qu.:5.583
## Max. :2.3726 Max. : 0.7419 Max. :-1.109 Max. :6.317
## Serum_Amyloid_P Sortilin Stem_Cell_Factor TGF_alpha
## Min. :-7.506 Min. :1.654 Min. :2.251 Min. : 6.843
## 1st Qu.:-6.377 1st Qu.:3.343 1st Qu.:3.045 1st Qu.: 8.859
## Median :-6.032 Median :3.867 Median :3.296 Median : 9.919
## Mean :-6.017 Mean :3.852 Mean :3.301 Mean : 9.801
## 3rd Qu.:-5.655 3rd Qu.:4.371 3rd Qu.:3.526 3rd Qu.:10.695
## Max. :-4.646 Max. :6.225 Max. :4.277 Max. :13.827
## TIMP_1 TNF_RII TRAIL_R3 TTR_prealbumin
## Min. : 1.742 Min. :-1.6607 Min. :-1.2107 Min. :2.485
## 1st Qu.:10.490 1st Qu.:-0.8210 1st Qu.:-0.7008 1st Qu.:2.773
## Median :11.565 Median :-0.5978 Median :-0.5317 Median :2.833
## Mean :11.750 Mean :-0.5939 Mean :-0.5394 Mean :2.854
## 3rd Qu.:12.697 3rd Qu.:-0.3784 3rd Qu.:-0.3849 3rd Qu.:2.944
## Max. :18.881 Max. : 0.4700 Max. : 0.2694 Max. :3.332
## Tamm_Horsfall_Protein_THP Thrombomodulin Thrombopoietin
## Min. :-3.206 Min. :-2.0377 Min. :-1.53957
## 1st Qu.:-3.137 1st Qu.:-1.6256 1st Qu.:-0.88645
## Median :-3.117 Median :-1.4920 Median :-0.75100
## Mean :-3.116 Mean :-1.5050 Mean :-0.75419
## 3rd Qu.:-3.096 3rd Qu.:-1.3406 3rd Qu.:-0.62887
## Max. :-2.995 Max. :-0.8166 Max. : 0.09762
## Thymus_Expressed_Chemokine_TECK Thyroid_Stimulating_Hormone
## Min. :1.508 Min. :-6.190
## 1st Qu.:3.343 1st Qu.:-4.962
## Median :3.810 Median :-4.510
## Mean :3.848 Mean :-4.499
## 3rd Qu.:4.316 3rd Qu.:-4.017
## Max. :6.225 Max. :-1.715
## Thyroxine_Binding_Globulin Tissue_Factor Transferrin
## Min. :-2.4769 Min. :-0.2107 Min. :1.932
## 1st Qu.:-1.7720 1st Qu.: 0.8329 1st Qu.:2.708
## Median :-1.5141 Median : 1.2238 Median :2.890
## Mean :-1.4788 Mean : 1.1702 Mean :2.909
## 3rd Qu.:-1.2379 3rd Qu.: 1.4816 3rd Qu.:3.091
## Max. :-0.2107 Max. : 2.4849 Max. :3.761
## Trefoil_Factor_3_TFF3 VCAM_1 VEGF Vitronectin
## Min. :-4.744 Min. :1.723 Min. :11.83 Min. :-1.42712
## 1st Qu.:-4.135 1st Qu.:2.485 1st Qu.:15.77 1st Qu.:-0.51083
## Median :-3.863 Median :2.708 Median :17.08 Median :-0.30111
## Mean :-3.876 Mean :2.688 Mean :16.99 Mean :-0.28473
## 3rd Qu.:-3.650 3rd Qu.:2.890 3rd Qu.:18.10 3rd Qu.:-0.03564
## Max. :-2.957 Max. :3.689 Max. :22.38 Max. : 0.53063
## von_Willebrand_Factor Class E4 E3
## Min. :-4.991 Impaired: 73 Min. :0.0000 Min. :0.0000
## 1st Qu.:-4.200 Control :194 1st Qu.:0.0000 1st Qu.:1.0000
## Median :-3.912 Median :0.0000 Median :1.0000
## Mean :-3.906 Mean :0.4007 Mean :0.9176
## 3rd Qu.:-3.612 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :-2.957 Max. :1.0000 Max. :1.0000
## E2
## Min. :0.000
## 1st Qu.:0.000
## Median :0.000
## Mean :0.161
## 3rd Qu.:0.000
## Max. :1.000
##################################
# Performing a general exploration of the test set
##################################
# Report the number of rows and columns of the test set.
dim(Alzheimer_Test)
## [1] 66 128
# Show the type and the first few values of each test-set column.
str(Alzheimer_Test)
## 'data.frame': 66 obs. of 128 variables:
## $ ACE_CD143_Angiotensin_Converti : num 1.681 1.602 1.301 1.562 0.831 ...
## $ ACTH_Adrenocorticotropic_Hormon : num -1.61 -1.51 -1.77 -1.56 -1.97 ...
## $ AXL : num 0.6833 0.4495 -0.0201 0.5298 -0.2 ...
## $ Adiponectin : num -5.12 -5.57 -6.17 -6.07 -5.43 ...
## $ Alpha_1_Antichymotrypsin : num 1.281 1.163 1.253 0.875 1.253 ...
## $ Alpha_1_Antitrypsin : num -15.5 -12.1 -14.5 -14 -13.3 ...
## $ Alpha_1_Microglobulin : num -3.17 -2.36 -3.32 -3.19 -3.24 ...
## $ Alpha_2_Macroglobulin : num -98.4 -144.9 -204.1 -125.8 -186.6 ...
## $ Angiopoietin_2_ANG_2 : num 0.916 0.531 0.262 0.742 0 ...
## $ Angiotensinogen : num 2.38 2.26 2.03 2.25 2.05 ...
## $ Apolipoprotein_A_IV : num -2.12 -1.9 -2.04 -2.3 -1.9 ...
## $ Apolipoprotein_A1 : num -8.05 -7.14 -8.08 -7.9 -8.18 ...
## $ Apolipoprotein_A2 : num -1.238 -0.562 -0.844 -1.022 -1.022 ...
## $ Apolipoprotein_B : num -6.52 -6.41 -5.56 -5.36 -5.94 ...
## $ Apolipoprotein_CI : num -1.97 -1.51 -2.21 -1.9 -2.04 ...
## $ Apolipoprotein_CIII : num -3 -1.97 -2.94 -2.83 -2.8 ...
## $ Apolipoprotein_D : num 1.44 1.48 1.39 1.16 1.25 ...
## $ Apolipoprotein_E : num 2.37 3.7 1.33 3.19 1.53 ...
## $ Apolipoprotein_H : num -0.532 -0.125 -0.32 -0.517 -0.462 ...
## $ B_Lymphocyte_Chemoattractant_BL : num 1.981 2.182 1.853 1.274 0.927 ...
## $ BMP_6 : num -1.98 -1.88 -1.68 -2.65 -1.68 ...
## $ Beta_2_Microglobulin : num 0.642 0.182 -0.274 0.405 -0.223 ...
## $ Betacellulin : int 52 59 51 46 51 42 61 46 32 46 ...
## $ C_Reactive_Protein : num -6.21 -6.21 -6.12 -5.71 -6.93 ...
## $ CD40 : num -1.12 -1.24 -1.45 -1.07 -1.41 ...
## $ CD5L : num -0.3285 0 -1.2379 -0.0408 -0.1054 ...
## $ Calbindin : num 23.5 20.6 13.4 27.3 22.5 ...
## $ Calcitonin : num -0.151 4.111 2.303 3.091 1.281 ...
## $ CgA : num 334 323 289 412 342 ...
## $ Clusterin_Apo_J : num 2.83 3.04 2.56 3.14 2.48 ...
## $ Complement_3 : num -13.2 -13 -17.3 -14.8 -19.2 ...
## $ Complement_Factor_H : num 3.1 4.68 3.78 2.52 3.3 ...
## $ Connective_Tissue_Growth_Factor : num 0.531 0.693 0.956 0.588 0.875 ...
## $ Cortisol : num 14 17 9.2 11 12 16 6.1 10 9.8 13 ...
## $ Creatine_Kinase_MB : num -1.65 -1.63 -1.61 -1.59 -1.72 ...
## $ Cystatin_C : num 9.58 8.33 7.91 8.99 8.47 ...
## $ EGF_R : num -0.422 -0.785 -0.976 -0.561 -0.767 ...
## $ EN_RAGE : num -2.94 -3.77 -3.24 -4.2 -4.27 ...
## $ ENA_78 : num -1.37 -1.38 -1.36 -1.37 -1.39 ...
## $ Eotaxin_3 : int 44 70 70 36 39 76 44 62 82 36 ...
## $ FAS : num -0.478 -0.0726 -0.5798 -0.5276 -0.9416 ...
## $ FSH_Follicle_Stimulation_Hormon : num -0.59 -0.652 -1.559 -1.505 -1.361 ...
## $ Fas_Ligand : num 2.537 2.792 2.073 0.288 3.867 ...
## $ Fatty_Acid_Binding_Protein : num 0.624 1.143 0.624 2.379 1.424 ...
## $ Ferritin : num 3.14 2.52 1.9 4.29 1.69 ...
## $ Fetuin_A : num 0.742 1.386 1.194 0.993 1.131 ...
## $ Fibrinogen : num -7.8 -7.06 -7.45 -7.8 -7.35 ...
## $ GRO_alpha : num 1.37 1.35 1.37 1.37 1.38 ...
## $ Gamma_Interferon_induced_Monokin: num 2.89 2.78 2.69 2.69 2.7 ...
## $ Glutathione_S_Transferase_alpha : num 0.708 1.034 1.13 0.676 0.844 ...
## $ HB_EGF : num 5.95 6.11 6.11 6.56 7.75 ...
## $ HCC_4 : num -3.82 -3.24 -3.77 -3.54 -3.73 ...
## $ Hepatocyte_Growth_Factor_HGF : num 0.4055 0.1823 -0.0834 0.47 -0.1054 ...
## $ I_309 : num 3.37 3 2.83 3.14 2.83 ...
## $ ICAM_1 : num -0.857 -1.111 -0.561 -0.357 -1.064 ...
## $ IGF_BP_2 : num 5.42 5.38 5.41 5.14 4.93 ...
## $ IL_11 : num 6.22 4.59 4.1 4.52 3.22 ...
## $ IL_13 : num 1.31 1.27 1.27 1.29 1.27 ...
## $ IL_16 : num 2.44 3.48 2.88 2.81 1.9 ...
## $ IL_17E : num 4.7 3.64 5.73 4.8 4.15 ...
## $ IL_1alpha : num -7.6 -7.37 -7.85 -7.68 -8.33 ...
## $ IL_3 : num -4.27 -4.02 -4.51 -3.58 -4.69 ...
## $ IL_4 : num 1.48 1.81 1.81 2.04 1.21 ...
## $ IL_5 : num 0.788 0.182 0 0.693 -0.371 ...
## $ IL_6 : num -0.371 -1.534 0.422 -0.563 0.8 ...
## $ IL_6_Receptor : num 0.5752 0.0967 -0.5322 -0.1586 0.0967 ...
## $ IL_7 : num 2.34 2.15 1.56 3.71 2.15 ...
## $ IL_8 : num 1.72 1.7 1.69 1.69 1.7 ...
## $ IP_10_Inducible_Protein_10 : num 5.6 5.33 5.06 4.75 5.29 ...
## $ IgA : num -7.62 -5.36 -7.04 -5.6 -6.57 ...
## $ Insulin : num -1.485 -1.034 -1.569 -0.901 -1.865 ...
## $ Kidney_Injury_Molecule_1_KIM_1 : num -1.23 -1.16 -1.12 -1.21 -1.16 ...
## $ LOX_1 : num 1.224 1.131 0.588 1.308 1.308 ...
## $ Leptin : num -1.27 -1.54 -1.54 -1.36 -1.55 ...
## $ Lipoprotein_a : num -4.99 -4.2 -3.54 -4.87 -5.08 ...
## $ MCP_1 : num 6.78 6.38 6.61 6.41 6.78 ...
## $ MCP_2 : num 1.981 1.626 1.626 0.401 1.853 ...
## $ MIF : num -1.66 -1.61 -2.04 -1.97 -1.83 ...
## $ MIP_1alpha : num 4.93 4.35 3.41 2.28 1.68 ...
## $ MIP_1beta : num 3.22 2.77 2.83 2.48 2.77 ...
## $ MMP_2 : num 2.97 2.92 3.27 4.05 1.36 ...
## $ MMP_3 : num -1.77 -1.97 -2.04 -2.04 -2.21 ...
## $ MMP10 : num -4.07 -3.27 -3.02 -3.04 -4.07 ...
## $ MMP7 : num -6.86 -3 -4.36 -3.35 -4.45 ...
## $ Myoglobin : num -1.139 -1.661 0.182 -1.897 -1.204 ...
## $ NT_proBNP : num 4.11 4.52 4.44 4.61 4.51 ...
## $ NrCAM : num 4.97 4.23 3.71 4.75 4.3 ...
## $ Osteopontin : num 5.77 5.28 4.96 5.23 4.79 ...
## $ PAI_1 : num 0 -0.0957 0.4905 0.4905 -0.0957 ...
## $ PAPP_A : num -2.79 -3.03 -2.71 -3.15 -3.08 ...
## $ PLGF : num 3.43 3.69 4.08 3.85 3.76 ...
## $ PYY : num 2.83 3 2.94 2.89 2.83 ...
## $ Pancreatic_polypeptide : num -0.821 0.262 -0.105 0.262 0.182 ...
## $ Prolactin : num -0.0408 0.7419 0.3365 0 -0.2485 ...
## $ Prostatic_Acid_Phosphatase : num -1.74 -1.68 -1.68 -1.69 -1.69 ...
## $ Protein_S : num -2.7 -2.36 -2.36 -2.7 -2.58 ...
## $ Pulmonary_and_Activation_Regulat: num -1.11 -1.31 -1.97 -1.97 -1.39 ...
## $ RANTES : num -5.99 -6.73 -6.65 -6.57 -6.57 ...
## $ Resistin : num -13.5 -15.6 -18 -16 -24.4 ...
## [list output truncated]
# Summarise every test-set column, mirroring the train-set summary above.
summary(Alzheimer_Test)
## ACE_CD143_Angiotensin_Converti ACTH_Adrenocorticotropic_Hormon
## Min. :-0.5473 Min. :-2.2073
## 1st Qu.: 0.9462 1st Qu.:-1.7148
## Median : 1.3013 Median :-1.5374
## Mean : 1.3105 Mean :-1.5311
## 3rd Qu.: 1.6320 3rd Qu.:-1.3863
## Max. : 3.0890 Max. :-0.7985
## AXL Adiponectin Alpha_1_Antichymotrypsin
## Min. :-0.73509 Min. :-7.059 Min. :0.1823
## 1st Qu.:-0.08175 1st Qu.:-5.737 1st Qu.:1.0647
## Median : 0.28035 Median :-5.360 Median :1.3083
## Mean : 0.25373 Mean :-5.298 Mean :1.3077
## 3rd Qu.: 0.60768 3rd Qu.:-4.917 3rd Qu.:1.5686
## Max. : 1.28634 Max. :-3.474 Max. :2.2192
## Alpha_1_Antitrypsin Alpha_1_Microglobulin Alpha_2_Macroglobulin
## Min. :-18.17 Min. :-4.135 Min. :-238.64
## 1st Qu.:-14.70 1st Qu.:-3.284 1st Qu.:-186.64
## Median :-13.59 Median :-3.006 Median :-162.93
## Mean :-13.49 Mean :-2.983 Mean :-162.89
## 3rd Qu.:-12.31 3rd Qu.:-2.674 3rd Qu.:-136.53
## Max. :-10.06 Max. :-1.897 Max. : -50.17
## Angiopoietin_2_ANG_2 Angiotensinogen Apolipoprotein_A_IV Apolipoprotein_A1
## Min. :-0.05129 Min. :1.710 Min. :-2.749 Min. :-8.568
## 1st Qu.: 0.35372 1st Qu.:2.068 1st Qu.:-2.186 1st Qu.:-7.818
## Median : 0.55921 Median :2.276 Median :-1.897 Median :-7.497
## Mean : 0.60278 Mean :2.274 Mean :-1.867 Mean :-7.488
## 3rd Qu.: 0.78846 3rd Qu.:2.430 3rd Qu.:-1.526 3rd Qu.:-7.176
## Max. : 1.77495 Max. :2.752 Max. :-1.109 Max. :-6.645
## Apolipoprotein_A2 Apolipoprotein_B Apolipoprotein_CI Apolipoprotein_CIII
## Min. :-1.9661 Min. :-8.192 Min. :-2.847 Min. :-3.863
## 1st Qu.:-0.9416 1st Qu.:-6.748 1st Qu.:-1.897 1st Qu.:-2.781
## Median :-0.7032 Median :-5.819 Median :-1.609 Median :-2.557
## Mean :-0.6902 Mean :-5.649 Mean :-1.625 Mean :-2.523
## 3rd Qu.:-0.3533 3rd Qu.:-4.603 3rd Qu.:-1.309 3rd Qu.:-2.231
## Max. : 0.5306 Max. :-2.339 Max. :-0.462 Max. :-1.386
## Apolipoprotein_D Apolipoprotein_E Apolipoprotein_H
## Min. :0.2624 Min. :0.6626 Min. :-1.1609
## 1st Qu.:1.1314 1st Qu.:2.1526 1st Qu.:-0.5317
## Median :1.3863 Median :2.8181 Median :-0.2897
## Mean :1.3943 Mean :2.7160 Mean :-0.3212
## 3rd Qu.:1.6864 3rd Qu.:3.2363 3rd Qu.:-0.1032
## Max. :2.6391 Max. :4.6844 Max. : 0.4402
## B_Lymphocyte_Chemoattractant_BL BMP_6 Beta_2_Microglobulin
## Min. :0.7318 Min. :-2.669 Min. :-0.51083
## 1st Qu.:1.5304 1st Qu.:-2.152 1st Qu.:-0.06188
## Median :1.8528 Median :-1.964 Median : 0.18232
## Mean :1.8766 Mean :-1.937 Mean : 0.15566
## 3rd Qu.:2.3714 3rd Qu.:-1.675 3rd Qu.: 0.40547
## Max. :2.9757 Max. :-1.181 Max. : 0.83291
## Betacellulin C_Reactive_Protein CD40 CD5L
## Min. :32.00 Min. :-8.112 Min. :-1.9390 Min. :-1.96611
## 1st Qu.:46.00 1st Qu.:-6.725 1st Qu.:-1.4420 1st Qu.:-0.36747
## Median :51.00 Median :-6.166 Median :-1.2574 Median :-0.05135
## Mean :52.74 Mean :-5.997 Mean :-1.2773 Mean :-0.08760
## 3rd Qu.:59.75 3rd Qu.:-5.369 3rd Qu.:-1.1034 3rd Qu.: 0.24235
## Max. :80.00 Max. :-3.411 Max. :-0.7766 Max. : 0.91629
## Calbindin Calcitonin CgA Clusterin_Apo_J
## Min. :10.81 Min. :-0.7134 Min. :166.6 Min. :1.932
## 1st Qu.:18.88 1st Qu.: 1.2014 1st Qu.:268.2 1st Qu.:2.565
## Median :21.06 Median : 1.6849 Median :324.7 Median :2.833
## Mean :21.49 Mean : 1.7250 Mean :320.2 Mean :2.845
## 3rd Qu.:24.00 3rd Qu.: 2.2618 3rd Qu.:362.3 3rd Qu.:3.045
## Max. :35.36 Max. : 4.1109 Max. :494.5 Max. :3.761
## Complement_3 Complement_Factor_H Connective_Tissue_Growth_Factor
## Min. :-22.40 Min. :0.2766 Min. :0.09531
## 1st Qu.:-17.50 1st Qu.:2.6019 1st Qu.:0.58779
## Median :-15.90 Median :3.3983 Median :0.74194
## Mean :-15.91 Mean :3.3897 Mean :0.74507
## 3rd Qu.:-14.34 3rd Qu.:4.2548 3rd Qu.:0.87547
## Max. :-10.23 Max. :6.5597 Max. :1.41099
## Cortisol Creatine_Kinase_MB Cystatin_C EGF_R
## Min. : 0.10 Min. :-1.872 Min. :7.728 Min. :-1.2694
## 1st Qu.: 8.90 1st Qu.:-1.721 1st Qu.:8.301 1st Qu.:-0.8859
## Median :10.00 Median :-1.651 Median :8.544 Median :-0.6917
## Mean :10.46 Mean :-1.652 Mean :8.576 Mean :-0.6965
## 3rd Qu.:12.00 3rd Qu.:-1.590 3rd Qu.:8.837 3rd Qu.:-0.5034
## Max. :22.00 Max. :-1.434 Max. :9.694 Max. : 0.1891
## EN_RAGE ENA_78 Eotaxin_3 FAS
## Min. :-8.3774 Min. :-1.405 Min. : 23.00 Min. :-1.1087
## 1st Qu.:-4.1836 1st Qu.:-1.382 1st Qu.: 43.00 1st Qu.:-0.7133
## Median :-3.6889 Median :-1.374 Median : 54.00 Median :-0.5798
## Mean :-3.5986 Mean :-1.376 Mean : 55.55 Mean :-0.5414
## 3rd Qu.:-3.2189 3rd Qu.:-1.368 3rd Qu.: 64.00 3rd Qu.:-0.3355
## Max. :-0.8675 Max. :-1.353 Max. :107.00 Max. : 0.1823
## FSH_Follicle_Stimulation_Hormon Fas_Ligand Fatty_Acid_Binding_Protein
## Min. :-1.8101 Min. :0.288 Min. :-0.4559
## 1st Qu.:-1.2694 1st Qu.:2.073 1st Qu.: 0.7998
## Median :-0.9763 Median :2.665 Median : 1.1866
## Mean :-1.0597 Mean :2.649 Mean : 1.2884
## 3rd Qu.:-0.8068 3rd Qu.:3.162 3rd Qu.: 1.9192
## Max. :-0.4757 Max. :5.377 Max. : 3.2188
## Ferritin Fetuin_A Fibrinogen GRO_alpha
## Min. :0.8983 Min. :0.5306 Min. :-9.373 Min. :1.271
## 1st Qu.:2.1473 1st Qu.:1.0296 1st Qu.:-7.799 1st Qu.:1.351
## Median :2.6260 Median :1.3083 Median :-7.316 Median :1.372
## Mean :2.7069 Mean :1.3116 Mean :-7.360 Mean :1.378
## 3rd Qu.:3.1672 3rd Qu.:1.6094 3rd Qu.:-6.970 3rd Qu.:1.398
## Max. :4.9282 Max. :2.2083 Max. :-6.166 Max. :1.514
## Gamma_Interferon_induced_Monokin Glutathione_S_Transferase_alpha
## Min. :2.545 Min. :0.5661
## 1st Qu.:2.698 1st Qu.:0.8257
## Median :2.768 Median :0.9493
## Mean :2.772 Mean :0.9440
## 3rd Qu.:2.829 3rd Qu.:1.0457
## Max. :3.046 Max. :1.3102
## HB_EGF HCC_4 Hepatocyte_Growth_Factor_HGF I_309
## Min. : 3.521 Min. :-4.343 Min. :-0.61619 Min. :2.041
## 1st Qu.: 5.949 1st Qu.:-3.772 1st Qu.:-0.05661 1st Qu.:2.724
## Median : 6.980 Median :-3.540 Median : 0.18232 Median :2.944
## Mean : 6.844 Mean :-3.538 Mean : 0.18076 Mean :2.921
## 3rd Qu.: 7.745 3rd Qu.:-3.352 3rd Qu.: 0.33647 3rd Qu.:3.135
## Max. :10.359 Max. :-2.489 Max. : 1.09861 Max. :3.689
## ICAM_1 IGF_BP_2 IL_11 IL_13
## Min. :-1.4661 Min. :4.718 Min. :2.031 Min. :1.232
## 1st Qu.:-0.7671 1st Qu.:5.127 1st Qu.:3.960 1st Qu.:1.274
## Median :-0.5903 Median :5.255 Median :4.838 Median :1.283
## Mean :-0.5958 Mean :5.263 Mean :4.651 Mean :1.283
## 3rd Qu.:-0.3574 3rd Qu.:5.402 3rd Qu.:5.482 3rd Qu.:1.292
## Max. : 0.3602 Max. :5.916 Max. :8.692 Max. :1.310
## IL_16 IL_17E IL_1alpha IL_3
## Min. :0.9568 Min. :1.582 Min. :-8.468 Min. :-5.521
## 1st Qu.:2.4411 1st Qu.:3.637 1st Qu.:-7.849 1st Qu.:-4.324
## Median :2.8763 Median :4.723 Median :-7.562 Median :-3.963
## Mean :2.8176 Mean :4.774 Mean :-7.549 Mean :-3.976
## 3rd Qu.:3.3514 3rd Qu.:5.415 3rd Qu.:-7.279 3rd Qu.:-3.576
## Max. :4.1028 Max. :8.081 Max. :-6.377 Max. :-3.079
## IL_4 IL_5 IL_6 IL_6_Receptor
## Min. :0.5306 Min. :-1.04982 Min. :-1.53428 Min. :-0.74560
## 1st Qu.:1.4586 1st Qu.:-0.03062 1st Qu.:-0.40924 1st Qu.:-0.20131
## Median :1.7226 Median : 0.22234 Median :-0.07205 Median : 0.00000
## Mean :1.7445 Mean : 0.22853 Mean :-0.05216 Mean : 0.06213
## 3rd Qu.:2.0669 3rd Qu.: 0.53063 3rd Qu.: 0.34805 3rd Qu.: 0.27297
## Max. :2.7081 Max. : 1.13140 Max. : 1.00562 Max. : 0.77048
## IL_7 IL_8 IP_10_Inducible_Protein_10 IgA
## Min. :1.310 Min. :1.615 Min. :4.263 Min. :-7.621
## 1st Qu.:2.379 1st Qu.:1.684 1st Qu.:5.323 1st Qu.:-6.571
## Median :3.148 Median :1.702 Median :5.617 Median :-6.012
## Mean :3.143 Mean :1.704 Mean :5.636 Mean :-6.066
## 3rd Qu.:3.706 3rd Qu.:1.725 3rd Qu.:5.917 3rd Qu.:-5.606
## Max. :5.000 Max. :1.836 Max. :7.208 Max. :-4.733
## Insulin Kidney_Injury_Molecule_1_KIM_1 LOX_1
## Min. :-2.0099 Min. :-1.251 Min. :0.0000
## 1st Qu.:-1.4466 1st Qu.:-1.209 1st Qu.:0.9649
## Median :-1.2169 Median :-1.187 Median :1.2238
## Mean :-1.1998 Mean :-1.188 Mean :1.2085
## 3rd Qu.:-1.0105 3rd Qu.:-1.166 3rd Qu.:1.4351
## Max. :-0.5025 Max. :-1.124 Max. :2.3979
## Leptin Lipoprotein_a MCP_1 MCP_2
## Min. :-1.9471 Min. :-6.571 Min. :5.889 Min. :0.4006
## 1st Qu.:-1.6334 1st Qu.:-5.116 1st Qu.:6.318 1st Qu.:1.5304
## Median :-1.4294 Median :-4.657 Median :6.482 Median :1.8528
## Mean :-1.4363 Mean :-4.515 Mean :6.480 Mean :1.8104
## 3rd Qu.:-1.2409 3rd Qu.:-4.017 3rd Qu.:6.627 3rd Qu.:2.0827
## Max. :-0.8387 Max. :-2.040 Max. :7.065 Max. :3.7545
## MIF MIP_1alpha MIP_1beta MMP_2
## Min. :-2.797 Min. :1.008 Min. :1.917 Min. :0.6248
## 1st Qu.:-2.120 1st Qu.:3.302 1st Qu.:2.485 1st Qu.:2.5513
## Median :-1.966 Median :3.736 Median :2.773 Median :2.9937
## Mean :-1.932 Mean :3.898 Mean :2.784 Mean :3.0347
## 3rd Qu.:-1.715 3rd Qu.:4.686 3rd Qu.:3.079 3rd Qu.:3.4798
## Max. :-1.109 Max. :5.735 Max. :3.784 Max. :6.0996
## MMP_3 MMP10 MMP7 Myoglobin
## Min. :-3.650 Min. :-4.948 Min. :-7.5346 Min. :-3.2968
## 1st Qu.:-2.852 1st Qu.:-4.075 1st Qu.:-4.9634 1st Qu.:-2.0217
## Median :-2.532 Median :-3.612 Median :-4.0302 Median :-1.5874
## Mean :-2.490 Mean :-3.676 Mean :-4.0148 Mean :-1.4165
## 3rd Qu.:-2.120 3rd Qu.:-3.331 3rd Qu.:-3.1640 3rd Qu.:-0.7765
## Max. :-1.171 Max. :-2.900 Max. :-0.1953 Max. : 1.1314
## NT_proBNP NrCAM Osteopontin PAI_1
## Min. :3.611 Min. :2.890 Min. :4.078 Min. :-0.990849
## 1st Qu.:4.174 1st Qu.:3.871 1st Qu.:4.892 1st Qu.:-0.334043
## Median :4.477 Median :4.317 Median :5.168 Median : 0.000000
## Mean :4.488 Mean :4.291 Mean :5.177 Mean :-0.003947
## 3rd Qu.:4.794 3rd Qu.:4.725 3rd Qu.:5.410 3rd Qu.: 0.303112
## Max. :5.398 Max. :5.690 Max. :6.315 Max. : 0.885785
## PAPP_A PLGF PYY Pancreatic_polypeptide
## Min. :-3.152 Min. :2.639 Min. :2.398 Min. :-1.609438
## 1st Qu.:-2.971 1st Qu.:3.689 1st Qu.:2.833 1st Qu.:-0.506693
## Median :-2.841 Median :3.892 Median :2.996 Median : 0.138816
## Mean :-2.845 Mean :3.884 Mean :2.976 Mean :-0.005258
## 3rd Qu.:-2.719 3rd Qu.:4.123 3rd Qu.:3.178 3rd Qu.: 0.470004
## Max. :-2.488 Max. :4.710 Max. :3.738 Max. : 1.504077
## Prolactin Prostatic_Acid_Phosphatase Protein_S
## Min. :-0.38566 Min. :-1.800 Min. :-3.154
## 1st Qu.:-0.16558 1st Qu.:-1.739 1st Qu.:-2.579
## Median : 0.00000 Median :-1.690 Median :-2.259
## Mean : 0.05195 Mean :-1.692 Mean :-2.268
## 3rd Qu.: 0.18232 3rd Qu.:-1.659 3rd Qu.:-1.924
## Max. : 0.78846 Max. :-1.540 Max. :-1.547
## Pulmonary_and_Activation_Regulat RANTES Resistin
## Min. :-2.4418 Min. :-7.236 Min. :-30.156
## 1st Qu.:-1.8326 1st Qu.:-6.725 1st Qu.:-22.131
## Median :-1.5141 Median :-6.571 Median :-18.014
## Mean :-1.5007 Mean :-6.540 Mean :-18.245
## 3rd Qu.:-1.1712 3rd Qu.:-6.392 3rd Qu.:-15.202
## Max. :-0.4463 Max. :-5.843 Max. : -6.594
## S100b SGOT SHBG SOD
## Min. :0.1874 Min. :-1.8971 Min. :-3.730 Min. :4.382
## 1st Qu.:0.9600 1st Qu.:-0.7498 1st Qu.:-3.052 1st Qu.:5.006
## Median :1.1571 Median :-0.4780 Median :-2.711 Median :5.313
## Mean :1.1819 Mean :-0.4898 Mean :-2.686 Mean :5.302
## 3rd Qu.:1.3807 3rd Qu.:-0.2138 3rd Qu.:-2.343 3rd Qu.:5.547
## Max. :2.1950 Max. : 0.1823 Max. :-1.561 Max. :6.461
## Serum_Amyloid_P Sortilin Stem_Cell_Factor TGF_alpha
## Min. :-7.182 Min. :1.508 Min. :2.219 Min. : 7.500
## 1st Qu.:-6.438 1st Qu.:3.177 1st Qu.:3.045 1st Qu.: 9.062
## Median :-6.215 Median :3.867 Median :3.314 Median : 9.596
## Mean :-6.083 Mean :3.787 Mean :3.267 Mean : 9.776
## 3rd Qu.:-5.607 3rd Qu.:4.371 3rd Qu.:3.466 3rd Qu.:10.612
## Max. :-4.699 Max. :5.681 Max. :4.078 Max. :13.083
## TIMP_1 TNF_RII TRAIL_R3 TTR_prealbumin
## Min. : 8.198 Min. :-1.6607 Min. :-1.30636 Min. :2.485
## 1st Qu.:10.530 1st Qu.:-0.8675 1st Qu.:-0.73332 1st Qu.:2.773
## Median :11.341 Median :-0.6541 Median :-0.55547 Median :2.890
## Mean :11.520 Mean :-0.6270 Mean :-0.58640 Mean :2.854
## 3rd Qu.:12.352 3rd Qu.:-0.3320 3rd Qu.:-0.47065 3rd Qu.:2.944
## Max. :16.547 Max. : 0.4055 Max. : 0.09622 Max. :3.091
## Tamm_Horsfall_Protein_THP Thrombomodulin Thrombopoietin
## Min. :-3.206 Min. :-2.054 Min. :-1.5396
## 1st Qu.:-3.144 1st Qu.:-1.675 1st Qu.:-0.8383
## Median :-3.126 Median :-1.534 Median :-0.7039
## Mean :-3.123 Mean :-1.533 Mean :-0.7192
## 3rd Qu.:-3.101 3rd Qu.:-1.341 3rd Qu.:-0.6289
## Max. :-3.041 Max. :-1.019 Max. :-0.3029
## Thymus_Expressed_Chemokine_TECK Thyroid_Stimulating_Hormone
## Min. :2.141 Min. :-6.190
## 1st Qu.:3.283 1st Qu.:-4.733
## Median :3.753 Median :-4.269
## Mean :3.770 Mean :-4.221
## 3rd Qu.:4.316 3rd Qu.:-3.828
## Max. :5.681 Max. :-2.040
## Thyroxine_Binding_Globulin Tissue_Factor Transferrin
## Min. :-2.3026 Min. :0.0000 Min. :2.282
## 1st Qu.:-1.7148 1st Qu.:0.7053 1st Qu.:2.708
## Median :-1.4919 Median :1.1473 Median :2.890
## Mean :-1.4902 Mean :1.1356 Mean :2.900
## 3rd Qu.:-1.2379 3rd Qu.:1.5149 3rd Qu.:3.135
## Max. :-0.5978 Max. :2.7081 Max. :3.497
## Trefoil_Factor_3_TFF3 VCAM_1 VEGF Vitronectin
## Min. :-4.906 Min. :2.028 Min. :12.23 Min. :-1.07881
## 1st Qu.:-4.200 1st Qu.:2.420 1st Qu.:15.03 1st Qu.:-0.46204
## Median :-3.912 Median :2.674 Median :17.08 Median :-0.28106
## Mean :-3.947 Mean :2.644 Mean :16.70 Mean :-0.26833
## 3rd Qu.:-3.772 3rd Qu.:2.833 3rd Qu.:18.19 3rd Qu.:-0.05394
## Max. :-3.170 Max. :3.466 Max. :21.18 Max. : 0.40547
## von_Willebrand_Factor Class E4 E3
## Min. :-4.920 Impaired:18 Min. :0.000 Min. :0.0000
## 1st Qu.:-4.269 Control :48 1st Qu.:0.000 1st Qu.:1.0000
## Median :-4.017 Median :0.000 Median :1.0000
## Mean :-4.014 Mean :0.303 Mean :0.9848
## 3rd Qu.:-3.730 3rd Qu.:1.000 3rd Qu.:1.0000
## Max. :-3.058 Max. :1.000 Max. :1.0000
## E2
## Min. :0.00000
## 1st Qu.:0.00000
## Median :0.00000
## Mean :0.06061
## 3rd Qu.:0.00000
## Max. :1.00000
##################################
# Formulating a data type assessment summary
##################################
# Data type assessment: take a working copy of the train set and tabulate,
# for every column, its position, its name and its class.
PDA <- Alzheimer_Train
# The outer parentheses make R print the summary in addition to assigning it.
(PDA.Summary <- data.frame(
  # seq_along() yields an empty index for a zero-column frame,
  # whereas 1:length() would yield c(1, 0).
  Column.Index = seq_along(names(PDA)),
  Column.Name = names(PDA),
  # class(x)[1] guarantees exactly one label per column, so the result is
  # always a character vector even if a column carries several classes.
  Column.Type = vapply(PDA, function(x) class(x)[1], character(1)),
  row.names = NULL
))
## Column.Index Column.Name Column.Type
## 1 1 ACE_CD143_Angiotensin_Converti numeric
## 2 2 ACTH_Adrenocorticotropic_Hormon numeric
## 3 3 AXL numeric
## 4 4 Adiponectin numeric
## 5 5 Alpha_1_Antichymotrypsin numeric
## 6 6 Alpha_1_Antitrypsin numeric
## 7 7 Alpha_1_Microglobulin numeric
## 8 8 Alpha_2_Macroglobulin numeric
## 9 9 Angiopoietin_2_ANG_2 numeric
## 10 10 Angiotensinogen numeric
## 11 11 Apolipoprotein_A_IV numeric
## 12 12 Apolipoprotein_A1 numeric
## 13 13 Apolipoprotein_A2 numeric
## 14 14 Apolipoprotein_B numeric
## 15 15 Apolipoprotein_CI numeric
## 16 16 Apolipoprotein_CIII numeric
## 17 17 Apolipoprotein_D numeric
## 18 18 Apolipoprotein_E numeric
## 19 19 Apolipoprotein_H numeric
## 20 20 B_Lymphocyte_Chemoattractant_BL numeric
## 21 21 BMP_6 numeric
## 22 22 Beta_2_Microglobulin numeric
## 23 23 Betacellulin integer
## 24 24 C_Reactive_Protein numeric
## 25 25 CD40 numeric
## 26 26 CD5L numeric
## 27 27 Calbindin numeric
## 28 28 Calcitonin numeric
## 29 29 CgA numeric
## 30 30 Clusterin_Apo_J numeric
## 31 31 Complement_3 numeric
## 32 32 Complement_Factor_H numeric
## 33 33 Connective_Tissue_Growth_Factor numeric
## 34 34 Cortisol numeric
## 35 35 Creatine_Kinase_MB numeric
## 36 36 Cystatin_C numeric
## 37 37 EGF_R numeric
## 38 38 EN_RAGE numeric
## 39 39 ENA_78 numeric
## 40 40 Eotaxin_3 integer
## 41 41 FAS numeric
## 42 42 FSH_Follicle_Stimulation_Hormon numeric
## 43 43 Fas_Ligand numeric
## 44 44 Fatty_Acid_Binding_Protein numeric
## 45 45 Ferritin numeric
## 46 46 Fetuin_A numeric
## 47 47 Fibrinogen numeric
## 48 48 GRO_alpha numeric
## 49 49 Gamma_Interferon_induced_Monokin numeric
## 50 50 Glutathione_S_Transferase_alpha numeric
## 51 51 HB_EGF numeric
## 52 52 HCC_4 numeric
## 53 53 Hepatocyte_Growth_Factor_HGF numeric
## 54 54 I_309 numeric
## 55 55 ICAM_1 numeric
## 56 56 IGF_BP_2 numeric
## 57 57 IL_11 numeric
## 58 58 IL_13 numeric
## 59 59 IL_16 numeric
## 60 60 IL_17E numeric
## 61 61 IL_1alpha numeric
## 62 62 IL_3 numeric
## 63 63 IL_4 numeric
## 64 64 IL_5 numeric
## 65 65 IL_6 numeric
## 66 66 IL_6_Receptor numeric
## 67 67 IL_7 numeric
## 68 68 IL_8 numeric
## 69 69 IP_10_Inducible_Protein_10 numeric
## 70 70 IgA numeric
## 71 71 Insulin numeric
## 72 72 Kidney_Injury_Molecule_1_KIM_1 numeric
## 73 73 LOX_1 numeric
## 74 74 Leptin numeric
## 75 75 Lipoprotein_a numeric
## 76 76 MCP_1 numeric
## 77 77 MCP_2 numeric
## 78 78 MIF numeric
## 79 79 MIP_1alpha numeric
## 80 80 MIP_1beta numeric
## 81 81 MMP_2 numeric
## 82 82 MMP_3 numeric
## 83 83 MMP10 numeric
## 84 84 MMP7 numeric
## 85 85 Myoglobin numeric
## 86 86 NT_proBNP numeric
## 87 87 NrCAM numeric
## 88 88 Osteopontin numeric
## 89 89 PAI_1 numeric
## 90 90 PAPP_A numeric
## 91 91 PLGF numeric
## 92 92 PYY numeric
## 93 93 Pancreatic_polypeptide numeric
## 94 94 Prolactin numeric
## 95 95 Prostatic_Acid_Phosphatase numeric
## 96 96 Protein_S numeric
## 97 97 Pulmonary_and_Activation_Regulat numeric
## 98 98 RANTES numeric
## 99 99 Resistin numeric
## 100 100 S100b numeric
## 101 101 SGOT numeric
## 102 102 SHBG numeric
## 103 103 SOD numeric
## 104 104 Serum_Amyloid_P numeric
## 105 105 Sortilin numeric
## 106 106 Stem_Cell_Factor numeric
## 107 107 TGF_alpha numeric
## 108 108 TIMP_1 numeric
## 109 109 TNF_RII numeric
## 110 110 TRAIL_R3 numeric
## 111 111 TTR_prealbumin numeric
## 112 112 Tamm_Horsfall_Protein_THP numeric
## 113 113 Thrombomodulin numeric
## 114 114 Thrombopoietin numeric
## 115 115 Thymus_Expressed_Chemokine_TECK numeric
## 116 116 Thyroid_Stimulating_Hormone numeric
## 117 117 Thyroxine_Binding_Globulin numeric
## 118 118 Tissue_Factor numeric
## 119 119 Transferrin numeric
## 120 120 Trefoil_Factor_3_TFF3 numeric
## 121 121 VCAM_1 numeric
## 122 122 VEGF numeric
## 123 123 Vitronectin numeric
## 124 124 von_Willebrand_Factor numeric
## 125 125 Class factor
## 126 126 E4 numeric
## 127 127 E3 numeric
## 128 128 E2 numeric
##################################
# Loading dataset
##################################
# Working copy of the train set used by all data quality assessment steps.
DQA <- Alzheimer_Train
##################################
# Formulating an overall data quality assessment summary
##################################
# One row per column of DQA: position, name, class, total row count,
# number of missing values and the share of non-missing values.
# The outer parentheses make R print the summary in addition to assigning it.
(DQA.Summary <- data.frame(
  # seq_along() stays correct (empty) when there are no columns.
  Column.Index = seq_along(names(DQA)),
  Column.Name = names(DQA),
  # class(x)[1] keeps one label per column even for multi-class columns.
  Column.Type = vapply(DQA, function(x) class(x)[1], character(1)),
  # Every column has the same number of rows; repeat it once per column.
  Row.Count = rep(nrow(DQA), length(names(DQA))),
  NA.Count = vapply(DQA, function(x) sum(is.na(x)), integer(1)),
  # Fill rate is kept as a 3-decimal character string, e.g. "1.000".
  Fill.Rate = vapply(
    DQA,
    function(x) format(round(sum(!is.na(x)) / nrow(DQA), 3), nsmall = 3),
    character(1)
  ),
  row.names = NULL
))
## Column.Index Column.Name Column.Type Row.Count
## 1 1 ACE_CD143_Angiotensin_Converti numeric 267
## 2 2 ACTH_Adrenocorticotropic_Hormon numeric 267
## 3 3 AXL numeric 267
## 4 4 Adiponectin numeric 267
## 5 5 Alpha_1_Antichymotrypsin numeric 267
## 6 6 Alpha_1_Antitrypsin numeric 267
## 7 7 Alpha_1_Microglobulin numeric 267
## 8 8 Alpha_2_Macroglobulin numeric 267
## 9 9 Angiopoietin_2_ANG_2 numeric 267
## 10 10 Angiotensinogen numeric 267
## 11 11 Apolipoprotein_A_IV numeric 267
## 12 12 Apolipoprotein_A1 numeric 267
## 13 13 Apolipoprotein_A2 numeric 267
## 14 14 Apolipoprotein_B numeric 267
## 15 15 Apolipoprotein_CI numeric 267
## 16 16 Apolipoprotein_CIII numeric 267
## 17 17 Apolipoprotein_D numeric 267
## 18 18 Apolipoprotein_E numeric 267
## 19 19 Apolipoprotein_H numeric 267
## 20 20 B_Lymphocyte_Chemoattractant_BL numeric 267
## 21 21 BMP_6 numeric 267
## 22 22 Beta_2_Microglobulin numeric 267
## 23 23 Betacellulin integer 267
## 24 24 C_Reactive_Protein numeric 267
## 25 25 CD40 numeric 267
## 26 26 CD5L numeric 267
## 27 27 Calbindin numeric 267
## 28 28 Calcitonin numeric 267
## 29 29 CgA numeric 267
## 30 30 Clusterin_Apo_J numeric 267
## 31 31 Complement_3 numeric 267
## 32 32 Complement_Factor_H numeric 267
## 33 33 Connective_Tissue_Growth_Factor numeric 267
## 34 34 Cortisol numeric 267
## 35 35 Creatine_Kinase_MB numeric 267
## 36 36 Cystatin_C numeric 267
## 37 37 EGF_R numeric 267
## 38 38 EN_RAGE numeric 267
## 39 39 ENA_78 numeric 267
## 40 40 Eotaxin_3 integer 267
## 41 41 FAS numeric 267
## 42 42 FSH_Follicle_Stimulation_Hormon numeric 267
## 43 43 Fas_Ligand numeric 267
## 44 44 Fatty_Acid_Binding_Protein numeric 267
## 45 45 Ferritin numeric 267
## 46 46 Fetuin_A numeric 267
## 47 47 Fibrinogen numeric 267
## 48 48 GRO_alpha numeric 267
## 49 49 Gamma_Interferon_induced_Monokin numeric 267
## 50 50 Glutathione_S_Transferase_alpha numeric 267
## 51 51 HB_EGF numeric 267
## 52 52 HCC_4 numeric 267
## 53 53 Hepatocyte_Growth_Factor_HGF numeric 267
## 54 54 I_309 numeric 267
## 55 55 ICAM_1 numeric 267
## 56 56 IGF_BP_2 numeric 267
## 57 57 IL_11 numeric 267
## 58 58 IL_13 numeric 267
## 59 59 IL_16 numeric 267
## 60 60 IL_17E numeric 267
## 61 61 IL_1alpha numeric 267
## 62 62 IL_3 numeric 267
## 63 63 IL_4 numeric 267
## 64 64 IL_5 numeric 267
## 65 65 IL_6 numeric 267
## 66 66 IL_6_Receptor numeric 267
## 67 67 IL_7 numeric 267
## 68 68 IL_8 numeric 267
## 69 69 IP_10_Inducible_Protein_10 numeric 267
## 70 70 IgA numeric 267
## 71 71 Insulin numeric 267
## 72 72 Kidney_Injury_Molecule_1_KIM_1 numeric 267
## 73 73 LOX_1 numeric 267
## 74 74 Leptin numeric 267
## 75 75 Lipoprotein_a numeric 267
## 76 76 MCP_1 numeric 267
## 77 77 MCP_2 numeric 267
## 78 78 MIF numeric 267
## 79 79 MIP_1alpha numeric 267
## 80 80 MIP_1beta numeric 267
## 81 81 MMP_2 numeric 267
## 82 82 MMP_3 numeric 267
## 83 83 MMP10 numeric 267
## 84 84 MMP7 numeric 267
## 85 85 Myoglobin numeric 267
## 86 86 NT_proBNP numeric 267
## 87 87 NrCAM numeric 267
## 88 88 Osteopontin numeric 267
## 89 89 PAI_1 numeric 267
## 90 90 PAPP_A numeric 267
## 91 91 PLGF numeric 267
## 92 92 PYY numeric 267
## 93 93 Pancreatic_polypeptide numeric 267
## 94 94 Prolactin numeric 267
## 95 95 Prostatic_Acid_Phosphatase numeric 267
## 96 96 Protein_S numeric 267
## 97 97 Pulmonary_and_Activation_Regulat numeric 267
## 98 98 RANTES numeric 267
## 99 99 Resistin numeric 267
## 100 100 S100b numeric 267
## 101 101 SGOT numeric 267
## 102 102 SHBG numeric 267
## 103 103 SOD numeric 267
## 104 104 Serum_Amyloid_P numeric 267
## 105 105 Sortilin numeric 267
## 106 106 Stem_Cell_Factor numeric 267
## 107 107 TGF_alpha numeric 267
## 108 108 TIMP_1 numeric 267
## 109 109 TNF_RII numeric 267
## 110 110 TRAIL_R3 numeric 267
## 111 111 TTR_prealbumin numeric 267
## 112 112 Tamm_Horsfall_Protein_THP numeric 267
## 113 113 Thrombomodulin numeric 267
## 114 114 Thrombopoietin numeric 267
## 115 115 Thymus_Expressed_Chemokine_TECK numeric 267
## 116 116 Thyroid_Stimulating_Hormone numeric 267
## 117 117 Thyroxine_Binding_Globulin numeric 267
## 118 118 Tissue_Factor numeric 267
## 119 119 Transferrin numeric 267
## 120 120 Trefoil_Factor_3_TFF3 numeric 267
## 121 121 VCAM_1 numeric 267
## 122 122 VEGF numeric 267
## 123 123 Vitronectin numeric 267
## 124 124 von_Willebrand_Factor numeric 267
## 125 125 Class factor 267
## 126 126 E4 numeric 267
## 127 127 E3 numeric 267
## 128 128 E2 numeric 267
## NA.Count Fill.Rate
## 1 0 1.000
## 2 0 1.000
## 3 0 1.000
## 4 0 1.000
## 5 0 1.000
## 6 0 1.000
## 7 0 1.000
## 8 0 1.000
## 9 0 1.000
## 10 0 1.000
## 11 0 1.000
## 12 0 1.000
## 13 0 1.000
## 14 0 1.000
## 15 0 1.000
## 16 0 1.000
## 17 0 1.000
## 18 0 1.000
## 19 0 1.000
## 20 0 1.000
## 21 0 1.000
## 22 0 1.000
## 23 0 1.000
## 24 0 1.000
## 25 0 1.000
## 26 0 1.000
## 27 0 1.000
## 28 0 1.000
## 29 0 1.000
## 30 0 1.000
## 31 0 1.000
## 32 0 1.000
## 33 0 1.000
## 34 0 1.000
## 35 0 1.000
## 36 0 1.000
## 37 0 1.000
## 38 0 1.000
## 39 0 1.000
## 40 0 1.000
## 41 0 1.000
## 42 0 1.000
## 43 0 1.000
## 44 0 1.000
## 45 0 1.000
## 46 0 1.000
## 47 0 1.000
## 48 0 1.000
## 49 0 1.000
## 50 0 1.000
## 51 0 1.000
## 52 0 1.000
## 53 0 1.000
## 54 0 1.000
## 55 0 1.000
## 56 0 1.000
## 57 0 1.000
## 58 0 1.000
## 59 0 1.000
## 60 0 1.000
## 61 0 1.000
## 62 0 1.000
## 63 0 1.000
## 64 0 1.000
## 65 0 1.000
## 66 0 1.000
## 67 0 1.000
## 68 0 1.000
## 69 0 1.000
## 70 0 1.000
## 71 0 1.000
## 72 0 1.000
## 73 0 1.000
## 74 0 1.000
## 75 0 1.000
## 76 0 1.000
## 77 0 1.000
## 78 0 1.000
## 79 0 1.000
## 80 0 1.000
## 81 0 1.000
## 82 0 1.000
## 83 0 1.000
## 84 0 1.000
## 85 0 1.000
## 86 0 1.000
## 87 0 1.000
## 88 0 1.000
## 89 0 1.000
## 90 0 1.000
## 91 0 1.000
## 92 0 1.000
## 93 0 1.000
## 94 0 1.000
## 95 0 1.000
## 96 0 1.000
## 97 0 1.000
## 98 0 1.000
## 99 0 1.000
## 100 0 1.000
## 101 0 1.000
## 102 0 1.000
## 103 0 1.000
## 104 0 1.000
## 105 0 1.000
## 106 0 1.000
## 107 0 1.000
## 108 0 1.000
## 109 0 1.000
## 110 0 1.000
## 111 0 1.000
## 112 0 1.000
## 113 0 1.000
## 114 0 1.000
## 115 0 1.000
## 116 0 1.000
## 117 0 1.000
## 118 0 1.000
## 119 0 1.000
## 120 0 1.000
## 121 0 1.000
## 122 0 1.000
## 123 0 1.000
## 124 0 1.000
## 125 0 1.000
## 126 0 1.000
## 127 0 1.000
## 128 0 1.000
##################################
# Listing all predictors
##################################
# All predictors: every column of DQA except the response column "Class".
# drop = FALSE keeps the result a data frame even if only a single
# predictor column remains after removing "Class".
DQA.Predictors <- DQA[, !names(DQA) %in% c("Class"), drop = FALSE]
##################################
# Listing all numeric predictors
##################################
# Numeric predictors: everything except the E2/E3/E4 dummy columns.
# drop = FALSE keeps a data frame even when a single column is left.
DQA.Predictors.Numeric <- DQA.Predictors[
  , !names(DQA.Predictors) %in% c("E2", "E3", "E4"),
  drop = FALSE
]
# Coerce every column to double (integer columns included). Converting
# column-by-column with lapply() keeps the data-frame shape for any number
# of rows/columns; row names are reset to 1..n and column names are kept
# verbatim (check.names = FALSE).
DQA.Predictors.Numeric <- as.data.frame(
  lapply(DQA.Predictors.Numeric, as.numeric),
  check.names = FALSE
)
# Report how many numeric predictors were found.
if (length(names(DQA.Predictors.Numeric)) > 0) {
  print(paste0(
    "There are ",
    length(names(DQA.Predictors.Numeric)),
    " numeric predictor variable(s)."
  ))
} else {
  print("There are no numeric predictor variables.")
}
## [1] "There are 124 numeric predictor variable(s)."
##################################
# Listing all factor predictors
##################################
# Categorical predictors: the E2/E3/E4 dummy columns only.
# drop = FALSE keeps a data frame even when a single column matches.
DQA.Predictors.Factor <- DQA.Predictors[
  , names(DQA.Predictors) %in% c("E2", "E3", "E4"),
  drop = FALSE
]
# Store each column as the character labels of its factor levels.
# NOTE(review): the columns end up as character, not factor -- this matches
# the Column.Type ("character") reported by the factor summary further down;
# switch to as.factor() alone only if downstream code expects real factors.
DQA.Predictors.Factor <- as.data.frame(
  lapply(DQA.Predictors.Factor, function(x) as.character(as.factor(x))),
  check.names = FALSE,
  stringsAsFactors = FALSE
)
# Report how many categorical predictors were found.
if (length(names(DQA.Predictors.Factor)) > 0) {
  print(paste0(
    "There are ",
    length(names(DQA.Predictors.Factor)),
    " factor predictor variable(s)."
  ))
} else {
  print("There are no factor predictor variables.")
}
## [1] "There are 3 factor predictor variable(s)."
##################################
# Formulating a data quality assessment summary for factor predictors
##################################
if (length(names(DQA.Predictors.Factor)) > 0) {
  ##################################
  # Formulating a function to determine the first mode
  ##################################
  # Returns the most frequent non-missing value(s) of x.
  # Ties are all returned; callers take element [1].
  FirstModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    ux[tab == max(tab)]
  }
  ##################################
  # Formulating a function to determine the second mode
  ##################################
  # Returns the most frequent value(s) of x once every first-mode value has
  # been removed. Returns the sentinel "x" when there is no usable second
  # mode: either nothing is left after removing the first mode(s), or the
  # most frequent remaining value is NA.
  SecondModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    fm <- ux[tab == max(tab)]
    sm <- x[!(x %in% fm)]
    # Explicit guard: without it max() runs on an empty vector (warning) and
    # the function would return a zero-length result instead of the sentinel.
    if (length(sm) == 0) {
      return("x")
    }
    usm <- unique(sm)
    tabsm <- tabulate(match(sm, usm))
    candidates <- usm[tabsm == max(tabsm)]
    if (anyNA(candidates)) {
      return("x")
    }
    candidates
  }
  # One row per categorical predictor: class, cardinality, first/second mode
  # values and counts, and the two ratios used to judge variance.
  # The outer parentheses make R print the summary in addition to assigning it.
  (DQA.Predictors.Factor.Summary <- data.frame(
    Column.Name = names(DQA.Predictors.Factor),
    Column.Type = vapply(
      DQA.Predictors.Factor,
      function(x) class(x)[1],
      character(1)
    ),
    Unique.Count = vapply(
      DQA.Predictors.Factor,
      function(x) length(unique(x)),
      integer(1)
    ),
    First.Mode.Value = vapply(
      DQA.Predictors.Factor,
      function(x) as.character(FirstModes(x)[1]),
      character(1)
    ),
    Second.Mode.Value = vapply(
      DQA.Predictors.Factor,
      function(x) as.character(SecondModes(x)[1]),
      character(1)
    ),
    First.Mode.Count = vapply(
      DQA.Predictors.Factor,
      function(x) sum(na.omit(x) == FirstModes(x)[1]),
      integer(1)
    ),
    Second.Mode.Count = vapply(
      DQA.Predictors.Factor,
      function(x) sum(na.omit(x) == SecondModes(x)[1]),
      integer(1)
    ),
    # Ratios are kept as 3-decimal character strings, e.g. "0.007".
    Unique.Count.Ratio = vapply(
      DQA.Predictors.Factor,
      function(x) {
        format(
          round(length(unique(x)) / nrow(DQA.Predictors.Factor), 3),
          nsmall = 3
        )
      },
      character(1)
    ),
    First.Second.Mode.Ratio = vapply(
      DQA.Predictors.Factor,
      function(x) {
        first.count <- sum(na.omit(x) == FirstModes(x)[1])
        second.count <- sum(na.omit(x) == SecondModes(x)[1])
        format(round(first.count / second.count, 3), nsmall = 3)
      },
      character(1)
    ),
    row.names = NULL
  ))
}
## Column.Name Column.Type Unique.Count First.Mode.Value Second.Mode.Value
## 1 E4 character 2 0 1
## 2 E3 character 2 1 0
## 3 E2 character 2 0 1
## First.Mode.Count Second.Mode.Count Unique.Count.Ratio First.Second.Mode.Ratio
## 1 160 107 0.007 1.495
## 2 245 22 0.007 11.136
## 3 224 43 0.007 5.209
##################################
# Formulating a data quality assessment summary for numeric predictors
##################################
if (length(names(DQA.Predictors.Numeric)) > 0) {
  ##################################
  # Formulating a function to determine the first mode
  ##################################
  # Returns the most frequent non-missing value(s) of x.
  # Ties are all returned; callers take element [1].
  FirstModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    ux[tab == max(tab)]
  }
  ##################################
  # Formulating a function to determine the second mode
  ##################################
  # Returns the most frequent non-missing value(s) of x once every first-mode
  # value has been removed. Returns the sentinel 0.00001 when there is no
  # second mode (nothing is left after removing the first mode(s)).
  SecondModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    fm <- ux[tab == max(tab)]
    observed <- na.omit(x)
    sm <- observed[!(observed %in% fm)]
    # Explicit guard: without it max() runs on an empty vector (warning) and
    # the function would return a zero-length result instead of the sentinel.
    if (length(sm) == 0) {
      return(0.00001)
    }
    usm <- unique(sm)
    tabsm <- tabulate(match(sm, usm))
    candidates <- usm[tabsm == max(tabsm)]
    if (anyNA(candidates)) {
      return(0.00001)
    }
    candidates
  }
  # One row per numeric predictor: class, cardinality, first/second mode
  # values and counts, their ratio, and location/shape statistics.
  # All formatted statistics are kept as 3-decimal character strings.
  # skewness() and kurtosis() are not base R; the file loads `moments`.
  # The outer parentheses make R print the summary in addition to assigning it.
  (DQA.Predictors.Numeric.Summary <- data.frame(
    Column.Name = names(DQA.Predictors.Numeric),
    Column.Type = vapply(
      DQA.Predictors.Numeric,
      function(x) class(x)[1],
      character(1)
    ),
    Unique.Count = vapply(
      DQA.Predictors.Numeric,
      function(x) length(unique(x)),
      integer(1)
    ),
    Unique.Count.Ratio = vapply(
      DQA.Predictors.Numeric,
      function(x) {
        format(
          round(length(unique(x)) / nrow(DQA.Predictors.Numeric), 3),
          nsmall = 3
        )
      },
      character(1)
    ),
    First.Mode.Value = vapply(
      DQA.Predictors.Numeric,
      function(x) format(round(FirstModes(x)[1], 3), nsmall = 3),
      character(1)
    ),
    Second.Mode.Value = vapply(
      DQA.Predictors.Numeric,
      function(x) format(round(SecondModes(x)[1], 3), nsmall = 3),
      character(1)
    ),
    First.Mode.Count = vapply(
      DQA.Predictors.Numeric,
      function(x) sum(na.omit(x) == FirstModes(x)[1]),
      integer(1)
    ),
    Second.Mode.Count = vapply(
      DQA.Predictors.Numeric,
      function(x) sum(na.omit(x) == SecondModes(x)[1]),
      integer(1)
    ),
    First.Second.Mode.Ratio = vapply(
      DQA.Predictors.Numeric,
      function(x) {
        first.count <- sum(na.omit(x) == FirstModes(x)[1])
        second.count <- sum(na.omit(x) == SecondModes(x)[1])
        format(round(first.count / second.count, 3), nsmall = 3)
      },
      character(1)
    ),
    Minimum = vapply(
      DQA.Predictors.Numeric,
      function(x) format(round(min(x, na.rm = TRUE), 3), nsmall = 3),
      character(1)
    ),
    Mean = vapply(
      DQA.Predictors.Numeric,
      function(x) format(round(mean(x, na.rm = TRUE), 3), nsmall = 3),
      character(1)
    ),
    Median = vapply(
      DQA.Predictors.Numeric,
      function(x) format(round(median(x, na.rm = TRUE), 3), nsmall = 3),
      character(1)
    ),
    Maximum = vapply(
      DQA.Predictors.Numeric,
      function(x) format(round(max(x, na.rm = TRUE), 3), nsmall = 3),
      character(1)
    ),
    Skewness = vapply(
      DQA.Predictors.Numeric,
      function(x) format(round(skewness(x, na.rm = TRUE), 3), nsmall = 3),
      character(1)
    ),
    Kurtosis = vapply(
      DQA.Predictors.Numeric,
      function(x) format(round(kurtosis(x, na.rm = TRUE), 3), nsmall = 3),
      character(1)
    ),
    Percentile25th = vapply(
      DQA.Predictors.Numeric,
      function(x) {
        format(round(quantile(x, probs = 0.25, na.rm = TRUE), 3), nsmall = 3)
      },
      character(1)
    ),
    Percentile75th = vapply(
      DQA.Predictors.Numeric,
      function(x) {
        format(round(quantile(x, probs = 0.75, na.rm = TRUE), 3), nsmall = 3)
      },
      character(1)
    ),
    row.names = NULL
  ))
}
## Column.Name Column.Type Unique.Count
## 1 ACE_CD143_Angiotensin_Converti numeric 53
## 2 ACTH_Adrenocorticotropic_Hormon numeric 32
## 3 AXL numeric 59
## 4 Adiponectin numeric 91
## 5 Alpha_1_Antichymotrypsin numeric 65
## 6 Alpha_1_Antitrypsin numeric 64
## 7 Alpha_1_Microglobulin numeric 82
## 8 Alpha_2_Macroglobulin numeric 47
## 9 Angiopoietin_2_ANG_2 numeric 36
## 10 Angiotensinogen numeric 135
## 11 Apolipoprotein_A_IV numeric 53
## 12 Apolipoprotein_A1 numeric 83
## 13 Apolipoprotein_A2 numeric 85
## 14 Apolipoprotein_B numeric 83
## 15 Apolipoprotein_CI numeric 49
## 16 Apolipoprotein_CIII numeric 82
## 17 Apolipoprotein_D numeric 63
## 18 Apolipoprotein_E numeric 79
## 19 Apolipoprotein_H numeric 82
## 20 B_Lymphocyte_Chemoattractant_BL numeric 35
## 21 BMP_6 numeric 42
## 22 Beta_2_Microglobulin numeric 50
## 23 Betacellulin numeric 36
## 24 C_Reactive_Protein numeric 135
## 25 CD40 numeric 34
## 26 CD5L numeric 82
## 27 Calbindin numeric 139
## 28 Calcitonin numeric 93
## 29 CgA numeric 105
## 30 Clusterin_Apo_J numeric 31
## 31 Complement_3 numeric 63
## 32 Complement_Factor_H numeric 87
## 33 Connective_Tissue_Growth_Factor numeric 26
## 34 Cortisol numeric 52
## 35 Creatine_Kinase_MB numeric 32
## 36 Cystatin_C numeric 212
## 37 EGF_R numeric 59
## 38 EN_RAGE numeric 95
## 39 ENA_78 numeric 39
## 40 Eotaxin_3 numeric 42
## 41 FAS numeric 52
## 42 FSH_Follicle_Stimulation_Hormon numeric 97
## 43 Fas_Ligand numeric 57
## 44 Fatty_Acid_Binding_Protein numeric 84
## 45 Ferritin numeric 75
## 46 Fetuin_A numeric 67
## 47 Fibrinogen numeric 91
## 48 GRO_alpha numeric 25
## 49 Gamma_Interferon_induced_Monokin numeric 209
## 50 Glutathione_S_Transferase_alpha numeric 42
## 51 HB_EGF numeric 54
## 52 HCC_4 numeric 53
## 53 Hepatocyte_Growth_Factor_HGF numeric 44
## 54 I_309 numeric 45
## 55 ICAM_1 numeric 68
## 56 IGF_BP_2 numeric 126
## 57 IL_11 numeric 75
## 58 IL_13 numeric 18
## 59 IL_16 numeric 55
## 60 IL_17E numeric 44
## 61 IL_1alpha numeric 60
## 62 IL_3 numeric 58
## 63 IL_4 numeric 47
## 64 IL_5 numeric 48
## 65 IL_6 numeric 53
## 66 IL_6_Receptor numeric 53
## 67 IL_7 numeric 46
## 68 IL_8 numeric 56
## 69 IP_10_Inducible_Protein_10 numeric 211
## 70 IgA numeric 94
## 71 Insulin numeric 55
## 72 Kidney_Injury_Molecule_1_KIM_1 numeric 50
## 73 LOX_1 numeric 67
## 74 Leptin numeric 82
## 75 Lipoprotein_a numeric 128
## 76 MCP_1 numeric 219
## 77 MCP_2 numeric 39
## 78 MIF numeric 43
## 79 MIP_1alpha numeric 49
## 80 MIP_1beta numeric 47
## 81 MMP_2 numeric 48
## 82 MMP_3 numeric 80
## 83 MMP10 numeric 56
## 84 MMP7 numeric 89
## 85 Myoglobin numeric 116
## 86 NT_proBNP numeric 111
## 87 NrCAM numeric 126
## 88 Osteopontin numeric 166
## 89 PAI_1 numeric 70
## 90 PAPP_A numeric 34
## 91 PLGF numeric 86
## 92 PYY numeric 34
## 93 Pancreatic_polypeptide numeric 81
## 94 Prolactin numeric 57
## 95 Prostatic_Acid_Phosphatase numeric 43
## 96 Protein_S numeric 24
## 97 Pulmonary_and_Activation_Regulat numeric 54
## 98 RANTES numeric 39
## 99 Resistin numeric 59
## 100 S100b numeric 35
## 101 SGOT numeric 75
## 102 SHBG numeric 92
## 103 SOD numeric 164
## 104 Serum_Amyloid_P numeric 74
## 105 Sortilin numeric 65
## 106 Stem_Cell_Factor numeric 44
## 107 TGF_alpha numeric 62
## 108 TIMP_1 numeric 56
## 109 TNF_RII numeric 72
## 110 TRAIL_R3 numeric 60
## 111 TTR_prealbumin numeric 16
## 112 Tamm_Horsfall_Protein_THP numeric 60
## 113 Thrombomodulin numeric 35
## 114 Thrombopoietin numeric 54
## 115 Thymus_Expressed_Chemokine_TECK numeric 37
## 116 Thyroid_Stimulating_Hormone numeric 66
## 117 Thyroxine_Binding_Globulin numeric 49
## 118 Tissue_Factor numeric 67
## 119 Transferrin numeric 30
## 120 Trefoil_Factor_3_TFF3 numeric 37
## 121 VCAM_1 numeric 40
## 122 VEGF numeric 200
## 123 Vitronectin numeric 71
## 124 von_Willebrand_Factor numeric 43
## Unique.Count.Ratio First.Mode.Value Second.Mode.Value First.Mode.Count
## 1 0.199 1.157 1.107 14
## 2 0.120 -1.609 -1.715 20
## 3 0.221 0.191 0.530 29
## 4 0.341 -5.991 -4.200 7
## 5 0.243 1.194 1.099 12
## 6 0.240 -12.907 -13.310 11
## 7 0.307 -3.244 -3.270 13
## 8 0.176 -179.087 -194.947 21
## 9 0.135 0.531 0.642 23
## 10 0.506 2.262 2.107 9
## 11 0.199 -2.040 -1.772 30
## 12 0.311 -7.902 -7.452 8
## 13 0.318 -0.968 -0.755 9
## 14 0.311 -6.211 -7.289 11
## 15 0.184 -1.715 -1.661 23
## 16 0.307 -2.120 -2.207 17
## 17 0.236 1.308 1.386 11
## 18 0.296 2.720 4.024 11
## 19 0.307 0.097 -0.383 14
## 20 0.131 2.371 1.981 36
## 21 0.157 -1.675 -1.845 30
## 22 0.187 0.095 0.182 38
## 23 0.135 51.000 42.000 50
## 24 0.506 -5.745 -6.571 7
## 25 0.127 -1.242 -1.273 25
## 26 0.307 0.095 0.182 17
## 27 0.521 21.495 20.891 8
## 28 0.348 0.693 0.956 15
## 29 0.393 315.308 361.583 9
## 30 0.116 2.773 2.890 25
## 31 0.236 -16.545 -18.173 15
## 32 0.326 4.024 4.475 22
## 33 0.097 0.693 0.788 25
## 34 0.195 12.000 11.000 38
## 35 0.120 -1.671 -1.724 27
## 36 0.794 8.470 8.357 4
## 37 0.221 -0.590 -0.700 12
## 38 0.356 -4.200 -4.423 11
## 39 0.146 -1.368 -1.364 39
## 40 0.157 64.000 44.000 38
## 41 0.195 -0.713 -0.528 23
## 42 0.363 -1.064 -1.361 10
## 43 0.213 3.101 2.792 19
## 44 0.315 0.624 0.269 9
## 45 0.281 2.382 3.329 8
## 46 0.251 1.281 1.224 15
## 47 0.341 -6.571 -7.601 7
## 48 0.094 1.398 1.372 32
## 49 0.783 2.789 2.584 4
## 50 0.157 0.968 0.985 20
## 51 0.202 6.413 6.560 17
## 52 0.199 -3.576 -3.689 15
## 53 0.165 0.095 0.182 45
## 54 0.169 2.944 2.996 21
## 55 0.255 -0.489 -0.590 19
## 56 0.472 5.328 5.187 7
## 57 0.281 2.031 5.122 16
## 58 0.067 1.283 1.274 41
## 59 0.206 3.077 2.924 21
## 60 0.165 5.325 4.749 30
## 61 0.225 -7.264 -7.849 35
## 62 0.217 -3.912 -4.075 20
## 63 0.176 1.808 1.209 25
## 64 0.180 0.182 0.336 27
## 65 0.199 0.096 -0.242 19
## 66 0.199 0.273 0.000 26
## 67 0.172 2.155 3.476 33
## 68 0.210 1.676 1.691 15
## 69 0.790 5.687 5.050 3
## 70 0.352 -6.645 -6.502 11
## 71 0.206 -1.277 -1.341 19
## 72 0.187 -1.172 -1.144 11
## 73 0.251 1.281 1.131 13
## 74 0.307 -1.329 -1.466 17
## 75 0.479 -4.343 -4.423 8
## 76 0.820 6.213 6.293 4
## 77 0.146 1.530 1.853 32
## 78 0.161 -1.897 -2.120 29
## 79 0.184 5.359 3.690 21
## 80 0.176 2.944 2.833 22
## 81 0.180 2.332 3.266 36
## 82 0.300 -2.207 -2.120 18
## 83 0.210 -3.689 -3.963 14
## 84 0.333 -3.774 -3.345 24
## 85 0.434 -2.040 -1.609 11
## 86 0.416 4.466 4.554 8
## 87 0.472 4.489 3.912 6
## 88 0.622 5.288 5.187 6
## 89 0.262 0.094 0.177 25
## 90 0.127 -2.971 -2.902 26
## 91 0.322 3.738 3.714 10
## 92 0.127 2.833 2.996 38
## 93 0.303 0.182 0.336 14
## 94 0.213 0.095 0.182 32
## 95 0.161 -1.690 -1.710 26
## 96 0.090 -2.358 -2.259 36
## 97 0.202 -1.386 -1.772 16
## 98 0.146 -6.571 -6.502 28
## 99 0.221 -20.661 -23.322 14
## 100 0.131 0.946 1.055 22
## 101 0.281 -0.400 0.095 12
## 102 0.345 -2.207 -1.772 24
## 103 0.614 5.263 5.468 9
## 104 0.277 -6.032 -6.215 14
## 105 0.243 3.461 3.924 11
## 106 0.165 3.401 3.045 18
## 107 0.232 10.186 10.858 10
## 108 0.210 11.266 11.856 15
## 109 0.270 -0.755 -0.598 9
## 110 0.225 -0.683 -0.471 13
## 111 0.060 2.833 2.890 48
## 112 0.225 -3.096 -3.123 20
## 113 0.131 -1.579 -1.534 26
## 114 0.202 -0.886 -0.658 20
## 115 0.139 4.149 3.637 20
## 116 0.247 -6.190 -4.605 15
## 117 0.184 -1.715 -1.661 19
## 118 0.251 0.742 1.435 9
## 119 0.112 2.890 2.773 29
## 120 0.139 -4.017 -3.730 19
## 121 0.150 2.708 2.565 27
## 122 0.749 15.756 17.476 4
## 123 0.266 0.095 0.182 14
## 124 0.161 -4.269 -4.343 18
## Second.Mode.Count First.Second.Mode.Ratio Minimum Mean Median
## 1 11 1.273 -0.676 1.320 1.301
## 2 19 1.053 -2.207 -1.538 -1.561
## 3 26 1.115 -0.923 0.309 0.280
## 4 6 1.167 -6.725 -5.201 -5.185
## 5 10 1.200 0.262 1.361 1.361
## 6 10 1.100 -17.028 -13.052 -13.004
## 7 8 1.625 -4.343 -2.932 -2.937
## 8 20 1.050 -289.685 -158.615 -160.010
## 9 20 1.150 -0.545 0.673 0.642
## 10 7 1.286 1.752 2.318 2.320
## 11 21 1.429 -2.957 -1.854 -1.833
## 12 7 1.143 -8.680 -7.483 -7.470
## 13 8 1.125 -1.897 -0.635 -0.673
## 14 9 1.222 -9.937 -5.578 -5.703
## 15 18 1.278 -3.324 -1.583 -1.609
## 16 15 1.133 -3.689 -2.494 -2.526
## 17 10 1.100 0.470 1.440 1.411
## 18 10 1.100 0.591 2.806 2.818
## 19 9 1.556 -2.234 -0.321 -0.370
## 20 30 1.200 0.732 2.017 1.981
## 21 29 1.034 -2.761 -1.911 -1.877
## 22 36 1.056 -0.545 0.168 0.182
## 23 31 1.613 10.000 51.011 51.000
## 24 6 1.167 -8.517 -5.874 -5.843
## 25 20 1.250 -1.864 -1.258 -1.273
## 26 16 1.062 -1.238 -0.053 -0.062
## 27 6 1.333 10.961 22.433 22.249
## 28 13 1.154 -0.713 1.679 1.649
## 29 8 1.125 135.605 333.298 331.520
## 30 21 1.190 1.872 2.882 2.890
## 31 13 1.154 -23.387 -15.610 -15.524
## 32 19 1.158 -0.839 3.554 3.600
## 33 24 1.042 0.182 0.774 0.788
## 34 34 1.118 0.100 11.984 12.000
## 35 24 1.125 -1.872 -1.674 -1.671
## 36 3 1.333 7.432 8.586 8.564
## 37 11 1.091 -1.361 -0.701 -0.684
## 38 10 1.100 -8.377 -3.635 -3.650
## 39 22 1.773 -1.405 -1.372 -1.374
## 40 26 1.462 7.000 58.172 59.000
## 41 19 1.211 -1.514 -0.529 -0.528
## 42 9 1.111 -2.115 -1.143 -1.136
## 43 14 1.357 -0.154 2.968 3.101
## 44 8 1.125 -1.044 1.353 1.387
## 45 7 1.143 0.608 2.765 2.775
## 46 12 1.250 0.470 1.350 1.308
## 47 6 1.167 -8.874 -7.356 -7.323
## 48 28 1.143 1.271 1.378 1.382
## 49 3 1.333 2.393 2.786 2.783
## 50 18 1.111 0.524 0.951 0.968
## 51 13 1.308 2.103 6.833 6.703
## 52 14 1.071 -4.510 -3.500 -3.507
## 53 27 1.667 -0.635 0.196 0.182
## 54 19 1.105 1.758 2.958 2.944
## 55 14 1.357 -1.533 -0.591 -0.590
## 56 6 1.167 4.635 5.317 5.323
## 57 10 1.600 1.755 4.725 4.805
## 58 38 1.079 1.259 1.284 1.283
## 59 20 1.050 1.187 2.929 2.909
## 60 25 1.200 1.052 4.855 4.749
## 61 26 1.346 -8.517 -7.514 -7.524
## 62 17 1.176 -5.915 -3.941 -3.912
## 63 20 1.250 0.531 1.773 1.808
## 64 26 1.038 -1.427 0.187 0.182
## 65 16 1.188 -1.534 -0.154 -0.160
## 66 24 1.083 -0.676 0.095 0.097
## 67 18 1.833 0.560 2.839 2.793
## 68 13 1.154 1.574 1.704 1.705
## 69 2 1.500 4.317 5.755 5.753
## 70 10 1.100 -10.520 -6.121 -6.119
## 71 16 1.188 -2.169 -1.233 -1.246
## 72 10 1.100 -1.256 -1.185 -1.183
## 73 12 1.083 0.000 1.283 1.281
## 74 15 1.133 -2.147 -1.504 -1.505
## 75 7 1.143 -6.812 -4.417 -4.605
## 76 3 1.333 5.826 6.497 6.494
## 77 24 1.333 0.401 1.869 1.853
## 78 25 1.160 -2.847 -1.864 -1.897
## 79 14 1.500 0.935 4.049 4.050
## 80 21 1.048 1.946 2.814 2.833
## 81 22 1.636 0.098 2.875 2.815
## 82 17 1.059 -4.423 -2.446 -2.453
## 83 13 1.077 -4.934 -3.635 -3.650
## 84 13 1.846 -8.398 -3.789 -3.774
## 85 10 1.100 -3.170 -1.367 -1.470
## 86 7 1.143 3.178 4.552 4.554
## 87 5 1.200 2.639 4.362 4.394
## 88 5 1.200 4.111 5.204 5.187
## 89 20 1.250 -0.991 0.077 0.094
## 90 25 1.040 -3.311 -2.854 -2.871
## 91 9 1.111 2.485 3.912 3.871
## 92 27 1.407 2.186 3.015 2.996
## 93 11 1.273 -2.120 -0.013 -0.041
## 94 30 1.067 -1.309 0.045 0.000
## 95 21 1.238 -1.934 -1.685 -1.690
## 96 30 1.200 -3.338 -2.240 -2.259
## 97 14 1.143 -2.513 -1.488 -1.514
## 98 22 1.273 -7.222 -6.511 -6.502
## 99 13 1.077 -34.967 -17.641 -17.466
## 100 20 1.100 0.187 1.251 1.254
## 101 10 1.200 -1.347 -0.406 -0.400
## 102 11 2.182 -4.135 -2.477 -2.489
## 103 6 1.500 4.317 5.336 5.366
## 104 12 1.167 -7.506 -6.017 -6.032
## 105 10 1.100 1.654 3.852 3.867
## 106 15 1.200 2.251 3.301 3.296
## 107 9 1.111 6.843 9.801 9.919
## 108 13 1.154 1.742 11.750 11.565
## 109 8 1.125 -1.661 -0.594 -0.598
## 110 12 1.083 -1.211 -0.539 -0.532
## 111 47 1.021 2.485 2.854 2.833
## 112 18 1.111 -3.206 -3.116 -3.117
## 113 21 1.238 -2.038 -1.505 -1.492
## 114 17 1.176 -1.540 -0.754 -0.751
## 115 19 1.053 1.508 3.848 3.810
## 116 14 1.071 -6.190 -4.499 -4.510
## 117 18 1.056 -2.477 -1.479 -1.514
## 118 8 1.125 -0.211 1.170 1.224
## 119 28 1.036 1.932 2.909 2.890
## 120 18 1.056 -4.744 -3.876 -3.863
## 121 24 1.125 1.723 2.688 2.708
## 122 3 1.333 11.831 16.988 17.077
## 123 12 1.167 -1.427 -0.285 -0.301
## 124 17 1.059 -4.991 -3.906 -3.912
## Maximum Skewness Kurtosis Percentile25th Percentile75th
## 1 2.840 -0.101 2.975 0.946 1.719
## 2 -0.844 0.028 2.746 -1.715 -1.347
## 3 1.521 0.038 2.866 0.000 0.608
## 4 -3.507 0.108 2.713 -5.669 -4.780
## 5 2.303 0.034 3.103 1.131 1.589
## 6 -8.192 0.164 3.586 -14.071 -12.096
## 7 -1.772 0.027 2.648 -3.270 -2.590
## 8 -59.456 -0.034 3.026 -186.641 -134.622
## 9 1.526 -0.168 3.639 0.470 0.875
## 10 2.881 -0.042 2.370 2.119 2.497
## 11 -0.777 0.046 3.028 -2.120 -1.609
## 12 -6.166 -0.016 3.085 -7.763 -7.209
## 13 0.956 0.279 2.957 -0.968 -0.315
## 14 -2.153 -0.032 2.767 -6.630 -4.539
## 15 -0.274 0.088 4.317 -1.833 -1.367
## 16 -1.238 0.245 3.246 -2.773 -2.207
## 17 2.272 0.001 2.732 1.209 1.668
## 18 5.444 0.064 3.288 2.334 3.286
## 19 0.927 0.097 4.888 -0.598 -0.061
## 20 4.024 0.050 3.410 1.673 2.371
## 21 -0.817 0.055 3.667 -2.152 -1.675
## 22 0.993 -0.022 2.906 -0.041 0.336
## 23 82.000 -0.026 3.431 42.000 59.000
## 24 -2.937 0.032 2.515 -6.645 -5.083
## 25 -0.547 0.157 3.504 -1.376 -1.124
## 26 1.163 0.095 2.918 -0.357 0.262
## 27 33.777 -0.042 3.250 19.771 24.795
## 28 3.892 0.109 2.731 0.956 2.282
## 29 535.397 -0.055 2.708 278.025 392.067
## 30 3.584 -0.186 3.336 2.708 3.045
## 31 -9.563 -0.028 2.547 -17.567 -13.882
## 32 7.624 0.065 3.873 2.753 4.255
## 33 1.411 0.015 3.002 0.642 0.916
## 34 29.000 0.586 5.953 9.800 14.000
## 35 -1.384 0.086 3.197 -1.724 -1.626
## 36 9.694 0.157 3.075 8.321 8.839
## 37 -0.061 -0.163 2.797 -0.857 -0.546
## 38 -0.386 0.068 6.513 -4.200 -3.147
## 39 -1.339 0.091 2.965 -1.381 -1.364
## 40 107.000 0.074 2.842 44.000 70.000
## 41 0.336 -0.085 2.755 -0.713 -0.315
## 42 0.097 0.114 2.612 -1.466 -0.876
## 43 7.633 0.000 4.024 2.341 3.695
## 44 3.706 0.037 3.003 0.800 1.885
## 45 4.633 -0.127 2.981 2.290 3.292
## 46 2.251 0.165 2.609 1.099 1.609
## 47 -5.843 0.006 3.171 -7.717 -7.013
## 48 1.495 -0.219 2.941 1.351 1.406
## 49 3.065 -0.158 2.829 2.707 2.873
## 50 1.318 0.044 2.589 0.844 1.034
## 51 10.695 0.020 2.900 5.786 7.865
## 52 -2.120 0.260 3.591 -3.730 -3.270
## 53 0.875 -0.055 2.708 0.000 0.405
## 54 4.143 -0.174 3.467 2.708 3.219
## 55 0.517 -0.040 2.979 -0.830 -0.383
## 56 5.948 -0.110 3.296 5.179 5.453
## 57 8.491 0.000 2.480 3.706 5.776
## 58 1.321 0.402 3.316 1.274 1.290
## 59 4.937 0.175 3.117 2.521 3.351
## 60 8.952 -0.012 3.326 4.149 5.631
## 61 -5.952 0.299 3.530 -7.824 -7.264
## 62 -2.453 -0.150 3.642 -4.269 -3.631
## 63 3.045 0.004 2.822 1.459 2.146
## 64 1.946 -0.045 3.687 -0.122 0.470
## 65 1.814 0.016 4.021 -0.413 0.141
## 66 0.831 -0.132 2.443 -0.125 0.354
## 67 5.706 -0.022 2.488 2.155 3.706
## 68 1.807 -0.145 3.470 1.680 1.728
## 69 7.501 0.293 3.317 5.398 6.064
## 70 -4.200 -0.607 6.399 -6.645 -5.573
## 71 -0.159 -0.025 3.691 -1.447 -1.034
## 72 -1.105 0.012 2.880 -1.204 -1.164
## 73 2.272 -0.020 3.061 1.030 1.526
## 74 -0.621 0.053 3.019 -1.700 -1.329
## 75 -1.386 0.344 2.390 -5.308 -3.490
## 76 7.230 -0.015 2.788 6.319 6.678
## 77 4.024 0.000 3.957 1.530 2.182
## 78 -0.844 0.160 3.034 -2.120 -1.661
## 79 6.796 0.151 2.810 3.338 4.686
## 80 4.007 0.206 3.001 2.565 3.045
## 81 5.359 -0.113 2.953 2.332 3.551
## 82 -0.528 -0.114 3.615 -2.749 -2.120
## 83 -2.207 0.240 3.361 -3.938 -3.352
## 84 -0.222 -0.035 2.703 -4.820 -2.714
## 85 1.775 0.704 3.638 -2.040 -0.799
## 86 5.886 -0.160 4.130 4.350 4.775
## 87 6.011 -0.244 3.244 3.998 4.749
## 88 6.308 0.031 3.223 4.963 5.442
## 89 1.166 0.045 2.750 -0.167 0.320
## 90 -2.520 -0.029 2.698 -2.936 -2.749
## 91 5.170 -0.092 3.011 3.638 4.205
## 92 3.932 0.166 3.434 2.833 3.178
## 93 1.932 0.076 2.690 -0.528 0.531
## 94 0.993 -0.045 4.693 -0.139 0.258
## 95 -1.424 0.193 5.276 -1.717 -1.654
## 96 -1.221 0.041 3.459 -2.464 -2.000
## 97 -0.274 0.244 2.621 -1.833 -1.171
## 98 -5.547 0.170 2.912 -6.725 -6.320
## 99 -2.239 -0.050 2.997 -21.468 -13.501
## 100 2.373 -0.007 2.971 1.001 1.500
## 101 0.742 0.142 3.370 -0.635 -0.198
## 102 -1.109 -0.066 2.965 -2.813 -2.120
## 103 6.317 -0.145 3.061 5.094 5.583
## 104 -4.646 -0.127 2.900 -6.377 -5.655
## 105 6.225 0.056 2.885 3.343 4.371
## 106 4.277 0.056 2.906 3.045 3.526
## 107 13.827 -0.021 2.809 8.859 10.695
## 108 18.881 0.157 6.004 10.490 12.697
## 109 0.470 0.044 3.110 -0.821 -0.378
## 110 0.269 0.070 3.256 -0.701 -0.385
## 111 3.332 0.226 3.255 2.773 2.944
## 112 -2.995 0.059 3.762 -3.137 -3.096
## 113 -0.817 -0.024 2.875 -1.626 -1.341
## 114 0.098 0.239 3.382 -0.886 -0.629
## 115 6.225 0.032 3.693 3.343 4.316
## 116 -1.715 0.024 3.719 -4.962 -4.017
## 117 -0.211 0.331 2.879 -1.772 -1.238
## 118 2.485 -0.180 2.887 0.833 1.482
## 119 3.761 -0.035 3.828 2.708 3.091
## 120 -2.957 0.151 2.781 -4.135 -3.650
## 121 3.689 -0.064 3.124 2.485 2.890
## 122 22.380 0.102 3.290 15.773 18.095
## 123 0.531 -0.038 2.760 -0.511 -0.036
## 124 -2.957 -0.124 2.625 -4.200 -3.612
##################################
# Identifying potential data quality issues
##################################
##################################
# Checking for missing observations
##################################
# Report variables that contain missing values.
# If at least one row of DQA.Summary has NA.Count > 0, print how many
# variables are affected and then show those summary rows; otherwise print
# a confirmation message.
# NOTE: "} else {" must stay on one line -- at top level R would otherwise
# treat the `if` as complete and fail on a dangling `else`.
if ((nrow(DQA.Summary[DQA.Summary$NA.Count>0,]))>0){
  print(paste0("Missing observations noted for ",
               (nrow(DQA.Summary[DQA.Summary$NA.Count>0,])),
               " variable(s) with NA.Count>0 and Fill.Rate<1.0."))
  # Bare expression: displayed via auto-printing when run at top level
  DQA.Summary[DQA.Summary$NA.Count>0,]
} else {
  print("No missing observations noted.")
}
## [1] "No missing observations noted."
##################################
# Checking for zero or near-zero variance predictors
##################################
# Flag factor predictors whose most frequent level dominates the second
# most frequent one (First.Second.Mode.Ratio > 5).
# Three outcomes: no factor predictors at all, some flagged (count is
# printed and the flagged summary rows are shown), or none flagged.
# First.Second.Mode.Ratio is converted with as.numeric(as.character(.))
# before the comparison, so the column may be stored as character/factor.
if (length(names(DQA.Predictors.Factor))==0) {
  print("No factor predictors noted.")
} else if (nrow(DQA.Predictors.Factor.Summary[as.numeric(as.character(DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio))>5,])>0){
  print(paste0("Low variance observed for ",
               (nrow(DQA.Predictors.Factor.Summary[as.numeric(as.character(DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio))>5,])),
               " factor variable(s) with First.Second.Mode.Ratio>5."))
  # Bare expression: displayed via auto-printing when run at top level
  DQA.Predictors.Factor.Summary[as.numeric(as.character(DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio))>5,]
} else {
  print("No low variance factor predictors due to high first-second mode ratio noted.")
}
## [1] "Low variance observed for 2 factor variable(s) with First.Second.Mode.Ratio>5."
## Column.Name Column.Type Unique.Count First.Mode.Value Second.Mode.Value
## 2 E3 character 2 1 0
## 3 E2 character 2 0 1
## First.Mode.Count Second.Mode.Count Unique.Count.Ratio First.Second.Mode.Ratio
## 2 245 22 0.007 11.136
## 3 224 43 0.007 5.209
# Flag numeric predictors whose most frequent value dominates the second
# most frequent one (First.Second.Mode.Ratio > 5).
# Three outcomes: no numeric predictors at all, some flagged (count is
# printed and the flagged summary rows are shown), or none flagged.
if (length(names(DQA.Predictors.Numeric))==0) {
  print("No numeric predictors noted.")
} else if (nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$First.Second.Mode.Ratio))>5,])>0){
  print(paste0("Low variance observed for ",
               (nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$First.Second.Mode.Ratio))>5,])),
               " numeric variable(s) with First.Second.Mode.Ratio>5."))
  # Bare expression: displayed via auto-printing when run at top level
  DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$First.Second.Mode.Ratio))>5,]
} else {
  print("No low variance numeric predictors due to high first-second mode ratio noted.")
}
## [1] "No low variance numeric predictors due to high first-second mode ratio noted."
# Flag numeric predictors with very few distinct values relative to the
# number of observations (Unique.Count.Ratio < 0.01).
# Three outcomes: no numeric predictors at all, some flagged (count is
# printed and the flagged summary rows are shown), or none flagged.
if (length(names(DQA.Predictors.Numeric))==0) {
  print("No numeric predictors noted.")
} else if (nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Unique.Count.Ratio))<0.01,])>0){
  print(paste0("Low variance observed for ",
               (nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Unique.Count.Ratio))<0.01,])),
               " numeric variable(s) with Unique.Count.Ratio<0.01."))
  # Bare expression: displayed via auto-printing when run at top level
  DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Unique.Count.Ratio))<0.01,]
} else {
  print("No low variance numeric predictors due to low unique count ratio noted.")
}
## [1] "No low variance numeric predictors due to low unique count ratio noted."
##################################
# Checking for skewed predictors
##################################
# Flag numeric predictors with a skewness coefficient above 3 or below -3.
# Three outcomes: no numeric predictors at all, some flagged (count is
# printed and the flagged summary rows are shown), or none flagged.
# The elementwise `|` is intentional: it builds a row mask over the
# summary table, not a scalar condition.
if (length(names(DQA.Predictors.Numeric))==0) {
  print("No numeric predictors noted.")
} else if (nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))>3 |
                                               as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))<(-3),])>0){
  print(paste0("High skewness observed for ",
               (nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))>3 |
                                                    as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))<(-3),])),
               " numeric variable(s) with Skewness>3 or Skewness<(-3)."))
  # Bare expression: displayed via auto-printing when run at top level
  DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))>3 |
                                 as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))<(-3),]
} else {
  print("No skewed numeric predictors noted.")
}
## [1] "No skewed numeric predictors noted."
##################################
# Loading dataset
##################################
# Start the pre-processing analysis from the training partition.
DPA <- Alzheimer_Train
##################################
# Listing all predictors
##################################
# Everything except the response column "Class".
# drop = FALSE keeps the result a data frame even if one column remains.
DPA.Predictors <- DPA[, !names(DPA) %in% c("Class"), drop = FALSE]
##################################
# Listing all numeric predictors
##################################
# Exclude the E2/E3/E4 genotype dummy variables, then coerce every
# remaining column to numeric. The sapply() result is a matrix, so the
# rebuilt data frame gets default (1..n) row names.
DPA.Predictors.Numeric <- DPA.Predictors[, !names(DPA.Predictors) %in% c("E2","E3","E4"), drop = FALSE]
DPA.Predictors.Numeric <- as.data.frame(sapply(DPA.Predictors.Numeric, function(x) as.numeric(x)))
##################################
# Identifying outliers for the numeric predictors
##################################
# Count outliers per numeric predictor and draw one horizontal boxplot per
# predictor, annotated with its outlier count.
# Outliers are the values that boxplot.stats() returns in its $out element.

# Preallocate one slot per predictor instead of growing the vector.
OutlierCountList <- integer(ncol(DPA.Predictors.Numeric))

# seq_len() yields an empty sequence when there are no columns
# (1:ncol() would iterate over c(1, 0) and fail).
for (i in seq_len(ncol(DPA.Predictors.Numeric))) {
  Outliers <- boxplot.stats(DPA.Predictors.Numeric[,i])$out
  OutlierCount <- length(Outliers)
  OutlierCountList[i] <- OutlierCount
  # Row positions of the outlying values; not used below, kept because
  # the script defines it at top level.
  OutlierIndices <- which(DPA.Predictors.Numeric[,i] %in% c(Outliers))
  boxplot(DPA.Predictors.Numeric[,i],
          ylab = names(DPA.Predictors.Numeric)[i],
          main = names(DPA.Predictors.Numeric)[i],
          horizontal=TRUE)
  mtext(paste0(OutlierCount, " Outlier(s) Detected"))
}

# Build the per-predictor summary directly with typed columns, avoiding a
# character matrix round trip through cbind().
OutlierCountSummary <- data.frame(NumericPredictors = names(DPA.Predictors.Numeric),
                                  OutlierCount = as.numeric(OutlierCountList))

# Number of predictors with at least one outlier.
NumericPredictorWithOutlierCount <- nrow(OutlierCountSummary[OutlierCountSummary$OutlierCount>0,])
print(paste0(NumericPredictorWithOutlierCount, " numeric variable(s) were noted with outlier(s)." ))
## [1] "105 numeric variable(s) were noted with outlier(s)."
##################################
# Gathering descriptive statistics
##################################
# Summarise the numeric predictors with skim(); the outer parentheses make
# the assignment also print its value.
(DPA_Skimmed <- skim(DPA.Predictors.Numeric))
Name | DPA.Predictors.Numeric |
Number of rows | 267 |
Number of columns | 124 |
_______________________ | |
Column type frequency: | |
numeric | 124 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
ACE_CD143_Angiotensin_Converti | 0 | 1 | 1.32 | 0.54 | -0.68 | 0.95 | 1.30 | 1.72 | 2.84 | ▁▃▇▇▁ |
ACTH_Adrenocorticotropic_Hormon | 0 | 1 | -1.54 | 0.28 | -2.21 | -1.71 | -1.56 | -1.35 | -0.84 | ▂▆▇▆▂ |
AXL | 0 | 1 | 0.31 | 0.45 | -0.92 | 0.00 | 0.28 | 0.61 | 1.52 | ▁▅▇▃▁ |
Adiponectin | 0 | 1 | -5.20 | 0.67 | -6.73 | -5.67 | -5.18 | -4.78 | -3.51 | ▂▆▇▅▁ |
Alpha_1_Antichymotrypsin | 0 | 1 | 1.36 | 0.36 | 0.26 | 1.13 | 1.36 | 1.59 | 2.30 | ▁▃▇▅▁ |
Alpha_1_Antitrypsin | 0 | 1 | -13.05 | 1.48 | -17.03 | -14.07 | -13.00 | -12.10 | -8.19 | ▁▅▇▂▁ |
Alpha_1_Microglobulin | 0 | 1 | -2.93 | 0.48 | -4.34 | -3.27 | -2.94 | -2.59 | -1.77 | ▁▅▇▇▂ |
Alpha_2_Macroglobulin | 0 | 1 | -158.61 | 40.93 | -289.68 | -186.64 | -160.01 | -134.62 | -59.46 | ▁▂▇▅▂ |
Angiopoietin_2_ANG_2 | 0 | 1 | 0.67 | 0.33 | -0.54 | 0.47 | 0.64 | 0.88 | 1.53 | ▁▂▇▆▂ |
Angiotensinogen | 0 | 1 | 2.32 | 0.25 | 1.75 | 2.12 | 2.32 | 2.50 | 2.88 | ▃▆▇▇▂ |
Apolipoprotein_A_IV | 0 | 1 | -1.85 | 0.38 | -2.96 | -2.12 | -1.83 | -1.61 | -0.78 | ▁▃▇▃▁ |
Apolipoprotein_A1 | 0 | 1 | -7.48 | 0.44 | -8.68 | -7.76 | -7.47 | -7.21 | -6.17 | ▁▅▇▃▁ |
Apolipoprotein_A2 | 0 | 1 | -0.64 | 0.50 | -1.90 | -0.97 | -0.67 | -0.31 | 0.96 | ▁▇▇▃▁ |
Apolipoprotein_B | 0 | 1 | -5.58 | 1.48 | -9.94 | -6.63 | -5.70 | -4.54 | -2.15 | ▁▅▇▇▂ |
Apolipoprotein_CI | 0 | 1 | -1.58 | 0.41 | -3.32 | -1.83 | -1.61 | -1.37 | -0.27 | ▁▁▇▅▁ |
Apolipoprotein_CIII | 0 | 1 | -2.49 | 0.44 | -3.69 | -2.77 | -2.53 | -2.21 | -1.24 | ▁▅▇▅▁ |
Apolipoprotein_D | 0 | 1 | 1.44 | 0.34 | 0.47 | 1.21 | 1.41 | 1.67 | 2.27 | ▁▃▇▆▂ |
Apolipoprotein_E | 0 | 1 | 2.81 | 0.78 | 0.59 | 2.33 | 2.82 | 3.29 | 5.44 | ▁▅▇▂▁ |
Apolipoprotein_H | 0 | 1 | -0.32 | 0.39 | -2.23 | -0.60 | -0.37 | -0.06 | 0.93 | ▁▁▇▆▁ |
B_Lymphocyte_Chemoattractant_BL | 0 | 1 | 2.02 | 0.57 | 0.73 | 1.67 | 1.98 | 2.37 | 4.02 | ▃▇▇▁▁ |
BMP_6 | 0 | 1 | -1.91 | 0.31 | -2.76 | -2.15 | -1.88 | -1.68 | -0.82 | ▁▅▇▂▁ |
Beta_2_Microglobulin | 0 | 1 | 0.17 | 0.30 | -0.54 | -0.04 | 0.18 | 0.34 | 0.99 | ▂▃▇▃▁ |
Betacellulin | 0 | 1 | 51.01 | 10.87 | 10.00 | 42.00 | 51.00 | 59.00 | 82.00 | ▁▁▇▅▁ |
C_Reactive_Protein | 0 | 1 | -5.87 | 1.20 | -8.52 | -6.65 | -5.84 | -5.08 | -2.94 | ▃▆▇▆▂ |
CD40 | 0 | 1 | -1.26 | 0.21 | -1.86 | -1.38 | -1.27 | -1.12 | -0.55 | ▁▆▇▃▁ |
CD5L | 0 | 1 | -0.05 | 0.45 | -1.24 | -0.36 | -0.06 | 0.26 | 1.16 | ▁▅▇▃▁ |
Calbindin | 0 | 1 | 22.43 | 4.11 | 10.96 | 19.77 | 22.25 | 24.80 | 33.78 | ▁▃▇▃▁ |
Calcitonin | 0 | 1 | 1.68 | 0.87 | -0.71 | 0.96 | 1.65 | 2.28 | 3.89 | ▁▆▇▅▂ |
CgA | 0 | 1 | 333.30 | 83.67 | 135.60 | 278.02 | 331.52 | 392.07 | 535.40 | ▂▆▇▅▂ |
Clusterin_Apo_J | 0 | 1 | 2.88 | 0.29 | 1.87 | 2.71 | 2.89 | 3.04 | 3.58 | ▁▂▇▆▂ |
Complement_3 | 0 | 1 | -15.61 | 2.46 | -23.39 | -17.57 | -15.52 | -13.88 | -9.56 | ▁▆▇▇▂ |
Complement_Factor_H | 0 | 1 | 3.55 | 1.25 | -0.84 | 2.75 | 3.60 | 4.25 | 7.62 | ▁▃▇▅▁ |
Connective_Tissue_Growth_Factor | 0 | 1 | 0.77 | 0.20 | 0.18 | 0.64 | 0.79 | 0.92 | 1.41 | ▁▅▇▃▁ |
Cortisol | 0 | 1 | 11.98 | 3.95 | 0.10 | 9.80 | 12.00 | 14.00 | 29.00 | ▁▇▇▁▁ |
Creatine_Kinase_MB | 0 | 1 | -1.67 | 0.09 | -1.87 | -1.72 | -1.67 | -1.63 | -1.38 | ▃▅▇▂▁ |
Cystatin_C | 0 | 1 | 8.59 | 0.40 | 7.43 | 8.32 | 8.56 | 8.84 | 9.69 | ▁▅▇▃▁ |
EGF_R | 0 | 1 | -0.70 | 0.22 | -1.36 | -0.86 | -0.68 | -0.55 | -0.06 | ▁▃▇▅▁ |
EN_RAGE | 0 | 1 | -3.64 | 0.88 | -8.38 | -4.20 | -3.65 | -3.15 | -0.39 | ▁▁▇▆▁ |
ENA_78 | 0 | 1 | -1.37 | 0.01 | -1.41 | -1.38 | -1.37 | -1.36 | -1.34 | ▁▅▇▅▁ |
Eotaxin_3 | 0 | 1 | 58.17 | 15.83 | 7.00 | 44.00 | 59.00 | 70.00 | 107.00 | ▁▆▇▅▁ |
FAS | 0 | 1 | -0.53 | 0.30 | -1.51 | -0.71 | -0.53 | -0.31 | 0.34 | ▁▃▇▆▁ |
FSH_Follicle_Stimulation_Hormon | 0 | 1 | -1.14 | 0.42 | -2.12 | -1.47 | -1.14 | -0.88 | 0.10 | ▂▇▇▃▁ |
Fas_Ligand | 0 | 1 | 2.97 | 1.09 | -0.15 | 2.34 | 3.10 | 3.70 | 7.63 | ▂▇▇▁▁ |
Fatty_Acid_Binding_Protein | 0 | 1 | 1.35 | 0.78 | -1.04 | 0.80 | 1.39 | 1.88 | 3.71 | ▁▅▇▅▁ |
Ferritin | 0 | 1 | 2.76 | 0.76 | 0.61 | 2.29 | 2.77 | 3.29 | 4.63 | ▁▃▇▆▂ |
Fetuin_A | 0 | 1 | 1.35 | 0.37 | 0.47 | 1.10 | 1.31 | 1.61 | 2.25 | ▂▆▇▅▂ |
Fibrinogen | 0 | 1 | -7.36 | 0.57 | -8.87 | -7.72 | -7.32 | -7.01 | -5.84 | ▁▅▇▅▁ |
GRO_alpha | 0 | 1 | 1.38 | 0.04 | 1.27 | 1.35 | 1.38 | 1.41 | 1.49 | ▂▃▇▅▁ |
Gamma_Interferon_induced_Monokin | 0 | 1 | 2.79 | 0.11 | 2.39 | 2.71 | 2.78 | 2.87 | 3.07 | ▁▂▇▆▂ |
Glutathione_S_Transferase_alpha | 0 | 1 | 0.95 | 0.16 | 0.52 | 0.84 | 0.97 | 1.03 | 1.32 | ▁▃▇▅▂ |
HB_EGF | 0 | 1 | 6.83 | 1.51 | 2.10 | 5.79 | 6.70 | 7.86 | 10.70 | ▁▃▇▆▂ |
HCC_4 | 0 | 1 | -3.50 | 0.36 | -4.51 | -3.73 | -3.51 | -3.27 | -2.12 | ▂▇▇▂▁ |
Hepatocyte_Growth_Factor_HGF | 0 | 1 | 0.20 | 0.29 | -0.63 | 0.00 | 0.18 | 0.41 | 0.88 | ▁▃▇▅▂ |
I_309 | 0 | 1 | 2.96 | 0.37 | 1.76 | 2.71 | 2.94 | 3.22 | 4.14 | ▁▅▇▃▁ |
ICAM_1 | 0 | 1 | -0.59 | 0.33 | -1.53 | -0.83 | -0.59 | -0.38 | 0.52 | ▁▅▇▃▁ |
IGF_BP_2 | 0 | 1 | 5.32 | 0.20 | 4.63 | 5.18 | 5.32 | 5.45 | 5.95 | ▁▃▇▅▁ |
IL_11 | 0 | 1 | 4.72 | 1.45 | 1.75 | 3.71 | 4.81 | 5.78 | 8.49 | ▃▆▇▅▁ |
IL_13 | 0 | 1 | 1.28 | 0.01 | 1.26 | 1.27 | 1.28 | 1.29 | 1.32 | ▂▇▆▂▁ |
IL_16 | 0 | 1 | 2.93 | 0.66 | 1.19 | 2.52 | 2.91 | 3.35 | 4.94 | ▁▆▇▃▁ |
IL_17E | 0 | 1 | 4.85 | 1.34 | 1.05 | 4.15 | 4.75 | 5.63 | 8.95 | ▁▆▇▃▁ |
IL_1alpha | 0 | 1 | -7.51 | 0.39 | -8.52 | -7.82 | -7.52 | -7.26 | -5.95 | ▂▇▇▂▁ |
IL_3 | 0 | 1 | -3.94 | 0.50 | -5.91 | -4.27 | -3.91 | -3.63 | -2.45 | ▁▂▇▅▁ |
IL_4 | 0 | 1 | 1.77 | 0.51 | 0.53 | 1.46 | 1.81 | 2.15 | 3.04 | ▂▅▇▅▂ |
IL_5 | 0 | 1 | 0.19 | 0.46 | -1.43 | -0.12 | 0.18 | 0.47 | 1.95 | ▁▃▇▂▁ |
IL_6 | 0 | 1 | -0.15 | 0.55 | -1.53 | -0.41 | -0.16 | 0.14 | 1.81 | ▂▆▇▂▁ |
IL_6_Receptor | 0 | 1 | 0.09 | 0.32 | -0.68 | -0.13 | 0.10 | 0.35 | 0.83 | ▂▆▇▆▂ |
IL_7 | 0 | 1 | 2.84 | 1.05 | 0.56 | 2.15 | 2.79 | 3.71 | 5.71 | ▃▇▇▆▁ |
IL_8 | 0 | 1 | 1.70 | 0.04 | 1.57 | 1.68 | 1.71 | 1.73 | 1.81 | ▁▂▇▆▁ |
IP_10_Inducible_Protein_10 | 0 | 1 | 5.75 | 0.51 | 4.32 | 5.40 | 5.75 | 6.06 | 7.50 | ▁▆▇▂▁ |
IgA | 0 | 1 | -6.12 | 0.76 | -10.52 | -6.65 | -6.12 | -5.57 | -4.20 | ▁▁▂▇▂ |
Insulin | 0 | 1 | -1.23 | 0.34 | -2.17 | -1.45 | -1.25 | -1.03 | -0.16 | ▁▃▇▂▁ |
Kidney_Injury_Molecule_1_KIM_1 | 0 | 1 | -1.18 | 0.03 | -1.26 | -1.20 | -1.18 | -1.16 | -1.10 | ▁▇▇▅▁ |
LOX_1 | 0 | 1 | 1.28 | 0.40 | 0.00 | 1.03 | 1.28 | 1.53 | 2.27 | ▁▂▇▅▂ |
Leptin | 0 | 1 | -1.50 | 0.27 | -2.15 | -1.70 | -1.50 | -1.33 | -0.62 | ▂▇▇▂▁ |
Lipoprotein_a | 0 | 1 | -4.42 | 1.11 | -6.81 | -5.31 | -4.61 | -3.49 | -1.39 | ▂▇▅▅▁ |
MCP_1 | 0 | 1 | 6.50 | 0.26 | 5.83 | 6.32 | 6.49 | 6.68 | 7.23 | ▂▅▇▅▁ |
MCP_2 | 0 | 1 | 1.87 | 0.65 | 0.40 | 1.53 | 1.85 | 2.18 | 4.02 | ▂▆▇▂▁ |
MIF | 0 | 1 | -1.86 | 0.34 | -2.85 | -2.12 | -1.90 | -1.66 | -0.84 | ▁▃▇▃▁ |
MIP_1alpha | 0 | 1 | 4.05 | 1.01 | 0.93 | 3.34 | 4.05 | 4.69 | 6.80 | ▁▅▇▅▂ |
MIP_1beta | 0 | 1 | 2.81 | 0.38 | 1.95 | 2.56 | 2.83 | 3.04 | 4.01 | ▂▅▇▂▁ |
MMP_2 | 0 | 1 | 2.88 | 0.93 | 0.10 | 2.33 | 2.82 | 3.55 | 5.36 | ▁▃▇▆▁ |
MMP_3 | 0 | 1 | -2.45 | 0.57 | -4.42 | -2.75 | -2.45 | -2.12 | -0.53 | ▁▂▇▃▁ |
MMP10 | 0 | 1 | -3.63 | 0.43 | -4.93 | -3.94 | -3.65 | -3.35 | -2.21 | ▁▆▇▃▁ |
MMP7 | 0 | 1 | -3.79 | 1.55 | -8.40 | -4.82 | -3.77 | -2.71 | -0.22 | ▁▃▇▅▂ |
Myoglobin | 0 | 1 | -1.37 | 0.95 | -3.17 | -2.04 | -1.47 | -0.80 | 1.77 | ▅▇▆▂▁ |
NT_proBNP | 0 | 1 | 4.55 | 0.38 | 3.18 | 4.35 | 4.55 | 4.77 | 5.89 | ▁▂▇▃▁ |
NrCAM | 0 | 1 | 4.36 | 0.57 | 2.64 | 4.00 | 4.39 | 4.75 | 6.01 | ▁▃▇▅▁ |
Osteopontin | 0 | 1 | 5.20 | 0.39 | 4.11 | 4.96 | 5.19 | 5.44 | 6.31 | ▁▅▇▃▁ |
PAI_1 | 0 | 1 | 0.08 | 0.41 | -0.99 | -0.17 | 0.09 | 0.32 | 1.17 | ▂▃▇▅▂ |
PAPP_A | 0 | 1 | -2.85 | 0.14 | -3.31 | -2.94 | -2.87 | -2.75 | -2.52 | ▁▂▇▅▂ |
PLGF | 0 | 1 | 3.91 | 0.41 | 2.48 | 3.64 | 3.87 | 4.20 | 5.17 | ▁▃▇▆▁ |
PYY | 0 | 1 | 3.02 | 0.29 | 2.19 | 2.83 | 3.00 | 3.18 | 3.93 | ▁▅▇▃▁ |
Pancreatic_polypeptide | 0 | 1 | -0.01 | 0.72 | -2.12 | -0.53 | -0.04 | 0.53 | 1.93 | ▁▅▇▆▁ |
Prolactin | 0 | 1 | 0.04 | 0.30 | -1.31 | -0.14 | 0.00 | 0.26 | 0.99 | ▁▁▇▇▁ |
Prostatic_Acid_Phosphatase | 0 | 1 | -1.69 | 0.06 | -1.93 | -1.72 | -1.69 | -1.65 | -1.42 | ▁▂▇▂▁ |
Protein_S | 0 | 1 | -2.24 | 0.35 | -3.34 | -2.46 | -2.26 | -2.00 | -1.22 | ▁▃▇▃▁ |
Pulmonary_and_Activation_Regulat | 0 | 1 | -1.49 | 0.45 | -2.51 | -1.83 | -1.51 | -1.17 | -0.27 | ▂▇▇▃▂ |
RANTES | 0 | 1 | -6.51 | 0.32 | -7.22 | -6.73 | -6.50 | -6.32 | -5.55 | ▃▇▇▃▁ |
Resistin | 0 | 1 | -17.64 | 5.82 | -34.97 | -21.47 | -17.47 | -13.50 | -2.24 | ▁▃▇▆▁ |
S100b | 0 | 1 | 1.25 | 0.34 | 0.19 | 1.00 | 1.25 | 1.50 | 2.37 | ▁▆▇▅▁ |
SGOT | 0 | 1 | -0.41 | 0.35 | -1.35 | -0.63 | -0.40 | -0.20 | 0.74 | ▁▅▇▂▁ |
SHBG | 0 | 1 | -2.48 | 0.58 | -4.14 | -2.81 | -2.49 | -2.12 | -1.11 | ▁▃▇▆▂ |
SOD | 0 | 1 | 5.34 | 0.38 | 4.32 | 5.09 | 5.37 | 5.58 | 6.32 | ▁▅▇▅▁ |
Serum_Amyloid_P | 0 | 1 | -6.02 | 0.56 | -7.51 | -6.38 | -6.03 | -5.65 | -4.65 | ▁▅▇▅▂ |
Sortilin | 0 | 1 | 3.85 | 0.87 | 1.65 | 3.34 | 3.87 | 4.37 | 6.23 | ▁▅▇▃▁ |
Stem_Cell_Factor | 0 | 1 | 3.30 | 0.36 | 2.25 | 3.04 | 3.30 | 3.53 | 4.28 | ▁▅▇▅▁ |
TGF_alpha | 0 | 1 | 9.80 | 1.32 | 6.84 | 8.86 | 9.92 | 10.70 | 13.83 | ▂▆▇▂▁ |
TIMP_1 | 0 | 1 | 11.75 | 1.90 | 1.74 | 10.49 | 11.56 | 12.70 | 18.88 | ▁▁▇▅▁ |
TNF_RII | 0 | 1 | -0.59 | 0.33 | -1.66 | -0.82 | -0.60 | -0.38 | 0.47 | ▁▃▇▃▁ |
TRAIL_R3 | 0 | 1 | -0.54 | 0.24 | -1.21 | -0.70 | -0.53 | -0.38 | 0.27 | ▁▆▇▂▁ |
TTR_prealbumin | 0 | 1 | 2.85 | 0.14 | 2.48 | 2.77 | 2.83 | 2.94 | 3.33 | ▂▅▇▃▁ |
Tamm_Horsfall_Protein_THP | 0 | 1 | -3.12 | 0.03 | -3.21 | -3.14 | -3.12 | -3.10 | -2.99 | ▁▇▇▂▁ |
Thrombomodulin | 0 | 1 | -1.51 | 0.22 | -2.04 | -1.63 | -1.49 | -1.34 | -0.82 | ▂▆▇▃▁ |
Thrombopoietin | 0 | 1 | -0.75 | 0.24 | -1.54 | -0.89 | -0.75 | -0.63 | 0.10 | ▁▅▇▂▁ |
Thymus_Expressed_Chemokine_TECK | 0 | 1 | 3.85 | 0.80 | 1.51 | 3.34 | 3.81 | 4.32 | 6.23 | ▁▃▇▃▁ |
Thyroid_Stimulating_Hormone | 0 | 1 | -4.50 | 0.75 | -6.19 | -4.96 | -4.51 | -4.02 | -1.71 | ▂▇▆▁▁ |
Thyroxine_Binding_Globulin | 0 | 1 | -1.48 | 0.40 | -2.48 | -1.77 | -1.51 | -1.24 | -0.21 | ▂▇▇▃▁ |
Tissue_Factor | 0 | 1 | 1.17 | 0.50 | -0.21 | 0.83 | 1.22 | 1.48 | 2.48 | ▁▅▇▆▁ |
Transferrin | 0 | 1 | 2.91 | 0.27 | 1.93 | 2.71 | 2.89 | 3.09 | 3.76 | ▁▂▇▅▁ |
Trefoil_Factor_3_TFF3 | 0 | 1 | -3.88 | 0.34 | -4.74 | -4.14 | -3.86 | -3.65 | -2.96 | ▁▅▇▃▁ |
VCAM_1 | 0 | 1 | 2.69 | 0.32 | 1.72 | 2.48 | 2.71 | 2.89 | 3.69 | ▁▃▇▃▁ |
VEGF | 0 | 1 | 16.99 | 1.81 | 11.83 | 15.77 | 17.08 | 18.10 | 22.38 | ▁▅▇▃▁ |
Vitronectin | 0 | 1 | -0.28 | 0.33 | -1.43 | -0.51 | -0.30 | -0.04 | 0.53 | ▁▃▇▇▂ |
von_Willebrand_Factor | 0 | 1 | -3.91 | 0.38 | -4.99 | -4.20 | -3.91 | -3.61 | -2.96 | ▁▅▇▆▂ |
###################################
# Verifying the data dimensions
###################################
# Rows x columns of the numeric predictor set.
dim(DPA.Predictors.Numeric)
## [1] 267 124
##################################
# Loading dataset
##################################
# From here on DPA holds the predictors only (DPA.Predictors), replacing
# its earlier contents.
DPA <- DPA.Predictors
##################################
# Gathering descriptive statistics
##################################
# The outer parentheses make the assignment also print the skim() summary.
(DPA_Skimmed <- skim(DPA))
Name | DPA |
Number of rows | 267 |
Number of columns | 127 |
_______________________ | |
Column type frequency: | |
numeric | 127 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
ACE_CD143_Angiotensin_Converti | 0 | 1 | 1.32 | 0.54 | -0.68 | 0.95 | 1.30 | 1.72 | 2.84 | ▁▃▇▇▁ |
ACTH_Adrenocorticotropic_Hormon | 0 | 1 | -1.54 | 0.28 | -2.21 | -1.71 | -1.56 | -1.35 | -0.84 | ▂▆▇▆▂ |
AXL | 0 | 1 | 0.31 | 0.45 | -0.92 | 0.00 | 0.28 | 0.61 | 1.52 | ▁▅▇▃▁ |
Adiponectin | 0 | 1 | -5.20 | 0.67 | -6.73 | -5.67 | -5.18 | -4.78 | -3.51 | ▂▆▇▅▁ |
Alpha_1_Antichymotrypsin | 0 | 1 | 1.36 | 0.36 | 0.26 | 1.13 | 1.36 | 1.59 | 2.30 | ▁▃▇▅▁ |
Alpha_1_Antitrypsin | 0 | 1 | -13.05 | 1.48 | -17.03 | -14.07 | -13.00 | -12.10 | -8.19 | ▁▅▇▂▁ |
Alpha_1_Microglobulin | 0 | 1 | -2.93 | 0.48 | -4.34 | -3.27 | -2.94 | -2.59 | -1.77 | ▁▅▇▇▂ |
Alpha_2_Macroglobulin | 0 | 1 | -158.61 | 40.93 | -289.68 | -186.64 | -160.01 | -134.62 | -59.46 | ▁▂▇▅▂ |
Angiopoietin_2_ANG_2 | 0 | 1 | 0.67 | 0.33 | -0.54 | 0.47 | 0.64 | 0.88 | 1.53 | ▁▂▇▆▂ |
Angiotensinogen | 0 | 1 | 2.32 | 0.25 | 1.75 | 2.12 | 2.32 | 2.50 | 2.88 | ▃▆▇▇▂ |
Apolipoprotein_A_IV | 0 | 1 | -1.85 | 0.38 | -2.96 | -2.12 | -1.83 | -1.61 | -0.78 | ▁▃▇▃▁ |
Apolipoprotein_A1 | 0 | 1 | -7.48 | 0.44 | -8.68 | -7.76 | -7.47 | -7.21 | -6.17 | ▁▅▇▃▁ |
Apolipoprotein_A2 | 0 | 1 | -0.64 | 0.50 | -1.90 | -0.97 | -0.67 | -0.31 | 0.96 | ▁▇▇▃▁ |
Apolipoprotein_B | 0 | 1 | -5.58 | 1.48 | -9.94 | -6.63 | -5.70 | -4.54 | -2.15 | ▁▅▇▇▂ |
Apolipoprotein_CI | 0 | 1 | -1.58 | 0.41 | -3.32 | -1.83 | -1.61 | -1.37 | -0.27 | ▁▁▇▅▁ |
Apolipoprotein_CIII | 0 | 1 | -2.49 | 0.44 | -3.69 | -2.77 | -2.53 | -2.21 | -1.24 | ▁▅▇▅▁ |
Apolipoprotein_D | 0 | 1 | 1.44 | 0.34 | 0.47 | 1.21 | 1.41 | 1.67 | 2.27 | ▁▃▇▆▂ |
Apolipoprotein_E | 0 | 1 | 2.81 | 0.78 | 0.59 | 2.33 | 2.82 | 3.29 | 5.44 | ▁▅▇▂▁ |
Apolipoprotein_H | 0 | 1 | -0.32 | 0.39 | -2.23 | -0.60 | -0.37 | -0.06 | 0.93 | ▁▁▇▆▁ |
B_Lymphocyte_Chemoattractant_BL | 0 | 1 | 2.02 | 0.57 | 0.73 | 1.67 | 1.98 | 2.37 | 4.02 | ▃▇▇▁▁ |
BMP_6 | 0 | 1 | -1.91 | 0.31 | -2.76 | -2.15 | -1.88 | -1.68 | -0.82 | ▁▅▇▂▁ |
Beta_2_Microglobulin | 0 | 1 | 0.17 | 0.30 | -0.54 | -0.04 | 0.18 | 0.34 | 0.99 | ▂▃▇▃▁ |
Betacellulin | 0 | 1 | 51.01 | 10.87 | 10.00 | 42.00 | 51.00 | 59.00 | 82.00 | ▁▁▇▅▁ |
C_Reactive_Protein | 0 | 1 | -5.87 | 1.20 | -8.52 | -6.65 | -5.84 | -5.08 | -2.94 | ▃▆▇▆▂ |
CD40 | 0 | 1 | -1.26 | 0.21 | -1.86 | -1.38 | -1.27 | -1.12 | -0.55 | ▁▆▇▃▁ |
CD5L | 0 | 1 | -0.05 | 0.45 | -1.24 | -0.36 | -0.06 | 0.26 | 1.16 | ▁▅▇▃▁ |
Calbindin | 0 | 1 | 22.43 | 4.11 | 10.96 | 19.77 | 22.25 | 24.80 | 33.78 | ▁▃▇▃▁ |
Calcitonin | 0 | 1 | 1.68 | 0.87 | -0.71 | 0.96 | 1.65 | 2.28 | 3.89 | ▁▆▇▅▂ |
CgA | 0 | 1 | 333.30 | 83.67 | 135.60 | 278.02 | 331.52 | 392.07 | 535.40 | ▂▆▇▅▂ |
Clusterin_Apo_J | 0 | 1 | 2.88 | 0.29 | 1.87 | 2.71 | 2.89 | 3.04 | 3.58 | ▁▂▇▆▂ |
Complement_3 | 0 | 1 | -15.61 | 2.46 | -23.39 | -17.57 | -15.52 | -13.88 | -9.56 | ▁▆▇▇▂ |
Complement_Factor_H | 0 | 1 | 3.55 | 1.25 | -0.84 | 2.75 | 3.60 | 4.25 | 7.62 | ▁▃▇▅▁ |
Connective_Tissue_Growth_Factor | 0 | 1 | 0.77 | 0.20 | 0.18 | 0.64 | 0.79 | 0.92 | 1.41 | ▁▅▇▃▁ |
Cortisol | 0 | 1 | 11.98 | 3.95 | 0.10 | 9.80 | 12.00 | 14.00 | 29.00 | ▁▇▇▁▁ |
Creatine_Kinase_MB | 0 | 1 | -1.67 | 0.09 | -1.87 | -1.72 | -1.67 | -1.63 | -1.38 | ▃▅▇▂▁ |
Cystatin_C | 0 | 1 | 8.59 | 0.40 | 7.43 | 8.32 | 8.56 | 8.84 | 9.69 | ▁▅▇▃▁ |
EGF_R | 0 | 1 | -0.70 | 0.22 | -1.36 | -0.86 | -0.68 | -0.55 | -0.06 | ▁▃▇▅▁ |
EN_RAGE | 0 | 1 | -3.64 | 0.88 | -8.38 | -4.20 | -3.65 | -3.15 | -0.39 | ▁▁▇▆▁ |
ENA_78 | 0 | 1 | -1.37 | 0.01 | -1.41 | -1.38 | -1.37 | -1.36 | -1.34 | ▁▅▇▅▁ |
Eotaxin_3 | 0 | 1 | 58.17 | 15.83 | 7.00 | 44.00 | 59.00 | 70.00 | 107.00 | ▁▆▇▅▁ |
FAS | 0 | 1 | -0.53 | 0.30 | -1.51 | -0.71 | -0.53 | -0.31 | 0.34 | ▁▃▇▆▁ |
FSH_Follicle_Stimulation_Hormon | 0 | 1 | -1.14 | 0.42 | -2.12 | -1.47 | -1.14 | -0.88 | 0.10 | ▂▇▇▃▁ |
Fas_Ligand | 0 | 1 | 2.97 | 1.09 | -0.15 | 2.34 | 3.10 | 3.70 | 7.63 | ▂▇▇▁▁ |
Fatty_Acid_Binding_Protein | 0 | 1 | 1.35 | 0.78 | -1.04 | 0.80 | 1.39 | 1.88 | 3.71 | ▁▅▇▅▁ |
Ferritin | 0 | 1 | 2.76 | 0.76 | 0.61 | 2.29 | 2.77 | 3.29 | 4.63 | ▁▃▇▆▂ |
Fetuin_A | 0 | 1 | 1.35 | 0.37 | 0.47 | 1.10 | 1.31 | 1.61 | 2.25 | ▂▆▇▅▂ |
Fibrinogen | 0 | 1 | -7.36 | 0.57 | -8.87 | -7.72 | -7.32 | -7.01 | -5.84 | ▁▅▇▅▁ |
GRO_alpha | 0 | 1 | 1.38 | 0.04 | 1.27 | 1.35 | 1.38 | 1.41 | 1.49 | ▂▃▇▅▁ |
Gamma_Interferon_induced_Monokin | 0 | 1 | 2.79 | 0.11 | 2.39 | 2.71 | 2.78 | 2.87 | 3.07 | ▁▂▇▆▂ |
Glutathione_S_Transferase_alpha | 0 | 1 | 0.95 | 0.16 | 0.52 | 0.84 | 0.97 | 1.03 | 1.32 | ▁▃▇▅▂ |
HB_EGF | 0 | 1 | 6.83 | 1.51 | 2.10 | 5.79 | 6.70 | 7.86 | 10.70 | ▁▃▇▆▂ |
HCC_4 | 0 | 1 | -3.50 | 0.36 | -4.51 | -3.73 | -3.51 | -3.27 | -2.12 | ▂▇▇▂▁ |
Hepatocyte_Growth_Factor_HGF | 0 | 1 | 0.20 | 0.29 | -0.63 | 0.00 | 0.18 | 0.41 | 0.88 | ▁▃▇▅▂ |
I_309 | 0 | 1 | 2.96 | 0.37 | 1.76 | 2.71 | 2.94 | 3.22 | 4.14 | ▁▅▇▃▁ |
ICAM_1 | 0 | 1 | -0.59 | 0.33 | -1.53 | -0.83 | -0.59 | -0.38 | 0.52 | ▁▅▇▃▁ |
IGF_BP_2 | 0 | 1 | 5.32 | 0.20 | 4.63 | 5.18 | 5.32 | 5.45 | 5.95 | ▁▃▇▅▁ |
IL_11 | 0 | 1 | 4.72 | 1.45 | 1.75 | 3.71 | 4.81 | 5.78 | 8.49 | ▃▆▇▅▁ |
IL_13 | 0 | 1 | 1.28 | 0.01 | 1.26 | 1.27 | 1.28 | 1.29 | 1.32 | ▂▇▆▂▁ |
IL_16 | 0 | 1 | 2.93 | 0.66 | 1.19 | 2.52 | 2.91 | 3.35 | 4.94 | ▁▆▇▃▁ |
IL_17E | 0 | 1 | 4.85 | 1.34 | 1.05 | 4.15 | 4.75 | 5.63 | 8.95 | ▁▆▇▃▁ |
IL_1alpha | 0 | 1 | -7.51 | 0.39 | -8.52 | -7.82 | -7.52 | -7.26 | -5.95 | ▂▇▇▂▁ |
IL_3 | 0 | 1 | -3.94 | 0.50 | -5.91 | -4.27 | -3.91 | -3.63 | -2.45 | ▁▂▇▅▁ |
IL_4 | 0 | 1 | 1.77 | 0.51 | 0.53 | 1.46 | 1.81 | 2.15 | 3.04 | ▂▅▇▅▂ |
IL_5 | 0 | 1 | 0.19 | 0.46 | -1.43 | -0.12 | 0.18 | 0.47 | 1.95 | ▁▃▇▂▁ |
IL_6 | 0 | 1 | -0.15 | 0.55 | -1.53 | -0.41 | -0.16 | 0.14 | 1.81 | ▂▆▇▂▁ |
IL_6_Receptor | 0 | 1 | 0.09 | 0.32 | -0.68 | -0.13 | 0.10 | 0.35 | 0.83 | ▂▆▇▆▂ |
IL_7 | 0 | 1 | 2.84 | 1.05 | 0.56 | 2.15 | 2.79 | 3.71 | 5.71 | ▃▇▇▆▁ |
IL_8 | 0 | 1 | 1.70 | 0.04 | 1.57 | 1.68 | 1.71 | 1.73 | 1.81 | ▁▂▇▆▁ |
IP_10_Inducible_Protein_10 | 0 | 1 | 5.75 | 0.51 | 4.32 | 5.40 | 5.75 | 6.06 | 7.50 | ▁▆▇▂▁ |
IgA | 0 | 1 | -6.12 | 0.76 | -10.52 | -6.65 | -6.12 | -5.57 | -4.20 | ▁▁▂▇▂ |
Insulin | 0 | 1 | -1.23 | 0.34 | -2.17 | -1.45 | -1.25 | -1.03 | -0.16 | ▁▃▇▂▁ |
Kidney_Injury_Molecule_1_KIM_1 | 0 | 1 | -1.18 | 0.03 | -1.26 | -1.20 | -1.18 | -1.16 | -1.10 | ▁▇▇▅▁ |
LOX_1 | 0 | 1 | 1.28 | 0.40 | 0.00 | 1.03 | 1.28 | 1.53 | 2.27 | ▁▂▇▅▂ |
Leptin | 0 | 1 | -1.50 | 0.27 | -2.15 | -1.70 | -1.50 | -1.33 | -0.62 | ▂▇▇▂▁ |
Lipoprotein_a | 0 | 1 | -4.42 | 1.11 | -6.81 | -5.31 | -4.61 | -3.49 | -1.39 | ▂▇▅▅▁ |
MCP_1 | 0 | 1 | 6.50 | 0.26 | 5.83 | 6.32 | 6.49 | 6.68 | 7.23 | ▂▅▇▅▁ |
MCP_2 | 0 | 1 | 1.87 | 0.65 | 0.40 | 1.53 | 1.85 | 2.18 | 4.02 | ▂▆▇▂▁ |
MIF | 0 | 1 | -1.86 | 0.34 | -2.85 | -2.12 | -1.90 | -1.66 | -0.84 | ▁▃▇▃▁ |
MIP_1alpha | 0 | 1 | 4.05 | 1.01 | 0.93 | 3.34 | 4.05 | 4.69 | 6.80 | ▁▅▇▅▂ |
MIP_1beta | 0 | 1 | 2.81 | 0.38 | 1.95 | 2.56 | 2.83 | 3.04 | 4.01 | ▂▅▇▂▁ |
MMP_2 | 0 | 1 | 2.88 | 0.93 | 0.10 | 2.33 | 2.82 | 3.55 | 5.36 | ▁▃▇▆▁ |
MMP_3 | 0 | 1 | -2.45 | 0.57 | -4.42 | -2.75 | -2.45 | -2.12 | -0.53 | ▁▂▇▃▁ |
MMP10 | 0 | 1 | -3.63 | 0.43 | -4.93 | -3.94 | -3.65 | -3.35 | -2.21 | ▁▆▇▃▁ |
MMP7 | 0 | 1 | -3.79 | 1.55 | -8.40 | -4.82 | -3.77 | -2.71 | -0.22 | ▁▃▇▅▂ |
Myoglobin | 0 | 1 | -1.37 | 0.95 | -3.17 | -2.04 | -1.47 | -0.80 | 1.77 | ▅▇▆▂▁ |
NT_proBNP | 0 | 1 | 4.55 | 0.38 | 3.18 | 4.35 | 4.55 | 4.77 | 5.89 | ▁▂▇▃▁ |
NrCAM | 0 | 1 | 4.36 | 0.57 | 2.64 | 4.00 | 4.39 | 4.75 | 6.01 | ▁▃▇▅▁ |
Osteopontin | 0 | 1 | 5.20 | 0.39 | 4.11 | 4.96 | 5.19 | 5.44 | 6.31 | ▁▅▇▃▁ |
PAI_1 | 0 | 1 | 0.08 | 0.41 | -0.99 | -0.17 | 0.09 | 0.32 | 1.17 | ▂▃▇▅▂ |
PAPP_A | 0 | 1 | -2.85 | 0.14 | -3.31 | -2.94 | -2.87 | -2.75 | -2.52 | ▁▂▇▅▂ |
PLGF | 0 | 1 | 3.91 | 0.41 | 2.48 | 3.64 | 3.87 | 4.20 | 5.17 | ▁▃▇▆▁ |
PYY | 0 | 1 | 3.02 | 0.29 | 2.19 | 2.83 | 3.00 | 3.18 | 3.93 | ▁▅▇▃▁ |
Pancreatic_polypeptide | 0 | 1 | -0.01 | 0.72 | -2.12 | -0.53 | -0.04 | 0.53 | 1.93 | ▁▅▇▆▁ |
Prolactin | 0 | 1 | 0.04 | 0.30 | -1.31 | -0.14 | 0.00 | 0.26 | 0.99 | ▁▁▇▇▁ |
Prostatic_Acid_Phosphatase | 0 | 1 | -1.69 | 0.06 | -1.93 | -1.72 | -1.69 | -1.65 | -1.42 | ▁▂▇▂▁ |
Protein_S | 0 | 1 | -2.24 | 0.35 | -3.34 | -2.46 | -2.26 | -2.00 | -1.22 | ▁▃▇▃▁ |
Pulmonary_and_Activation_Regulat | 0 | 1 | -1.49 | 0.45 | -2.51 | -1.83 | -1.51 | -1.17 | -0.27 | ▂▇▇▃▂ |
RANTES | 0 | 1 | -6.51 | 0.32 | -7.22 | -6.73 | -6.50 | -6.32 | -5.55 | ▃▇▇▃▁ |
Resistin | 0 | 1 | -17.64 | 5.82 | -34.97 | -21.47 | -17.47 | -13.50 | -2.24 | ▁▃▇▆▁ |
S100b | 0 | 1 | 1.25 | 0.34 | 0.19 | 1.00 | 1.25 | 1.50 | 2.37 | ▁▆▇▅▁ |
SGOT | 0 | 1 | -0.41 | 0.35 | -1.35 | -0.63 | -0.40 | -0.20 | 0.74 | ▁▅▇▂▁ |
SHBG | 0 | 1 | -2.48 | 0.58 | -4.14 | -2.81 | -2.49 | -2.12 | -1.11 | ▁▃▇▆▂ |
SOD | 0 | 1 | 5.34 | 0.38 | 4.32 | 5.09 | 5.37 | 5.58 | 6.32 | ▁▅▇▅▁ |
Serum_Amyloid_P | 0 | 1 | -6.02 | 0.56 | -7.51 | -6.38 | -6.03 | -5.65 | -4.65 | ▁▅▇▅▂ |
Sortilin | 0 | 1 | 3.85 | 0.87 | 1.65 | 3.34 | 3.87 | 4.37 | 6.23 | ▁▅▇▃▁ |
Stem_Cell_Factor | 0 | 1 | 3.30 | 0.36 | 2.25 | 3.04 | 3.30 | 3.53 | 4.28 | ▁▅▇▅▁ |
TGF_alpha | 0 | 1 | 9.80 | 1.32 | 6.84 | 8.86 | 9.92 | 10.70 | 13.83 | ▂▆▇▂▁ |
TIMP_1 | 0 | 1 | 11.75 | 1.90 | 1.74 | 10.49 | 11.56 | 12.70 | 18.88 | ▁▁▇▅▁ |
TNF_RII | 0 | 1 | -0.59 | 0.33 | -1.66 | -0.82 | -0.60 | -0.38 | 0.47 | ▁▃▇▃▁ |
TRAIL_R3 | 0 | 1 | -0.54 | 0.24 | -1.21 | -0.70 | -0.53 | -0.38 | 0.27 | ▁▆▇▂▁ |
TTR_prealbumin | 0 | 1 | 2.85 | 0.14 | 2.48 | 2.77 | 2.83 | 2.94 | 3.33 | ▂▅▇▃▁ |
Tamm_Horsfall_Protein_THP | 0 | 1 | -3.12 | 0.03 | -3.21 | -3.14 | -3.12 | -3.10 | -2.99 | ▁▇▇▂▁ |
Thrombomodulin | 0 | 1 | -1.51 | 0.22 | -2.04 | -1.63 | -1.49 | -1.34 | -0.82 | ▂▆▇▃▁ |
Thrombopoietin | 0 | 1 | -0.75 | 0.24 | -1.54 | -0.89 | -0.75 | -0.63 | 0.10 | ▁▅▇▂▁ |
Thymus_Expressed_Chemokine_TECK | 0 | 1 | 3.85 | 0.80 | 1.51 | 3.34 | 3.81 | 4.32 | 6.23 | ▁▃▇▃▁ |
Thyroid_Stimulating_Hormone | 0 | 1 | -4.50 | 0.75 | -6.19 | -4.96 | -4.51 | -4.02 | -1.71 | ▂▇▆▁▁ |
Thyroxine_Binding_Globulin | 0 | 1 | -1.48 | 0.40 | -2.48 | -1.77 | -1.51 | -1.24 | -0.21 | ▂▇▇▃▁ |
Tissue_Factor | 0 | 1 | 1.17 | 0.50 | -0.21 | 0.83 | 1.22 | 1.48 | 2.48 | ▁▅▇▆▁ |
Transferrin | 0 | 1 | 2.91 | 0.27 | 1.93 | 2.71 | 2.89 | 3.09 | 3.76 | ▁▂▇▅▁ |
Trefoil_Factor_3_TFF3 | 0 | 1 | -3.88 | 0.34 | -4.74 | -4.14 | -3.86 | -3.65 | -2.96 | ▁▅▇▃▁ |
VCAM_1 | 0 | 1 | 2.69 | 0.32 | 1.72 | 2.48 | 2.71 | 2.89 | 3.69 | ▁▃▇▃▁ |
VEGF | 0 | 1 | 16.99 | 1.81 | 11.83 | 15.77 | 17.08 | 18.10 | 22.38 | ▁▅▇▃▁ |
Vitronectin | 0 | 1 | -0.28 | 0.33 | -1.43 | -0.51 | -0.30 | -0.04 | 0.53 | ▁▃▇▇▂ |
von_Willebrand_Factor | 0 | 1 | -3.91 | 0.38 | -4.99 | -4.20 | -3.91 | -3.61 | -2.96 | ▁▅▇▆▂ |
E4 | 0 | 1 | 0.40 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
E3 | 0 | 1 | 0.92 | 0.28 | 0.00 | 1.00 | 1.00 | 1.00 | 1.00 | ▁▁▁▁▇ |
E2 | 0 | 1 | 0.16 | 0.37 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
##################################
# Identifying columns with low variance
###################################
# nearZeroVar(..., saveMetrics = TRUE) returns one row per column of DPA with
# the freqRatio, percentUnique, zeroVar and nzv metrics; the rows where nzv is
# TRUE are the columns treated as low variance in the rest of this section.
DPA_LowVariance <- nearZeroVar(DPA,
                               freqCut = 95/5,
                               uniqueCut = 10,
                               saveMetrics = TRUE)
(DPA_LowVariance[DPA_LowVariance$nzv,])
## [1] freqRatio percentUnique zeroVar nzv
## <0 rows> (or 0-length row.names)
if ((nrow(DPA_LowVariance[DPA_LowVariance$nzv,]))==0){
  print("No low variance predictors noted.")
} else {
  # NOTE(review): the message quotes a mode ratio of 4 while freqCut above is
  # 95/5 -- confirm which threshold is intended before changing the text.
  print(paste0("Low variance observed for ",
               (nrow(DPA_LowVariance[DPA_LowVariance$nzv,])),
               " numeric variable(s) with First.Second.Mode.Ratio>4 and Unique.Count.Ratio<0.10."))
  DPA_LowVarianceForRemoval <- (nrow(DPA_LowVariance[DPA_LowVariance$nzv,]))
  print(paste0("Low variance can be resolved by removing ",
               (nrow(DPA_LowVariance[DPA_LowVariance$nzv,])),
               " numeric variable(s)."))
  # seq_len() keeps the loop empty (instead of iterating over 1, 0) should the
  # count ever be zero.
  for (j in seq_len(DPA_LowVarianceForRemoval)) {
    DPA_LowVarianceRemovedVariable <- rownames(DPA_LowVariance[DPA_LowVariance$nzv,])[j]
    print(paste0("Variable ",
                 j," for removal: ",
                 DPA_LowVarianceRemovedVariable))
  }
  # Explicit print(): an expression in the middle of a braced block is not
  # auto-printed, so the flagged-variable summary would otherwise be lost.
  print(DPA %>%
          skim() %>%
          dplyr::filter(skim_variable %in% rownames(DPA_LowVariance[DPA_LowVariance$nzv,])))
  ##################################
  # Filtering out columns with low variance
  #################################
  # drop = FALSE keeps a data frame even when a single column remains.
  DPA_ExcludedLowVariance <- DPA[, !names(DPA) %in% rownames(DPA_LowVariance[DPA_LowVariance$nzv,]), drop = FALSE]
  ##################################
  # Gathering descriptive statistics
  ##################################
  (DPA_ExcludedLowVariance_Skimmed <- skim(DPA_ExcludedLowVariance))
}
## [1] "No low variance predictors noted."
##################################
# Loading dataset
##################################
# Work on a copy of the train set so the exploration below starts from the
# unmodified Alzheimer_Train data.
DPA <- Alzheimer_Train
##################################
# Listing all predictors
##################################
# Everything except the Class column; drop = FALSE guarantees a data frame.
DPA.Predictors <- DPA[, !names(DPA) %in% c("Class"), drop = FALSE]
##################################
# Listing all numeric predictors
##################################
# The E2/E3/E4 indicator columns are excluded from the numeric predictor set.
DPA.Predictors.Numeric <- DPA.Predictors[, !names(DPA.Predictors) %in% c("E2","E3","E4"), drop = FALSE]
# Column-wise coercion through a list: unlike sapply(), the result keeps one
# column per predictor regardless of how many rows the data has.
DPA.Predictors.Numeric <- as.data.frame(lapply(DPA.Predictors.Numeric, as.numeric),
                                        check.names = FALSE)
##################################
# Visualizing pairwise correlation between predictors
##################################
# cor.mtest() provides the matrix of p-values ($p) that corrplot() uses below
# to blank out correlations that are not significant at sig.level = 0.05.
DPA_CorrelationTest <- cor.mtest(DPA.Predictors.Numeric,
                                 method = "pearson",
                                 conf.level = .95)
corrplot(cor(DPA.Predictors.Numeric,
             method = "pearson",
             use="pairwise.complete.obs"),
         method = "circle",
         type = "upper",
         order = "original",
         tl.col = "black",
         tl.cex = 0.75,
         tl.srt = 90,
         sig.level = 0.05,
         p.mat = DPA_CorrelationTest$p,
         insig = "blank")
##################################
# Identifying the highly correlated variables
##################################
DPA_Correlation <- cor(DPA.Predictors.Numeric,
                       method = "pearson",
                       use="pairwise.complete.obs")
# Count each predictor pair once (upper triangle only) whose absolute Pearson
# correlation exceeds 0.95; the outer parentheses print the count.
(DPA_HighlyCorrelatedCount <- sum(abs(DPA_Correlation[upper.tri(DPA_Correlation)]) > 0.95))
## [1] 0
if (DPA_HighlyCorrelatedCount == 0) {
  print("No highly correlated predictors noted.")
} else {
  print(paste0("High correlation observed for ",
               (DPA_HighlyCorrelatedCount)," pairs of numeric variable(s) with Correlation.Coefficient>0.95."))
  # Request as many top-ranked pairs from corr_cross() as were counted above.
  (DPA_HighlyCorrelatedPairs <- corr_cross(DPA.Predictors.Numeric,
                                           max_pvalue = 0.05,
                                           top = DPA_HighlyCorrelatedCount,
                                           rm.na = TRUE,
                                           grid = FALSE
  ))
}
## [1] "No highly correlated predictors noted."
# Runs only when at least one highly correlated pair was counted: lists the
# columns findCorrelation() proposes for removal and builds a filtered copy
# of DPA without them.
if (DPA_HighlyCorrelatedCount > 0) {
  DPA_HighlyCorrelated <- findCorrelation(DPA_Correlation, cutoff = 0.95)
  DPA_HighlyCorrelatedForRemoval <- length(DPA_HighlyCorrelated)
  # The indices returned above refer to the columns of the correlation matrix
  # (i.e. DPA.Predictors.Numeric), so translate them to names before touching
  # DPA, whose column layout also contains Class and the E2/E3/E4 columns.
  DPA_HighlyCorrelatedNames <- colnames(DPA.Predictors.Numeric)[DPA_HighlyCorrelated]

  print(paste0("High correlation can be resolved by removing ",
               (DPA_HighlyCorrelatedForRemoval)," numeric variable(s)."))
  # seq_len() yields an empty loop when nothing is proposed for removal.
  for (j in seq_len(DPA_HighlyCorrelatedForRemoval)) {
    DPA_HighlyCorrelatedRemovedVariable <- DPA_HighlyCorrelatedNames[j]
    print(paste0("Variable ",
                 j," for removal: ",
                 DPA_HighlyCorrelatedRemovedVariable))
  }
  ##################################
  # Filtering out columns with high correlation
  #################################
  # Name-based exclusion: an empty removal set leaves DPA intact, whereas a
  # negative empty index would have dropped every column.
  DPA_ExcludedHighCorrelation <- DPA[, !names(DPA) %in% DPA_HighlyCorrelatedNames, drop = FALSE]
  ##################################
  # Gathering descriptive statistics
  ##################################
  (DPA_ExcludedHighCorrelation_Skimmed <- skim(DPA_ExcludedHighCorrelation))

}
##################################
# Loading dataset
##################################
# Reset DPA to the unmodified train set before the linear-dependency checks.
DPA <- Alzheimer_Train
##################################
# Listing all predictors
##################################
# Everything except the Class column; drop = FALSE guarantees a data frame.
DPA.Predictors <- DPA[, !names(DPA) %in% c("Class"), drop = FALSE]
##################################
# Listing all numeric predictors
##################################
# The E2/E3/E4 indicator columns are excluded from the numeric predictor set.
DPA.Predictors.Numeric <- DPA.Predictors[, !names(DPA.Predictors) %in% c("E2","E3","E4"), drop = FALSE]
# Column-wise coercion through a list: unlike sapply(), the result keeps one
# column per predictor regardless of how many rows the data has.
DPA.Predictors.Numeric <- as.data.frame(lapply(DPA.Predictors.Numeric, as.numeric),
                                        check.names = FALSE)
##################################
# Identifying the linearly dependent variables
##################################
# findLinearCombos() returns a list whose $linearCombos element holds one
# vector of column indices per dependent subset; its length is the number of
# subsets found. The outer parentheses print the count.
DPA_LinearlyDependent <- findLinearCombos(DPA.Predictors.Numeric)
(DPA_LinearlyDependentCount <- length(DPA_LinearlyDependent$linearCombos))
## [1] 0
if (DPA_LinearlyDependentCount == 0) {
  print("No linearly dependent predictors noted.")
} else {
  print(paste0("Linear dependency observed for ",
               (DPA_LinearlyDependentCount)," subset(s) of numeric variable(s)."))
  for (i in seq_len(DPA_LinearlyDependentCount)) {
    DPA_LinearlyDependentSubset <- colnames(DPA.Predictors.Numeric)[DPA_LinearlyDependent$linearCombos[[i]]]
    # paste0() is vectorised, so this prints one message per variable in the
    # subset.
    print(paste0("Linear dependent variable(s) for subset ",
                 i," include: ",
                 DPA_LinearlyDependentSubset))
  }
}
## [1] "No linearly dependent predictors noted."
##################################
# Identifying the linearly dependent variables for removal
##################################
# Runs only when at least one dependent subset was found: lists the columns
# findLinearCombos() proposes for removal and builds a filtered copy of DPA
# without them.
if (DPA_LinearlyDependentCount > 0) {
  DPA_LinearlyDependent <- findLinearCombos(DPA.Predictors.Numeric)
  DPA_LinearlyDependentForRemoval <- length(DPA_LinearlyDependent$remove)
  # $remove indexes the columns of DPA.Predictors.Numeric, so translate to
  # names before touching DPA, whose column layout also contains Class and
  # the E2/E3/E4 columns.
  DPA_LinearlyDependentNames <- colnames(DPA.Predictors.Numeric)[DPA_LinearlyDependent$remove]

  print(paste0("Linear dependency can be resolved by removing ",
               (DPA_LinearlyDependentForRemoval)," numeric variable(s)."))
  # seq_len() yields an empty loop when nothing is proposed for removal.
  for (j in seq_len(DPA_LinearlyDependentForRemoval)) {
    DPA_LinearlyDependentRemovedVariable <- DPA_LinearlyDependentNames[j]
    print(paste0("Variable ",
                 j," for removal: ",
                 DPA_LinearlyDependentRemovedVariable))
  }
  ##################################
  # Filtering out columns with linear dependency
  #################################
  # Name-based exclusion also copes with an empty removal set, which a
  # negative positional index cannot express.
  DPA_ExcludedLinearlyDependent <- DPA[, !names(DPA) %in% DPA_LinearlyDependentNames, drop = FALSE]
  ##################################
  # Gathering descriptive statistics
  ##################################
  (DPA_ExcludedLinearlyDependent_Skimmed <- skim(DPA_ExcludedLinearlyDependent))

}
##################################
# Creating the pre-modelling
# train set
##################################
# Copy of the train set with the response and the E2/E3/E4 indicator columns
# stored as factors.
PMA_PreModelling_Train <- Alzheimer_Train
PMA_PreModelling_Train$Class <- as.factor(PMA_PreModelling_Train$Class)
# The indicators are built as 0/1 values; fixing the level set explicitly
# keeps the factor levels the same in every data split, even if one split
# happens to contain only one of the two values.
PMA_PreModelling_Train$E2 <- factor(PMA_PreModelling_Train$E2, levels = c(0, 1))
PMA_PreModelling_Train$E3 <- factor(PMA_PreModelling_Train$E3, levels = c(0, 1))
PMA_PreModelling_Train$E4 <- factor(PMA_PreModelling_Train$E4, levels = c(0, 1))
##################################
# Gathering descriptive statistics
##################################
# The outer parentheses print the skim summary while also storing it.
(PMA_PreModelling_Train_Skimmed <- skim(PMA_PreModelling_Train))
Name | PMA_PreModelling_Train |
Number of rows | 267 |
Number of columns | 128 |
_______________________ | |
Column type frequency: | |
factor | 4 |
numeric | 124 |
________________________ | |
Group variables | None |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
Class | 0 | 1 | FALSE | 2 | Con: 194, Imp: 73 |
E4 | 0 | 1 | FALSE | 2 | 0: 160, 1: 107 |
E3 | 0 | 1 | FALSE | 2 | 1: 245, 0: 22 |
E2 | 0 | 1 | FALSE | 2 | 0: 224, 1: 43 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
ACE_CD143_Angiotensin_Converti | 0 | 1 | 1.32 | 0.54 | -0.68 | 0.95 | 1.30 | 1.72 | 2.84 | ▁▃▇▇▁ |
ACTH_Adrenocorticotropic_Hormon | 0 | 1 | -1.54 | 0.28 | -2.21 | -1.71 | -1.56 | -1.35 | -0.84 | ▂▆▇▆▂ |
AXL | 0 | 1 | 0.31 | 0.45 | -0.92 | 0.00 | 0.28 | 0.61 | 1.52 | ▁▅▇▃▁ |
Adiponectin | 0 | 1 | -5.20 | 0.67 | -6.73 | -5.67 | -5.18 | -4.78 | -3.51 | ▂▆▇▅▁ |
Alpha_1_Antichymotrypsin | 0 | 1 | 1.36 | 0.36 | 0.26 | 1.13 | 1.36 | 1.59 | 2.30 | ▁▃▇▅▁ |
Alpha_1_Antitrypsin | 0 | 1 | -13.05 | 1.48 | -17.03 | -14.07 | -13.00 | -12.10 | -8.19 | ▁▅▇▂▁ |
Alpha_1_Microglobulin | 0 | 1 | -2.93 | 0.48 | -4.34 | -3.27 | -2.94 | -2.59 | -1.77 | ▁▅▇▇▂ |
Alpha_2_Macroglobulin | 0 | 1 | -158.61 | 40.93 | -289.68 | -186.64 | -160.01 | -134.62 | -59.46 | ▁▂▇▅▂ |
Angiopoietin_2_ANG_2 | 0 | 1 | 0.67 | 0.33 | -0.54 | 0.47 | 0.64 | 0.88 | 1.53 | ▁▂▇▆▂ |
Angiotensinogen | 0 | 1 | 2.32 | 0.25 | 1.75 | 2.12 | 2.32 | 2.50 | 2.88 | ▃▆▇▇▂ |
Apolipoprotein_A_IV | 0 | 1 | -1.85 | 0.38 | -2.96 | -2.12 | -1.83 | -1.61 | -0.78 | ▁▃▇▃▁ |
Apolipoprotein_A1 | 0 | 1 | -7.48 | 0.44 | -8.68 | -7.76 | -7.47 | -7.21 | -6.17 | ▁▅▇▃▁ |
Apolipoprotein_A2 | 0 | 1 | -0.64 | 0.50 | -1.90 | -0.97 | -0.67 | -0.31 | 0.96 | ▁▇▇▃▁ |
Apolipoprotein_B | 0 | 1 | -5.58 | 1.48 | -9.94 | -6.63 | -5.70 | -4.54 | -2.15 | ▁▅▇▇▂ |
Apolipoprotein_CI | 0 | 1 | -1.58 | 0.41 | -3.32 | -1.83 | -1.61 | -1.37 | -0.27 | ▁▁▇▅▁ |
Apolipoprotein_CIII | 0 | 1 | -2.49 | 0.44 | -3.69 | -2.77 | -2.53 | -2.21 | -1.24 | ▁▅▇▅▁ |
Apolipoprotein_D | 0 | 1 | 1.44 | 0.34 | 0.47 | 1.21 | 1.41 | 1.67 | 2.27 | ▁▃▇▆▂ |
Apolipoprotein_E | 0 | 1 | 2.81 | 0.78 | 0.59 | 2.33 | 2.82 | 3.29 | 5.44 | ▁▅▇▂▁ |
Apolipoprotein_H | 0 | 1 | -0.32 | 0.39 | -2.23 | -0.60 | -0.37 | -0.06 | 0.93 | ▁▁▇▆▁ |
B_Lymphocyte_Chemoattractant_BL | 0 | 1 | 2.02 | 0.57 | 0.73 | 1.67 | 1.98 | 2.37 | 4.02 | ▃▇▇▁▁ |
BMP_6 | 0 | 1 | -1.91 | 0.31 | -2.76 | -2.15 | -1.88 | -1.68 | -0.82 | ▁▅▇▂▁ |
Beta_2_Microglobulin | 0 | 1 | 0.17 | 0.30 | -0.54 | -0.04 | 0.18 | 0.34 | 0.99 | ▂▃▇▃▁ |
Betacellulin | 0 | 1 | 51.01 | 10.87 | 10.00 | 42.00 | 51.00 | 59.00 | 82.00 | ▁▁▇▅▁ |
C_Reactive_Protein | 0 | 1 | -5.87 | 1.20 | -8.52 | -6.65 | -5.84 | -5.08 | -2.94 | ▃▆▇▆▂ |
CD40 | 0 | 1 | -1.26 | 0.21 | -1.86 | -1.38 | -1.27 | -1.12 | -0.55 | ▁▆▇▃▁ |
CD5L | 0 | 1 | -0.05 | 0.45 | -1.24 | -0.36 | -0.06 | 0.26 | 1.16 | ▁▅▇▃▁ |
Calbindin | 0 | 1 | 22.43 | 4.11 | 10.96 | 19.77 | 22.25 | 24.80 | 33.78 | ▁▃▇▃▁ |
Calcitonin | 0 | 1 | 1.68 | 0.87 | -0.71 | 0.96 | 1.65 | 2.28 | 3.89 | ▁▆▇▅▂ |
CgA | 0 | 1 | 333.30 | 83.67 | 135.60 | 278.02 | 331.52 | 392.07 | 535.40 | ▂▆▇▅▂ |
Clusterin_Apo_J | 0 | 1 | 2.88 | 0.29 | 1.87 | 2.71 | 2.89 | 3.04 | 3.58 | ▁▂▇▆▂ |
Complement_3 | 0 | 1 | -15.61 | 2.46 | -23.39 | -17.57 | -15.52 | -13.88 | -9.56 | ▁▆▇▇▂ |
Complement_Factor_H | 0 | 1 | 3.55 | 1.25 | -0.84 | 2.75 | 3.60 | 4.25 | 7.62 | ▁▃▇▅▁ |
Connective_Tissue_Growth_Factor | 0 | 1 | 0.77 | 0.20 | 0.18 | 0.64 | 0.79 | 0.92 | 1.41 | ▁▅▇▃▁ |
Cortisol | 0 | 1 | 11.98 | 3.95 | 0.10 | 9.80 | 12.00 | 14.00 | 29.00 | ▁▇▇▁▁ |
Creatine_Kinase_MB | 0 | 1 | -1.67 | 0.09 | -1.87 | -1.72 | -1.67 | -1.63 | -1.38 | ▃▅▇▂▁ |
Cystatin_C | 0 | 1 | 8.59 | 0.40 | 7.43 | 8.32 | 8.56 | 8.84 | 9.69 | ▁▅▇▃▁ |
EGF_R | 0 | 1 | -0.70 | 0.22 | -1.36 | -0.86 | -0.68 | -0.55 | -0.06 | ▁▃▇▅▁ |
EN_RAGE | 0 | 1 | -3.64 | 0.88 | -8.38 | -4.20 | -3.65 | -3.15 | -0.39 | ▁▁▇▆▁ |
ENA_78 | 0 | 1 | -1.37 | 0.01 | -1.41 | -1.38 | -1.37 | -1.36 | -1.34 | ▁▅▇▅▁ |
Eotaxin_3 | 0 | 1 | 58.17 | 15.83 | 7.00 | 44.00 | 59.00 | 70.00 | 107.00 | ▁▆▇▅▁ |
FAS | 0 | 1 | -0.53 | 0.30 | -1.51 | -0.71 | -0.53 | -0.31 | 0.34 | ▁▃▇▆▁ |
FSH_Follicle_Stimulation_Hormon | 0 | 1 | -1.14 | 0.42 | -2.12 | -1.47 | -1.14 | -0.88 | 0.10 | ▂▇▇▃▁ |
Fas_Ligand | 0 | 1 | 2.97 | 1.09 | -0.15 | 2.34 | 3.10 | 3.70 | 7.63 | ▂▇▇▁▁ |
Fatty_Acid_Binding_Protein | 0 | 1 | 1.35 | 0.78 | -1.04 | 0.80 | 1.39 | 1.88 | 3.71 | ▁▅▇▅▁ |
Ferritin | 0 | 1 | 2.76 | 0.76 | 0.61 | 2.29 | 2.77 | 3.29 | 4.63 | ▁▃▇▆▂ |
Fetuin_A | 0 | 1 | 1.35 | 0.37 | 0.47 | 1.10 | 1.31 | 1.61 | 2.25 | ▂▆▇▅▂ |
Fibrinogen | 0 | 1 | -7.36 | 0.57 | -8.87 | -7.72 | -7.32 | -7.01 | -5.84 | ▁▅▇▅▁ |
GRO_alpha | 0 | 1 | 1.38 | 0.04 | 1.27 | 1.35 | 1.38 | 1.41 | 1.49 | ▂▃▇▅▁ |
Gamma_Interferon_induced_Monokin | 0 | 1 | 2.79 | 0.11 | 2.39 | 2.71 | 2.78 | 2.87 | 3.07 | ▁▂▇▆▂ |
Glutathione_S_Transferase_alpha | 0 | 1 | 0.95 | 0.16 | 0.52 | 0.84 | 0.97 | 1.03 | 1.32 | ▁▃▇▅▂ |
HB_EGF | 0 | 1 | 6.83 | 1.51 | 2.10 | 5.79 | 6.70 | 7.86 | 10.70 | ▁▃▇▆▂ |
HCC_4 | 0 | 1 | -3.50 | 0.36 | -4.51 | -3.73 | -3.51 | -3.27 | -2.12 | ▂▇▇▂▁ |
Hepatocyte_Growth_Factor_HGF | 0 | 1 | 0.20 | 0.29 | -0.63 | 0.00 | 0.18 | 0.41 | 0.88 | ▁▃▇▅▂ |
I_309 | 0 | 1 | 2.96 | 0.37 | 1.76 | 2.71 | 2.94 | 3.22 | 4.14 | ▁▅▇▃▁ |
ICAM_1 | 0 | 1 | -0.59 | 0.33 | -1.53 | -0.83 | -0.59 | -0.38 | 0.52 | ▁▅▇▃▁ |
IGF_BP_2 | 0 | 1 | 5.32 | 0.20 | 4.63 | 5.18 | 5.32 | 5.45 | 5.95 | ▁▃▇▅▁ |
IL_11 | 0 | 1 | 4.72 | 1.45 | 1.75 | 3.71 | 4.81 | 5.78 | 8.49 | ▃▆▇▅▁ |
IL_13 | 0 | 1 | 1.28 | 0.01 | 1.26 | 1.27 | 1.28 | 1.29 | 1.32 | ▂▇▆▂▁ |
IL_16 | 0 | 1 | 2.93 | 0.66 | 1.19 | 2.52 | 2.91 | 3.35 | 4.94 | ▁▆▇▃▁ |
IL_17E | 0 | 1 | 4.85 | 1.34 | 1.05 | 4.15 | 4.75 | 5.63 | 8.95 | ▁▆▇▃▁ |
IL_1alpha | 0 | 1 | -7.51 | 0.39 | -8.52 | -7.82 | -7.52 | -7.26 | -5.95 | ▂▇▇▂▁ |
IL_3 | 0 | 1 | -3.94 | 0.50 | -5.91 | -4.27 | -3.91 | -3.63 | -2.45 | ▁▂▇▅▁ |
IL_4 | 0 | 1 | 1.77 | 0.51 | 0.53 | 1.46 | 1.81 | 2.15 | 3.04 | ▂▅▇▅▂ |
IL_5 | 0 | 1 | 0.19 | 0.46 | -1.43 | -0.12 | 0.18 | 0.47 | 1.95 | ▁▃▇▂▁ |
IL_6 | 0 | 1 | -0.15 | 0.55 | -1.53 | -0.41 | -0.16 | 0.14 | 1.81 | ▂▆▇▂▁ |
IL_6_Receptor | 0 | 1 | 0.09 | 0.32 | -0.68 | -0.13 | 0.10 | 0.35 | 0.83 | ▂▆▇▆▂ |
IL_7 | 0 | 1 | 2.84 | 1.05 | 0.56 | 2.15 | 2.79 | 3.71 | 5.71 | ▃▇▇▆▁ |
IL_8 | 0 | 1 | 1.70 | 0.04 | 1.57 | 1.68 | 1.71 | 1.73 | 1.81 | ▁▂▇▆▁ |
IP_10_Inducible_Protein_10 | 0 | 1 | 5.75 | 0.51 | 4.32 | 5.40 | 5.75 | 6.06 | 7.50 | ▁▆▇▂▁ |
IgA | 0 | 1 | -6.12 | 0.76 | -10.52 | -6.65 | -6.12 | -5.57 | -4.20 | ▁▁▂▇▂ |
Insulin | 0 | 1 | -1.23 | 0.34 | -2.17 | -1.45 | -1.25 | -1.03 | -0.16 | ▁▃▇▂▁ |
Kidney_Injury_Molecule_1_KIM_1 | 0 | 1 | -1.18 | 0.03 | -1.26 | -1.20 | -1.18 | -1.16 | -1.10 | ▁▇▇▅▁ |
LOX_1 | 0 | 1 | 1.28 | 0.40 | 0.00 | 1.03 | 1.28 | 1.53 | 2.27 | ▁▂▇▅▂ |
Leptin | 0 | 1 | -1.50 | 0.27 | -2.15 | -1.70 | -1.50 | -1.33 | -0.62 | ▂▇▇▂▁ |
Lipoprotein_a | 0 | 1 | -4.42 | 1.11 | -6.81 | -5.31 | -4.61 | -3.49 | -1.39 | ▂▇▅▅▁ |
MCP_1 | 0 | 1 | 6.50 | 0.26 | 5.83 | 6.32 | 6.49 | 6.68 | 7.23 | ▂▅▇▅▁ |
MCP_2 | 0 | 1 | 1.87 | 0.65 | 0.40 | 1.53 | 1.85 | 2.18 | 4.02 | ▂▆▇▂▁ |
MIF | 0 | 1 | -1.86 | 0.34 | -2.85 | -2.12 | -1.90 | -1.66 | -0.84 | ▁▃▇▃▁ |
MIP_1alpha | 0 | 1 | 4.05 | 1.01 | 0.93 | 3.34 | 4.05 | 4.69 | 6.80 | ▁▅▇▅▂ |
MIP_1beta | 0 | 1 | 2.81 | 0.38 | 1.95 | 2.56 | 2.83 | 3.04 | 4.01 | ▂▅▇▂▁ |
MMP_2 | 0 | 1 | 2.88 | 0.93 | 0.10 | 2.33 | 2.82 | 3.55 | 5.36 | ▁▃▇▆▁ |
MMP_3 | 0 | 1 | -2.45 | 0.57 | -4.42 | -2.75 | -2.45 | -2.12 | -0.53 | ▁▂▇▃▁ |
MMP10 | 0 | 1 | -3.63 | 0.43 | -4.93 | -3.94 | -3.65 | -3.35 | -2.21 | ▁▆▇▃▁ |
MMP7 | 0 | 1 | -3.79 | 1.55 | -8.40 | -4.82 | -3.77 | -2.71 | -0.22 | ▁▃▇▅▂ |
Myoglobin | 0 | 1 | -1.37 | 0.95 | -3.17 | -2.04 | -1.47 | -0.80 | 1.77 | ▅▇▆▂▁ |
NT_proBNP | 0 | 1 | 4.55 | 0.38 | 3.18 | 4.35 | 4.55 | 4.77 | 5.89 | ▁▂▇▃▁ |
NrCAM | 0 | 1 | 4.36 | 0.57 | 2.64 | 4.00 | 4.39 | 4.75 | 6.01 | ▁▃▇▅▁ |
Osteopontin | 0 | 1 | 5.20 | 0.39 | 4.11 | 4.96 | 5.19 | 5.44 | 6.31 | ▁▅▇▃▁ |
PAI_1 | 0 | 1 | 0.08 | 0.41 | -0.99 | -0.17 | 0.09 | 0.32 | 1.17 | ▂▃▇▅▂ |
PAPP_A | 0 | 1 | -2.85 | 0.14 | -3.31 | -2.94 | -2.87 | -2.75 | -2.52 | ▁▂▇▅▂ |
PLGF | 0 | 1 | 3.91 | 0.41 | 2.48 | 3.64 | 3.87 | 4.20 | 5.17 | ▁▃▇▆▁ |
PYY | 0 | 1 | 3.02 | 0.29 | 2.19 | 2.83 | 3.00 | 3.18 | 3.93 | ▁▅▇▃▁ |
Pancreatic_polypeptide | 0 | 1 | -0.01 | 0.72 | -2.12 | -0.53 | -0.04 | 0.53 | 1.93 | ▁▅▇▆▁ |
Prolactin | 0 | 1 | 0.04 | 0.30 | -1.31 | -0.14 | 0.00 | 0.26 | 0.99 | ▁▁▇▇▁ |
Prostatic_Acid_Phosphatase | 0 | 1 | -1.69 | 0.06 | -1.93 | -1.72 | -1.69 | -1.65 | -1.42 | ▁▂▇▂▁ |
Protein_S | 0 | 1 | -2.24 | 0.35 | -3.34 | -2.46 | -2.26 | -2.00 | -1.22 | ▁▃▇▃▁ |
Pulmonary_and_Activation_Regulat | 0 | 1 | -1.49 | 0.45 | -2.51 | -1.83 | -1.51 | -1.17 | -0.27 | ▂▇▇▃▂ |
RANTES | 0 | 1 | -6.51 | 0.32 | -7.22 | -6.73 | -6.50 | -6.32 | -5.55 | ▃▇▇▃▁ |
Resistin | 0 | 1 | -17.64 | 5.82 | -34.97 | -21.47 | -17.47 | -13.50 | -2.24 | ▁▃▇▆▁ |
S100b | 0 | 1 | 1.25 | 0.34 | 0.19 | 1.00 | 1.25 | 1.50 | 2.37 | ▁▆▇▅▁ |
SGOT | 0 | 1 | -0.41 | 0.35 | -1.35 | -0.63 | -0.40 | -0.20 | 0.74 | ▁▅▇▂▁ |
SHBG | 0 | 1 | -2.48 | 0.58 | -4.14 | -2.81 | -2.49 | -2.12 | -1.11 | ▁▃▇▆▂ |
SOD | 0 | 1 | 5.34 | 0.38 | 4.32 | 5.09 | 5.37 | 5.58 | 6.32 | ▁▅▇▅▁ |
Serum_Amyloid_P | 0 | 1 | -6.02 | 0.56 | -7.51 | -6.38 | -6.03 | -5.65 | -4.65 | ▁▅▇▅▂ |
Sortilin | 0 | 1 | 3.85 | 0.87 | 1.65 | 3.34 | 3.87 | 4.37 | 6.23 | ▁▅▇▃▁ |
Stem_Cell_Factor | 0 | 1 | 3.30 | 0.36 | 2.25 | 3.04 | 3.30 | 3.53 | 4.28 | ▁▅▇▅▁ |
TGF_alpha | 0 | 1 | 9.80 | 1.32 | 6.84 | 8.86 | 9.92 | 10.70 | 13.83 | ▂▆▇▂▁ |
TIMP_1 | 0 | 1 | 11.75 | 1.90 | 1.74 | 10.49 | 11.56 | 12.70 | 18.88 | ▁▁▇▅▁ |
TNF_RII | 0 | 1 | -0.59 | 0.33 | -1.66 | -0.82 | -0.60 | -0.38 | 0.47 | ▁▃▇▃▁ |
TRAIL_R3 | 0 | 1 | -0.54 | 0.24 | -1.21 | -0.70 | -0.53 | -0.38 | 0.27 | ▁▆▇▂▁ |
TTR_prealbumin | 0 | 1 | 2.85 | 0.14 | 2.48 | 2.77 | 2.83 | 2.94 | 3.33 | ▂▅▇▃▁ |
Tamm_Horsfall_Protein_THP | 0 | 1 | -3.12 | 0.03 | -3.21 | -3.14 | -3.12 | -3.10 | -2.99 | ▁▇▇▂▁ |
Thrombomodulin | 0 | 1 | -1.51 | 0.22 | -2.04 | -1.63 | -1.49 | -1.34 | -0.82 | ▂▆▇▃▁ |
Thrombopoietin | 0 | 1 | -0.75 | 0.24 | -1.54 | -0.89 | -0.75 | -0.63 | 0.10 | ▁▅▇▂▁ |
Thymus_Expressed_Chemokine_TECK | 0 | 1 | 3.85 | 0.80 | 1.51 | 3.34 | 3.81 | 4.32 | 6.23 | ▁▃▇▃▁ |
Thyroid_Stimulating_Hormone | 0 | 1 | -4.50 | 0.75 | -6.19 | -4.96 | -4.51 | -4.02 | -1.71 | ▂▇▆▁▁ |
Thyroxine_Binding_Globulin | 0 | 1 | -1.48 | 0.40 | -2.48 | -1.77 | -1.51 | -1.24 | -0.21 | ▂▇▇▃▁ |
Tissue_Factor | 0 | 1 | 1.17 | 0.50 | -0.21 | 0.83 | 1.22 | 1.48 | 2.48 | ▁▅▇▆▁ |
Transferrin | 0 | 1 | 2.91 | 0.27 | 1.93 | 2.71 | 2.89 | 3.09 | 3.76 | ▁▂▇▅▁ |
Trefoil_Factor_3_TFF3 | 0 | 1 | -3.88 | 0.34 | -4.74 | -4.14 | -3.86 | -3.65 | -2.96 | ▁▅▇▃▁ |
VCAM_1 | 0 | 1 | 2.69 | 0.32 | 1.72 | 2.48 | 2.71 | 2.89 | 3.69 | ▁▃▇▃▁ |
VEGF | 0 | 1 | 16.99 | 1.81 | 11.83 | 15.77 | 17.08 | 18.10 | 22.38 | ▁▅▇▃▁ |
Vitronectin | 0 | 1 | -0.28 | 0.33 | -1.43 | -0.51 | -0.30 | -0.04 | 0.53 | ▁▃▇▇▂ |
von_Willebrand_Factor | 0 | 1 | -3.91 | 0.38 | -4.99 | -4.20 | -3.91 | -3.61 | -2.96 | ▁▅▇▆▂ |
###################################
# Verifying the data dimensions
# for the train set
###################################
# Prints the number of rows and columns of PMA_PreModelling_Train.
dim(PMA_PreModelling_Train)
## [1] 267 128
##################################
# Formulating the test set
##################################
# Copy of the test set with the response and the E2/E3/E4 indicator columns
# stored as factors.
PMA_PreModelling_Test <- Alzheimer_Test
PMA_PreModelling_Test$Class <- as.factor(PMA_PreModelling_Test$Class)
# The indicators are built as 0/1 values; fixing the level set explicitly
# keeps the factor levels the same in every data split, even if one split
# happens to contain only one of the two values.
PMA_PreModelling_Test$E2 <- factor(PMA_PreModelling_Test$E2, levels = c(0, 1))
PMA_PreModelling_Test$E3 <- factor(PMA_PreModelling_Test$E3, levels = c(0, 1))
PMA_PreModelling_Test$E4 <- factor(PMA_PreModelling_Test$E4, levels = c(0, 1))
##################################
# Gathering descriptive statistics
##################################
# The outer parentheses print the skim summary while also storing it.
(PMA_PreModelling_Test_Skimmed <- skim(PMA_PreModelling_Test))
Name | PMA_PreModelling_Test |
Number of rows | 66 |
Number of columns | 128 |
_______________________ | |
Column type frequency: | |
factor | 4 |
numeric | 124 |
________________________ | |
Group variables | None |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
Class | 0 | 1 | FALSE | 2 | Con: 48, Imp: 18 |
E4 | 0 | 1 | FALSE | 2 | 0: 46, 1: 20 |
E3 | 0 | 1 | FALSE | 2 | 1: 65, 0: 1 |
E2 | 0 | 1 | FALSE | 2 | 0: 62, 1: 4 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
ACE_CD143_Angiotensin_Converti | 0 | 1 | 1.31 | 0.59 | -0.55 | 0.95 | 1.30 | 1.63 | 3.09 | ▁▃▇▃▁ |
ACTH_Adrenocorticotropic_Hormon | 0 | 1 | -1.53 | 0.25 | -2.21 | -1.71 | -1.54 | -1.39 | -0.80 | ▁▃▇▃▁ |
AXL | 0 | 1 | 0.25 | 0.45 | -0.74 | -0.08 | 0.28 | 0.61 | 1.29 | ▃▃▇▅▂ |
Adiponectin | 0 | 1 | -5.30 | 0.66 | -7.06 | -5.74 | -5.36 | -4.92 | -3.47 | ▁▅▇▃▁ |
Alpha_1_Antichymotrypsin | 0 | 1 | 1.31 | 0.38 | 0.18 | 1.06 | 1.31 | 1.57 | 2.22 | ▁▃▇▆▂ |
Alpha_1_Antitrypsin | 0 | 1 | -13.49 | 1.76 | -18.17 | -14.70 | -13.59 | -12.31 | -10.06 | ▁▃▇▆▃ |
Alpha_1_Microglobulin | 0 | 1 | -2.98 | 0.49 | -4.14 | -3.28 | -3.01 | -2.67 | -1.90 | ▂▅▇▅▂ |
Alpha_2_Macroglobulin | 0 | 1 | -162.89 | 40.98 | -238.64 | -186.64 | -162.93 | -136.53 | -50.17 | ▃▇▇▃▁ |
Angiopoietin_2_ANG_2 | 0 | 1 | 0.60 | 0.35 | -0.05 | 0.35 | 0.56 | 0.79 | 1.77 | ▃▇▅▁▁ |
Angiotensinogen | 0 | 1 | 2.27 | 0.23 | 1.71 | 2.07 | 2.28 | 2.43 | 2.75 | ▁▇▇▇▂ |
Apolipoprotein_A_IV | 0 | 1 | -1.87 | 0.38 | -2.75 | -2.19 | -1.90 | -1.53 | -1.11 | ▂▆▇▆▅ |
Apolipoprotein_A1 | 0 | 1 | -7.49 | 0.42 | -8.57 | -7.82 | -7.50 | -7.18 | -6.65 | ▁▆▇▇▃ |
Apolipoprotein_A2 | 0 | 1 | -0.69 | 0.48 | -1.97 | -0.94 | -0.70 | -0.35 | 0.53 | ▁▃▇▅▁ |
Apolipoprotein_B | 0 | 1 | -5.65 | 1.34 | -8.19 | -6.75 | -5.82 | -4.60 | -2.34 | ▃▇▅▅▂ |
Apolipoprotein_CI | 0 | 1 | -1.63 | 0.45 | -2.85 | -1.90 | -1.61 | -1.31 | -0.46 | ▂▅▇▅▁ |
Apolipoprotein_CIII | 0 | 1 | -2.52 | 0.46 | -3.86 | -2.78 | -2.56 | -2.23 | -1.39 | ▁▂▇▃▂ |
Apolipoprotein_D | 0 | 1 | 1.39 | 0.41 | 0.26 | 1.13 | 1.39 | 1.69 | 2.64 | ▁▇▇▅▁ |
Apolipoprotein_E | 0 | 1 | 2.72 | 0.88 | 0.66 | 2.15 | 2.82 | 3.24 | 4.68 | ▂▅▇▅▂ |
Apolipoprotein_H | 0 | 1 | -0.32 | 0.33 | -1.16 | -0.53 | -0.29 | -0.10 | 0.44 | ▂▅▇▆▂ |
B_Lymphocyte_Chemoattractant_BL | 0 | 1 | 1.88 | 0.57 | 0.73 | 1.53 | 1.85 | 2.37 | 2.98 | ▂▆▇▆▃ |
BMP_6 | 0 | 1 | -1.94 | 0.35 | -2.67 | -2.15 | -1.96 | -1.68 | -1.18 | ▃▃▇▆▂ |
Beta_2_Microglobulin | 0 | 1 | 0.16 | 0.31 | -0.51 | -0.06 | 0.18 | 0.41 | 0.83 | ▂▇▇▆▂ |
Betacellulin | 0 | 1 | 52.74 | 10.58 | 32.00 | 46.00 | 51.00 | 59.75 | 80.00 | ▂▇▅▃▁ |
C_Reactive_Protein | 0 | 1 | -6.00 | 1.17 | -8.11 | -6.73 | -6.17 | -5.37 | -3.41 | ▃▇▇▃▃ |
CD40 | 0 | 1 | -1.28 | 0.24 | -1.94 | -1.44 | -1.26 | -1.10 | -0.78 | ▂▃▅▇▂ |
CD5L | 0 | 1 | -0.09 | 0.49 | -1.97 | -0.37 | -0.05 | 0.24 | 0.92 | ▁▁▃▇▂ |
Calbindin | 0 | 1 | 21.49 | 4.49 | 10.81 | 18.88 | 21.06 | 24.00 | 35.36 | ▂▆▇▂▁ |
Calcitonin | 0 | 1 | 1.73 | 0.89 | -0.71 | 1.20 | 1.68 | 2.26 | 4.11 | ▁▅▇▅▁ |
CgA | 0 | 1 | 320.22 | 75.26 | 166.55 | 268.21 | 324.75 | 362.27 | 494.53 | ▃▃▇▃▂ |
Clusterin_Apo_J | 0 | 1 | 2.85 | 0.35 | 1.93 | 2.56 | 2.83 | 3.04 | 3.76 | ▁▆▇▅▁ |
Complement_3 | 0 | 1 | -15.91 | 2.60 | -22.40 | -17.50 | -15.90 | -14.34 | -10.23 | ▁▃▇▃▂ |
Complement_Factor_H | 0 | 1 | 3.39 | 1.26 | 0.28 | 2.60 | 3.40 | 4.25 | 6.56 | ▁▅▇▅▁ |
Connective_Tissue_Growth_Factor | 0 | 1 | 0.75 | 0.22 | 0.10 | 0.59 | 0.74 | 0.88 | 1.41 | ▁▃▇▃▁ |
Cortisol | 0 | 1 | 10.46 | 3.85 | 0.10 | 8.90 | 10.00 | 12.00 | 22.00 | ▁▂▇▂▁ |
Creatine_Kinase_MB | 0 | 1 | -1.65 | 0.10 | -1.87 | -1.72 | -1.65 | -1.59 | -1.43 | ▂▅▇▅▂ |
Cystatin_C | 0 | 1 | 8.58 | 0.42 | 7.73 | 8.30 | 8.54 | 8.84 | 9.69 | ▃▇▇▃▂ |
EGF_R | 0 | 1 | -0.70 | 0.26 | -1.27 | -0.89 | -0.69 | -0.50 | 0.19 | ▂▆▇▁▁ |
EN_RAGE | 0 | 1 | -3.60 | 0.98 | -8.38 | -4.18 | -3.69 | -3.22 | -0.87 | ▁▁▅▇▁ |
ENA_78 | 0 | 1 | -1.38 | 0.01 | -1.41 | -1.38 | -1.37 | -1.37 | -1.35 | ▂▂▆▇▂ |
Eotaxin_3 | 0 | 1 | 55.55 | 18.33 | 23.00 | 43.00 | 54.00 | 64.00 | 107.00 | ▅▇▆▂▁ |
FAS | 0 | 1 | -0.54 | 0.30 | -1.11 | -0.71 | -0.58 | -0.34 | 0.18 | ▃▇▇▅▂ |
FSH_Follicle_Stimulation_Hormon | 0 | 1 | -1.06 | 0.34 | -1.81 | -1.27 | -0.98 | -0.81 | -0.48 | ▂▂▅▇▃ |
Fas_Ligand | 0 | 1 | 2.65 | 1.02 | 0.29 | 2.07 | 2.67 | 3.16 | 5.38 | ▂▅▇▃▁ |
Fatty_Acid_Binding_Protein | 0 | 1 | 1.29 | 0.79 | -0.46 | 0.80 | 1.19 | 1.92 | 3.22 | ▂▇▆▆▂ |
Ferritin | 0 | 1 | 2.71 | 0.87 | 0.90 | 2.15 | 2.63 | 3.17 | 4.93 | ▂▆▇▂▂ |
Fetuin_A | 0 | 1 | 1.31 | 0.41 | 0.53 | 1.03 | 1.31 | 1.61 | 2.21 | ▅▇▇▇▂ |
Fibrinogen | 0 | 1 | -7.36 | 0.63 | -9.37 | -7.80 | -7.32 | -6.97 | -6.17 | ▁▂▇▇▅ |
GRO_alpha | 0 | 1 | 1.38 | 0.04 | 1.27 | 1.35 | 1.37 | 1.40 | 1.51 | ▁▇▇▂▁ |
Gamma_Interferon_induced_Monokin | 0 | 1 | 2.77 | 0.12 | 2.54 | 2.70 | 2.77 | 2.83 | 3.05 | ▂▅▇▂▂ |
Glutathione_S_Transferase_alpha | 0 | 1 | 0.94 | 0.17 | 0.57 | 0.83 | 0.95 | 1.05 | 1.31 | ▃▅▇▇▂ |
HB_EGF | 0 | 1 | 6.84 | 1.43 | 3.52 | 5.95 | 6.98 | 7.75 | 10.36 | ▂▆▇▇▂ |
HCC_4 | 0 | 1 | -3.54 | 0.35 | -4.34 | -3.77 | -3.54 | -3.35 | -2.49 | ▂▇▇▃▁ |
Hepatocyte_Growth_Factor_HGF | 0 | 1 | 0.18 | 0.34 | -0.62 | -0.06 | 0.18 | 0.34 | 1.10 | ▁▆▇▃▁ |
I_309 | 0 | 1 | 2.92 | 0.36 | 2.04 | 2.72 | 2.94 | 3.14 | 3.69 | ▂▂▇▃▂ |
ICAM_1 | 0 | 1 | -0.60 | 0.35 | -1.47 | -0.77 | -0.59 | -0.36 | 0.36 | ▂▂▇▅▁ |
IGF_BP_2 | 0 | 1 | 5.26 | 0.22 | 4.72 | 5.13 | 5.25 | 5.40 | 5.92 | ▁▅▇▂▁ |
IL_11 | 0 | 1 | 4.65 | 1.33 | 2.03 | 3.96 | 4.84 | 5.48 | 8.69 | ▃▅▇▂▁ |
IL_13 | 0 | 1 | 1.28 | 0.01 | 1.23 | 1.27 | 1.28 | 1.29 | 1.31 | ▁▁▇▇▃ |
IL_16 | 0 | 1 | 2.82 | 0.70 | 0.96 | 2.44 | 2.88 | 3.35 | 4.10 | ▂▂▇▇▅ |
IL_17E | 0 | 1 | 4.77 | 1.33 | 1.58 | 3.64 | 4.72 | 5.41 | 8.08 | ▁▇▇▃▂ |
IL_1alpha | 0 | 1 | -7.55 | 0.44 | -8.47 | -7.85 | -7.56 | -7.28 | -6.38 | ▂▇▇▃▁ |
IL_3 | 0 | 1 | -3.98 | 0.52 | -5.52 | -4.32 | -3.96 | -3.58 | -3.08 | ▁▃▇▇▆ |
IL_4 | 0 | 1 | 1.74 | 0.52 | 0.53 | 1.46 | 1.72 | 2.07 | 2.71 | ▁▅▇▃▅ |
IL_5 | 0 | 1 | 0.23 | 0.44 | -1.05 | -0.03 | 0.22 | 0.53 | 1.13 | ▁▂▇▇▂ |
IL_6 | 0 | 1 | -0.05 | 0.52 | -1.53 | -0.41 | -0.07 | 0.35 | 1.01 | ▁▂▇▇▂ |
IL_6_Receptor | 0 | 1 | 0.06 | 0.35 | -0.75 | -0.20 | 0.00 | 0.27 | 0.77 | ▂▇▇▆▅ |
IL_7 | 0 | 1 | 3.14 | 0.85 | 1.31 | 2.38 | 3.15 | 3.71 | 5.00 | ▁▇▅▆▂ |
IL_8 | 0 | 1 | 1.70 | 0.04 | 1.62 | 1.68 | 1.70 | 1.73 | 1.84 | ▂▇▆▂▁ |
IP_10_Inducible_Protein_10 | 0 | 1 | 5.64 | 0.51 | 4.26 | 5.32 | 5.62 | 5.92 | 7.21 | ▁▅▇▃▁ |
IgA | 0 | 1 | -6.07 | 0.64 | -7.62 | -6.57 | -6.01 | -5.61 | -4.73 | ▁▆▇▇▂ |
Insulin | 0 | 1 | -1.20 | 0.32 | -2.01 | -1.45 | -1.22 | -1.01 | -0.50 | ▂▆▇▇▃ |
Kidney_Injury_Molecule_1_KIM_1 | 0 | 1 | -1.19 | 0.03 | -1.25 | -1.21 | -1.19 | -1.17 | -1.12 | ▃▇▇▇▂ |
LOX_1 | 0 | 1 | 1.21 | 0.45 | 0.00 | 0.96 | 1.22 | 1.44 | 2.40 | ▁▃▇▃▁ |
Leptin | 0 | 1 | -1.44 | 0.25 | -1.95 | -1.63 | -1.43 | -1.24 | -0.84 | ▃▇▆▇▂ |
Lipoprotein_a | 0 | 1 | -4.51 | 0.91 | -6.57 | -5.12 | -4.66 | -4.02 | -2.04 | ▂▇▇▅▁ |
MCP_1 | 0 | 1 | 6.48 | 0.25 | 5.89 | 6.32 | 6.48 | 6.63 | 7.06 | ▁▅▇▅▂ |
MCP_2 | 0 | 1 | 1.81 | 0.60 | 0.40 | 1.53 | 1.85 | 2.08 | 3.75 | ▂▅▇▁▁ |
MIF | 0 | 1 | -1.93 | 0.31 | -2.80 | -2.12 | -1.97 | -1.71 | -1.11 | ▁▃▇▃▁ |
MIP_1alpha | 0 | 1 | 3.90 | 1.04 | 1.01 | 3.30 | 3.74 | 4.69 | 5.74 | ▁▂▇▆▅ |
MIP_1beta | 0 | 1 | 2.78 | 0.39 | 1.92 | 2.48 | 2.77 | 3.08 | 3.78 | ▃▆▇▆▁ |
MMP_2 | 0 | 1 | 3.03 | 0.95 | 0.62 | 2.55 | 2.99 | 3.48 | 6.10 | ▁▃▇▁▁ |
MMP_3 | 0 | 1 | -2.49 | 0.59 | -3.65 | -2.85 | -2.53 | -2.12 | -1.17 | ▃▆▇▅▂ |
MMP10 | 0 | 1 | -3.68 | 0.44 | -4.95 | -4.07 | -3.61 | -3.33 | -2.90 | ▁▃▆▇▅ |
MMP7 | 0 | 1 | -4.01 | 1.55 | -7.53 | -4.96 | -4.03 | -3.16 | -0.20 | ▂▃▇▂▁ |
Myoglobin | 0 | 1 | -1.42 | 0.91 | -3.30 | -2.02 | -1.59 | -0.78 | 1.13 | ▂▇▆▃▁ |
NT_proBNP | 0 | 1 | 4.49 | 0.39 | 3.61 | 4.17 | 4.48 | 4.79 | 5.40 | ▂▇▇▆▂ |
NrCAM | 0 | 1 | 4.29 | 0.58 | 2.89 | 3.87 | 4.32 | 4.72 | 5.69 | ▂▆▇▆▁ |
Osteopontin | 0 | 1 | 5.18 | 0.40 | 4.08 | 4.89 | 5.17 | 5.41 | 6.32 | ▁▆▇▃▁ |
PAI_1 | 0 | 1 | 0.00 | 0.41 | -0.99 | -0.33 | 0.00 | 0.30 | 0.89 | ▂▆▇▇▂ |
PAPP_A | 0 | 1 | -2.84 | 0.14 | -3.15 | -2.97 | -2.84 | -2.72 | -2.49 | ▂▇▇▆▁ |
PLGF | 0 | 1 | 3.88 | 0.38 | 2.64 | 3.69 | 3.89 | 4.12 | 4.71 | ▁▂▇▇▂ |
PYY | 0 | 1 | 2.98 | 0.29 | 2.40 | 2.83 | 3.00 | 3.18 | 3.74 | ▃▇▇▅▁ |
Pancreatic_polypeptide | 0 | 1 | -0.01 | 0.70 | -1.61 | -0.51 | 0.14 | 0.47 | 1.50 | ▂▅▅▇▁ |
Prolactin | 0 | 1 | 0.05 | 0.29 | -0.39 | -0.17 | 0.00 | 0.18 | 0.79 | ▇▇▆▃▂ |
Prostatic_Acid_Phosphatase | 0 | 1 | -1.69 | 0.05 | -1.80 | -1.74 | -1.69 | -1.66 | -1.54 | ▂▇▇▂▁ |
Protein_S | 0 | 1 | -2.27 | 0.41 | -3.15 | -2.58 | -2.26 | -1.92 | -1.55 | ▃▅▇▇▆ |
Pulmonary_and_Activation_Regulat | 0 | 1 | -1.50 | 0.45 | -2.44 | -1.83 | -1.51 | -1.17 | -0.45 | ▃▇▆▇▂ |
RANTES | 0 | 1 | -6.54 | 0.29 | -7.24 | -6.73 | -6.57 | -6.39 | -5.84 | ▁▆▇▃▂ |
Resistin | 0 | 1 | -18.24 | 5.18 | -30.16 | -22.13 | -18.01 | -15.20 | -6.59 | ▂▅▇▅▂ |
S100b | 0 | 1 | 1.18 | 0.37 | 0.19 | 0.96 | 1.16 | 1.38 | 2.20 | ▁▃▇▂▁ |
SGOT | 0 | 1 | -0.49 | 0.37 | -1.90 | -0.75 | -0.48 | -0.21 | 0.18 | ▁▁▆▇▅ |
SHBG | 0 | 1 | -2.69 | 0.49 | -3.73 | -3.05 | -2.71 | -2.34 | -1.56 | ▂▇▇▆▂ |
SOD | 0 | 1 | 5.30 | 0.41 | 4.38 | 5.01 | 5.31 | 5.55 | 6.46 | ▂▇▇▃▁ |
Serum_Amyloid_P | 0 | 1 | -6.08 | 0.52 | -7.18 | -6.44 | -6.21 | -5.61 | -4.70 | ▂▇▅▃▁ |
Sortilin | 0 | 1 | 3.79 | 0.87 | 1.51 | 3.18 | 3.87 | 4.37 | 5.68 | ▂▅▇▆▃ |
Stem_Cell_Factor | 0 | 1 | 3.27 | 0.35 | 2.22 | 3.04 | 3.31 | 3.47 | 4.08 | ▁▅▇▇▂ |
TGF_alpha | 0 | 1 | 9.78 | 1.11 | 7.50 | 9.06 | 9.60 | 10.61 | 13.08 | ▃▇▆▃▁ |
TIMP_1 | 0 | 1 | 11.52 | 1.64 | 8.20 | 10.53 | 11.34 | 12.35 | 16.55 | ▅▇▇▃▁ |
TNF_RII | 0 | 1 | -0.63 | 0.36 | -1.66 | -0.87 | -0.65 | -0.33 | 0.41 | ▁▅▇▅▁ |
TRAIL_R3 | 0 | 1 | -0.59 | 0.23 | -1.31 | -0.73 | -0.56 | -0.47 | 0.10 | ▁▃▇▂▁ |
TTR_prealbumin | 0 | 1 | 2.85 | 0.14 | 2.48 | 2.77 | 2.89 | 2.94 | 3.09 | ▁▃▅▇▅ |
Tamm_Horsfall_Protein_THP | 0 | 1 | -3.12 | 0.03 | -3.21 | -3.14 | -3.13 | -3.10 | -3.04 | ▁▅▇▃▂ |
Thrombomodulin | 0 | 1 | -1.53 | 0.25 | -2.05 | -1.68 | -1.53 | -1.34 | -1.02 | ▂▆▇▆▂ |
Thrombopoietin | 0 | 1 | -0.72 | 0.21 | -1.54 | -0.84 | -0.70 | -0.63 | -0.30 | ▁▁▆▇▃ |
Thymus_Expressed_Chemokine_TECK | 0 | 1 | 3.77 | 0.73 | 2.14 | 3.28 | 3.75 | 4.32 | 5.68 | ▃▆▇▅▂ |
Thyroid_Stimulating_Hormone | 0 | 1 | -4.22 | 0.79 | -6.19 | -4.73 | -4.27 | -3.83 | -2.04 | ▁▅▇▃▁ |
Thyroxine_Binding_Globulin | 0 | 1 | -1.49 | 0.40 | -2.30 | -1.71 | -1.49 | -1.24 | -0.60 | ▃▅▇▆▂ |
Tissue_Factor | 0 | 1 | 1.14 | 0.54 | 0.00 | 0.71 | 1.15 | 1.51 | 2.71 | ▂▆▇▂▁ |
Transferrin | 0 | 1 | 2.90 | 0.29 | 2.28 | 2.71 | 2.89 | 3.14 | 3.50 | ▃▃▇▅▃ |
Trefoil_Factor_3_TFF3 | 0 | 1 | -3.95 | 0.34 | -4.91 | -4.20 | -3.91 | -3.77 | -3.17 | ▁▂▇▆▂ |
VCAM_1 | 0 | 1 | 2.64 | 0.32 | 2.03 | 2.42 | 2.67 | 2.83 | 3.47 | ▅▃▇▃▁ |
VEGF | 0 | 1 | 16.70 | 2.10 | 12.23 | 15.03 | 17.08 | 18.19 | 21.18 | ▃▇▇▆▂ |
Vitronectin | 0 | 1 | -0.27 | 0.32 | -1.08 | -0.46 | -0.28 | -0.05 | 0.41 | ▁▅▇▆▂ |
von_Willebrand_Factor | 0 | 1 | -4.01 | 0.38 | -4.92 | -4.27 | -4.02 | -3.73 | -3.06 | ▂▇▇▆▂ |
###################################
# Verifying the data dimensions
# for the test set
###################################
dim(PMA_PreModelling_Test)
## [1] 66 128
##################################
# Loading dataset
##################################
# Bind the pre-modelling train set to EDA; the exploratory steps that
# follow read from EDA rather than from PMA_PreModelling_Train directly.
EDA <- PMA_PreModelling_Train
##################################
# Listing all predictors
##################################
# Every column of EDA except the response column "Class".
EDA.Predictors <- EDA[, !(names(EDA) %in% c("Class"))]
##################################
# Listing all numeric predictors
##################################
# vapply() guarantees a logical mask (one entry per column), and
# drop = FALSE keeps the result a data frame even if only a single
# column happens to be numeric.
EDA.Predictors.Numeric <- EDA.Predictors[, vapply(EDA.Predictors, is.numeric, logical(1)), drop = FALSE]
ncol(EDA.Predictors.Numeric)
## [1] 124
names(EDA.Predictors.Numeric)
## [1] "ACE_CD143_Angiotensin_Converti" "ACTH_Adrenocorticotropic_Hormon"
## [3] "AXL" "Adiponectin"
## [5] "Alpha_1_Antichymotrypsin" "Alpha_1_Antitrypsin"
## [7] "Alpha_1_Microglobulin" "Alpha_2_Macroglobulin"
## [9] "Angiopoietin_2_ANG_2" "Angiotensinogen"
## [11] "Apolipoprotein_A_IV" "Apolipoprotein_A1"
## [13] "Apolipoprotein_A2" "Apolipoprotein_B"
## [15] "Apolipoprotein_CI" "Apolipoprotein_CIII"
## [17] "Apolipoprotein_D" "Apolipoprotein_E"
## [19] "Apolipoprotein_H" "B_Lymphocyte_Chemoattractant_BL"
## [21] "BMP_6" "Beta_2_Microglobulin"
## [23] "Betacellulin" "C_Reactive_Protein"
## [25] "CD40" "CD5L"
## [27] "Calbindin" "Calcitonin"
## [29] "CgA" "Clusterin_Apo_J"
## [31] "Complement_3" "Complement_Factor_H"
## [33] "Connective_Tissue_Growth_Factor" "Cortisol"
## [35] "Creatine_Kinase_MB" "Cystatin_C"
## [37] "EGF_R" "EN_RAGE"
## [39] "ENA_78" "Eotaxin_3"
## [41] "FAS" "FSH_Follicle_Stimulation_Hormon"
## [43] "Fas_Ligand" "Fatty_Acid_Binding_Protein"
## [45] "Ferritin" "Fetuin_A"
## [47] "Fibrinogen" "GRO_alpha"
## [49] "Gamma_Interferon_induced_Monokin" "Glutathione_S_Transferase_alpha"
## [51] "HB_EGF" "HCC_4"
## [53] "Hepatocyte_Growth_Factor_HGF" "I_309"
## [55] "ICAM_1" "IGF_BP_2"
## [57] "IL_11" "IL_13"
## [59] "IL_16" "IL_17E"
## [61] "IL_1alpha" "IL_3"
## [63] "IL_4" "IL_5"
## [65] "IL_6" "IL_6_Receptor"
## [67] "IL_7" "IL_8"
## [69] "IP_10_Inducible_Protein_10" "IgA"
## [71] "Insulin" "Kidney_Injury_Molecule_1_KIM_1"
## [73] "LOX_1" "Leptin"
## [75] "Lipoprotein_a" "MCP_1"
## [77] "MCP_2" "MIF"
## [79] "MIP_1alpha" "MIP_1beta"
## [81] "MMP_2" "MMP_3"
## [83] "MMP10" "MMP7"
## [85] "Myoglobin" "NT_proBNP"
## [87] "NrCAM" "Osteopontin"
## [89] "PAI_1" "PAPP_A"
## [91] "PLGF" "PYY"
## [93] "Pancreatic_polypeptide" "Prolactin"
## [95] "Prostatic_Acid_Phosphatase" "Protein_S"
## [97] "Pulmonary_and_Activation_Regulat" "RANTES"
## [99] "Resistin" "S100b"
## [101] "SGOT" "SHBG"
## [103] "SOD" "Serum_Amyloid_P"
## [105] "Sortilin" "Stem_Cell_Factor"
## [107] "TGF_alpha" "TIMP_1"
## [109] "TNF_RII" "TRAIL_R3"
## [111] "TTR_prealbumin" "Tamm_Horsfall_Protein_THP"
## [113] "Thrombomodulin" "Thrombopoietin"
## [115] "Thymus_Expressed_Chemokine_TECK" "Thyroid_Stimulating_Hormone"
## [117] "Thyroxine_Binding_Globulin" "Tissue_Factor"
## [119] "Transferrin" "Trefoil_Factor_3_TFF3"
## [121] "VCAM_1" "VEGF"
## [123] "Vitronectin" "von_Willebrand_Factor"
##################################
# Listing all factor predictors
##################################
# vapply() guarantees a logical mask (one entry per column), and
# drop = FALSE keeps the result a data frame even if only a single
# column happens to be a factor.
EDA.Predictors.Factor <- EDA.Predictors[, vapply(EDA.Predictors, is.factor, logical(1)), drop = FALSE]
ncol(EDA.Predictors.Factor)
## [1] 3
names(EDA.Predictors.Factor)
## [1] "E4" "E3" "E2"
##################################
# Formulating the box plots
##################################
# One box plot panel per numeric predictor, split by Class, drawn on
# pages of 4 x 4 panels with free axis scales per panel.
# All numeric predictors are passed as-is instead of indexing a fixed
# 1:124 range, so the call keeps working if the column count changes.
featurePlot(x = EDA.Predictors.Numeric,
            y = EDA$Class,
            plot = "box",
            scales = list(x = list(relation = "free", rot = 90),
                          y = list(relation = "free")),
            adjust = 1.5,
            pch = "|",
            layout = c(4, 4))
##################################
# Restructuring the dataset
# for barchart analysis
##################################
# Place the response next to the factor predictors, then give the
# first column the explicit name "Class".
EDA.Bar.Source <- as.data.frame(cbind(EDA$Class,
                                      EDA.Predictors.Factor))
names(EDA.Bar.Source) <- c("Class", names(EDA.Predictors.Factor))
ncol(EDA.Bar.Source)
## [1] 4
##################################
# Creating a function to formulate
# the proportions table
##################################
# Builds a long-format proportions table for one factor predictor.
#
# Args:
#   FactorVar: name (character string) of a column in the global
#              data frame EDA.Bar.Source.
#
# Returns:
#   A data frame with one row per (Class level, FactorVar level) pair:
#     column 1 - Class level,
#     column 2 - FactorVar level (renamed to "Class", see note below),
#     column 3 - Freq: share of each Class level within the FactorVar
#                level (prop.table margin 2, so each FactorVar level
#                sums to 1),
#     column 4 - Variable: FactorVar repeated on every row.
EDA.PropTable.Function <- function(FactorVar) {
  # Cross-tabulate Class against the requested factor predictor.
  EDA.Bar.Source.FactorVar <- EDA.Bar.Source[, c("Class", FactorVar)]
  EDA.Bar.Source.FactorVar.Prop <- as.data.frame(prop.table(table(EDA.Bar.Source.FactorVar), 2))
  # NOTE(review): this leaves two columns named "Class". Presumably the
  # common name lets the per-variable tables be stacked with rbind();
  # the barchart code addresses the columns by position, not by name.
  names(EDA.Bar.Source.FactorVar.Prop)[2] <- "Class"
  EDA.Bar.Source.FactorVar.Prop$Variable <- rep(FactorVar, nrow(EDA.Bar.Source.FactorVar.Prop))
  return(EDA.Bar.Source.FactorVar.Prop)
}
# Stack the proportions tables of the three genotype dummy variables.
EDA.Bar.Source.FactorVar.Prop <- rbind(EDA.PropTable.Function("E2"),
                                       EDA.PropTable.Function("E3"),
                                       EDA.PropTable.Function("E4"))
# Stacked bar chart with one panel per variable (column 4): proportion
# (column 3) against the predictor level (column 2), stacked by the
# values in column 1. Columns are addressed by position because
# columns 1 and 2 share the name "Class". The outer parentheses make
# the assignment print the chart as well.
(EDA.Barchart.FactorVar <- barchart(EDA.Bar.Source.FactorVar.Prop[,3] ~
                                      EDA.Bar.Source.FactorVar.Prop[,2] | EDA.Bar.Source.FactorVar.Prop[,4],
                                    data=EDA.Bar.Source.FactorVar.Prop,
                                    groups = EDA.Bar.Source.FactorVar.Prop[,1],
                                    stack=TRUE,
                                    ylab = "Proportion",
                                    xlab = "Class",
                                    auto.key = list(adj = 1),
                                    layout=(c(3,1))))
##################################
# Converting all predictors to numeric
# for both train and test data
##################################
# Coerce every column except the response "Class" to numeric, in place.
# as.numeric() on a factor returns its integer level codes, which is
# why the factor predictors E4/E3/E2 range from 1 to 2 in the summary
# printed below.
# seq_len() (instead of 1:ncol()) keeps the loop a no-op for a
# zero-column data frame.
for (i in seq_len(ncol(PMA_PreModelling_Train))) {
  if (names(PMA_PreModelling_Train)[i] != "Class") {
    PMA_PreModelling_Train[, i] <- as.numeric(PMA_PreModelling_Train[, i])
  }
}
summary(PMA_PreModelling_Train)
## ACE_CD143_Angiotensin_Converti ACTH_Adrenocorticotropic_Hormon
## Min. :-0.6756 Min. :-2.207
## 1st Qu.: 0.9462 1st Qu.:-1.715
## Median : 1.3013 Median :-1.561
## Mean : 1.3198 Mean :-1.538
## 3rd Qu.: 1.7191 3rd Qu.:-1.347
## Max. : 2.8398 Max. :-0.844
## AXL Adiponectin Alpha_1_Antichymotrypsin
## Min. :-0.9230 Min. :-6.725 Min. :0.2624
## 1st Qu.: 0.0000 1st Qu.:-5.669 1st Qu.:1.1314
## Median : 0.2804 Median :-5.185 Median :1.3610
## Mean : 0.3093 Mean :-5.201 Mean :1.3605
## 3rd Qu.: 0.6077 3rd Qu.:-4.780 3rd Qu.:1.5892
## Max. : 1.5214 Max. :-3.507 Max. :2.3026
## Alpha_1_Antitrypsin Alpha_1_Microglobulin Alpha_2_Macroglobulin
## Min. :-17.028 Min. :-4.343 Min. :-289.68
## 1st Qu.:-14.071 1st Qu.:-3.270 1st Qu.:-186.64
## Median :-13.004 Median :-2.937 Median :-160.01
## Mean :-13.052 Mean :-2.932 Mean :-158.61
## 3rd Qu.:-12.096 3rd Qu.:-2.590 3rd Qu.:-134.62
## Max. : -8.192 Max. :-1.772 Max. : -59.46
## Angiopoietin_2_ANG_2 Angiotensinogen Apolipoprotein_A_IV Apolipoprotein_A1
## Min. :-0.5447 Min. :1.752 Min. :-2.9565 Min. :-8.680
## 1st Qu.: 0.4700 1st Qu.:2.119 1st Qu.:-2.1203 1st Qu.:-7.763
## Median : 0.6419 Median :2.320 Median :-1.8326 Median :-7.470
## Mean : 0.6730 Mean :2.318 Mean :-1.8544 Mean :-7.483
## 3rd Qu.: 0.8755 3rd Qu.:2.497 3rd Qu.:-1.6094 3rd Qu.:-7.209
## Max. : 1.5261 Max. :2.881 Max. :-0.7765 Max. :-6.166
## Apolipoprotein_A2 Apolipoprotein_B Apolipoprotein_CI Apolipoprotein_CIII
## Min. :-1.8971 Min. :-9.937 Min. :-3.3242 Min. :-3.689
## 1st Qu.:-0.9676 1st Qu.:-6.630 1st Qu.:-1.8326 1st Qu.:-2.773
## Median :-0.6733 Median :-5.703 Median :-1.6094 Median :-2.526
## Mean :-0.6354 Mean :-5.578 Mean :-1.5833 Mean :-2.494
## 3rd Qu.:-0.3147 3rd Qu.:-4.539 3rd Qu.:-1.3667 3rd Qu.:-2.207
## Max. : 0.9555 Max. :-2.153 Max. :-0.2744 Max. :-1.238
## Apolipoprotein_D Apolipoprotein_E Apolipoprotein_H
## Min. :0.470 Min. :0.5911 Min. :-2.23379
## 1st Qu.:1.209 1st Qu.:2.3344 1st Qu.:-0.59782
## Median :1.411 Median :2.8181 Median :-0.37005
## Mean :1.440 Mean :2.8062 Mean :-0.32122
## 3rd Qu.:1.668 3rd Qu.:3.2863 3rd Qu.:-0.06112
## Max. :2.272 Max. :5.4442 Max. : 0.92696
## B_Lymphocyte_Chemoattractant_BL BMP_6 Beta_2_Microglobulin
## Min. :0.7318 Min. :-2.7612 Min. :-0.54473
## 1st Qu.:1.6731 1st Qu.:-2.1516 1st Qu.:-0.04082
## Median :1.9805 Median :-1.8774 Median : 0.18232
## Mean :2.0175 Mean :-1.9114 Mean : 0.16757
## 3rd Qu.:2.3714 3rd Qu.:-1.6753 3rd Qu.: 0.33647
## Max. :4.0237 Max. :-0.8166 Max. : 0.99325
## Betacellulin C_Reactive_Protein CD40 CD5L
## Min. :10.00 Min. :-8.517 Min. :-1.8644 Min. :-1.23787
## 1st Qu.:42.00 1st Qu.:-6.645 1st Qu.:-1.3761 1st Qu.:-0.35667
## Median :51.00 Median :-5.843 Median :-1.2734 Median :-0.06188
## Mean :51.01 Mean :-5.874 Mean :-1.2584 Mean :-0.05310
## 3rd Qu.:59.00 3rd Qu.:-5.083 3rd Qu.:-1.1238 3rd Qu.: 0.26236
## Max. :82.00 Max. :-2.937 Max. :-0.5475 Max. : 1.16315
## Calbindin Calcitonin CgA Clusterin_Apo_J
## Min. :10.96 Min. :-0.7134 Min. :135.6 Min. :1.872
## 1st Qu.:19.77 1st Qu.: 0.9555 1st Qu.:278.0 1st Qu.:2.708
## Median :22.25 Median : 1.6487 Median :331.5 Median :2.890
## Mean :22.43 Mean : 1.6788 Mean :333.3 Mean :2.882
## 3rd Qu.:24.80 3rd Qu.: 2.2824 3rd Qu.:392.1 3rd Qu.:3.045
## Max. :33.78 Max. : 3.8918 Max. :535.4 Max. :3.584
## Complement_3 Complement_Factor_H Connective_Tissue_Growth_Factor
## Min. :-23.387 Min. :-0.8387 Min. :0.1823
## 1st Qu.:-17.567 1st Qu.: 2.7531 1st Qu.:0.6419
## Median :-15.524 Median : 3.6000 Median :0.7885
## Mean :-15.610 Mean : 3.5541 Mean :0.7739
## 3rd Qu.:-13.882 3rd Qu.: 4.2548 3rd Qu.:0.9163
## Max. : -9.563 Max. : 7.6238 Max. :1.4110
## Cortisol Creatine_Kinase_MB Cystatin_C EGF_R
## Min. : 0.10 Min. :-1.872 Min. :7.432 Min. :-1.36135
## 1st Qu.: 9.80 1st Qu.:-1.724 1st Qu.:8.321 1st Qu.:-0.85727
## Median :12.00 Median :-1.671 Median :8.564 Median :-0.68354
## Mean :11.98 Mean :-1.674 Mean :8.586 Mean :-0.70130
## 3rd Qu.:14.00 3rd Qu.:-1.626 3rd Qu.:8.839 3rd Qu.:-0.54612
## Max. :29.00 Max. :-1.384 Max. :9.694 Max. :-0.06112
## EN_RAGE ENA_78 Eotaxin_3 FAS
## Min. :-8.3774 Min. :-1.405 Min. : 7.00 Min. :-1.5141
## 1st Qu.:-4.1997 1st Qu.:-1.381 1st Qu.: 44.00 1st Qu.:-0.7133
## Median :-3.6497 Median :-1.374 Median : 59.00 Median :-0.5276
## Mean :-3.6353 Mean :-1.372 Mean : 58.17 Mean :-0.5291
## 3rd Qu.:-3.1466 3rd Qu.:-1.364 3rd Qu.: 70.00 3rd Qu.:-0.3147
## Max. :-0.3857 Max. :-1.339 Max. :107.00 Max. : 0.3365
## FSH_Follicle_Stimulation_Hormon Fas_Ligand Fatty_Acid_Binding_Protein
## Min. :-2.11511 Min. :-0.1536 Min. :-1.0441
## 1st Qu.:-1.46606 1st Qu.: 2.3415 1st Qu.: 0.7998
## Median :-1.13570 Median : 3.1015 Median : 1.3865
## Mean :-1.14259 Mean : 2.9680 Mean : 1.3529
## 3rd Qu.:-0.87620 3rd Qu.: 3.6950 3rd Qu.: 1.8847
## Max. : 0.09715 Max. : 7.6328 Max. : 3.7055
## Ferritin Fetuin_A Fibrinogen GRO_alpha
## Min. :0.6077 Min. :0.470 Min. :-8.874 Min. :1.271
## 1st Qu.:2.2895 1st Qu.:1.099 1st Qu.:-7.717 1st Qu.:1.351
## Median :2.7749 Median :1.308 Median :-7.323 Median :1.382
## Mean :2.7646 Mean :1.350 Mean :-7.356 Mean :1.378
## 3rd Qu.:3.2915 3rd Qu.:1.609 3rd Qu.:-7.013 3rd Qu.:1.406
## Max. :4.6333 Max. :2.251 Max. :-5.843 Max. :1.495
## Gamma_Interferon_induced_Monokin Glutathione_S_Transferase_alpha
## Min. :2.393 Min. :0.5238
## 1st Qu.:2.707 1st Qu.:0.8439
## Median :2.783 Median :0.9677
## Mean :2.786 Mean :0.9512
## 3rd Qu.:2.873 3rd Qu.:1.0344
## Max. :3.065 Max. :1.3176
## HB_EGF HCC_4 Hepatocyte_Growth_Factor_HGF I_309
## Min. : 2.103 Min. :-4.510 Min. :-0.6349 Min. :1.758
## 1st Qu.: 5.786 1st Qu.:-3.730 1st Qu.: 0.0000 1st Qu.:2.708
## Median : 6.703 Median :-3.507 Median : 0.1823 Median :2.944
## Mean : 6.833 Mean :-3.500 Mean : 0.1963 Mean :2.958
## 3rd Qu.: 7.865 3rd Qu.:-3.270 3rd Qu.: 0.4055 3rd Qu.:3.219
## Max. :10.695 Max. :-2.120 Max. : 0.8755 Max. :4.143
## ICAM_1 IGF_BP_2 IL_11 IL_13
## Min. :-1.5332 Min. :4.635 Min. :1.755 Min. :1.259
## 1st Qu.:-0.8298 1st Qu.:5.179 1st Qu.:3.706 1st Qu.:1.274
## Median :-0.5903 Median :5.323 Median :4.805 Median :1.283
## Mean :-0.5908 Mean :5.317 Mean :4.725 Mean :1.284
## 3rd Qu.:-0.3828 3rd Qu.:5.453 3rd Qu.:5.776 3rd Qu.:1.290
## Max. : 0.5171 Max. :5.948 Max. :8.491 Max. :1.321
## IL_16 IL_17E IL_1alpha IL_3
## Min. :1.187 Min. :1.052 Min. :-8.517 Min. :-5.915
## 1st Qu.:2.521 1st Qu.:4.149 1st Qu.:-7.824 1st Qu.:-4.269
## Median :2.909 Median :4.749 Median :-7.524 Median :-3.912
## Mean :2.929 Mean :4.855 Mean :-7.514 Mean :-3.941
## 3rd Qu.:3.351 3rd Qu.:5.631 3rd Qu.:-7.264 3rd Qu.:-3.631
## Max. :4.937 Max. :8.952 Max. :-5.952 Max. :-2.453
## IL_4 IL_5 IL_6 IL_6_Receptor
## Min. :0.5306 Min. :-1.4271 Min. :-1.5343 Min. :-0.67562
## 1st Qu.:1.4586 1st Qu.:-0.1221 1st Qu.:-0.4127 1st Qu.:-0.12541
## Median :1.8083 Median : 0.1823 Median :-0.1599 Median : 0.09669
## Mean :1.7732 Mean : 0.1866 Mean :-0.1540 Mean : 0.09492
## 3rd Qu.:2.1459 3rd Qu.: 0.4700 3rd Qu.: 0.1410 3rd Qu.: 0.35404
## Max. :3.0445 Max. : 1.9459 Max. : 1.8138 Max. : 0.83099
## IL_7 IL_8 IP_10_Inducible_Protein_10 IgA
## Min. :0.5598 Min. :1.574 Min. :4.317 Min. :-10.520
## 1st Qu.:2.1548 1st Qu.:1.680 1st Qu.:5.398 1st Qu.: -6.645
## Median :2.7934 Median :1.705 Median :5.753 Median : -6.119
## Mean :2.8392 Mean :1.704 Mean :5.755 Mean : -6.121
## 3rd Qu.:3.7055 3rd Qu.:1.728 3rd Qu.:6.064 3rd Qu.: -5.573
## Max. :5.7056 Max. :1.807 Max. :7.501 Max. : -4.200
## Insulin Kidney_Injury_Molecule_1_KIM_1 LOX_1
## Min. :-2.1692 Min. :-1.256 Min. :0.000
## 1st Qu.:-1.4466 1st Qu.:-1.204 1st Qu.:1.030
## Median :-1.2462 Median :-1.183 Median :1.281
## Mean :-1.2329 Mean :-1.185 Mean :1.283
## 3rd Qu.:-1.0340 3rd Qu.:-1.164 3rd Qu.:1.526
## Max. :-0.1586 Max. :-1.105 Max. :2.272
## Leptin Lipoprotein_a MCP_1 MCP_2
## Min. :-2.1468 Min. :-6.812 Min. :5.826 Min. :0.4006
## 1st Qu.:-1.6996 1st Qu.:-5.308 1st Qu.:6.319 1st Qu.:1.5304
## Median :-1.5047 Median :-4.605 Median :6.494 Median :1.8528
## Mean :-1.5042 Mean :-4.417 Mean :6.497 Mean :1.8691
## 3rd Qu.:-1.3295 3rd Qu.:-3.490 3rd Qu.:6.678 3rd Qu.:2.1821
## Max. :-0.6206 Max. :-1.386 Max. :7.230 Max. :4.0237
## MIF MIP_1alpha MIP_1beta MMP_2
## Min. :-2.847 Min. :0.9346 Min. :1.946 Min. :0.09809
## 1st Qu.:-2.120 1st Qu.:3.3377 1st Qu.:2.565 1st Qu.:2.33214
## Median :-1.897 Median :4.0495 Median :2.833 Median :2.81512
## Mean :-1.864 Mean :4.0489 Mean :2.814 Mean :2.87534
## 3rd Qu.:-1.661 3rd Qu.:4.6857 3rd Qu.:3.045 3rd Qu.:3.55121
## Max. :-0.844 Max. :6.7959 Max. :4.007 Max. :5.35895
## MMP_3 MMP10 MMP7 Myoglobin
## Min. :-4.4228 Min. :-4.934 Min. :-8.3975 Min. :-3.1701
## 1st Qu.:-2.7489 1st Qu.:-3.938 1st Qu.:-4.8199 1st Qu.:-2.0402
## Median :-2.4534 Median :-3.650 Median :-3.7735 Median :-1.4697
## Mean :-2.4455 Mean :-3.635 Mean :-3.7894 Mean :-1.3671
## 3rd Qu.:-2.1203 3rd Qu.:-3.352 3rd Qu.:-2.7140 3rd Qu.:-0.7988
## Max. :-0.5276 Max. :-2.207 Max. :-0.2222 Max. : 1.7750
## NT_proBNP NrCAM Osteopontin PAI_1
## Min. :3.178 Min. :2.639 Min. :4.111 Min. :-0.99085
## 1st Qu.:4.350 1st Qu.:3.998 1st Qu.:4.963 1st Qu.:-0.16655
## Median :4.554 Median :4.394 Median :5.187 Median : 0.09396
## Mean :4.552 Mean :4.362 Mean :5.204 Mean : 0.07743
## 3rd Qu.:4.775 3rd Qu.:4.749 3rd Qu.:5.442 3rd Qu.: 0.32005
## Max. :5.886 Max. :6.011 Max. :6.308 Max. : 1.16611
## PAPP_A PLGF PYY Pancreatic_polypeptide
## Min. :-3.311 Min. :2.485 Min. :2.186 Min. :-2.12026
## 1st Qu.:-2.936 1st Qu.:3.638 1st Qu.:2.833 1st Qu.:-0.52763
## Median :-2.871 Median :3.871 Median :2.996 Median :-0.04082
## Mean :-2.854 Mean :3.912 Mean :3.015 Mean :-0.01323
## 3rd Qu.:-2.749 3rd Qu.:4.205 3rd Qu.:3.178 3rd Qu.: 0.53063
## Max. :-2.520 Max. :5.170 Max. :3.932 Max. : 1.93152
## Prolactin Prostatic_Acid_Phosphatase Protein_S
## Min. :-1.30933 Min. :-1.934 Min. :-3.338
## 1st Qu.:-0.13926 1st Qu.:-1.717 1st Qu.:-2.464
## Median : 0.00000 Median :-1.690 Median :-2.259
## Mean : 0.04495 Mean :-1.685 Mean :-2.240
## 3rd Qu.: 0.25799 3rd Qu.:-1.654 3rd Qu.:-2.000
## Max. : 0.99325 Max. :-1.424 Max. :-1.221
## Pulmonary_and_Activation_Regulat RANTES Resistin
## Min. :-2.5133 Min. :-7.222 Min. :-34.967
## 1st Qu.:-1.8326 1st Qu.:-6.725 1st Qu.:-21.468
## Median :-1.5141 Median :-6.502 Median :-17.466
## Mean :-1.4880 Mean :-6.511 Mean :-17.641
## 3rd Qu.:-1.1712 3rd Qu.:-6.320 3rd Qu.:-13.501
## Max. :-0.2744 Max. :-5.547 Max. : -2.239
## S100b SGOT SHBG SOD
## Min. :0.1874 Min. :-1.3471 Min. :-4.135 Min. :4.317
## 1st Qu.:1.0012 1st Qu.:-0.6349 1st Qu.:-2.813 1st Qu.:5.094
## Median :1.2544 Median :-0.4005 Median :-2.489 Median :5.366
## Mean :1.2505 Mean :-0.4057 Mean :-2.477 Mean :5.336
## 3rd Qu.:1.4996 3rd Qu.:-0.1985 3rd Qu.:-2.120 3rd Qu.:5.583
## Max. :2.3726 Max. : 0.7419 Max. :-1.109 Max. :6.317
## Serum_Amyloid_P Sortilin Stem_Cell_Factor TGF_alpha
## Min. :-7.506 Min. :1.654 Min. :2.251 Min. : 6.843
## 1st Qu.:-6.377 1st Qu.:3.343 1st Qu.:3.045 1st Qu.: 8.859
## Median :-6.032 Median :3.867 Median :3.296 Median : 9.919
## Mean :-6.017 Mean :3.852 Mean :3.301 Mean : 9.801
## 3rd Qu.:-5.655 3rd Qu.:4.371 3rd Qu.:3.526 3rd Qu.:10.695
## Max. :-4.646 Max. :6.225 Max. :4.277 Max. :13.827
## TIMP_1 TNF_RII TRAIL_R3 TTR_prealbumin
## Min. : 1.742 Min. :-1.6607 Min. :-1.2107 Min. :2.485
## 1st Qu.:10.490 1st Qu.:-0.8210 1st Qu.:-0.7008 1st Qu.:2.773
## Median :11.565 Median :-0.5978 Median :-0.5317 Median :2.833
## Mean :11.750 Mean :-0.5939 Mean :-0.5394 Mean :2.854
## 3rd Qu.:12.697 3rd Qu.:-0.3784 3rd Qu.:-0.3849 3rd Qu.:2.944
## Max. :18.881 Max. : 0.4700 Max. : 0.2694 Max. :3.332
## Tamm_Horsfall_Protein_THP Thrombomodulin Thrombopoietin
## Min. :-3.206 Min. :-2.0377 Min. :-1.53957
## 1st Qu.:-3.137 1st Qu.:-1.6256 1st Qu.:-0.88645
## Median :-3.117 Median :-1.4920 Median :-0.75100
## Mean :-3.116 Mean :-1.5050 Mean :-0.75419
## 3rd Qu.:-3.096 3rd Qu.:-1.3406 3rd Qu.:-0.62887
## Max. :-2.995 Max. :-0.8166 Max. : 0.09762
## Thymus_Expressed_Chemokine_TECK Thyroid_Stimulating_Hormone
## Min. :1.508 Min. :-6.190
## 1st Qu.:3.343 1st Qu.:-4.962
## Median :3.810 Median :-4.510
## Mean :3.848 Mean :-4.499
## 3rd Qu.:4.316 3rd Qu.:-4.017
## Max. :6.225 Max. :-1.715
## Thyroxine_Binding_Globulin Tissue_Factor Transferrin
## Min. :-2.4769 Min. :-0.2107 Min. :1.932
## 1st Qu.:-1.7720 1st Qu.: 0.8329 1st Qu.:2.708
## Median :-1.5141 Median : 1.2238 Median :2.890
## Mean :-1.4788 Mean : 1.1702 Mean :2.909
## 3rd Qu.:-1.2379 3rd Qu.: 1.4816 3rd Qu.:3.091
## Max. :-0.2107 Max. : 2.4849 Max. :3.761
## Trefoil_Factor_3_TFF3 VCAM_1 VEGF Vitronectin
## Min. :-4.744 Min. :1.723 Min. :11.83 Min. :-1.42712
## 1st Qu.:-4.135 1st Qu.:2.485 1st Qu.:15.77 1st Qu.:-0.51083
## Median :-3.863 Median :2.708 Median :17.08 Median :-0.30111
## Mean :-3.876 Mean :2.688 Mean :16.99 Mean :-0.28473
## 3rd Qu.:-3.650 3rd Qu.:2.890 3rd Qu.:18.10 3rd Qu.:-0.03564
## Max. :-2.957 Max. :3.689 Max. :22.38 Max. : 0.53063
## von_Willebrand_Factor Class E4 E3
## Min. :-4.991 Impaired: 73 Min. :1.000 Min. :1.000
## 1st Qu.:-4.200 Control :194 1st Qu.:1.000 1st Qu.:2.000
## Median :-3.912 Median :1.000 Median :2.000
## Mean :-3.906 Mean :1.401 Mean :1.918
## 3rd Qu.:-3.612 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :-2.957 Max. :2.000 Max. :2.000
## E2
## Min. :1.000
## 1st Qu.:1.000
## Median :1.000
## Mean :1.161
## 3rd Qu.:1.000
## Max. :2.000
# Coerce every column of the test set except the response "Class" to
# numeric, in place. as.numeric() on a factor returns its integer
# level codes rather than the original labels.
# seq_len() (instead of 1:ncol()) keeps the loop a no-op for a
# zero-column data frame.
for (i in seq_len(ncol(PMA_PreModelling_Test))) {
  if (names(PMA_PreModelling_Test)[i] != "Class") {
    PMA_PreModelling_Test[, i] <- as.numeric(PMA_PreModelling_Test[, i])
  }
}
summary(PMA_PreModelling_Test)
## ACE_CD143_Angiotensin_Converti ACTH_Adrenocorticotropic_Hormon
## Min. :-0.5473 Min. :-2.2073
## 1st Qu.: 0.9462 1st Qu.:-1.7148
## Median : 1.3013 Median :-1.5374
## Mean : 1.3105 Mean :-1.5311
## 3rd Qu.: 1.6320 3rd Qu.:-1.3863
## Max. : 3.0890 Max. :-0.7985
## AXL Adiponectin Alpha_1_Antichymotrypsin
## Min. :-0.73509 Min. :-7.059 Min. :0.1823
## 1st Qu.:-0.08175 1st Qu.:-5.737 1st Qu.:1.0647
## Median : 0.28035 Median :-5.360 Median :1.3083
## Mean : 0.25373 Mean :-5.298 Mean :1.3077
## 3rd Qu.: 0.60768 3rd Qu.:-4.917 3rd Qu.:1.5686
## Max. : 1.28634 Max. :-3.474 Max. :2.2192
## Alpha_1_Antitrypsin Alpha_1_Microglobulin Alpha_2_Macroglobulin
## Min. :-18.17 Min. :-4.135 Min. :-238.64
## 1st Qu.:-14.70 1st Qu.:-3.284 1st Qu.:-186.64
## Median :-13.59 Median :-3.006 Median :-162.93
## Mean :-13.49 Mean :-2.983 Mean :-162.89
## 3rd Qu.:-12.31 3rd Qu.:-2.674 3rd Qu.:-136.53
## Max. :-10.06 Max. :-1.897 Max. : -50.17
## Angiopoietin_2_ANG_2 Angiotensinogen Apolipoprotein_A_IV Apolipoprotein_A1
## Min. :-0.05129 Min. :1.710 Min. :-2.749 Min. :-8.568
## 1st Qu.: 0.35372 1st Qu.:2.068 1st Qu.:-2.186 1st Qu.:-7.818
## Median : 0.55921 Median :2.276 Median :-1.897 Median :-7.497
## Mean : 0.60278 Mean :2.274 Mean :-1.867 Mean :-7.488
## 3rd Qu.: 0.78846 3rd Qu.:2.430 3rd Qu.:-1.526 3rd Qu.:-7.176
## Max. : 1.77495 Max. :2.752 Max. :-1.109 Max. :-6.645
## Apolipoprotein_A2 Apolipoprotein_B Apolipoprotein_CI Apolipoprotein_CIII
## Min. :-1.9661 Min. :-8.192 Min. :-2.847 Min. :-3.863
## 1st Qu.:-0.9416 1st Qu.:-6.748 1st Qu.:-1.897 1st Qu.:-2.781
## Median :-0.7032 Median :-5.819 Median :-1.609 Median :-2.557
## Mean :-0.6902 Mean :-5.649 Mean :-1.625 Mean :-2.523
## 3rd Qu.:-0.3533 3rd Qu.:-4.603 3rd Qu.:-1.309 3rd Qu.:-2.231
## Max. : 0.5306 Max. :-2.339 Max. :-0.462 Max. :-1.386
## Apolipoprotein_D Apolipoprotein_E Apolipoprotein_H
## Min. :0.2624 Min. :0.6626 Min. :-1.1609
## 1st Qu.:1.1314 1st Qu.:2.1526 1st Qu.:-0.5317
## Median :1.3863 Median :2.8181 Median :-0.2897
## Mean :1.3943 Mean :2.7160 Mean :-0.3212
## 3rd Qu.:1.6864 3rd Qu.:3.2363 3rd Qu.:-0.1032
## Max. :2.6391 Max. :4.6844 Max. : 0.4402
## B_Lymphocyte_Chemoattractant_BL BMP_6 Beta_2_Microglobulin
## Min. :0.7318 Min. :-2.669 Min. :-0.51083
## 1st Qu.:1.5304 1st Qu.:-2.152 1st Qu.:-0.06188
## Median :1.8528 Median :-1.964 Median : 0.18232
## Mean :1.8766 Mean :-1.937 Mean : 0.15566
## 3rd Qu.:2.3714 3rd Qu.:-1.675 3rd Qu.: 0.40547
## Max. :2.9757 Max. :-1.181 Max. : 0.83291
## Betacellulin C_Reactive_Protein CD40 CD5L
## Min. :32.00 Min. :-8.112 Min. :-1.9390 Min. :-1.96611
## 1st Qu.:46.00 1st Qu.:-6.725 1st Qu.:-1.4420 1st Qu.:-0.36747
## Median :51.00 Median :-6.166 Median :-1.2574 Median :-0.05135
## Mean :52.74 Mean :-5.997 Mean :-1.2773 Mean :-0.08760
## 3rd Qu.:59.75 3rd Qu.:-5.369 3rd Qu.:-1.1034 3rd Qu.: 0.24235
## Max. :80.00 Max. :-3.411 Max. :-0.7766 Max. : 0.91629
## Calbindin Calcitonin CgA Clusterin_Apo_J
## Min. :10.81 Min. :-0.7134 Min. :166.6 Min. :1.932
## 1st Qu.:18.88 1st Qu.: 1.2014 1st Qu.:268.2 1st Qu.:2.565
## Median :21.06 Median : 1.6849 Median :324.7 Median :2.833
## Mean :21.49 Mean : 1.7250 Mean :320.2 Mean :2.845
## 3rd Qu.:24.00 3rd Qu.: 2.2618 3rd Qu.:362.3 3rd Qu.:3.045
## Max. :35.36 Max. : 4.1109 Max. :494.5 Max. :3.761
## Complement_3 Complement_Factor_H Connective_Tissue_Growth_Factor
## Min. :-22.40 Min. :0.2766 Min. :0.09531
## 1st Qu.:-17.50 1st Qu.:2.6019 1st Qu.:0.58779
## Median :-15.90 Median :3.3983 Median :0.74194
## Mean :-15.91 Mean :3.3897 Mean :0.74507
## 3rd Qu.:-14.34 3rd Qu.:4.2548 3rd Qu.:0.87547
## Max. :-10.23 Max. :6.5597 Max. :1.41099
## Cortisol Creatine_Kinase_MB Cystatin_C EGF_R
## Min. : 0.10 Min. :-1.872 Min. :7.728 Min. :-1.2694
## 1st Qu.: 8.90 1st Qu.:-1.721 1st Qu.:8.301 1st Qu.:-0.8859
## Median :10.00 Median :-1.651 Median :8.544 Median :-0.6917
## Mean :10.46 Mean :-1.652 Mean :8.576 Mean :-0.6965
## 3rd Qu.:12.00 3rd Qu.:-1.590 3rd Qu.:8.837 3rd Qu.:-0.5034
## Max. :22.00 Max. :-1.434 Max. :9.694 Max. : 0.1891
## EN_RAGE ENA_78 Eotaxin_3 FAS
## Min. :-8.3774 Min. :-1.405 Min. : 23.00 Min. :-1.1087
## 1st Qu.:-4.1836 1st Qu.:-1.382 1st Qu.: 43.00 1st Qu.:-0.7133
## Median :-3.6889 Median :-1.374 Median : 54.00 Median :-0.5798
## Mean :-3.5986 Mean :-1.376 Mean : 55.55 Mean :-0.5414
## 3rd Qu.:-3.2189 3rd Qu.:-1.368 3rd Qu.: 64.00 3rd Qu.:-0.3355
## Max. :-0.8675 Max. :-1.353 Max. :107.00 Max. : 0.1823
## FSH_Follicle_Stimulation_Hormon Fas_Ligand Fatty_Acid_Binding_Protein
## Min. :-1.8101 Min. :0.288 Min. :-0.4559
## 1st Qu.:-1.2694 1st Qu.:2.073 1st Qu.: 0.7998
## Median :-0.9763 Median :2.665 Median : 1.1866
## Mean :-1.0597 Mean :2.649 Mean : 1.2884
## 3rd Qu.:-0.8068 3rd Qu.:3.162 3rd Qu.: 1.9192
## Max. :-0.4757 Max. :5.377 Max. : 3.2188
## Ferritin Fetuin_A Fibrinogen GRO_alpha
## Min. :0.8983 Min. :0.5306 Min. :-9.373 Min. :1.271
## 1st Qu.:2.1473 1st Qu.:1.0296 1st Qu.:-7.799 1st Qu.:1.351
## Median :2.6260 Median :1.3083 Median :-7.316 Median :1.372
## Mean :2.7069 Mean :1.3116 Mean :-7.360 Mean :1.378
## 3rd Qu.:3.1672 3rd Qu.:1.6094 3rd Qu.:-6.970 3rd Qu.:1.398
## Max. :4.9282 Max. :2.2083 Max. :-6.166 Max. :1.514
## Gamma_Interferon_induced_Monokin Glutathione_S_Transferase_alpha
## Min. :2.545 Min. :0.5661
## 1st Qu.:2.698 1st Qu.:0.8257
## Median :2.768 Median :0.9493
## Mean :2.772 Mean :0.9440
## 3rd Qu.:2.829 3rd Qu.:1.0457
## Max. :3.046 Max. :1.3102
## HB_EGF HCC_4 Hepatocyte_Growth_Factor_HGF I_309
## Min. : 3.521 Min. :-4.343 Min. :-0.61619 Min. :2.041
## 1st Qu.: 5.949 1st Qu.:-3.772 1st Qu.:-0.05661 1st Qu.:2.724
## Median : 6.980 Median :-3.540 Median : 0.18232 Median :2.944
## Mean : 6.844 Mean :-3.538 Mean : 0.18076 Mean :2.921
## 3rd Qu.: 7.745 3rd Qu.:-3.352 3rd Qu.: 0.33647 3rd Qu.:3.135
## Max. :10.359 Max. :-2.489 Max. : 1.09861 Max. :3.689
## ICAM_1 IGF_BP_2 IL_11 IL_13
## Min. :-1.4661 Min. :4.718 Min. :2.031 Min. :1.232
## 1st Qu.:-0.7671 1st Qu.:5.127 1st Qu.:3.960 1st Qu.:1.274
## Median :-0.5903 Median :5.255 Median :4.838 Median :1.283
## Mean :-0.5958 Mean :5.263 Mean :4.651 Mean :1.283
## 3rd Qu.:-0.3574 3rd Qu.:5.402 3rd Qu.:5.482 3rd Qu.:1.292
## Max. : 0.3602 Max. :5.916 Max. :8.692 Max. :1.310
## IL_16 IL_17E IL_1alpha IL_3
## Min. :0.9568 Min. :1.582 Min. :-8.468 Min. :-5.521
## 1st Qu.:2.4411 1st Qu.:3.637 1st Qu.:-7.849 1st Qu.:-4.324
## Median :2.8763 Median :4.723 Median :-7.562 Median :-3.963
## Mean :2.8176 Mean :4.774 Mean :-7.549 Mean :-3.976
## 3rd Qu.:3.3514 3rd Qu.:5.415 3rd Qu.:-7.279 3rd Qu.:-3.576
## Max. :4.1028 Max. :8.081 Max. :-6.377 Max. :-3.079
## IL_4 IL_5 IL_6 IL_6_Receptor
## Min. :0.5306 Min. :-1.04982 Min. :-1.53428 Min. :-0.74560
## 1st Qu.:1.4586 1st Qu.:-0.03062 1st Qu.:-0.40924 1st Qu.:-0.20131
## Median :1.7226 Median : 0.22234 Median :-0.07205 Median : 0.00000
## Mean :1.7445 Mean : 0.22853 Mean :-0.05216 Mean : 0.06213
## 3rd Qu.:2.0669 3rd Qu.: 0.53063 3rd Qu.: 0.34805 3rd Qu.: 0.27297
## Max. :2.7081 Max. : 1.13140 Max. : 1.00562 Max. : 0.77048
## IL_7 IL_8 IP_10_Inducible_Protein_10 IgA
## Min. :1.310 Min. :1.615 Min. :4.263 Min. :-7.621
## 1st Qu.:2.379 1st Qu.:1.684 1st Qu.:5.323 1st Qu.:-6.571
## Median :3.148 Median :1.702 Median :5.617 Median :-6.012
## Mean :3.143 Mean :1.704 Mean :5.636 Mean :-6.066
## 3rd Qu.:3.706 3rd Qu.:1.725 3rd Qu.:5.917 3rd Qu.:-5.606
## Max. :5.000 Max. :1.836 Max. :7.208 Max. :-4.733
## Insulin Kidney_Injury_Molecule_1_KIM_1 LOX_1
## Min. :-2.0099 Min. :-1.251 Min. :0.0000
## 1st Qu.:-1.4466 1st Qu.:-1.209 1st Qu.:0.9649
## Median :-1.2169 Median :-1.187 Median :1.2238
## Mean :-1.1998 Mean :-1.188 Mean :1.2085
## 3rd Qu.:-1.0105 3rd Qu.:-1.166 3rd Qu.:1.4351
## Max. :-0.5025 Max. :-1.124 Max. :2.3979
## Leptin Lipoprotein_a MCP_1 MCP_2
## Min. :-1.9471 Min. :-6.571 Min. :5.889 Min. :0.4006
## 1st Qu.:-1.6334 1st Qu.:-5.116 1st Qu.:6.318 1st Qu.:1.5304
## Median :-1.4294 Median :-4.657 Median :6.482 Median :1.8528
## Mean :-1.4363 Mean :-4.515 Mean :6.480 Mean :1.8104
## 3rd Qu.:-1.2409 3rd Qu.:-4.017 3rd Qu.:6.627 3rd Qu.:2.0827
## Max. :-0.8387 Max. :-2.040 Max. :7.065 Max. :3.7545
## MIF MIP_1alpha MIP_1beta MMP_2
## Min. :-2.797 Min. :1.008 Min. :1.917 Min. :0.6248
## 1st Qu.:-2.120 1st Qu.:3.302 1st Qu.:2.485 1st Qu.:2.5513
## Median :-1.966 Median :3.736 Median :2.773 Median :2.9937
## Mean :-1.932 Mean :3.898 Mean :2.784 Mean :3.0347
## 3rd Qu.:-1.715 3rd Qu.:4.686 3rd Qu.:3.079 3rd Qu.:3.4798
## Max. :-1.109 Max. :5.735 Max. :3.784 Max. :6.0996
## MMP_3 MMP10 MMP7 Myoglobin
## Min. :-3.650 Min. :-4.948 Min. :-7.5346 Min. :-3.2968
## 1st Qu.:-2.852 1st Qu.:-4.075 1st Qu.:-4.9634 1st Qu.:-2.0217
## Median :-2.532 Median :-3.612 Median :-4.0302 Median :-1.5874
## Mean :-2.490 Mean :-3.676 Mean :-4.0148 Mean :-1.4165
## 3rd Qu.:-2.120 3rd Qu.:-3.331 3rd Qu.:-3.1640 3rd Qu.:-0.7765
## Max. :-1.171 Max. :-2.900 Max. :-0.1953 Max. : 1.1314
## NT_proBNP NrCAM Osteopontin PAI_1
## Min. :3.611 Min. :2.890 Min. :4.078 Min. :-0.990849
## 1st Qu.:4.174 1st Qu.:3.871 1st Qu.:4.892 1st Qu.:-0.334043
## Median :4.477 Median :4.317 Median :5.168 Median : 0.000000
## Mean :4.488 Mean :4.291 Mean :5.177 Mean :-0.003947
## 3rd Qu.:4.794 3rd Qu.:4.725 3rd Qu.:5.410 3rd Qu.: 0.303112
## Max. :5.398 Max. :5.690 Max. :6.315 Max. : 0.885785
## PAPP_A PLGF PYY Pancreatic_polypeptide
## Min. :-3.152 Min. :2.639 Min. :2.398 Min. :-1.609438
## 1st Qu.:-2.971 1st Qu.:3.689 1st Qu.:2.833 1st Qu.:-0.506693
## Median :-2.841 Median :3.892 Median :2.996 Median : 0.138816
## Mean :-2.845 Mean :3.884 Mean :2.976 Mean :-0.005258
## 3rd Qu.:-2.719 3rd Qu.:4.123 3rd Qu.:3.178 3rd Qu.: 0.470004
## Max. :-2.488 Max. :4.710 Max. :3.738 Max. : 1.504077
## Prolactin Prostatic_Acid_Phosphatase Protein_S
## Min. :-0.38566 Min. :-1.800 Min. :-3.154
## 1st Qu.:-0.16558 1st Qu.:-1.739 1st Qu.:-2.579
## Median : 0.00000 Median :-1.690 Median :-2.259
## Mean : 0.05195 Mean :-1.692 Mean :-2.268
## 3rd Qu.: 0.18232 3rd Qu.:-1.659 3rd Qu.:-1.924
## Max. : 0.78846 Max. :-1.540 Max. :-1.547
## Pulmonary_and_Activation_Regulat RANTES Resistin
## Min. :-2.4418 Min. :-7.236 Min. :-30.156
## 1st Qu.:-1.8326 1st Qu.:-6.725 1st Qu.:-22.131
## Median :-1.5141 Median :-6.571 Median :-18.014
## Mean :-1.5007 Mean :-6.540 Mean :-18.245
## 3rd Qu.:-1.1712 3rd Qu.:-6.392 3rd Qu.:-15.202
## Max. :-0.4463 Max. :-5.843 Max. : -6.594
## S100b SGOT SHBG SOD
## Min. :0.1874 Min. :-1.8971 Min. :-3.730 Min. :4.382
## 1st Qu.:0.9600 1st Qu.:-0.7498 1st Qu.:-3.052 1st Qu.:5.006
## Median :1.1571 Median :-0.4780 Median :-2.711 Median :5.313
## Mean :1.1819 Mean :-0.4898 Mean :-2.686 Mean :5.302
## 3rd Qu.:1.3807 3rd Qu.:-0.2138 3rd Qu.:-2.343 3rd Qu.:5.547
## Max. :2.1950 Max. : 0.1823 Max. :-1.561 Max. :6.461
## Serum_Amyloid_P Sortilin Stem_Cell_Factor TGF_alpha
## Min. :-7.182 Min. :1.508 Min. :2.219 Min. : 7.500
## 1st Qu.:-6.438 1st Qu.:3.177 1st Qu.:3.045 1st Qu.: 9.062
## Median :-6.215 Median :3.867 Median :3.314 Median : 9.596
## Mean :-6.083 Mean :3.787 Mean :3.267 Mean : 9.776
## 3rd Qu.:-5.607 3rd Qu.:4.371 3rd Qu.:3.466 3rd Qu.:10.612
## Max. :-4.699 Max. :5.681 Max. :4.078 Max. :13.083
## TIMP_1 TNF_RII TRAIL_R3 TTR_prealbumin
## Min. : 8.198 Min. :-1.6607 Min. :-1.30636 Min. :2.485
## 1st Qu.:10.530 1st Qu.:-0.8675 1st Qu.:-0.73332 1st Qu.:2.773
## Median :11.341 Median :-0.6541 Median :-0.55547 Median :2.890
## Mean :11.520 Mean :-0.6270 Mean :-0.58640 Mean :2.854
## 3rd Qu.:12.352 3rd Qu.:-0.3320 3rd Qu.:-0.47065 3rd Qu.:2.944
## Max. :16.547 Max. : 0.4055 Max. : 0.09622 Max. :3.091
## Tamm_Horsfall_Protein_THP Thrombomodulin Thrombopoietin
## Min. :-3.206 Min. :-2.054 Min. :-1.5396
## 1st Qu.:-3.144 1st Qu.:-1.675 1st Qu.:-0.8383
## Median :-3.126 Median :-1.534 Median :-0.7039
## Mean :-3.123 Mean :-1.533 Mean :-0.7192
## 3rd Qu.:-3.101 3rd Qu.:-1.341 3rd Qu.:-0.6289
## Max. :-3.041 Max. :-1.019 Max. :-0.3029
## Thymus_Expressed_Chemokine_TECK Thyroid_Stimulating_Hormone
## Min. :2.141 Min. :-6.190
## 1st Qu.:3.283 1st Qu.:-4.733
## Median :3.753 Median :-4.269
## Mean :3.770 Mean :-4.221
## 3rd Qu.:4.316 3rd Qu.:-3.828
## Max. :5.681 Max. :-2.040
## Thyroxine_Binding_Globulin Tissue_Factor Transferrin
## Min. :-2.3026 Min. :0.0000 Min. :2.282
## 1st Qu.:-1.7148 1st Qu.:0.7053 1st Qu.:2.708
## Median :-1.4919 Median :1.1473 Median :2.890
## Mean :-1.4902 Mean :1.1356 Mean :2.900
## 3rd Qu.:-1.2379 3rd Qu.:1.5149 3rd Qu.:3.135
## Max. :-0.5978 Max. :2.7081 Max. :3.497
## Trefoil_Factor_3_TFF3 VCAM_1 VEGF Vitronectin
## Min. :-4.906 Min. :2.028 Min. :12.23 Min. :-1.07881
## 1st Qu.:-4.200 1st Qu.:2.420 1st Qu.:15.03 1st Qu.:-0.46204
## Median :-3.912 Median :2.674 Median :17.08 Median :-0.28106
## Mean :-3.947 Mean :2.644 Mean :16.70 Mean :-0.26833
## 3rd Qu.:-3.772 3rd Qu.:2.833 3rd Qu.:18.19 3rd Qu.:-0.05394
## Max. :-3.170 Max. :3.466 Max. :21.18 Max. : 0.40547
## von_Willebrand_Factor Class E4 E3
## Min. :-4.920 Impaired:18 Min. :1.000 Min. :1.000
## 1st Qu.:-4.269 Control :48 1st Qu.:1.000 1st Qu.:2.000
## Median :-4.017 Median :1.000 Median :2.000
## Mean :-4.014 Mean :1.303 Mean :1.985
## 3rd Qu.:-3.730 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :-3.058 Max. :2.000 Max. :2.000
## E2
## Min. :1.000
## 1st Qu.:1.000
## Median :1.000
## Mean :1.061
## 3rd Qu.:1.000
## Max. :2.000
##################################
# Creating consistent fold assignments
# for the Cross Validation process
##################################
# Fix the RNG seed so the fold assignment is reproducible.
set.seed(12345678)
# Build 10 cross-validation folds from the Class outcome of the train set.
# returnTrain = TRUE returns, for each fold, the row indices used for
# training, which is the form trainControl(index = ) expects.
KFold_Indices <- createFolds(PMA_PreModelling_Train$Class,
                             k = 10,
                             returnTrain = TRUE)
##################################
# Formulating a function to summarize
# model performance metrics
##################################
# Summary function for caret resampling: forwards its arguments unchanged to
# both twoClassSummary() and defaultSummary() and concatenates their results
# into a single named vector of performance metrics.
FiveMetricsSummary <- function(...) {
  c(twoClassSummary(...), defaultSummary(...))
}
##################################
# Formulating the controls for the
# model training process
##################################
# Resampling controls shared by the caret::train() fits:
#   - cross-validation using the pre-computed fold indices (KFold_Indices),
#   - class probabilities enabled (classProbs = TRUE),
#   - metrics computed by FiveMetricsSummary.
KFold_TrainControl <- trainControl(method = "cv",
                                   summaryFunction = FiveMetricsSummary,
                                   classProbs = TRUE,
                                   index = KFold_Indices)
##################################
# Running the random forest model
# by setting the caret method to 'rf'
##################################
# Fix the RNG seed so the random forest fit is reproducible.
set.seed(12345678)
# Fit a random forest on all predictors (every column except Class) with a
# single, fixed mtry = floor(sqrt(number of predictors)) and 50 trees.
# The predictor count is taken with sum(!... %in% "Class"); the previous
# length(... %in% "Class") counted every column, including the outcome.
RF_FULL_Tune <- caret::train(x = PMA_PreModelling_Train[, !names(PMA_PreModelling_Train) %in% c("Class")],
                             y = PMA_PreModelling_Train$Class,
                             method = "rf",
                             metric = "Accuracy",
                             tuneGrid = data.frame(mtry = floor(sqrt(sum(!names(PMA_PreModelling_Train) %in% c("Class"))))),
                             ntree = 50,
                             trControl = KFold_TrainControl)
##################################
# Reporting the cross-validation results
# for the train set
##################################
RF_FULL_Tune
## Random Forest
##
## 267 samples
## 127 predictors
## 2 classes: 'Impaired', 'Control'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 240, 239, 241, 240, 241, 241, ...
## Resampling results:
##
## ROC Sens Spec Accuracy Kappa
## 0.7645982 0.3178571 0.9536842 0.7797517 0.3186583
##
## Tuning parameter 'mtry' was held constant at a value of 11
# Print the final random forest stored in the train object.
RF_FULL_Tune$finalModel
##
## Call:
## randomForest(x = x, y = y, ntree = 50, mtry = param$mtry)
## Type of random forest: classification
## Number of trees: 50
## No. of variables tried at each split: 11
##
## OOB estimate of error rate: 23.6%
## Confusion matrix:
## Impaired Control class.error
## Impaired 24 49 0.67123288
## Control 14 180 0.07216495
# Print the resampled performance table stored in the train object.
RF_FULL_Tune$results
## mtry ROC Sens Spec Accuracy Kappa ROCSD SensSD
## 1 11 0.7645982 0.3178571 0.9536842 0.7797517 0.3186583 0.1373713 0.1832328
## SpecSD AccuracySD KappaSD
## 1 0.03791666 0.06273494 0.218025
# Store the cross-validated accuracy; the outer parentheses also print it.
(RF_FULL_Train_Accuracy <- RF_FULL_Tune$results[, c("Accuracy")])
## [1] 0.7797517
##################################
# Identifying and plotting the
# best model predictors
##################################
# Compute scaled variable importance for the full random forest model.
RF_FULL_VarImp <- varImp(RF_FULL_Tune, scale = TRUE)
# Plot the 25 highest-ranked predictors.
plot(RF_FULL_VarImp,
     top = 25,
     scales = list(y = list(cex = .95)),
     main = "Ranked Variable Importance : Random Forest",
     xlab = "Scaled Variable Importance Metrics",
     ylab = "Predictors",
     cex = 2,
     origin = 0,
     alpha = 0.45)
##################################
# Independently evaluating the model and
# reporting the independent evaluation results
# on the test set
##################################
# Pair the observed test-set classes with the classes predicted by the full
# random forest model; predictors are all test columns except Class.
RF_FULL_Test <- data.frame(RF_FULL_Observed = PMA_PreModelling_Test$Class,
                           RF_FULL_Predicted = predict(RF_FULL_Tune,
                                                       PMA_PreModelling_Test[, !names(PMA_PreModelling_Test) %in% c("Class")],
                                                       type = "raw"))
RF_FULL_Test
## RF_FULL_Observed RF_FULL_Predicted
## 1 Control Control
## 2 Impaired Control
## 3 Impaired Control
## 4 Control Control
## 5 Impaired Control
## 6 Impaired Control
## 7 Impaired Control
## 8 Control Control
## 9 Impaired Impaired
## 10 Control Control
## 11 Control Control
## 12 Control Control
## 13 Control Control
## 14 Impaired Control
## 15 Control Control
## 16 Control Control
## 17 Control Control
## 18 Impaired Impaired
## 19 Control Control
## 20 Control Control
## 21 Control Control
## 22 Control Control
## 23 Control Control
## 24 Control Control
## 25 Control Control
## 26 Control Control
## 27 Control Control
## 28 Control Control
## 29 Control Control
## 30 Control Control
## 31 Impaired Control
## 32 Control Control
## 33 Control Control
## 34 Control Control
## 35 Control Control
## 36 Control Control
## 37 Control Control
## 38 Control Control
## 39 Impaired Control
## 40 Control Control
## 41 Control Control
## 42 Control Control
## 43 Control Control
## 44 Impaired Impaired
## 45 Control Control
## 46 Control Control
## 47 Control Control
## 48 Impaired Control
## 49 Control Control
## 50 Impaired Control
## 51 Control Control
## 52 Impaired Impaired
## 53 Impaired Control
## 54 Control Control
## 55 Control Control
## 56 Control Control
## 57 Control Control
## 58 Impaired Control
## 59 Control Control
## 60 Impaired Impaired
## 61 Control Control
## 62 Control Control
## 63 Impaired Control
## 64 Control Control
## 65 Control Control
## 66 Control Control
##################################
# Reporting the independent evaluation results
# for the test set
##################################
# Test-set accuracy of the full random forest model; the outer parentheses
# also print the stored value.
(RF_FULL_Test_Accuracy <- Accuracy(y_pred = RF_FULL_Test$RF_FULL_Predicted,
                                   y_true = RF_FULL_Test$RF_FULL_Observed))
## [1] 0.8030303
##################################
# Formulating the controls for the
# simulated annealing process
##################################
# Controls for simulated annealing feature selection (caret::safs):
#   - external resampling is cross-validation on the shared KFold_Indices,
#   - rfSA supplies the random forest fit/predict/fitness helpers,
#   - improve = 25 is the number of iterations without improvement before a
#     restart; this exceeds the iters = 10 used in the safs() call, so the
#     search never restarts (the printed summary reports 0 restarts).
KFold_SAControl <- safsControl(method = "cv",
                               verbose = TRUE,
                               functions = rfSA,
                               index = KFold_Indices,
                               returnResamp = "final",
                               improve = 25)
##################################
# Running simulated annealing feature selection
# around a random forest model
# (caret::safs with the rfSA helper functions)
##################################
# Fix the RNG seed so the simulated annealing search is reproducible.
set.seed(12345678)
# Run simulated annealing feature selection over all predictors (every
# column except Class) for 10 iterations; ntree = 50 is forwarded to the
# underlying random forest fit.
RF_SA_Tune <- caret::safs(x = PMA_PreModelling_Train[, !names(PMA_PreModelling_Train) %in% c("Class")],
                          y = PMA_PreModelling_Train$Class,
                          iters = 10,
                          ntree = 50,
                          safsControl = KFold_SAControl)
## Fold01 1 0.7333333 (26)
## Fold01 2 0.7333333->0.6958333 (26+1, 96.3%) 0.9027833 A
## Fold01 3 0.7333333->0.7166667 (26+0, 92.6%) 0.9340906 A
## Fold01 4 0.7333333->0.7291667 (26+1, 89.3%) 0.977529 A
## Fold01 5 0.7333333->0.7291667 (26+2, 86.2%) 0.9719907 A
## Fold01 6 0.7333333->0.7208333 (26+3, 83.3%) 0.9027833 A
## Fold01 7 0.7333333->0.7125 (26+4, 80.6%) 0.8196617 A
## Fold01 8 0.7333333->0.7166667 (26+5, 78.1%) 0.8337529 A
## Fold01 9 0.7333333->0.7208333 (26+4, 75.0%) 0.8577787 A
## Fold01 10 0.7333333->0.7416667 (26+5, 72.7%) *
## Fold02 1 0.7322176 (26)
## Fold02 2 0.7322176->0.7656904 (26+1, 96.3%) *
## Fold02 3 0.7656904->0.7238494 (27-1, 96.3%) 0.8487977 A
## Fold02 4 0.7656904->0.748954 (27+0, 92.9%) 0.9162815 A
## Fold02 5 0.7656904->0.748954 (27+1, 89.7%) 0.8964707 A
## Fold02 6 0.7656904->0.7656904 (27+2, 86.7%) 1 A
## Fold02 7 0.7656904->0.7238494 (27+3, 83.9%) 0.6821446 A
## Fold02 8 0.7656904->0.748954 (27+2, 80.6%) 0.8395717 A
## Fold02 9 0.7656904->0.7238494 (27+1, 77.4%) 0.6115227 A
## Fold02 10 0.7656904->0.7782427 (27+2, 75.0%) *
## Fold03 1 0.7385892 (26)
## Fold03 2 0.7385892->0.7510373 (26-1, 96.2%) *
## Fold03 3 0.7510373->0.7427386 (25+1, 96.2%) 0.9673942 A
## Fold03 4 0.7510373->0.7219917 (25+2, 92.6%) 0.8566755 A
## Fold03 5 0.7510373->0.7427386 (25+3, 89.3%) 0.9462499 A
## Fold03 6 0.7510373->0.7178423 (25+4, 86.2%) 0.7670576 A
## Fold03 7 0.7510373->0.7427386 (25+5, 83.3%) 0.9255676 A
## Fold03 8 0.7510373->0.7302905 (25+6, 80.6%) 0.8017211 A
## Fold03 9 0.7510373->0.7385892 (25+7, 78.1%) 0.8614216 A
## Fold03 10 0.7510373->0.7261411 (25+8, 75.8%) 0.7178521 A
## Fold04 1 0.7291667 (26)
## Fold04 2 0.7291667->0.75 (26+1, 96.3%) *
## Fold04 3 0.75->0.7625 (27-1, 96.3%) *
## Fold04 4 0.7625->0.7291667 (26+1, 96.3%) 0.8395717 A
## Fold04 5 0.7625->0.7583333 (26+2, 92.9%) 0.9730475 A
## Fold04 6 0.7625->0.725 (26+3, 89.7%) 0.7444706 A
## Fold04 7 0.7625->0.7041667 (26+4, 86.7%) 0.5853653
## Fold04 8 0.7625->0.75 (26+2, 86.2%) 0.8770884 A
## Fold04 9 0.7625->0.75 (26+3, 83.3%) 0.8628271 A
## Fold04 10 0.7625->0.75 (26+4, 80.6%) 0.8487977 A
## Fold05 1 0.7095436 (26)
## Fold05 2 0.7095436->0.7385892 (26+1, 96.3%) *
## Fold05 3 0.7385892->0.7427386 (27-1, 96.3%) *
## Fold05 4 0.7427386->0.7344398 (26-1, 96.2%) 0.9562913 A
## Fold05 5 0.7427386->0.7302905 (26-2, 92.3%) 0.9196162 A
## Fold05 6 0.7427386->0.7261411 (26-3, 88.5%) 0.8745217
## Fold05 7 0.7427386->0.7676349 (26-3, 88.5%) *
## Fold05 8 0.7676349->0.7053942 (23+1, 95.8%) 0.5227517 A
## Fold05 9 0.7676349->0.7095436 (23+2, 92.0%) 0.5060696
## Fold05 10 0.7676349->0.7676349 (23+2, 92.0%) 1 A
## Fold06 1 0.7551867 (26)
## Fold06 2 0.7551867->0.7385892 (26-1, 96.2%) 0.956996 A
## Fold06 3 0.7551867->0.6929461 (26+0, 92.6%) 0.7809433 A
## Fold06 4 0.7551867->0.7551867 (26+1, 89.3%) 1 A
## Fold06 5 0.7551867->0.7053942 (26+2, 86.2%) 0.7191608
## Fold06 6 0.7551867->0.7012448 (26+2, 86.2%) 0.6514391 A
## Fold06 7 0.7551867->0.7261411 (26+3, 83.3%) 0.7639669 A
## Fold06 8 0.7551867->0.7053942 (26+4, 80.6%) 0.5900945 A
## Fold06 9 0.7551867->0.7427386 (26+5, 78.1%) 0.8621279 A
## Fold06 10 0.7551867->0.746888 (26+4, 75.0%) 0.8959326
## Fold07 1 0.7427386 (26)
## Fold07 2 0.7427386->0.7178423 (26+1, 96.3%) 0.9351586 A
## Fold07 3 0.7427386->0.7385892 (26+2, 92.9%) 0.9833799 A
## Fold07 4 0.7427386->0.7095436 (26+3, 89.7%) 0.8362974 A
## Fold07 5 0.7427386->0.7385892 (26+2, 86.2%) 0.9724536 A
## Fold07 6 0.7427386->0.7053942 (26+3, 83.3%) 0.7395777 A
## Fold07 7 0.7427386->0.7261411 (26+4, 80.6%) 0.855196 A
## Fold07 8 0.7427386->0.746888 (26+5, 78.1%) *
## Fold07 9 0.746888->0.7427386 (31+1, 96.9%) 0.9512294 A
## Fold07 10 0.746888->0.7427386 (31+0, 93.8%) 0.9459595 A
## Fold08 1 0.725 (26)
## Fold08 2 0.725->0.7208333 (26+1, 96.3%) 0.9885716 A
## Fold08 3 0.725->0.7083333 (26+2, 92.9%) 0.9333589 A
## Fold08 4 0.725->0.7375 (26+3, 89.7%) *
## Fold08 5 0.7375->0.7541667 (29+1, 96.7%) *
## Fold08 6 0.7541667->0.7375 (30+1, 96.8%) 0.8758183 A
## Fold08 7 0.7541667->0.7708333 (30+2, 93.8%) *
## Fold08 8 0.7708333->0.775 (32+1, 97.0%) *
## Fold08 9 0.775->0.7333333 (33-1, 97.0%) 0.6163927 A
## Fold08 10 0.775->0.7458333 (33-2, 93.9%) 0.6863661
## Fold09 1 0.7447699 (26)
## Fold09 2 0.7447699->0.748954 (26+1, 96.3%) *
## Fold09 3 0.748954->0.7447699 (27+1, 96.4%) 0.9833799 A
## Fold09 4 0.748954->0.7280335 (27+2, 93.1%) 0.894284
## Fold09 5 0.748954->0.707113 (27+2, 93.1%) 0.7562906 A
## Fold09 6 0.748954->0.7364017 (27+1, 89.7%) 0.9043321 A
## Fold09 7 0.748954->0.7322176 (27+2, 86.7%) 0.855196 A
## Fold09 8 0.748954->0.7405858 (27+3, 83.9%) 0.914493 A
## Fold09 9 0.748954->0.7112971 (27+4, 81.2%) 0.6360272 A
## Fold09 10 0.748954->0.7447699 (27+5, 78.8%) 0.9456659 A
## Fold10 1 0.7385892 (26)
## Fold10 2 0.7385892->0.7344398 (26+1, 96.3%) 0.9888269 A
## Fold10 3 0.7385892->0.7302905 (26+2, 92.9%) 0.9668539 A
## Fold10 4 0.7385892->0.7344398 (26+3, 89.7%) 0.9777787 A
## Fold10 5 0.7385892->0.7427386 (26+4, 86.7%) *
## Fold10 6 0.7427386->0.7510373 (30-1, 96.7%) *
## Fold10 7 0.7510373->0.7385892 (29+1, 96.7%) 0.8904555
## Fold10 8 0.7510373->0.7136929 (29-1, 96.6%) 0.6718031
## Fold10 9 0.7510373->0.7385892 (29-1, 96.6%) 0.8614216 A
## Fold10 10 0.7510373->0.7593361 (29-2, 93.1%) *
## + final SA
## 1 0.7490637 (26)
## 2 0.7490637->0.741573 (26+1, 96.3%) 0.9801987 A
## 3 0.7490637->0.7790262 (26+2, 92.9%) *
## 4 0.7790262->0.741573 (28+1, 96.6%) 0.825053 A
## 5 0.7790262->0.7303371 (28+2, 93.3%) 0.7316156 A
## 6 0.7790262->0.7378277 (28+3, 90.3%) 0.7281067
## 7 0.7790262->0.741573 (28+3, 90.3%) 0.7142384 A
## 8 0.7790262->0.7490637 (28+2, 87.1%) 0.7351415 A
## 9 0.7790262->0.7378277 (28+3, 84.4%) 0.6212874 A
## 10 0.7790262->0.7453184 (28+4, 81.8%) 0.6487601 A
## + final model
##################################
# Reporting the cross-validation results
# for the train set
##################################
# Auto-print the fitted safs object to show its feature selection summary.
RF_SA_Tune
##
## Simulated Annealing Feature Selection
##
## 267 samples
## 127 predictors
## 2 classes: 'Impaired', 'Control'
##
## Maximum search iterations: 10
## Restart after 25 iterations without improvement (0 restarts on average)
##
## Internal performance values: Accuracy, Kappa
## Subset selection driven to maximize internal Accuracy
##
## External performance values: Accuracy, Kappa
## Best iteration chose by maximizing external Accuracy
## External resampling method: Cross-Validated (10 fold)
##
## During resampling:
## * the top 5 selected variables (out of a possible 127):
## Alpha_1_Microglobulin (50%), IL_3 (50%), Osteopontin (50%), Beta_2_Microglobulin (40%), BMP_6 (40%)
## * on average, 27.8 variables were selected (min = 23, max = 33)
##
## In the final search using the entire training set:
## * 30 features selected at iteration 8 including:
## ACTH_Adrenocorticotropic_Hormon, Angiotensinogen, Apolipoprotein_A2, Apolipoprotein_B, Apolipoprotein_CI ...
## * external performance at this iteration is
##
## Accuracy Kappa
## 0.7762 0.3061
# Print the random forest fitted on the finally selected feature subset.
RF_SA_Tune$fit
##
## Call:
## randomForest(x = x, y = y, ntree = 50)
## Type of random forest: classification
## Number of trees: 50
## No. of variables tried at each split: 5
##
## OOB estimate of error rate: 25.47%
## Confusion matrix:
## Impaired Control class.error
## Impaired 19 54 0.73972603
## Control 14 180 0.07216495
# Print the per-iteration external (resampled) Accuracy and Kappa averages.
RF_SA_Tune$averages
## Iter Accuracy Kappa
## 1 1 0.7238604 0.1488856
## 2 2 0.7380037 0.2028418
## 3 3 0.7493895 0.2403309
## 4 4 0.7684778 0.2964909
## 5 5 0.7683557 0.2916946
## 6 6 0.7389703 0.1972777
## 7 7 0.7576516 0.2461292
## 8 8 0.7761803 0.3061366
## 9 9 0.7680810 0.2678226
## 10 10 0.7609381 0.2700777
# Store the best per-iteration external accuracy; the outer parentheses also
# print it. max() is used directly instead of filtering rows with a
# floating-point `==` comparison against the maximum.
(RF_SA_Train_Accuracy <- max(RF_SA_Tune$averages$Accuracy))
## [1] 0.7761803
##################################
# Independently evaluating the model and
# reporting the independent evaluation results
# on the test set
##################################
# Pair the observed test-set classes with the predictions of the simulated
# annealing model. As the printed result shows, predict() on the safs object
# returns several columns (pred, Impaired, Control), which data.frame()
# prefixes with "RF_SA_Predicted.".
RF_SA_Test <- data.frame(RF_SA_Observed = PMA_PreModelling_Test$Class,
                         RF_SA_Predicted = predict(RF_SA_Tune,
                                                   PMA_PreModelling_Test[, !names(PMA_PreModelling_Test) %in% c("Class")],
                                                   type = "raw"))
RF_SA_Test
## RF_SA_Observed RF_SA_Predicted.pred RF_SA_Predicted.Impaired
## 4 Control Control 0.20
## 10 Impaired Control 0.34
## 13 Impaired Control 0.24
## 15 Control Control 0.10
## 27 Impaired Control 0.14
## 32 Impaired Control 0.04
## 33 Impaired Control 0.30
## 49 Control Control 0.08
## 52 Impaired Impaired 0.84
## 54 Control Control 0.10
## 58 Control Control 0.38
## 66 Control Control 0.26
## 79 Control Control 0.14
## 87 Impaired Control 0.28
## 89 Control Control 0.34
## 91 Control Control 0.34
## 92 Control Control 0.20
## 101 Impaired Impaired 0.72
## 102 Control Control 0.22
## 106 Control Control 0.14
## 116 Control Control 0.30
## 119 Control Control 0.12
## 120 Control Control 0.10
## 122 Control Control 0.22
## 125 Control Control 0.18
## 127 Control Control 0.10
## 138 Control Control 0.28
## 142 Control Control 0.34
## 150 Control Control 0.36
## 151 Control Control 0.26
## 164 Impaired Control 0.32
## 173 Control Control 0.12
## 187 Control Control 0.06
## 188 Control Control 0.32
## 196 Control Control 0.24
## 199 Control Control 0.48
## 203 Control Control 0.12
## 204 Control Control 0.32
## 206 Impaired Control 0.42
## 207 Control Control 0.24
## 209 Control Control 0.22
## 211 Control Control 0.04
## 217 Control Control 0.44
## 221 Impaired Impaired 0.54
## 222 Control Control 0.10
## 235 Control Control 0.10
## 238 Control Control 0.18
## 248 Impaired Control 0.24
## 252 Control Control 0.24
## 259 Impaired Control 0.10
## 266 Control Control 0.26
## 276 Impaired Control 0.38
## 280 Impaired Control 0.26
## 284 Control Control 0.18
## 285 Control Control 0.08
## 286 Control Control 0.06
## 288 Control Control 0.08
## 293 Impaired Control 0.22
## 295 Control Control 0.26
## 296 Impaired Impaired 0.64
## 300 Control Impaired 0.58
## 309 Control Control 0.08
## 310 Impaired Control 0.28
## 318 Control Control 0.12
## 319 Control Control 0.14
## 328 Control Control 0.28
## RF_SA_Predicted.Control
## 4 0.80
## 10 0.66
## 13 0.76
## 15 0.90
## 27 0.86
## 32 0.96
## 33 0.70
## 49 0.92
## 52 0.16
## 54 0.90
## 58 0.62
## 66 0.74
## 79 0.86
## 87 0.72
## 89 0.66
## 91 0.66
## 92 0.80
## 101 0.28
## 102 0.78
## 106 0.86
## 116 0.70
## 119 0.88
## 120 0.90
## 122 0.78
## 125 0.82
## 127 0.90
## 138 0.72
## 142 0.66
## 150 0.64
## 151 0.74
## 164 0.68
## 173 0.88
## 187 0.94
## 188 0.68
## 196 0.76
## 199 0.52
## 203 0.88
## 204 0.68
## 206 0.58
## 207 0.76
## 209 0.78
## 211 0.96
## 217 0.56
## 221 0.46
## 222 0.90
## 235 0.90
## 238 0.82
## 248 0.76
## 252 0.76
## 259 0.90
## 266 0.74
## 276 0.62
## 280 0.74
## 284 0.82
## 285 0.92
## 286 0.94
## 288 0.92
## 293 0.78
## 295 0.74
## 296 0.36
## 300 0.42
## 309 0.92
## 310 0.72
## 318 0.88
## 319 0.86
## 328 0.72
##################################
# Reporting the independent evaluation results
# for the test set
##################################
# Test-set accuracy of the simulated annealing model; the outer parentheses
# also print the stored value.
(RF_SA_Test_Accuracy <- Accuracy(y_pred = RF_SA_Test$RF_SA_Predicted.pred,
                                 y_true = RF_SA_Test$RF_SA_Observed))
## [1] 0.7727273
##################################
# Formulating the controls for the
# genetic algorithms process
##################################
# Controls for genetic algorithm feature selection (caret::gafs):
#   - external resampling is cross-validation on the shared KFold_Indices,
#   - rfGA supplies the random forest fit/predict/fitness helpers.
KFold_GAControl <- gafsControl(method = "cv",
                               verbose = TRUE,
                               functions = rfGA,
                               index = KFold_Indices,
                               returnResamp = "final")
##################################
# Running genetic algorithm feature selection
# around a random forest model
# (caret::gafs with the rfGA helper functions)
##################################
# Fix the RNG seed so the genetic algorithm search is reproducible.
set.seed(12345678)
# Run genetic algorithm feature selection over all predictors (every column
# except Class) for 10 generations; ntree = 50 is forwarded to the
# underlying random forest fit.
RF_GA_Tune <- caret::gafs(x = PMA_PreModelling_Train[, !names(PMA_PreModelling_Train) %in% c("Class")],
                          y = PMA_PreModelling_Train$Class,
                          iters = 10,
                          ntree = 50,
                          gafsControl = KFold_GAControl)
## Fold01 1 0.7833333 (81)
## Fold01 2 0.7833333->0.7791667 ( 81-> 84, 48.6%)
## Fold01 3 0.7833333->0.7875 ( 81-> 76, 48.1%) *
## Fold01 4 0.7875->0.7875 ( 76-> 78, 45.3%)
## Fold01 5 0.7875->0.8 ( 76-> 81, 46.7%) *
## Fold01 6 0.8->0.8083333 ( 81-> 81, 55.8%) *
## Fold01 7 0.8083333->0.7791667 ( 81-> 73, 51.0%)
## Fold01 8 0.8083333->0.7916667 ( 81-> 67, 41.0%)
## Fold01 9 0.8083333->0.7875 ( 81-> 69, 41.5%)
## Fold01 10 0.8083333->0.7958333 ( 81-> 71, 50.5%)
## Fold02 1 0.790795 (61)
## Fold02 2 0.790795->0.7991632 ( 61-> 23, 18.3%) *
## Fold02 3 0.7991632->0.8200837 ( 23-> 64, 14.5%) *
## Fold02 4 0.8200837->0.7949791 ( 64-> 97, 40.0%)
## Fold02 5 0.8200837->0.8033473 ( 64-> 73, 82.7%)
## Fold02 6 0.8200837->0.7991632 ( 64-> 73, 71.2%)
## Fold02 7 0.8200837->0.8033473 ( 64-> 88, 42.1%)
## Fold02 8 0.8200837->0.7991632 ( 64-> 81, 54.3%)
## Fold02 9 0.8200837->0.8075314 ( 64-> 76, 44.3%)
## Fold02 10 0.8200837->0.7991632 ( 64-> 71, 58.8%)
## Fold03 1 0.780083 (103)
## Fold03 2 0.780083->0.7883817 (103-> 47, 31.6%) *
## Fold03 3 0.7883817->0.780083 ( 47-> 89, 33.3%)
## Fold03 4 0.7883817->0.780083 ( 47-> 54, 23.2%)
## Fold03 5 0.7883817->0.7925311 ( 47-> 47, 27.0%) *
## Fold03 6 0.7925311->0.8008299 ( 47-> 72, 26.6%) *
## Fold03 7 0.8008299->0.7966805 ( 72-> 73, 70.6%)
## Fold03 8 0.8008299->0.7966805 ( 72-> 63, 87.5%)
## Fold03 9 0.8008299->0.8049793 ( 72-> 62, 78.7%) *
## Fold03 10 0.8049793->0.7925311 ( 62-> 49, 60.9%)
## Fold04 1 0.7708333 (107)
## Fold04 2 0.7708333->0.7791667 (107->106, 99.1%) *
## Fold04 3 0.7791667->0.7875 (106-> 75, 50.8%) *
## Fold04 4 0.7875->0.8 ( 75-> 75, 100.0%) *
## Fold04 5 0.8->0.7958333 ( 75-> 82, 42.7%)
## Fold04 6 0.8->0.7875 ( 75-> 59, 50.6%)
## Fold04 7 0.8->0.7833333 ( 75-> 79, 42.6%)
## Fold04 8 0.8->0.8 ( 75-> 73, 78.3%)
## Fold04 9 0.8->0.7833333 ( 75-> 67, 54.3%)
## Fold04 10 0.8->0.8 ( 75-> 92, 50.5%)
## Fold05 1 0.7842324 (99)
## Fold05 2 0.7842324->0.780083 ( 99-> 30, 22.9%)
## Fold05 3 0.7842324->0.7925311 ( 99-> 40, 31.1%) *
## Fold05 4 0.7925311->0.7966805 ( 40-> 88, 25.5%) *
## Fold05 5 0.7966805->0.8008299 ( 88-> 50, 34.0%) *
## Fold05 6 0.8008299->0.780083 ( 50-> 48, 81.5%)
## Fold05 7 0.8008299->0.8008299 ( 50-> 35, 28.8%)
## Fold05 8 0.8008299->0.7925311 ( 50-> 52, 29.1%)
## Fold05 9 0.8008299->0.8008299 ( 50-> 51, 34.7%)
## Fold05 10 0.8008299->0.7925311 ( 50-> 51, 34.7%)
## Fold06 1 0.7759336 (11)
## Fold06 2 0.7759336->0.7759336 ( 11-> 70, 9.5%)
## Fold06 3 0.7759336->0.7759336 ( 11-> 56, 6.3%)
## Fold06 4 0.7759336->0.7842324 ( 11-> 66, 8.5%) *
## Fold06 5 0.7842324->0.7842324 ( 66-> 70, 54.5%)
## Fold06 6 0.7842324->0.780083 ( 66-> 61, 47.7%)
## Fold06 7 0.7842324->0.7883817 ( 66-> 53, 45.1%) *
## Fold06 8 0.7883817->0.7925311 ( 53-> 53, 49.3%) *
## Fold06 9 0.7925311->0.7925311 ( 53-> 71, 51.2%)
## Fold06 10 0.7925311->0.7925311 ( 53-> 31, 47.4%)
## Fold07 1 0.7883817 (90)
## Fold07 2 0.7883817->0.7966805 ( 90-> 34, 21.6%) *
## Fold07 3 0.7966805->0.8049793 ( 34-> 69, 22.6%) *
## Fold07 4 0.8049793->0.7925311 ( 69-> 89, 50.5%)
## Fold07 5 0.8049793->0.8008299 ( 69-> 99, 42.4%)
## Fold07 6 0.8049793->0.8174274 ( 69->101, 42.9%) *
## Fold07 7 0.8174274->0.7966805 (101-> 76, 56.6%)
## Fold07 8 0.8174274->0.8049793 (101-> 72, 57.3%)
## Fold07 9 0.8174274->0.813278 (101-> 61, 48.6%)
## Fold07 10 0.8174274->0.8008299 (101-> 61, 48.6%)
## Fold08 1 0.7875 (76)
## Fold08 2 0.7875->0.7958333 ( 76->111, 55.8%) *
## Fold08 3 0.7958333->0.7791667 (111-> 67, 50.8%)
## Fold08 4 0.7958333->0.7875 (111-> 57, 46.1%)
## Fold08 5 0.7958333->0.7916667 (111-> 52, 40.5%)
## Fold08 6 0.7958333->0.8 (111-> 69, 52.5%) *
## Fold08 7 0.8->0.8 ( 69-> 55, 39.3%)
## Fold08 8 0.8->0.7958333 ( 69-> 62, 42.4%)
## Fold08 9 0.8->0.7833333 ( 69-> 70, 51.1%)
## Fold08 10 0.8->0.8125 ( 69-> 45, 37.3%) *
## Fold09 1 0.7991632 (61)
## Fold09 2 0.7991632->0.7949791 ( 61->113, 46.2%)
## Fold09 3 0.7991632->0.7991632 ( 61-> 79, 38.6%)
## Fold09 4 0.7991632->0.7866109 ( 61-> 32, 32.9%)
## Fold09 5 0.7991632->0.8117155 ( 61-> 55, 46.8%) *
## Fold09 6 0.8117155->0.7991632 ( 55-> 76, 32.3%)
## Fold09 7 0.8117155->0.7949791 ( 55-> 47, 32.5%)
## Fold09 8 0.8117155->0.8033473 ( 55-> 67, 31.2%)
## Fold09 9 0.8117155->0.8075314 ( 55-> 44, 32.0%)
## Fold09 10 0.8117155->0.8075314 ( 55-> 55, 27.9%)
## Fold10 1 0.7925311 (86)
## Fold10 2 0.7925311->0.7883817 ( 86-> 77, 49.5%)
## Fold10 3 0.7925311->0.7925311 ( 86-> 77, 49.5%)
## Fold10 4 0.7925311->0.7966805 ( 86-> 95, 52.1%) *
## Fold10 5 0.7966805->0.8049793 ( 95-> 45, 33.3%) *
## Fold10 6 0.8049793->0.8008299 ( 45-> 45, 100.0%)
## Fold10 7 0.8049793->0.8008299 ( 45-> 71, 33.3%)
## Fold10 8 0.8049793->0.7883817 ( 45-> 96, 33.0%)
## Fold10 9 0.8049793->0.780083 ( 45-> 85, 36.8%)
## Fold10 10 0.8049793->0.8008299 ( 45-> 64, 65.2%)
## + final GA
## 1 0.7865169 (93)
## 2 0.7865169->0.8052434 ( 93-> 23, 16.0%) *
## 3 0.8052434->0.8089888 ( 23-> 72, 20.3%) *
## 4 0.8089888->0.7940075 ( 72-> 87, 54.4%)
## 5 0.8089888->0.7940075 ( 72-> 65, 38.4%)
## 6 0.8089888->0.7865169 ( 72->100, 55.0%)
## 7 0.8089888->0.8052434 ( 72->100, 49.6%)
## 8 0.8089888->0.8164794 ( 72-> 79, 48.0%) *
## 9 0.8164794->0.7940075 ( 79->100, 62.7%)
## 10 0.8164794->0.8014981 ( 79->103, 64.0%)
## + final model
##################################
# Reporting the cross-validation results
# for the train set
##################################
# Auto-print the fitted gafs object to show its feature selection summary.
RF_GA_Tune
##
## Genetic Algorithm Feature Selection
##
## 267 samples
## 127 predictors
## 2 classes: 'Impaired', 'Control'
##
## Maximum generations: 10
## Population per generation: 50
## Crossover probability: 0.8
## Mutation probability: 0.1
## Elitism: 0
##
## Internal performance values: Accuracy, Kappa
## Subset selection driven to maximize internal Accuracy
##
## External performance values: Accuracy, Kappa
## Best iteration chose by maximizing external Accuracy
## External resampling method: Cross-Validated (10 fold)
##
## During resampling:
## * the top 5 selected variables (out of a possible 127):
## MMP10 (100%), Cystatin_C (90%), PAI_1 (90%), Angiotensinogen (80%), Calbindin (80%)
## * on average, 63.1 variables were selected (min = 45, max = 101)
##
## In the final search using the entire training set:
## * 93 features selected at iteration 1 including:
## ACE_CD143_Angiotensin_Converti, ACTH_Adrenocorticotropic_Hormon, AXL, Alpha_1_Antichymotrypsin, Alpha_1_Microglobulin ...
## * external performance at this iteration is
##
## Accuracy Kappa
## 0.7984 0.3829
# Print the random forest fitted on the finally selected feature subset.
RF_GA_Tune$fit
##
## Call:
## randomForest(x = x, y = y, ntree = 50)
## Type of random forest: classification
## Number of trees: 50
## No. of variables tried at each split: 9
##
## OOB estimate of error rate: 22.1%
## Confusion matrix:
## Impaired Control class.error
## Impaired 27 46 0.63013699
## Control 13 181 0.06701031
# Print the per-iteration external (resampled) Accuracy and Kappa averages.
RF_GA_Tune$averages
## Iter Accuracy Kappa
## 1 1 0.7984229 0.3828759
## 2 2 0.7800265 0.3073912
## 3 3 0.7752137 0.3112097
## 4 4 0.7875865 0.3592537
## 5 5 0.7649267 0.2534131
## 6 6 0.7908730 0.3560893
## 7 7 0.7874339 0.3312224
## 8 8 0.7687729 0.2740272
## 9 9 0.7794770 0.3112902
## 10 10 0.7803114 0.3393190
# Store the best per-iteration external accuracy; the outer parentheses also
# print it. max() is used directly instead of filtering rows with a
# floating-point `==` comparison against the maximum.
(RF_GA_Train_Accuracy <- max(RF_GA_Tune$averages$Accuracy))
## [1] 0.7984229
##################################
# Independently evaluating the model and
# reporting the independent evaluation results
# on the test set
##################################
# Pair the observed test-set classes with the predictions of the genetic
# algorithm model. As the printed result shows, predict() on the gafs object
# returns several columns (pred, Impaired, Control), which data.frame()
# prefixes with "RF_GA_Predicted.".
RF_GA_Test <- data.frame(RF_GA_Observed = PMA_PreModelling_Test$Class,
                         RF_GA_Predicted = predict(RF_GA_Tune,
                                                   PMA_PreModelling_Test[, !names(PMA_PreModelling_Test) %in% c("Class")],
                                                   type = "raw"))
RF_GA_Test
## RF_GA_Observed RF_GA_Predicted.pred RF_GA_Predicted.Impaired
## 4 Control Control 0.20
## 10 Impaired Control 0.28
## 13 Impaired Control 0.14
## 15 Control Control 0.26
## 27 Impaired Control 0.18
## 32 Impaired Control 0.16
## 33 Impaired Control 0.26
## 49 Control Control 0.06
## 52 Impaired Impaired 0.68
## 54 Control Control 0.14
## 58 Control Control 0.42
## 66 Control Control 0.14
## 79 Control Control 0.16
## 87 Impaired Control 0.30
## 89 Control Control 0.20
## 91 Control Control 0.38
## 92 Control Control 0.10
## 101 Impaired Impaired 0.60
## 102 Control Control 0.10
## 106 Control Control 0.32
## 116 Control Control 0.30
## 119 Control Control 0.18
## 120 Control Control 0.12
## 122 Control Control 0.18
## 125 Control Control 0.22
## 127 Control Control 0.16
## 138 Control Control 0.44
## 142 Control Control 0.24
## 150 Control Control 0.18
## 151 Control Control 0.30
## 164 Impaired Control 0.24
## 173 Control Control 0.14
## 187 Control Control 0.18
## 188 Control Control 0.20
## 196 Control Impaired 0.50
## 199 Control Control 0.26
## 203 Control Control 0.28
## 204 Control Control 0.42
## 206 Impaired Impaired 0.58
## 207 Control Control 0.12
## 209 Control Control 0.16
## 211 Control Control 0.06
## 217 Control Control 0.22
## 221 Impaired Impaired 0.54
## 222 Control Control 0.26
## 235 Control Control 0.06
## 238 Control Control 0.22
## 248 Impaired Control 0.20
## 252 Control Control 0.08
## 259 Impaired Control 0.28
## 266 Control Control 0.14
## 276 Impaired Impaired 0.58
## 280 Impaired Control 0.26
## 284 Control Control 0.28
## 285 Control Control 0.12
## 286 Control Control 0.12
## 288 Control Control 0.06
## 293 Impaired Control 0.26
## 295 Control Control 0.26
## 296 Impaired Control 0.46
## 300 Control Control 0.38
## 309 Control Control 0.14
## 310 Impaired Control 0.30
## 318 Control Control 0.22
## 319 Control Control 0.16
## 328 Control Control 0.08
## RF_GA_Predicted.Control
## 4 0.80
## 10 0.72
## 13 0.86
## 15 0.74
## 27 0.82
## 32 0.84
## 33 0.74
## 49 0.94
## 52 0.32
## 54 0.86
## 58 0.58
## 66 0.86
## 79 0.84
## 87 0.70
## 89 0.80
## 91 0.62
## 92 0.90
## 101 0.40
## 102 0.90
## 106 0.68
## 116 0.70
## 119 0.82
## 120 0.88
## 122 0.82
## 125 0.78
## 127 0.84
## 138 0.56
## 142 0.76
## 150 0.82
## 151 0.70
## 164 0.76
## 173 0.86
## 187 0.82
## 188 0.80
## 196 0.50
## 199 0.74
## 203 0.72
## 204 0.58
## 206 0.42
## 207 0.88
## 209 0.84
## 211 0.94
## 217 0.78
## 221 0.46
## 222 0.74
## 235 0.94
## 238 0.78
## 248 0.80
## 252 0.92
## 259 0.72
## 266 0.86
## 276 0.42
## 280 0.74
## 284 0.72
## 285 0.88
## 286 0.88
## 288 0.94
## 293 0.74
## 295 0.74
## 296 0.54
## 300 0.62
## 309 0.86
## 310 0.70
## 318 0.78
## 319 0.84
## 328 0.92
##################################
# Reporting the independent evaluation results
# for the test set
##################################
# Test-set accuracy of the genetic algorithm model; the outer parentheses
# also print the stored value.
(RF_GA_Test_Accuracy <- Accuracy(y_pred = RF_GA_Test$RF_GA_Predicted.pred,
                                 y_true = RF_GA_Test$RF_GA_Observed))
## [1] 0.7878788
##################################
# Converting all predictors to numeric
# for both train and test data
##################################
# Coerce every column of the train set except Class to numeric, in place.
# seq_len() keeps the loop safe if the data frame has zero columns.
# NOTE(review): if any predictor is stored as a factor, as.numeric() yields
# its integer level codes rather than the label values - confirm intended.
for (i in seq_len(ncol(PMA_PreModelling_Train))) {
  if (names(PMA_PreModelling_Train)[i] != "Class") {
    PMA_PreModelling_Train[[i]] <- as.numeric(PMA_PreModelling_Train[[i]])
  }
}
# Summarise the converted train set.
summary(PMA_PreModelling_Train)
## ACE_CD143_Angiotensin_Converti ACTH_Adrenocorticotropic_Hormon
## Min. :-0.6756 Min. :-2.207
## 1st Qu.: 0.9462 1st Qu.:-1.715
## Median : 1.3013 Median :-1.561
## Mean : 1.3198 Mean :-1.538
## 3rd Qu.: 1.7191 3rd Qu.:-1.347
## Max. : 2.8398 Max. :-0.844
## AXL Adiponectin Alpha_1_Antichymotrypsin
## Min. :-0.9230 Min. :-6.725 Min. :0.2624
## 1st Qu.: 0.0000 1st Qu.:-5.669 1st Qu.:1.1314
## Median : 0.2804 Median :-5.185 Median :1.3610
## Mean : 0.3093 Mean :-5.201 Mean :1.3605
## 3rd Qu.: 0.6077 3rd Qu.:-4.780 3rd Qu.:1.5892
## Max. : 1.5214 Max. :-3.507 Max. :2.3026
## Alpha_1_Antitrypsin Alpha_1_Microglobulin Alpha_2_Macroglobulin
## Min. :-17.028 Min. :-4.343 Min. :-289.68
## 1st Qu.:-14.071 1st Qu.:-3.270 1st Qu.:-186.64
## Median :-13.004 Median :-2.937 Median :-160.01
## Mean :-13.052 Mean :-2.932 Mean :-158.61
## 3rd Qu.:-12.096 3rd Qu.:-2.590 3rd Qu.:-134.62
## Max. : -8.192 Max. :-1.772 Max. : -59.46
## Angiopoietin_2_ANG_2 Angiotensinogen Apolipoprotein_A_IV Apolipoprotein_A1
## Min. :-0.5447 Min. :1.752 Min. :-2.9565 Min. :-8.680
## 1st Qu.: 0.4700 1st Qu.:2.119 1st Qu.:-2.1203 1st Qu.:-7.763
## Median : 0.6419 Median :2.320 Median :-1.8326 Median :-7.470
## Mean : 0.6730 Mean :2.318 Mean :-1.8544 Mean :-7.483
## 3rd Qu.: 0.8755 3rd Qu.:2.497 3rd Qu.:-1.6094 3rd Qu.:-7.209
## Max. : 1.5261 Max. :2.881 Max. :-0.7765 Max. :-6.166
## Apolipoprotein_A2 Apolipoprotein_B Apolipoprotein_CI Apolipoprotein_CIII
## Min. :-1.8971 Min. :-9.937 Min. :-3.3242 Min. :-3.689
## 1st Qu.:-0.9676 1st Qu.:-6.630 1st Qu.:-1.8326 1st Qu.:-2.773
## Median :-0.6733 Median :-5.703 Median :-1.6094 Median :-2.526
## Mean :-0.6354 Mean :-5.578 Mean :-1.5833 Mean :-2.494
## 3rd Qu.:-0.3147 3rd Qu.:-4.539 3rd Qu.:-1.3667 3rd Qu.:-2.207
## Max. : 0.9555 Max. :-2.153 Max. :-0.2744 Max. :-1.238
## Apolipoprotein_D Apolipoprotein_E Apolipoprotein_H
## Min. :0.470 Min. :0.5911 Min. :-2.23379
## 1st Qu.:1.209 1st Qu.:2.3344 1st Qu.:-0.59782
## Median :1.411 Median :2.8181 Median :-0.37005
## Mean :1.440 Mean :2.8062 Mean :-0.32122
## 3rd Qu.:1.668 3rd Qu.:3.2863 3rd Qu.:-0.06112
## Max. :2.272 Max. :5.4442 Max. : 0.92696
## B_Lymphocyte_Chemoattractant_BL BMP_6 Beta_2_Microglobulin
## Min. :0.7318 Min. :-2.7612 Min. :-0.54473
## 1st Qu.:1.6731 1st Qu.:-2.1516 1st Qu.:-0.04082
## Median :1.9805 Median :-1.8774 Median : 0.18232
## Mean :2.0175 Mean :-1.9114 Mean : 0.16757
## 3rd Qu.:2.3714 3rd Qu.:-1.6753 3rd Qu.: 0.33647
## Max. :4.0237 Max. :-0.8166 Max. : 0.99325
## Betacellulin C_Reactive_Protein CD40 CD5L
## Min. :10.00 Min. :-8.517 Min. :-1.8644 Min. :-1.23787
## 1st Qu.:42.00 1st Qu.:-6.645 1st Qu.:-1.3761 1st Qu.:-0.35667
## Median :51.00 Median :-5.843 Median :-1.2734 Median :-0.06188
## Mean :51.01 Mean :-5.874 Mean :-1.2584 Mean :-0.05310
## 3rd Qu.:59.00 3rd Qu.:-5.083 3rd Qu.:-1.1238 3rd Qu.: 0.26236
## Max. :82.00 Max. :-2.937 Max. :-0.5475 Max. : 1.16315
## Calbindin Calcitonin CgA Clusterin_Apo_J
## Min. :10.96 Min. :-0.7134 Min. :135.6 Min. :1.872
## 1st Qu.:19.77 1st Qu.: 0.9555 1st Qu.:278.0 1st Qu.:2.708
## Median :22.25 Median : 1.6487 Median :331.5 Median :2.890
## Mean :22.43 Mean : 1.6788 Mean :333.3 Mean :2.882
## 3rd Qu.:24.80 3rd Qu.: 2.2824 3rd Qu.:392.1 3rd Qu.:3.045
## Max. :33.78 Max. : 3.8918 Max. :535.4 Max. :3.584
## Complement_3 Complement_Factor_H Connective_Tissue_Growth_Factor
## Min. :-23.387 Min. :-0.8387 Min. :0.1823
## 1st Qu.:-17.567 1st Qu.: 2.7531 1st Qu.:0.6419
## Median :-15.524 Median : 3.6000 Median :0.7885
## Mean :-15.610 Mean : 3.5541 Mean :0.7739
## 3rd Qu.:-13.882 3rd Qu.: 4.2548 3rd Qu.:0.9163
## Max. : -9.563 Max. : 7.6238 Max. :1.4110
## Cortisol Creatine_Kinase_MB Cystatin_C EGF_R
## Min. : 0.10 Min. :-1.872 Min. :7.432 Min. :-1.36135
## 1st Qu.: 9.80 1st Qu.:-1.724 1st Qu.:8.321 1st Qu.:-0.85727
## Median :12.00 Median :-1.671 Median :8.564 Median :-0.68354
## Mean :11.98 Mean :-1.674 Mean :8.586 Mean :-0.70130
## 3rd Qu.:14.00 3rd Qu.:-1.626 3rd Qu.:8.839 3rd Qu.:-0.54612
## Max. :29.00 Max. :-1.384 Max. :9.694 Max. :-0.06112
## EN_RAGE ENA_78 Eotaxin_3 FAS
## Min. :-8.3774 Min. :-1.405 Min. : 7.00 Min. :-1.5141
## 1st Qu.:-4.1997 1st Qu.:-1.381 1st Qu.: 44.00 1st Qu.:-0.7133
## Median :-3.6497 Median :-1.374 Median : 59.00 Median :-0.5276
## Mean :-3.6353 Mean :-1.372 Mean : 58.17 Mean :-0.5291
## 3rd Qu.:-3.1466 3rd Qu.:-1.364 3rd Qu.: 70.00 3rd Qu.:-0.3147
## Max. :-0.3857 Max. :-1.339 Max. :107.00 Max. : 0.3365
## FSH_Follicle_Stimulation_Hormon Fas_Ligand Fatty_Acid_Binding_Protein
## Min. :-2.11511 Min. :-0.1536 Min. :-1.0441
## 1st Qu.:-1.46606 1st Qu.: 2.3415 1st Qu.: 0.7998
## Median :-1.13570 Median : 3.1015 Median : 1.3865
## Mean :-1.14259 Mean : 2.9680 Mean : 1.3529
## 3rd Qu.:-0.87620 3rd Qu.: 3.6950 3rd Qu.: 1.8847
## Max. : 0.09715 Max. : 7.6328 Max. : 3.7055
## Ferritin Fetuin_A Fibrinogen GRO_alpha
## Min. :0.6077 Min. :0.470 Min. :-8.874 Min. :1.271
## 1st Qu.:2.2895 1st Qu.:1.099 1st Qu.:-7.717 1st Qu.:1.351
## Median :2.7749 Median :1.308 Median :-7.323 Median :1.382
## Mean :2.7646 Mean :1.350 Mean :-7.356 Mean :1.378
## 3rd Qu.:3.2915 3rd Qu.:1.609 3rd Qu.:-7.013 3rd Qu.:1.406
## Max. :4.6333 Max. :2.251 Max. :-5.843 Max. :1.495
## Gamma_Interferon_induced_Monokin Glutathione_S_Transferase_alpha
## Min. :2.393 Min. :0.5238
## 1st Qu.:2.707 1st Qu.:0.8439
## Median :2.783 Median :0.9677
## Mean :2.786 Mean :0.9512
## 3rd Qu.:2.873 3rd Qu.:1.0344
## Max. :3.065 Max. :1.3176
## HB_EGF HCC_4 Hepatocyte_Growth_Factor_HGF I_309
## Min. : 2.103 Min. :-4.510 Min. :-0.6349 Min. :1.758
## 1st Qu.: 5.786 1st Qu.:-3.730 1st Qu.: 0.0000 1st Qu.:2.708
## Median : 6.703 Median :-3.507 Median : 0.1823 Median :2.944
## Mean : 6.833 Mean :-3.500 Mean : 0.1963 Mean :2.958
## 3rd Qu.: 7.865 3rd Qu.:-3.270 3rd Qu.: 0.4055 3rd Qu.:3.219
## Max. :10.695 Max. :-2.120 Max. : 0.8755 Max. :4.143
## ICAM_1 IGF_BP_2 IL_11 IL_13
## Min. :-1.5332 Min. :4.635 Min. :1.755 Min. :1.259
## 1st Qu.:-0.8298 1st Qu.:5.179 1st Qu.:3.706 1st Qu.:1.274
## Median :-0.5903 Median :5.323 Median :4.805 Median :1.283
## Mean :-0.5908 Mean :5.317 Mean :4.725 Mean :1.284
## 3rd Qu.:-0.3828 3rd Qu.:5.453 3rd Qu.:5.776 3rd Qu.:1.290
## Max. : 0.5171 Max. :5.948 Max. :8.491 Max. :1.321
## IL_16 IL_17E IL_1alpha IL_3
## Min. :1.187 Min. :1.052 Min. :-8.517 Min. :-5.915
## 1st Qu.:2.521 1st Qu.:4.149 1st Qu.:-7.824 1st Qu.:-4.269
## Median :2.909 Median :4.749 Median :-7.524 Median :-3.912
## Mean :2.929 Mean :4.855 Mean :-7.514 Mean :-3.941
## 3rd Qu.:3.351 3rd Qu.:5.631 3rd Qu.:-7.264 3rd Qu.:-3.631
## Max. :4.937 Max. :8.952 Max. :-5.952 Max. :-2.453
## IL_4 IL_5 IL_6 IL_6_Receptor
## Min. :0.5306 Min. :-1.4271 Min. :-1.5343 Min. :-0.67562
## 1st Qu.:1.4586 1st Qu.:-0.1221 1st Qu.:-0.4127 1st Qu.:-0.12541
## Median :1.8083 Median : 0.1823 Median :-0.1599 Median : 0.09669
## Mean :1.7732 Mean : 0.1866 Mean :-0.1540 Mean : 0.09492
## 3rd Qu.:2.1459 3rd Qu.: 0.4700 3rd Qu.: 0.1410 3rd Qu.: 0.35404
## Max. :3.0445 Max. : 1.9459 Max. : 1.8138 Max. : 0.83099
## IL_7 IL_8 IP_10_Inducible_Protein_10 IgA
## Min. :0.5598 Min. :1.574 Min. :4.317 Min. :-10.520
## 1st Qu.:2.1548 1st Qu.:1.680 1st Qu.:5.398 1st Qu.: -6.645
## Median :2.7934 Median :1.705 Median :5.753 Median : -6.119
## Mean :2.8392 Mean :1.704 Mean :5.755 Mean : -6.121
## 3rd Qu.:3.7055 3rd Qu.:1.728 3rd Qu.:6.064 3rd Qu.: -5.573
## Max. :5.7056 Max. :1.807 Max. :7.501 Max. : -4.200
## Insulin Kidney_Injury_Molecule_1_KIM_1 LOX_1
## Min. :-2.1692 Min. :-1.256 Min. :0.000
## 1st Qu.:-1.4466 1st Qu.:-1.204 1st Qu.:1.030
## Median :-1.2462 Median :-1.183 Median :1.281
## Mean :-1.2329 Mean :-1.185 Mean :1.283
## 3rd Qu.:-1.0340 3rd Qu.:-1.164 3rd Qu.:1.526
## Max. :-0.1586 Max. :-1.105 Max. :2.272
## Leptin Lipoprotein_a MCP_1 MCP_2
## Min. :-2.1468 Min. :-6.812 Min. :5.826 Min. :0.4006
## 1st Qu.:-1.6996 1st Qu.:-5.308 1st Qu.:6.319 1st Qu.:1.5304
## Median :-1.5047 Median :-4.605 Median :6.494 Median :1.8528
## Mean :-1.5042 Mean :-4.417 Mean :6.497 Mean :1.8691
## 3rd Qu.:-1.3295 3rd Qu.:-3.490 3rd Qu.:6.678 3rd Qu.:2.1821
## Max. :-0.6206 Max. :-1.386 Max. :7.230 Max. :4.0237
## MIF MIP_1alpha MIP_1beta MMP_2
## Min. :-2.847 Min. :0.9346 Min. :1.946 Min. :0.09809
## 1st Qu.:-2.120 1st Qu.:3.3377 1st Qu.:2.565 1st Qu.:2.33214
## Median :-1.897 Median :4.0495 Median :2.833 Median :2.81512
## Mean :-1.864 Mean :4.0489 Mean :2.814 Mean :2.87534
## 3rd Qu.:-1.661 3rd Qu.:4.6857 3rd Qu.:3.045 3rd Qu.:3.55121
## Max. :-0.844 Max. :6.7959 Max. :4.007 Max. :5.35895
## MMP_3 MMP10 MMP7 Myoglobin
## Min. :-4.4228 Min. :-4.934 Min. :-8.3975 Min. :-3.1701
## 1st Qu.:-2.7489 1st Qu.:-3.938 1st Qu.:-4.8199 1st Qu.:-2.0402
## Median :-2.4534 Median :-3.650 Median :-3.7735 Median :-1.4697
## Mean :-2.4455 Mean :-3.635 Mean :-3.7894 Mean :-1.3671
## 3rd Qu.:-2.1203 3rd Qu.:-3.352 3rd Qu.:-2.7140 3rd Qu.:-0.7988
## Max. :-0.5276 Max. :-2.207 Max. :-0.2222 Max. : 1.7750
## NT_proBNP NrCAM Osteopontin PAI_1
## Min. :3.178 Min. :2.639 Min. :4.111 Min. :-0.99085
## 1st Qu.:4.350 1st Qu.:3.998 1st Qu.:4.963 1st Qu.:-0.16655
## Median :4.554 Median :4.394 Median :5.187 Median : 0.09396
## Mean :4.552 Mean :4.362 Mean :5.204 Mean : 0.07743
## 3rd Qu.:4.775 3rd Qu.:4.749 3rd Qu.:5.442 3rd Qu.: 0.32005
## Max. :5.886 Max. :6.011 Max. :6.308 Max. : 1.16611
## PAPP_A PLGF PYY Pancreatic_polypeptide
## Min. :-3.311 Min. :2.485 Min. :2.186 Min. :-2.12026
## 1st Qu.:-2.936 1st Qu.:3.638 1st Qu.:2.833 1st Qu.:-0.52763
## Median :-2.871 Median :3.871 Median :2.996 Median :-0.04082
## Mean :-2.854 Mean :3.912 Mean :3.015 Mean :-0.01323
## 3rd Qu.:-2.749 3rd Qu.:4.205 3rd Qu.:3.178 3rd Qu.: 0.53063
## Max. :-2.520 Max. :5.170 Max. :3.932 Max. : 1.93152
## Prolactin Prostatic_Acid_Phosphatase Protein_S
## Min. :-1.30933 Min. :-1.934 Min. :-3.338
## 1st Qu.:-0.13926 1st Qu.:-1.717 1st Qu.:-2.464
## Median : 0.00000 Median :-1.690 Median :-2.259
## Mean : 0.04495 Mean :-1.685 Mean :-2.240
## 3rd Qu.: 0.25799 3rd Qu.:-1.654 3rd Qu.:-2.000
## Max. : 0.99325 Max. :-1.424 Max. :-1.221
## Pulmonary_and_Activation_Regulat RANTES Resistin
## Min. :-2.5133 Min. :-7.222 Min. :-34.967
## 1st Qu.:-1.8326 1st Qu.:-6.725 1st Qu.:-21.468
## Median :-1.5141 Median :-6.502 Median :-17.466
## Mean :-1.4880 Mean :-6.511 Mean :-17.641
## 3rd Qu.:-1.1712 3rd Qu.:-6.320 3rd Qu.:-13.501
## Max. :-0.2744 Max. :-5.547 Max. : -2.239
## S100b SGOT SHBG SOD
## Min. :0.1874 Min. :-1.3471 Min. :-4.135 Min. :4.317
## 1st Qu.:1.0012 1st Qu.:-0.6349 1st Qu.:-2.813 1st Qu.:5.094
## Median :1.2544 Median :-0.4005 Median :-2.489 Median :5.366
## Mean :1.2505 Mean :-0.4057 Mean :-2.477 Mean :5.336
## 3rd Qu.:1.4996 3rd Qu.:-0.1985 3rd Qu.:-2.120 3rd Qu.:5.583
## Max. :2.3726 Max. : 0.7419 Max. :-1.109 Max. :6.317
## Serum_Amyloid_P Sortilin Stem_Cell_Factor TGF_alpha
## Min. :-7.506 Min. :1.654 Min. :2.251 Min. : 6.843
## 1st Qu.:-6.377 1st Qu.:3.343 1st Qu.:3.045 1st Qu.: 8.859
## Median :-6.032 Median :3.867 Median :3.296 Median : 9.919
## Mean :-6.017 Mean :3.852 Mean :3.301 Mean : 9.801
## 3rd Qu.:-5.655 3rd Qu.:4.371 3rd Qu.:3.526 3rd Qu.:10.695
## Max. :-4.646 Max. :6.225 Max. :4.277 Max. :13.827
## TIMP_1 TNF_RII TRAIL_R3 TTR_prealbumin
## Min. : 1.742 Min. :-1.6607 Min. :-1.2107 Min. :2.485
## 1st Qu.:10.490 1st Qu.:-0.8210 1st Qu.:-0.7008 1st Qu.:2.773
## Median :11.565 Median :-0.5978 Median :-0.5317 Median :2.833
## Mean :11.750 Mean :-0.5939 Mean :-0.5394 Mean :2.854
## 3rd Qu.:12.697 3rd Qu.:-0.3784 3rd Qu.:-0.3849 3rd Qu.:2.944
## Max. :18.881 Max. : 0.4700 Max. : 0.2694 Max. :3.332
## Tamm_Horsfall_Protein_THP Thrombomodulin Thrombopoietin
## Min. :-3.206 Min. :-2.0377 Min. :-1.53957
## 1st Qu.:-3.137 1st Qu.:-1.6256 1st Qu.:-0.88645
## Median :-3.117 Median :-1.4920 Median :-0.75100
## Mean :-3.116 Mean :-1.5050 Mean :-0.75419
## 3rd Qu.:-3.096 3rd Qu.:-1.3406 3rd Qu.:-0.62887
## Max. :-2.995 Max. :-0.8166 Max. : 0.09762
## Thymus_Expressed_Chemokine_TECK Thyroid_Stimulating_Hormone
## Min. :1.508 Min. :-6.190
## 1st Qu.:3.343 1st Qu.:-4.962
## Median :3.810 Median :-4.510
## Mean :3.848 Mean :-4.499
## 3rd Qu.:4.316 3rd Qu.:-4.017
## Max. :6.225 Max. :-1.715
## Thyroxine_Binding_Globulin Tissue_Factor Transferrin
## Min. :-2.4769 Min. :-0.2107 Min. :1.932
## 1st Qu.:-1.7720 1st Qu.: 0.8329 1st Qu.:2.708
## Median :-1.5141 Median : 1.2238 Median :2.890
## Mean :-1.4788 Mean : 1.1702 Mean :2.909
## 3rd Qu.:-1.2379 3rd Qu.: 1.4816 3rd Qu.:3.091
## Max. :-0.2107 Max. : 2.4849 Max. :3.761
## Trefoil_Factor_3_TFF3 VCAM_1 VEGF Vitronectin
## Min. :-4.744 Min. :1.723 Min. :11.83 Min. :-1.42712
## 1st Qu.:-4.135 1st Qu.:2.485 1st Qu.:15.77 1st Qu.:-0.51083
## Median :-3.863 Median :2.708 Median :17.08 Median :-0.30111
## Mean :-3.876 Mean :2.688 Mean :16.99 Mean :-0.28473
## 3rd Qu.:-3.650 3rd Qu.:2.890 3rd Qu.:18.10 3rd Qu.:-0.03564
## Max. :-2.957 Max. :3.689 Max. :22.38 Max. : 0.53063
## von_Willebrand_Factor Class E4 E3
## Min. :-4.991 Impaired: 73 Min. :1.000 Min. :1.000
## 1st Qu.:-4.200 Control :194 1st Qu.:1.000 1st Qu.:2.000
## Median :-3.912 Median :1.000 Median :2.000
## Mean :-3.906 Mean :1.401 Mean :1.918
## 3rd Qu.:-3.612 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :-2.957 Max. :2.000 Max. :2.000
## E2
## Min. :1.000
## 1st Qu.:1.000
## Median :1.000
## Mean :1.161
## 3rd Qu.:1.000
## Max. :2.000
# Coerce every column of the test set except the Class outcome to numeric,
# in place.
# NOTE: as.numeric() on a factor yields its integer level codes, not the
# original labels; the 1/2 range reported for E2/E3/E4 in the summary is
# consistent with those columns having been factors at this point.
for (i in seq_len(ncol(PMA_PreModelling_Test))) {
  if (names(PMA_PreModelling_Test)[i] != "Class") {
    PMA_PreModelling_Test[[i]] <- as.numeric(PMA_PreModelling_Test[[i]])
  }
}
summary(PMA_PreModelling_Test)
## ACE_CD143_Angiotensin_Converti ACTH_Adrenocorticotropic_Hormon
## Min. :-0.5473 Min. :-2.2073
## 1st Qu.: 0.9462 1st Qu.:-1.7148
## Median : 1.3013 Median :-1.5374
## Mean : 1.3105 Mean :-1.5311
## 3rd Qu.: 1.6320 3rd Qu.:-1.3863
## Max. : 3.0890 Max. :-0.7985
## AXL Adiponectin Alpha_1_Antichymotrypsin
## Min. :-0.73509 Min. :-7.059 Min. :0.1823
## 1st Qu.:-0.08175 1st Qu.:-5.737 1st Qu.:1.0647
## Median : 0.28035 Median :-5.360 Median :1.3083
## Mean : 0.25373 Mean :-5.298 Mean :1.3077
## 3rd Qu.: 0.60768 3rd Qu.:-4.917 3rd Qu.:1.5686
## Max. : 1.28634 Max. :-3.474 Max. :2.2192
## Alpha_1_Antitrypsin Alpha_1_Microglobulin Alpha_2_Macroglobulin
## Min. :-18.17 Min. :-4.135 Min. :-238.64
## 1st Qu.:-14.70 1st Qu.:-3.284 1st Qu.:-186.64
## Median :-13.59 Median :-3.006 Median :-162.93
## Mean :-13.49 Mean :-2.983 Mean :-162.89
## 3rd Qu.:-12.31 3rd Qu.:-2.674 3rd Qu.:-136.53
## Max. :-10.06 Max. :-1.897 Max. : -50.17
## Angiopoietin_2_ANG_2 Angiotensinogen Apolipoprotein_A_IV Apolipoprotein_A1
## Min. :-0.05129 Min. :1.710 Min. :-2.749 Min. :-8.568
## 1st Qu.: 0.35372 1st Qu.:2.068 1st Qu.:-2.186 1st Qu.:-7.818
## Median : 0.55921 Median :2.276 Median :-1.897 Median :-7.497
## Mean : 0.60278 Mean :2.274 Mean :-1.867 Mean :-7.488
## 3rd Qu.: 0.78846 3rd Qu.:2.430 3rd Qu.:-1.526 3rd Qu.:-7.176
## Max. : 1.77495 Max. :2.752 Max. :-1.109 Max. :-6.645
## Apolipoprotein_A2 Apolipoprotein_B Apolipoprotein_CI Apolipoprotein_CIII
## Min. :-1.9661 Min. :-8.192 Min. :-2.847 Min. :-3.863
## 1st Qu.:-0.9416 1st Qu.:-6.748 1st Qu.:-1.897 1st Qu.:-2.781
## Median :-0.7032 Median :-5.819 Median :-1.609 Median :-2.557
## Mean :-0.6902 Mean :-5.649 Mean :-1.625 Mean :-2.523
## 3rd Qu.:-0.3533 3rd Qu.:-4.603 3rd Qu.:-1.309 3rd Qu.:-2.231
## Max. : 0.5306 Max. :-2.339 Max. :-0.462 Max. :-1.386
## Apolipoprotein_D Apolipoprotein_E Apolipoprotein_H
## Min. :0.2624 Min. :0.6626 Min. :-1.1609
## 1st Qu.:1.1314 1st Qu.:2.1526 1st Qu.:-0.5317
## Median :1.3863 Median :2.8181 Median :-0.2897
## Mean :1.3943 Mean :2.7160 Mean :-0.3212
## 3rd Qu.:1.6864 3rd Qu.:3.2363 3rd Qu.:-0.1032
## Max. :2.6391 Max. :4.6844 Max. : 0.4402
## B_Lymphocyte_Chemoattractant_BL BMP_6 Beta_2_Microglobulin
## Min. :0.7318 Min. :-2.669 Min. :-0.51083
## 1st Qu.:1.5304 1st Qu.:-2.152 1st Qu.:-0.06188
## Median :1.8528 Median :-1.964 Median : 0.18232
## Mean :1.8766 Mean :-1.937 Mean : 0.15566
## 3rd Qu.:2.3714 3rd Qu.:-1.675 3rd Qu.: 0.40547
## Max. :2.9757 Max. :-1.181 Max. : 0.83291
## Betacellulin C_Reactive_Protein CD40 CD5L
## Min. :32.00 Min. :-8.112 Min. :-1.9390 Min. :-1.96611
## 1st Qu.:46.00 1st Qu.:-6.725 1st Qu.:-1.4420 1st Qu.:-0.36747
## Median :51.00 Median :-6.166 Median :-1.2574 Median :-0.05135
## Mean :52.74 Mean :-5.997 Mean :-1.2773 Mean :-0.08760
## 3rd Qu.:59.75 3rd Qu.:-5.369 3rd Qu.:-1.1034 3rd Qu.: 0.24235
## Max. :80.00 Max. :-3.411 Max. :-0.7766 Max. : 0.91629
## Calbindin Calcitonin CgA Clusterin_Apo_J
## Min. :10.81 Min. :-0.7134 Min. :166.6 Min. :1.932
## 1st Qu.:18.88 1st Qu.: 1.2014 1st Qu.:268.2 1st Qu.:2.565
## Median :21.06 Median : 1.6849 Median :324.7 Median :2.833
## Mean :21.49 Mean : 1.7250 Mean :320.2 Mean :2.845
## 3rd Qu.:24.00 3rd Qu.: 2.2618 3rd Qu.:362.3 3rd Qu.:3.045
## Max. :35.36 Max. : 4.1109 Max. :494.5 Max. :3.761
## Complement_3 Complement_Factor_H Connective_Tissue_Growth_Factor
## Min. :-22.40 Min. :0.2766 Min. :0.09531
## 1st Qu.:-17.50 1st Qu.:2.6019 1st Qu.:0.58779
## Median :-15.90 Median :3.3983 Median :0.74194
## Mean :-15.91 Mean :3.3897 Mean :0.74507
## 3rd Qu.:-14.34 3rd Qu.:4.2548 3rd Qu.:0.87547
## Max. :-10.23 Max. :6.5597 Max. :1.41099
## Cortisol Creatine_Kinase_MB Cystatin_C EGF_R
## Min. : 0.10 Min. :-1.872 Min. :7.728 Min. :-1.2694
## 1st Qu.: 8.90 1st Qu.:-1.721 1st Qu.:8.301 1st Qu.:-0.8859
## Median :10.00 Median :-1.651 Median :8.544 Median :-0.6917
## Mean :10.46 Mean :-1.652 Mean :8.576 Mean :-0.6965
## 3rd Qu.:12.00 3rd Qu.:-1.590 3rd Qu.:8.837 3rd Qu.:-0.5034
## Max. :22.00 Max. :-1.434 Max. :9.694 Max. : 0.1891
## EN_RAGE ENA_78 Eotaxin_3 FAS
## Min. :-8.3774 Min. :-1.405 Min. : 23.00 Min. :-1.1087
## 1st Qu.:-4.1836 1st Qu.:-1.382 1st Qu.: 43.00 1st Qu.:-0.7133
## Median :-3.6889 Median :-1.374 Median : 54.00 Median :-0.5798
## Mean :-3.5986 Mean :-1.376 Mean : 55.55 Mean :-0.5414
## 3rd Qu.:-3.2189 3rd Qu.:-1.368 3rd Qu.: 64.00 3rd Qu.:-0.3355
## Max. :-0.8675 Max. :-1.353 Max. :107.00 Max. : 0.1823
## FSH_Follicle_Stimulation_Hormon Fas_Ligand Fatty_Acid_Binding_Protein
## Min. :-1.8101 Min. :0.288 Min. :-0.4559
## 1st Qu.:-1.2694 1st Qu.:2.073 1st Qu.: 0.7998
## Median :-0.9763 Median :2.665 Median : 1.1866
## Mean :-1.0597 Mean :2.649 Mean : 1.2884
## 3rd Qu.:-0.8068 3rd Qu.:3.162 3rd Qu.: 1.9192
## Max. :-0.4757 Max. :5.377 Max. : 3.2188
## Ferritin Fetuin_A Fibrinogen GRO_alpha
## Min. :0.8983 Min. :0.5306 Min. :-9.373 Min. :1.271
## 1st Qu.:2.1473 1st Qu.:1.0296 1st Qu.:-7.799 1st Qu.:1.351
## Median :2.6260 Median :1.3083 Median :-7.316 Median :1.372
## Mean :2.7069 Mean :1.3116 Mean :-7.360 Mean :1.378
## 3rd Qu.:3.1672 3rd Qu.:1.6094 3rd Qu.:-6.970 3rd Qu.:1.398
## Max. :4.9282 Max. :2.2083 Max. :-6.166 Max. :1.514
## Gamma_Interferon_induced_Monokin Glutathione_S_Transferase_alpha
## Min. :2.545 Min. :0.5661
## 1st Qu.:2.698 1st Qu.:0.8257
## Median :2.768 Median :0.9493
## Mean :2.772 Mean :0.9440
## 3rd Qu.:2.829 3rd Qu.:1.0457
## Max. :3.046 Max. :1.3102
## HB_EGF HCC_4 Hepatocyte_Growth_Factor_HGF I_309
## Min. : 3.521 Min. :-4.343 Min. :-0.61619 Min. :2.041
## 1st Qu.: 5.949 1st Qu.:-3.772 1st Qu.:-0.05661 1st Qu.:2.724
## Median : 6.980 Median :-3.540 Median : 0.18232 Median :2.944
## Mean : 6.844 Mean :-3.538 Mean : 0.18076 Mean :2.921
## 3rd Qu.: 7.745 3rd Qu.:-3.352 3rd Qu.: 0.33647 3rd Qu.:3.135
## Max. :10.359 Max. :-2.489 Max. : 1.09861 Max. :3.689
## ICAM_1 IGF_BP_2 IL_11 IL_13
## Min. :-1.4661 Min. :4.718 Min. :2.031 Min. :1.232
## 1st Qu.:-0.7671 1st Qu.:5.127 1st Qu.:3.960 1st Qu.:1.274
## Median :-0.5903 Median :5.255 Median :4.838 Median :1.283
## Mean :-0.5958 Mean :5.263 Mean :4.651 Mean :1.283
## 3rd Qu.:-0.3574 3rd Qu.:5.402 3rd Qu.:5.482 3rd Qu.:1.292
## Max. : 0.3602 Max. :5.916 Max. :8.692 Max. :1.310
## IL_16 IL_17E IL_1alpha IL_3
## Min. :0.9568 Min. :1.582 Min. :-8.468 Min. :-5.521
## 1st Qu.:2.4411 1st Qu.:3.637 1st Qu.:-7.849 1st Qu.:-4.324
## Median :2.8763 Median :4.723 Median :-7.562 Median :-3.963
## Mean :2.8176 Mean :4.774 Mean :-7.549 Mean :-3.976
## 3rd Qu.:3.3514 3rd Qu.:5.415 3rd Qu.:-7.279 3rd Qu.:-3.576
## Max. :4.1028 Max. :8.081 Max. :-6.377 Max. :-3.079
## IL_4 IL_5 IL_6 IL_6_Receptor
## Min. :0.5306 Min. :-1.04982 Min. :-1.53428 Min. :-0.74560
## 1st Qu.:1.4586 1st Qu.:-0.03062 1st Qu.:-0.40924 1st Qu.:-0.20131
## Median :1.7226 Median : 0.22234 Median :-0.07205 Median : 0.00000
## Mean :1.7445 Mean : 0.22853 Mean :-0.05216 Mean : 0.06213
## 3rd Qu.:2.0669 3rd Qu.: 0.53063 3rd Qu.: 0.34805 3rd Qu.: 0.27297
## Max. :2.7081 Max. : 1.13140 Max. : 1.00562 Max. : 0.77048
## IL_7 IL_8 IP_10_Inducible_Protein_10 IgA
## Min. :1.310 Min. :1.615 Min. :4.263 Min. :-7.621
## 1st Qu.:2.379 1st Qu.:1.684 1st Qu.:5.323 1st Qu.:-6.571
## Median :3.148 Median :1.702 Median :5.617 Median :-6.012
## Mean :3.143 Mean :1.704 Mean :5.636 Mean :-6.066
## 3rd Qu.:3.706 3rd Qu.:1.725 3rd Qu.:5.917 3rd Qu.:-5.606
## Max. :5.000 Max. :1.836 Max. :7.208 Max. :-4.733
## Insulin Kidney_Injury_Molecule_1_KIM_1 LOX_1
## Min. :-2.0099 Min. :-1.251 Min. :0.0000
## 1st Qu.:-1.4466 1st Qu.:-1.209 1st Qu.:0.9649
## Median :-1.2169 Median :-1.187 Median :1.2238
## Mean :-1.1998 Mean :-1.188 Mean :1.2085
## 3rd Qu.:-1.0105 3rd Qu.:-1.166 3rd Qu.:1.4351
## Max. :-0.5025 Max. :-1.124 Max. :2.3979
## Leptin Lipoprotein_a MCP_1 MCP_2
## Min. :-1.9471 Min. :-6.571 Min. :5.889 Min. :0.4006
## 1st Qu.:-1.6334 1st Qu.:-5.116 1st Qu.:6.318 1st Qu.:1.5304
## Median :-1.4294 Median :-4.657 Median :6.482 Median :1.8528
## Mean :-1.4363 Mean :-4.515 Mean :6.480 Mean :1.8104
## 3rd Qu.:-1.2409 3rd Qu.:-4.017 3rd Qu.:6.627 3rd Qu.:2.0827
## Max. :-0.8387 Max. :-2.040 Max. :7.065 Max. :3.7545
## MIF MIP_1alpha MIP_1beta MMP_2
## Min. :-2.797 Min. :1.008 Min. :1.917 Min. :0.6248
## 1st Qu.:-2.120 1st Qu.:3.302 1st Qu.:2.485 1st Qu.:2.5513
## Median :-1.966 Median :3.736 Median :2.773 Median :2.9937
## Mean :-1.932 Mean :3.898 Mean :2.784 Mean :3.0347
## 3rd Qu.:-1.715 3rd Qu.:4.686 3rd Qu.:3.079 3rd Qu.:3.4798
## Max. :-1.109 Max. :5.735 Max. :3.784 Max. :6.0996
## MMP_3 MMP10 MMP7 Myoglobin
## Min. :-3.650 Min. :-4.948 Min. :-7.5346 Min. :-3.2968
## 1st Qu.:-2.852 1st Qu.:-4.075 1st Qu.:-4.9634 1st Qu.:-2.0217
## Median :-2.532 Median :-3.612 Median :-4.0302 Median :-1.5874
## Mean :-2.490 Mean :-3.676 Mean :-4.0148 Mean :-1.4165
## 3rd Qu.:-2.120 3rd Qu.:-3.331 3rd Qu.:-3.1640 3rd Qu.:-0.7765
## Max. :-1.171 Max. :-2.900 Max. :-0.1953 Max. : 1.1314
## NT_proBNP NrCAM Osteopontin PAI_1
## Min. :3.611 Min. :2.890 Min. :4.078 Min. :-0.990849
## 1st Qu.:4.174 1st Qu.:3.871 1st Qu.:4.892 1st Qu.:-0.334043
## Median :4.477 Median :4.317 Median :5.168 Median : 0.000000
## Mean :4.488 Mean :4.291 Mean :5.177 Mean :-0.003947
## 3rd Qu.:4.794 3rd Qu.:4.725 3rd Qu.:5.410 3rd Qu.: 0.303112
## Max. :5.398 Max. :5.690 Max. :6.315 Max. : 0.885785
## PAPP_A PLGF PYY Pancreatic_polypeptide
## Min. :-3.152 Min. :2.639 Min. :2.398 Min. :-1.609438
## 1st Qu.:-2.971 1st Qu.:3.689 1st Qu.:2.833 1st Qu.:-0.506693
## Median :-2.841 Median :3.892 Median :2.996 Median : 0.138816
## Mean :-2.845 Mean :3.884 Mean :2.976 Mean :-0.005258
## 3rd Qu.:-2.719 3rd Qu.:4.123 3rd Qu.:3.178 3rd Qu.: 0.470004
## Max. :-2.488 Max. :4.710 Max. :3.738 Max. : 1.504077
## Prolactin Prostatic_Acid_Phosphatase Protein_S
## Min. :-0.38566 Min. :-1.800 Min. :-3.154
## 1st Qu.:-0.16558 1st Qu.:-1.739 1st Qu.:-2.579
## Median : 0.00000 Median :-1.690 Median :-2.259
## Mean : 0.05195 Mean :-1.692 Mean :-2.268
## 3rd Qu.: 0.18232 3rd Qu.:-1.659 3rd Qu.:-1.924
## Max. : 0.78846 Max. :-1.540 Max. :-1.547
## Pulmonary_and_Activation_Regulat RANTES Resistin
## Min. :-2.4418 Min. :-7.236 Min. :-30.156
## 1st Qu.:-1.8326 1st Qu.:-6.725 1st Qu.:-22.131
## Median :-1.5141 Median :-6.571 Median :-18.014
## Mean :-1.5007 Mean :-6.540 Mean :-18.245
## 3rd Qu.:-1.1712 3rd Qu.:-6.392 3rd Qu.:-15.202
## Max. :-0.4463 Max. :-5.843 Max. : -6.594
## S100b SGOT SHBG SOD
## Min. :0.1874 Min. :-1.8971 Min. :-3.730 Min. :4.382
## 1st Qu.:0.9600 1st Qu.:-0.7498 1st Qu.:-3.052 1st Qu.:5.006
## Median :1.1571 Median :-0.4780 Median :-2.711 Median :5.313
## Mean :1.1819 Mean :-0.4898 Mean :-2.686 Mean :5.302
## 3rd Qu.:1.3807 3rd Qu.:-0.2138 3rd Qu.:-2.343 3rd Qu.:5.547
## Max. :2.1950 Max. : 0.1823 Max. :-1.561 Max. :6.461
## Serum_Amyloid_P Sortilin Stem_Cell_Factor TGF_alpha
## Min. :-7.182 Min. :1.508 Min. :2.219 Min. : 7.500
## 1st Qu.:-6.438 1st Qu.:3.177 1st Qu.:3.045 1st Qu.: 9.062
## Median :-6.215 Median :3.867 Median :3.314 Median : 9.596
## Mean :-6.083 Mean :3.787 Mean :3.267 Mean : 9.776
## 3rd Qu.:-5.607 3rd Qu.:4.371 3rd Qu.:3.466 3rd Qu.:10.612
## Max. :-4.699 Max. :5.681 Max. :4.078 Max. :13.083
## TIMP_1 TNF_RII TRAIL_R3 TTR_prealbumin
## Min. : 8.198 Min. :-1.6607 Min. :-1.30636 Min. :2.485
## 1st Qu.:10.530 1st Qu.:-0.8675 1st Qu.:-0.73332 1st Qu.:2.773
## Median :11.341 Median :-0.6541 Median :-0.55547 Median :2.890
## Mean :11.520 Mean :-0.6270 Mean :-0.58640 Mean :2.854
## 3rd Qu.:12.352 3rd Qu.:-0.3320 3rd Qu.:-0.47065 3rd Qu.:2.944
## Max. :16.547 Max. : 0.4055 Max. : 0.09622 Max. :3.091
## Tamm_Horsfall_Protein_THP Thrombomodulin Thrombopoietin
## Min. :-3.206 Min. :-2.054 Min. :-1.5396
## 1st Qu.:-3.144 1st Qu.:-1.675 1st Qu.:-0.8383
## Median :-3.126 Median :-1.534 Median :-0.7039
## Mean :-3.123 Mean :-1.533 Mean :-0.7192
## 3rd Qu.:-3.101 3rd Qu.:-1.341 3rd Qu.:-0.6289
## Max. :-3.041 Max. :-1.019 Max. :-0.3029
## Thymus_Expressed_Chemokine_TECK Thyroid_Stimulating_Hormone
## Min. :2.141 Min. :-6.190
## 1st Qu.:3.283 1st Qu.:-4.733
## Median :3.753 Median :-4.269
## Mean :3.770 Mean :-4.221
## 3rd Qu.:4.316 3rd Qu.:-3.828
## Max. :5.681 Max. :-2.040
## Thyroxine_Binding_Globulin Tissue_Factor Transferrin
## Min. :-2.3026 Min. :0.0000 Min. :2.282
## 1st Qu.:-1.7148 1st Qu.:0.7053 1st Qu.:2.708
## Median :-1.4919 Median :1.1473 Median :2.890
## Mean :-1.4902 Mean :1.1356 Mean :2.900
## 3rd Qu.:-1.2379 3rd Qu.:1.5149 3rd Qu.:3.135
## Max. :-0.5978 Max. :2.7081 Max. :3.497
## Trefoil_Factor_3_TFF3 VCAM_1 VEGF Vitronectin
## Min. :-4.906 Min. :2.028 Min. :12.23 Min. :-1.07881
## 1st Qu.:-4.200 1st Qu.:2.420 1st Qu.:15.03 1st Qu.:-0.46204
## Median :-3.912 Median :2.674 Median :17.08 Median :-0.28106
## Mean :-3.947 Mean :2.644 Mean :16.70 Mean :-0.26833
## 3rd Qu.:-3.772 3rd Qu.:2.833 3rd Qu.:18.19 3rd Qu.:-0.05394
## Max. :-3.170 Max. :3.466 Max. :21.18 Max. : 0.40547
## von_Willebrand_Factor Class E4 E3
## Min. :-4.920 Impaired:18 Min. :1.000 Min. :1.000
## 1st Qu.:-4.269 Control :48 1st Qu.:1.000 1st Qu.:2.000
## Median :-4.017 Median :1.000 Median :2.000
## Mean :-4.014 Mean :1.303 Mean :1.985
## 3rd Qu.:-3.730 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :-3.058 Max. :2.000 Max. :2.000
## E2
## Min. :1.000
## 1st Qu.:1.000
## Median :1.000
## Mean :1.061
## 3rd Qu.:1.000
## Max. :2.000
##################################
# Creating consistent fold assignments
# for the Cross Validation process
##################################
# Fix the RNG seed so the fold assignment is reproducible, then build
# 10 folds on the Class outcome. returnTrain = TRUE makes each list element
# hold the row indices used for training in that fold, which is the form
# trainControl(index = ...) expects.
set.seed(12345678)
KFold_Indices <- createFolds(PMA_PreModelling_Train$Class,
                             k = 10,
                             returnTrain = TRUE)
##################################
# Formulating a function to summarize
# model performance metrics
##################################
# Summary function for caret resampling: concatenates the output of
# twoClassSummary() and defaultSummary() into a single vector, forwarding
# all arguments unchanged to both.
FiveMetricsSummary <- function(...) {
  c(twoClassSummary(...), defaultSummary(...))
}
##################################
# Formulating the controls for the
# model training process
##################################
# Cross-validation controls shared by the model fits: reuse the fixed
# fold indices, score each resample with FiveMetricsSummary, and request
# class probabilities (classProbs = TRUE).
KFold_TrainControl <- trainControl(method = "cv",
                                   summaryFunction = FiveMetricsSummary,
                                   classProbs = TRUE,
                                   index = KFold_Indices)
##################################
# Running the linear discriminant analysis model
# by setting the caret method to 'lda'
##################################
# Fit LDA on all predictors (every column except Class), selecting on
# Accuracy under the shared cross-validation controls. tol is forwarded
# to the underlying lda() call (see the "lda(x, y, tol = 1e-12)" call
# echoed by the final model).
set.seed(12345678)
LDA_FULL_Tune <- caret::train(
  x = PMA_PreModelling_Train[, !names(PMA_PreModelling_Train) %in% c("Class")],
  y = PMA_PreModelling_Train$Class,
  method = "lda",
  metric = "Accuracy",
  tol = 1.0e-12,
  trControl = KFold_TrainControl
)
##################################
# Reporting the cross-validation results
# for the train set
##################################
LDA_FULL_Tune
## Linear Discriminant Analysis
##
## 267 samples
## 127 predictors
## 2 classes: 'Impaired', 'Control'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 240, 239, 241, 240, 241, 241, ...
## Resampling results:
##
## ROC Sens Spec Accuracy Kappa
## 0.8015132 0.6428571 0.8142105 0.7675417 0.4377465
# Print the underlying fitted lda model held by the caret train object.
LDA_FULL_Tune$finalModel
## Call:
## lda(x, y, tol = 1e-12)
##
## Prior probabilities of groups:
## Impaired Control
## 0.2734082 0.7265918
##
## Group means:
## ACE_CD143_Angiotensin_Converti ACTH_Adrenocorticotropic_Hormon
## Impaired 1.291830 -1.512663
## Control 1.330372 -1.547922
## AXL Adiponectin Alpha_1_Antichymotrypsin Alpha_1_Antitrypsin
## Impaired 0.3606134 -5.062015 1.461656 -12.54310
## Control 0.2900390 -5.252821 1.322478 -13.24365
## Alpha_1_Microglobulin Alpha_2_Macroglobulin Angiopoietin_2_ANG_2
## Impaired -2.812275 -149.8084 0.7189962
## Control -2.977260 -161.9286 0.6556583
## Angiotensinogen Apolipoprotein_A_IV Apolipoprotein_A1
## Impaired 2.358407 -1.836798 -7.489153
## Control 2.302900 -1.861029 -7.480003
## Apolipoprotein_A2 Apolipoprotein_B Apolipoprotein_CI
## Impaired -0.6028357 -5.290980 -1.557583
## Control -0.6475857 -5.686514 -1.592933
## Apolipoprotein_CIII Apolipoprotein_D Apolipoprotein_E Apolipoprotein_H
## Impaired -2.402152 1.534726 2.774604 -0.2462583
## Control -2.528993 1.404988 2.818067 -0.3494279
## B_Lymphocyte_Chemoattractant_BL BMP_6 Beta_2_Microglobulin
## Impaired 2.191212 -1.906947 0.2010301
## Control 1.952112 -1.913117 0.1549796
## Betacellulin C_Reactive_Protein CD40 CD5L Calbindin
## Impaired 51.17808 -6.014766 -1.259173 0.05894109 23.27885
## Control 50.94845 -5.820661 -1.258089 -0.09525496 22.11405
## Calcitonin CgA Clusterin_Apo_J Complement_3 Complement_Factor_H
## Impaired 1.676591 334.8176 2.952344 -14.85689 3.557332
## Control 1.679652 332.7262 2.855998 -15.89379 3.552854
## Connective_Tissue_Growth_Factor Cortisol Creatine_Kinase_MB Cystatin_C
## Impaired 0.7947004 13.42740 -1.702831 8.472553
## Control 0.7660980 11.44124 -1.663646 8.628643
## EGF_R EN_RAGE ENA_78 Eotaxin_3 FAS
## Impaired -0.6825736 -3.643713 -1.372875 64.00000 -0.3996292
## Control -0.7083446 -3.632086 -1.372303 55.97938 -0.5778808
## FSH_Follicle_Stimulation_Hormon Fas_Ligand Fatty_Acid_Binding_Protein
## Impaired -1.153392 3.240515 1.614389
## Control -1.138530 2.865447 1.254534
## Ferritin Fetuin_A Fibrinogen GRO_alpha
## Impaired 2.931628 1.434358 -7.090535 1.399543
## Control 2.701751 1.318421 -7.456059 1.370087
## Gamma_Interferon_induced_Monokin Glutathione_S_Transferase_alpha
## Impaired 2.829613 0.9529078
## Control 2.769219 0.9505496
## HB_EGF HCC_4 Hepatocyte_Growth_Factor_HGF I_309 ICAM_1
## Impaired 7.177725 -3.394734 0.2821109 3.023617 -0.5474443
## Control 6.703511 -3.539563 0.1639794 2.933276 -0.6071264
## IGF_BP_2 IL_11 IL_13 IL_16 IL_17E IL_1alpha IL_3
## Impaired 5.384293 4.680067 1.283034 2.919439 4.671603 -7.477581 -3.987613
## Control 5.291718 4.741512 1.283942 2.933033 4.923843 -7.527105 -3.923848
## IL_4 IL_5 IL_6 IL_6_Receptor IL_7 IL_8
## Impaired 1.753057 0.1442299 -0.09617877 0.10115529 2.525637 1.713003
## Control 1.780749 0.2025861 -0.17580453 0.09257149 2.957174 1.701045
## IP_10_Inducible_Protein_10 IgA Insulin
## Impaired 5.875371 -5.928985 -1.262448
## Control 5.709495 -6.193464 -1.221804
## Kidney_Injury_Molecule_1_KIM_1 LOX_1 Leptin Lipoprotein_a
## Impaired -1.179252 1.272405 -1.544007 -4.277014
## Control -1.186509 1.286978 -1.489273 -4.470063
## MCP_1 MCP_2 MIF MIP_1alpha MIP_1beta MMP_2 MMP_3
## Impaired 6.548438 2.104018 -1.724998 4.281621 2.877145 2.781498 -2.324019
## Control 6.477030 1.780639 -1.916871 3.961372 2.790792 2.910655 -2.491243
## MMP10 MMP7 Myoglobin NT_proBNP NrCAM Osteopontin
## Impaired -3.404275 -3.156452 -1.246804 4.720840 4.330498 5.313664
## Control -3.721335 -4.027631 -1.412393 4.488264 4.373735 5.162882
## PAI_1 PAPP_A PLGF PYY Pancreatic_polypeptide
## Impaired 0.275440319 -2.843776 4.012400 3.033706 0.3059723
## Control 0.002927648 -2.857862 3.874869 3.008467 -0.1333379
## Prolactin Prostatic_Acid_Phosphatase Protein_S
## Impaired 0.07862477 -1.673206 -2.161885
## Control 0.03227524 -1.689756 -2.268898
## Pulmonary_and_Activation_Regulat RANTES Resistin S100b
## Impaired -1.368849 -6.453900 -15.66720 1.339447
## Control -1.532822 -6.532627 -18.38319 1.217084
## SGOT SHBG SOD Serum_Amyloid_P Sortilin
## Impaired -0.3863058 -2.373962 5.389265 -5.975203 4.101578
## Control -0.4130533 -2.515326 5.316217 -6.032948 3.757482
## Stem_Cell_Factor TGF_alpha TIMP_1 TNF_RII TRAIL_R3
## Impaired 3.289750 9.950444 12.17891 -0.4725251 -0.4163806
## Control 3.305627 9.744131 11.58805 -0.6396312 -0.5856845
## TTR_prealbumin Tamm_Horsfall_Protein_THP Thrombomodulin Thrombopoietin
## Impaired 2.845662 -3.113195 -1.448956 -0.8002665
## Control 2.857278 -3.116753 -1.526102 -0.7368538
## Thymus_Expressed_Chemokine_TECK Thyroid_Stimulating_Hormone
## Impaired 4.152730 -4.494425
## Control 3.733299 -4.500959
## Thyroxine_Binding_Globulin Tissue_Factor Transferrin
## Impaired -1.413281 1.185679 2.920215
## Control -1.503474 1.164395 2.904860
## Trefoil_Factor_3_TFF3 VCAM_1 VEGF Vitronectin
## Impaired -3.841630 2.750947 16.49858 -0.2877221
## Control -3.889182 2.663773 17.17183 -0.2836034
## von_Willebrand_Factor E4 E3 E2
## Impaired -3.848044 1.589041 1.890411 1.082192
## Control -3.927726 1.329897 1.927835 1.190722
##
## Coefficients of linear discriminants:
## LD1
## ACE_CD143_Angiotensin_Converti 0.569504941
## ACTH_Adrenocorticotropic_Hormon -0.895338194
## AXL -0.821182573
## Adiponectin -0.135161947
## Alpha_1_Antichymotrypsin -0.041071635
## Alpha_1_Antitrypsin -0.059362982
## Alpha_1_Microglobulin -0.786319216
## Alpha_2_Macroglobulin 0.012875285
## Angiopoietin_2_ANG_2 -0.797385222
## Angiotensinogen -0.296477791
## Apolipoprotein_A_IV 0.226600353
## Apolipoprotein_A1 2.489443027
## Apolipoprotein_A2 0.596888238
## Apolipoprotein_B -0.076453360
## Apolipoprotein_CI -1.119275874
## Apolipoprotein_CIII -0.629834266
## Apolipoprotein_D -0.052057126
## Apolipoprotein_E 0.569408798
## Apolipoprotein_H -0.102695136
## B_Lymphocyte_Chemoattractant_BL 0.318850160
## BMP_6 -0.212091110
## Beta_2_Microglobulin 1.498295786
## Betacellulin -0.028149817
## C_Reactive_Protein -0.228407589
## CD40 -0.972360146
## CD5L -0.039003898
## Calbindin 0.004420323
## Calcitonin 0.200564040
## CgA -0.002357913
## Clusterin_Apo_J -3.921135761
## Complement_3 -0.131845987
## Complement_Factor_H 0.187804895
## Connective_Tissue_Growth_Factor -2.065196644
## Cortisol -0.102529183
## Creatine_Kinase_MB -0.987522965
## Cystatin_C 1.821906379
## EGF_R 0.985441261
## EN_RAGE -0.088123902
## ENA_78 18.288727363
## Eotaxin_3 -0.012987019
## FAS -0.553120590
## FSH_Follicle_Stimulation_Hormon 0.187421956
## Fas_Ligand -0.022180189
## Fatty_Acid_Binding_Protein -0.714514954
## Ferritin -0.332781446
## Fetuin_A -1.495223078
## Fibrinogen -0.372424062
## GRO_alpha -3.624048833
## Gamma_Interferon_induced_Monokin 2.828644960
## Glutathione_S_Transferase_alpha -0.214189905
## HB_EGF 0.019575787
## HCC_4 0.109076895
## Hepatocyte_Growth_Factor_HGF -1.870434054
## I_309 1.431656002
## ICAM_1 0.122494244
## IGF_BP_2 0.550693401
## IL_11 0.288369179
## IL_13 -14.463595889
## IL_16 0.614118828
## IL_17E -0.019224225
## IL_1alpha -0.002997531
## IL_3 -0.430747600
## IL_4 -0.230035338
## IL_5 0.332906195
## IL_6 0.167908082
## IL_6_Receptor 0.136225541
## IL_7 0.159315144
## IL_8 1.388397550
## IP_10_Inducible_Protein_10 -0.026249724
## IgA 0.260463630
## Insulin -0.506328776
## Kidney_Injury_Molecule_1_KIM_1 10.757005612
## LOX_1 1.050265777
## Leptin 0.759660525
## Lipoprotein_a -0.046808860
## MCP_1 -0.236939879
## MCP_2 -0.323106590
## MIF 0.102910171
## MIP_1alpha -0.048747819
## MIP_1beta -0.234755079
## MMP_2 -0.296947941
## MMP_3 -0.314383059
## MMP10 -0.871312465
## MMP7 -0.001813268
## Myoglobin 0.371661604
## NT_proBNP -1.025594984
## NrCAM 1.863397920
## Osteopontin -0.240002679
## PAI_1 -1.168572932
## PAPP_A -1.466741892
## PLGF 0.086854139
## PYY -0.783593964
## Pancreatic_polypeptide -0.172598415
## Prolactin -0.675392299
## Prostatic_Acid_Phosphatase -2.017922226
## Protein_S 2.074044988
## Pulmonary_and_Activation_Regulat -0.463430382
## RANTES 1.787930693
## Resistin -0.041841639
## S100b 0.840086713
## SGOT -0.175139101
## SHBG 0.100971900
## SOD -2.830930524
## Serum_Amyloid_P 0.279417654
## Sortilin -0.001487249
## Stem_Cell_Factor 0.688548628
## TGF_alpha -0.422316280
## TIMP_1 0.013526273
## TNF_RII -2.577558465
## TRAIL_R3 -0.802371183
## TTR_prealbumin 0.343654701
## Tamm_Horsfall_Protein_THP 0.463766273
## Thrombomodulin 1.153883219
## Thrombopoietin -1.543486362
## Thymus_Expressed_Chemokine_TECK 0.137644167
## Thyroid_Stimulating_Hormone 0.133797927
## Thyroxine_Binding_Globulin -0.493144003
## Tissue_Factor -0.601270799
## Transferrin 0.207970519
## Trefoil_Factor_3_TFF3 0.697235040
## VCAM_1 0.391948501
## VEGF 0.502708648
## Vitronectin 0.441995676
## von_Willebrand_Factor 0.769056331
## E4 0.263306628
## E3 -0.165457885
## E2 0.052923879
# Print the resampling performance table of the full-predictor LDA model
# (ROC, Sens, Spec, Accuracy, Kappa and their standard deviations)
LDA_FULL_Tune$results
## parameter ROC Sens Spec Accuracy Kappa ROCSD
## 1 none 0.8015132 0.6428571 0.8142105 0.7675417 0.4377465 0.07919456
## SensSD SpecSD AccuracySD KappaSD
## 1 0.1295925 0.05582698 0.05778037 0.1370938
# Store the cross-validated accuracy of the full-predictor LDA model;
# the outer parentheses print the value as it is assigned
(LDA_FULL_Train_Accuracy <- LDA_FULL_Tune$results[, c("Accuracy")])
## [1] 0.7675417
##################################
# Independently evaluating the model and
# reporting the independent evaluation results
# on the test set
##################################
# Pair the observed test-set classes with the classes predicted by the
# full-predictor LDA model; the Class column is excluded from the
# predictors handed to predict()
LDA_FULL_Test <- data.frame(
  LDA_FULL_Observed = PMA_PreModelling_Test$Class,
  LDA_FULL_Predicted = predict(
    LDA_FULL_Tune,
    PMA_PreModelling_Test[, !names(PMA_PreModelling_Test) %in% c("Class")],
    type = "raw"
  )
)

# Print the observed-versus-predicted table
LDA_FULL_Test
## LDA_FULL_Observed LDA_FULL_Predicted
## 1 Control Control
## 2 Impaired Impaired
## 3 Impaired Impaired
## 4 Control Impaired
## 5 Impaired Control
## 6 Impaired Control
## 7 Impaired Impaired
## 8 Control Impaired
## 9 Impaired Impaired
## 10 Control Control
## 11 Control Impaired
## 12 Control Control
## 13 Control Control
## 14 Impaired Control
## 15 Control Control
## 16 Control Control
## 17 Control Control
## 18 Impaired Impaired
## 19 Control Control
## 20 Control Control
## 21 Control Control
## 22 Control Control
## 23 Control Control
## 24 Control Impaired
## 25 Control Control
## 26 Control Impaired
## 27 Control Control
## 28 Control Control
## 29 Control Impaired
## 30 Control Control
## 31 Impaired Impaired
## 32 Control Control
## 33 Control Control
## 34 Control Control
## 35 Control Impaired
## 36 Control Impaired
## 37 Control Control
## 38 Control Control
## 39 Impaired Control
## 40 Control Control
## 41 Control Control
## 42 Control Control
## 43 Control Control
## 44 Impaired Impaired
## 45 Control Control
## 46 Control Control
## 47 Control Control
## 48 Impaired Impaired
## 49 Control Control
## 50 Impaired Impaired
## 51 Control Impaired
## 52 Impaired Impaired
## 53 Impaired Impaired
## 54 Control Control
## 55 Control Control
## 56 Control Control
## 57 Control Control
## 58 Impaired Control
## 59 Control Control
## 60 Impaired Control
## 61 Control Control
## 62 Control Control
## 63 Impaired Control
## 64 Control Control
## 65 Control Control
## 66 Control Control
##################################
# Reporting the independent evaluation results
# for the test set
##################################
# Test-set accuracy of the full-predictor LDA model; the outer
# parentheses print the value as it is assigned
(LDA_FULL_Test_Accuracy <- Accuracy(
  y_pred = LDA_FULL_Test$LDA_FULL_Predicted,
  y_true = LDA_FULL_Test$LDA_FULL_Observed
))
## [1] 0.7575758
##################################
# Creating consistent fold assignments
# for the Cross Validation process
##################################
# Fix the RNG seed so the fold assignment is reproducible, then build
# 10 folds on the training classes; returnTrain = TRUE makes each list
# element hold the training-row indices of that fold
set.seed(12345678)
KFold_Indices <- createFolds(
  PMA_PreModelling_Train$Class,
  k = 10,
  returnTrain = TRUE
)
##################################
# Formulating the controls for the
# model training process
##################################
# Controls for the inner model fit (passed as trControl to the feature
# selection call below): cross-validation with class probabilities
KFold_TrainControl <- trainControl(
  method = "cv",
  classProbs = TRUE
)
##################################
# Formulating the controls for the
# simulated annealing process
##################################
# Controls for the simulated annealing search: cross-validated external
# resampling on the pre-computed fold indices, caret's stock caretSA
# helper functions, verbose progress logging, and only the final
# resampling results retained
KFold_SAControl <- safsControl(
  method = "cv",
  verbose = TRUE,
  functions = caretSA,
  index = KFold_Indices,
  returnResamp = "final"
)
##################################
# Running the linear discriminant analysis model
# by setting the caret method to 'lda'
# with implementation of simulated annealing
##################################
# Seed the RNG for a reproducible search, then run simulated annealing
# feature selection (10 iterations) around an LDA model. All columns
# except Class are candidate predictors; method, metric and trControl
# are forwarded to the underlying model fit.
set.seed(12345678)
LDA_SA_Tune <- caret::safs(
  x = PMA_PreModelling_Train[, !names(PMA_PreModelling_Train) %in% c("Class")],
  y = PMA_PreModelling_Train$Class,
  iters = 10,
  method = "lda",
  metric = "Accuracy",
  safsControl = KFold_SAControl,
  trControl = KFold_TrainControl
)
## Fold01 1 0.7751739 (26)
## Fold01 2 0.7751739->0.7614638 (26+1, 96.3%) 0.9652452 A
## Fold01 3 0.7751739->0.7333406 (26+2, 92.9%) 0.850527 A
## Fold01 4 0.7751739->0.7506087 (26+3, 89.7%) 0.8809453 A
## Fold01 5 0.7751739->0.7832536 (26+4, 86.7%) *
## Fold01 6 0.7832536->0.7340072 (30+1, 96.8%) 0.6857483 A
## Fold01 7 0.7832536->0.753058 (30+0, 93.5%) 0.7634857 A
## Fold01 8 0.7832536->0.729029 (30-1, 90.3%) 0.5747386 A
## Fold01 9 0.7832536->0.7373116 (30+0, 87.5%) 0.5898433 A
## Fold01 10 0.7832536->0.7457029 (30+1, 84.8%) 0.6191426 A
## Fold02 1 0.7823116 (26)
## Fold02 2 0.7823116->0.7867971 (26-1, 96.2%) *
## Fold02 3 0.7867971->0.7750725 (25-1, 96.0%) 0.9562794 A
## Fold02 4 0.7867971->0.7756159 (25+0, 92.3%) 0.9447415 A
## Fold02 5 0.7867971->0.7992029 (25+1, 88.9%) *
## Fold02 6 0.7992029->0.7784348 (26+1, 96.3%) 0.8556309 A
## Fold02 7 0.7992029->0.7745072 (26+2, 92.9%) 0.8054916 A
## Fold02 8 0.7992029->0.7707101 (26+3, 89.7%) 0.7518551 A
## Fold02 9 0.7992029->0.7695797 (26+4, 86.7%) 0.7163451 A
## Fold02 10 0.7992029->0.7703551 (26+5, 83.9%) 0.6970086
## Fold03 1 0.8017174 (26)
## Fold03 2 0.8017174->0.8004783 (26+1, 96.3%) 0.9969136 A
## Fold03 3 0.8017174->0.7878333 (26+2, 92.9%) 0.9493728 A
## Fold03 4 0.8017174->0.7882826 (26+3, 89.7%) 0.9351671 A
## Fold03 5 0.8017174->0.8058986 (26+4, 86.7%) *
## Fold03 6 0.8058986->0.8002609 (30+1, 96.8%) 0.9588955 A
## Fold03 7 0.8058986->0.8052029 (30+2, 93.8%) 0.9939758 A
## Fold03 8 0.8058986->0.8091884 (30+3, 90.9%) *
## Fold03 9 0.8091884->0.8185217 (33+1, 97.1%) *
## Fold03 10 0.8185217->0.7680217 (34+1, 97.1%) 0.5395791 A
## Fold04 1 0.7625217 (26)
## Fold04 2 0.7625217->0.7456594 (26+1, 96.3%) 0.956736 A
## Fold04 3 0.7625217->0.7572391 (26+2, 92.9%) 0.979431 A
## Fold04 4 0.7625217->0.7510507 (26+3, 89.7%) 0.9416006 A
## Fold04 5 0.7625217->0.7569565 (26+4, 86.7%) 0.9641656 A
## Fold04 6 0.7625217->0.7542029 (26+5, 83.9%) 0.9366385 A
## Fold04 7 0.7625217->0.7449855 (26+6, 81.2%) 0.8513059 A
## Fold04 8 0.7625217->0.7251449 (26+5, 78.1%) 0.6756102 A
## Fold04 9 0.7625217->0.7412826 (26+4, 75.0%) 0.7782681
## Fold04 10 0.7625217->0.7142971 (26+6, 75.8%) 0.5312959 A
## Fold05 1 0.7475507 (26)
## Fold05 2 0.7475507->0.7764348 (26+1, 96.3%) *
## Fold05 3 0.7764348->0.7591739 (27+1, 96.4%) 0.9354825 A
## Fold05 4 0.7764348->0.7691739 (27+2, 93.1%) 0.9632848 A
## Fold05 5 0.7764348->0.763913 (27+1, 89.7%) 0.9225293 A
## Fold05 6 0.7764348->0.7957536 (27+2, 86.7%) *
## Fold05 7 0.7957536->0.7841739 (29+1, 96.7%) 0.9031531 A
## Fold05 8 0.7957536->0.7632464 (29+2, 93.5%) 0.7212228 A
## Fold05 9 0.7957536->0.7806884 (29+3, 90.6%) 0.8433374 A
## Fold05 10 0.7957536->0.7716739 (29+4, 87.9%) 0.7388927 A
## Fold06 1 0.779413 (26)
## Fold06 2 0.779413->0.7506594 (26+1, 96.3%) 0.9288735 A
## Fold06 3 0.779413->0.7377101 (26+0, 92.6%) 0.8517037 A
## Fold06 4 0.779413->0.7638696 (26+1, 89.3%) 0.9233285 A
## Fold06 5 0.779413->0.7623406 (26+2, 86.2%) 0.8962631 A
## Fold06 6 0.779413->0.7332101 (26+3, 83.3%) 0.7007006 A
## Fold06 7 0.779413->0.7501594 (26+4, 80.6%) 0.7689493 A
## Fold06 8 0.779413->0.770587 (26+5, 78.1%) 0.9133902 A
## Fold06 9 0.779413->0.7271087 (26+6, 75.8%) 0.5466393
## Fold06 10 0.779413->0.7338261 (26+6, 75.8%) 0.5571681
## Fold07 1 0.7296087 (26)
## Fold07 2 0.7296087->0.7389203 (26+1, 96.3%) *
## Fold07 3 0.7389203->0.7727174 (27+1, 96.4%) *
## Fold07 4 0.7727174->0.7933261 (28-1, 96.4%) *
## Fold07 5 0.7933261->0.759587 (27-1, 96.3%) 0.8084443
## Fold07 6 0.7933261->0.7762391 (27+1, 96.4%) 0.8787716 A
## Fold07 7 0.7933261->0.7712319 (27+2, 93.1%) 0.8228753 A
## Fold07 8 0.7933261->0.7515507 (27+3, 90.0%) 0.6562142
## Fold07 9 0.7933261->0.7595217 (27+3, 90.0%) 0.6814733 A
## Fold07 10 0.7933261->0.7556884 (27+4, 87.1%) 0.6222403 A
## Fold08 1 0.7746449 (26)
## Fold08 2 0.7746449->0.7797319 (26+1, 96.3%) *
## Fold08 3 0.7797319->0.7954928 (27+1, 96.4%) *
## Fold08 4 0.7954928->0.7618406 (28+1, 96.6%) 0.844328 A
## Fold08 5 0.7954928->0.777558 (28+2, 93.3%) 0.8933941 A
## Fold08 6 0.7954928->0.7839783 (28+3, 90.3%) 0.9168164 A
## Fold08 7 0.7954928->0.7912029 (28+4, 87.5%) 0.9629547 A
## Fold08 8 0.7954928->0.8132174 (28+3, 84.4%) *
## Fold08 9 0.8132174->0.7797826 (31+1, 96.9%) 0.6907151 A
## Fold08 10 0.8132174->0.7911594 (31+2, 93.9%) 0.762431
## Fold09 1 0.743587 (26)
## Fold09 2 0.743587->0.7562899 (26+1, 96.3%) *
## Fold09 3 0.7562899->0.7617681 (27+1, 96.4%) *
## Fold09 4 0.7617681->0.7535145 (28+1, 96.6%) 0.9575864
## Fold09 5 0.7617681->0.7571957 (28+1, 96.6%) 0.9704337 A
## Fold09 6 0.7617681->0.7708116 (28+2, 93.3%) *
## Fold09 7 0.7708116->0.7666304 (30+1, 96.8%) 0.9627413 A
## Fold09 8 0.7708116->0.7657826 (30+2, 93.8%) 0.9491445 A
## Fold09 9 0.7708116->0.7662826 (30+3, 90.9%) 0.9484934 A
## Fold09 10 0.7708116->0.7874928 (30+2, 87.9%) *
## Fold10 1 0.8297319 (26)
## Fold10 2 0.8297319->0.820913 (26+1, 96.3%) 0.9789673 A
## Fold10 3 0.8297319->0.8252609 (26+2, 92.9%) 0.9839644 A
## Fold10 4 0.8297319->0.8415652 (26+3, 89.7%) *
## Fold10 5 0.8415652->0.8382826 (29-1, 96.6%) 0.980686 A
## Fold10 6 0.8415652->0.8041739 (29+0, 93.3%) 0.7659917 A
## Fold10 7 0.8415652->0.8286884 (29-1, 90.0%) 0.8984294
## Fold10 8 0.8415652->0.8181884 (29-1, 90.0%) 0.8007374 A
## Fold10 9 0.8415652->0.8162029 (29-2, 86.7%) 0.7624383 A
## Fold10 10 0.8415652->0.8097029 (29-1, 90.0%) 0.6848141
## + final SA
## 1 0.8057184 (26)
## 2 0.8057184->0.8016484 (26-1, 96.2%) 0.9899481 A
## 3 0.8057184->0.7710826 (26+0, 92.6%) 0.8790072 A
## 4 0.8057184->0.7639499 (26+1, 89.3%) 0.8127271 A
## 5 0.8057184->0.7681726 (26+2, 86.2%) 0.792157 A
## 6 0.8057184->0.7752239 (26+3, 83.3%) 0.7968526
## 7 0.8057184->0.7646418 (26+3, 83.3%) 0.6998644 A
## 8 0.8057184->0.7796296 (26+4, 80.6%) 0.7717955 A
## 9 0.8057184->0.783547 (26+5, 78.1%) 0.7806273 A
## 10 0.8057184->0.7682641 (26+4, 75.0%) 0.6282263 A
## + final model
##################################
# Reporting the cross-validation results
# for the train set
##################################
# Print the simulated annealing summary: search settings, how often
# variables were selected during resampling, and external performance
LDA_SA_Tune
##
## Simulated Annealing Feature Selection
##
## 267 samples
## 127 predictors
## 2 classes: 'Impaired', 'Control'
##
## Maximum search iterations: 10
##
## Internal performance values: Accuracy, Kappa
## Subset selection driven to maximize internal Accuracy
##
## External performance values: Accuracy, Kappa
## Best iteration chose by maximizing external Accuracy
## External resampling method: Cross-Validated (10 fold)
##
## During resampling:
## * the top 5 selected variables (out of a possible 127):
## Apolipoprotein_D (50%), Fatty_Acid_Binding_Protein (50%), HCC_4 (50%), IL_3 (50%), MCP_2 (50%)
## * on average, 29 variables were selected (min = 26, max = 34)
##
## In the final search using the entire training set:
## * 31 features selected at iteration 9 including:
## ACTH_Adrenocorticotropic_Hormon, Apolipoprotein_A2, Apolipoprotein_CI, Beta_2_Microglobulin, Connective_Tissue_Growth_Factor ...
## * external performance at this iteration is
##
## Accuracy Kappa
## 0.8094 0.4720
# Print the final LDA model fitted on the predictors chosen by
# simulated annealing
LDA_SA_Tune$fit
## Linear Discriminant Analysis
##
## 267 samples
## 31 predictor
## 2 classes: 'Impaired', 'Control'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 241, 239, 241, 239, 241, 241, ...
## Resampling results:
##
## Accuracy Kappa
## 0.7912393 0.4781993
# Print the external Accuracy and Kappa averaged per search iteration
LDA_SA_Tune$averages
## Iter Accuracy Kappa
## 1 1 0.7752137 0.3711203
## 2 2 0.7791921 0.3847643
## 3 3 0.7796093 0.3924960
## 4 4 0.7986976 0.4407613
## 5 5 0.7988400 0.4446132
## 6 6 0.7949939 0.4313315
## 7 7 0.8024115 0.4469942
## 8 8 0.7910155 0.4216211
## 9 9 0.8094119 0.4720489
## 10 10 0.8020045 0.4529000
# Best external cross-validated accuracy across the search iterations.
# max() is used directly instead of filtering rows with `== max(...)`,
# so the result is always a single value even when iterations tie.
# The outer parentheses print the value as it is assigned.
(LDA_SA_Train_Accuracy <- max(LDA_SA_Tune$averages$Accuracy))
## [1] 0.8094119
##################################
# Independently evaluating the model and
# reporting the independent evaluation results
# on the test set
##################################
# Pair the observed test-set classes with the predictions of the
# simulated-annealing LDA model; the Class column is excluded from the
# predictors. The prediction is itself a data frame, so its columns are
# prefixed with "LDA_SA_Predicted." (pred, Impaired, Control).
LDA_SA_Test <- data.frame(
  LDA_SA_Observed = PMA_PreModelling_Test$Class,
  LDA_SA_Predicted = predict(
    LDA_SA_Tune,
    PMA_PreModelling_Test[, !names(PMA_PreModelling_Test) %in% c("Class")],
    type = "raw"
  )
)

# Print the observed-versus-predicted table
LDA_SA_Test
## LDA_SA_Observed LDA_SA_Predicted.pred LDA_SA_Predicted.Impaired
## 4 Control Control 0.001117002
## 10 Impaired Control 0.202015024
## 13 Impaired Impaired 0.634018751
## 15 Control Control 0.056100052
## 27 Impaired Control 0.077386757
## 32 Impaired Control 0.002428007
## 33 Impaired Impaired 0.927490529
## 49 Control Control 0.118022391
## 52 Impaired Impaired 0.991362989
## 54 Control Control 0.003531488
## 58 Control Control 0.466873059
## 66 Control Control 0.327480206
## 79 Control Control 0.136651720
## 87 Impaired Control 0.328440901
## 89 Control Impaired 0.658809127
## 91 Control Control 0.021986699
## 92 Control Control 0.003566254
## 101 Impaired Impaired 0.997353376
## 102 Control Control 0.467304220
## 106 Control Control 0.001837830
## 116 Control Control 0.003090223
## 119 Control Control 0.167828034
## 120 Control Control 0.011248641
## 122 Control Control 0.020045395
## 125 Control Control 0.015035653
## 127 Control Control 0.364697923
## 138 Control Control 0.131881702
## 142 Control Control 0.049538373
## 150 Control Control 0.208472331
## 151 Control Control 0.169587439
## 164 Impaired Control 0.059932561
## 173 Control Control 0.001724339
## 187 Control Control 0.018097307
## 188 Control Control 0.109212518
## 196 Control Impaired 0.684300481
## 199 Control Impaired 0.784693963
## 203 Control Control 0.008990814
## 204 Control Control 0.376566904
## 206 Impaired Impaired 0.790240075
## 207 Control Control 0.006595012
## 209 Control Control 0.082794730
## 211 Control Control 0.083738824
## 217 Control Control 0.314585693
## 221 Impaired Impaired 0.699416176
## 222 Control Control 0.012729950
## 235 Control Control 0.016102086
## 238 Control Control 0.132841555
## 248 Impaired Control 0.190389435
## 252 Control Control 0.017958343
## 259 Impaired Control 0.094014109
## 266 Control Control 0.130070036
## 276 Impaired Impaired 0.785507425
## 280 Impaired Control 0.490750839
## 284 Control Control 0.046257225
## 285 Control Control 0.094341516
## 286 Control Control 0.047203239
## 288 Control Control 0.123262019
## 293 Impaired Control 0.008037950
## 295 Control Control 0.087466091
## 296 Impaired Impaired 0.702170305
## 300 Control Control 0.410044571
## 309 Control Control 0.063964905
## 310 Impaired Impaired 0.580402992
## 318 Control Control 0.074637291
## 319 Control Control 0.034766067
## 328 Control Control 0.043932395
## LDA_SA_Predicted.Control
## 4 0.998882998
## 10 0.797984976
## 13 0.365981249
## 15 0.943899948
## 27 0.922613243
## 32 0.997571993
## 33 0.072509471
## 49 0.881977609
## 52 0.008637011
## 54 0.996468512
## 58 0.533126941
## 66 0.672519794
## 79 0.863348280
## 87 0.671559099
## 89 0.341190873
## 91 0.978013301
## 92 0.996433746
## 101 0.002646624
## 102 0.532695780
## 106 0.998162170
## 116 0.996909777
## 119 0.832171966
## 120 0.988751359
## 122 0.979954605
## 125 0.984964347
## 127 0.635302077
## 138 0.868118298
## 142 0.950461627
## 150 0.791527669
## 151 0.830412561
## 164 0.940067439
## 173 0.998275661
## 187 0.981902693
## 188 0.890787482
## 196 0.315699519
## 199 0.215306037
## 203 0.991009186
## 204 0.623433096
## 206 0.209759925
## 207 0.993404988
## 209 0.917205270
## 211 0.916261176
## 217 0.685414307
## 221 0.300583824
## 222 0.987270050
## 235 0.983897914
## 238 0.867158445
## 248 0.809610565
## 252 0.982041657
## 259 0.905985891
## 266 0.869929964
## 276 0.214492575
## 280 0.509249161
## 284 0.953742775
## 285 0.905658484
## 286 0.952796761
## 288 0.876737981
## 293 0.991962050
## 295 0.912533909
## 296 0.297829695
## 300 0.589955429
## 309 0.936035095
## 310 0.419597008
## 318 0.925362709
## 319 0.965233933
## 328 0.956067605
##################################
# Reporting the independent evaluation results
# for the test set
##################################
# Test-set accuracy of the simulated-annealing LDA model; the outer
# parentheses print the value as it is assigned
(LDA_SA_Test_Accuracy <- Accuracy(
  y_pred = LDA_SA_Test$LDA_SA_Predicted.pred,
  y_true = LDA_SA_Test$LDA_SA_Observed
))
## [1] 0.8181818
##################################
# Creating consistent fold assignments
# for the Cross Validation process
##################################
# Fix the RNG seed so the fold assignment is reproducible, then build
# 10 folds on the training classes; returnTrain = TRUE makes each list
# element hold the training-row indices of that fold
set.seed(12345678)
KFold_Indices <- createFolds(
  PMA_PreModelling_Train$Class,
  k = 10,
  returnTrain = TRUE
)
##################################
# Formulating the controls for the
# model training process
##################################
# Controls for the inner model fit (passed as trControl to the feature
# selection call below): cross-validation with class probabilities
KFold_TrainControl <- trainControl(
  method = "cv",
  classProbs = TRUE
)
##################################
# Formulating the controls for the
# genetic algorithms process
##################################
# Controls for the genetic algorithm search: cross-validated external
# resampling on the pre-computed fold indices, caret's stock caretGA
# helper functions, verbose progress logging, and only the final
# resampling results retained
KFold_GAControl <- gafsControl(
  method = "cv",
  verbose = TRUE,
  functions = caretGA,
  index = KFold_Indices,
  returnResamp = "final"
)
##################################
# Running the linear discriminant analysis model
# by setting the caret method to 'lda'
# with implementation of genetic algorithms
##################################
# Seed the RNG for a reproducible search, then run genetic algorithm
# feature selection (10 generations) around an LDA model. All columns
# except Class are candidate predictors; method, metric and trControl
# are forwarded to the underlying model fit.
set.seed(12345678)
LDA_GA_Tune <- caret::gafs(
  x = PMA_PreModelling_Train[, !names(PMA_PreModelling_Train) %in% c("Class")],
  y = PMA_PreModelling_Train$Class,
  iters = 10,
  method = "lda",
  metric = "Accuracy",
  gafsControl = KFold_GAControl,
  trControl = KFold_TrainControl
)
## Fold01 1 0.7999638 (57)
## Fold01 2 0.7999638->0.8011014 ( 57-> 69, 38.5%) *
## Fold01 3 0.8011014->0.8135362 ( 69-> 69, 38.0%) *
## Fold01 4 0.8135362->0.8290362 ( 69-> 62, 84.5%) *
## Fold01 5 0.8290362->0.8302754 ( 62-> 80, 35.2%) *
## Fold01 6 0.8302754->0.8375 ( 80-> 51, 33.7%) *
## Fold01 7 0.8375->0.8418406 ( 51-> 53, 40.5%) *
## Fold01 8 0.8418406->0.8459928 ( 53-> 44, 31.1%) *
## Fold01 9 0.8459928->0.8467609 ( 44-> 43, 97.7%) *
## Fold01 10 0.8467609->0.8461087 ( 43-> 61, 36.8%)
## Fold02 1 0.8119783 (47)
## Fold02 2 0.8119783->0.8330217 ( 47-> 36, 15.3%) *
## Fold02 3 0.8330217->0.8460797 ( 36-> 40, 22.6%) *
## Fold02 4 0.8460797->0.8445362 ( 40-> 51, 35.8%)
## Fold02 5 0.8460797->0.8447826 ( 40-> 60, 20.5%)
## Fold02 6 0.8460797->0.8534203 ( 40-> 32, 20.0%) *
## Fold02 7 0.8534203->0.8498696 ( 32-> 44, 20.6%)
## Fold02 8 0.8534203->0.8532174 ( 32-> 44, 16.9%)
## Fold02 9 0.8534203->0.8618406 ( 32-> 36, 21.4%) *
## Fold02 10 0.8618406->0.8616594 ( 36-> 31, 42.6%)
## Fold03 1 0.8178841 (35)
## Fold03 2 0.8178841->0.817971 ( 35-> 32, 19.6%) *
## Fold03 3 0.817971->0.8378841 ( 32-> 51, 22.1%) *
## Fold03 4 0.8378841->0.8265362 ( 51-> 54, 52.2%)
## Fold03 5 0.8378841->0.845913 ( 51-> 49, 44.9%) *
## Fold03 6 0.845913->0.8508043 ( 49-> 49, 100.0%) *
## Fold03 7 0.8508043->0.8461884 ( 49-> 49, 100.0%)
## Fold03 8 0.8508043->0.8595942 ( 49-> 47, 71.4%) *
## Fold03 9 0.8595942->0.8596522 ( 47-> 43, 60.7%) *
## Fold03 10 0.8596522->0.850913 ( 43-> 47, 60.7%)
## Fold04 1 0.8170507 (32)
## Fold04 2 0.8170507->0.8165942 ( 32-> 32, 100.0%)
## Fold04 3 0.8170507->0.8254565 ( 32-> 31, 85.3%) *
## Fold04 4 0.8254565->0.8448696 ( 31-> 43, 39.6%) *
## Fold04 5 0.8448696->0.8333986 ( 43-> 43, 30.3%)
## Fold04 6 0.8448696->0.845913 ( 43-> 50, 45.3%) *
## Fold04 7 0.845913->0.8462174 ( 50-> 28, 39.3%) *
## Fold04 8 0.8462174->0.8424565 ( 28-> 39, 48.9%)
## Fold04 9 0.8462174->0.8538841 ( 28-> 30, 70.6%) *
## Fold04 10 0.8538841->0.8333551 ( 30-> 39, 43.8%)
## Fold05 1 0.8092754 (67)
## Fold05 2 0.8092754->0.8378406 ( 67-> 69, 37.4%) *
## Fold05 3 0.8378406->0.8456884 ( 69-> 71, 84.2%) *
## Fold05 4 0.8456884->0.8268696 ( 71-> 71, 91.9%)
## Fold05 5 0.8456884->0.846942 ( 71-> 66, 90.3%) *
## Fold05 6 0.846942->0.8836377 ( 66-> 45, 48.0%) *
## Fold05 7 0.8836377->0.8668841 ( 45-> 45, 100.0%)
## Fold05 8 0.8836377->0.8505507 ( 45-> 45, 100.0%)
## Fold05 9 0.8836377->0.8546087 ( 45-> 46, 97.8%)
## Fold05 10 0.8836377->0.8471377 ( 45-> 65, 44.7%)
## Fold06 1 0.8087174 (72)
## Fold06 2 0.8087174->0.8260217 ( 72-> 70, 94.5%) *
## Fold06 3 0.8260217->0.8293696 ( 70-> 71, 41.0%) *
## Fold06 4 0.8293696->0.8300362 ( 71-> 52, 32.3%) *
## Fold06 5 0.8300362->0.8503333 ( 52-> 55, 27.4%) *
## Fold06 6 0.8503333->0.8466377 ( 55-> 51, 92.7%)
## Fold06 7 0.8503333->0.8307464 ( 55-> 55, 52.8%)
## Fold06 8 0.8503333->0.8346377 ( 55-> 81, 51.1%)
## Fold06 9 0.8503333->0.8380362 ( 55-> 60, 40.2%)
## Fold06 10 0.8503333->0.8471087 ( 55-> 64, 70.0%)
## Fold07 1 0.8303116 (28)
## Fold07 2 0.8303116->0.8220652 ( 28-> 28, 93.1%)
## Fold07 3 0.8303116->0.8140217 ( 28-> 34, 8.8%)
## Fold07 4 0.8303116->0.8302754 ( 28-> 34, 8.8%)
## Fold07 5 0.8303116->0.8350072 ( 28-> 63, 16.7%) *
## Fold07 6 0.8350072->0.8328986 ( 63-> 39, 52.2%)
## Fold07 7 0.8350072->0.8387754 ( 63-> 60, 57.7%) *
## Fold07 8 0.8387754->0.8338116 ( 60-> 46, 73.8%)
## Fold07 9 0.8387754->0.8296449 ( 60-> 46, 23.3%)
## Fold07 10 0.8387754->0.8305217 ( 60-> 50, 41.0%)
## Fold08 1 0.8138696 (41)
## Fold08 2 0.8138696->0.8082971 ( 41->102, 32.4%)
## Fold08 3 0.8138696->0.8217029 ( 41-> 48, 21.9%) *
## Fold08 4 0.8217029->0.8337754 ( 48-> 50, 88.5%) *
## Fold08 5 0.8337754->0.8240072 ( 50-> 51, 18.8%)
## Fold08 6 0.8337754->0.8549275 ( 50-> 55, 25.0%) *
## Fold08 7 0.8549275->0.8390652 ( 55-> 55, 100.0%)
## Fold08 8 0.8549275->0.8376884 ( 55-> 46, 80.4%)
## Fold08 9 0.8549275->0.8584275 ( 55-> 62, 62.5%) *
## Fold08 10 0.8584275->0.8488406 ( 62-> 62, 100.0%)
## Fold09 1 0.8156739 (81)
## Fold09 2 0.8156739->0.8160507 ( 81-> 62, 36.2%) *
## Fold09 3 0.8160507->0.8321304 ( 62-> 67, 31.6%) *
## Fold09 4 0.8321304->0.8493986 ( 67-> 84, 36.0%) *
## Fold09 5 0.8493986->0.833087 ( 84-> 63, 41.3%)
## Fold09 6 0.8493986->0.8457029 ( 84-> 64, 64.4%)
## Fold09 7 0.8493986->0.8576884 ( 84-> 37, 23.5%) *
## Fold09 8 0.8576884->0.8613406 ( 37-> 35, 84.6%) *
## Fold09 9 0.8613406->0.8532971 ( 35-> 37, 84.6%)
## Fold09 10 0.8613406->0.8661449 ( 35-> 46, 58.8%) *
## Fold10 1 0.8305652 (62)
## Fold10 2 0.8305652->0.8277464 ( 62-> 61, 39.8%)
## Fold10 3 0.8305652->0.8218986 ( 62-> 34, 23.1%)
## Fold10 4 0.8305652->0.8673986 ( 62-> 68, 44.4%) *
## Fold10 5 0.8673986->0.8713841 ( 68-> 55, 78.3%) *
## Fold10 6 0.8713841->0.850913 ( 55-> 58, 59.2%)
## Fold10 7 0.8713841->0.8545942 ( 55-> 75, 38.3%)
## Fold10 8 0.8713841->0.8721232 ( 55-> 47, 37.8%) *
## Fold10 9 0.8721232->0.8506594 ( 47-> 58, 72.1%)
## Fold10 10 0.8721232->0.8673043 ( 47-> 45, 37.3%)
## + final GA
## 1 0.8164733 (41)
## 2 0.8164733->0.808893 ( 41-> 25, 11.9%)
## 3 0.8164733->0.8281441 ( 41-> 41, 100.0%) *
## 4 0.8281441->0.8463777 ( 41-> 61, 43.7%) *
## 5 0.8463777->0.8351547 ( 61-> 51, 57.7%)
## 6 0.8463777->0.8449634 ( 61-> 58, 25.3%)
## 7 0.8463777->0.8466321 ( 61-> 53, 18.8%) *
## 8 0.8466321->0.834707 ( 53-> 47, 49.3%)
## 9 0.8466321->0.8354294 ( 53-> 61, 23.9%)
## 10 0.8466321->0.8435287 ( 53-> 49, 70.0%)
## + final model
##################################
# Reporting the cross-validation results
# for the train set
##################################
# Print the genetic algorithm summary: search settings, how often
# variables were selected during resampling, and external performance
LDA_GA_Tune
##
## Genetic Algorithm Feature Selection
##
## 267 samples
## 127 predictors
## 2 classes: 'Impaired', 'Control'
##
## Maximum generations: 10
## Population per generation: 50
## Crossover probability: 0.8
## Mutation probability: 0.1
## Elitism: 0
##
## Internal performance values: Accuracy, Kappa
## Subset selection driven to maximize internal Accuracy
##
## External performance values: Accuracy, Kappa
## Best iteration chose by maximizing external Accuracy
## External resampling method: Cross-Validated (10 fold)
##
## During resampling:
## * the top 5 selected variables (out of a possible 127):
## Cystatin_C (90%), MMP10 (90%), Apolipoprotein_A1 (80%), Cortisol (80%), VEGF (80%)
## * on average, 46.7 variables were selected (min = 30, max = 62)
##
## In the final search using the entire training set:
## * 61 features selected at iteration 4 including:
## Alpha_1_Microglobulin, Alpha_2_Macroglobulin, Angiotensinogen, Apolipoprotein_CIII, Apolipoprotein_D ...
## * external performance at this iteration is
##
## Accuracy Kappa
## 0.7940 0.4894
# Print the final LDA model fitted on the predictors chosen by the
# genetic algorithm
LDA_GA_Tune$fit
## Linear Discriminant Analysis
##
## 267 samples
## 61 predictor
## 2 classes: 'Impaired', 'Control'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 241, 240, 239, 241, 239, 241, ...
## Resampling results:
##
## Accuracy Kappa
## 0.8091372 0.5091527
# Print the external Accuracy and Kappa averaged per generation
LDA_GA_Tune$averages
## Iter Accuracy Kappa
## 1 1 0.7757530 0.4076982
## 2 2 0.7572446 0.3840816
## 3 3 0.7907509 0.4854290
## 4 4 0.7940273 0.4893657
## 5 5 0.7719170 0.4169970
## 6 6 0.7791921 0.4459888
## 7 7 0.7636956 0.3945538
## 8 8 0.7640924 0.4192004
## 9 9 0.7595543 0.3801691
## 10 10 0.7712556 0.4149006
# Best external cross-validated accuracy across the generations.
# max() is used directly instead of filtering rows with `== max(...)`,
# so the result is always a single value even when generations tie.
# The outer parentheses print the value as it is assigned.
(LDA_GA_Train_Accuracy <- max(LDA_GA_Tune$averages$Accuracy))
## [1] 0.7940273
##################################
# Independently evaluating the model and
# reporting the independent evaluation results
# on the test set
##################################
# Pair the observed test-set classes with the predictions of the
# genetic-algorithm LDA model; the Class column is excluded from the
# predictors. The prediction is itself a data frame, so its columns are
# prefixed with "LDA_GA_Predicted." (pred, Impaired, Control).
LDA_GA_Test <- data.frame(
  LDA_GA_Observed = PMA_PreModelling_Test$Class,
  LDA_GA_Predicted = predict(
    LDA_GA_Tune,
    PMA_PreModelling_Test[, !names(PMA_PreModelling_Test) %in% c("Class")],
    type = "raw"
  )
)

# Print the observed-versus-predicted table
LDA_GA_Test
## LDA_GA_Observed LDA_GA_Predicted.pred LDA_GA_Predicted.Impaired
## 4 Control Control 1.344467e-03
## 10 Impaired Impaired 9.648635e-01
## 13 Impaired Impaired 9.945636e-01
## 15 Control Control 4.525513e-01
## 27 Impaired Control 2.383854e-03
## 32 Impaired Control 2.210414e-03
## 33 Impaired Impaired 9.188461e-01
## 49 Control Control 2.158310e-02
## 52 Impaired Impaired 8.944029e-01
## 54 Control Control 8.452680e-02
## 58 Control Control 4.582054e-01
## 66 Control Control 1.274641e-03
## 79 Control Impaired 7.553738e-01
## 87 Impaired Control 1.587294e-01
## 89 Control Control 1.703803e-02
## 91 Control Control 7.594765e-02
## 92 Control Control 1.265478e-02
## 101 Impaired Impaired 9.949380e-01
## 102 Control Control 8.502519e-02
## 106 Control Control 2.760468e-02
## 116 Control Control 2.425436e-05
## 119 Control Control 3.278133e-03
## 120 Control Control 7.913202e-04
## 122 Control Control 2.953061e-02
## 125 Control Control 1.815725e-04
## 127 Control Control 7.718520e-03
## 138 Control Control 3.352697e-03
## 142 Control Control 2.496666e-01
## 150 Control Control 8.810881e-03
## 151 Control Control 1.563173e-02
## 164 Impaired Impaired 9.710313e-01
## 173 Control Control 3.017419e-03
## 187 Control Control 1.139955e-02
## 188 Control Control 1.834751e-03
## 196 Control Impaired 6.068846e-01
## 199 Control Impaired 9.969451e-01
## 203 Control Control 5.481570e-02
## 204 Control Control 1.022203e-01
## 206 Impaired Control 1.005856e-02
## 207 Control Control 2.813131e-03
## 209 Control Control 5.077507e-03
## 211 Control Control 3.200174e-01
## 217 Control Control 2.912975e-02
## 221 Impaired Impaired 9.800391e-01
## 222 Control Control 1.122470e-02
## 235 Control Control 1.666724e-03
## 238 Control Control 1.849200e-05
## 248 Impaired Impaired 5.587321e-01
## 252 Control Control 1.352342e-03
## 259 Impaired Impaired 9.741620e-01
## 266 Control Control 2.499570e-01
## 276 Impaired Impaired 9.458447e-01
## 280 Impaired Control 4.462540e-01
## 284 Control Control 2.577057e-03
## 285 Control Control 2.565497e-04
## 286 Control Control 3.899414e-02
## 288 Control Control 3.638500e-02
## 293 Impaired Control 1.139168e-03
## 295 Control Control 1.681312e-03
## 296 Impaired Impaired 5.191660e-01
## 300 Control Control 1.324779e-01
## 309 Control Control 1.717021e-02
## 310 Impaired Control 1.449930e-01
## 318 Control Control 9.490306e-03
## 319 Control Control 6.748458e-02
## 328 Control Control 5.151371e-03
## LDA_GA_Predicted.Control
## 4 0.998655533
## 10 0.035136540
## 13 0.005436385
## 15 0.547448712
## 27 0.997616146
## 32 0.997789586
## 33 0.081153927
## 49 0.978416897
## 52 0.105597076
## 54 0.915473198
## 58 0.541794577
## 66 0.998725359
## 79 0.244626204
## 87 0.841270595
## 89 0.982961973
## 91 0.924052348
## 92 0.987345220
## 101 0.005061995
## 102 0.914974810
## 106 0.972395320
## 116 0.999975746
## 119 0.996721867
## 120 0.999208680
## 122 0.970469390
## 125 0.999818428
## 127 0.992281480
## 138 0.996647303
## 142 0.750333399
## 150 0.991189119
## 151 0.984368275
## 164 0.028968735
## 173 0.996982581
## 187 0.988600451
## 188 0.998165249
## 196 0.393115388
## 199 0.003054864
## 203 0.945184300
## 204 0.897779666
## 206 0.989941442
## 207 0.997186869
## 209 0.994922493
## 211 0.679982577
## 217 0.970870254
## 221 0.019960885
## 222 0.988775300
## 235 0.998333276
## 238 0.999981508
## 248 0.441267869
## 252 0.998647658
## 259 0.025837998
## 266 0.750042957
## 276 0.054155258
## 280 0.553745988
## 284 0.997422943
## 285 0.999743450
## 286 0.961005863
## 288 0.963614996
## 293 0.998860832
## 295 0.998318688
## 296 0.480833989
## 300 0.867522101
## 309 0.982829787
## 310 0.855007037
## 318 0.990509694
## 319 0.932515416
## 328 0.994848629
##################################
# Reporting the independent evaluation results
# for the test set
##################################
# Test-set accuracy of the LDA_GA model: compares the predicted class
# column against the observed class column of LDA_GA_Test.
# NOTE(review): Accuracy() is presumably MLmetrics::Accuracy (MLmetrics is
# loaded at the top of the file) -- confirm no other package masks it.
LDA_GA_Test_Accuracy <- Accuracy(
  y_pred = LDA_GA_Test$LDA_GA_Predicted.pred,
  y_true = LDA_GA_Test$LDA_GA_Observed
)
# Explicit print so the value is shown even when the script is source()d.
print(LDA_GA_Test_Accuracy)
## [1] 0.8484848
##################################
# Consolidating all evaluation results
# for the train and test sets
# using the accuracy metric
##################################
# Three parallel vectors of length 12 that are later bound into the
# accuracy summary table: rows 1-6 hold the cross-validation results and
# rows 7-12 hold the test results, in the same model order.

# Model labels: the six models, repeated once per evaluation set.
Model <- rep(c('RF_FULL','RF_SA','RF_GA','LDA_FULL','LDA_SA','LDA_GA'),
             times = 2)

# Evaluation set label for each row.
Set <- rep(c('Cross-Validation','Test'), each = 6)

# Accuracy values, ordered to line up with Model and Set above.
Accuracy <- c(RF_FULL_Train_Accuracy,
              RF_SA_Train_Accuracy,
              RF_GA_Train_Accuracy,
              LDA_FULL_Train_Accuracy,
              LDA_SA_Train_Accuracy,
              LDA_GA_Train_Accuracy,
              RF_FULL_Test_Accuracy,
              RF_SA_Test_Accuracy,
              RF_GA_Test_Accuracy,
              LDA_FULL_Test_Accuracy,
              LDA_SA_Test_Accuracy,
              LDA_GA_Test_Accuracy)
# Assemble the accuracy summary table from the Model, Set and Accuracy
# vectors. The data frame is built directly rather than through cbind(),
# which would coerce every column to character and force the accuracies
# through a character -> numeric round trip.
Accuracy_Summary <- data.frame(
  # Explicit levels fix the display order of the models and sets
  # instead of leaving it alphabetical.
  Model = factor(Model,
                 levels = c('RF_FULL',
                            'RF_SA',
                            'RF_GA',
                            'LDA_FULL',
                            'LDA_SA',
                            'LDA_GA')),
  Set = factor(Set,
               levels = c("Cross-Validation",
                          "Test")),
  # as.numeric() also drops any names so they cannot become row names.
  Accuracy = as.numeric(Accuracy)
)

print(Accuracy_Summary, row.names=FALSE)
## Model Set Accuracy
## RF_FULL Cross-Validation 0.7797517
## RF_SA Cross-Validation 0.7761803
## RF_GA Cross-Validation 0.7984229
## LDA_FULL Cross-Validation 0.7675417
## LDA_SA Cross-Validation 0.8094119
## LDA_GA Cross-Validation 0.7940273
## RF_FULL Test 0.8030303
## RF_SA Test 0.7727273
## RF_GA Test 0.7878788
## LDA_FULL Test 0.7575758
## LDA_SA Test 0.8181818
## LDA_GA Test 0.8484848
# Dot plot of accuracy per model, with the cross-validation and test
# results overlaid as two groups (one legend entry per Set level).
Accuracy_Plot <- dotplot(Model ~ Accuracy,
                         data = Accuracy_Summary,
                         groups = Set,
                         main = "Classification Model Performance Comparison",
                         ylab = "Model",
                         xlab = "Accuracy",
                         # Legend placed above the panel in two columns.
                         auto.key = list(adj=1, space="top", columns=2),
                         # Points plus line segments; origin = 0 is passed
                         # so the segments start from zero accuracy.
                         type=c("p", "h"),
                         origin = 0,
                         # Semi-transparent so overlapping groups stay visible.
                         alpha = 0.45,
                         pch = 16,
                         cex = 2)
# Lattice objects only draw when printed; print explicitly so the plot
# is rendered even when the script is source()d.
print(Accuracy_Plot)