##################################
# Loading R libraries
##################################
library(AppliedPredictiveModeling)
library(caret)
library(moments)
library(skimr)
library(dplyr)
library(RANN)
library(corrplot)
library(lares)
library(DMwR2)
library(magrittr)
##################################
# Loading dataset
##################################
# data() loads the ChemicalManufacturingProcess data frame by name so the
# calls below can reference it directly
data(ChemicalManufacturingProcess)
##################################
# Performing a general exploration of the dataset
##################################
# Reporting the number of rows and columns
dim(ChemicalManufacturingProcess)
## [1] 176 58
# Displaying the type and the first few values of every column
str(ChemicalManufacturingProcess)
## 'data.frame': 176 obs. of 58 variables:
## $ Yield : num 38 42.4 42 41.4 42.5 ...
## $ BiologicalMaterial01 : num 6.25 8.01 8.01 8.01 7.47 6.12 7.48 6.94 6.94 6.94 ...
## $ BiologicalMaterial02 : num 49.6 61 61 61 63.3 ...
## $ BiologicalMaterial03 : num 57 67.5 67.5 67.5 72.2 ...
## $ BiologicalMaterial04 : num 12.7 14.6 14.6 14.6 14 ...
## $ BiologicalMaterial05 : num 19.5 19.4 19.4 19.4 17.9 ...
## $ BiologicalMaterial06 : num 43.7 53.1 53.1 53.1 54.7 ...
## $ BiologicalMaterial07 : num 100 100 100 100 100 100 100 100 100 100 ...
## $ BiologicalMaterial08 : num 16.7 19 19 19 18.2 ...
## $ BiologicalMaterial09 : num 11.4 12.6 12.6 12.6 12.8 ...
## $ BiologicalMaterial10 : num 3.46 3.46 3.46 3.46 3.05 3.78 3.04 3.85 3.85 3.85 ...
## $ BiologicalMaterial11 : num 138 154 154 154 148 ...
## $ BiologicalMaterial12 : num 18.8 21.1 21.1 21.1 21.1 ...
## $ ManufacturingProcess01: num NA 0 0 0 10.7 12 11.5 12 12 12 ...
## $ ManufacturingProcess02: num NA 0 0 0 0 0 0 0 0 0 ...
## $ ManufacturingProcess03: num NA NA NA NA NA NA 1.56 1.55 1.56 1.55 ...
## $ ManufacturingProcess04: num NA 917 912 911 918 924 933 929 928 938 ...
## $ ManufacturingProcess05: num NA 1032 1004 1015 1028 ...
## $ ManufacturingProcess06: num NA 210 207 213 206 ...
## $ ManufacturingProcess07: num NA 177 178 177 178 178 177 178 177 177 ...
## $ ManufacturingProcess08: num NA 178 178 177 178 178 178 178 177 177 ...
## $ ManufacturingProcess09: num 43 46.6 45.1 44.9 45 ...
## $ ManufacturingProcess10: num NA NA NA NA NA NA 11.6 10.2 9.7 10.1 ...
## $ ManufacturingProcess11: num NA NA NA NA NA NA 11.5 11.3 11.1 10.2 ...
## $ ManufacturingProcess12: num NA 0 0 0 0 0 0 0 0 0 ...
## $ ManufacturingProcess13: num 35.5 34 34.8 34.8 34.6 34 32.4 33.6 33.9 34.3 ...
## $ ManufacturingProcess14: num 4898 4869 4878 4897 4992 ...
## $ ManufacturingProcess15: num 6108 6095 6087 6102 6233 ...
## $ ManufacturingProcess16: num 4682 4617 4617 4635 4733 ...
## $ ManufacturingProcess17: num 35.5 34 34.8 34.8 33.9 33.4 33.8 33.6 33.9 35.3 ...
## $ ManufacturingProcess18: num 4865 4867 4877 4872 4886 ...
## $ ManufacturingProcess19: num 6049 6097 6078 6073 6102 ...
## $ ManufacturingProcess20: num 4665 4621 4621 4611 4659 ...
## $ ManufacturingProcess21: num 0 0 0 0 -0.7 -0.6 1.4 0 0 1 ...
## $ ManufacturingProcess22: num NA 3 4 5 8 9 1 2 3 4 ...
## $ ManufacturingProcess23: num NA 0 1 2 4 1 1 2 3 1 ...
## $ ManufacturingProcess24: num NA 3 4 5 18 1 1 2 3 4 ...
## $ ManufacturingProcess25: num 4873 4869 4897 4892 4930 ...
## $ ManufacturingProcess26: num 6074 6107 6116 6111 6151 ...
## $ ManufacturingProcess27: num 4685 4630 4637 4630 4684 ...
## $ ManufacturingProcess28: num 10.7 11.2 11.1 11.1 11.3 11.4 11.2 11.1 11.3 11.4 ...
## $ ManufacturingProcess29: num 21 21.4 21.3 21.3 21.6 21.7 21.2 21.2 21.5 21.7 ...
## $ ManufacturingProcess30: num 9.9 9.9 9.4 9.4 9 10.1 11.2 10.9 10.5 9.8 ...
## $ ManufacturingProcess31: num 69.1 68.7 69.3 69.3 69.4 68.2 67.6 67.9 68 68.5 ...
## $ ManufacturingProcess32: num 156 169 173 171 171 173 159 161 160 164 ...
## $ ManufacturingProcess33: num 66 66 66 68 70 70 65 65 65 66 ...
## $ ManufacturingProcess34: num 2.4 2.6 2.6 2.5 2.5 2.5 2.5 2.5 2.5 2.5 ...
## $ ManufacturingProcess35: num 486 508 509 496 468 490 475 478 491 488 ...
## $ ManufacturingProcess36: num 0.019 0.019 0.018 0.018 0.017 0.018 0.019 0.019 0.019 0.019 ...
## $ ManufacturingProcess37: num 0.5 2 0.7 1.2 0.2 0.4 0.8 1 1.2 1.8 ...
## $ ManufacturingProcess38: num 3 2 2 2 2 2 2 2 3 3 ...
## $ ManufacturingProcess39: num 7.2 7.2 7.2 7.2 7.3 7.2 7.3 7.3 7.4 7.1 ...
## $ ManufacturingProcess40: num NA 0.1 0 0 0 0 0 0 0 0 ...
## $ ManufacturingProcess41: num NA 0.15 0 0 0 0 0 0 0 0 ...
## $ ManufacturingProcess42: num 11.6 11.1 12 10.6 11 11.5 11.7 11.4 11.4 11.3 ...
## $ ManufacturingProcess43: num 3 0.9 1 1.1 1.1 2.2 0.7 0.8 0.9 0.8 ...
## $ ManufacturingProcess44: num 1.8 1.9 1.8 1.8 1.7 1.8 2 2 1.9 1.9 ...
## $ ManufacturingProcess45: num 2.4 2.2 2.3 2.1 2.1 2 2.2 2.2 2.1 2.4 ...
# Displaying per-column summary statistics (minimum, quartiles, mean,
# maximum) together with the NA count for columns that have missing values
summary(ChemicalManufacturingProcess)
## Yield BiologicalMaterial01 BiologicalMaterial02 BiologicalMaterial03
## Min. :35.25 Min. :4.580 Min. :46.87 Min. :56.97
## 1st Qu.:38.75 1st Qu.:5.978 1st Qu.:52.68 1st Qu.:64.98
## Median :39.97 Median :6.305 Median :55.09 Median :67.22
## Mean :40.18 Mean :6.411 Mean :55.69 Mean :67.70
## 3rd Qu.:41.48 3rd Qu.:6.870 3rd Qu.:58.74 3rd Qu.:70.43
## Max. :46.34 Max. :8.810 Max. :64.75 Max. :78.25
##
## BiologicalMaterial04 BiologicalMaterial05 BiologicalMaterial06
## Min. : 9.38 Min. :13.24 Min. :40.60
## 1st Qu.:11.24 1st Qu.:17.23 1st Qu.:46.05
## Median :12.10 Median :18.49 Median :48.46
## Mean :12.35 Mean :18.60 Mean :48.91
## 3rd Qu.:13.22 3rd Qu.:19.90 3rd Qu.:51.34
## Max. :23.09 Max. :24.85 Max. :59.38
##
## BiologicalMaterial07 BiologicalMaterial08 BiologicalMaterial09
## Min. :100.0 Min. :15.88 Min. :11.44
## 1st Qu.:100.0 1st Qu.:17.06 1st Qu.:12.60
## Median :100.0 Median :17.51 Median :12.84
## Mean :100.0 Mean :17.49 Mean :12.85
## 3rd Qu.:100.0 3rd Qu.:17.88 3rd Qu.:13.13
## Max. :100.8 Max. :19.14 Max. :14.08
##
## BiologicalMaterial10 BiologicalMaterial11 BiologicalMaterial12
## Min. :1.770 Min. :135.8 Min. :18.35
## 1st Qu.:2.460 1st Qu.:143.8 1st Qu.:19.73
## Median :2.710 Median :146.1 Median :20.12
## Mean :2.801 Mean :147.0 Mean :20.20
## 3rd Qu.:2.990 3rd Qu.:149.6 3rd Qu.:20.75
## Max. :6.870 Max. :158.7 Max. :22.21
##
## ManufacturingProcess01 ManufacturingProcess02 ManufacturingProcess03
## Min. : 0.00 Min. : 0.00 Min. :1.47
## 1st Qu.:10.80 1st Qu.:19.30 1st Qu.:1.53
## Median :11.40 Median :21.00 Median :1.54
## Mean :11.21 Mean :16.68 Mean :1.54
## 3rd Qu.:12.15 3rd Qu.:21.50 3rd Qu.:1.55
## Max. :14.10 Max. :22.50 Max. :1.60
## NA's :1 NA's :3 NA's :15
## ManufacturingProcess04 ManufacturingProcess05 ManufacturingProcess06
## Min. :911.0 Min. : 923.0 Min. :203.0
## 1st Qu.:928.0 1st Qu.: 986.8 1st Qu.:205.7
## Median :934.0 Median : 999.2 Median :206.8
## Mean :931.9 Mean :1001.7 Mean :207.4
## 3rd Qu.:936.0 3rd Qu.:1008.9 3rd Qu.:208.7
## Max. :946.0 Max. :1175.3 Max. :227.4
## NA's :1 NA's :1 NA's :2
## ManufacturingProcess07 ManufacturingProcess08 ManufacturingProcess09
## Min. :177.0 Min. :177.0 Min. :38.89
## 1st Qu.:177.0 1st Qu.:177.0 1st Qu.:44.89
## Median :177.0 Median :178.0 Median :45.73
## Mean :177.5 Mean :177.6 Mean :45.66
## 3rd Qu.:178.0 3rd Qu.:178.0 3rd Qu.:46.52
## Max. :178.0 Max. :178.0 Max. :49.36
## NA's :1 NA's :1
## ManufacturingProcess10 ManufacturingProcess11 ManufacturingProcess12
## Min. : 7.500 Min. : 7.500 Min. : 0.0
## 1st Qu.: 8.700 1st Qu.: 9.000 1st Qu.: 0.0
## Median : 9.100 Median : 9.400 Median : 0.0
## Mean : 9.179 Mean : 9.386 Mean : 857.8
## 3rd Qu.: 9.550 3rd Qu.: 9.900 3rd Qu.: 0.0
## Max. :11.600 Max. :11.500 Max. :4549.0
## NA's :9 NA's :10 NA's :1
## ManufacturingProcess13 ManufacturingProcess14 ManufacturingProcess15
## Min. :32.10 Min. :4701 Min. :5904
## 1st Qu.:33.90 1st Qu.:4828 1st Qu.:6010
## Median :34.60 Median :4856 Median :6032
## Mean :34.51 Mean :4854 Mean :6039
## 3rd Qu.:35.20 3rd Qu.:4882 3rd Qu.:6061
## Max. :38.60 Max. :5055 Max. :6233
## NA's :1
## ManufacturingProcess16 ManufacturingProcess17 ManufacturingProcess18
## Min. : 0 Min. :31.30 Min. : 0
## 1st Qu.:4561 1st Qu.:33.50 1st Qu.:4813
## Median :4588 Median :34.40 Median :4835
## Mean :4566 Mean :34.34 Mean :4810
## 3rd Qu.:4619 3rd Qu.:35.10 3rd Qu.:4862
## Max. :4852 Max. :40.00 Max. :4971
##
## ManufacturingProcess19 ManufacturingProcess20 ManufacturingProcess21
## Min. :5890 Min. : 0 Min. :-1.8000
## 1st Qu.:6001 1st Qu.:4553 1st Qu.:-0.6000
## Median :6022 Median :4582 Median :-0.3000
## Mean :6028 Mean :4556 Mean :-0.1642
## 3rd Qu.:6050 3rd Qu.:4610 3rd Qu.: 0.0000
## Max. :6146 Max. :4759 Max. : 3.6000
##
## ManufacturingProcess22 ManufacturingProcess23 ManufacturingProcess24
## Min. : 0.000 Min. :0.000 Min. : 0.000
## 1st Qu.: 3.000 1st Qu.:2.000 1st Qu.: 4.000
## Median : 5.000 Median :3.000 Median : 8.000
## Mean : 5.406 Mean :3.017 Mean : 8.834
## 3rd Qu.: 8.000 3rd Qu.:4.000 3rd Qu.:14.000
## Max. :12.000 Max. :6.000 Max. :23.000
## NA's :1 NA's :1 NA's :1
## ManufacturingProcess25 ManufacturingProcess26 ManufacturingProcess27
## Min. : 0 Min. : 0 Min. : 0
## 1st Qu.:4832 1st Qu.:6020 1st Qu.:4560
## Median :4855 Median :6047 Median :4587
## Mean :4828 Mean :6016 Mean :4563
## 3rd Qu.:4877 3rd Qu.:6070 3rd Qu.:4609
## Max. :4990 Max. :6161 Max. :4710
## NA's :5 NA's :5 NA's :5
## ManufacturingProcess28 ManufacturingProcess29 ManufacturingProcess30
## Min. : 0.000 Min. : 0.00 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.:19.70 1st Qu.: 8.800
## Median :10.400 Median :19.90 Median : 9.100
## Mean : 6.592 Mean :20.01 Mean : 9.161
## 3rd Qu.:10.750 3rd Qu.:20.40 3rd Qu.: 9.700
## Max. :11.500 Max. :22.00 Max. :11.200
## NA's :5 NA's :5 NA's :5
## ManufacturingProcess31 ManufacturingProcess32 ManufacturingProcess33
## Min. : 0.00 Min. :143.0 Min. :56.00
## 1st Qu.:70.10 1st Qu.:155.0 1st Qu.:62.00
## Median :70.80 Median :158.0 Median :64.00
## Mean :70.18 Mean :158.5 Mean :63.54
## 3rd Qu.:71.40 3rd Qu.:162.0 3rd Qu.:65.00
## Max. :72.50 Max. :173.0 Max. :70.00
## NA's :5 NA's :5
## ManufacturingProcess34 ManufacturingProcess35 ManufacturingProcess36
## Min. :2.300 Min. :463.0 Min. :0.01700
## 1st Qu.:2.500 1st Qu.:490.0 1st Qu.:0.01900
## Median :2.500 Median :495.0 Median :0.02000
## Mean :2.494 Mean :495.6 Mean :0.01957
## 3rd Qu.:2.500 3rd Qu.:501.5 3rd Qu.:0.02000
## Max. :2.600 Max. :522.0 Max. :0.02200
## NA's :5 NA's :5 NA's :5
## ManufacturingProcess37 ManufacturingProcess38 ManufacturingProcess39
## Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.700 1st Qu.:2.000 1st Qu.:7.100
## Median :1.000 Median :3.000 Median :7.200
## Mean :1.014 Mean :2.534 Mean :6.851
## 3rd Qu.:1.300 3rd Qu.:3.000 3rd Qu.:7.300
## Max. :2.300 Max. :3.000 Max. :7.500
##
## ManufacturingProcess40 ManufacturingProcess41 ManufacturingProcess42
## Min. :0.00000 Min. :0.00000 Min. : 0.00
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:11.40
## Median :0.00000 Median :0.00000 Median :11.60
## Mean :0.01771 Mean :0.02371 Mean :11.21
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:11.70
## Max. :0.10000 Max. :0.20000 Max. :12.10
## NA's :1 NA's :1
## ManufacturingProcess43 ManufacturingProcess44 ManufacturingProcess45
## Min. : 0.0000 Min. :0.000 Min. :0.000
## 1st Qu.: 0.6000 1st Qu.:1.800 1st Qu.:2.100
## Median : 0.8000 Median :1.900 Median :2.200
## Mean : 0.9119 Mean :1.805 Mean :2.138
## 3rd Qu.: 1.0250 3rd Qu.:1.900 3rd Qu.:2.300
## Max. :11.0000 Max. :2.100 Max. :2.600
##
##################################
# Formulating a data type assessment summary
##################################
# Working copy of the dataset used for the data type assessment
PDA <- ChemicalManufacturingProcess
# One row per column: position, name and class. The outer parentheses make
# the assignment print its value.
(PDA.Summary <- data.frame(
  Column.Index = seq_along(PDA),
  Column.Name = names(PDA),
  # Collapsing multi-class columns (e.g. ordered factors) into one string
  # keeps Column.Type a character vector with one entry per column
  Column.Type = vapply(PDA,
                       function(x) paste(class(x), collapse = "/"),
                       character(1)),
  row.names = NULL
))
## Column.Index Column.Name Column.Type
## 1 1 Yield numeric
## 2 2 BiologicalMaterial01 numeric
## 3 3 BiologicalMaterial02 numeric
## 4 4 BiologicalMaterial03 numeric
## 5 5 BiologicalMaterial04 numeric
## 6 6 BiologicalMaterial05 numeric
## 7 7 BiologicalMaterial06 numeric
## 8 8 BiologicalMaterial07 numeric
## 9 9 BiologicalMaterial08 numeric
## 10 10 BiologicalMaterial09 numeric
## 11 11 BiologicalMaterial10 numeric
## 12 12 BiologicalMaterial11 numeric
## 13 13 BiologicalMaterial12 numeric
## 14 14 ManufacturingProcess01 numeric
## 15 15 ManufacturingProcess02 numeric
## 16 16 ManufacturingProcess03 numeric
## 17 17 ManufacturingProcess04 numeric
## 18 18 ManufacturingProcess05 numeric
## 19 19 ManufacturingProcess06 numeric
## 20 20 ManufacturingProcess07 numeric
## 21 21 ManufacturingProcess08 numeric
## 22 22 ManufacturingProcess09 numeric
## 23 23 ManufacturingProcess10 numeric
## 24 24 ManufacturingProcess11 numeric
## 25 25 ManufacturingProcess12 numeric
## 26 26 ManufacturingProcess13 numeric
## 27 27 ManufacturingProcess14 numeric
## 28 28 ManufacturingProcess15 numeric
## 29 29 ManufacturingProcess16 numeric
## 30 30 ManufacturingProcess17 numeric
## 31 31 ManufacturingProcess18 numeric
## 32 32 ManufacturingProcess19 numeric
## 33 33 ManufacturingProcess20 numeric
## 34 34 ManufacturingProcess21 numeric
## 35 35 ManufacturingProcess22 numeric
## 36 36 ManufacturingProcess23 numeric
## 37 37 ManufacturingProcess24 numeric
## 38 38 ManufacturingProcess25 numeric
## 39 39 ManufacturingProcess26 numeric
## 40 40 ManufacturingProcess27 numeric
## 41 41 ManufacturingProcess28 numeric
## 42 42 ManufacturingProcess29 numeric
## 43 43 ManufacturingProcess30 numeric
## 44 44 ManufacturingProcess31 numeric
## 45 45 ManufacturingProcess32 numeric
## 46 46 ManufacturingProcess33 numeric
## 47 47 ManufacturingProcess34 numeric
## 48 48 ManufacturingProcess35 numeric
## 49 49 ManufacturingProcess36 numeric
## 50 50 ManufacturingProcess37 numeric
## 51 51 ManufacturingProcess38 numeric
## 52 52 ManufacturingProcess39 numeric
## 53 53 ManufacturingProcess40 numeric
## 54 54 ManufacturingProcess41 numeric
## 55 55 ManufacturingProcess42 numeric
## 56 56 ManufacturingProcess43 numeric
## 57 57 ManufacturingProcess44 numeric
## 58 58 ManufacturingProcess45 numeric
##################################
# Creating a working copy of the dataset for data quality assessment
##################################
DQA <- ChemicalManufacturingProcess
##################################
# Listing all predictors
##################################
# Every column except Yield is treated as a predictor; drop = FALSE keeps
# the result a data frame even if only one predictor column remains
DQA.Predictors <- DQA[, !names(DQA) %in% c("Yield"), drop = FALSE]
##################################
# Formulating an overall data quality assessment summary
##################################
# One row per column of DQA: position, name, class, row count, number of
# missing values and fill rate. The outer parentheses make the assignment
# print its value.
(DQA.Summary <- data.frame(
  Column.Index = seq_along(DQA),
  Column.Name = names(DQA),
  Column.Type = vapply(DQA,
                       function(x) paste(class(x), collapse = "/"),
                       character(1)),
  # The row count is the same for every column of the data frame
  Row.Count = rep(nrow(DQA), ncol(DQA)),
  NA.Count = vapply(DQA, function(x) sum(is.na(x)), integer(1)),
  # Fill.Rate is kept as text formatted with three decimals
  Fill.Rate = vapply(DQA,
                     function(x) {
                       format(round(sum(!is.na(x)) / nrow(DQA), 3), nsmall = 3)
                     },
                     character(1)),
  row.names = NULL
))
## Column.Index Column.Name Column.Type Row.Count NA.Count Fill.Rate
## 1 1 Yield numeric 176 0 1.000
## 2 2 BiologicalMaterial01 numeric 176 0 1.000
## 3 3 BiologicalMaterial02 numeric 176 0 1.000
## 4 4 BiologicalMaterial03 numeric 176 0 1.000
## 5 5 BiologicalMaterial04 numeric 176 0 1.000
## 6 6 BiologicalMaterial05 numeric 176 0 1.000
## 7 7 BiologicalMaterial06 numeric 176 0 1.000
## 8 8 BiologicalMaterial07 numeric 176 0 1.000
## 9 9 BiologicalMaterial08 numeric 176 0 1.000
## 10 10 BiologicalMaterial09 numeric 176 0 1.000
## 11 11 BiologicalMaterial10 numeric 176 0 1.000
## 12 12 BiologicalMaterial11 numeric 176 0 1.000
## 13 13 BiologicalMaterial12 numeric 176 0 1.000
## 14 14 ManufacturingProcess01 numeric 176 1 0.994
## 15 15 ManufacturingProcess02 numeric 176 3 0.983
## 16 16 ManufacturingProcess03 numeric 176 15 0.915
## 17 17 ManufacturingProcess04 numeric 176 1 0.994
## 18 18 ManufacturingProcess05 numeric 176 1 0.994
## 19 19 ManufacturingProcess06 numeric 176 2 0.989
## 20 20 ManufacturingProcess07 numeric 176 1 0.994
## 21 21 ManufacturingProcess08 numeric 176 1 0.994
## 22 22 ManufacturingProcess09 numeric 176 0 1.000
## 23 23 ManufacturingProcess10 numeric 176 9 0.949
## 24 24 ManufacturingProcess11 numeric 176 10 0.943
## 25 25 ManufacturingProcess12 numeric 176 1 0.994
## 26 26 ManufacturingProcess13 numeric 176 0 1.000
## 27 27 ManufacturingProcess14 numeric 176 1 0.994
## 28 28 ManufacturingProcess15 numeric 176 0 1.000
## 29 29 ManufacturingProcess16 numeric 176 0 1.000
## 30 30 ManufacturingProcess17 numeric 176 0 1.000
## 31 31 ManufacturingProcess18 numeric 176 0 1.000
## 32 32 ManufacturingProcess19 numeric 176 0 1.000
## 33 33 ManufacturingProcess20 numeric 176 0 1.000
## 34 34 ManufacturingProcess21 numeric 176 0 1.000
## 35 35 ManufacturingProcess22 numeric 176 1 0.994
## 36 36 ManufacturingProcess23 numeric 176 1 0.994
## 37 37 ManufacturingProcess24 numeric 176 1 0.994
## 38 38 ManufacturingProcess25 numeric 176 5 0.972
## 39 39 ManufacturingProcess26 numeric 176 5 0.972
## 40 40 ManufacturingProcess27 numeric 176 5 0.972
## 41 41 ManufacturingProcess28 numeric 176 5 0.972
## 42 42 ManufacturingProcess29 numeric 176 5 0.972
## 43 43 ManufacturingProcess30 numeric 176 5 0.972
## 44 44 ManufacturingProcess31 numeric 176 5 0.972
## 45 45 ManufacturingProcess32 numeric 176 0 1.000
## 46 46 ManufacturingProcess33 numeric 176 5 0.972
## 47 47 ManufacturingProcess34 numeric 176 5 0.972
## 48 48 ManufacturingProcess35 numeric 176 5 0.972
## 49 49 ManufacturingProcess36 numeric 176 5 0.972
## 50 50 ManufacturingProcess37 numeric 176 0 1.000
## 51 51 ManufacturingProcess38 numeric 176 0 1.000
## 52 52 ManufacturingProcess39 numeric 176 0 1.000
## 53 53 ManufacturingProcess40 numeric 176 1 0.994
## 54 54 ManufacturingProcess41 numeric 176 1 0.994
## 55 55 ManufacturingProcess42 numeric 176 0 1.000
## 56 56 ManufacturingProcess43 numeric 176 0 1.000
## 57 57 ManufacturingProcess44 numeric 176 0 1.000
## 58 58 ManufacturingProcess45 numeric 176 0 1.000
##################################
# Listing all numeric predictors
##################################
# vapply() always yields a logical mask (also for zero predictor columns),
# and drop = FALSE keeps a data frame when exactly one column is numeric so
# the column count below stays correct
DQA.Predictors.Numeric <- DQA.Predictors[
  ,
  vapply(DQA.Predictors, is.numeric, logical(1)),
  drop = FALSE
]

if (ncol(DQA.Predictors.Numeric) > 0) {
  print(paste0("There are ",
               ncol(DQA.Predictors.Numeric),
               " numeric predictor variable(s)."))
} else {
  print("There are no numeric predictor variables.")
}
## [1] "There are 57 numeric predictor variable(s)."
##################################
# Listing all factor predictors
##################################
# vapply() always yields a logical mask (also for zero predictor columns),
# and drop = FALSE keeps a data frame when exactly one column is a factor so
# the column count below stays correct
DQA.Predictors.Factor <- DQA.Predictors[
  ,
  vapply(DQA.Predictors, is.factor, logical(1)),
  drop = FALSE
]

if (ncol(DQA.Predictors.Factor) > 0) {
  print(paste0("There are ",
               ncol(DQA.Predictors.Factor),
               " factor predictor variable(s)."))
} else {
  print("There are no factor predictor variables.")
}
## [1] "There are no factor predictor variables."
##################################
# Formulating a data quality assessment summary for factor predictors
##################################
if (length(names(DQA.Predictors.Factor)) > 0) {

  ##################################
  # Formulating a function to determine the first mode
  ##################################
  # Returns the most frequent non-missing value(s) of x; ties are all kept
  FirstModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    ux[tab == max(tab)]
  }

  ##################################
  # Formulating a function to determine the second mode
  ##################################
  # Returns the most frequent value(s) of x after missing values and the
  # first mode(s) have been removed
  SecondModes <- function(x) {
    # Missing values are dropped up front so NA can never be reported as a
    # mode (matches the numeric-predictor version of this function)
    x <- na.omit(x)
    ux <- unique(x)
    tab <- tabulate(match(x, ux))
    fm <- ux[tab == max(tab)]
    sm <- x[!(x %in% fm)]
    usm <- unique(sm)
    tabsm <- tabulate(match(sm, usm))
    usm[tabsm == max(tabsm)]
  }

  # One row per factor predictor. The outer parentheses make the assignment
  # print its value.
  (DQA.Predictors.Factor.Summary <- data.frame(
    Column.Name = names(DQA.Predictors.Factor),
    Column.Type = vapply(DQA.Predictors.Factor,
                         function(x) paste(class(x), collapse = "/"),
                         character(1)),
    Unique.Count = vapply(DQA.Predictors.Factor,
                          function(x) length(unique(x)),
                          integer(1)),
    First.Mode.Value = vapply(DQA.Predictors.Factor,
                              function(x) as.character(FirstModes(x)[1]),
                              character(1)),
    Second.Mode.Value = vapply(DQA.Predictors.Factor,
                               function(x) as.character(SecondModes(x)[1]),
                               character(1)),
    First.Mode.Count = vapply(DQA.Predictors.Factor,
                              function(x) sum(na.omit(x) == FirstModes(x)[1]),
                              integer(1)),
    Second.Mode.Count = vapply(DQA.Predictors.Factor,
                               function(x) sum(na.omit(x) == SecondModes(x)[1]),
                               integer(1)),
    Unique.Count.Ratio = vapply(
      DQA.Predictors.Factor,
      function(x) {
        format(round(length(unique(x)) / nrow(DQA.Predictors.Factor), 3),
               nsmall = 3)
      },
      character(1)
    ),
    # na.omit() keeps a single missing value from turning the ratio into NA
    First.Second.Mode.Ratio = vapply(
      DQA.Predictors.Factor,
      function(x) {
        observed <- na.omit(x)
        format(round(sum(observed == FirstModes(x)[1]) /
                       sum(observed == SecondModes(x)[1]), 3),
               nsmall = 3)
      },
      character(1)
    ),
    row.names = NULL
  ))

}
##################################
# Formulating a data quality assessment summary for numeric predictors
##################################
if (length(names(DQA.Predictors.Numeric)) > 0) {

  ##################################
  # Formulating a function to determine the first mode
  ##################################
  # Returns the most frequent non-missing value(s) of x; ties are all kept
  FirstModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    ux[tab == max(tab)]
  }

  ##################################
  # Formulating a function to determine the second mode
  ##################################
  # Returns the most frequent value(s) of x after missing values and the
  # first mode(s) have been removed
  SecondModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    fm <- ux[tab == max(tab)]
    sm <- na.omit(x)[!(na.omit(x) %in% fm)]
    usm <- unique(sm)
    tabsm <- tabulate(match(sm, usm))
    usm[tabsm == max(tabsm)]
  }

  # Rounds to three decimals and formats as text with at least three decimals
  FormatRounded <- function(v) {
    format(round(v, 3), nsmall = 3)
  }

  # One row per numeric predictor; all statistics ignore missing values and
  # are stored as formatted text. The outer parentheses make the assignment
  # print its value.
  (DQA.Predictors.Numeric.Summary <- data.frame(
    Column.Name = names(DQA.Predictors.Numeric),
    Column.Type = vapply(DQA.Predictors.Numeric,
                         function(x) paste(class(x), collapse = "/"),
                         character(1)),
    Unique.Count = vapply(DQA.Predictors.Numeric,
                          function(x) length(unique(x)),
                          integer(1)),
    Unique.Count.Ratio = vapply(
      DQA.Predictors.Numeric,
      function(x) {
        FormatRounded(length(unique(x)) / nrow(DQA.Predictors.Numeric))
      },
      character(1)
    ),
    First.Mode.Value = vapply(DQA.Predictors.Numeric,
                              function(x) FormatRounded(FirstModes(x)[1]),
                              character(1)),
    Second.Mode.Value = vapply(DQA.Predictors.Numeric,
                               function(x) FormatRounded(SecondModes(x)[1]),
                               character(1)),
    First.Mode.Count = vapply(DQA.Predictors.Numeric,
                              function(x) sum(na.omit(x) == FirstModes(x)[1]),
                              integer(1)),
    Second.Mode.Count = vapply(DQA.Predictors.Numeric,
                               function(x) sum(na.omit(x) == SecondModes(x)[1]),
                               integer(1)),
    First.Second.Mode.Ratio = vapply(
      DQA.Predictors.Numeric,
      function(x) {
        observed <- na.omit(x)
        FormatRounded(sum(observed == FirstModes(x)[1]) /
                        sum(observed == SecondModes(x)[1]))
      },
      character(1)
    ),
    Minimum = vapply(DQA.Predictors.Numeric,
                     function(x) FormatRounded(min(x, na.rm = TRUE)),
                     character(1)),
    Mean = vapply(DQA.Predictors.Numeric,
                  function(x) FormatRounded(mean(x, na.rm = TRUE)),
                  character(1)),
    Median = vapply(DQA.Predictors.Numeric,
                    function(x) FormatRounded(median(x, na.rm = TRUE)),
                    character(1)),
    Maximum = vapply(DQA.Predictors.Numeric,
                     function(x) FormatRounded(max(x, na.rm = TRUE)),
                     character(1)),
    Skewness = vapply(DQA.Predictors.Numeric,
                      function(x) FormatRounded(skewness(x, na.rm = TRUE)),
                      character(1)),
    Kurtosis = vapply(DQA.Predictors.Numeric,
                      function(x) FormatRounded(kurtosis(x, na.rm = TRUE)),
                      character(1)),
    # names = FALSE drops the "25%" / "75%" labels quantile() would attach
    Percentile25th = vapply(
      DQA.Predictors.Numeric,
      function(x) {
        FormatRounded(quantile(x, probs = 0.25, na.rm = TRUE, names = FALSE))
      },
      character(1)
    ),
    Percentile75th = vapply(
      DQA.Predictors.Numeric,
      function(x) {
        FormatRounded(quantile(x, probs = 0.75, na.rm = TRUE, names = FALSE))
      },
      character(1)
    ),
    row.names = NULL
  ))

}
## Column.Name Column.Type Unique.Count Unique.Count.Ratio
## 1 BiologicalMaterial01 numeric 89 0.506
## 2 BiologicalMaterial02 numeric 106 0.602
## 3 BiologicalMaterial03 numeric 101 0.574
## 4 BiologicalMaterial04 numeric 102 0.580
## 5 BiologicalMaterial05 numeric 103 0.585
## 6 BiologicalMaterial06 numeric 105 0.597
## 7 BiologicalMaterial07 numeric 2 0.011
## 8 BiologicalMaterial08 numeric 90 0.511
## 9 BiologicalMaterial09 numeric 79 0.449
## 10 BiologicalMaterial10 numeric 79 0.449
## 11 BiologicalMaterial11 numeric 105 0.597
## 12 BiologicalMaterial12 numeric 87 0.494
## 13 ManufacturingProcess01 numeric 42 0.239
## 14 ManufacturingProcess02 numeric 28 0.159
## 15 ManufacturingProcess03 numeric 15 0.085
## 16 ManufacturingProcess04 numeric 29 0.165
## 17 ManufacturingProcess05 numeric 155 0.881
## 18 ManufacturingProcess06 numeric 41 0.233
## 19 ManufacturingProcess07 numeric 3 0.017
## 20 ManufacturingProcess08 numeric 3 0.017
## 21 ManufacturingProcess09 numeric 148 0.841
## 22 ManufacturingProcess10 numeric 37 0.210
## 23 ManufacturingProcess11 numeric 36 0.205
## 24 ManufacturingProcess12 numeric 3 0.017
## 25 ManufacturingProcess13 numeric 42 0.239
## 26 ManufacturingProcess14 numeric 115 0.653
## 27 ManufacturingProcess15 numeric 119 0.676
## 28 ManufacturingProcess16 numeric 120 0.682
## 29 ManufacturingProcess17 numeric 42 0.239
## 30 ManufacturingProcess18 numeric 106 0.602
## 31 ManufacturingProcess19 numeric 112 0.636
## 32 ManufacturingProcess20 numeric 112 0.636
## 33 ManufacturingProcess21 numeric 33 0.188
## 34 ManufacturingProcess22 numeric 14 0.080
## 35 ManufacturingProcess23 numeric 8 0.045
## 36 ManufacturingProcess24 numeric 25 0.142
## 37 ManufacturingProcess25 numeric 107 0.608
## 38 ManufacturingProcess26 numeric 106 0.602
## 39 ManufacturingProcess27 numeric 104 0.591
## 40 ManufacturingProcess28 numeric 18 0.102
## 41 ManufacturingProcess29 numeric 31 0.176
## 42 ManufacturingProcess30 numeric 33 0.188
## 43 ManufacturingProcess31 numeric 45 0.256
## 44 ManufacturingProcess32 numeric 27 0.153
## 45 ManufacturingProcess33 numeric 15 0.085
## 46 ManufacturingProcess34 numeric 5 0.028
## 47 ManufacturingProcess35 numeric 47 0.267
## 48 ManufacturingProcess36 numeric 7 0.040
## 49 ManufacturingProcess37 numeric 21 0.119
## 50 ManufacturingProcess38 numeric 3 0.017
## 51 ManufacturingProcess39 numeric 10 0.057
## 52 ManufacturingProcess40 numeric 3 0.017
## 53 ManufacturingProcess41 numeric 5 0.028
## 54 ManufacturingProcess42 numeric 17 0.097
## 55 ManufacturingProcess43 numeric 23 0.131
## 56 ManufacturingProcess44 numeric 8 0.045
## 57 ManufacturingProcess45 numeric 10 0.057
## First.Mode.Value Second.Mode.Value First.Mode.Count Second.Mode.Count
## 1 6.250 6.940 7 6
## 2 60.970 53.180 3 2
## 3 66.950 67.480 4 3
## 4 10.500 11.830 6 4
## 5 18.800 18.040 6 4
## 6 52.830 53.140 4 3
## 7 100.000 100.830 173 3
## 8 17.740 17.870 6 5
## 9 12.750 13.310 11 7
## 10 2.460 2.750 10 9
## 11 153.670 152.820 3 2
## 12 20.250 20.330 9 7
## 13 11.400 11.300 12 11
## 14 0.000 21.500 35 19
## 15 1.550 1.540 63 38
## 16 934.000 936.000 21 17
## 17 1003.800 1014.600 3 2
## 18 206.400 206.600 12 11
## 19 177.000 178.000 91 84
## 20 178.000 177.000 97 78
## 21 45.730 44.920 3 2
## 22 9.000 9.400 14 12
## 23 9.100 9.400 16 12
## 24 0.000 4549.000 142 33
## 25 35.200 34.400 12 11
## 26 4869.000 4878.000 4 3
## 27 6022.000 6029.000 5 4
## 28 4617.000 4577.000 5 4
## 29 34.800 33.900 12 10
## 30 4844.000 4849.000 6 4
## 31 6022.000 6028.000 4 3
## 32 4621.000 4611.000 4 3
## 33 0.000 -0.400 45 17
## 34 3.000 4.000 21 19
## 35 1.000 2.000 36 31
## 36 3.000 7.000 13 12
## 37 4820.000 4846.000 6 4
## 38 6041.000 6060.000 5 4
## 39 4606.000 4572.000 6 5
## 40 0.000 10.700 66 13
## 41 19.700 20.000 24 15
## 42 9.100 8.800 17 14
## 43 70.700 71.400 11 10
## 44 156.000 160.000 22 21
## 45 65.000 63.000 29 28
## 46 2.500 2.400 123 28
## 47 490.000 493.000 13 10
## 48 0.019 0.020 70 63
## 49 1.000 0.700 21 20
## 50 3.000 2.000 104 67
## 51 7.200 7.300 46 40
## 52 0.000 0.100 144 31
## 53 0.000 0.100 143 22
## 54 11.600 11.700 35 30
## 55 0.800 0.700 26 19
## 56 1.900 1.800 74 60
## 57 2.300 2.200 39 38
## First.Second.Mode.Ratio Minimum Mean Median Maximum Skewness
## 1 1.167 4.580 6.411 6.305 8.810 0.276
## 2 1.500 46.870 55.689 55.090 64.750 0.246
## 3 1.333 56.970 67.705 67.220 78.250 0.029
## 4 1.500 9.380 12.349 12.100 23.090 1.747
## 5 1.500 13.240 18.599 18.490 24.850 0.307
## 6 1.333 40.600 48.910 48.460 59.380 0.372
## 7 57.667 100.000 100.014 100.000 100.830 7.462
## 8 1.200 15.880 17.495 17.510 19.140 0.222
## 9 1.571 11.440 12.850 12.835 14.080 -0.271
## 10 1.111 1.770 2.801 2.710 6.870 2.423
## 11 1.500 135.810 146.953 146.080 158.730 0.362
## 12 1.286 18.350 20.200 20.120 22.210 0.306
## 13 1.091 0.000 11.207 11.400 14.100 -3.954
## 14 1.842 0.000 16.683 21.000 22.500 -1.443
## 15 1.658 1.470 1.540 1.540 1.600 -0.484
## 16 1.235 911.000 931.851 934.000 946.000 -0.704
## 17 1.500 923.000 1001.693 999.200 1175.300 2.610
## 18 1.091 203.000 207.402 206.800 227.400 3.068
## 19 1.083 177.000 177.480 177.000 178.000 0.080
## 20 1.244 177.000 177.554 178.000 178.000 -0.218
## 21 1.500 38.890 45.660 45.730 49.360 -0.949
## 22 1.167 7.500 9.179 9.100 11.600 0.655
## 23 1.333 7.500 9.386 9.400 11.500 -0.019
## 24 4.303 0.000 857.811 0.000 4549.000 1.592
## 25 1.091 32.100 34.508 34.600 38.600 0.484
## 26 1.333 4701.000 4853.869 4856.000 5055.000 -0.011
## 27 1.250 5904.000 6038.920 6031.500 6233.000 0.680
## 28 1.250 0.000 4565.801 4588.000 4852.000 -12.527
## 29 1.200 31.300 34.344 34.400 40.000 1.173
## 30 1.500 0.000 4809.682 4835.000 4971.000 -12.845
## 31 1.333 5890.000 6028.199 6022.000 6146.000 0.300
## 32 1.333 0.000 4556.460 4582.000 4759.000 -12.747
## 33 2.647 -1.800 -0.164 -0.300 3.600 1.744
## 34 1.105 0.000 5.406 5.000 12.000 0.318
## 35 1.161 0.000 3.017 3.000 6.000 0.198
## 36 1.083 0.000 8.834 8.000 23.000 0.362
## 37 1.500 0.000 4828.175 4855.000 4990.000 -12.743
## 38 1.250 0.000 6015.596 6047.000 6161.000 -12.781
## 39 1.200 0.000 4562.509 4587.000 4710.000 -12.628
## 40 5.077 0.000 6.592 10.400 11.500 -0.460
## 41 1.600 0.000 20.011 19.900 22.000 -10.174
## 42 1.214 0.000 9.161 9.100 11.200 -4.798
## 43 1.100 0.000 70.185 70.800 72.500 -11.928
## 44 1.048 143.000 158.466 158.000 173.000 0.213
## 45 1.036 56.000 63.544 64.000 70.000 -0.132
## 46 4.393 2.300 2.494 2.500 2.600 -0.266
## 47 1.300 463.000 495.596 495.000 522.000 -0.157
## 48 1.111 0.017 0.020 0.020 0.022 0.147
## 49 1.050 0.000 1.014 1.000 2.300 0.382
## 50 1.552 0.000 2.534 3.000 3.000 -1.696
## 51 1.150 0.000 6.851 7.200 7.500 -4.306
## 52 4.645 0.000 0.018 0.000 0.100 1.691
## 53 6.500 0.000 0.024 0.000 0.200 2.187
## 54 1.167 0.000 11.206 11.600 12.100 -5.497
## 55 1.368 0.000 0.912 0.800 11.000 9.133
## 56 1.233 0.000 1.805 1.900 2.100 -5.013
## 57 1.026 0.000 2.138 2.200 2.600 -4.113
## Kurtosis Percentile25th Percentile75th
## 1 3.496 5.978 6.870
## 2 2.321 52.680 58.737
## 3 2.909 64.980 70.428
## 4 10.172 11.245 13.220
## 5 3.257 17.235 19.900
## 6 2.665 46.055 51.345
## 7 56.684 100.000 100.000
## 8 3.098 17.060 17.880
## 9 3.331 12.602 13.130
## 10 14.815 2.460 2.990
## 11 3.051 143.817 149.600
## 12 3.049 19.730 20.750
## 13 25.155 10.800 12.150
## 14 3.142 19.300 21.500
## 15 4.787 1.530 1.550
## 16 3.098 928.000 936.000
## 17 14.915 986.750 1008.850
## 18 20.613 205.700 208.700
## 19 1.006 177.000 178.000
## 20 1.048 177.000 178.000
## 21 6.342 44.890 46.515
## 22 3.676 8.700 9.550
## 23 3.363 9.000 9.900
## 24 3.535 0.000 0.000
## 25 5.016 33.900 35.200
## 26 4.125 4828.000 4882.500
## 27 4.265 6010.000 6061.000
## 28 163.248 4560.750 4619.000
## 29 7.751 33.500 35.100
## 30 168.649 4813.000 4862.000
## 31 3.334 6000.750 6050.250
## 32 166.958 4552.750 4609.500
## 33 8.119 -0.600 0.000
## 34 2.005 3.000 8.000
## 35 2.026 2.000 4.000
## 36 2.002 4.000 14.000
## 37 165.257 4832.000 4877.000
## 38 165.920 6019.500 6070.500
## 39 163.297 4560.000 4609.000
## 40 1.223 0.000 10.750
## 41 123.883 19.700 20.400
## 42 46.629 8.800 9.700
## 43 150.768 70.100 71.400
## 44 3.095 155.000 162.000
## 45 3.313 62.000 65.000
## 46 4.049 2.500 2.500
## 47 3.453 490.000 501.500
## 48 2.979 0.019 0.020
## 49 3.105 0.700 1.300
## 50 6.998 2.000 3.000
## 51 19.722 7.100 7.300
## 52 3.860 0.000 0.000
## 53 6.705 0.000 0.000
## 54 31.890 11.400 11.700
## 55 105.226 0.600 1.025
## 56 28.410 1.800 1.900
## 57 22.006 2.100 2.300
##################################
# Identifying potential data quality issues
##################################

##################################
# Checking for missing observations
##################################
# Reports how many variables have at least one missing value and lists
# their rows from DQA.Summary; otherwise prints a confirmation message
if (any(DQA.Summary$NA.Count > 0)) {
  print(paste0("Missing observations noted for ",
               sum(DQA.Summary$NA.Count > 0),
               " variable(s) with NA.Count>0 and Fill.Rate<1.0."))
  # Last expression of the branch, so its value is what gets displayed
  DQA.Summary[DQA.Summary$NA.Count > 0, ]
} else {
  print("No missing observations noted.")
}
## [1] "Missing observations noted for 28 variable(s) with NA.Count>0 and Fill.Rate<1.0."
## Column.Index Column.Name Column.Type Row.Count NA.Count Fill.Rate
## 14 14 ManufacturingProcess01 numeric 176 1 0.994
## 15 15 ManufacturingProcess02 numeric 176 3 0.983
## 16 16 ManufacturingProcess03 numeric 176 15 0.915
## 17 17 ManufacturingProcess04 numeric 176 1 0.994
## 18 18 ManufacturingProcess05 numeric 176 1 0.994
## 19 19 ManufacturingProcess06 numeric 176 2 0.989
## 20 20 ManufacturingProcess07 numeric 176 1 0.994
## 21 21 ManufacturingProcess08 numeric 176 1 0.994
## 23 23 ManufacturingProcess10 numeric 176 9 0.949
## 24 24 ManufacturingProcess11 numeric 176 10 0.943
## 25 25 ManufacturingProcess12 numeric 176 1 0.994
## 27 27 ManufacturingProcess14 numeric 176 1 0.994
## 35 35 ManufacturingProcess22 numeric 176 1 0.994
## 36 36 ManufacturingProcess23 numeric 176 1 0.994
## 37 37 ManufacturingProcess24 numeric 176 1 0.994
## 38 38 ManufacturingProcess25 numeric 176 5 0.972
## 39 39 ManufacturingProcess26 numeric 176 5 0.972
## 40 40 ManufacturingProcess27 numeric 176 5 0.972
## 41 41 ManufacturingProcess28 numeric 176 5 0.972
## 42 42 ManufacturingProcess29 numeric 176 5 0.972
## 43 43 ManufacturingProcess30 numeric 176 5 0.972
## 44 44 ManufacturingProcess31 numeric 176 5 0.972
## 46 46 ManufacturingProcess33 numeric 176 5 0.972
## 47 47 ManufacturingProcess34 numeric 176 5 0.972
## 48 48 ManufacturingProcess35 numeric 176 5 0.972
## 49 49 ManufacturingProcess36 numeric 176 5 0.972
## 53 53 ManufacturingProcess40 numeric 176 1 0.994
## 54 54 ManufacturingProcess41 numeric 176 1 0.994
##################################
# Checking for zero or near-zero variance predictors
##################################
# Low-variance check for factor predictors based on the ratio between the
# counts of the most and second most frequent levels.
#   1. No factor predictors at all      -> say so and stop.
#   2. Any First.Second.Mode.Ratio > 5  -> print the count and show the rows.
#   3. Otherwise                        -> report that nothing was flagged.
# The ratio column is converted with as.numeric(as.character(.)) before the
# comparison so that the test is done on numbers even if the summary column is
# stored as a factor or character vector.
if (length(names(DQA.Predictors.Factor))==0) {
  print("No factor predictors noted.")
} else if (nrow(DQA.Predictors.Factor.Summary[
  as.numeric(as.character(DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio))>5,])>0) {
  print(paste0("Low variance observed for ",
               nrow(DQA.Predictors.Factor.Summary[
                 as.numeric(as.character(DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio))>5,]),
               " factor variable(s) with First.Second.Mode.Ratio>5."))
  DQA.Predictors.Factor.Summary[
    as.numeric(as.character(DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio))>5,]
} else {
  print("No low variance factor predictors due to high first-second mode ratio noted.")
}
## [1] "No factor predictors noted."
# Low-variance check for numeric predictors based on the ratio between the
# counts of the most and second most frequent values.
#   1. No numeric predictors at all     -> say so and stop.
#   2. Any First.Second.Mode.Ratio > 5  -> print the count and show the rows.
#   3. Otherwise                        -> report that nothing was flagged.
# The ratio column is converted with as.numeric(as.character(.)) before the
# comparison so that the test is done on numbers even if the summary column is
# stored as a factor or character vector.
if (length(names(DQA.Predictors.Numeric))==0) {
  print("No numeric predictors noted.")
} else if (nrow(DQA.Predictors.Numeric.Summary[
  as.numeric(as.character(DQA.Predictors.Numeric.Summary$First.Second.Mode.Ratio))>5,])>0) {
  print(paste0("Low variance observed for ",
               nrow(DQA.Predictors.Numeric.Summary[
                 as.numeric(as.character(DQA.Predictors.Numeric.Summary$First.Second.Mode.Ratio))>5,]),
               " numeric variable(s) with First.Second.Mode.Ratio>5."))
  DQA.Predictors.Numeric.Summary[
    as.numeric(as.character(DQA.Predictors.Numeric.Summary$First.Second.Mode.Ratio))>5,]
} else {
  print("No low variance numeric predictors due to high first-second mode ratio noted.")
}
## [1] "Low variance observed for 3 numeric variable(s) with First.Second.Mode.Ratio>5."
## Column.Name Column.Type Unique.Count Unique.Count.Ratio
## 7 BiologicalMaterial07 numeric 2 0.011
## 40 ManufacturingProcess28 numeric 18 0.102
## 53 ManufacturingProcess41 numeric 5 0.028
## First.Mode.Value Second.Mode.Value First.Mode.Count Second.Mode.Count
## 7 100.000 100.830 173 3
## 40 0.000 10.700 66 13
## 53 0.000 0.100 143 22
## First.Second.Mode.Ratio Minimum Mean Median Maximum Skewness Kurtosis
## 7 57.667 100.000 100.014 100.000 100.830 7.462 56.684
## 40 5.077 0.000 6.592 10.400 11.500 -0.460 1.223
## 53 6.500 0.000 0.024 0.000 0.200 2.187 6.705
## Percentile25th Percentile75th
## 7 100.000 100.000
## 40 0.000 10.750
## 53 0.000 0.000
# Low-variance check for numeric predictors based on how few distinct values
# they take relative to the number of rows.
#   1. No numeric predictors at all   -> say so and stop.
#   2. Any Unique.Count.Ratio < 0.01  -> print the count and show the rows.
#   3. Otherwise                      -> report that nothing was flagged.
# The ratio column is converted with as.numeric(as.character(.)) before the
# comparison so that the test is done on numbers even if the summary column is
# stored as a factor or character vector.
if (length(names(DQA.Predictors.Numeric))==0) {
  print("No numeric predictors noted.")
} else if (nrow(DQA.Predictors.Numeric.Summary[
  as.numeric(as.character(DQA.Predictors.Numeric.Summary$Unique.Count.Ratio))<0.01,])>0) {
  print(paste0("Low variance observed for ",
               nrow(DQA.Predictors.Numeric.Summary[
                 as.numeric(as.character(DQA.Predictors.Numeric.Summary$Unique.Count.Ratio))<0.01,]),
               " numeric variable(s) with Unique.Count.Ratio<0.01."))
  DQA.Predictors.Numeric.Summary[
    as.numeric(as.character(DQA.Predictors.Numeric.Summary$Unique.Count.Ratio))<0.01,]
} else {
  print("No low variance numeric predictors due to low unique count ratio noted.")
}
## [1] "No low variance numeric predictors due to low unique count ratio noted."
##################################
# Checking for skewed predictors
##################################
# Skewness check for numeric predictors.
#   1. No numeric predictors at all        -> say so and stop.
#   2. Any Skewness > 3 or Skewness < -3   -> print the count and show the rows.
#   3. Otherwise                           -> report that nothing was flagged.
# The element-wise `|` is intentional: it builds a row mask over the whole
# summary table rather than a single scalar condition.
if (length(names(DQA.Predictors.Numeric))==0) {
  print("No numeric predictors noted.")
} else if (nrow(DQA.Predictors.Numeric.Summary[
  as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))>3 |
    as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))<(-3),])>0) {
  print(paste0("High skewness observed for ",
               nrow(DQA.Predictors.Numeric.Summary[
                 as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))>3 |
                   as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))<(-3),]),
               " numeric variable(s) with Skewness>3 or Skewness<(-3)."))
  DQA.Predictors.Numeric.Summary[
    as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))>3 |
      as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))<(-3),]
} else {
  print("No skewed numeric predictors noted.")
}
## [1] "High skewness observed for 17 numeric variable(s) with Skewness>3 or Skewness<(-3)."
## Column.Name Column.Type Unique.Count Unique.Count.Ratio
## 7 BiologicalMaterial07 numeric 2 0.011
## 13 ManufacturingProcess01 numeric 42 0.239
## 18 ManufacturingProcess06 numeric 41 0.233
## 28 ManufacturingProcess16 numeric 120 0.682
## 30 ManufacturingProcess18 numeric 106 0.602
## 32 ManufacturingProcess20 numeric 112 0.636
## 37 ManufacturingProcess25 numeric 107 0.608
## 38 ManufacturingProcess26 numeric 106 0.602
## 39 ManufacturingProcess27 numeric 104 0.591
## 41 ManufacturingProcess29 numeric 31 0.176
## 42 ManufacturingProcess30 numeric 33 0.188
## 43 ManufacturingProcess31 numeric 45 0.256
## 51 ManufacturingProcess39 numeric 10 0.057
## 54 ManufacturingProcess42 numeric 17 0.097
## 55 ManufacturingProcess43 numeric 23 0.131
## 56 ManufacturingProcess44 numeric 8 0.045
## 57 ManufacturingProcess45 numeric 10 0.057
## First.Mode.Value Second.Mode.Value First.Mode.Count Second.Mode.Count
## 7 100.000 100.830 173 3
## 13 11.400 11.300 12 11
## 18 206.400 206.600 12 11
## 28 4617.000 4577.000 5 4
## 30 4844.000 4849.000 6 4
## 32 4621.000 4611.000 4 3
## 37 4820.000 4846.000 6 4
## 38 6041.000 6060.000 5 4
## 39 4606.000 4572.000 6 5
## 41 19.700 20.000 24 15
## 42 9.100 8.800 17 14
## 43 70.700 71.400 11 10
## 51 7.200 7.300 46 40
## 54 11.600 11.700 35 30
## 55 0.800 0.700 26 19
## 56 1.900 1.800 74 60
## 57 2.300 2.200 39 38
## First.Second.Mode.Ratio Minimum Mean Median Maximum Skewness Kurtosis
## 7 57.667 100.000 100.014 100.000 100.830 7.462 56.684
## 13 1.091 0.000 11.207 11.400 14.100 -3.954 25.155
## 18 1.091 203.000 207.402 206.800 227.400 3.068 20.613
## 28 1.250 0.000 4565.801 4588.000 4852.000 -12.527 163.248
## 30 1.500 0.000 4809.682 4835.000 4971.000 -12.845 168.649
## 32 1.333 0.000 4556.460 4582.000 4759.000 -12.747 166.958
## 37 1.500 0.000 4828.175 4855.000 4990.000 -12.743 165.257
## 38 1.250 0.000 6015.596 6047.000 6161.000 -12.781 165.920
## 39 1.200 0.000 4562.509 4587.000 4710.000 -12.628 163.297
## 41 1.600 0.000 20.011 19.900 22.000 -10.174 123.883
## 42 1.214 0.000 9.161 9.100 11.200 -4.798 46.629
## 43 1.100 0.000 70.185 70.800 72.500 -11.928 150.768
## 51 1.150 0.000 6.851 7.200 7.500 -4.306 19.722
## 54 1.167 0.000 11.206 11.600 12.100 -5.497 31.890
## 55 1.368 0.000 0.912 0.800 11.000 9.133 105.226
## 56 1.233 0.000 1.805 1.900 2.100 -5.013 28.410
## 57 1.026 0.000 2.138 2.200 2.600 -4.113 22.006
## Percentile25th Percentile75th
## 7 100.000 100.000
## 13 10.800 12.150
## 18 205.700 208.700
## 28 4560.750 4619.000
## 30 4813.000 4862.000
## 32 4552.750 4609.500
## 37 4832.000 4877.000
## 38 6019.500 6070.500
## 39 4560.000 4609.000
## 41 19.700 20.400
## 42 8.800 9.700
## 43 70.100 71.400
## 51 7.100 7.300
## 54 11.400 11.700
## 55 0.600 1.025
## 56 1.800 1.900
## 57 2.100 2.300
##################################
# Loading dataset
##################################
# Working copy of the dataset used by the imputation steps below, so the
# ChemicalManufacturingProcess data frame itself is left untouched.
DPA <- ChemicalManufacturingProcess
##################################
# Gathering descriptive statistics
##################################
# Summarise every column of DPA with skim(). The outer parentheses make the
# assignment print its value, so the summary is stored and displayed at once.
(DPA_Skimmed <- skim(DPA))
Name | DPA |
Number of rows | 176 |
Number of columns | 58 |
_______________________ | |
Column type frequency: | |
numeric | 58 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
Yield | 0 | 1.00 | 40.18 | 1.85 | 35.25 | 38.75 | 39.97 | 41.48 | 46.34 | ▁▇▇▃▁ |
BiologicalMaterial01 | 0 | 1.00 | 6.41 | 0.71 | 4.58 | 5.98 | 6.30 | 6.87 | 8.81 | ▂▇▇▂▁ |
BiologicalMaterial02 | 0 | 1.00 | 55.69 | 4.03 | 46.87 | 52.68 | 55.09 | 58.74 | 64.75 | ▂▇▆▅▃ |
BiologicalMaterial03 | 0 | 1.00 | 67.70 | 4.00 | 56.97 | 64.98 | 67.22 | 70.43 | 78.25 | ▂▅▇▆▁ |
BiologicalMaterial04 | 0 | 1.00 | 12.35 | 1.77 | 9.38 | 11.25 | 12.10 | 13.22 | 23.09 | ▇▆▁▁▁ |
BiologicalMaterial05 | 0 | 1.00 | 18.60 | 1.84 | 13.24 | 17.24 | 18.49 | 19.90 | 24.85 | ▁▅▇▃▁ |
BiologicalMaterial06 | 0 | 1.00 | 48.91 | 3.75 | 40.60 | 46.06 | 48.46 | 51.34 | 59.38 | ▂▇▆▅▁ |
BiologicalMaterial07 | 0 | 1.00 | 100.01 | 0.11 | 100.00 | 100.00 | 100.00 | 100.00 | 100.83 | ▇▁▁▁▁ |
BiologicalMaterial08 | 0 | 1.00 | 17.49 | 0.68 | 15.88 | 17.06 | 17.51 | 17.88 | 19.14 | ▁▅▇▃▂ |
BiologicalMaterial09 | 0 | 1.00 | 12.85 | 0.42 | 11.44 | 12.60 | 12.84 | 13.13 | 14.08 | ▁▃▇▅▁ |
BiologicalMaterial10 | 0 | 1.00 | 2.80 | 0.60 | 1.77 | 2.46 | 2.71 | 2.99 | 6.87 | ▇▅▁▁▁ |
BiologicalMaterial11 | 0 | 1.00 | 146.95 | 4.82 | 135.81 | 143.82 | 146.08 | 149.60 | 158.73 | ▂▆▇▃▂ |
BiologicalMaterial12 | 0 | 1.00 | 20.20 | 0.77 | 18.35 | 19.73 | 20.12 | 20.75 | 22.21 | ▂▆▇▃▂ |
ManufacturingProcess01 | 1 | 0.99 | 11.21 | 1.82 | 0.00 | 10.80 | 11.40 | 12.15 | 14.10 | ▁▁▁▅▇ |
ManufacturingProcess02 | 3 | 0.98 | 16.68 | 8.47 | 0.00 | 19.30 | 21.00 | 21.50 | 22.50 | ▂▁▁▁▇ |
ManufacturingProcess03 | 15 | 0.91 | 1.54 | 0.02 | 1.47 | 1.53 | 1.54 | 1.55 | 1.60 | ▁▃▆▇▁ |
ManufacturingProcess04 | 1 | 0.99 | 931.85 | 6.27 | 911.00 | 928.00 | 934.00 | 936.00 | 946.00 | ▁▂▃▇▁ |
ManufacturingProcess05 | 1 | 0.99 | 1001.69 | 30.53 | 923.00 | 986.75 | 999.20 | 1008.85 | 1175.30 | ▁▇▁▁▁ |
ManufacturingProcess06 | 2 | 0.99 | 207.40 | 2.70 | 203.00 | 205.70 | 206.80 | 208.70 | 227.40 | ▇▃▁▁▁ |
ManufacturingProcess07 | 1 | 0.99 | 177.48 | 0.50 | 177.00 | 177.00 | 177.00 | 178.00 | 178.00 | ▇▁▁▁▇ |
ManufacturingProcess08 | 1 | 0.99 | 177.55 | 0.50 | 177.00 | 177.00 | 178.00 | 178.00 | 178.00 | ▆▁▁▁▇ |
ManufacturingProcess09 | 0 | 1.00 | 45.66 | 1.55 | 38.89 | 44.89 | 45.73 | 46.52 | 49.36 | ▁▁▅▇▂ |
ManufacturingProcess10 | 9 | 0.95 | 9.18 | 0.77 | 7.50 | 8.70 | 9.10 | 9.55 | 11.60 | ▂▇▆▂▁ |
ManufacturingProcess11 | 10 | 0.94 | 9.39 | 0.72 | 7.50 | 9.00 | 9.40 | 9.90 | 11.50 | ▂▆▇▅▁ |
ManufacturingProcess12 | 1 | 0.99 | 857.81 | 1784.53 | 0.00 | 0.00 | 0.00 | 0.00 | 4549.00 | ▇▁▁▁▂ |
ManufacturingProcess13 | 0 | 1.00 | 34.51 | 1.02 | 32.10 | 33.90 | 34.60 | 35.20 | 38.60 | ▃▇▇▁▁ |
ManufacturingProcess14 | 1 | 0.99 | 4853.87 | 54.52 | 4701.00 | 4828.00 | 4856.00 | 4882.50 | 5055.00 | ▁▅▇▂▁ |
ManufacturingProcess15 | 0 | 1.00 | 6038.92 | 58.31 | 5904.00 | 6010.00 | 6031.50 | 6061.00 | 6233.00 | ▂▇▆▂▁ |
ManufacturingProcess16 | 0 | 1.00 | 4565.80 | 351.70 | 0.00 | 4560.75 | 4588.00 | 4619.00 | 4852.00 | ▁▁▁▁▇ |
ManufacturingProcess17 | 0 | 1.00 | 34.34 | 1.25 | 31.30 | 33.50 | 34.40 | 35.10 | 40.00 | ▂▇▆▁▁ |
ManufacturingProcess18 | 0 | 1.00 | 4809.68 | 367.48 | 0.00 | 4813.00 | 4835.00 | 4862.00 | 4971.00 | ▁▁▁▁▇ |
ManufacturingProcess19 | 0 | 1.00 | 6028.20 | 45.58 | 5890.00 | 6000.75 | 6022.00 | 6050.25 | 6146.00 | ▁▃▇▃▂ |
ManufacturingProcess20 | 0 | 1.00 | 4556.46 | 349.01 | 0.00 | 4552.75 | 4582.00 | 4609.50 | 4759.00 | ▁▁▁▁▇ |
ManufacturingProcess21 | 0 | 1.00 | -0.16 | 0.78 | -1.80 | -0.60 | -0.30 | 0.00 | 3.60 | ▂▇▂▁▁ |
ManufacturingProcess22 | 1 | 0.99 | 5.41 | 3.33 | 0.00 | 3.00 | 5.00 | 8.00 | 12.00 | ▇▇▇▅▅ |
ManufacturingProcess23 | 1 | 0.99 | 3.02 | 1.66 | 0.00 | 2.00 | 3.00 | 4.00 | 6.00 | ▇▆▇▆▇ |
ManufacturingProcess24 | 1 | 0.99 | 8.83 | 5.80 | 0.00 | 4.00 | 8.00 | 14.00 | 23.00 | ▇▇▅▆▁ |
ManufacturingProcess25 | 5 | 0.97 | 4828.18 | 373.48 | 0.00 | 4832.00 | 4855.00 | 4877.00 | 4990.00 | ▁▁▁▁▇ |
ManufacturingProcess26 | 5 | 0.97 | 6015.60 | 464.87 | 0.00 | 6019.50 | 6047.00 | 6070.50 | 6161.00 | ▁▁▁▁▇ |
ManufacturingProcess27 | 5 | 0.97 | 4562.51 | 353.98 | 0.00 | 4560.00 | 4587.00 | 4609.00 | 4710.00 | ▁▁▁▁▇ |
ManufacturingProcess28 | 5 | 0.97 | 6.59 | 5.25 | 0.00 | 0.00 | 10.40 | 10.75 | 11.50 | ▅▁▁▁▇ |
ManufacturingProcess29 | 5 | 0.97 | 20.01 | 1.66 | 0.00 | 19.70 | 19.90 | 20.40 | 22.00 | ▁▁▁▁▇ |
ManufacturingProcess30 | 5 | 0.97 | 9.16 | 0.98 | 0.00 | 8.80 | 9.10 | 9.70 | 11.20 | ▁▁▁▅▇ |
ManufacturingProcess31 | 5 | 0.97 | 70.18 | 5.56 | 0.00 | 70.10 | 70.80 | 71.40 | 72.50 | ▁▁▁▁▇ |
ManufacturingProcess32 | 0 | 1.00 | 158.47 | 5.40 | 143.00 | 155.00 | 158.00 | 162.00 | 173.00 | ▁▃▇▃▁ |
ManufacturingProcess33 | 5 | 0.97 | 63.54 | 2.48 | 56.00 | 62.00 | 64.00 | 65.00 | 70.00 | ▁▃▇▅▁ |
ManufacturingProcess34 | 5 | 0.97 | 2.49 | 0.05 | 2.30 | 2.50 | 2.50 | 2.50 | 2.60 | ▁▂▁▇▁ |
ManufacturingProcess35 | 5 | 0.97 | 495.60 | 10.82 | 463.00 | 490.00 | 495.00 | 501.50 | 522.00 | ▁▂▇▅▂ |
ManufacturingProcess36 | 5 | 0.97 | 0.02 | 0.00 | 0.02 | 0.02 | 0.02 | 0.02 | 0.02 | ▂▇▇▁▃ |
ManufacturingProcess37 | 0 | 1.00 | 1.01 | 0.45 | 0.00 | 0.70 | 1.00 | 1.30 | 2.30 | ▂▇▇▃▁ |
ManufacturingProcess38 | 0 | 1.00 | 2.53 | 0.65 | 0.00 | 2.00 | 3.00 | 3.00 | 3.00 | ▁▁▁▅▇ |
ManufacturingProcess39 | 0 | 1.00 | 6.85 | 1.51 | 0.00 | 7.10 | 7.20 | 7.30 | 7.50 | ▁▁▁▁▇ |
ManufacturingProcess40 | 1 | 0.99 | 0.02 | 0.04 | 0.00 | 0.00 | 0.00 | 0.00 | 0.10 | ▇▁▁▁▂ |
ManufacturingProcess41 | 1 | 0.99 | 0.02 | 0.05 | 0.00 | 0.00 | 0.00 | 0.00 | 0.20 | ▇▁▁▁▁ |
ManufacturingProcess42 | 0 | 1.00 | 11.21 | 1.94 | 0.00 | 11.40 | 11.60 | 11.70 | 12.10 | ▁▁▁▁▇ |
ManufacturingProcess43 | 0 | 1.00 | 0.91 | 0.87 | 0.00 | 0.60 | 0.80 | 1.02 | 11.00 | ▇▁▁▁▁ |
ManufacturingProcess44 | 0 | 1.00 | 1.81 | 0.32 | 0.00 | 1.80 | 1.90 | 1.90 | 2.10 | ▁▁▁▁▇ |
ManufacturingProcess45 | 0 | 1.00 | 2.14 | 0.41 | 0.00 | 2.10 | 2.20 | 2.30 | 2.60 | ▁▁▁▂▇ |
##################################
# Identifying columns with missing data
#################################
# Keep only the skim() rows for columns that have at least one missing value.
# filter() is namespace-qualified so that dplyr's version is used regardless
# of which other attached packages also export a function named filter().
DPA %>%
  skim() %>%
  dplyr::filter(n_missing > 0)
Name | Piped data |
Number of rows | 176 |
Number of columns | 58 |
_______________________ | |
Column type frequency: | |
numeric | 28 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
ManufacturingProcess01 | 1 | 0.99 | 11.21 | 1.82 | 0.00 | 10.80 | 11.40 | 12.15 | 14.10 | ▁▁▁▅▇ |
ManufacturingProcess02 | 3 | 0.98 | 16.68 | 8.47 | 0.00 | 19.30 | 21.00 | 21.50 | 22.50 | ▂▁▁▁▇ |
ManufacturingProcess03 | 15 | 0.91 | 1.54 | 0.02 | 1.47 | 1.53 | 1.54 | 1.55 | 1.60 | ▁▃▆▇▁ |
ManufacturingProcess04 | 1 | 0.99 | 931.85 | 6.27 | 911.00 | 928.00 | 934.00 | 936.00 | 946.00 | ▁▂▃▇▁ |
ManufacturingProcess05 | 1 | 0.99 | 1001.69 | 30.53 | 923.00 | 986.75 | 999.20 | 1008.85 | 1175.30 | ▁▇▁▁▁ |
ManufacturingProcess06 | 2 | 0.99 | 207.40 | 2.70 | 203.00 | 205.70 | 206.80 | 208.70 | 227.40 | ▇▃▁▁▁ |
ManufacturingProcess07 | 1 | 0.99 | 177.48 | 0.50 | 177.00 | 177.00 | 177.00 | 178.00 | 178.00 | ▇▁▁▁▇ |
ManufacturingProcess08 | 1 | 0.99 | 177.55 | 0.50 | 177.00 | 177.00 | 178.00 | 178.00 | 178.00 | ▆▁▁▁▇ |
ManufacturingProcess10 | 9 | 0.95 | 9.18 | 0.77 | 7.50 | 8.70 | 9.10 | 9.55 | 11.60 | ▂▇▆▂▁ |
ManufacturingProcess11 | 10 | 0.94 | 9.39 | 0.72 | 7.50 | 9.00 | 9.40 | 9.90 | 11.50 | ▂▆▇▅▁ |
ManufacturingProcess12 | 1 | 0.99 | 857.81 | 1784.53 | 0.00 | 0.00 | 0.00 | 0.00 | 4549.00 | ▇▁▁▁▂ |
ManufacturingProcess14 | 1 | 0.99 | 4853.87 | 54.52 | 4701.00 | 4828.00 | 4856.00 | 4882.50 | 5055.00 | ▁▅▇▂▁ |
ManufacturingProcess22 | 1 | 0.99 | 5.41 | 3.33 | 0.00 | 3.00 | 5.00 | 8.00 | 12.00 | ▇▇▇▅▅ |
ManufacturingProcess23 | 1 | 0.99 | 3.02 | 1.66 | 0.00 | 2.00 | 3.00 | 4.00 | 6.00 | ▇▆▇▆▇ |
ManufacturingProcess24 | 1 | 0.99 | 8.83 | 5.80 | 0.00 | 4.00 | 8.00 | 14.00 | 23.00 | ▇▇▅▆▁ |
ManufacturingProcess25 | 5 | 0.97 | 4828.18 | 373.48 | 0.00 | 4832.00 | 4855.00 | 4877.00 | 4990.00 | ▁▁▁▁▇ |
ManufacturingProcess26 | 5 | 0.97 | 6015.60 | 464.87 | 0.00 | 6019.50 | 6047.00 | 6070.50 | 6161.00 | ▁▁▁▁▇ |
ManufacturingProcess27 | 5 | 0.97 | 4562.51 | 353.98 | 0.00 | 4560.00 | 4587.00 | 4609.00 | 4710.00 | ▁▁▁▁▇ |
ManufacturingProcess28 | 5 | 0.97 | 6.59 | 5.25 | 0.00 | 0.00 | 10.40 | 10.75 | 11.50 | ▅▁▁▁▇ |
ManufacturingProcess29 | 5 | 0.97 | 20.01 | 1.66 | 0.00 | 19.70 | 19.90 | 20.40 | 22.00 | ▁▁▁▁▇ |
ManufacturingProcess30 | 5 | 0.97 | 9.16 | 0.98 | 0.00 | 8.80 | 9.10 | 9.70 | 11.20 | ▁▁▁▅▇ |
ManufacturingProcess31 | 5 | 0.97 | 70.18 | 5.56 | 0.00 | 70.10 | 70.80 | 71.40 | 72.50 | ▁▁▁▁▇ |
ManufacturingProcess33 | 5 | 0.97 | 63.54 | 2.48 | 56.00 | 62.00 | 64.00 | 65.00 | 70.00 | ▁▃▇▅▁ |
ManufacturingProcess34 | 5 | 0.97 | 2.49 | 0.05 | 2.30 | 2.50 | 2.50 | 2.50 | 2.60 | ▁▂▁▇▁ |
ManufacturingProcess35 | 5 | 0.97 | 495.60 | 10.82 | 463.00 | 490.00 | 495.00 | 501.50 | 522.00 | ▁▂▇▅▂ |
ManufacturingProcess36 | 5 | 0.97 | 0.02 | 0.00 | 0.02 | 0.02 | 0.02 | 0.02 | 0.02 | ▂▇▇▁▃ |
ManufacturingProcess40 | 1 | 0.99 | 0.02 | 0.04 | 0.00 | 0.00 | 0.00 | 0.00 | 0.10 | ▇▁▁▁▂ |
ManufacturingProcess41 | 1 | 0.99 | 0.02 | 0.05 | 0.00 | 0.00 | 0.00 | 0.00 | 0.20 | ▇▁▁▁▁ |
##################################
# Creating a KNN imputation model from the dataset
#################################
# Fit a k-nearest-neighbour imputation model on all columns of DPA. As the
# printed model summary shows, this method also centers and scales every
# column. The outer parentheses print the fitted object after assignment.
(DPA_KNNImputeModel <- preProcess(DPA, method='knnImpute'))
## Created from 152 samples and 58 variables
##
## Pre-processing:
## - centered (58)
## - ignored (0)
## - 5 nearest neighbor imputation (58)
## - scaled (58)
# Apply the fitted KNN imputation model back to DPA. The result is on the
# centered and scaled scale (see the mean/sd columns of the summary that
# follows), not in the original measurement units.
DPA_KNNImputed <- predict(DPA_KNNImputeModel, newdata=DPA)
##################################
# Gathering descriptive statistics
##################################
# Summarise the KNN-imputed data; the wrapping parentheses print the summary
# in addition to storing it.
(DPA_KNNImputed_Skimmed <- skim(DPA_KNNImputed))
Name | DPA_KNNImputed |
Number of rows | 176 |
Number of columns | 58 |
_______________________ | |
Column type frequency: | |
numeric | 58 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
Yield | 0 | 1 | 0.00 | 1.00 | -2.67 | -0.77 | -0.11 | 0.70 | 3.34 | ▁▇▇▃▁ |
BiologicalMaterial01 | 0 | 1 | 0.00 | 1.00 | -2.57 | -0.61 | -0.15 | 0.64 | 3.36 | ▂▇▇▂▁ |
BiologicalMaterial02 | 0 | 1 | 0.00 | 1.00 | -2.19 | -0.75 | -0.15 | 0.76 | 2.25 | ▂▇▆▅▃ |
BiologicalMaterial03 | 0 | 1 | 0.00 | 1.00 | -2.68 | -0.68 | -0.12 | 0.68 | 2.64 | ▂▅▇▆▁ |
BiologicalMaterial04 | 0 | 1 | 0.00 | 1.00 | -1.67 | -0.62 | -0.14 | 0.49 | 6.05 | ▇▆▁▁▁ |
BiologicalMaterial05 | 0 | 1 | 0.00 | 1.00 | -2.91 | -0.74 | -0.06 | 0.71 | 3.39 | ▁▅▇▃▁ |
BiologicalMaterial06 | 0 | 1 | 0.00 | 1.00 | -2.22 | -0.76 | -0.12 | 0.65 | 2.79 | ▂▇▆▅▁ |
BiologicalMaterial07 | 0 | 1 | 0.00 | 1.00 | -0.13 | -0.13 | -0.13 | -0.13 | 7.57 | ▇▁▁▁▁ |
BiologicalMaterial08 | 0 | 1 | 0.00 | 1.00 | -2.39 | -0.64 | 0.02 | 0.57 | 2.43 | ▁▅▇▃▂ |
BiologicalMaterial09 | 0 | 1 | 0.00 | 1.00 | -3.40 | -0.60 | -0.04 | 0.67 | 2.96 | ▁▃▇▅▁ |
BiologicalMaterial10 | 0 | 1 | 0.00 | 1.00 | -1.72 | -0.57 | -0.15 | 0.32 | 6.79 | ▇▅▁▁▁ |
BiologicalMaterial11 | 0 | 1 | 0.00 | 1.00 | -2.31 | -0.65 | -0.18 | 0.55 | 2.44 | ▂▆▇▃▂ |
BiologicalMaterial12 | 0 | 1 | 0.00 | 1.00 | -2.39 | -0.61 | -0.10 | 0.71 | 2.60 | ▂▆▇▃▂ |
ManufacturingProcess01 | 0 | 1 | 0.00 | 1.00 | -6.15 | -0.22 | 0.11 | 0.50 | 1.59 | ▁▁▁▅▇ |
ManufacturingProcess02 | 0 | 1 | 0.01 | 0.99 | -1.97 | 0.31 | 0.51 | 0.57 | 0.69 | ▂▁▁▁▇ |
ManufacturingProcess03 | 0 | 1 | 0.04 | 0.97 | -3.11 | -0.43 | 0.38 | 0.47 | 2.70 | ▁▂▆▇▁ |
ManufacturingProcess04 | 0 | 1 | 0.00 | 1.00 | -3.32 | -0.61 | 0.34 | 0.66 | 2.25 | ▁▂▃▇▂ |
ManufacturingProcess05 | 0 | 1 | 0.00 | 1.00 | -2.58 | -0.49 | -0.09 | 0.23 | 5.69 | ▁▇▁▁▁ |
ManufacturingProcess06 | 0 | 1 | -0.01 | 1.00 | -1.63 | -0.63 | -0.22 | 0.48 | 7.41 | ▇▃▁▁▁ |
ManufacturingProcess07 | 0 | 1 | 0.00 | 1.00 | -0.96 | -0.96 | -0.96 | 1.04 | 1.04 | ▇▁▁▁▇ |
ManufacturingProcess08 | 0 | 1 | 0.00 | 1.00 | -1.11 | -1.11 | 0.89 | 0.89 | 0.89 | ▆▁▁▁▇ |
ManufacturingProcess09 | 0 | 1 | 0.00 | 1.00 | -4.38 | -0.50 | 0.05 | 0.55 | 2.39 | ▁▁▅▇▂ |
ManufacturingProcess10 | 0 | 1 | 0.02 | 0.99 | -2.19 | -0.62 | -0.10 | 0.55 | 3.16 | ▂▇▇▂▁ |
ManufacturingProcess11 | 0 | 1 | 0.03 | 1.00 | -2.63 | -0.54 | 0.02 | 0.72 | 2.95 | ▁▇▇▆▁ |
ManufacturingProcess12 | 0 | 1 | 0.00 | 1.00 | -0.48 | -0.48 | -0.48 | -0.48 | 2.07 | ▇▁▁▁▂ |
ManufacturingProcess13 | 0 | 1 | 0.00 | 1.00 | -2.37 | -0.60 | 0.09 | 0.68 | 4.03 | ▃▆▇▁▁ |
ManufacturingProcess14 | 0 | 1 | 0.00 | 1.00 | -2.80 | -0.49 | 0.03 | 0.52 | 3.69 | ▁▅▇▂▁ |
ManufacturingProcess15 | 0 | 1 | 0.00 | 1.00 | -2.31 | -0.50 | -0.13 | 0.38 | 3.33 | ▂▇▆▂▁ |
ManufacturingProcess16 | 0 | 1 | 0.00 | 1.00 | -12.98 | -0.01 | 0.06 | 0.15 | 0.81 | ▁▁▁▁▇ |
ManufacturingProcess17 | 0 | 1 | 0.00 | 1.00 | -2.44 | -0.68 | 0.05 | 0.61 | 4.53 | ▂▇▆▁▁ |
ManufacturingProcess18 | 0 | 1 | 0.00 | 1.00 | -13.09 | 0.01 | 0.07 | 0.14 | 0.44 | ▁▁▁▁▇ |
ManufacturingProcess19 | 0 | 1 | 0.00 | 1.00 | -3.03 | -0.60 | -0.14 | 0.48 | 2.58 | ▁▃▇▃▂ |
ManufacturingProcess20 | 0 | 1 | 0.00 | 1.00 | -13.06 | -0.01 | 0.07 | 0.15 | 0.58 | ▁▁▁▁▇ |
ManufacturingProcess21 | 0 | 1 | 0.00 | 1.00 | -2.10 | -0.56 | -0.17 | 0.21 | 4.84 | ▂▇▂▁▁ |
ManufacturingProcess22 | 0 | 1 | 0.00 | 1.00 | -1.62 | -0.72 | -0.12 | 0.78 | 1.98 | ▇▇▇▅▅ |
ManufacturingProcess23 | 0 | 1 | 0.00 | 1.00 | -1.81 | -0.61 | -0.01 | 0.59 | 1.79 | ▇▆▇▆▇ |
ManufacturingProcess24 | 0 | 1 | 0.01 | 1.00 | -1.52 | -0.83 | -0.14 | 0.89 | 2.44 | ▇▇▅▆▁ |
ManufacturingProcess25 | 0 | 1 | 0.00 | 0.99 | -12.93 | 0.00 | 0.07 | 0.13 | 0.43 | ▁▁▁▁▇ |
ManufacturingProcess26 | 0 | 1 | 0.00 | 0.99 | -12.94 | 0.01 | 0.06 | 0.12 | 0.31 | ▁▁▁▁▇ |
ManufacturingProcess27 | 0 | 1 | 0.00 | 0.99 | -12.89 | 0.00 | 0.07 | 0.13 | 0.42 | ▁▁▁▁▇ |
ManufacturingProcess28 | 0 | 1 | -0.03 | 1.00 | -1.26 | -1.26 | 0.73 | 0.78 | 0.94 | ▅▁▁▁▇ |
ManufacturingProcess29 | 0 | 1 | 0.00 | 0.99 | -12.03 | -0.19 | -0.07 | 0.23 | 1.20 | ▁▁▁▁▇ |
ManufacturingProcess30 | 0 | 1 | 0.01 | 0.99 | -9.39 | -0.37 | 0.04 | 0.55 | 2.09 | ▁▁▁▅▇ |
ManufacturingProcess31 | 0 | 1 | 0.00 | 0.99 | -12.63 | -0.02 | 0.11 | 0.22 | 0.42 | ▁▁▁▁▇ |
ManufacturingProcess32 | 0 | 1 | 0.00 | 1.00 | -2.87 | -0.64 | -0.09 | 0.65 | 2.69 | ▁▃▇▃▁ |
ManufacturingProcess33 | 0 | 1 | -0.01 | 0.99 | -3.04 | -0.62 | 0.18 | 0.59 | 2.60 | ▁▃▇▅▁ |
ManufacturingProcess34 | 0 | 1 | -0.01 | 0.99 | -3.56 | 0.12 | 0.12 | 0.12 | 1.96 | ▁▂▁▇▁ |
ManufacturingProcess35 | 0 | 1 | -0.02 | 1.00 | -3.01 | -0.52 | -0.06 | 0.50 | 2.44 | ▁▃▇▅▂ |
ManufacturingProcess36 | 0 | 1 | -0.01 | 0.99 | -2.94 | -0.66 | -0.08 | 0.49 | 2.78 | ▂▇▁▇▃ |
ManufacturingProcess37 | 0 | 1 | 0.00 | 1.00 | -2.28 | -0.70 | -0.03 | 0.64 | 2.89 | ▂▇▇▃▁ |
ManufacturingProcess38 | 0 | 1 | 0.00 | 1.00 | -3.90 | -0.82 | 0.72 | 0.72 | 0.72 | ▁▁▁▅▇ |
ManufacturingProcess39 | 0 | 1 | 0.00 | 1.00 | -4.55 | 0.17 | 0.23 | 0.30 | 0.43 | ▁▁▁▁▇ |
ManufacturingProcess40 | 0 | 1 | 0.00 | 1.00 | -0.46 | -0.46 | -0.46 | -0.46 | 2.15 | ▇▁▁▁▂ |
ManufacturingProcess41 | 0 | 1 | 0.00 | 1.00 | -0.44 | -0.44 | -0.44 | -0.44 | 3.28 | ▇▁▁▁▁ |
ManufacturingProcess42 | 0 | 1 | 0.00 | 1.00 | -5.77 | 0.10 | 0.20 | 0.25 | 0.46 | ▁▁▁▁▇ |
ManufacturingProcess43 | 0 | 1 | 0.00 | 1.00 | -1.05 | -0.36 | -0.13 | 0.13 | 11.62 | ▇▁▁▁▁ |
ManufacturingProcess44 | 0 | 1 | 0.00 | 1.00 | -5.61 | -0.02 | 0.29 | 0.29 | 0.92 | ▁▁▁▁▇ |
ManufacturingProcess45 | 0 | 1 | 0.00 | 1.00 | -5.25 | -0.09 | 0.15 | 0.40 | 1.14 | ▁▁▁▂▇ |
##################################
# Identifying columns with missing data
#################################
# Sanity check after KNN imputation: list any columns that still contain
# missing values. An empty result means the imputation filled every gap.
DPA_KNNImputed %>%
  skim() %>%
  dplyr::filter(n_missing > 0)
## # A tibble: 0 x 12
## # i 12 variables: skim_type <chr>, skim_variable <chr>, n_missing <int>,
## # complete_rate <dbl>, numeric.mean <dbl>, numeric.sd <dbl>,
## # numeric.p0 <dbl>, numeric.p25 <dbl>, numeric.p50 <dbl>, numeric.p75 <dbl>,
## # numeric.p100 <dbl>, numeric.hist <chr>
##################################
# Creating a Bagged Tree imputation model from the dataset
#################################
# Fit a bagged-tree imputation model on all columns of DPA. The outer
# parentheses print the fitted object after assignment.
# NOTE(review): bagged trees are presumably built from random resamples, so
# imputed values may differ between runs unless a seed is set beforehand -
# confirm against the caret documentation.
(DPA_BagImputeModel <- preProcess(DPA, method='bagImpute'))
## Created from 152 samples and 58 variables
##
## Pre-processing:
## - bagged tree imputation (58)
## - ignored (0)
# Apply the fitted bagged-tree imputation model back to DPA to obtain a copy
# of the data with the missing entries filled in.
DPA_BagImputed <- predict(DPA_BagImputeModel, newdata=DPA)
##################################
# Gathering descriptive statistics
##################################
# Summarise the bagged-tree-imputed data; the wrapping parentheses print the
# summary in addition to storing it.
(DPA_BagImputed_Skimmed <- skim(DPA_BagImputed))
Name | DPA_BagImputed |
Number of rows | 176 |
Number of columns | 58 |
_______________________ | |
Column type frequency: | |
numeric | 58 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
Yield | 0 | 1 | 40.18 | 1.85 | 35.25 | 38.75 | 39.97 | 41.48 | 46.34 | ▁▇▇▃▁ |
BiologicalMaterial01 | 0 | 1 | 6.41 | 0.71 | 4.58 | 5.98 | 6.30 | 6.87 | 8.81 | ▂▇▇▂▁ |
BiologicalMaterial02 | 0 | 1 | 55.69 | 4.03 | 46.87 | 52.68 | 55.09 | 58.74 | 64.75 | ▂▇▆▅▃ |
BiologicalMaterial03 | 0 | 1 | 67.70 | 4.00 | 56.97 | 64.98 | 67.22 | 70.43 | 78.25 | ▂▅▇▆▁ |
BiologicalMaterial04 | 0 | 1 | 12.35 | 1.77 | 9.38 | 11.25 | 12.10 | 13.22 | 23.09 | ▇▆▁▁▁ |
BiologicalMaterial05 | 0 | 1 | 18.60 | 1.84 | 13.24 | 17.24 | 18.49 | 19.90 | 24.85 | ▁▅▇▃▁ |
BiologicalMaterial06 | 0 | 1 | 48.91 | 3.75 | 40.60 | 46.06 | 48.46 | 51.34 | 59.38 | ▂▇▆▅▁ |
BiologicalMaterial07 | 0 | 1 | 100.01 | 0.11 | 100.00 | 100.00 | 100.00 | 100.00 | 100.83 | ▇▁▁▁▁ |
BiologicalMaterial08 | 0 | 1 | 17.49 | 0.68 | 15.88 | 17.06 | 17.51 | 17.88 | 19.14 | ▁▅▇▃▂ |
BiologicalMaterial09 | 0 | 1 | 12.85 | 0.42 | 11.44 | 12.60 | 12.84 | 13.13 | 14.08 | ▁▃▇▅▁ |
BiologicalMaterial10 | 0 | 1 | 2.80 | 0.60 | 1.77 | 2.46 | 2.71 | 2.99 | 6.87 | ▇▅▁▁▁ |
BiologicalMaterial11 | 0 | 1 | 146.95 | 4.82 | 135.81 | 143.82 | 146.08 | 149.60 | 158.73 | ▂▆▇▃▂ |
BiologicalMaterial12 | 0 | 1 | 20.20 | 0.77 | 18.35 | 19.73 | 20.12 | 20.75 | 22.21 | ▂▆▇▃▂ |
ManufacturingProcess01 | 0 | 1 | 11.21 | 1.82 | 0.00 | 10.80 | 11.40 | 12.12 | 14.10 | ▁▁▁▅▇ |
ManufacturingProcess02 | 0 | 1 | 16.71 | 8.42 | 0.00 | 19.23 | 21.00 | 21.50 | 22.50 | ▂▁▁▁▇ |
ManufacturingProcess03 | 0 | 1 | 1.54 | 0.02 | 1.47 | 1.53 | 1.54 | 1.55 | 1.60 | ▁▂▆▇▁ |
ManufacturingProcess04 | 0 | 1 | 931.83 | 6.26 | 911.00 | 927.98 | 934.00 | 936.00 | 946.00 | ▁▂▃▇▁ |
ManufacturingProcess05 | 0 | 1 | 1001.70 | 30.44 | 923.00 | 986.82 | 999.35 | 1008.72 | 1175.30 | ▁▇▁▁▁ |
ManufacturingProcess06 | 0 | 1 | 207.38 | 2.69 | 203.00 | 205.70 | 206.80 | 208.70 | 227.40 | ▇▃▁▁▁ |
ManufacturingProcess07 | 0 | 1 | 177.48 | 0.50 | 177.00 | 177.00 | 177.00 | 178.00 | 178.00 | ▇▁▁▁▇ |
ManufacturingProcess08 | 0 | 1 | 177.56 | 0.50 | 177.00 | 177.00 | 178.00 | 178.00 | 178.00 | ▆▁▁▁▇ |
ManufacturingProcess09 | 0 | 1 | 45.66 | 1.55 | 38.89 | 44.89 | 45.73 | 46.52 | 49.36 | ▁▁▅▇▂ |
ManufacturingProcess10 | 0 | 1 | 9.18 | 0.76 | 7.50 | 8.70 | 9.10 | 9.53 | 11.60 | ▂▇▆▂▁ |
ManufacturingProcess11 | 0 | 1 | 9.39 | 0.70 | 7.50 | 9.00 | 9.40 | 9.90 | 11.50 | ▂▆▇▅▁ |
ManufacturingProcess12 | 0 | 1 | 855.69 | 1779.64 | 0.00 | 0.00 | 0.00 | 0.00 | 4549.00 | ▇▁▁▁▂ |
ManufacturingProcess13 | 0 | 1 | 34.51 | 1.02 | 32.10 | 33.90 | 34.60 | 35.20 | 38.60 | ▃▇▇▁▁ |
ManufacturingProcess14 | 0 | 1 | 4853.33 | 54.84 | 4701.00 | 4827.25 | 4855.50 | 4882.25 | 5055.00 | ▁▅▇▂▁ |
ManufacturingProcess15 | 0 | 1 | 6038.92 | 58.31 | 5904.00 | 6010.00 | 6031.50 | 6061.00 | 6233.00 | ▂▇▆▂▁ |
ManufacturingProcess16 | 0 | 1 | 4565.80 | 351.70 | 0.00 | 4560.75 | 4588.00 | 4619.00 | 4852.00 | ▁▁▁▁▇ |
ManufacturingProcess17 | 0 | 1 | 34.34 | 1.25 | 31.30 | 33.50 | 34.40 | 35.10 | 40.00 | ▂▇▆▁▁ |
ManufacturingProcess18 | 0 | 1 | 4809.68 | 367.48 | 0.00 | 4813.00 | 4835.00 | 4862.00 | 4971.00 | ▁▁▁▁▇ |
ManufacturingProcess19 | 0 | 1 | 6028.20 | 45.58 | 5890.00 | 6000.75 | 6022.00 | 6050.25 | 6146.00 | ▁▃▇▃▂ |
ManufacturingProcess20 | 0 | 1 | 4556.46 | 349.01 | 0.00 | 4552.75 | 4582.00 | 4609.50 | 4759.00 | ▁▁▁▁▇ |
ManufacturingProcess21 | 0 | 1 | -0.16 | 0.78 | -1.80 | -0.60 | -0.30 | 0.00 | 3.60 | ▂▇▂▁▁ |
ManufacturingProcess22 | 0 | 1 | 5.40 | 3.32 | 0.00 | 3.00 | 5.00 | 8.00 | 12.00 | ▇▇▇▅▅ |
ManufacturingProcess23 | 0 | 1 | 3.02 | 1.66 | 0.00 | 2.00 | 3.00 | 4.00 | 6.00 | ▇▆▇▆▇ |
ManufacturingProcess24 | 0 | 1 | 8.87 | 5.80 | 0.00 | 4.00 | 8.00 | 14.00 | 23.00 | ▇▇▅▆▁ |
ManufacturingProcess25 | 0 | 1 | 4828.92 | 368.13 | 0.00 | 4833.50 | 4855.00 | 4876.25 | 4990.00 | ▁▁▁▁▇ |
ManufacturingProcess26 | 0 | 1 | 6016.48 | 458.21 | 0.00 | 6020.75 | 6046.50 | 6069.25 | 6161.00 | ▁▁▁▁▇ |
ManufacturingProcess27 | 0 | 1 | 4563.21 | 348.92 | 0.00 | 4562.75 | 4587.48 | 4609.00 | 4710.00 | ▁▁▁▁▇ |
ManufacturingProcess28 | 0 | 1 | 6.55 | 5.18 | 0.00 | 0.00 | 10.40 | 10.70 | 11.50 | ▅▁▁▁▇ |
ManufacturingProcess29 | 0 | 1 | 20.01 | 1.64 | 0.00 | 19.70 | 19.90 | 20.40 | 22.00 | ▁▁▁▁▇ |
ManufacturingProcess30 | 0 | 1 | 9.18 | 0.97 | 0.00 | 8.80 | 9.20 | 9.70 | 11.20 | ▁▁▁▅▇ |
ManufacturingProcess31 | 0 | 1 | 70.19 | 5.48 | 0.00 | 70.10 | 70.79 | 71.40 | 72.50 | ▁▁▁▁▇ |
ManufacturingProcess32 | 0 | 1 | 158.47 | 5.40 | 143.00 | 155.00 | 158.00 | 162.00 | 173.00 | ▁▃▇▃▁ |
ManufacturingProcess33 | 0 | 1 | 63.55 | 2.46 | 56.00 | 62.00 | 64.00 | 65.00 | 70.00 | ▁▃▇▅▁ |
ManufacturingProcess34 | 0 | 1 | 2.49 | 0.05 | 2.30 | 2.50 | 2.50 | 2.50 | 2.60 | ▁▂▁▇▁ |
ManufacturingProcess35 | 0 | 1 | 495.53 | 10.69 | 463.00 | 490.00 | 495.00 | 501.00 | 522.00 | ▁▂▇▅▂ |
ManufacturingProcess36 | 0 | 1 | 0.02 | 0.00 | 0.02 | 0.02 | 0.02 | 0.02 | 0.02 | ▂▇▇▁▃ |
ManufacturingProcess37 | 0 | 1 | 1.01 | 0.45 | 0.00 | 0.70 | 1.00 | 1.30 | 2.30 | ▂▇▇▃▁ |
ManufacturingProcess38 | 0 | 1 | 2.53 | 0.65 | 0.00 | 2.00 | 3.00 | 3.00 | 3.00 | ▁▁▁▅▇ |
ManufacturingProcess39 | 0 | 1 | 6.85 | 1.51 | 0.00 | 7.10 | 7.20 | 7.30 | 7.50 | ▁▁▁▁▇ |
ManufacturingProcess40 | 0 | 1 | 0.02 | 0.04 | 0.00 | 0.00 | 0.00 | 0.00 | 0.10 | ▇▁▁▁▂ |
ManufacturingProcess41 | 0 | 1 | 0.02 | 0.05 | 0.00 | 0.00 | 0.00 | 0.00 | 0.20 | ▇▁▁▁▁ |
ManufacturingProcess42 | 0 | 1 | 11.21 | 1.94 | 0.00 | 11.40 | 11.60 | 11.70 | 12.10 | ▁▁▁▁▇ |
ManufacturingProcess43 | 0 | 1 | 0.91 | 0.87 | 0.00 | 0.60 | 0.80 | 1.02 | 11.00 | ▇▁▁▁▁ |
ManufacturingProcess44 | 0 | 1 | 1.81 | 0.32 | 0.00 | 1.80 | 1.90 | 1.90 | 2.10 | ▁▁▁▁▇ |
ManufacturingProcess45 | 0 | 1 | 2.14 | 0.41 | 0.00 | 2.10 | 2.20 | 2.30 | 2.60 | ▁▁▁▂▇ |
##################################
# Identifying columns with missing data
#################################
# Sanity check after bagged-tree imputation: list any columns that still
# contain missing values. An empty result means every gap was filled.
DPA_BagImputed %>%
  skim() %>%
  dplyr::filter(n_missing > 0)
## # A tibble: 0 x 12
## # i 12 variables: skim_type <chr>, skim_variable <chr>, n_missing <int>,
## # complete_rate <dbl>, numeric.mean <dbl>, numeric.sd <dbl>,
## # numeric.p0 <dbl>, numeric.p25 <dbl>, numeric.p50 <dbl>, numeric.p75 <dbl>,
## # numeric.p100 <dbl>, numeric.hist <chr>
##################################
# Creating a Median imputation model from the dataset
#################################
# Fit a median imputation model on all columns of DPA. The outer parentheses
# print the fitted object after assignment.
(DPA_MedianImputeModel <- preProcess(DPA, method='medianImpute'))
## Created from 152 samples and 58 variables
##
## Pre-processing:
## - ignored (0)
## - median imputation (58)
# Apply the fitted median imputation model back to DPA to obtain a copy of
# the data with the missing entries filled in.
DPA_MedianImputed <- predict(DPA_MedianImputeModel, newdata=DPA)
##################################
# Gathering descriptive statistics
##################################
# Summarise the median-imputed data; the wrapping parentheses print the
# summary in addition to storing it.
(DPA_MedianImputed_Skimmed <- skim(DPA_MedianImputed))
Name | DPA_MedianImputed |
Number of rows | 176 |
Number of columns | 58 |
_______________________ | |
Column type frequency: | |
numeric | 58 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
Yield | 0 | 1 | 40.18 | 1.85 | 35.25 | 38.75 | 39.97 | 41.48 | 46.34 | ▁▇▇▃▁ |
BiologicalMaterial01 | 0 | 1 | 6.41 | 0.71 | 4.58 | 5.98 | 6.30 | 6.87 | 8.81 | ▂▇▇▂▁ |
BiologicalMaterial02 | 0 | 1 | 55.69 | 4.03 | 46.87 | 52.68 | 55.09 | 58.74 | 64.75 | ▂▇▆▅▃ |
BiologicalMaterial03 | 0 | 1 | 67.70 | 4.00 | 56.97 | 64.98 | 67.22 | 70.43 | 78.25 | ▂▅▇▆▁ |
BiologicalMaterial04 | 0 | 1 | 12.35 | 1.77 | 9.38 | 11.25 | 12.10 | 13.22 | 23.09 | ▇▆▁▁▁ |
BiologicalMaterial05 | 0 | 1 | 18.60 | 1.84 | 13.24 | 17.24 | 18.49 | 19.90 | 24.85 | ▁▅▇▃▁ |
BiologicalMaterial06 | 0 | 1 | 48.91 | 3.75 | 40.60 | 46.06 | 48.46 | 51.34 | 59.38 | ▂▇▆▅▁ |
BiologicalMaterial07 | 0 | 1 | 100.01 | 0.11 | 100.00 | 100.00 | 100.00 | 100.00 | 100.83 | ▇▁▁▁▁ |
BiologicalMaterial08 | 0 | 1 | 17.49 | 0.68 | 15.88 | 17.06 | 17.51 | 17.88 | 19.14 | ▁▅▇▃▂ |
BiologicalMaterial09 | 0 | 1 | 12.85 | 0.42 | 11.44 | 12.60 | 12.84 | 13.13 | 14.08 | ▁▃▇▅▁ |
BiologicalMaterial10 | 0 | 1 | 2.80 | 0.60 | 1.77 | 2.46 | 2.71 | 2.99 | 6.87 | ▇▅▁▁▁ |
BiologicalMaterial11 | 0 | 1 | 146.95 | 4.82 | 135.81 | 143.82 | 146.08 | 149.60 | 158.73 | ▂▆▇▃▂ |
BiologicalMaterial12 | 0 | 1 | 20.20 | 0.77 | 18.35 | 19.73 | 20.12 | 20.75 | 22.21 | ▂▆▇▃▂ |
ManufacturingProcess01 | 0 | 1 | 11.21 | 1.82 | 0.00 | 10.80 | 11.40 | 12.12 | 14.10 | ▁▁▁▅▇ |
ManufacturingProcess02 | 0 | 1 | 16.76 | 8.42 | 0.00 | 19.30 | 21.00 | 21.50 | 22.50 | ▂▁▁▁▇ |
ManufacturingProcess03 | 0 | 1 | 1.54 | 0.02 | 1.47 | 1.53 | 1.54 | 1.55 | 1.60 | ▁▃▇▇▁ |
ManufacturingProcess04 | 0 | 1 | 931.86 | 6.26 | 911.00 | 928.00 | 934.00 | 936.00 | 946.00 | ▁▂▃▇▁ |
ManufacturingProcess05 | 0 | 1 | 1001.68 | 30.44 | 923.00 | 986.82 | 999.20 | 1008.72 | 1175.30 | ▁▇▁▁▁ |
ManufacturingProcess06 | 0 | 1 | 207.39 | 2.68 | 203.00 | 205.70 | 206.80 | 208.70 | 227.40 | ▇▃▁▁▁ |
ManufacturingProcess07 | 0 | 1 | 177.48 | 0.50 | 177.00 | 177.00 | 177.00 | 178.00 | 178.00 | ▇▁▁▁▇ |
ManufacturingProcess08 | 0 | 1 | 177.56 | 0.50 | 177.00 | 177.00 | 178.00 | 178.00 | 178.00 | ▆▁▁▁▇ |
ManufacturingProcess09 | 0 | 1 | 45.66 | 1.55 | 38.89 | 44.89 | 45.73 | 46.52 | 49.36 | ▁▁▅▇▂ |
ManufacturingProcess10 | 0 | 1 | 9.18 | 0.75 | 7.50 | 8.70 | 9.10 | 9.50 | 11.60 | ▂▇▆▂▁ |
ManufacturingProcess11 | 0 | 1 | 9.39 | 0.69 | 7.50 | 9.00 | 9.40 | 9.83 | 11.50 | ▂▅▇▃▁ |
ManufacturingProcess12 | 0 | 1 | 852.94 | 1780.60 | 0.00 | 0.00 | 0.00 | 0.00 | 4549.00 | ▇▁▁▁▂ |
ManufacturingProcess13 | 0 | 1 | 34.51 | 1.02 | 32.10 | 33.90 | 34.60 | 35.20 | 38.60 | ▃▇▇▁▁ |
ManufacturingProcess14 | 0 | 1 | 4853.88 | 54.37 | 4701.00 | 4828.00 | 4856.00 | 4882.25 | 5055.00 | ▁▅▇▂▁ |
ManufacturingProcess15 | 0 | 1 | 6038.92 | 58.31 | 5904.00 | 6010.00 | 6031.50 | 6061.00 | 6233.00 | ▂▇▆▂▁ |
ManufacturingProcess16 | 0 | 1 | 4565.80 | 351.70 | 0.00 | 4560.75 | 4588.00 | 4619.00 | 4852.00 | ▁▁▁▁▇ |
ManufacturingProcess17 | 0 | 1 | 34.34 | 1.25 | 31.30 | 33.50 | 34.40 | 35.10 | 40.00 | ▂▇▆▁▁ |
ManufacturingProcess18 | 0 | 1 | 4809.68 | 367.48 | 0.00 | 4813.00 | 4835.00 | 4862.00 | 4971.00 | ▁▁▁▁▇ |
ManufacturingProcess19 | 0 | 1 | 6028.20 | 45.58 | 5890.00 | 6000.75 | 6022.00 | 6050.25 | 6146.00 | ▁▃▇▃▂ |
ManufacturingProcess20 | 0 | 1 | 4556.46 | 349.01 | 0.00 | 4552.75 | 4582.00 | 4609.50 | 4759.00 | ▁▁▁▁▇ |
ManufacturingProcess21 | 0 | 1 | -0.16 | 0.78 | -1.80 | -0.60 | -0.30 | 0.00 | 3.60 | ▂▇▂▁▁ |
ManufacturingProcess22 | 0 | 1 | 5.40 | 3.32 | 0.00 | 3.00 | 5.00 | 8.00 | 12.00 | ▇▇▇▅▅ |
ManufacturingProcess23 | 0 | 1 | 3.02 | 1.66 | 0.00 | 2.00 | 3.00 | 4.00 | 6.00 | ▇▆▇▆▇ |
ManufacturingProcess24 | 0 | 1 | 8.83 | 5.78 | 0.00 | 4.00 | 8.00 | 14.00 | 23.00 | ▇▇▅▆▁ |
ManufacturingProcess25 | 0 | 1 | 4828.94 | 368.13 | 0.00 | 4833.50 | 4855.00 | 4876.25 | 4990.00 | ▁▁▁▁▇ |
ManufacturingProcess26 | 0 | 1 | 6016.49 | 458.21 | 0.00 | 6020.75 | 6047.00 | 6069.25 | 6161.00 | ▁▁▁▁▇ |
ManufacturingProcess27 | 0 | 1 | 4563.20 | 348.92 | 0.00 | 4562.75 | 4587.00 | 4609.00 | 4710.00 | ▁▁▁▁▇ |
ManufacturingProcess28 | 0 | 1 | 6.70 | 5.21 | 0.00 | 0.00 | 10.40 | 10.70 | 11.50 | ▅▁▁▁▇ |
ManufacturingProcess29 | 0 | 1 | 20.01 | 1.64 | 0.00 | 19.70 | 19.90 | 20.40 | 22.00 | ▁▁▁▁▇ |
ManufacturingProcess30 | 0 | 1 | 9.16 | 0.96 | 0.00 | 8.80 | 9.10 | 9.70 | 11.20 | ▁▁▁▅▇ |
ManufacturingProcess31 | 0 | 1 | 70.20 | 5.48 | 0.00 | 70.10 | 70.80 | 71.40 | 72.50 | ▁▁▁▁▇ |
ManufacturingProcess32 | 0 | 1 | 158.47 | 5.40 | 143.00 | 155.00 | 158.00 | 162.00 | 173.00 | ▁▃▇▃▁ |
ManufacturingProcess33 | 0 | 1 | 63.56 | 2.45 | 56.00 | 62.00 | 64.00 | 65.00 | 70.00 | ▁▃▇▅▁ |
ManufacturingProcess34 | 0 | 1 | 2.49 | 0.05 | 2.30 | 2.50 | 2.50 | 2.50 | 2.60 | ▁▂▁▇▁ |
ManufacturingProcess35 | 0 | 1 | 495.58 | 10.66 | 463.00 | 490.00 | 495.00 | 501.00 | 522.00 | ▁▂▇▅▂ |
ManufacturingProcess36 | 0 | 1 | 0.02 | 0.00 | 0.02 | 0.02 | 0.02 | 0.02 | 0.02 | ▂▇▇▁▃ |
ManufacturingProcess37 | 0 | 1 | 1.01 | 0.45 | 0.00 | 0.70 | 1.00 | 1.30 | 2.30 | ▂▇▇▃▁ |
ManufacturingProcess38 | 0 | 1 | 2.53 | 0.65 | 0.00 | 2.00 | 3.00 | 3.00 | 3.00 | ▁▁▁▅▇ |
ManufacturingProcess39 | 0 | 1 | 6.85 | 1.51 | 0.00 | 7.10 | 7.20 | 7.30 | 7.50 | ▁▁▁▁▇ |
ManufacturingProcess40 | 0 | 1 | 0.02 | 0.04 | 0.00 | 0.00 | 0.00 | 0.00 | 0.10 | ▇▁▁▁▂ |
ManufacturingProcess41 | 0 | 1 | 0.02 | 0.05 | 0.00 | 0.00 | 0.00 | 0.00 | 0.20 | ▇▁▁▁▁ |
ManufacturingProcess42 | 0 | 1 | 11.21 | 1.94 | 0.00 | 11.40 | 11.60 | 11.70 | 12.10 | ▁▁▁▁▇ |
ManufacturingProcess43 | 0 | 1 | 0.91 | 0.87 | 0.00 | 0.60 | 0.80 | 1.02 | 11.00 | ▇▁▁▁▁ |
ManufacturingProcess44 | 0 | 1 | 1.81 | 0.32 | 0.00 | 1.80 | 1.90 | 1.90 | 2.10 | ▁▁▁▁▇ |
ManufacturingProcess45 | 0 | 1 | 2.14 | 0.41 | 0.00 | 2.10 | 2.20 | 2.30 | 2.60 | ▁▁▁▂▇ |
##################################
# Identifying columns with missing data
#################################
# Summarise the median-imputed data with skim() and keep only the variables
# that still report missing values; a zero-row result means none remain.
DPA_MedianImputed %>%
  skim() %>%
  dplyr::filter(n_missing > 0)
## # A tibble: 0 x 12
## # i 12 variables: skim_type <chr>, skim_variable <chr>, n_missing <int>,
## # complete_rate <dbl>, numeric.mean <dbl>, numeric.sd <dbl>,
## # numeric.p0 <dbl>, numeric.p25 <dbl>, numeric.p50 <dbl>, numeric.p75 <dbl>,
## # numeric.p100 <dbl>, numeric.hist <chr>
##################################
# Loading dataset
##################################
# Work on a copy of the packaged dataset.
DPA <- ChemicalManufacturingProcess
##################################
# Listing all predictors
##################################
# Every column except the response "Yield" is treated as a predictor.
DPA.Predictors <- DPA[, !names(DPA) %in% c("Yield")]
##################################
# Listing all numeric predictors
##################################
# vapply() guarantees a logical mask (one value per column) regardless of
# the input, unlike sapply() whose return type depends on its input.
DPA.Predictors.Numeric <- DPA.Predictors[, vapply(DPA.Predictors,
                                                  is.numeric,
                                                  logical(1))]
##################################
# Identifying outliers for the numeric predictors
##################################
# Count, for each numeric predictor, the values flagged by boxplot.stats()
# as lying beyond the whiskers, and draw one horizontal boxplot per predictor
# annotated with that count.
# One slot per predictor, filled inside the loop (no growing by append()).
OutlierCountList <- integer(ncol(DPA.Predictors.Numeric))
# seq_len() yields an empty sequence when there are no columns,
# whereas 1:ncol() would iterate over c(1, 0).
for (i in seq_len(ncol(DPA.Predictors.Numeric))) {
  Outliers <- boxplot.stats(DPA.Predictors.Numeric[,i])$out
  OutlierCount <- length(Outliers)
  OutlierCountList[i] <- OutlierCount
  # Row positions of the flagged values within the current column.
  OutlierIndices <- which(DPA.Predictors.Numeric[,i] %in% c(Outliers))
  boxplot(DPA.Predictors.Numeric[,i],
          ylab = names(DPA.Predictors.Numeric)[i],
          main = names(DPA.Predictors.Numeric)[i],
          horizontal=TRUE)
  mtext(paste0(OutlierCount, " Outlier(s) Detected"))
}
# Pair each predictor name with its outlier count. cbind() of names and
# counts produces character data, so the count column is converted back
# to numeric before it is compared against zero.
OutlierCountSummary <- as.data.frame(cbind(names(DPA.Predictors.Numeric),(OutlierCountList)))
names(OutlierCountSummary) <- c("NumericPredictors","OutlierCount")
OutlierCountSummary$OutlierCount <- as.numeric(as.character(OutlierCountSummary$OutlierCount))
# Number of predictors with at least one flagged value.
NumericPredictorWithOutlierCount <- nrow(OutlierCountSummary[OutlierCountSummary$OutlierCount>0,])
print(paste0(NumericPredictorWithOutlierCount, " numeric variable(s) were noted with outlier(s)." ))
## [1] "48 numeric variable(s) were noted with outlier(s)."
##################################
# Gathering descriptive statistics
##################################
# Store the skim() summary of the numeric predictors; the outer
# parentheses print it as well.
(DPA_Skimmed <- skim(DPA.Predictors.Numeric))
Name | DPA.Predictors.Numeric |
Number of rows | 176 |
Number of columns | 57 |
_______________________ | |
Column type frequency: | |
numeric | 57 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
BiologicalMaterial01 | 0 | 1.00 | 6.41 | 0.71 | 4.58 | 5.98 | 6.30 | 6.87 | 8.81 | ▂▇▇▂▁ |
BiologicalMaterial02 | 0 | 1.00 | 55.69 | 4.03 | 46.87 | 52.68 | 55.09 | 58.74 | 64.75 | ▂▇▆▅▃ |
BiologicalMaterial03 | 0 | 1.00 | 67.70 | 4.00 | 56.97 | 64.98 | 67.22 | 70.43 | 78.25 | ▂▅▇▆▁ |
BiologicalMaterial04 | 0 | 1.00 | 12.35 | 1.77 | 9.38 | 11.25 | 12.10 | 13.22 | 23.09 | ▇▆▁▁▁ |
BiologicalMaterial05 | 0 | 1.00 | 18.60 | 1.84 | 13.24 | 17.24 | 18.49 | 19.90 | 24.85 | ▁▅▇▃▁ |
BiologicalMaterial06 | 0 | 1.00 | 48.91 | 3.75 | 40.60 | 46.06 | 48.46 | 51.34 | 59.38 | ▂▇▆▅▁ |
BiologicalMaterial07 | 0 | 1.00 | 100.01 | 0.11 | 100.00 | 100.00 | 100.00 | 100.00 | 100.83 | ▇▁▁▁▁ |
BiologicalMaterial08 | 0 | 1.00 | 17.49 | 0.68 | 15.88 | 17.06 | 17.51 | 17.88 | 19.14 | ▁▅▇▃▂ |
BiologicalMaterial09 | 0 | 1.00 | 12.85 | 0.42 | 11.44 | 12.60 | 12.84 | 13.13 | 14.08 | ▁▃▇▅▁ |
BiologicalMaterial10 | 0 | 1.00 | 2.80 | 0.60 | 1.77 | 2.46 | 2.71 | 2.99 | 6.87 | ▇▅▁▁▁ |
BiologicalMaterial11 | 0 | 1.00 | 146.95 | 4.82 | 135.81 | 143.82 | 146.08 | 149.60 | 158.73 | ▂▆▇▃▂ |
BiologicalMaterial12 | 0 | 1.00 | 20.20 | 0.77 | 18.35 | 19.73 | 20.12 | 20.75 | 22.21 | ▂▆▇▃▂ |
ManufacturingProcess01 | 1 | 0.99 | 11.21 | 1.82 | 0.00 | 10.80 | 11.40 | 12.15 | 14.10 | ▁▁▁▅▇ |
ManufacturingProcess02 | 3 | 0.98 | 16.68 | 8.47 | 0.00 | 19.30 | 21.00 | 21.50 | 22.50 | ▂▁▁▁▇ |
ManufacturingProcess03 | 15 | 0.91 | 1.54 | 0.02 | 1.47 | 1.53 | 1.54 | 1.55 | 1.60 | ▁▃▆▇▁ |
ManufacturingProcess04 | 1 | 0.99 | 931.85 | 6.27 | 911.00 | 928.00 | 934.00 | 936.00 | 946.00 | ▁▂▃▇▁ |
ManufacturingProcess05 | 1 | 0.99 | 1001.69 | 30.53 | 923.00 | 986.75 | 999.20 | 1008.85 | 1175.30 | ▁▇▁▁▁ |
ManufacturingProcess06 | 2 | 0.99 | 207.40 | 2.70 | 203.00 | 205.70 | 206.80 | 208.70 | 227.40 | ▇▃▁▁▁ |
ManufacturingProcess07 | 1 | 0.99 | 177.48 | 0.50 | 177.00 | 177.00 | 177.00 | 178.00 | 178.00 | ▇▁▁▁▇ |
ManufacturingProcess08 | 1 | 0.99 | 177.55 | 0.50 | 177.00 | 177.00 | 178.00 | 178.00 | 178.00 | ▆▁▁▁▇ |
ManufacturingProcess09 | 0 | 1.00 | 45.66 | 1.55 | 38.89 | 44.89 | 45.73 | 46.52 | 49.36 | ▁▁▅▇▂ |
ManufacturingProcess10 | 9 | 0.95 | 9.18 | 0.77 | 7.50 | 8.70 | 9.10 | 9.55 | 11.60 | ▂▇▆▂▁ |
ManufacturingProcess11 | 10 | 0.94 | 9.39 | 0.72 | 7.50 | 9.00 | 9.40 | 9.90 | 11.50 | ▂▆▇▅▁ |
ManufacturingProcess12 | 1 | 0.99 | 857.81 | 1784.53 | 0.00 | 0.00 | 0.00 | 0.00 | 4549.00 | ▇▁▁▁▂ |
ManufacturingProcess13 | 0 | 1.00 | 34.51 | 1.02 | 32.10 | 33.90 | 34.60 | 35.20 | 38.60 | ▃▇▇▁▁ |
ManufacturingProcess14 | 1 | 0.99 | 4853.87 | 54.52 | 4701.00 | 4828.00 | 4856.00 | 4882.50 | 5055.00 | ▁▅▇▂▁ |
ManufacturingProcess15 | 0 | 1.00 | 6038.92 | 58.31 | 5904.00 | 6010.00 | 6031.50 | 6061.00 | 6233.00 | ▂▇▆▂▁ |
ManufacturingProcess16 | 0 | 1.00 | 4565.80 | 351.70 | 0.00 | 4560.75 | 4588.00 | 4619.00 | 4852.00 | ▁▁▁▁▇ |
ManufacturingProcess17 | 0 | 1.00 | 34.34 | 1.25 | 31.30 | 33.50 | 34.40 | 35.10 | 40.00 | ▂▇▆▁▁ |
ManufacturingProcess18 | 0 | 1.00 | 4809.68 | 367.48 | 0.00 | 4813.00 | 4835.00 | 4862.00 | 4971.00 | ▁▁▁▁▇ |
ManufacturingProcess19 | 0 | 1.00 | 6028.20 | 45.58 | 5890.00 | 6000.75 | 6022.00 | 6050.25 | 6146.00 | ▁▃▇▃▂ |
ManufacturingProcess20 | 0 | 1.00 | 4556.46 | 349.01 | 0.00 | 4552.75 | 4582.00 | 4609.50 | 4759.00 | ▁▁▁▁▇ |
ManufacturingProcess21 | 0 | 1.00 | -0.16 | 0.78 | -1.80 | -0.60 | -0.30 | 0.00 | 3.60 | ▂▇▂▁▁ |
ManufacturingProcess22 | 1 | 0.99 | 5.41 | 3.33 | 0.00 | 3.00 | 5.00 | 8.00 | 12.00 | ▇▇▇▅▅ |
ManufacturingProcess23 | 1 | 0.99 | 3.02 | 1.66 | 0.00 | 2.00 | 3.00 | 4.00 | 6.00 | ▇▆▇▆▇ |
ManufacturingProcess24 | 1 | 0.99 | 8.83 | 5.80 | 0.00 | 4.00 | 8.00 | 14.00 | 23.00 | ▇▇▅▆▁ |
ManufacturingProcess25 | 5 | 0.97 | 4828.18 | 373.48 | 0.00 | 4832.00 | 4855.00 | 4877.00 | 4990.00 | ▁▁▁▁▇ |
ManufacturingProcess26 | 5 | 0.97 | 6015.60 | 464.87 | 0.00 | 6019.50 | 6047.00 | 6070.50 | 6161.00 | ▁▁▁▁▇ |
ManufacturingProcess27 | 5 | 0.97 | 4562.51 | 353.98 | 0.00 | 4560.00 | 4587.00 | 4609.00 | 4710.00 | ▁▁▁▁▇ |
ManufacturingProcess28 | 5 | 0.97 | 6.59 | 5.25 | 0.00 | 0.00 | 10.40 | 10.75 | 11.50 | ▅▁▁▁▇ |
ManufacturingProcess29 | 5 | 0.97 | 20.01 | 1.66 | 0.00 | 19.70 | 19.90 | 20.40 | 22.00 | ▁▁▁▁▇ |
ManufacturingProcess30 | 5 | 0.97 | 9.16 | 0.98 | 0.00 | 8.80 | 9.10 | 9.70 | 11.20 | ▁▁▁▅▇ |
ManufacturingProcess31 | 5 | 0.97 | 70.18 | 5.56 | 0.00 | 70.10 | 70.80 | 71.40 | 72.50 | ▁▁▁▁▇ |
ManufacturingProcess32 | 0 | 1.00 | 158.47 | 5.40 | 143.00 | 155.00 | 158.00 | 162.00 | 173.00 | ▁▃▇▃▁ |
ManufacturingProcess33 | 5 | 0.97 | 63.54 | 2.48 | 56.00 | 62.00 | 64.00 | 65.00 | 70.00 | ▁▃▇▅▁ |
ManufacturingProcess34 | 5 | 0.97 | 2.49 | 0.05 | 2.30 | 2.50 | 2.50 | 2.50 | 2.60 | ▁▂▁▇▁ |
ManufacturingProcess35 | 5 | 0.97 | 495.60 | 10.82 | 463.00 | 490.00 | 495.00 | 501.50 | 522.00 | ▁▂▇▅▂ |
ManufacturingProcess36 | 5 | 0.97 | 0.02 | 0.00 | 0.02 | 0.02 | 0.02 | 0.02 | 0.02 | ▂▇▇▁▃ |
ManufacturingProcess37 | 0 | 1.00 | 1.01 | 0.45 | 0.00 | 0.70 | 1.00 | 1.30 | 2.30 | ▂▇▇▃▁ |
ManufacturingProcess38 | 0 | 1.00 | 2.53 | 0.65 | 0.00 | 2.00 | 3.00 | 3.00 | 3.00 | ▁▁▁▅▇ |
ManufacturingProcess39 | 0 | 1.00 | 6.85 | 1.51 | 0.00 | 7.10 | 7.20 | 7.30 | 7.50 | ▁▁▁▁▇ |
ManufacturingProcess40 | 1 | 0.99 | 0.02 | 0.04 | 0.00 | 0.00 | 0.00 | 0.00 | 0.10 | ▇▁▁▁▂ |
ManufacturingProcess41 | 1 | 0.99 | 0.02 | 0.05 | 0.00 | 0.00 | 0.00 | 0.00 | 0.20 | ▇▁▁▁▁ |
ManufacturingProcess42 | 0 | 1.00 | 11.21 | 1.94 | 0.00 | 11.40 | 11.60 | 11.70 | 12.10 | ▁▁▁▁▇ |
ManufacturingProcess43 | 0 | 1.00 | 0.91 | 0.87 | 0.00 | 0.60 | 0.80 | 1.02 | 11.00 | ▇▁▁▁▁ |
ManufacturingProcess44 | 0 | 1.00 | 1.81 | 0.32 | 0.00 | 1.80 | 1.90 | 1.90 | 2.10 | ▁▁▁▁▇ |
ManufacturingProcess45 | 0 | 1.00 | 2.14 | 0.41 | 0.00 | 2.10 | 2.20 | 2.30 | 2.60 | ▁▁▁▂▇ |
##################################
# Applying a center, scale and spatial sign data transformation
##################################
# Fit a pre-processing model that centers, scales and then applies the
# spatial sign transformation to the numeric predictors.
DPA_CenteredScaledSpatialSigned <- preProcess(
  DPA.Predictors.Numeric,
  method = c("center","scale","spatialSign")
)
# Apply the fitted model to the same predictors to get the transformed data.
DPA_CenteredScaledSpatialSignedTransformed <- predict(
  DPA_CenteredScaledSpatialSigned,
  DPA.Predictors.Numeric
)
##################################
# Gathering descriptive statistics
##################################
# Store the skim() summary of the transformed predictors; the outer
# parentheses print it as well.
(DPA_CenteredScaledSpatialSignedTransformedSkimmed <-
  skim(DPA_CenteredScaledSpatialSignedTransformed))
Name | DPA_CenteredScaledSpatial… |
Number of rows | 176 |
Number of columns | 57 |
_______________________ | |
Column type frequency: | |
numeric | 57 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
BiologicalMaterial01 | 0 | 1.00 | -0.01 | 0.14 | -0.35 | -0.12 | -0.02 | 0.10 | 0.28 | ▁▅▇▆▃ |
BiologicalMaterial02 | 0 | 1.00 | -0.02 | 0.15 | -0.27 | -0.13 | -0.02 | 0.11 | 0.27 | ▆▇▆▆▃ |
BiologicalMaterial03 | 0 | 1.00 | 0.00 | 0.15 | -0.36 | -0.12 | -0.01 | 0.12 | 0.35 | ▂▇▇▇▂ |
BiologicalMaterial04 | 0 | 1.00 | -0.01 | 0.13 | -0.27 | -0.09 | -0.03 | 0.06 | 0.50 | ▅▇▅▂▁ |
BiologicalMaterial05 | 0 | 1.00 | -0.01 | 0.15 | -0.41 | -0.13 | -0.01 | 0.08 | 0.53 | ▂▇▇▃▁ |
BiologicalMaterial06 | 0 | 1.00 | -0.01 | 0.15 | -0.28 | -0.14 | -0.02 | 0.09 | 0.42 | ▆▇▇▃▁ |
BiologicalMaterial07 | 0 | 1.00 | -0.01 | 0.10 | -0.04 | -0.03 | -0.02 | -0.02 | 0.78 | ▇▁▁▁▁ |
BiologicalMaterial08 | 0 | 1.00 | -0.01 | 0.14 | -0.33 | -0.11 | 0.00 | 0.08 | 0.28 | ▂▆▇▇▃ |
BiologicalMaterial09 | 0 | 1.00 | 0.01 | 0.15 | -0.46 | -0.08 | 0.00 | 0.10 | 0.35 | ▁▃▇▆▂ |
BiologicalMaterial10 | 0 | 1.00 | -0.01 | 0.13 | -0.26 | -0.10 | -0.03 | 0.05 | 0.56 | ▃▇▂▁▁ |
BiologicalMaterial11 | 0 | 1.00 | -0.01 | 0.14 | -0.31 | -0.10 | -0.02 | 0.09 | 0.30 | ▂▇▇▅▃ |
BiologicalMaterial12 | 0 | 1.00 | -0.01 | 0.14 | -0.34 | -0.11 | -0.01 | 0.09 | 0.38 | ▂▇▇▅▁ |
ManufacturingProcess01 | 1 | 0.99 | 0.01 | 0.12 | -0.62 | -0.04 | 0.02 | 0.08 | 0.24 | ▁▁▂▇▅ |
ManufacturingProcess02 | 3 | 0.98 | 0.02 | 0.14 | -0.32 | 0.04 | 0.08 | 0.11 | 0.17 | ▂▁▁▅▇ |
ManufacturingProcess03 | 15 | 0.91 | 0.00 | 0.16 | -0.57 | -0.07 | 0.00 | 0.09 | 0.50 | ▁▂▇▆▁ |
ManufacturingProcess04 | 1 | 0.99 | 0.02 | 0.15 | -0.37 | -0.10 | 0.04 | 0.12 | 0.37 | ▂▅▇▇▂ |
ManufacturingProcess05 | 1 | 0.99 | -0.01 | 0.13 | -0.41 | -0.08 | -0.01 | 0.04 | 0.77 | ▁▇▂▁▁ |
ManufacturingProcess06 | 2 | 0.99 | -0.01 | 0.14 | -0.26 | -0.10 | -0.03 | 0.06 | 0.75 | ▇▇▂▁▁ |
ManufacturingProcess07 | 1 | 0.99 | 0.00 | 0.17 | -0.28 | -0.16 | -0.08 | 0.17 | 0.28 | ▇▇▁▇▆ |
ManufacturingProcess08 | 1 | 0.99 | 0.00 | 0.17 | -0.32 | -0.17 | 0.09 | 0.16 | 0.25 | ▃▇▁▅▇ |
ManufacturingProcess09 | 0 | 1.00 | -0.01 | 0.14 | -0.42 | -0.09 | 0.01 | 0.09 | 0.32 | ▁▃▆▇▂ |
ManufacturingProcess10 | 9 | 0.95 | -0.01 | 0.15 | -0.37 | -0.12 | -0.02 | 0.07 | 0.37 | ▂▅▇▃▁ |
ManufacturingProcess11 | 10 | 0.94 | -0.01 | 0.15 | -0.38 | -0.10 | 0.00 | 0.09 | 0.34 | ▂▃▇▆▂ |
ManufacturingProcess12 | 1 | 0.99 | -0.01 | 0.16 | -0.14 | -0.09 | -0.07 | -0.05 | 0.41 | ▇▁▁▁▁ |
ManufacturingProcess13 | 0 | 1.00 | 0.01 | 0.14 | -0.39 | -0.09 | 0.01 | 0.11 | 0.36 | ▁▅▇▆▁ |
ManufacturingProcess14 | 1 | 0.99 | 0.00 | 0.14 | -0.36 | -0.08 | 0.01 | 0.09 | 0.33 | ▁▃▇▅▂ |
ManufacturingProcess15 | 0 | 1.00 | 0.00 | 0.14 | -0.32 | -0.08 | -0.02 | 0.06 | 0.36 | ▂▅▇▂▂ |
ManufacturingProcess16 | 0 | 1.00 | 0.01 | 0.07 | -0.87 | 0.00 | 0.01 | 0.03 | 0.13 | ▁▁▁▁▇ |
ManufacturingProcess17 | 0 | 1.00 | 0.00 | 0.13 | -0.33 | -0.10 | 0.01 | 0.09 | 0.44 | ▂▇▇▂▁ |
ManufacturingProcess18 | 0 | 1.00 | 0.01 | 0.05 | -0.64 | 0.00 | 0.01 | 0.02 | 0.07 | ▁▁▁▁▇ |
ManufacturingProcess19 | 0 | 1.00 | -0.01 | 0.14 | -0.44 | -0.10 | -0.02 | 0.08 | 0.47 | ▁▅▇▃▁ |
ManufacturingProcess20 | 0 | 1.00 | 0.01 | 0.05 | -0.63 | 0.00 | 0.01 | 0.02 | 0.09 | ▁▁▁▁▇ |
ManufacturingProcess21 | 0 | 1.00 | -0.02 | 0.13 | -0.32 | -0.09 | -0.02 | 0.04 | 0.40 | ▂▇▇▂▁ |
ManufacturingProcess22 | 1 | 0.99 | 0.00 | 0.17 | -0.30 | -0.13 | -0.02 | 0.12 | 0.45 | ▃▇▅▃▁ |
ManufacturingProcess23 | 1 | 0.99 | 0.01 | 0.17 | -0.27 | -0.12 | 0.00 | 0.11 | 0.52 | ▇▇▅▃▁ |
ManufacturingProcess24 | 1 | 0.99 | 0.00 | 0.16 | -0.26 | -0.12 | -0.03 | 0.14 | 0.33 | ▅▇▅▅▃ |
ManufacturingProcess25 | 5 | 0.97 | 0.01 | 0.04 | -0.43 | 0.00 | 0.01 | 0.02 | 0.07 | ▁▁▁▁▇ |
ManufacturingProcess26 | 5 | 0.97 | 0.01 | 0.04 | -0.43 | 0.00 | 0.01 | 0.02 | 0.06 | ▁▁▁▁▇ |
ManufacturingProcess27 | 5 | 0.97 | 0.01 | 0.04 | -0.43 | 0.00 | 0.01 | 0.02 | 0.06 | ▁▁▁▁▇ |
ManufacturingProcess28 | 5 | 0.97 | -0.01 | 0.17 | -0.31 | -0.21 | 0.09 | 0.13 | 0.21 | ▅▂▁▅▇ |
ManufacturingProcess29 | 5 | 0.97 | 0.00 | 0.06 | -0.40 | -0.03 | -0.01 | 0.03 | 0.18 | ▁▁▂▇▂ |
ManufacturingProcess30 | 5 | 0.97 | 0.00 | 0.11 | -0.31 | -0.07 | -0.01 | 0.08 | 0.24 | ▁▃▇▇▂ |
ManufacturingProcess31 | 5 | 0.97 | 0.01 | 0.05 | -0.42 | 0.00 | 0.02 | 0.04 | 0.08 | ▁▁▁▁▇ |
ManufacturingProcess32 | 0 | 1.00 | -0.01 | 0.15 | -0.32 | -0.10 | -0.01 | 0.08 | 0.39 | ▃▆▇▅▁ |
ManufacturingProcess33 | 5 | 0.97 | -0.01 | 0.15 | -0.42 | -0.12 | 0.03 | 0.10 | 0.34 | ▂▅▇▇▂ |
ManufacturingProcess34 | 5 | 0.97 | 0.00 | 0.16 | -0.60 | 0.01 | 0.02 | 0.02 | 0.42 | ▁▂▂▇▁ |
ManufacturingProcess35 | 5 | 0.97 | 0.00 | 0.16 | -0.43 | -0.08 | -0.01 | 0.09 | 0.47 | ▁▃▇▂▁ |
ManufacturingProcess36 | 5 | 0.97 | 0.00 | 0.15 | -0.33 | -0.11 | 0.04 | 0.10 | 0.34 | ▁▇▂▆▂ |
ManufacturingProcess37 | 0 | 1.00 | 0.00 | 0.16 | -0.39 | -0.11 | -0.01 | 0.08 | 0.42 | ▂▅▇▃▁ |
ManufacturingProcess38 | 0 | 1.00 | 0.01 | 0.14 | -0.32 | -0.13 | 0.08 | 0.12 | 0.18 | ▁▃▂▁▇ |
ManufacturingProcess39 | 0 | 1.00 | 0.01 | 0.11 | -0.68 | 0.02 | 0.03 | 0.05 | 0.09 | ▁▁▁▁▇ |
ManufacturingProcess40 | 1 | 0.99 | 0.00 | 0.16 | -0.13 | -0.09 | -0.07 | -0.05 | 0.48 | ▇▁▁▁▁ |
ManufacturingProcess41 | 1 | 0.99 | -0.01 | 0.15 | -0.13 | -0.08 | -0.07 | -0.04 | 0.59 | ▇▁▁▁▁ |
ManufacturingProcess42 | 0 | 1.00 | 0.01 | 0.08 | -0.47 | 0.01 | 0.03 | 0.04 | 0.09 | ▁▁▁▁▇ |
ManufacturingProcess43 | 0 | 1.00 | -0.01 | 0.10 | -0.17 | -0.06 | -0.02 | 0.02 | 0.84 | ▇▂▁▁▁ |
ManufacturingProcess44 | 0 | 1.00 | 0.02 | 0.09 | -0.46 | 0.00 | 0.03 | 0.06 | 0.17 | ▁▁▁▇▇ |
ManufacturingProcess45 | 0 | 1.00 | 0.01 | 0.10 | -0.43 | -0.02 | 0.03 | 0.08 | 0.20 | ▁▁▂▇▅ |
##################################
# Identifying outliers for the numeric predictors
##################################
# Repeat the boxplot-based outlier count on the centered, scaled and
# spatial-sign-transformed predictors, drawing one annotated horizontal
# boxplot per predictor.
# One slot per predictor, filled inside the loop (no growing by append()).
OutlierCountList <- integer(ncol(DPA_CenteredScaledSpatialSignedTransformed))
# seq_len() yields an empty sequence when there are no columns,
# whereas 1:ncol() would iterate over c(1, 0).
for (i in seq_len(ncol(DPA_CenteredScaledSpatialSignedTransformed))) {
  Outliers <- boxplot.stats(DPA_CenteredScaledSpatialSignedTransformed[,i])$out
  OutlierCount <- length(Outliers)
  OutlierCountList[i] <- OutlierCount
  # The flagged values come from the transformed column, so their row
  # positions must be looked up in that same transformed column (matching
  # them against the untransformed data compares values on different scales).
  OutlierIndices <- which(DPA_CenteredScaledSpatialSignedTransformed[,i] %in% c(Outliers))
  boxplot(DPA_CenteredScaledSpatialSignedTransformed[,i],
          ylab = names(DPA.Predictors.Numeric)[i],
          main = names(DPA.Predictors.Numeric)[i],
          horizontal=TRUE)
  mtext(paste0(OutlierCount, " Outlier(s) Detected"))
}
# Pair each predictor name with its outlier count. cbind() of names and
# counts produces character data, so the count column is converted back
# to numeric before it is compared against zero.
OutlierCountSummary <- as.data.frame(cbind(names(DPA.Predictors.Numeric),(OutlierCountList)))
names(OutlierCountSummary) <- c("NumericPredictors","OutlierCount")
OutlierCountSummary$OutlierCount <- as.numeric(as.character(OutlierCountSummary$OutlierCount))
# Number of predictors with at least one flagged value.
NumericPredictorWithOutlierCount <- nrow(OutlierCountSummary[OutlierCountSummary$OutlierCount>0,])
print(paste0(NumericPredictorWithOutlierCount, " numeric variable(s) were noted with outlier(s)." ))
## [1] "40 numeric variable(s) were noted with outlier(s)."
##################################
# Loading dataset
##################################
# Start this section again from a fresh copy of the packaged dataset.
DPA <- ChemicalManufacturingProcess
##################################
# Gathering descriptive statistics
##################################
# Store the skim() summary of the full dataset; the outer parentheses
# print it as well.
(DPA_Skimmed <- skim(DPA))
Name | DPA |
Number of rows | 176 |
Number of columns | 58 |
_______________________ | |
Column type frequency: | |
numeric | 58 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
Yield | 0 | 1.00 | 40.18 | 1.85 | 35.25 | 38.75 | 39.97 | 41.48 | 46.34 | ▁▇▇▃▁ |
BiologicalMaterial01 | 0 | 1.00 | 6.41 | 0.71 | 4.58 | 5.98 | 6.30 | 6.87 | 8.81 | ▂▇▇▂▁ |
BiologicalMaterial02 | 0 | 1.00 | 55.69 | 4.03 | 46.87 | 52.68 | 55.09 | 58.74 | 64.75 | ▂▇▆▅▃ |
BiologicalMaterial03 | 0 | 1.00 | 67.70 | 4.00 | 56.97 | 64.98 | 67.22 | 70.43 | 78.25 | ▂▅▇▆▁ |
BiologicalMaterial04 | 0 | 1.00 | 12.35 | 1.77 | 9.38 | 11.25 | 12.10 | 13.22 | 23.09 | ▇▆▁▁▁ |
BiologicalMaterial05 | 0 | 1.00 | 18.60 | 1.84 | 13.24 | 17.24 | 18.49 | 19.90 | 24.85 | ▁▅▇▃▁ |
BiologicalMaterial06 | 0 | 1.00 | 48.91 | 3.75 | 40.60 | 46.06 | 48.46 | 51.34 | 59.38 | ▂▇▆▅▁ |
BiologicalMaterial07 | 0 | 1.00 | 100.01 | 0.11 | 100.00 | 100.00 | 100.00 | 100.00 | 100.83 | ▇▁▁▁▁ |
BiologicalMaterial08 | 0 | 1.00 | 17.49 | 0.68 | 15.88 | 17.06 | 17.51 | 17.88 | 19.14 | ▁▅▇▃▂ |
BiologicalMaterial09 | 0 | 1.00 | 12.85 | 0.42 | 11.44 | 12.60 | 12.84 | 13.13 | 14.08 | ▁▃▇▅▁ |
BiologicalMaterial10 | 0 | 1.00 | 2.80 | 0.60 | 1.77 | 2.46 | 2.71 | 2.99 | 6.87 | ▇▅▁▁▁ |
BiologicalMaterial11 | 0 | 1.00 | 146.95 | 4.82 | 135.81 | 143.82 | 146.08 | 149.60 | 158.73 | ▂▆▇▃▂ |
BiologicalMaterial12 | 0 | 1.00 | 20.20 | 0.77 | 18.35 | 19.73 | 20.12 | 20.75 | 22.21 | ▂▆▇▃▂ |
ManufacturingProcess01 | 1 | 0.99 | 11.21 | 1.82 | 0.00 | 10.80 | 11.40 | 12.15 | 14.10 | ▁▁▁▅▇ |
ManufacturingProcess02 | 3 | 0.98 | 16.68 | 8.47 | 0.00 | 19.30 | 21.00 | 21.50 | 22.50 | ▂▁▁▁▇ |
ManufacturingProcess03 | 15 | 0.91 | 1.54 | 0.02 | 1.47 | 1.53 | 1.54 | 1.55 | 1.60 | ▁▃▆▇▁ |
ManufacturingProcess04 | 1 | 0.99 | 931.85 | 6.27 | 911.00 | 928.00 | 934.00 | 936.00 | 946.00 | ▁▂▃▇▁ |
ManufacturingProcess05 | 1 | 0.99 | 1001.69 | 30.53 | 923.00 | 986.75 | 999.20 | 1008.85 | 1175.30 | ▁▇▁▁▁ |
ManufacturingProcess06 | 2 | 0.99 | 207.40 | 2.70 | 203.00 | 205.70 | 206.80 | 208.70 | 227.40 | ▇▃▁▁▁ |
ManufacturingProcess07 | 1 | 0.99 | 177.48 | 0.50 | 177.00 | 177.00 | 177.00 | 178.00 | 178.00 | ▇▁▁▁▇ |
ManufacturingProcess08 | 1 | 0.99 | 177.55 | 0.50 | 177.00 | 177.00 | 178.00 | 178.00 | 178.00 | ▆▁▁▁▇ |
ManufacturingProcess09 | 0 | 1.00 | 45.66 | 1.55 | 38.89 | 44.89 | 45.73 | 46.52 | 49.36 | ▁▁▅▇▂ |
ManufacturingProcess10 | 9 | 0.95 | 9.18 | 0.77 | 7.50 | 8.70 | 9.10 | 9.55 | 11.60 | ▂▇▆▂▁ |
ManufacturingProcess11 | 10 | 0.94 | 9.39 | 0.72 | 7.50 | 9.00 | 9.40 | 9.90 | 11.50 | ▂▆▇▅▁ |
ManufacturingProcess12 | 1 | 0.99 | 857.81 | 1784.53 | 0.00 | 0.00 | 0.00 | 0.00 | 4549.00 | ▇▁▁▁▂ |
ManufacturingProcess13 | 0 | 1.00 | 34.51 | 1.02 | 32.10 | 33.90 | 34.60 | 35.20 | 38.60 | ▃▇▇▁▁ |
ManufacturingProcess14 | 1 | 0.99 | 4853.87 | 54.52 | 4701.00 | 4828.00 | 4856.00 | 4882.50 | 5055.00 | ▁▅▇▂▁ |
ManufacturingProcess15 | 0 | 1.00 | 6038.92 | 58.31 | 5904.00 | 6010.00 | 6031.50 | 6061.00 | 6233.00 | ▂▇▆▂▁ |
ManufacturingProcess16 | 0 | 1.00 | 4565.80 | 351.70 | 0.00 | 4560.75 | 4588.00 | 4619.00 | 4852.00 | ▁▁▁▁▇ |
ManufacturingProcess17 | 0 | 1.00 | 34.34 | 1.25 | 31.30 | 33.50 | 34.40 | 35.10 | 40.00 | ▂▇▆▁▁ |
ManufacturingProcess18 | 0 | 1.00 | 4809.68 | 367.48 | 0.00 | 4813.00 | 4835.00 | 4862.00 | 4971.00 | ▁▁▁▁▇ |
ManufacturingProcess19 | 0 | 1.00 | 6028.20 | 45.58 | 5890.00 | 6000.75 | 6022.00 | 6050.25 | 6146.00 | ▁▃▇▃▂ |
ManufacturingProcess20 | 0 | 1.00 | 4556.46 | 349.01 | 0.00 | 4552.75 | 4582.00 | 4609.50 | 4759.00 | ▁▁▁▁▇ |
ManufacturingProcess21 | 0 | 1.00 | -0.16 | 0.78 | -1.80 | -0.60 | -0.30 | 0.00 | 3.60 | ▂▇▂▁▁ |
ManufacturingProcess22 | 1 | 0.99 | 5.41 | 3.33 | 0.00 | 3.00 | 5.00 | 8.00 | 12.00 | ▇▇▇▅▅ |
ManufacturingProcess23 | 1 | 0.99 | 3.02 | 1.66 | 0.00 | 2.00 | 3.00 | 4.00 | 6.00 | ▇▆▇▆▇ |
ManufacturingProcess24 | 1 | 0.99 | 8.83 | 5.80 | 0.00 | 4.00 | 8.00 | 14.00 | 23.00 | ▇▇▅▆▁ |
ManufacturingProcess25 | 5 | 0.97 | 4828.18 | 373.48 | 0.00 | 4832.00 | 4855.00 | 4877.00 | 4990.00 | ▁▁▁▁▇ |
ManufacturingProcess26 | 5 | 0.97 | 6015.60 | 464.87 | 0.00 | 6019.50 | 6047.00 | 6070.50 | 6161.00 | ▁▁▁▁▇ |
ManufacturingProcess27 | 5 | 0.97 | 4562.51 | 353.98 | 0.00 | 4560.00 | 4587.00 | 4609.00 | 4710.00 | ▁▁▁▁▇ |
ManufacturingProcess28 | 5 | 0.97 | 6.59 | 5.25 | 0.00 | 0.00 | 10.40 | 10.75 | 11.50 | ▅▁▁▁▇ |
ManufacturingProcess29 | 5 | 0.97 | 20.01 | 1.66 | 0.00 | 19.70 | 19.90 | 20.40 | 22.00 | ▁▁▁▁▇ |
ManufacturingProcess30 | 5 | 0.97 | 9.16 | 0.98 | 0.00 | 8.80 | 9.10 | 9.70 | 11.20 | ▁▁▁▅▇ |
ManufacturingProcess31 | 5 | 0.97 | 70.18 | 5.56 | 0.00 | 70.10 | 70.80 | 71.40 | 72.50 | ▁▁▁▁▇ |
ManufacturingProcess32 | 0 | 1.00 | 158.47 | 5.40 | 143.00 | 155.00 | 158.00 | 162.00 | 173.00 | ▁▃▇▃▁ |
ManufacturingProcess33 | 5 | 0.97 | 63.54 | 2.48 | 56.00 | 62.00 | 64.00 | 65.00 | 70.00 | ▁▃▇▅▁ |
ManufacturingProcess34 | 5 | 0.97 | 2.49 | 0.05 | 2.30 | 2.50 | 2.50 | 2.50 | 2.60 | ▁▂▁▇▁ |
ManufacturingProcess35 | 5 | 0.97 | 495.60 | 10.82 | 463.00 | 490.00 | 495.00 | 501.50 | 522.00 | ▁▂▇▅▂ |
ManufacturingProcess36 | 5 | 0.97 | 0.02 | 0.00 | 0.02 | 0.02 | 0.02 | 0.02 | 0.02 | ▂▇▇▁▃ |
ManufacturingProcess37 | 0 | 1.00 | 1.01 | 0.45 | 0.00 | 0.70 | 1.00 | 1.30 | 2.30 | ▂▇▇▃▁ |
ManufacturingProcess38 | 0 | 1.00 | 2.53 | 0.65 | 0.00 | 2.00 | 3.00 | 3.00 | 3.00 | ▁▁▁▅▇ |
ManufacturingProcess39 | 0 | 1.00 | 6.85 | 1.51 | 0.00 | 7.10 | 7.20 | 7.30 | 7.50 | ▁▁▁▁▇ |
ManufacturingProcess40 | 1 | 0.99 | 0.02 | 0.04 | 0.00 | 0.00 | 0.00 | 0.00 | 0.10 | ▇▁▁▁▂ |
ManufacturingProcess41 | 1 | 0.99 | 0.02 | 0.05 | 0.00 | 0.00 | 0.00 | 0.00 | 0.20 | ▇▁▁▁▁ |
ManufacturingProcess42 | 0 | 1.00 | 11.21 | 1.94 | 0.00 | 11.40 | 11.60 | 11.70 | 12.10 | ▁▁▁▁▇ |
ManufacturingProcess43 | 0 | 1.00 | 0.91 | 0.87 | 0.00 | 0.60 | 0.80 | 1.02 | 11.00 | ▇▁▁▁▁ |
ManufacturingProcess44 | 0 | 1.00 | 1.81 | 0.32 | 0.00 | 1.80 | 1.90 | 1.90 | 2.10 | ▁▁▁▁▇ |
ManufacturingProcess45 | 0 | 1.00 | 2.14 | 0.41 | 0.00 | 2.10 | 2.20 | 2.30 | 2.60 | ▁▁▁▂▇ |
##################################
# Identifying columns with low variance
###################################
# Compute near-zero-variance metrics for every column of DPA.
# saveMetrics = TRUE returns the per-column metrics table (freqRatio,
# percentUnique, zeroVar, nzv) shown in the output below.
DPA_LowVariance <- nearZeroVar(DPA,
                               freqCut = 80/20,
                               uniqueCut = 10,
                               saveMetrics = TRUE)
# Print only the rows flagged as near-zero variance.
(DPA_LowVariance[DPA_LowVariance$nzv,])
## freqRatio percentUnique zeroVar nzv
## BiologicalMaterial07 57.666667 1.136364 FALSE TRUE
## ManufacturingProcess12 4.303030 1.136364 FALSE TRUE
## ManufacturingProcess28 5.076923 9.659091 FALSE TRUE
## ManufacturingProcess34 4.392857 2.272727 FALSE TRUE
## ManufacturingProcess40 4.645161 1.136364 FALSE TRUE
## ManufacturingProcess41 6.500000 2.272727 FALSE TRUE
# Report the columns flagged as near-zero variance and, when there are any,
# build a copy of DPA without them and summarise it.
if ((nrow(DPA_LowVariance[DPA_LowVariance$nzv,]))==0){
  print("No low variance predictors noted.")
} else {
  print(paste0("Low variance observed for ",
               (nrow(DPA_LowVariance[DPA_LowVariance$nzv,])),
               " numeric variable(s) with First.Second.Mode.Ratio>4 and Unique.Count.Ratio<0.10."))
  DPA_LowVarianceForRemoval <- (nrow(DPA_LowVariance[DPA_LowVariance$nzv,]))
  print(paste0("Low variance can be resolved by removing ",
               (nrow(DPA_LowVariance[DPA_LowVariance$nzv,])),
               " numeric variable(s)."))
  # List each flagged variable by name.
  for (j in seq_len(DPA_LowVarianceForRemoval)) {
    DPA_LowVarianceRemovedVariable <- rownames(DPA_LowVariance[DPA_LowVariance$nzv,])[j]
    print(paste0("Variable ",
                 j,
                 " for removal: ",
                 DPA_LowVarianceRemovedVariable))
  }
  # A bare expression in the middle of a braced block is not auto-printed,
  # so the summary of the flagged variables is printed explicitly.
  print(
    DPA %>%
      skim() %>%
      dplyr::filter(skim_variable %in% rownames(DPA_LowVariance[DPA_LowVariance$nzv,]))
  )
  ##################################
  # Filtering out columns with low variance
  #################################
  DPA_ExcludedLowVariance <- DPA[,!names(DPA) %in% rownames(DPA_LowVariance[DPA_LowVariance$nzv,])]
  ##################################
  # Gathering descriptive statistics
  ##################################
  # Last expression of the branch: its value is what the if/else prints.
  (DPA_ExcludedLowVariance_Skimmed <- skim(DPA_ExcludedLowVariance))
}
## [1] "Low variance observed for 6 numeric variable(s) with First.Second.Mode.Ratio>4 and Unique.Count.Ratio<0.10."
## [1] "Low variance can be resolved by removing 6 numeric variable(s)."
## [1] "Variable 1 for removal: BiologicalMaterial07"
## [1] "Variable 2 for removal: ManufacturingProcess12"
## [1] "Variable 3 for removal: ManufacturingProcess28"
## [1] "Variable 4 for removal: ManufacturingProcess34"
## [1] "Variable 5 for removal: ManufacturingProcess40"
## [1] "Variable 6 for removal: ManufacturingProcess41"
Name | DPA_ExcludedLowVariance |
Number of rows | 176 |
Number of columns | 52 |
_______________________ | |
Column type frequency: | |
numeric | 52 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
Yield | 0 | 1.00 | 40.18 | 1.85 | 35.25 | 38.75 | 39.97 | 41.48 | 46.34 | ▁▇▇▃▁ |
BiologicalMaterial01 | 0 | 1.00 | 6.41 | 0.71 | 4.58 | 5.98 | 6.30 | 6.87 | 8.81 | ▂▇▇▂▁ |
BiologicalMaterial02 | 0 | 1.00 | 55.69 | 4.03 | 46.87 | 52.68 | 55.09 | 58.74 | 64.75 | ▂▇▆▅▃ |
BiologicalMaterial03 | 0 | 1.00 | 67.70 | 4.00 | 56.97 | 64.98 | 67.22 | 70.43 | 78.25 | ▂▅▇▆▁ |
BiologicalMaterial04 | 0 | 1.00 | 12.35 | 1.77 | 9.38 | 11.25 | 12.10 | 13.22 | 23.09 | ▇▆▁▁▁ |
BiologicalMaterial05 | 0 | 1.00 | 18.60 | 1.84 | 13.24 | 17.24 | 18.49 | 19.90 | 24.85 | ▁▅▇▃▁ |
BiologicalMaterial06 | 0 | 1.00 | 48.91 | 3.75 | 40.60 | 46.06 | 48.46 | 51.34 | 59.38 | ▂▇▆▅▁ |
BiologicalMaterial08 | 0 | 1.00 | 17.49 | 0.68 | 15.88 | 17.06 | 17.51 | 17.88 | 19.14 | ▁▅▇▃▂ |
BiologicalMaterial09 | 0 | 1.00 | 12.85 | 0.42 | 11.44 | 12.60 | 12.84 | 13.13 | 14.08 | ▁▃▇▅▁ |
BiologicalMaterial10 | 0 | 1.00 | 2.80 | 0.60 | 1.77 | 2.46 | 2.71 | 2.99 | 6.87 | ▇▅▁▁▁ |
BiologicalMaterial11 | 0 | 1.00 | 146.95 | 4.82 | 135.81 | 143.82 | 146.08 | 149.60 | 158.73 | ▂▆▇▃▂ |
BiologicalMaterial12 | 0 | 1.00 | 20.20 | 0.77 | 18.35 | 19.73 | 20.12 | 20.75 | 22.21 | ▂▆▇▃▂ |
ManufacturingProcess01 | 1 | 0.99 | 11.21 | 1.82 | 0.00 | 10.80 | 11.40 | 12.15 | 14.10 | ▁▁▁▅▇ |
ManufacturingProcess02 | 3 | 0.98 | 16.68 | 8.47 | 0.00 | 19.30 | 21.00 | 21.50 | 22.50 | ▂▁▁▁▇ |
ManufacturingProcess03 | 15 | 0.91 | 1.54 | 0.02 | 1.47 | 1.53 | 1.54 | 1.55 | 1.60 | ▁▃▆▇▁ |
ManufacturingProcess04 | 1 | 0.99 | 931.85 | 6.27 | 911.00 | 928.00 | 934.00 | 936.00 | 946.00 | ▁▂▃▇▁ |
ManufacturingProcess05 | 1 | 0.99 | 1001.69 | 30.53 | 923.00 | 986.75 | 999.20 | 1008.85 | 1175.30 | ▁▇▁▁▁ |
ManufacturingProcess06 | 2 | 0.99 | 207.40 | 2.70 | 203.00 | 205.70 | 206.80 | 208.70 | 227.40 | ▇▃▁▁▁ |
ManufacturingProcess07 | 1 | 0.99 | 177.48 | 0.50 | 177.00 | 177.00 | 177.00 | 178.00 | 178.00 | ▇▁▁▁▇ |
ManufacturingProcess08 | 1 | 0.99 | 177.55 | 0.50 | 177.00 | 177.00 | 178.00 | 178.00 | 178.00 | ▆▁▁▁▇ |
ManufacturingProcess09 | 0 | 1.00 | 45.66 | 1.55 | 38.89 | 44.89 | 45.73 | 46.52 | 49.36 | ▁▁▅▇▂ |
ManufacturingProcess10 | 9 | 0.95 | 9.18 | 0.77 | 7.50 | 8.70 | 9.10 | 9.55 | 11.60 | ▂▇▆▂▁ |
ManufacturingProcess11 | 10 | 0.94 | 9.39 | 0.72 | 7.50 | 9.00 | 9.40 | 9.90 | 11.50 | ▂▆▇▅▁ |
ManufacturingProcess13 | 0 | 1.00 | 34.51 | 1.02 | 32.10 | 33.90 | 34.60 | 35.20 | 38.60 | ▃▇▇▁▁ |
ManufacturingProcess14 | 1 | 0.99 | 4853.87 | 54.52 | 4701.00 | 4828.00 | 4856.00 | 4882.50 | 5055.00 | ▁▅▇▂▁ |
ManufacturingProcess15 | 0 | 1.00 | 6038.92 | 58.31 | 5904.00 | 6010.00 | 6031.50 | 6061.00 | 6233.00 | ▂▇▆▂▁ |
ManufacturingProcess16 | 0 | 1.00 | 4565.80 | 351.70 | 0.00 | 4560.75 | 4588.00 | 4619.00 | 4852.00 | ▁▁▁▁▇ |
ManufacturingProcess17 | 0 | 1.00 | 34.34 | 1.25 | 31.30 | 33.50 | 34.40 | 35.10 | 40.00 | ▂▇▆▁▁ |
ManufacturingProcess18 | 0 | 1.00 | 4809.68 | 367.48 | 0.00 | 4813.00 | 4835.00 | 4862.00 | 4971.00 | ▁▁▁▁▇ |
ManufacturingProcess19 | 0 | 1.00 | 6028.20 | 45.58 | 5890.00 | 6000.75 | 6022.00 | 6050.25 | 6146.00 | ▁▃▇▃▂ |
ManufacturingProcess20 | 0 | 1.00 | 4556.46 | 349.01 | 0.00 | 4552.75 | 4582.00 | 4609.50 | 4759.00 | ▁▁▁▁▇ |
ManufacturingProcess21 | 0 | 1.00 | -0.16 | 0.78 | -1.80 | -0.60 | -0.30 | 0.00 | 3.60 | ▂▇▂▁▁ |
ManufacturingProcess22 | 1 | 0.99 | 5.41 | 3.33 | 0.00 | 3.00 | 5.00 | 8.00 | 12.00 | ▇▇▇▅▅ |
ManufacturingProcess23 | 1 | 0.99 | 3.02 | 1.66 | 0.00 | 2.00 | 3.00 | 4.00 | 6.00 | ▇▆▇▆▇ |
ManufacturingProcess24 | 1 | 0.99 | 8.83 | 5.80 | 0.00 | 4.00 | 8.00 | 14.00 | 23.00 | ▇▇▅▆▁ |
ManufacturingProcess25 | 5 | 0.97 | 4828.18 | 373.48 | 0.00 | 4832.00 | 4855.00 | 4877.00 | 4990.00 | ▁▁▁▁▇ |
ManufacturingProcess26 | 5 | 0.97 | 6015.60 | 464.87 | 0.00 | 6019.50 | 6047.00 | 6070.50 | 6161.00 | ▁▁▁▁▇ |
ManufacturingProcess27 | 5 | 0.97 | 4562.51 | 353.98 | 0.00 | 4560.00 | 4587.00 | 4609.00 | 4710.00 | ▁▁▁▁▇ |
ManufacturingProcess29 | 5 | 0.97 | 20.01 | 1.66 | 0.00 | 19.70 | 19.90 | 20.40 | 22.00 | ▁▁▁▁▇ |
ManufacturingProcess30 | 5 | 0.97 | 9.16 | 0.98 | 0.00 | 8.80 | 9.10 | 9.70 | 11.20 | ▁▁▁▅▇ |
ManufacturingProcess31 | 5 | 0.97 | 70.18 | 5.56 | 0.00 | 70.10 | 70.80 | 71.40 | 72.50 | ▁▁▁▁▇ |
ManufacturingProcess32 | 0 | 1.00 | 158.47 | 5.40 | 143.00 | 155.00 | 158.00 | 162.00 | 173.00 | ▁▃▇▃▁ |
ManufacturingProcess33 | 5 | 0.97 | 63.54 | 2.48 | 56.00 | 62.00 | 64.00 | 65.00 | 70.00 | ▁▃▇▅▁ |
ManufacturingProcess35 | 5 | 0.97 | 495.60 | 10.82 | 463.00 | 490.00 | 495.00 | 501.50 | 522.00 | ▁▂▇▅▂ |
ManufacturingProcess36 | 5 | 0.97 | 0.02 | 0.00 | 0.02 | 0.02 | 0.02 | 0.02 | 0.02 | ▂▇▇▁▃ |
ManufacturingProcess37 | 0 | 1.00 | 1.01 | 0.45 | 0.00 | 0.70 | 1.00 | 1.30 | 2.30 | ▂▇▇▃▁ |
ManufacturingProcess38 | 0 | 1.00 | 2.53 | 0.65 | 0.00 | 2.00 | 3.00 | 3.00 | 3.00 | ▁▁▁▅▇ |
ManufacturingProcess39 | 0 | 1.00 | 6.85 | 1.51 | 0.00 | 7.10 | 7.20 | 7.30 | 7.50 | ▁▁▁▁▇ |
ManufacturingProcess42 | 0 | 1.00 | 11.21 | 1.94 | 0.00 | 11.40 | 11.60 | 11.70 | 12.10 | ▁▁▁▁▇ |
ManufacturingProcess43 | 0 | 1.00 | 0.91 | 0.87 | 0.00 | 0.60 | 0.80 | 1.02 | 11.00 | ▇▁▁▁▁ |
ManufacturingProcess44 | 0 | 1.00 | 1.81 | 0.32 | 0.00 | 1.80 | 1.90 | 1.90 | 2.10 | ▁▁▁▁▇ |
ManufacturingProcess45 | 0 | 1.00 | 2.14 | 0.41 | 0.00 | 2.10 | 2.20 | 2.30 | 2.60 | ▁▁▁▂▇ |
##################################
# Loading dataset
##################################
# Work on a copy of the raw data so the packaged dataset stays untouched.
DPA <- ChemicalManufacturingProcess

##################################
# Listing all predictors
##################################
# Every column except the response (Yield) is a predictor.
# drop = FALSE keeps a data frame even if only one column remains.
DPA.Predictors <- DPA[, !names(DPA) %in% c("Yield"), drop = FALSE]

##################################
# Listing all numeric predictors
##################################
# vapply() guarantees a logical mask regardless of the number of columns.
DPA.Predictors.Numeric <- DPA.Predictors[
  , vapply(DPA.Predictors, is.numeric, logical(1)),
  drop = FALSE
]
##################################
# Visualizing pairwise correlation between predictors
##################################
# Significance test for every pairwise Pearson correlation; the resulting
# p-value matrix (DPA_CorrelationTest$p) is used below to blank out
# correlations that are not significant at the 0.05 level.
DPA_CorrelationTest <- cor.mtest(DPA.Predictors.Numeric,
                                 method = "pearson",
                                 conf.level = 0.95)

# Upper-triangle correlation plot. Correlations are computed on pairwise
# complete observations because some predictors contain missing values.
corrplot(cor(DPA.Predictors.Numeric,
             method = "pearson",
             use = "pairwise.complete.obs"),
         method = "circle",
         type = "upper",
         order = "original",
         tl.col = "black",
         tl.cex = 0.75,
         tl.srt = 90,
         sig.level = 0.05,
         p.mat = DPA_CorrelationTest$p,
         insig = "blank")
##################################
# Identifying the highly correlated variables
##################################
# Pearson correlation matrix of the numeric predictors, computed on
# pairwise complete observations.
DPA_Correlation <- cor(DPA.Predictors.Numeric,
                       method = "pearson",
                       use = "pairwise.complete.obs")

# Count the predictor pairs (upper triangle only, so each pair is counted
# once) whose absolute correlation exceeds 0.95. na.rm = TRUE keeps the
# count usable in the if() below should any correlation be undefined.
(DPA_HighlyCorrelatedCount <- sum(
  abs(DPA_Correlation[upper.tri(DPA_Correlation)]) > 0.95,
  na.rm = TRUE
))
## [1] 10
if (DPA_HighlyCorrelatedCount == 0) {
  print("No highly correlated predictors noted.")
} else {
  print(paste0("High correlation observed for ",
               (DPA_HighlyCorrelatedCount)," pairs of numeric variable(s) with Correlation.Coefficient>0.95."))
  # Rank the most strongly correlated pairs, limited to the number of
  # pairs counted above.
  (DPA_HighlyCorrelatedPairs <- corr_cross(DPA.Predictors.Numeric,
                                           max_pvalue = 0.05,
                                           top = DPA_HighlyCorrelatedCount,
                                           rm.na = TRUE,
                                           grid = FALSE))
}
## [1] "High correlation observed for 10 pairs of numeric variable(s) with Correlation.Coefficient>0.95."
if (DPA_HighlyCorrelatedCount > 0) {
  # Column indices (relative to DPA_Correlation, i.e. to
  # DPA.Predictors.Numeric) of the predictors to drop so that no remaining
  # pair exceeds the 0.95 cutoff.
  DPA_HighlyCorrelated <- findCorrelation(DPA_Correlation, cutoff = 0.95)
  (DPA_HighlyCorrelatedForRemoval <- length(DPA_HighlyCorrelated))

  print(paste0("High correlation can be resolved by removing ",
               (DPA_HighlyCorrelatedForRemoval)," numeric variable(s)."))

  # seq_len() is safe when nothing is flagged (1:0 would iterate twice).
  for (j in seq_len(DPA_HighlyCorrelatedForRemoval)) {
    DPA_HighlyCorrelatedRemovedVariable <-
      colnames(DPA.Predictors.Numeric)[DPA_HighlyCorrelated[j]]
    print(paste0("Variable ",
                 j," for removal: ",
                 DPA_HighlyCorrelatedRemovedVariable))
  }

  ##################################
  # Filtering out columns with high correlation
  ##################################
  # The indices above refer to DPA.Predictors.Numeric, which does not
  # contain Yield. Applying them positionally to DPA (where Yield is an
  # extra column) removes the wrong columns, so translate the indices to
  # column names and drop by name instead.
  DPA_HighlyCorrelatedNames <-
    colnames(DPA.Predictors.Numeric)[DPA_HighlyCorrelated]
  DPA_ExcludedHighCorrelation <-
    DPA[, !names(DPA) %in% DPA_HighlyCorrelatedNames, drop = FALSE]

  ##################################
  # Gathering descriptive statistics
  ##################################
  (DPA_ExcludedHighCorrelation_Skimmed <- skim(DPA_ExcludedHighCorrelation))
}
## [1] "High correlation can be resolved by removing 6 numeric variable(s)."
## [1] "Variable 1 for removal: BiologicalMaterial02"
## [1] "Variable 2 for removal: ManufacturingProcess29"
## [1] "Variable 3 for removal: ManufacturingProcess27"
## [1] "Variable 4 for removal: ManufacturingProcess25"
## [1] "Variable 5 for removal: ManufacturingProcess31"
## [1] "Variable 6 for removal: ManufacturingProcess20"
Name | DPA_ExcludedHighCorrelati… |
Number of rows | 176 |
Number of columns | 52 |
_______________________ | |
Column type frequency: | |
numeric | 52 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
Yield | 0 | 1.00 | 40.18 | 1.85 | 35.25 | 38.75 | 39.97 | 41.48 | 46.34 | ▁▇▇▃▁ |
BiologicalMaterial02 | 0 | 1.00 | 55.69 | 4.03 | 46.87 | 52.68 | 55.09 | 58.74 | 64.75 | ▂▇▆▅▃ |
BiologicalMaterial03 | 0 | 1.00 | 67.70 | 4.00 | 56.97 | 64.98 | 67.22 | 70.43 | 78.25 | ▂▅▇▆▁ |
BiologicalMaterial04 | 0 | 1.00 | 12.35 | 1.77 | 9.38 | 11.25 | 12.10 | 13.22 | 23.09 | ▇▆▁▁▁ |
BiologicalMaterial05 | 0 | 1.00 | 18.60 | 1.84 | 13.24 | 17.24 | 18.49 | 19.90 | 24.85 | ▁▅▇▃▁ |
BiologicalMaterial06 | 0 | 1.00 | 48.91 | 3.75 | 40.60 | 46.06 | 48.46 | 51.34 | 59.38 | ▂▇▆▅▁ |
BiologicalMaterial07 | 0 | 1.00 | 100.01 | 0.11 | 100.00 | 100.00 | 100.00 | 100.00 | 100.83 | ▇▁▁▁▁ |
BiologicalMaterial08 | 0 | 1.00 | 17.49 | 0.68 | 15.88 | 17.06 | 17.51 | 17.88 | 19.14 | ▁▅▇▃▂ |
BiologicalMaterial09 | 0 | 1.00 | 12.85 | 0.42 | 11.44 | 12.60 | 12.84 | 13.13 | 14.08 | ▁▃▇▅▁ |
BiologicalMaterial10 | 0 | 1.00 | 2.80 | 0.60 | 1.77 | 2.46 | 2.71 | 2.99 | 6.87 | ▇▅▁▁▁ |
BiologicalMaterial11 | 0 | 1.00 | 146.95 | 4.82 | 135.81 | 143.82 | 146.08 | 149.60 | 158.73 | ▂▆▇▃▂ |
BiologicalMaterial12 | 0 | 1.00 | 20.20 | 0.77 | 18.35 | 19.73 | 20.12 | 20.75 | 22.21 | ▂▆▇▃▂ |
ManufacturingProcess01 | 1 | 0.99 | 11.21 | 1.82 | 0.00 | 10.80 | 11.40 | 12.15 | 14.10 | ▁▁▁▅▇ |
ManufacturingProcess02 | 3 | 0.98 | 16.68 | 8.47 | 0.00 | 19.30 | 21.00 | 21.50 | 22.50 | ▂▁▁▁▇ |
ManufacturingProcess03 | 15 | 0.91 | 1.54 | 0.02 | 1.47 | 1.53 | 1.54 | 1.55 | 1.60 | ▁▃▆▇▁ |
ManufacturingProcess04 | 1 | 0.99 | 931.85 | 6.27 | 911.00 | 928.00 | 934.00 | 936.00 | 946.00 | ▁▂▃▇▁ |
ManufacturingProcess05 | 1 | 0.99 | 1001.69 | 30.53 | 923.00 | 986.75 | 999.20 | 1008.85 | 1175.30 | ▁▇▁▁▁ |
ManufacturingProcess06 | 2 | 0.99 | 207.40 | 2.70 | 203.00 | 205.70 | 206.80 | 208.70 | 227.40 | ▇▃▁▁▁ |
ManufacturingProcess07 | 1 | 0.99 | 177.48 | 0.50 | 177.00 | 177.00 | 177.00 | 178.00 | 178.00 | ▇▁▁▁▇ |
ManufacturingProcess08 | 1 | 0.99 | 177.55 | 0.50 | 177.00 | 177.00 | 178.00 | 178.00 | 178.00 | ▆▁▁▁▇ |
ManufacturingProcess09 | 0 | 1.00 | 45.66 | 1.55 | 38.89 | 44.89 | 45.73 | 46.52 | 49.36 | ▁▁▅▇▂ |
ManufacturingProcess10 | 9 | 0.95 | 9.18 | 0.77 | 7.50 | 8.70 | 9.10 | 9.55 | 11.60 | ▂▇▆▂▁ |
ManufacturingProcess11 | 10 | 0.94 | 9.39 | 0.72 | 7.50 | 9.00 | 9.40 | 9.90 | 11.50 | ▂▆▇▅▁ |
ManufacturingProcess12 | 1 | 0.99 | 857.81 | 1784.53 | 0.00 | 0.00 | 0.00 | 0.00 | 4549.00 | ▇▁▁▁▂ |
ManufacturingProcess13 | 0 | 1.00 | 34.51 | 1.02 | 32.10 | 33.90 | 34.60 | 35.20 | 38.60 | ▃▇▇▁▁ |
ManufacturingProcess14 | 1 | 0.99 | 4853.87 | 54.52 | 4701.00 | 4828.00 | 4856.00 | 4882.50 | 5055.00 | ▁▅▇▂▁ |
ManufacturingProcess15 | 0 | 1.00 | 6038.92 | 58.31 | 5904.00 | 6010.00 | 6031.50 | 6061.00 | 6233.00 | ▂▇▆▂▁ |
ManufacturingProcess16 | 0 | 1.00 | 4565.80 | 351.70 | 0.00 | 4560.75 | 4588.00 | 4619.00 | 4852.00 | ▁▁▁▁▇ |
ManufacturingProcess17 | 0 | 1.00 | 34.34 | 1.25 | 31.30 | 33.50 | 34.40 | 35.10 | 40.00 | ▂▇▆▁▁ |
ManufacturingProcess18 | 0 | 1.00 | 4809.68 | 367.48 | 0.00 | 4813.00 | 4835.00 | 4862.00 | 4971.00 | ▁▁▁▁▇ |
ManufacturingProcess20 | 0 | 1.00 | 4556.46 | 349.01 | 0.00 | 4552.75 | 4582.00 | 4609.50 | 4759.00 | ▁▁▁▁▇ |
ManufacturingProcess21 | 0 | 1.00 | -0.16 | 0.78 | -1.80 | -0.60 | -0.30 | 0.00 | 3.60 | ▂▇▂▁▁ |
ManufacturingProcess22 | 1 | 0.99 | 5.41 | 3.33 | 0.00 | 3.00 | 5.00 | 8.00 | 12.00 | ▇▇▇▅▅ |
ManufacturingProcess23 | 1 | 0.99 | 3.02 | 1.66 | 0.00 | 2.00 | 3.00 | 4.00 | 6.00 | ▇▆▇▆▇ |
ManufacturingProcess25 | 5 | 0.97 | 4828.18 | 373.48 | 0.00 | 4832.00 | 4855.00 | 4877.00 | 4990.00 | ▁▁▁▁▇ |
ManufacturingProcess27 | 5 | 0.97 | 4562.51 | 353.98 | 0.00 | 4560.00 | 4587.00 | 4609.00 | 4710.00 | ▁▁▁▁▇ |
ManufacturingProcess29 | 5 | 0.97 | 20.01 | 1.66 | 0.00 | 19.70 | 19.90 | 20.40 | 22.00 | ▁▁▁▁▇ |
ManufacturingProcess31 | 5 | 0.97 | 70.18 | 5.56 | 0.00 | 70.10 | 70.80 | 71.40 | 72.50 | ▁▁▁▁▇ |
ManufacturingProcess32 | 0 | 1.00 | 158.47 | 5.40 | 143.00 | 155.00 | 158.00 | 162.00 | 173.00 | ▁▃▇▃▁ |
ManufacturingProcess33 | 5 | 0.97 | 63.54 | 2.48 | 56.00 | 62.00 | 64.00 | 65.00 | 70.00 | ▁▃▇▅▁ |
ManufacturingProcess34 | 5 | 0.97 | 2.49 | 0.05 | 2.30 | 2.50 | 2.50 | 2.50 | 2.60 | ▁▂▁▇▁ |
ManufacturingProcess35 | 5 | 0.97 | 495.60 | 10.82 | 463.00 | 490.00 | 495.00 | 501.50 | 522.00 | ▁▂▇▅▂ |
ManufacturingProcess36 | 5 | 0.97 | 0.02 | 0.00 | 0.02 | 0.02 | 0.02 | 0.02 | 0.02 | ▂▇▇▁▃ |
ManufacturingProcess37 | 0 | 1.00 | 1.01 | 0.45 | 0.00 | 0.70 | 1.00 | 1.30 | 2.30 | ▂▇▇▃▁ |
ManufacturingProcess38 | 0 | 1.00 | 2.53 | 0.65 | 0.00 | 2.00 | 3.00 | 3.00 | 3.00 | ▁▁▁▅▇ |
ManufacturingProcess39 | 0 | 1.00 | 6.85 | 1.51 | 0.00 | 7.10 | 7.20 | 7.30 | 7.50 | ▁▁▁▁▇ |
ManufacturingProcess40 | 1 | 0.99 | 0.02 | 0.04 | 0.00 | 0.00 | 0.00 | 0.00 | 0.10 | ▇▁▁▁▂ |
ManufacturingProcess41 | 1 | 0.99 | 0.02 | 0.05 | 0.00 | 0.00 | 0.00 | 0.00 | 0.20 | ▇▁▁▁▁ |
ManufacturingProcess42 | 0 | 1.00 | 11.21 | 1.94 | 0.00 | 11.40 | 11.60 | 11.70 | 12.10 | ▁▁▁▁▇ |
ManufacturingProcess43 | 0 | 1.00 | 0.91 | 0.87 | 0.00 | 0.60 | 0.80 | 1.02 | 11.00 | ▇▁▁▁▁ |
ManufacturingProcess44 | 0 | 1.00 | 1.81 | 0.32 | 0.00 | 1.80 | 1.90 | 1.90 | 2.10 | ▁▁▁▁▇ |
ManufacturingProcess45 | 0 | 1.00 | 2.14 | 0.41 | 0.00 | 2.10 | 2.20 | 2.30 | 2.60 | ▁▁▁▂▇ |
##################################
# Reusing imputed dataset
# to facilitate linear dependency assessment
# on complete data with no missing values
##################################
DPA <- DPA_BagImputed

##################################
# Listing all predictors
##################################
# Every column except the response (Yield) is a predictor.
# drop = FALSE keeps a data frame even if only one column remains.
DPA.Predictors <- DPA[, !names(DPA) %in% c("Yield"), drop = FALSE]

##################################
# Listing all numeric predictors
##################################
# vapply() guarantees a logical mask regardless of the number of columns.
DPA.Predictors.Numeric <- DPA.Predictors[
  , vapply(DPA.Predictors, is.numeric, logical(1)),
  drop = FALSE
]
##################################
# Finding linear dependencies
##################################
# Computed once and reused below; the result carries $linearCombos (the
# dependent column subsets) and $remove (columns to drop).
DPA_LinearlyDependent <- findLinearCombos(DPA.Predictors.Numeric)

##################################
# Identifying the linearly dependent variables
##################################
# Number of linearly dependent column subsets that were detected.
(DPA_LinearlyDependentCount <- length(DPA_LinearlyDependent$linearCombos))
## [1] 1
if (DPA_LinearlyDependentCount == 0) {
  print("No linearly dependent predictors noted.")
} else {
  print(paste0("Linear dependency observed for ",
               (DPA_LinearlyDependentCount)," subset(s) of numeric variable(s)."))
  for (i in seq_len(DPA_LinearlyDependentCount)) {
    # Translate the column indices of subset i into predictor names.
    DPA_LinearlyDependentSubset <-
      colnames(DPA.Predictors.Numeric)[DPA_LinearlyDependent$linearCombos[[i]]]
    # paste0() is vectorised, so one message per variable is printed.
    print(paste0("Linear dependent variable(s) for subset ",
                 i," include: ",
                 DPA_LinearlyDependentSubset))
  }
}
## [1] "Linear dependency observed for 1 subset(s) of numeric variable(s)."
## [1] "Linear dependent variable(s) for subset 1 include: ManufacturingProcess21"
## [2] "Linear dependent variable(s) for subset 1 include: ManufacturingProcess13"
## [3] "Linear dependent variable(s) for subset 1 include: ManufacturingProcess17"
##################################
# Identifying the linearly dependent variables for removal
##################################
if (DPA_LinearlyDependentCount > 0) {
  # DPA_LinearlyDependent was already computed from the same predictors
  # when DPA_LinearlyDependentCount was derived, so it is reused here
  # rather than recomputed.
  DPA_LinearlyDependentForRemoval <- length(DPA_LinearlyDependent$remove)

  print(paste0("Linear dependency can be resolved by removing ",
               (DPA_LinearlyDependentForRemoval)," numeric variable(s)."))

  # seq_len() is safe when nothing is flagged (1:0 would iterate twice).
  for (j in seq_len(DPA_LinearlyDependentForRemoval)) {
    DPA_LinearlyDependentRemovedVariable <-
      colnames(DPA.Predictors.Numeric)[DPA_LinearlyDependent$remove[j]]
    print(paste0("Variable ",
                 j," for removal: ",
                 DPA_LinearlyDependentRemovedVariable))
  }

  ##################################
  # Filtering out columns with linear dependency
  ##################################
  # $remove indexes DPA.Predictors.Numeric, so positional removal is
  # applied to that same data frame. drop = FALSE keeps a data frame even
  # if a single column remains.
  DPA_ExcludedLinearlyDependent <-
    DPA.Predictors.Numeric[, -DPA_LinearlyDependent$remove, drop = FALSE]

  ##################################
  # Gathering descriptive statistics
  ##################################
  (DPA_ExcludedLinearlyDependent_Skimmed <- skim(DPA_ExcludedLinearlyDependent))
}
## [1] "Linear dependency can be resolved by removing 1 numeric variable(s)."
## [1] "Variable 1 for removal: ManufacturingProcess21"
Name | DPA_ExcludedLinearlyDepen… |
Number of rows | 176 |
Number of columns | 56 |
_______________________ | |
Column type frequency: | |
numeric | 56 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
BiologicalMaterial01 | 0 | 1 | 6.41 | 0.71 | 4.58 | 5.98 | 6.30 | 6.87 | 8.81 | ▂▇▇▂▁ |
BiologicalMaterial02 | 0 | 1 | 55.69 | 4.03 | 46.87 | 52.68 | 55.09 | 58.74 | 64.75 | ▂▇▆▅▃ |
BiologicalMaterial03 | 0 | 1 | 67.70 | 4.00 | 56.97 | 64.98 | 67.22 | 70.43 | 78.25 | ▂▅▇▆▁ |
BiologicalMaterial04 | 0 | 1 | 12.35 | 1.77 | 9.38 | 11.25 | 12.10 | 13.22 | 23.09 | ▇▆▁▁▁ |
BiologicalMaterial05 | 0 | 1 | 18.60 | 1.84 | 13.24 | 17.24 | 18.49 | 19.90 | 24.85 | ▁▅▇▃▁ |
BiologicalMaterial06 | 0 | 1 | 48.91 | 3.75 | 40.60 | 46.06 | 48.46 | 51.34 | 59.38 | ▂▇▆▅▁ |
BiologicalMaterial07 | 0 | 1 | 100.01 | 0.11 | 100.00 | 100.00 | 100.00 | 100.00 | 100.83 | ▇▁▁▁▁ |
BiologicalMaterial08 | 0 | 1 | 17.49 | 0.68 | 15.88 | 17.06 | 17.51 | 17.88 | 19.14 | ▁▅▇▃▂ |
BiologicalMaterial09 | 0 | 1 | 12.85 | 0.42 | 11.44 | 12.60 | 12.84 | 13.13 | 14.08 | ▁▃▇▅▁ |
BiologicalMaterial10 | 0 | 1 | 2.80 | 0.60 | 1.77 | 2.46 | 2.71 | 2.99 | 6.87 | ▇▅▁▁▁ |
BiologicalMaterial11 | 0 | 1 | 146.95 | 4.82 | 135.81 | 143.82 | 146.08 | 149.60 | 158.73 | ▂▆▇▃▂ |
BiologicalMaterial12 | 0 | 1 | 20.20 | 0.77 | 18.35 | 19.73 | 20.12 | 20.75 | 22.21 | ▂▆▇▃▂ |
ManufacturingProcess01 | 0 | 1 | 11.21 | 1.82 | 0.00 | 10.80 | 11.40 | 12.12 | 14.10 | ▁▁▁▅▇ |
ManufacturingProcess02 | 0 | 1 | 16.71 | 8.42 | 0.00 | 19.23 | 21.00 | 21.50 | 22.50 | ▂▁▁▁▇ |
ManufacturingProcess03 | 0 | 1 | 1.54 | 0.02 | 1.47 | 1.53 | 1.54 | 1.55 | 1.60 | ▁▂▆▇▁ |
ManufacturingProcess04 | 0 | 1 | 931.83 | 6.26 | 911.00 | 927.98 | 934.00 | 936.00 | 946.00 | ▁▂▃▇▁ |
ManufacturingProcess05 | 0 | 1 | 1001.70 | 30.44 | 923.00 | 986.82 | 999.35 | 1008.72 | 1175.30 | ▁▇▁▁▁ |
ManufacturingProcess06 | 0 | 1 | 207.38 | 2.69 | 203.00 | 205.70 | 206.80 | 208.70 | 227.40 | ▇▃▁▁▁ |
ManufacturingProcess07 | 0 | 1 | 177.48 | 0.50 | 177.00 | 177.00 | 177.00 | 178.00 | 178.00 | ▇▁▁▁▇ |
ManufacturingProcess08 | 0 | 1 | 177.56 | 0.50 | 177.00 | 177.00 | 178.00 | 178.00 | 178.00 | ▆▁▁▁▇ |
ManufacturingProcess09 | 0 | 1 | 45.66 | 1.55 | 38.89 | 44.89 | 45.73 | 46.52 | 49.36 | ▁▁▅▇▂ |
ManufacturingProcess10 | 0 | 1 | 9.18 | 0.76 | 7.50 | 8.70 | 9.10 | 9.53 | 11.60 | ▂▇▆▂▁ |
ManufacturingProcess11 | 0 | 1 | 9.39 | 0.70 | 7.50 | 9.00 | 9.40 | 9.90 | 11.50 | ▂▆▇▅▁ |
ManufacturingProcess12 | 0 | 1 | 855.69 | 1779.64 | 0.00 | 0.00 | 0.00 | 0.00 | 4549.00 | ▇▁▁▁▂ |
ManufacturingProcess13 | 0 | 1 | 34.51 | 1.02 | 32.10 | 33.90 | 34.60 | 35.20 | 38.60 | ▃▇▇▁▁ |
ManufacturingProcess14 | 0 | 1 | 4853.33 | 54.84 | 4701.00 | 4827.25 | 4855.50 | 4882.25 | 5055.00 | ▁▅▇▂▁ |
ManufacturingProcess15 | 0 | 1 | 6038.92 | 58.31 | 5904.00 | 6010.00 | 6031.50 | 6061.00 | 6233.00 | ▂▇▆▂▁ |
ManufacturingProcess16 | 0 | 1 | 4565.80 | 351.70 | 0.00 | 4560.75 | 4588.00 | 4619.00 | 4852.00 | ▁▁▁▁▇ |
ManufacturingProcess17 | 0 | 1 | 34.34 | 1.25 | 31.30 | 33.50 | 34.40 | 35.10 | 40.00 | ▂▇▆▁▁ |
ManufacturingProcess18 | 0 | 1 | 4809.68 | 367.48 | 0.00 | 4813.00 | 4835.00 | 4862.00 | 4971.00 | ▁▁▁▁▇ |
ManufacturingProcess19 | 0 | 1 | 6028.20 | 45.58 | 5890.00 | 6000.75 | 6022.00 | 6050.25 | 6146.00 | ▁▃▇▃▂ |
ManufacturingProcess20 | 0 | 1 | 4556.46 | 349.01 | 0.00 | 4552.75 | 4582.00 | 4609.50 | 4759.00 | ▁▁▁▁▇ |
ManufacturingProcess22 | 0 | 1 | 5.40 | 3.32 | 0.00 | 3.00 | 5.00 | 8.00 | 12.00 | ▇▇▇▅▅ |
ManufacturingProcess23 | 0 | 1 | 3.02 | 1.66 | 0.00 | 2.00 | 3.00 | 4.00 | 6.00 | ▇▆▇▆▇ |
ManufacturingProcess24 | 0 | 1 | 8.87 | 5.80 | 0.00 | 4.00 | 8.00 | 14.00 | 23.00 | ▇▇▅▆▁ |
ManufacturingProcess25 | 0 | 1 | 4828.92 | 368.13 | 0.00 | 4833.50 | 4855.00 | 4876.25 | 4990.00 | ▁▁▁▁▇ |
ManufacturingProcess26 | 0 | 1 | 6016.48 | 458.21 | 0.00 | 6020.75 | 6046.50 | 6069.25 | 6161.00 | ▁▁▁▁▇ |
ManufacturingProcess27 | 0 | 1 | 4563.21 | 348.92 | 0.00 | 4562.75 | 4587.48 | 4609.00 | 4710.00 | ▁▁▁▁▇ |
ManufacturingProcess28 | 0 | 1 | 6.55 | 5.18 | 0.00 | 0.00 | 10.40 | 10.70 | 11.50 | ▅▁▁▁▇ |
ManufacturingProcess29 | 0 | 1 | 20.01 | 1.64 | 0.00 | 19.70 | 19.90 | 20.40 | 22.00 | ▁▁▁▁▇ |
ManufacturingProcess30 | 0 | 1 | 9.18 | 0.97 | 0.00 | 8.80 | 9.20 | 9.70 | 11.20 | ▁▁▁▅▇ |
ManufacturingProcess31 | 0 | 1 | 70.19 | 5.48 | 0.00 | 70.10 | 70.79 | 71.40 | 72.50 | ▁▁▁▁▇ |
ManufacturingProcess32 | 0 | 1 | 158.47 | 5.40 | 143.00 | 155.00 | 158.00 | 162.00 | 173.00 | ▁▃▇▃▁ |
ManufacturingProcess33 | 0 | 1 | 63.55 | 2.46 | 56.00 | 62.00 | 64.00 | 65.00 | 70.00 | ▁▃▇▅▁ |
ManufacturingProcess34 | 0 | 1 | 2.49 | 0.05 | 2.30 | 2.50 | 2.50 | 2.50 | 2.60 | ▁▂▁▇▁ |
ManufacturingProcess35 | 0 | 1 | 495.53 | 10.69 | 463.00 | 490.00 | 495.00 | 501.00 | 522.00 | ▁▂▇▅▂ |
ManufacturingProcess36 | 0 | 1 | 0.02 | 0.00 | 0.02 | 0.02 | 0.02 | 0.02 | 0.02 | ▂▇▇▁▃ |
ManufacturingProcess37 | 0 | 1 | 1.01 | 0.45 | 0.00 | 0.70 | 1.00 | 1.30 | 2.30 | ▂▇▇▃▁ |
ManufacturingProcess38 | 0 | 1 | 2.53 | 0.65 | 0.00 | 2.00 | 3.00 | 3.00 | 3.00 | ▁▁▁▅▇ |
ManufacturingProcess39 | 0 | 1 | 6.85 | 1.51 | 0.00 | 7.10 | 7.20 | 7.30 | 7.50 | ▁▁▁▁▇ |
ManufacturingProcess40 | 0 | 1 | 0.02 | 0.04 | 0.00 | 0.00 | 0.00 | 0.00 | 0.10 | ▇▁▁▁▂ |
ManufacturingProcess41 | 0 | 1 | 0.02 | 0.05 | 0.00 | 0.00 | 0.00 | 0.00 | 0.20 | ▇▁▁▁▁ |
ManufacturingProcess42 | 0 | 1 | 11.21 | 1.94 | 0.00 | 11.40 | 11.60 | 11.70 | 12.10 | ▁▁▁▁▇ |
ManufacturingProcess43 | 0 | 1 | 0.91 | 0.87 | 0.00 | 0.60 | 0.80 | 1.02 | 11.00 | ▇▁▁▁▁ |
ManufacturingProcess44 | 0 | 1 | 1.81 | 0.32 | 0.00 | 1.80 | 1.90 | 1.90 | 2.10 | ▁▁▁▁▇ |
ManufacturingProcess45 | 0 | 1 | 2.14 | 0.41 | 0.00 | 2.10 | 2.20 | 2.30 | 2.60 | ▁▁▁▂▇ |
##################################
# Loading dataset
##################################
# Work on a copy of the raw data so the packaged dataset stays untouched.
DPA <- ChemicalManufacturingProcess

##################################
# Listing all predictors
##################################
# Every column except the response (Yield) is a predictor.
# drop = FALSE keeps a data frame even if only one column remains.
DPA.Predictors <- DPA[, !names(DPA) %in% c("Yield"), drop = FALSE]

##################################
# Listing all numeric predictors
##################################
# vapply() guarantees a logical mask regardless of the number of columns.
DPA.Predictors.Numeric <- DPA.Predictors[
  , vapply(DPA.Predictors, is.numeric, logical(1)),
  drop = FALSE
]

##################################
# Gathering descriptive statistics
##################################
# The outer parentheses print the summary as well as storing it.
(DPA_Skimmed <- skim(DPA.Predictors.Numeric))
Name | DPA.Predictors.Numeric |
Number of rows | 176 |
Number of columns | 57 |
_______________________ | |
Column type frequency: | |
numeric | 57 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
BiologicalMaterial01 | 0 | 1.00 | 6.41 | 0.71 | 4.58 | 5.98 | 6.30 | 6.87 | 8.81 | ▂▇▇▂▁ |
BiologicalMaterial02 | 0 | 1.00 | 55.69 | 4.03 | 46.87 | 52.68 | 55.09 | 58.74 | 64.75 | ▂▇▆▅▃ |
BiologicalMaterial03 | 0 | 1.00 | 67.70 | 4.00 | 56.97 | 64.98 | 67.22 | 70.43 | 78.25 | ▂▅▇▆▁ |
BiologicalMaterial04 | 0 | 1.00 | 12.35 | 1.77 | 9.38 | 11.25 | 12.10 | 13.22 | 23.09 | ▇▆▁▁▁ |
BiologicalMaterial05 | 0 | 1.00 | 18.60 | 1.84 | 13.24 | 17.24 | 18.49 | 19.90 | 24.85 | ▁▅▇▃▁ |
BiologicalMaterial06 | 0 | 1.00 | 48.91 | 3.75 | 40.60 | 46.06 | 48.46 | 51.34 | 59.38 | ▂▇▆▅▁ |
BiologicalMaterial07 | 0 | 1.00 | 100.01 | 0.11 | 100.00 | 100.00 | 100.00 | 100.00 | 100.83 | ▇▁▁▁▁ |
BiologicalMaterial08 | 0 | 1.00 | 17.49 | 0.68 | 15.88 | 17.06 | 17.51 | 17.88 | 19.14 | ▁▅▇▃▂ |
BiologicalMaterial09 | 0 | 1.00 | 12.85 | 0.42 | 11.44 | 12.60 | 12.84 | 13.13 | 14.08 | ▁▃▇▅▁ |
BiologicalMaterial10 | 0 | 1.00 | 2.80 | 0.60 | 1.77 | 2.46 | 2.71 | 2.99 | 6.87 | ▇▅▁▁▁ |
BiologicalMaterial11 | 0 | 1.00 | 146.95 | 4.82 | 135.81 | 143.82 | 146.08 | 149.60 | 158.73 | ▂▆▇▃▂ |
BiologicalMaterial12 | 0 | 1.00 | 20.20 | 0.77 | 18.35 | 19.73 | 20.12 | 20.75 | 22.21 | ▂▆▇▃▂ |
ManufacturingProcess01 | 1 | 0.99 | 11.21 | 1.82 | 0.00 | 10.80 | 11.40 | 12.15 | 14.10 | ▁▁▁▅▇ |
ManufacturingProcess02 | 3 | 0.98 | 16.68 | 8.47 | 0.00 | 19.30 | 21.00 | 21.50 | 22.50 | ▂▁▁▁▇ |
ManufacturingProcess03 | 15 | 0.91 | 1.54 | 0.02 | 1.47 | 1.53 | 1.54 | 1.55 | 1.60 | ▁▃▆▇▁ |
ManufacturingProcess04 | 1 | 0.99 | 931.85 | 6.27 | 911.00 | 928.00 | 934.00 | 936.00 | 946.00 | ▁▂▃▇▁ |
ManufacturingProcess05 | 1 | 0.99 | 1001.69 | 30.53 | 923.00 | 986.75 | 999.20 | 1008.85 | 1175.30 | ▁▇▁▁▁ |
ManufacturingProcess06 | 2 | 0.99 | 207.40 | 2.70 | 203.00 | 205.70 | 206.80 | 208.70 | 227.40 | ▇▃▁▁▁ |
ManufacturingProcess07 | 1 | 0.99 | 177.48 | 0.50 | 177.00 | 177.00 | 177.00 | 178.00 | 178.00 | ▇▁▁▁▇ |
ManufacturingProcess08 | 1 | 0.99 | 177.55 | 0.50 | 177.00 | 177.00 | 178.00 | 178.00 | 178.00 | ▆▁▁▁▇ |
ManufacturingProcess09 | 0 | 1.00 | 45.66 | 1.55 | 38.89 | 44.89 | 45.73 | 46.52 | 49.36 | ▁▁▅▇▂ |
ManufacturingProcess10 | 9 | 0.95 | 9.18 | 0.77 | 7.50 | 8.70 | 9.10 | 9.55 | 11.60 | ▂▇▆▂▁ |
ManufacturingProcess11 | 10 | 0.94 | 9.39 | 0.72 | 7.50 | 9.00 | 9.40 | 9.90 | 11.50 | ▂▆▇▅▁ |
ManufacturingProcess12 | 1 | 0.99 | 857.81 | 1784.53 | 0.00 | 0.00 | 0.00 | 0.00 | 4549.00 | ▇▁▁▁▂ |
ManufacturingProcess13 | 0 | 1.00 | 34.51 | 1.02 | 32.10 | 33.90 | 34.60 | 35.20 | 38.60 | ▃▇▇▁▁ |
ManufacturingProcess14 | 1 | 0.99 | 4853.87 | 54.52 | 4701.00 | 4828.00 | 4856.00 | 4882.50 | 5055.00 | ▁▅▇▂▁ |
ManufacturingProcess15 | 0 | 1.00 | 6038.92 | 58.31 | 5904.00 | 6010.00 | 6031.50 | 6061.00 | 6233.00 | ▂▇▆▂▁ |
ManufacturingProcess16 | 0 | 1.00 | 4565.80 | 351.70 | 0.00 | 4560.75 | 4588.00 | 4619.00 | 4852.00 | ▁▁▁▁▇ |
ManufacturingProcess17 | 0 | 1.00 | 34.34 | 1.25 | 31.30 | 33.50 | 34.40 | 35.10 | 40.00 | ▂▇▆▁▁ |
ManufacturingProcess18 | 0 | 1.00 | 4809.68 | 367.48 | 0.00 | 4813.00 | 4835.00 | 4862.00 | 4971.00 | ▁▁▁▁▇ |
ManufacturingProcess19 | 0 | 1.00 | 6028.20 | 45.58 | 5890.00 | 6000.75 | 6022.00 | 6050.25 | 6146.00 | ▁▃▇▃▂ |
ManufacturingProcess20 | 0 | 1.00 | 4556.46 | 349.01 | 0.00 | 4552.75 | 4582.00 | 4609.50 | 4759.00 | ▁▁▁▁▇ |
ManufacturingProcess21 | 0 | 1.00 | -0.16 | 0.78 | -1.80 | -0.60 | -0.30 | 0.00 | 3.60 | ▂▇▂▁▁ |
ManufacturingProcess22 | 1 | 0.99 | 5.41 | 3.33 | 0.00 | 3.00 | 5.00 | 8.00 | 12.00 | ▇▇▇▅▅ |
ManufacturingProcess23 | 1 | 0.99 | 3.02 | 1.66 | 0.00 | 2.00 | 3.00 | 4.00 | 6.00 | ▇▆▇▆▇ |
ManufacturingProcess24 | 1 | 0.99 | 8.83 | 5.80 | 0.00 | 4.00 | 8.00 | 14.00 | 23.00 | ▇▇▅▆▁ |
ManufacturingProcess25 | 5 | 0.97 | 4828.18 | 373.48 | 0.00 | 4832.00 | 4855.00 | 4877.00 | 4990.00 | ▁▁▁▁▇ |
ManufacturingProcess26 | 5 | 0.97 | 6015.60 | 464.87 | 0.00 | 6019.50 | 6047.00 | 6070.50 | 6161.00 | ▁▁▁▁▇ |
ManufacturingProcess27 | 5 | 0.97 | 4562.51 | 353.98 | 0.00 | 4560.00 | 4587.00 | 4609.00 | 4710.00 | ▁▁▁▁▇ |
ManufacturingProcess28 | 5 | 0.97 | 6.59 | 5.25 | 0.00 | 0.00 | 10.40 | 10.75 | 11.50 | ▅▁▁▁▇ |
ManufacturingProcess29 | 5 | 0.97 | 20.01 | 1.66 | 0.00 | 19.70 | 19.90 | 20.40 | 22.00 | ▁▁▁▁▇ |
ManufacturingProcess30 | 5 | 0.97 | 9.16 | 0.98 | 0.00 | 8.80 | 9.10 | 9.70 | 11.20 | ▁▁▁▅▇ |
ManufacturingProcess31 | 5 | 0.97 | 70.18 | 5.56 | 0.00 | 70.10 | 70.80 | 71.40 | 72.50 | ▁▁▁▁▇ |
ManufacturingProcess32 | 0 | 1.00 | 158.47 | 5.40 | 143.00 | 155.00 | 158.00 | 162.00 | 173.00 | ▁▃▇▃▁ |
ManufacturingProcess33 | 5 | 0.97 | 63.54 | 2.48 | 56.00 | 62.00 | 64.00 | 65.00 | 70.00 | ▁▃▇▅▁ |
ManufacturingProcess34 | 5 | 0.97 | 2.49 | 0.05 | 2.30 | 2.50 | 2.50 | 2.50 | 2.60 | ▁▂▁▇▁ |
ManufacturingProcess35 | 5 | 0.97 | 495.60 | 10.82 | 463.00 | 490.00 | 495.00 | 501.50 | 522.00 | ▁▂▇▅▂ |
ManufacturingProcess36 | 5 | 0.97 | 0.02 | 0.00 | 0.02 | 0.02 | 0.02 | 0.02 | 0.02 | ▂▇▇▁▃ |
ManufacturingProcess37 | 0 | 1.00 | 1.01 | 0.45 | 0.00 | 0.70 | 1.00 | 1.30 | 2.30 | ▂▇▇▃▁ |
ManufacturingProcess38 | 0 | 1.00 | 2.53 | 0.65 | 0.00 | 2.00 | 3.00 | 3.00 | 3.00 | ▁▁▁▅▇ |
ManufacturingProcess39 | 0 | 1.00 | 6.85 | 1.51 | 0.00 | 7.10 | 7.20 | 7.30 | 7.50 | ▁▁▁▁▇ |
ManufacturingProcess40 | 1 | 0.99 | 0.02 | 0.04 | 0.00 | 0.00 | 0.00 | 0.00 | 0.10 | ▇▁▁▁▂ |
ManufacturingProcess41 | 1 | 0.99 | 0.02 | 0.05 | 0.00 | 0.00 | 0.00 | 0.00 | 0.20 | ▇▁▁▁▁ |
ManufacturingProcess42 | 0 | 1.00 | 11.21 | 1.94 | 0.00 | 11.40 | 11.60 | 11.70 | 12.10 | ▁▁▁▁▇ |
ManufacturingProcess43 | 0 | 1.00 | 0.91 | 0.87 | 0.00 | 0.60 | 0.80 | 1.02 | 11.00 | ▇▁▁▁▁ |
ManufacturingProcess44 | 0 | 1.00 | 1.81 | 0.32 | 0.00 | 1.80 | 1.90 | 1.90 | 2.10 | ▁▁▁▁▇ |
ManufacturingProcess45 | 0 | 1.00 | 2.14 | 0.41 | 0.00 | 2.10 | 2.20 | 2.30 | 2.60 | ▁▁▁▂▇ |
##################################
# Applying a center transformation
##################################
# preProcess() builds the centering transformation from the numeric
# predictors; predict() then applies it to the same data.
DPA_Centered <- preProcess(DPA.Predictors.Numeric, method = c("center"))
DPA_CenteredTransformed <- predict(DPA_Centered, DPA.Predictors.Numeric)

##################################
# Gathering descriptive statistics
##################################
# The outer parentheses print the summary as well as storing it.
(DPA_CenteredTransformedSkimmed <- skim(DPA_CenteredTransformed))
Name | DPA_CenteredTransformed |
Number of rows | 176 |
Number of columns | 57 |
_______________________ | |
Column type frequency: | |
numeric | 57 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
BiologicalMaterial01 | 0 | 1.00 | 0 | 0.71 | -1.83 | -0.43 | -0.11 | 0.46 | 2.40 | ▂▇▇▂▁ |
BiologicalMaterial02 | 0 | 1.00 | 0 | 4.03 | -8.82 | -3.01 | -0.60 | 3.05 | 9.06 | ▂▇▆▅▃ |
BiologicalMaterial03 | 0 | 1.00 | 0 | 4.00 | -10.73 | -2.72 | -0.48 | 2.72 | 10.55 | ▂▅▇▆▁ |
BiologicalMaterial04 | 0 | 1.00 | 0 | 1.77 | -2.97 | -1.10 | -0.25 | 0.87 | 10.74 | ▇▆▁▁▁ |
BiologicalMaterial05 | 0 | 1.00 | 0 | 1.84 | -5.36 | -1.36 | -0.11 | 1.30 | 6.25 | ▁▅▇▃▁ |
BiologicalMaterial06 | 0 | 1.00 | 0 | 3.75 | -8.31 | -2.86 | -0.45 | 2.43 | 10.47 | ▂▇▆▅▁ |
BiologicalMaterial07 | 0 | 1.00 | 0 | 0.11 | -0.01 | -0.01 | -0.01 | -0.01 | 0.82 | ▇▁▁▁▁ |
BiologicalMaterial08 | 0 | 1.00 | 0 | 0.68 | -1.61 | -0.43 | 0.02 | 0.39 | 1.65 | ▁▅▇▃▂ |
BiologicalMaterial09 | 0 | 1.00 | 0 | 0.42 | -1.41 | -0.25 | -0.02 | 0.28 | 1.23 | ▁▃▇▅▁ |
BiologicalMaterial10 | 0 | 1.00 | 0 | 0.60 | -1.03 | -0.34 | -0.09 | 0.19 | 4.07 | ▇▅▁▁▁ |
BiologicalMaterial11 | 0 | 1.00 | 0 | 4.82 | -11.14 | -3.14 | -0.87 | 2.65 | 11.78 | ▂▆▇▃▂ |
BiologicalMaterial12 | 0 | 1.00 | 0 | 0.77 | -1.85 | -0.47 | -0.08 | 0.55 | 2.01 | ▂▆▇▃▂ |
ManufacturingProcess01 | 1 | 0.99 | 0 | 1.82 | -11.21 | -0.41 | 0.19 | 0.94 | 2.89 | ▁▁▁▅▇ |
ManufacturingProcess02 | 3 | 0.98 | 0 | 8.47 | -16.68 | 2.62 | 4.32 | 4.82 | 5.82 | ▂▁▁▁▇ |
ManufacturingProcess03 | 15 | 0.91 | 0 | 0.02 | -0.07 | -0.01 | 0.00 | 0.01 | 0.06 | ▁▃▆▇▁ |
ManufacturingProcess04 | 1 | 0.99 | 0 | 6.27 | -20.85 | -3.85 | 2.15 | 4.15 | 14.15 | ▁▂▃▇▁ |
ManufacturingProcess05 | 1 | 0.99 | 0 | 30.53 | -78.69 | -14.94 | -2.49 | 7.16 | 173.61 | ▁▇▁▁▁ |
ManufacturingProcess06 | 2 | 0.99 | 0 | 2.70 | -4.40 | -1.70 | -0.60 | 1.30 | 20.00 | ▇▃▁▁▁ |
ManufacturingProcess07 | 1 | 0.99 | 0 | 0.50 | -0.48 | -0.48 | -0.48 | 0.52 | 0.52 | ▇▁▁▁▇ |
ManufacturingProcess08 | 1 | 0.99 | 0 | 0.50 | -0.55 | -0.55 | 0.45 | 0.45 | 0.45 | ▆▁▁▁▇ |
ManufacturingProcess09 | 0 | 1.00 | 0 | 1.55 | -6.77 | -0.77 | 0.07 | 0.85 | 3.70 | ▁▁▅▇▂ |
ManufacturingProcess10 | 9 | 0.95 | 0 | 0.77 | -1.68 | -0.48 | -0.08 | 0.37 | 2.42 | ▂▇▆▂▁ |
ManufacturingProcess11 | 10 | 0.94 | 0 | 0.72 | -1.89 | -0.39 | 0.01 | 0.51 | 2.11 | ▁▆▇▅▁ |
ManufacturingProcess12 | 1 | 0.99 | 0 | 1784.53 | -857.81 | -857.81 | -857.81 | -857.81 | 3691.19 | ▇▁▁▁▂ |
ManufacturingProcess13 | 0 | 1.00 | 0 | 1.02 | -2.41 | -0.61 | 0.09 | 0.69 | 4.09 | ▃▆▇▁▁ |
ManufacturingProcess14 | 1 | 0.99 | 0 | 54.52 | -152.87 | -25.87 | 2.13 | 28.63 | 201.13 | ▁▅▇▂▁ |
ManufacturingProcess15 | 0 | 1.00 | 0 | 58.31 | -134.92 | -28.92 | -7.42 | 22.08 | 194.08 | ▂▇▆▂▁ |
ManufacturingProcess16 | 0 | 1.00 | 0 | 351.70 | -4565.80 | -5.05 | 22.20 | 53.20 | 286.20 | ▁▁▁▁▇ |
ManufacturingProcess17 | 0 | 1.00 | 0 | 1.25 | -3.04 | -0.84 | 0.06 | 0.76 | 5.66 | ▂▇▆▁▁ |
ManufacturingProcess18 | 0 | 1.00 | 0 | 367.48 | -4809.68 | 3.32 | 25.32 | 52.32 | 161.32 | ▁▁▁▁▇ |
ManufacturingProcess19 | 0 | 1.00 | 0 | 45.58 | -138.20 | -27.45 | -6.20 | 22.05 | 117.80 | ▁▃▇▃▂ |
ManufacturingProcess20 | 0 | 1.00 | 0 | 349.01 | -4556.46 | -3.71 | 25.54 | 53.04 | 202.54 | ▁▁▁▁▇ |
ManufacturingProcess21 | 0 | 1.00 | 0 | 0.78 | -1.64 | -0.44 | -0.14 | 0.16 | 3.76 | ▂▇▂▁▁ |
ManufacturingProcess22 | 1 | 0.99 | 0 | 3.33 | -5.41 | -2.41 | -0.41 | 2.59 | 6.59 | ▇▇▇▅▅ |
ManufacturingProcess23 | 1 | 0.99 | 0 | 1.66 | -3.02 | -1.02 | -0.02 | 0.98 | 2.98 | ▇▆▇▆▇ |
ManufacturingProcess24 | 1 | 0.99 | 0 | 5.80 | -8.83 | -4.83 | -0.83 | 5.17 | 14.17 | ▇▇▅▆▁ |
ManufacturingProcess25 | 5 | 0.97 | 0 | 373.48 | -4828.18 | 3.82 | 26.82 | 48.82 | 161.82 | ▁▁▁▁▇ |
ManufacturingProcess26 | 5 | 0.97 | 0 | 464.87 | -6015.60 | 3.90 | 31.40 | 54.90 | 145.40 | ▁▁▁▁▇ |
ManufacturingProcess27 | 5 | 0.97 | 0 | 353.98 | -4562.51 | -2.51 | 24.49 | 46.49 | 147.49 | ▁▁▁▁▇ |
ManufacturingProcess28 | 5 | 0.97 | 0 | 5.25 | -6.59 | -6.59 | 3.81 | 4.16 | 4.91 | ▅▁▁▁▇ |
ManufacturingProcess29 | 5 | 0.97 | 0 | 1.66 | -20.01 | -0.31 | -0.11 | 0.39 | 1.99 | ▁▁▁▁▇ |
ManufacturingProcess30 | 5 | 0.97 | 0 | 0.98 | -9.16 | -0.36 | -0.06 | 0.54 | 2.04 | ▁▁▁▅▇ |
ManufacturingProcess31 | 5 | 0.97 | 0 | 5.56 | -70.18 | -0.08 | 0.62 | 1.22 | 2.32 | ▁▁▁▁▇ |
ManufacturingProcess32 | 0 | 1.00 | 0 | 5.40 | -15.47 | -3.47 | -0.47 | 3.53 | 14.53 | ▁▃▇▃▁ |
ManufacturingProcess33 | 5 | 0.97 | 0 | 2.48 | -7.54 | -1.54 | 0.46 | 1.46 | 6.46 | ▁▃▇▅▁ |
ManufacturingProcess34 | 5 | 0.97 | 0 | 0.05 | -0.19 | 0.01 | 0.01 | 0.01 | 0.11 | ▁▂▁▇▁ |
ManufacturingProcess35 | 5 | 0.97 | 0 | 10.82 | -32.60 | -5.60 | -0.60 | 5.90 | 26.40 | ▁▂▇▅▂ |
ManufacturingProcess36 | 5 | 0.97 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | ▂▇▁▇▃ |
ManufacturingProcess37 | 0 | 1.00 | 0 | 0.45 | -1.01 | -0.31 | -0.01 | 0.29 | 1.29 | ▂▇▇▃▁ |
ManufacturingProcess38 | 0 | 1.00 | 0 | 0.65 | -2.53 | -0.53 | 0.47 | 0.47 | 0.47 | ▁▁▁▅▇ |
ManufacturingProcess39 | 0 | 1.00 | 0 | 1.51 | -6.85 | 0.25 | 0.35 | 0.45 | 0.65 | ▁▁▁▁▇ |
ManufacturingProcess40 | 1 | 0.99 | 0 | 0.04 | -0.02 | -0.02 | -0.02 | -0.02 | 0.08 | ▇▁▁▁▂ |
ManufacturingProcess41 | 1 | 0.99 | 0 | 0.05 | -0.02 | -0.02 | -0.02 | -0.02 | 0.18 | ▇▁▁▁▁ |
ManufacturingProcess42 | 0 | 1.00 | 0 | 1.94 | -11.21 | 0.19 | 0.39 | 0.49 | 0.89 | ▁▁▁▁▇ |
ManufacturingProcess43 | 0 | 1.00 | 0 | 0.87 | -0.91 | -0.31 | -0.11 | 0.11 | 10.09 | ▇▁▁▁▁ |
ManufacturingProcess44 | 0 | 1.00 | 0 | 0.32 | -1.81 | -0.01 | 0.09 | 0.09 | 0.29 | ▁▁▁▁▇ |
ManufacturingProcess45 | 0 | 1.00 | 0 | 0.41 | -2.14 | -0.04 | 0.06 | 0.16 | 0.46 | ▁▁▁▂▇ |
##################################
# Applying a center and scale data transformation
##################################
# Fit the centering/scaling pre-processing model on the numeric predictors,
# then apply it to the same data. The fitted model and the transformed data
# are kept in separate objects so the model can be reused.
DPA_CenteredScaled <- preProcess(
  DPA.Predictors.Numeric,
  method = c("center", "scale")
)
DPA_CenteredScaledTransformed <- predict(
  DPA_CenteredScaled,
  DPA.Predictors.Numeric
)
##################################
# Gathering descriptive statistics
##################################
# Summarise the centered and scaled predictors; the outer parentheses make
# the assignment also print its value.
(DPA_CenteredScaledTransformedSkimmed <-
  skim(DPA_CenteredScaledTransformed))
Name | DPA_CenteredScaledTransfo… |
Number of rows | 176 |
Number of columns | 57 |
_______________________ | |
Column type frequency: | |
numeric | 57 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
BiologicalMaterial01 | 0 | 1.00 | 0 | 1 | -2.57 | -0.61 | -0.15 | 0.64 | 3.36 | ▂▇▇▂▁ |
BiologicalMaterial02 | 0 | 1.00 | 0 | 1 | -2.19 | -0.75 | -0.15 | 0.76 | 2.25 | ▂▇▆▅▃ |
BiologicalMaterial03 | 0 | 1.00 | 0 | 1 | -2.68 | -0.68 | -0.12 | 0.68 | 2.64 | ▂▅▇▆▁ |
BiologicalMaterial04 | 0 | 1.00 | 0 | 1 | -1.67 | -0.62 | -0.14 | 0.49 | 6.05 | ▇▆▁▁▁ |
BiologicalMaterial05 | 0 | 1.00 | 0 | 1 | -2.91 | -0.74 | -0.06 | 0.71 | 3.39 | ▁▅▇▃▁ |
BiologicalMaterial06 | 0 | 1.00 | 0 | 1 | -2.22 | -0.76 | -0.12 | 0.65 | 2.79 | ▂▇▆▅▁ |
BiologicalMaterial07 | 0 | 1.00 | 0 | 1 | -0.13 | -0.13 | -0.13 | -0.13 | 7.57 | ▇▁▁▁▁ |
BiologicalMaterial08 | 0 | 1.00 | 0 | 1 | -2.39 | -0.64 | 0.02 | 0.57 | 2.43 | ▁▅▇▃▂ |
BiologicalMaterial09 | 0 | 1.00 | 0 | 1 | -3.40 | -0.60 | -0.04 | 0.67 | 2.96 | ▁▃▇▅▁ |
BiologicalMaterial10 | 0 | 1.00 | 0 | 1 | -1.72 | -0.57 | -0.15 | 0.32 | 6.79 | ▇▅▁▁▁ |
BiologicalMaterial11 | 0 | 1.00 | 0 | 1 | -2.31 | -0.65 | -0.18 | 0.55 | 2.44 | ▂▆▇▃▂ |
BiologicalMaterial12 | 0 | 1.00 | 0 | 1 | -2.39 | -0.61 | -0.10 | 0.71 | 2.60 | ▂▆▇▃▂ |
ManufacturingProcess01 | 1 | 0.99 | 0 | 1 | -6.15 | -0.22 | 0.11 | 0.52 | 1.59 | ▁▁▁▅▇ |
ManufacturingProcess02 | 3 | 0.98 | 0 | 1 | -1.97 | 0.31 | 0.51 | 0.57 | 0.69 | ▂▁▁▁▇ |
ManufacturingProcess03 | 15 | 0.91 | 0 | 1 | -3.11 | -0.43 | 0.02 | 0.47 | 2.70 | ▁▃▆▇▁ |
ManufacturingProcess04 | 1 | 0.99 | 0 | 1 | -3.32 | -0.61 | 0.34 | 0.66 | 2.25 | ▁▂▃▇▂ |
ManufacturingProcess05 | 1 | 0.99 | 0 | 1 | -2.58 | -0.49 | -0.08 | 0.23 | 5.69 | ▁▇▁▁▁ |
ManufacturingProcess06 | 2 | 0.99 | 0 | 1 | -1.63 | -0.63 | -0.22 | 0.48 | 7.41 | ▇▃▁▁▁ |
ManufacturingProcess07 | 1 | 0.99 | 0 | 1 | -0.96 | -0.96 | -0.96 | 1.04 | 1.04 | ▇▁▁▁▇ |
ManufacturingProcess08 | 1 | 0.99 | 0 | 1 | -1.11 | -1.11 | 0.89 | 0.89 | 0.89 | ▆▁▁▁▇ |
ManufacturingProcess09 | 0 | 1.00 | 0 | 1 | -4.38 | -0.50 | 0.05 | 0.55 | 2.39 | ▁▁▅▇▂ |
ManufacturingProcess10 | 9 | 0.95 | 0 | 1 | -2.19 | -0.62 | -0.10 | 0.48 | 3.16 | ▂▇▆▂▁ |
ManufacturingProcess11 | 10 | 0.94 | 0 | 1 | -2.63 | -0.54 | 0.02 | 0.72 | 2.95 | ▁▇▇▅▁ |
ManufacturingProcess12 | 1 | 0.99 | 0 | 1 | -0.48 | -0.48 | -0.48 | -0.48 | 2.07 | ▇▁▁▁▂ |
ManufacturingProcess13 | 0 | 1.00 | 0 | 1 | -2.37 | -0.60 | 0.09 | 0.68 | 4.03 | ▃▆▇▁▁ |
ManufacturingProcess14 | 1 | 0.99 | 0 | 1 | -2.80 | -0.47 | 0.04 | 0.53 | 3.69 | ▁▅▇▂▁ |
ManufacturingProcess15 | 0 | 1.00 | 0 | 1 | -2.31 | -0.50 | -0.13 | 0.38 | 3.33 | ▂▇▆▂▁ |
ManufacturingProcess16 | 0 | 1.00 | 0 | 1 | -12.98 | -0.01 | 0.06 | 0.15 | 0.81 | ▁▁▁▁▇ |
ManufacturingProcess17 | 0 | 1.00 | 0 | 1 | -2.44 | -0.68 | 0.05 | 0.61 | 4.53 | ▂▇▆▁▁ |
ManufacturingProcess18 | 0 | 1.00 | 0 | 1 | -13.09 | 0.01 | 0.07 | 0.14 | 0.44 | ▁▁▁▁▇ |
ManufacturingProcess19 | 0 | 1.00 | 0 | 1 | -3.03 | -0.60 | -0.14 | 0.48 | 2.58 | ▁▃▇▃▂ |
ManufacturingProcess20 | 0 | 1.00 | 0 | 1 | -13.06 | -0.01 | 0.07 | 0.15 | 0.58 | ▁▁▁▁▇ |
ManufacturingProcess21 | 0 | 1.00 | 0 | 1 | -2.10 | -0.56 | -0.17 | 0.21 | 4.84 | ▂▇▂▁▁ |
ManufacturingProcess22 | 1 | 0.99 | 0 | 1 | -1.62 | -0.72 | -0.12 | 0.78 | 1.98 | ▇▇▇▅▅ |
ManufacturingProcess23 | 1 | 0.99 | 0 | 1 | -1.81 | -0.61 | -0.01 | 0.59 | 1.79 | ▇▆▇▆▇ |
ManufacturingProcess24 | 1 | 0.99 | 0 | 1 | -1.52 | -0.83 | -0.14 | 0.89 | 2.44 | ▇▇▅▆▁ |
ManufacturingProcess25 | 5 | 0.97 | 0 | 1 | -12.93 | 0.01 | 0.07 | 0.13 | 0.43 | ▁▁▁▁▇ |
ManufacturingProcess26 | 5 | 0.97 | 0 | 1 | -12.94 | 0.01 | 0.07 | 0.12 | 0.31 | ▁▁▁▁▇ |
ManufacturingProcess27 | 5 | 0.97 | 0 | 1 | -12.89 | -0.01 | 0.07 | 0.13 | 0.42 | ▁▁▁▁▇ |
ManufacturingProcess28 | 5 | 0.97 | 0 | 1 | -1.26 | -1.26 | 0.73 | 0.79 | 0.94 | ▅▁▁▁▇ |
ManufacturingProcess29 | 5 | 0.97 | 0 | 1 | -12.03 | -0.19 | -0.07 | 0.23 | 1.20 | ▁▁▁▁▇ |
ManufacturingProcess30 | 5 | 0.97 | 0 | 1 | -9.39 | -0.37 | -0.06 | 0.55 | 2.09 | ▁▁▁▅▇ |
ManufacturingProcess31 | 5 | 0.97 | 0 | 1 | -12.63 | -0.02 | 0.11 | 0.22 | 0.42 | ▁▁▁▁▇ |
ManufacturingProcess32 | 0 | 1.00 | 0 | 1 | -2.87 | -0.64 | -0.09 | 0.65 | 2.69 | ▁▃▇▃▁ |
ManufacturingProcess33 | 5 | 0.97 | 0 | 1 | -3.04 | -0.62 | 0.18 | 0.59 | 2.60 | ▁▃▇▅▁ |
ManufacturingProcess34 | 5 | 0.97 | 0 | 1 | -3.56 | 0.12 | 0.12 | 0.12 | 1.96 | ▁▂▁▇▁ |
ManufacturingProcess35 | 5 | 0.97 | 0 | 1 | -3.01 | -0.52 | -0.06 | 0.55 | 2.44 | ▁▂▇▅▂ |
ManufacturingProcess36 | 5 | 0.97 | 0 | 1 | -2.94 | -0.66 | 0.49 | 0.49 | 2.78 | ▂▇▁▇▃ |
ManufacturingProcess37 | 0 | 1.00 | 0 | 1 | -2.28 | -0.70 | -0.03 | 0.64 | 2.89 | ▂▇▇▃▁ |
ManufacturingProcess38 | 0 | 1.00 | 0 | 1 | -3.90 | -0.82 | 0.72 | 0.72 | 0.72 | ▁▁▁▅▇ |
ManufacturingProcess39 | 0 | 1.00 | 0 | 1 | -4.55 | 0.17 | 0.23 | 0.30 | 0.43 | ▁▁▁▁▇ |
ManufacturingProcess40 | 1 | 0.99 | 0 | 1 | -0.46 | -0.46 | -0.46 | -0.46 | 2.15 | ▇▁▁▁▂ |
ManufacturingProcess41 | 1 | 0.99 | 0 | 1 | -0.44 | -0.44 | -0.44 | -0.44 | 3.28 | ▇▁▁▁▁ |
ManufacturingProcess42 | 0 | 1.00 | 0 | 1 | -5.77 | 0.10 | 0.20 | 0.25 | 0.46 | ▁▁▁▁▇ |
ManufacturingProcess43 | 0 | 1.00 | 0 | 1 | -1.05 | -0.36 | -0.13 | 0.13 | 11.62 | ▇▁▁▁▁ |
ManufacturingProcess44 | 0 | 1.00 | 0 | 1 | -5.61 | -0.02 | 0.29 | 0.29 | 0.92 | ▁▁▁▁▇ |
ManufacturingProcess45 | 0 | 1.00 | 0 | 1 | -5.25 | -0.09 | 0.15 | 0.40 | 1.14 | ▁▁▁▂▇ |
##################################
# Applying a range transformation
##################################
# Fit the range pre-processing model with bounds [0, 1] on the numeric
# predictors, then apply it to the same data. The fitted model and the
# transformed data are kept in separate objects.
DPA_Ranged <- preProcess(
  DPA.Predictors.Numeric,
  method = c("range"),
  rangeBounds = c(0, 1)
)
DPA_RangedTransformed <- predict(DPA_Ranged, DPA.Predictors.Numeric)
##################################
# Gathering descriptive statistics
##################################
# Summarise the range-transformed predictors; the outer parentheses make the
# assignment also print its value.
(DPA_RangedTransformedSkimmed <- skim(DPA_RangedTransformed))
Name | DPA_RangedTransformed |
Number of rows | 176 |
Number of columns | 57 |
_______________________ | |
Column type frequency: | |
numeric | 57 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
BiologicalMaterial01 | 0 | 1.00 | 0.43 | 0.17 | 0 | 0.33 | 0.41 | 0.54 | 1 | ▂▇▇▂▁ |
BiologicalMaterial02 | 0 | 1.00 | 0.49 | 0.23 | 0 | 0.32 | 0.46 | 0.66 | 1 | ▂▇▆▅▃ |
BiologicalMaterial03 | 0 | 1.00 | 0.50 | 0.19 | 0 | 0.38 | 0.48 | 0.63 | 1 | ▂▅▇▆▁ |
BiologicalMaterial04 | 0 | 1.00 | 0.22 | 0.13 | 0 | 0.14 | 0.20 | 0.28 | 1 | ▇▆▁▁▁ |
BiologicalMaterial05 | 0 | 1.00 | 0.46 | 0.16 | 0 | 0.34 | 0.45 | 0.57 | 1 | ▁▅▇▃▁ |
BiologicalMaterial06 | 0 | 1.00 | 0.44 | 0.20 | 0 | 0.29 | 0.42 | 0.57 | 1 | ▂▇▆▅▁ |
BiologicalMaterial07 | 0 | 1.00 | 0.02 | 0.13 | 0 | 0.00 | 0.00 | 0.00 | 1 | ▇▁▁▁▁ |
BiologicalMaterial08 | 0 | 1.00 | 0.50 | 0.21 | 0 | 0.36 | 0.50 | 0.61 | 1 | ▁▅▇▃▂ |
BiologicalMaterial09 | 0 | 1.00 | 0.53 | 0.16 | 0 | 0.44 | 0.53 | 0.64 | 1 | ▁▃▇▅▁ |
BiologicalMaterial10 | 0 | 1.00 | 0.20 | 0.12 | 0 | 0.14 | 0.18 | 0.24 | 1 | ▇▅▁▁▁ |
BiologicalMaterial11 | 0 | 1.00 | 0.49 | 0.21 | 0 | 0.35 | 0.45 | 0.60 | 1 | ▂▆▇▃▂ |
BiologicalMaterial12 | 0 | 1.00 | 0.48 | 0.20 | 0 | 0.36 | 0.46 | 0.62 | 1 | ▂▆▇▃▂ |
ManufacturingProcess01 | 1 | 0.99 | 0.79 | 0.13 | 0 | 0.77 | 0.81 | 0.86 | 1 | ▁▁▁▅▇ |
ManufacturingProcess02 | 3 | 0.98 | 0.74 | 0.38 | 0 | 0.86 | 0.93 | 0.96 | 1 | ▂▁▁▁▇ |
ManufacturingProcess03 | 15 | 0.91 | 0.54 | 0.17 | 0 | 0.46 | 0.54 | 0.62 | 1 | ▁▃▆▇▁ |
ManufacturingProcess04 | 1 | 0.99 | 0.60 | 0.18 | 0 | 0.49 | 0.66 | 0.71 | 1 | ▁▂▃▇▁ |
ManufacturingProcess05 | 1 | 0.99 | 0.31 | 0.12 | 0 | 0.25 | 0.30 | 0.34 | 1 | ▁▇▁▁▁ |
ManufacturingProcess06 | 2 | 0.99 | 0.18 | 0.11 | 0 | 0.11 | 0.16 | 0.23 | 1 | ▇▃▁▁▁ |
ManufacturingProcess07 | 1 | 0.99 | 0.48 | 0.50 | 0 | 0.00 | 0.00 | 1.00 | 1 | ▇▁▁▁▇ |
ManufacturingProcess08 | 1 | 0.99 | 0.55 | 0.50 | 0 | 0.00 | 1.00 | 1.00 | 1 | ▆▁▁▁▇ |
ManufacturingProcess09 | 0 | 1.00 | 0.65 | 0.15 | 0 | 0.57 | 0.65 | 0.73 | 1 | ▁▁▅▇▂ |
ManufacturingProcess10 | 9 | 0.95 | 0.41 | 0.19 | 0 | 0.29 | 0.39 | 0.50 | 1 | ▂▇▆▂▁ |
ManufacturingProcess11 | 10 | 0.94 | 0.47 | 0.18 | 0 | 0.38 | 0.48 | 0.60 | 1 | ▁▆▇▅▁ |
ManufacturingProcess12 | 1 | 0.99 | 0.19 | 0.39 | 0 | 0.00 | 0.00 | 0.00 | 1 | ▇▁▁▁▂ |
ManufacturingProcess13 | 0 | 1.00 | 0.37 | 0.16 | 0 | 0.28 | 0.38 | 0.48 | 1 | ▃▆▇▁▁ |
ManufacturingProcess14 | 1 | 0.99 | 0.43 | 0.15 | 0 | 0.36 | 0.44 | 0.51 | 1 | ▁▅▇▂▁ |
ManufacturingProcess15 | 0 | 1.00 | 0.41 | 0.18 | 0 | 0.32 | 0.39 | 0.48 | 1 | ▂▇▆▂▁ |
ManufacturingProcess16 | 0 | 1.00 | 0.94 | 0.07 | 0 | 0.94 | 0.95 | 0.95 | 1 | ▁▁▁▁▇ |
ManufacturingProcess17 | 0 | 1.00 | 0.35 | 0.14 | 0 | 0.25 | 0.36 | 0.44 | 1 | ▂▇▆▁▁ |
ManufacturingProcess18 | 0 | 1.00 | 0.97 | 0.07 | 0 | 0.97 | 0.97 | 0.98 | 1 | ▁▁▁▁▇ |
ManufacturingProcess19 | 0 | 1.00 | 0.54 | 0.18 | 0 | 0.43 | 0.52 | 0.63 | 1 | ▁▃▇▃▂ |
ManufacturingProcess20 | 0 | 1.00 | 0.96 | 0.07 | 0 | 0.96 | 0.96 | 0.97 | 1 | ▁▁▁▁▇ |
ManufacturingProcess21 | 0 | 1.00 | 0.30 | 0.14 | 0 | 0.22 | 0.28 | 0.33 | 1 | ▂▇▂▁▁ |
ManufacturingProcess22 | 1 | 0.99 | 0.45 | 0.28 | 0 | 0.25 | 0.42 | 0.67 | 1 | ▇▇▇▅▅ |
ManufacturingProcess23 | 1 | 0.99 | 0.50 | 0.28 | 0 | 0.33 | 0.50 | 0.67 | 1 | ▇▆▇▆▇ |
ManufacturingProcess24 | 1 | 0.99 | 0.38 | 0.25 | 0 | 0.17 | 0.35 | 0.61 | 1 | ▇▇▅▆▁ |
ManufacturingProcess25 | 5 | 0.97 | 0.97 | 0.07 | 0 | 0.97 | 0.97 | 0.98 | 1 | ▁▁▁▁▇ |
ManufacturingProcess26 | 5 | 0.97 | 0.98 | 0.08 | 0 | 0.98 | 0.98 | 0.99 | 1 | ▁▁▁▁▇ |
ManufacturingProcess27 | 5 | 0.97 | 0.97 | 0.08 | 0 | 0.97 | 0.97 | 0.98 | 1 | ▁▁▁▁▇ |
ManufacturingProcess28 | 5 | 0.97 | 0.57 | 0.46 | 0 | 0.00 | 0.90 | 0.93 | 1 | ▅▁▁▁▇ |
ManufacturingProcess29 | 5 | 0.97 | 0.91 | 0.08 | 0 | 0.90 | 0.90 | 0.93 | 1 | ▁▁▁▁▇ |
ManufacturingProcess30 | 5 | 0.97 | 0.82 | 0.09 | 0 | 0.79 | 0.81 | 0.87 | 1 | ▁▁▁▅▇ |
ManufacturingProcess31 | 5 | 0.97 | 0.97 | 0.08 | 0 | 0.97 | 0.98 | 0.98 | 1 | ▁▁▁▁▇ |
ManufacturingProcess32 | 0 | 1.00 | 0.52 | 0.18 | 0 | 0.40 | 0.50 | 0.63 | 1 | ▁▃▇▃▁ |
ManufacturingProcess33 | 5 | 0.97 | 0.54 | 0.18 | 0 | 0.43 | 0.57 | 0.64 | 1 | ▁▃▇▅▁ |
ManufacturingProcess34 | 5 | 0.97 | 0.65 | 0.18 | 0 | 0.67 | 0.67 | 0.67 | 1 | ▁▂▁▇▁ |
ManufacturingProcess35 | 5 | 0.97 | 0.55 | 0.18 | 0 | 0.46 | 0.54 | 0.65 | 1 | ▁▂▇▅▂ |
ManufacturingProcess36 | 5 | 0.97 | 0.51 | 0.17 | 0 | 0.40 | 0.60 | 0.60 | 1 | ▂▇▇▁▃ |
ManufacturingProcess37 | 0 | 1.00 | 0.44 | 0.19 | 0 | 0.30 | 0.43 | 0.57 | 1 | ▂▇▇▃▁ |
ManufacturingProcess38 | 0 | 1.00 | 0.84 | 0.22 | 0 | 0.67 | 1.00 | 1.00 | 1 | ▁▁▁▅▇ |
ManufacturingProcess39 | 0 | 1.00 | 0.91 | 0.20 | 0 | 0.95 | 0.96 | 0.97 | 1 | ▁▁▁▁▇ |
ManufacturingProcess40 | 1 | 0.99 | 0.18 | 0.38 | 0 | 0.00 | 0.00 | 0.00 | 1 | ▇▁▁▁▂ |
ManufacturingProcess41 | 1 | 0.99 | 0.12 | 0.27 | 0 | 0.00 | 0.00 | 0.00 | 1 | ▇▁▁▁▁ |
ManufacturingProcess42 | 0 | 1.00 | 0.93 | 0.16 | 0 | 0.94 | 0.96 | 0.97 | 1 | ▁▁▁▁▇ |
ManufacturingProcess43 | 0 | 1.00 | 0.08 | 0.08 | 0 | 0.05 | 0.07 | 0.09 | 1 | ▇▁▁▁▁ |
ManufacturingProcess44 | 0 | 1.00 | 0.86 | 0.15 | 0 | 0.86 | 0.90 | 0.90 | 1 | ▁▁▁▁▇ |
ManufacturingProcess45 | 0 | 1.00 | 0.82 | 0.16 | 0 | 0.81 | 0.85 | 0.88 | 1 | ▁▁▁▂▇ |
##################################
# Loading dataset
##################################
# Work on a copy of the packaged dataset under the short name used by the
# rest of this section.
DPA <- ChemicalManufacturingProcess
##################################
# Listing all predictors
##################################
# Keep every column except the response ("Yield"). drop = FALSE guarantees
# the result stays a data frame whatever number of columns remains.
DPA.Predictors <- DPA[, !(names(DPA) %in% c("Yield")), drop = FALSE]
##################################
# Listing all numeric predictors
##################################
# Keep only the numeric predictor columns. vapply() always returns a logical
# vector (sapply() returns a list on zero-column input), and drop = FALSE
# keeps the result a data frame.
DPA.Predictors.Numeric <- DPA.Predictors[
  ,
  vapply(DPA.Predictors, is.numeric, logical(1)),
  drop = FALSE
]
##################################
# Gathering descriptive statistics
##################################
# Summarise the untransformed numeric predictors; the outer parentheses make
# the assignment also print its value.
(DPA_Skimmed <- skim(DPA.Predictors.Numeric))
Name | DPA.Predictors.Numeric |
Number of rows | 176 |
Number of columns | 57 |
_______________________ | |
Column type frequency: | |
numeric | 57 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
BiologicalMaterial01 | 0 | 1.00 | 6.41 | 0.71 | 4.58 | 5.98 | 6.30 | 6.87 | 8.81 | ▂▇▇▂▁ |
BiologicalMaterial02 | 0 | 1.00 | 55.69 | 4.03 | 46.87 | 52.68 | 55.09 | 58.74 | 64.75 | ▂▇▆▅▃ |
BiologicalMaterial03 | 0 | 1.00 | 67.70 | 4.00 | 56.97 | 64.98 | 67.22 | 70.43 | 78.25 | ▂▅▇▆▁ |
BiologicalMaterial04 | 0 | 1.00 | 12.35 | 1.77 | 9.38 | 11.25 | 12.10 | 13.22 | 23.09 | ▇▆▁▁▁ |
BiologicalMaterial05 | 0 | 1.00 | 18.60 | 1.84 | 13.24 | 17.24 | 18.49 | 19.90 | 24.85 | ▁▅▇▃▁ |
BiologicalMaterial06 | 0 | 1.00 | 48.91 | 3.75 | 40.60 | 46.06 | 48.46 | 51.34 | 59.38 | ▂▇▆▅▁ |
BiologicalMaterial07 | 0 | 1.00 | 100.01 | 0.11 | 100.00 | 100.00 | 100.00 | 100.00 | 100.83 | ▇▁▁▁▁ |
BiologicalMaterial08 | 0 | 1.00 | 17.49 | 0.68 | 15.88 | 17.06 | 17.51 | 17.88 | 19.14 | ▁▅▇▃▂ |
BiologicalMaterial09 | 0 | 1.00 | 12.85 | 0.42 | 11.44 | 12.60 | 12.84 | 13.13 | 14.08 | ▁▃▇▅▁ |
BiologicalMaterial10 | 0 | 1.00 | 2.80 | 0.60 | 1.77 | 2.46 | 2.71 | 2.99 | 6.87 | ▇▅▁▁▁ |
BiologicalMaterial11 | 0 | 1.00 | 146.95 | 4.82 | 135.81 | 143.82 | 146.08 | 149.60 | 158.73 | ▂▆▇▃▂ |
BiologicalMaterial12 | 0 | 1.00 | 20.20 | 0.77 | 18.35 | 19.73 | 20.12 | 20.75 | 22.21 | ▂▆▇▃▂ |
ManufacturingProcess01 | 1 | 0.99 | 11.21 | 1.82 | 0.00 | 10.80 | 11.40 | 12.15 | 14.10 | ▁▁▁▅▇ |
ManufacturingProcess02 | 3 | 0.98 | 16.68 | 8.47 | 0.00 | 19.30 | 21.00 | 21.50 | 22.50 | ▂▁▁▁▇ |
ManufacturingProcess03 | 15 | 0.91 | 1.54 | 0.02 | 1.47 | 1.53 | 1.54 | 1.55 | 1.60 | ▁▃▆▇▁ |
ManufacturingProcess04 | 1 | 0.99 | 931.85 | 6.27 | 911.00 | 928.00 | 934.00 | 936.00 | 946.00 | ▁▂▃▇▁ |
ManufacturingProcess05 | 1 | 0.99 | 1001.69 | 30.53 | 923.00 | 986.75 | 999.20 | 1008.85 | 1175.30 | ▁▇▁▁▁ |
ManufacturingProcess06 | 2 | 0.99 | 207.40 | 2.70 | 203.00 | 205.70 | 206.80 | 208.70 | 227.40 | ▇▃▁▁▁ |
ManufacturingProcess07 | 1 | 0.99 | 177.48 | 0.50 | 177.00 | 177.00 | 177.00 | 178.00 | 178.00 | ▇▁▁▁▇ |
ManufacturingProcess08 | 1 | 0.99 | 177.55 | 0.50 | 177.00 | 177.00 | 178.00 | 178.00 | 178.00 | ▆▁▁▁▇ |
ManufacturingProcess09 | 0 | 1.00 | 45.66 | 1.55 | 38.89 | 44.89 | 45.73 | 46.52 | 49.36 | ▁▁▅▇▂ |
ManufacturingProcess10 | 9 | 0.95 | 9.18 | 0.77 | 7.50 | 8.70 | 9.10 | 9.55 | 11.60 | ▂▇▆▂▁ |
ManufacturingProcess11 | 10 | 0.94 | 9.39 | 0.72 | 7.50 | 9.00 | 9.40 | 9.90 | 11.50 | ▂▆▇▅▁ |
ManufacturingProcess12 | 1 | 0.99 | 857.81 | 1784.53 | 0.00 | 0.00 | 0.00 | 0.00 | 4549.00 | ▇▁▁▁▂ |
ManufacturingProcess13 | 0 | 1.00 | 34.51 | 1.02 | 32.10 | 33.90 | 34.60 | 35.20 | 38.60 | ▃▇▇▁▁ |
ManufacturingProcess14 | 1 | 0.99 | 4853.87 | 54.52 | 4701.00 | 4828.00 | 4856.00 | 4882.50 | 5055.00 | ▁▅▇▂▁ |
ManufacturingProcess15 | 0 | 1.00 | 6038.92 | 58.31 | 5904.00 | 6010.00 | 6031.50 | 6061.00 | 6233.00 | ▂▇▆▂▁ |
ManufacturingProcess16 | 0 | 1.00 | 4565.80 | 351.70 | 0.00 | 4560.75 | 4588.00 | 4619.00 | 4852.00 | ▁▁▁▁▇ |
ManufacturingProcess17 | 0 | 1.00 | 34.34 | 1.25 | 31.30 | 33.50 | 34.40 | 35.10 | 40.00 | ▂▇▆▁▁ |
ManufacturingProcess18 | 0 | 1.00 | 4809.68 | 367.48 | 0.00 | 4813.00 | 4835.00 | 4862.00 | 4971.00 | ▁▁▁▁▇ |
ManufacturingProcess19 | 0 | 1.00 | 6028.20 | 45.58 | 5890.00 | 6000.75 | 6022.00 | 6050.25 | 6146.00 | ▁▃▇▃▂ |
ManufacturingProcess20 | 0 | 1.00 | 4556.46 | 349.01 | 0.00 | 4552.75 | 4582.00 | 4609.50 | 4759.00 | ▁▁▁▁▇ |
ManufacturingProcess21 | 0 | 1.00 | -0.16 | 0.78 | -1.80 | -0.60 | -0.30 | 0.00 | 3.60 | ▂▇▂▁▁ |
ManufacturingProcess22 | 1 | 0.99 | 5.41 | 3.33 | 0.00 | 3.00 | 5.00 | 8.00 | 12.00 | ▇▇▇▅▅ |
ManufacturingProcess23 | 1 | 0.99 | 3.02 | 1.66 | 0.00 | 2.00 | 3.00 | 4.00 | 6.00 | ▇▆▇▆▇ |
ManufacturingProcess24 | 1 | 0.99 | 8.83 | 5.80 | 0.00 | 4.00 | 8.00 | 14.00 | 23.00 | ▇▇▅▆▁ |
ManufacturingProcess25 | 5 | 0.97 | 4828.18 | 373.48 | 0.00 | 4832.00 | 4855.00 | 4877.00 | 4990.00 | ▁▁▁▁▇ |
ManufacturingProcess26 | 5 | 0.97 | 6015.60 | 464.87 | 0.00 | 6019.50 | 6047.00 | 6070.50 | 6161.00 | ▁▁▁▁▇ |
ManufacturingProcess27 | 5 | 0.97 | 4562.51 | 353.98 | 0.00 | 4560.00 | 4587.00 | 4609.00 | 4710.00 | ▁▁▁▁▇ |
ManufacturingProcess28 | 5 | 0.97 | 6.59 | 5.25 | 0.00 | 0.00 | 10.40 | 10.75 | 11.50 | ▅▁▁▁▇ |
ManufacturingProcess29 | 5 | 0.97 | 20.01 | 1.66 | 0.00 | 19.70 | 19.90 | 20.40 | 22.00 | ▁▁▁▁▇ |
ManufacturingProcess30 | 5 | 0.97 | 9.16 | 0.98 | 0.00 | 8.80 | 9.10 | 9.70 | 11.20 | ▁▁▁▅▇ |
ManufacturingProcess31 | 5 | 0.97 | 70.18 | 5.56 | 0.00 | 70.10 | 70.80 | 71.40 | 72.50 | ▁▁▁▁▇ |
ManufacturingProcess32 | 0 | 1.00 | 158.47 | 5.40 | 143.00 | 155.00 | 158.00 | 162.00 | 173.00 | ▁▃▇▃▁ |
ManufacturingProcess33 | 5 | 0.97 | 63.54 | 2.48 | 56.00 | 62.00 | 64.00 | 65.00 | 70.00 | ▁▃▇▅▁ |
ManufacturingProcess34 | 5 | 0.97 | 2.49 | 0.05 | 2.30 | 2.50 | 2.50 | 2.50 | 2.60 | ▁▂▁▇▁ |
ManufacturingProcess35 | 5 | 0.97 | 495.60 | 10.82 | 463.00 | 490.00 | 495.00 | 501.50 | 522.00 | ▁▂▇▅▂ |
ManufacturingProcess36 | 5 | 0.97 | 0.02 | 0.00 | 0.02 | 0.02 | 0.02 | 0.02 | 0.02 | ▂▇▇▁▃ |
ManufacturingProcess37 | 0 | 1.00 | 1.01 | 0.45 | 0.00 | 0.70 | 1.00 | 1.30 | 2.30 | ▂▇▇▃▁ |
ManufacturingProcess38 | 0 | 1.00 | 2.53 | 0.65 | 0.00 | 2.00 | 3.00 | 3.00 | 3.00 | ▁▁▁▅▇ |
ManufacturingProcess39 | 0 | 1.00 | 6.85 | 1.51 | 0.00 | 7.10 | 7.20 | 7.30 | 7.50 | ▁▁▁▁▇ |
ManufacturingProcess40 | 1 | 0.99 | 0.02 | 0.04 | 0.00 | 0.00 | 0.00 | 0.00 | 0.10 | ▇▁▁▁▂ |
ManufacturingProcess41 | 1 | 0.99 | 0.02 | 0.05 | 0.00 | 0.00 | 0.00 | 0.00 | 0.20 | ▇▁▁▁▁ |
ManufacturingProcess42 | 0 | 1.00 | 11.21 | 1.94 | 0.00 | 11.40 | 11.60 | 11.70 | 12.10 | ▁▁▁▁▇ |
ManufacturingProcess43 | 0 | 1.00 | 0.91 | 0.87 | 0.00 | 0.60 | 0.80 | 1.02 | 11.00 | ▇▁▁▁▁ |
ManufacturingProcess44 | 0 | 1.00 | 1.81 | 0.32 | 0.00 | 1.80 | 1.90 | 1.90 | 2.10 | ▁▁▁▁▇ |
ManufacturingProcess45 | 0 | 1.00 | 2.14 | 0.41 | 0.00 | 2.10 | 2.20 | 2.30 | 2.60 | ▁▁▁▂▇ |
##################################
# Applying a Box-Cox transformation
##################################
# Fit the Box-Cox pre-processing model on the numeric predictors, then apply
# it to the same data. The fitted model and the transformed data are kept in
# separate objects.
DPA_BoxCox <- preProcess(DPA.Predictors.Numeric, method = c("BoxCox"))
DPA_BoxCoxTransformed <- predict(DPA_BoxCox, DPA.Predictors.Numeric)
##################################
# Gathering descriptive statistics
##################################
# Summarise the Box-Cox-transformed predictors; the outer parentheses make
# the assignment also print its value.
(DPA_BoxCoxTransformedSkimmed <- skim(DPA_BoxCoxTransformed))
Name | DPA_BoxCoxTransformed |
Number of rows | 176 |
Number of columns | 57 |
_______________________ | |
Column type frequency: | |
numeric | 57 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
BiologicalMaterial01 | 0 | 1.00 | 2.48 | 0.19 | 1.93 | 2.37 | 2.46 | 2.61 | 3.07 | ▁▅▇▃▁ |
BiologicalMaterial02 | 0 | 1.00 | 1.34 | 0.00 | 1.33 | 1.34 | 1.34 | 1.35 | 1.35 | ▂▆▇▇▃ |
BiologicalMaterial03 | 0 | 1.00 | 67.70 | 4.00 | 56.97 | 64.98 | 67.22 | 70.43 | 78.25 | ▂▅▇▆▁ |
BiologicalMaterial04 | 0 | 1.00 | 0.61 | 0.00 | 0.61 | 0.61 | 0.61 | 0.61 | 0.62 | ▂▅▇▃▁ |
BiologicalMaterial05 | 0 | 1.00 | 2.92 | 0.10 | 2.58 | 2.85 | 2.92 | 2.99 | 3.21 | ▁▃▇▅▁ |
BiologicalMaterial06 | 0 | 1.00 | 0.90 | 0.00 | 0.89 | 0.90 | 0.90 | 0.90 | 0.90 | ▁▆▇▇▂ |
BiologicalMaterial07 | 0 | 1.00 | 100.01 | 0.11 | 100.00 | 100.00 | 100.00 | 100.00 | 100.83 | ▇▁▁▁▁ |
BiologicalMaterial08 | 0 | 1.00 | 1.03 | 0.00 | 1.02 | 1.02 | 1.03 | 1.03 | 1.03 | ▁▃▇▅▂ |
BiologicalMaterial09 | 0 | 1.00 | 82.15 | 5.31 | 64.94 | 78.91 | 81.87 | 85.70 | 98.62 | ▁▃▇▅▁ |
BiologicalMaterial10 | 0 | 1.00 | 0.60 | 0.06 | 0.42 | 0.57 | 0.61 | 0.64 | 0.80 | ▁▅▇▃▁ |
BiologicalMaterial11 | 0 | 1.00 | 0.50 | 0.00 | 0.50 | 0.50 | 0.50 | 0.50 | 0.50 | ▁▃▇▅▂ |
BiologicalMaterial12 | 0 | 1.00 | 0.58 | 0.00 | 0.58 | 0.58 | 0.58 | 0.58 | 0.59 | ▁▃▇▅▂ |
ManufacturingProcess01 | 1 | 0.99 | 11.21 | 1.82 | 0.00 | 10.80 | 11.40 | 12.15 | 14.10 | ▁▁▁▅▇ |
ManufacturingProcess02 | 3 | 0.98 | 16.68 | 8.47 | 0.00 | 19.30 | 21.00 | 21.50 | 22.50 | ▂▁▁▁▇ |
ManufacturingProcess03 | 15 | 0.91 | 0.69 | 0.03 | 0.58 | 0.67 | 0.69 | 0.70 | 0.78 | ▁▃▆▇▁ |
ManufacturingProcess04 | 1 | 0.99 | 434192.61 | 5833.08 | 414960.00 | 430591.50 | 436177.50 | 438047.50 | 447457.50 | ▁▂▃▇▁ |
ManufacturingProcess05 | 1 | 0.99 | 0.50 | 0.00 | 0.50 | 0.50 | 0.50 | 0.50 | 0.50 | ▁▇▆▁▁ |
ManufacturingProcess06 | 2 | 0.99 | 0.50 | 0.00 | 0.50 | 0.50 | 0.50 | 0.50 | 0.50 | ▇▆▁▁▁ |
ManufacturingProcess07 | 1 | 0.99 | 177.48 | 0.50 | 177.00 | 177.00 | 177.00 | 178.00 | 178.00 | ▇▁▁▁▇ |
ManufacturingProcess08 | 1 | 0.99 | 177.55 | 0.50 | 177.00 | 177.00 | 178.00 | 178.00 | 178.00 | ▆▁▁▁▇ |
ManufacturingProcess09 | 0 | 1.00 | 1043.11 | 69.52 | 755.72 | 1007.06 | 1045.12 | 1081.32 | 1217.70 | ▁▁▆▇▁ |
ManufacturingProcess10 | 9 | 0.95 | 0.64 | 0.00 | 0.63 | 0.64 | 0.64 | 0.64 | 0.65 | ▁▃▇▃▁ |
ManufacturingProcess11 | 10 | 0.94 | 9.39 | 0.72 | 7.50 | 9.00 | 9.40 | 9.90 | 11.50 | ▂▆▇▅▁ |
ManufacturingProcess12 | 1 | 0.99 | 857.81 | 1784.53 | 0.00 | 0.00 | 0.00 | 0.00 | 4549.00 | ▇▁▁▁▂ |
ManufacturingProcess13 | 0 | 1.00 | 0.50 | 0.00 | 0.50 | 0.50 | 0.50 | 0.50 | 0.50 | ▂▃▇▁▁ |
ManufacturingProcess14 | 1 | 0.99 | 47640.32 | 695.66 | 45697.94 | 47309.35 | 47666.35 | 48004.79 | 50221.25 | ▂▅▇▂▁ |
ManufacturingProcess15 | 0 | 1.00 | 0.50 | 0.00 | 0.50 | 0.50 | 0.50 | 0.50 | 0.50 | ▂▇▇▂▁ |
ManufacturingProcess16 | 0 | 1.00 | 4565.80 | 351.70 | 0.00 | 4560.75 | 4588.00 | 4619.00 | 4852.00 | ▁▁▁▁▇ |
ManufacturingProcess17 | 0 | 1.00 | 0.50 | 0.00 | 0.50 | 0.50 | 0.50 | 0.50 | 0.50 | ▁▆▇▁▁ |
ManufacturingProcess18 | 0 | 1.00 | 4809.68 | 367.48 | 0.00 | 4813.00 | 4835.00 | 4862.00 | 4971.00 | ▁▁▁▁▇ |
ManufacturingProcess19 | 0 | 1.00 | 0.50 | 0.00 | 0.50 | 0.50 | 0.50 | 0.50 | 0.50 | ▁▂▇▃▂ |
ManufacturingProcess20 | 0 | 1.00 | 4556.46 | 349.01 | 0.00 | 4552.75 | 4582.00 | 4609.50 | 4759.00 | ▁▁▁▁▇ |
ManufacturingProcess21 | 0 | 1.00 | -0.16 | 0.78 | -1.80 | -0.60 | -0.30 | 0.00 | 3.60 | ▂▇▂▁▁ |
ManufacturingProcess22 | 1 | 0.99 | 5.41 | 3.33 | 0.00 | 3.00 | 5.00 | 8.00 | 12.00 | ▇▇▇▅▅ |
ManufacturingProcess23 | 1 | 0.99 | 3.02 | 1.66 | 0.00 | 2.00 | 3.00 | 4.00 | 6.00 | ▇▆▇▆▇ |
ManufacturingProcess24 | 1 | 0.99 | 8.83 | 5.80 | 0.00 | 4.00 | 8.00 | 14.00 | 23.00 | ▇▇▅▆▁ |
ManufacturingProcess25 | 5 | 0.97 | 4828.18 | 373.48 | 0.00 | 4832.00 | 4855.00 | 4877.00 | 4990.00 | ▁▁▁▁▇ |
ManufacturingProcess26 | 5 | 0.97 | 6015.60 | 464.87 | 0.00 | 6019.50 | 6047.00 | 6070.50 | 6161.00 | ▁▁▁▁▇ |
ManufacturingProcess27 | 5 | 0.97 | 4562.51 | 353.98 | 0.00 | 4560.00 | 4587.00 | 4609.00 | 4710.00 | ▁▁▁▁▇ |
ManufacturingProcess28 | 5 | 0.97 | 6.59 | 5.25 | 0.00 | 0.00 | 10.40 | 10.75 | 11.50 | ▅▁▁▁▇ |
ManufacturingProcess29 | 5 | 0.97 | 20.01 | 1.66 | 0.00 | 19.70 | 19.90 | 20.40 | 22.00 | ▁▁▁▁▇ |
ManufacturingProcess30 | 5 | 0.97 | 9.16 | 0.98 | 0.00 | 8.80 | 9.10 | 9.70 | 11.20 | ▁▁▁▅▇ |
ManufacturingProcess31 | 5 | 0.97 | 70.18 | 5.56 | 0.00 | 70.10 | 70.80 | 71.40 | 72.50 | ▁▁▁▁▇ |
ManufacturingProcess32 | 0 | 1.00 | 0.99 | 0.00 | 0.99 | 0.99 | 0.99 | 0.99 | 0.99 | ▁▃▇▆▂ |
ManufacturingProcess33 | 5 | 0.97 | 2021.48 | 157.47 | 1567.50 | 1921.50 | 2047.50 | 2112.00 | 2449.50 | ▁▃▇▅▁ |
ManufacturingProcess34 | 5 | 0.97 | 2.61 | 0.14 | 2.14 | 2.62 | 2.62 | 2.62 | 2.88 | ▁▂▁▇▁ |
ManufacturingProcess35 | 5 | 0.97 | 122865.63 | 5353.81 | 107184.00 | 120049.50 | 122512.00 | 125750.75 | 136241.50 | ▁▂▇▅▂ |
ManufacturingProcess36 | 5 | 0.97 | -3.93 | 0.04 | -4.07 | -3.96 | -3.91 | -3.91 | -3.82 | ▁▂▇▇▃ |
ManufacturingProcess37 | 0 | 1.00 | 1.01 | 0.45 | 0.00 | 0.70 | 1.00 | 1.30 | 2.30 | ▂▇▇▃▁ |
ManufacturingProcess38 | 0 | 1.00 | 2.53 | 0.65 | 0.00 | 2.00 | 3.00 | 3.00 | 3.00 | ▁▁▁▅▇ |
ManufacturingProcess39 | 0 | 1.00 | 6.85 | 1.51 | 0.00 | 7.10 | 7.20 | 7.30 | 7.50 | ▁▁▁▁▇ |
ManufacturingProcess40 | 1 | 0.99 | 0.02 | 0.04 | 0.00 | 0.00 | 0.00 | 0.00 | 0.10 | ▇▁▁▁▂ |
ManufacturingProcess41 | 1 | 0.99 | 0.02 | 0.05 | 0.00 | 0.00 | 0.00 | 0.00 | 0.20 | ▇▁▁▁▁ |
ManufacturingProcess42 | 0 | 1.00 | 11.21 | 1.94 | 0.00 | 11.40 | 11.60 | 11.70 | 12.10 | ▁▁▁▁▇ |
ManufacturingProcess43 | 0 | 1.00 | 0.91 | 0.87 | 0.00 | 0.60 | 0.80 | 1.02 | 11.00 | ▇▁▁▁▁ |
ManufacturingProcess44 | 0 | 1.00 | 1.81 | 0.32 | 0.00 | 1.80 | 1.90 | 1.90 | 2.10 | ▁▁▁▁▇ |
ManufacturingProcess45 | 0 | 1.00 | 2.14 | 0.41 | 0.00 | 2.10 | 2.20 | 2.30 | 2.60 | ▁▁▁▂▇ |
##################################
# Applying a Yeo-Johnson transformation
##################################
# Fit the Yeo-Johnson pre-processing model on the numeric predictors, then
# apply it to the same data. The fitted model and the transformed data are
# kept in separate objects.
DPA_YeoJohnson <- preProcess(
  DPA.Predictors.Numeric,
  method = c("YeoJohnson")
)
DPA_YeoJohnsonTransformed <- predict(DPA_YeoJohnson, DPA.Predictors.Numeric)
##################################
# Gathering descriptive statistics
##################################
# Summarise the Yeo-Johnson-transformed predictors; the outer parentheses
# make the assignment also print its value.
(DPA_YeoJohnsonTransformedSkimmed <- skim(DPA_YeoJohnsonTransformed))
Name | DPA_YeoJohnsonTransformed |
Number of rows | 176 |
Number of columns | 57 |
_______________________ | |
Column type frequency: | |
numeric | 57 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
BiologicalMaterial01 | 0 | 1.00 | 2.52 | 0.15 | 2.09 | 2.43 | 2.50 | 2.62 | 2.98 | ▁▅▇▃▁ |
BiologicalMaterial02 | 0 | 1.00 | 1.34 | 0.00 | 1.33 | 1.34 | 1.34 | 1.34 | 1.35 | ▂▆▇▇▃ |
BiologicalMaterial03 | 0 | 1.00 | 38.12 | 1.88 | 33.00 | 36.84 | 37.90 | 39.40 | 43.02 | ▂▅▇▆▂ |
BiologicalMaterial04 | 0 | 1.00 | 0.56 | 0.00 | 0.55 | 0.56 | 0.56 | 0.56 | 0.56 | ▂▅▇▃▁ |
BiologicalMaterial05 | 0 | 1.00 | 2.91 | 0.09 | 2.61 | 2.84 | 2.91 | 2.97 | 3.18 | ▁▃▇▅▁ |
BiologicalMaterial06 | 0 | 1.00 | 0.87 | 0.00 | 0.87 | 0.87 | 0.87 | 0.88 | 0.88 | ▁▆▇▇▂ |
BiologicalMaterial07 | 0 | 1.00 | 100.01 | 0.11 | 100.00 | 100.00 | 100.00 | 100.00 | 100.83 | ▇▁▁▁▁ |
BiologicalMaterial08 | 0 | 1.00 | 0.97 | 0.00 | 0.97 | 0.97 | 0.97 | 0.97 | 0.98 | ▁▃▇▅▂ |
BiologicalMaterial09 | 0 | 1.00 | 12.85 | 0.42 | 11.44 | 12.60 | 12.84 | 13.13 | 14.08 | ▁▃▇▅▁ |
BiologicalMaterial10 | 0 | 1.00 | 0.48 | 0.01 | 0.45 | 0.48 | 0.48 | 0.49 | 0.52 | ▁▃▇▃▁ |
BiologicalMaterial11 | 0 | 1.00 | 0.36 | 0.00 | 0.36 | 0.36 | 0.36 | 0.36 | 0.36 | ▁▃▇▆▂ |
BiologicalMaterial12 | 0 | 1.00 | 0.54 | 0.00 | 0.54 | 0.54 | 0.54 | 0.54 | 0.54 | ▁▃▇▅▂ |
ManufacturingProcess01 | 1 | 0.99 | 11.21 | 1.82 | 0.00 | 10.80 | 11.40 | 12.15 | 14.10 | ▁▁▁▅▇ |
ManufacturingProcess02 | 3 | 0.98 | 52.47 | 26.79 | 0.00 | 58.59 | 66.12 | 68.39 | 73.01 | ▂▁▁▁▇ |
ManufacturingProcess03 | 15 | 0.91 | 1.54 | 0.02 | 1.47 | 1.53 | 1.54 | 1.55 | 1.60 | ▁▃▆▇▁ |
ManufacturingProcess04 | 1 | 0.99 | 931.85 | 6.27 | 911.00 | 928.00 | 934.00 | 936.00 | 946.00 | ▁▂▃▇▁ |
ManufacturingProcess05 | 1 | 0.99 | 1001.69 | 30.53 | 923.00 | 986.75 | 999.20 | 1008.85 | 1175.30 | ▁▇▁▁▁ |
ManufacturingProcess06 | 2 | 0.99 | 207.40 | 2.70 | 203.00 | 205.70 | 206.80 | 208.70 | 227.40 | ▇▃▁▁▁ |
ManufacturingProcess07 | 1 | 0.99 | 177.48 | 0.50 | 177.00 | 177.00 | 177.00 | 178.00 | 178.00 | ▇▁▁▁▇ |
ManufacturingProcess08 | 1 | 0.99 | 177.55 | 0.50 | 177.00 | 177.00 | 178.00 | 178.00 | 178.00 | ▆▁▁▁▇ |
ManufacturingProcess09 | 0 | 1.00 | 45.66 | 1.55 | 38.89 | 44.89 | 45.73 | 46.52 | 49.36 | ▁▁▅▇▂ |
ManufacturingProcess10 | 9 | 0.95 | 0.56 | 0.00 | 0.56 | 0.56 | 0.56 | 0.56 | 0.57 | ▁▃▇▃▁ |
ManufacturingProcess11 | 10 | 0.94 | 11.33 | 0.94 | 8.87 | 10.82 | 11.34 | 12.00 | 14.13 | ▂▆▇▅▁ |
ManufacturingProcess12 | 1 | 0.99 | 857.81 | 1784.53 | 0.00 | 0.00 | 0.00 | 0.00 | 4549.00 | ▇▁▁▁▂ |
ManufacturingProcess13 | 0 | 1.00 | 0.38 | 0.00 | 0.38 | 0.38 | 0.38 | 0.38 | 0.38 | ▂▃▇▂▁ |
ManufacturingProcess14 | 1 | 0.99 | 53951.62 | 797.39 | 51725.69 | 53572.20 | 53981.39 | 54369.35 | 56910.89 | ▂▅▇▂▁ |
ManufacturingProcess15 | 0 | 1.00 | 0.34 | 0.00 | 0.34 | 0.34 | 0.34 | 0.34 | 0.34 | ▂▆▇▂▁ |
ManufacturingProcess16 | 0 | 1.00 | 4565.80 | 351.70 | 0.00 | 4560.75 | 4588.00 | 4619.00 | 4852.00 | ▁▁▁▁▇ |
ManufacturingProcess17 | 0 | 1.00 | 34.34 | 1.25 | 31.30 | 33.50 | 34.40 | 35.10 | 40.00 | ▂▇▆▁▁ |
ManufacturingProcess18 | 0 | 1.00 | 4809.68 | 367.48 | 0.00 | 4813.00 | 4835.00 | 4862.00 | 4971.00 | ▁▁▁▁▇ |
ManufacturingProcess19 | 0 | 1.00 | 0.33 | 0.00 | 0.33 | 0.33 | 0.33 | 0.33 | 0.33 | ▁▂▇▃▂ |
ManufacturingProcess20 | 0 | 1.00 | 4556.46 | 349.01 | 0.00 | 4552.75 | 4582.00 | 4609.50 | 4759.00 | ▁▁▁▁▇ |
ManufacturingProcess21 | 0 | 1.00 | -0.33 | 0.75 | -2.94 | -0.73 | -0.33 | 0.00 | 1.82 | ▁▂▇▇▁ |
ManufacturingProcess22 | 1 | 0.99 | 2.84 | 1.35 | 0.00 | 1.99 | 2.87 | 3.95 | 5.14 | ▅▇▆▇▇ |
ManufacturingProcess23 | 1 | 0.99 | 2.07 | 0.96 | 0.00 | 1.55 | 2.15 | 2.69 | 3.65 | ▁▅▇▃▅ |
ManufacturingProcess24 | 1 | 0.99 | 3.80 | 1.84 | 0.00 | 2.41 | 3.85 | 5.48 | 7.37 | ▅▇▇▇▆ |
ManufacturingProcess25 | 5 | 0.97 | 4828.18 | 373.48 | 0.00 | 4832.00 | 4855.00 | 4877.00 | 4990.00 | ▁▁▁▁▇ |
ManufacturingProcess26 | 5 | 0.97 | 6015.60 | 464.87 | 0.00 | 6019.50 | 6047.00 | 6070.50 | 6161.00 | ▁▁▁▁▇ |
ManufacturingProcess27 | 5 | 0.97 | 4562.51 | 353.98 | 0.00 | 4560.00 | 4587.00 | 4609.00 | 4710.00 | ▁▁▁▁▇ |
ManufacturingProcess28 | 5 | 0.97 | 3.31 | 2.63 | 0.00 | 0.00 | 5.27 | 5.39 | 5.65 | ▅▁▁▁▇ |
ManufacturingProcess29 | 5 | 0.97 | 20.01 | 1.66 | 0.00 | 19.70 | 19.90 | 20.40 | 22.00 | ▁▁▁▁▇ |
ManufacturingProcess30 | 5 | 0.97 | 9.16 | 0.98 | 0.00 | 8.80 | 9.10 | 9.70 | 11.20 | ▁▁▁▅▇ |
ManufacturingProcess31 | 5 | 0.97 | 70.18 | 5.56 | 0.00 | 70.10 | 70.80 | 71.40 | 72.50 | ▁▁▁▁▇ |
ManufacturingProcess32 | 0 | 1.00 | 0.95 | 0.00 | 0.95 | 0.95 | 0.95 | 0.95 | 0.96 | ▁▃▇▆▂ |
ManufacturingProcess33 | 5 | 0.97 | 2565.27 | 202.24 | 1983.42 | 2436.77 | 2598.54 | 2681.43 | 3115.96 | ▁▃▇▅▁ |
ManufacturingProcess34 | 5 | 0.97 | 2.49 | 0.05 | 2.30 | 2.50 | 2.50 | 2.50 | 2.60 | ▁▂▁▇▁ |
ManufacturingProcess35 | 5 | 0.97 | 495.60 | 10.82 | 463.00 | 490.00 | 495.00 | 501.50 | 522.00 | ▁▂▇▅▂ |
ManufacturingProcess36 | 5 | 0.97 | 0.02 | 0.00 | 0.02 | 0.02 | 0.02 | 0.02 | 0.02 | ▂▇▇▁▃ |
ManufacturingProcess37 | 0 | 1.00 | 0.80 | 0.30 | 0.00 | 0.60 | 0.81 | 1.01 | 1.57 | ▁▅▇▅▂ |
ManufacturingProcess38 | 0 | 1.00 | 2.53 | 0.65 | 0.00 | 2.00 | 3.00 | 3.00 | 3.00 | ▁▁▁▅▇ |
ManufacturingProcess39 | 0 | 1.00 | 6.85 | 1.51 | 0.00 | 7.10 | 7.20 | 7.30 | 7.50 | ▁▁▁▁▇ |
ManufacturingProcess40 | 1 | 0.99 | 0.02 | 0.04 | 0.00 | 0.00 | 0.00 | 0.00 | 0.10 | ▇▁▁▁▂ |
ManufacturingProcess41 | 1 | 0.99 | 0.02 | 0.05 | 0.00 | 0.00 | 0.00 | 0.00 | 0.20 | ▇▁▁▁▁ |
ManufacturingProcess42 | 0 | 1.00 | 11.21 | 1.94 | 0.00 | 11.40 | 11.60 | 11.70 | 12.10 | ▁▁▁▁▇ |
ManufacturingProcess43 | 0 | 1.00 | 0.40 | 0.09 | 0.00 | 0.35 | 0.41 | 0.46 | 0.74 | ▁▁▇▃▁ |
ManufacturingProcess44 | 0 | 1.00 | 1.81 | 0.32 | 0.00 | 1.80 | 1.90 | 1.90 | 2.10 | ▁▁▁▁▇ |
ManufacturingProcess45 | 0 | 1.00 | 2.14 | 0.41 | 0.00 | 2.10 | 2.20 | 2.30 | 2.60 | ▁▁▁▂▇ |
##################################
# Applying an exponential transformation
##################################
# Fit the "expoTrans" pre-processing step on the numeric predictors; the
# fitted object holds whatever the transformation needs to be re-applied.
DPA_ExpoTrans <- preProcess(DPA.Predictors.Numeric, method = c("expoTrans"))
# Apply the fitted transformation to the same numeric predictors.
DPA_ExpoTransTransformed <- predict(DPA_ExpoTrans, DPA.Predictors.Numeric)
##################################
# Gathering descriptive statistics
##################################
# The outer parentheses make the assignment visible, so the skim summary is
# both stored and printed.
(DPA_ExpoTransTransformedSkimmed <- skim(DPA_ExpoTransTransformed))
Name | DPA_ExpoTransTransformed |
Number of rows | 176 |
Number of columns | 57 |
_______________________ | |
Column type frequency: | |
numeric | 57 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
BiologicalMaterial01 | 0 | 1.00 | 4.57 | 0.35 | 3.59 | 4.37 | 4.53 | 4.800000e+00 | 5.620000e+00 | ▁▅▇▃▁ |
BiologicalMaterial02 | 0 | 1.00 | 55.69 | 4.03 | 46.87 | 52.68 | 55.09 | 5.874000e+01 | 6.475000e+01 | ▂▇▆▅▃ |
BiologicalMaterial03 | 0 | 1.00 | 67.70 | 4.00 | 56.97 | 64.98 | 67.22 | 7.043000e+01 | 7.825000e+01 | ▂▅▇▆▁ |
BiologicalMaterial04 | 0 | 1.00 | 4.64 | 0.14 | 4.30 | 4.55 | 4.64 | 4.740000e+00 | 5.070000e+00 | ▂▅▇▃▁ |
BiologicalMaterial05 | 0 | 1.00 | 11.81 | 0.69 | 9.53 | 11.32 | 11.81 | 1.232000e+01 | 1.384000e+01 | ▁▃▇▅▁ |
BiologicalMaterial06 | 0 | 1.00 | 48.91 | 3.75 | 40.60 | 46.06 | 48.46 | 5.134000e+01 | 5.938000e+01 | ▂▇▆▅▁ |
BiologicalMaterial07 | 0 | 1.00 | 100.01 | 0.11 | 100.00 | 100.00 | 100.00 | 1.000000e+02 | 1.008300e+02 | ▇▁▁▁▁ |
BiologicalMaterial08 | 0 | 1.00 | 17.49 | 0.68 | 15.88 | 17.06 | 17.51 | 1.788000e+01 | 1.914000e+01 | ▁▅▇▃▂ |
BiologicalMaterial09 | 0 | 1.00 | 60.14 | 5.34 | 44.02 | 56.80 | 59.73 | 6.365000e+01 | 7.795000e+01 | ▁▅▇▃▁ |
BiologicalMaterial10 | 0 | 1.00 | 1.20 | 0.07 | 1.00 | 1.16 | 1.20 | 1.230000e+00 | 1.390000e+00 | ▁▅▇▃▁ |
BiologicalMaterial11 | 0 | 1.00 | 146.95 | 4.82 | 135.81 | 143.82 | 146.08 | 1.496000e+02 | 1.587300e+02 | ▂▆▇▃▂ |
BiologicalMaterial12 | 0 | 1.00 | 20.20 | 0.77 | 18.35 | 19.73 | 20.12 | 2.075000e+01 | 2.221000e+01 | ▂▆▇▃▂ |
ManufacturingProcess01 | 1 | 0.99 | 75.63 | 24.57 | 0.00 | 62.39 | 73.80 | 9.089000e+01 | 1.550000e+02 | ▁▃▇▂▁ |
ManufacturingProcess02 | 3 | 0.98 | 204.99 | 110.73 | 0.00 | 186.30 | 257.09 | 2.825100e+02 | 3.409700e+02 | ▅▁▃▇▇ |
ManufacturingProcess03 | 15 | 0.91 | 118.37 | 10.46 | 89.20 | 113.47 | 118.11 | 1.229400e+02 | 1.502100e+02 | ▁▂▇▁▁ |
ManufacturingProcess04 | 1 | 0.99 | 931.85 | 6.27 | 911.00 | 928.00 | 934.00 | 9.360000e+02 | 9.460000e+02 | ▁▂▃▇▁ |
ManufacturingProcess05 | 1 | 0.99 | 1001.69 | 30.53 | 923.00 | 986.75 | 999.20 | 1.008850e+03 | 1.175300e+03 | ▁▇▁▁▁ |
ManufacturingProcess06 | 2 | 0.99 | 0.25 | 0.00 | 0.25 | 0.25 | 0.25 | 2.500000e-01 | 2.500000e-01 | ▁▁▇▁▁ |
ManufacturingProcess07 | 1 | 0.99 | 177.48 | 0.50 | 177.00 | 177.00 | 177.00 | 1.780000e+02 | 1.780000e+02 | ▇▁▁▁▇ |
ManufacturingProcess08 | 1 | 0.99 | 177.55 | 0.50 | 177.00 | 177.00 | 178.00 | 1.780000e+02 | 1.780000e+02 | ▆▁▁▁▇ |
ManufacturingProcess09 | 0 | 1.00 | 45.66 | 1.55 | 38.89 | 44.89 | 45.73 | 4.652000e+01 | 4.936000e+01 | ▁▁▅▇▂ |
ManufacturingProcess10 | 9 | 0.95 | 3.41 | 0.06 | 3.23 | 3.37 | 3.41 | 3.450000e+00 | 3.570000e+00 | ▁▃▇▃▁ |
ManufacturingProcess11 | 10 | 0.94 | 9.75 | 0.77 | 7.73 | 9.33 | 9.76 | 1.030000e+01 | 1.204000e+01 | ▂▆▇▅▁ |
ManufacturingProcess12 | 1 | 0.99 | 857.81 | 1784.53 | 0.00 | 0.00 | 0.00 | 0.000000e+00 | 4.549000e+03 | ▇▁▁▁▂ |
ManufacturingProcess13 | 0 | 1.00 | 1.19 | 0.00 | 1.19 | 1.19 | 1.19 | 1.190000e+00 | 1.190000e+00 | ▁▁▁▂▇ |
ManufacturingProcess14 | 1 | 0.99 | 4853.87 | 54.52 | 4701.00 | 4828.00 | 4856.00 | 4.882500e+03 | 5.055000e+03 | ▁▅▇▂▁ |
ManufacturingProcess15 | 0 | 1.00 | 6038.92 | 58.31 | 5904.00 | 6010.00 | 6031.50 | 6.061000e+03 | 6.233000e+03 | ▂▇▆▂▁ |
ManufacturingProcess16 | 0 | 1.00 | 4565.80 | 351.70 | 0.00 | 4560.75 | 4588.00 | 4.619000e+03 | 4.852000e+03 | ▁▁▁▁▇ |
ManufacturingProcess17 | 0 | 1.00 | 34.34 | 1.25 | 31.30 | 33.50 | 34.40 | 3.510000e+01 | 4.000000e+01 | ▂▇▆▁▁ |
ManufacturingProcess18 | 0 | 1.00 | 4809.68 | 367.48 | 0.00 | 4813.00 | 4835.00 | 4.862000e+03 | 4.971000e+03 | ▁▁▁▁▇ |
ManufacturingProcess19 | 0 | 1.00 | 6028.20 | 45.58 | 5890.00 | 6000.75 | 6022.00 | 6.050250e+03 | 6.146000e+03 | ▁▃▇▃▂ |
ManufacturingProcess20 | 0 | 1.00 | 4556.46 | 349.01 | 0.00 | 4552.75 | 4582.00 | 4.609500e+03 | 4.759000e+03 | ▁▁▁▁▇ |
ManufacturingProcess21 | 0 | 1.00 | -0.31 | 0.73 | -2.88 | -0.70 | -0.32 | 0.000000e+00 | 1.700000e+00 | ▁▂▇▇▂ |
ManufacturingProcess22 | 1 | 0.99 | 4.44 | 2.43 | 0.00 | 2.76 | 4.36 | 6.450000e+00 | 8.740000e+00 | ▃▆▇▅▆ |
ManufacturingProcess23 | 1 | 0.99 | 2.66 | 1.36 | 0.00 | 1.87 | 2.72 | 3.520000e+00 | 4.960000e+00 | ▇▆▇▆▇ |
ManufacturingProcess24 | 1 | 0.99 | 7.06 | 4.07 | 0.00 | 3.71 | 6.90 | 1.086000e+01 | 1.534000e+01 | ▇▇▇▇▅ |
ManufacturingProcess25 | 5 | 0.97 | 4828.18 | 373.48 | 0.00 | 4832.00 | 4855.00 | 4.877000e+03 | 4.990000e+03 | ▁▁▁▁▇ |
ManufacturingProcess26 | 5 | 0.97 | 6015.60 | 464.87 | 0.00 | 6019.50 | 6047.00 | 6.070500e+03 | 6.161000e+03 | ▁▁▁▁▇ |
ManufacturingProcess27 | 5 | 0.97 | 4562.51 | 353.98 | 0.00 | 4560.00 | 4587.00 | 4.609000e+03 | 4.710000e+03 | ▁▁▁▁▇ |
ManufacturingProcess28 | 5 | 0.97 | 13.92 | 11.12 | 0.00 | 0.00 | 21.38 | 2.269000e+01 | 2.571000e+01 | ▅▁▁▁▇ |
ManufacturingProcess29 | 5 | 0.97 | 817.72 | 165.31 | 0.00 | 721.95 | 761.81 | 8.713400e+02 | 1.338620e+03 | ▁▁▇▃▂ |
ManufacturingProcess30 | 5 | 0.97 | 55.63 | 13.43 | 0.00 | 47.55 | 52.57 | 6.417000e+01 | 1.048200e+02 | ▁▂▇▃▁ |
ManufacturingProcess31 | 5 | 0.97 | 936793657.23 | 260438099.97 | 0.00 | 783724059.45 | 949200823.93 | 1.118583e+09 | 1.511482e+09 | ▁▂▆▇▂ |
ManufacturingProcess32 | 0 | 1.00 | 158.47 | 5.40 | 143.00 | 155.00 | 158.00 | 1.620000e+02 | 1.730000e+02 | ▁▃▇▃▁ |
ManufacturingProcess33 | 5 | 0.97 | 0.25 | 0.00 | 0.25 | 0.25 | 0.25 | 2.500000e-01 | 2.500000e-01 | ▁▁▇▁▁ |
ManufacturingProcess34 | 5 | 0.97 | 15.42 | 1.04 | 12.06 | 15.51 | 15.51 | 1.551000e+01 | 1.757000e+01 | ▁▂▁▇▁ |
ManufacturingProcess35 | 5 | 0.97 | 495.60 | 10.82 | 463.00 | 490.00 | 495.00 | 5.015000e+02 | 5.220000e+02 | ▁▂▇▅▂ |
ManufacturingProcess36 | 5 | 0.97 | 0.02 | 0.00 | 0.02 | 0.02 | 0.02 | 2.000000e-02 | 2.000000e-02 | ▁▂▇▇▃ |
ManufacturingProcess37 | 0 | 1.00 | 0.86 | 0.33 | 0.00 | 0.63 | 0.87 | 1.080000e+00 | 1.680000e+00 | ▁▅▇▅▂ |
ManufacturingProcess38 | 0 | 1.00 | 17.90 | 9.12 | 0.00 | 7.60 | 25.39 | 2.539000e+01 | 2.539000e+01 | ▁▅▁▁▇ |
ManufacturingProcess39 | 0 | 1.00 | 34530.65 | 10439.49 | 0.00 | 31479.98 | 36638.66 | 4.264267e+04 | 5.776354e+04 | ▁▁▆▇▂ |
ManufacturingProcess40 | 1 | 0.99 | 0.02 | 0.04 | 0.00 | 0.00 | 0.00 | 0.000000e+00 | 1.000000e-01 | ▇▁▁▁▂ |
ManufacturingProcess41 | 1 | 0.99 | 0.02 | 0.04 | 0.00 | 0.00 | 0.00 | 0.000000e+00 | 1.400000e-01 | ▇▁▁▁▁ |
ManufacturingProcess42 | 0 | 1.00 | 65890.09 | 19774.40 | 0.00 | 57704.41 | 69889.64 | 7.691563e+04 | 1.128292e+05 | ▁▂▆▇▂ |
ManufacturingProcess43 | 0 | 1.00 | 0.55 | 0.16 | 0.00 | 0.46 | 0.56 | 6.500000e-01 | 1.040000e+00 | ▁▃▇▅▁ |
ManufacturingProcess44 | 0 | 1.00 | 111.67 | 37.21 | 0.00 | 92.04 | 126.26 | 1.262600e+02 | 2.374400e+02 | ▁▇▇▂▁ |
ManufacturingProcess45 | 0 | 1.00 | 42.07 | 16.01 | 0.00 | 33.21 | 40.71 | 4.986000e+01 | 9.145000e+01 | ▂▅▇▃▁ |
##################################
# Loading dataset
##################################
DPA <- ChemicalManufacturingProcess
##################################
# Listing all predictors
##################################
# Every column except the response variable "Yield".
DPA.Predictors <- DPA[, !names(DPA) %in% c("Yield")]
##################################
# Listing all factor predictors
##################################
# vapply() always yields a logical mask (sapply() returns a list for
# zero-column input), and drop = FALSE keeps the result a data frame even
# when exactly one factor column is selected, so names() on it stays valid.
DPA.Predictors.Factor <- DPA.Predictors[
  ,
  vapply(DPA.Predictors, is.factor, logical(1)),
  drop = FALSE
]
##################################
# Applying dummy variable creation
##################################
# Dummy variables are only built when at least one factor predictor exists;
# otherwise a message is printed and nothing is created.
if (length(names(DPA.Predictors.Factor)) > 0) {
  print(paste0("There are ",
               length(names(DPA.Predictors.Factor)),
               " factor variables for dummy variable creation."))
  # Fit the dummy-variable encoder on all predictors (Yield is the response).
  DPA_DummyVariables <- dummyVars(Yield ~ ., data = DPA)
  DPA_DummyVariablesCreated <- predict(DPA_DummyVariables, DPA)
  ##################################
  # Gathering descriptive statistics
  ##################################
  # The outer parentheses print the skim summary as well as storing it.
  (DPA_DummyVariablesCreatedSkimmed <- skim(DPA_DummyVariablesCreated))
} else {
  print("There are no factor variables for dummy variable creation.")
}
## [1] "There are no factor variables for dummy variable creation."
##################################
# Loading dataset
##################################
DPA <- ChemicalManufacturingProcess
##################################
# Listing all predictors
##################################
# Every column except the response variable "Yield".
DPA.Predictors <- DPA[, !names(DPA) %in% c("Yield")]
##################################
# Listing all numeric predictors
##################################
# vapply() guarantees a logical mask, and drop = FALSE keeps a data frame
# even for a single selected column, so ncol() below always returns a count.
DPA.Predictors.Numeric <- DPA.Predictors[
  ,
  vapply(DPA.Predictors, is.numeric, logical(1)),
  drop = FALSE
]
ncol(DPA.Predictors.Numeric)
## [1] 57
##################################
# Formulating the scatter plot
##################################
# One scatter panel per numeric predictor against Yield, drawn as points
# with a smoother overlaid; panels are laid out five per row, with enough
# rows to hold every predictor.
featurePlot(
  x = DPA.Predictors.Numeric,
  y = DPA[["Yield"]],
  plot = "scatter",
  type = c("p", "smooth"),
  span = 0.5,
  layout = c(5, ceiling(ncol(DPA.Predictors.Numeric) / 5))
)