##################################
# Loading R libraries
##################################
library(AppliedPredictiveModeling)
library(caret)
library(rpart)
library(lattice)
library(dplyr)
library(tidyr)
library(moments)
library(skimr)
library(RANN)
library(mlbench)
library(pls)
library(corrplot)
library(lares)
library(DMwR2)
library(gridExtra)
library(rattle)
library(rpart.plot)
library(RColorBrewer)
library(stats)
library(nnet)
library(elasticnet)
library(earth)
library(party)
library(kernlab)
library(randomForest)
library(Cubist)
library(pROC)
library(ggpubr)
library(mda)
library(klaR)
library(pamr)
library(themis)
library(ROSE)
##################################
# Loading source and
# formulating the train set
##################################
data(Sonar)
Sonar.Original <- Sonar

Sonar.M <- Sonar[Sonar$Class=="M",]
Sonar.R <- Sonar[Sonar$Class=="R",]

set.seed(12345678)
Sonar.R.Reduced <- Sonar.R[sample(1:nrow(Sonar.R),25),]

Sonar <- as.data.frame(rbind(Sonar.M,Sonar.R.Reduced))
Sonar$Class <- factor(Sonar$Class,
                      levels=c("M","R"))

Sonar_Train <- Sonar[,c("Class","V1","V11")]
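##################################
# Supplementary check (not in the
# original output): confirming the
# engineered class imbalance
##################################
# Keeping all M rows while sampling only 25 R rows deliberately
# imbalances the train set; this tabulation should agree with the
# 111 M : 25 R split reported by summary() below.
table(Sonar_Train$Class)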
##################################
# Performing a general exploration of the train set
##################################
dim(Sonar_Train)
## [1] 136 3
str(Sonar_Train)
## 'data.frame': 136 obs. of 3 variables:
## $ Class: Factor w/ 2 levels "M","R": 1 1 1 1 1 1 1 1 1 1 ...
## $ V1 : num 0.0491 0.1313 0.0201 0.0629 0.0335 ...
## $ V11 : num 0.0947 0.2907 0.2251 0.5466 0.5533 ...
summary(Sonar_Train)
## Class V1 V11
## M:111 Min. :0.00150 Min. :0.0523
## R: 25 1st Qu.:0.01550 1st Qu.:0.1780
## Median :0.02365 Median :0.2503
## Mean :0.03188 Mean :0.2642
## 3rd Qu.:0.03925 3rd Qu.:0.3222
## Max. :0.13710 Max. :0.7342
##################################
# Formulating a data type assessment summary
##################################
PDA <- Sonar_Train

(PDA.Summary <- data.frame(
  Column.Index=c(1:length(names(PDA))),
  Column.Name= names(PDA),
  Column.Type=sapply(PDA, function(x) class(x)),
  row.names=NULL)
)
## Column.Index Column.Name Column.Type
## 1 1 Class factor
## 2 2 V1 numeric
## 3 3 V11 numeric
##################################
# Loading dataset
##################################
DQA <- Sonar_Train
##################################
# Formulating an overall data quality assessment summary
##################################
(DQA.Summary <- data.frame(
  Column.Index=c(1:length(names(DQA))),
  Column.Name= names(DQA),
  Column.Type=sapply(DQA, function(x) class(x)),
  Row.Count=sapply(DQA, function(x) nrow(DQA)),
  NA.Count=sapply(DQA,function(x)sum(is.na(x))),
  Fill.Rate=sapply(DQA,function(x)format(round((sum(!is.na(x))/nrow(DQA)),3),nsmall=3)),
  row.names=NULL)
)
## Column.Index Column.Name Column.Type Row.Count NA.Count Fill.Rate
## 1 1 Class factor 136 0 1.000
## 2 2 V1 numeric 136 0 1.000
## 3 3 V11 numeric 136 0 1.000
##################################
# Listing all predictors
##################################
DQA.Predictors <- DQA[,!names(DQA) %in% c("Class")]
##################################
# Listing all numeric predictors
##################################
DQA.Predictors.Numeric <- DQA.Predictors[,sapply(DQA.Predictors, is.numeric)]

if (length(names(DQA.Predictors.Numeric))>0) {
  print(paste0("There are ",
               length(names(DQA.Predictors.Numeric)),
               " numeric predictor variable(s)."))
} else {
  print("There are no numeric predictor variables.")
}
## [1] "There are 2 numeric predictor variable(s)."
##################################
# Listing all factor predictors
##################################
DQA.Predictors.Factor <- DQA.Predictors[,sapply(DQA.Predictors, is.factor)]

if (length(names(DQA.Predictors.Factor))>0) {
  print(paste0("There are ",
               length(names(DQA.Predictors.Factor)),
               " factor predictor variable(s)."))
} else {
  print("There are no factor predictor variables.")
}
## [1] "There are no factor predictor variables."
##################################
# Formulating a data quality assessment summary for factor predictors
##################################
if (length(names(DQA.Predictors.Factor))>0) {
##################################
# Formulating a function to determine the first mode
##################################
  FirstModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    ux[tab == max(tab)]
  }

  ##################################
  # Formulating a function to determine the second mode
  ##################################
  SecondModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    fm = ux[tab == max(tab)]
    sm = x[!(x %in% fm)]
    usm <- unique(sm)
    tabsm <- tabulate(match(sm, usm))
    ifelse(is.na(usm[tabsm == max(tabsm)])==TRUE,
           return("x"),
           return(usm[tabsm == max(tabsm)]))
  }

  (DQA.Predictors.Factor.Summary <- data.frame(
    Column.Name= names(DQA.Predictors.Factor),
    Column.Type=sapply(DQA.Predictors.Factor, function(x) class(x)),
    Unique.Count=sapply(DQA.Predictors.Factor, function(x) length(unique(x))),
    First.Mode.Value=sapply(DQA.Predictors.Factor, function(x) as.character(FirstModes(x)[1])),
    Second.Mode.Value=sapply(DQA.Predictors.Factor, function(x) as.character(SecondModes(x)[1])),
    First.Mode.Count=sapply(DQA.Predictors.Factor, function(x) sum(na.omit(x) == FirstModes(x)[1])),
    Second.Mode.Count=sapply(DQA.Predictors.Factor, function(x) sum(na.omit(x) == SecondModes(x)[1])),
    Unique.Count.Ratio=sapply(DQA.Predictors.Factor, function(x) format(round((length(unique(x))/nrow(DQA.Predictors.Factor)),3), nsmall=3)),
    First.Second.Mode.Ratio=sapply(DQA.Predictors.Factor, function(x) format(round((sum(na.omit(x) == FirstModes(x)[1])/sum(na.omit(x) == SecondModes(x)[1])),3), nsmall=3)),
    row.names=NULL)
  )
}
##################################
# Formulating a data quality assessment summary for numeric predictors
##################################
if (length(names(DQA.Predictors.Numeric))>0) {
##################################
# Formulating a function to determine the first mode
##################################
  FirstModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    ux[tab == max(tab)]
  }

  ##################################
  # Formulating a function to determine the second mode
  ##################################
  SecondModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    fm = ux[tab == max(tab)]
    sm = na.omit(x)[!(na.omit(x) %in% fm)]
    usm <- unique(sm)
    tabsm <- tabulate(match(sm, usm))
    ifelse(is.na(usm[tabsm == max(tabsm)])==TRUE,
           return(0.00001),
           return(usm[tabsm == max(tabsm)]))
  }

  (DQA.Predictors.Numeric.Summary <- data.frame(
    Column.Name= names(DQA.Predictors.Numeric),
    Column.Type=sapply(DQA.Predictors.Numeric, function(x) class(x)),
    Unique.Count=sapply(DQA.Predictors.Numeric, function(x) length(unique(x))),
    Unique.Count.Ratio=sapply(DQA.Predictors.Numeric, function(x) format(round((length(unique(x))/nrow(DQA.Predictors.Numeric)),3), nsmall=3)),
    First.Mode.Value=sapply(DQA.Predictors.Numeric, function(x) format(round((FirstModes(x)[1]),3),nsmall=3)),
    Second.Mode.Value=sapply(DQA.Predictors.Numeric, function(x) format(round((SecondModes(x)[1]),3),nsmall=3)),
    First.Mode.Count=sapply(DQA.Predictors.Numeric, function(x) sum(na.omit(x) == FirstModes(x)[1])),
    Second.Mode.Count=sapply(DQA.Predictors.Numeric, function(x) sum(na.omit(x) == SecondModes(x)[1])),
    First.Second.Mode.Ratio=sapply(DQA.Predictors.Numeric, function(x) format(round((sum(na.omit(x) == FirstModes(x)[1])/sum(na.omit(x) == SecondModes(x)[1])),3), nsmall=3)),
    Minimum=sapply(DQA.Predictors.Numeric, function(x) format(round(min(x,na.rm = TRUE),3), nsmall=3)),
    Mean=sapply(DQA.Predictors.Numeric, function(x) format(round(mean(x,na.rm = TRUE),3), nsmall=3)),
    Median=sapply(DQA.Predictors.Numeric, function(x) format(round(median(x,na.rm = TRUE),3), nsmall=3)),
    Maximum=sapply(DQA.Predictors.Numeric, function(x) format(round(max(x,na.rm = TRUE),3), nsmall=3)),
    Skewness=sapply(DQA.Predictors.Numeric, function(x) format(round(skewness(x,na.rm = TRUE),3), nsmall=3)),
    Kurtosis=sapply(DQA.Predictors.Numeric, function(x) format(round(kurtosis(x,na.rm = TRUE),3), nsmall=3)),
    Percentile25th=sapply(DQA.Predictors.Numeric, function(x) format(round(quantile(x,probs=0.25,na.rm = TRUE),3), nsmall=3)),
    Percentile75th=sapply(DQA.Predictors.Numeric, function(x) format(round(quantile(x,probs=0.75,na.rm = TRUE),3), nsmall=3)),
    row.names=NULL)
  )
}
## Column.Name Column.Type Unique.Count Unique.Count.Ratio First.Mode.Value
## 1 V1 numeric 122 0.897 0.020
## 2 V11 numeric 134 0.985 0.213
## Second.Mode.Value First.Mode.Count Second.Mode.Count First.Second.Mode.Ratio
## 1 0.034 3 2 1.500
## 2 0.095 2 1 2.000
## Minimum Mean Median Maximum Skewness Kurtosis Percentile25th Percentile75th
## 1 0.002 0.032 0.024 0.137 1.915 6.988 0.015 0.039
## 2 0.052 0.264 0.250 0.734 0.909 4.151 0.178 0.322
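##################################
# Supplementary sketch (not in the
# original output): illustrating the
# mode helper functions on a toy vector
##################################
# FirstModes() returns the most frequent value(s), while SecondModes()
# re-tabulates after dropping the first mode. ToyVector is a
# hypothetical input used only for illustration.
ToyVector <- c(1,1,2,2,2,3)
FirstModes(ToyVector)   # most frequent value: 2
SecondModes(ToyVector)  # next most frequent value: 1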
##################################
# Identifying potential data quality issues
##################################
##################################
# Checking for missing observations
##################################
if ((nrow(DQA.Summary[DQA.Summary$NA.Count>0,]))>0){
  print(paste0("Missing observations noted for ",
               nrow(DQA.Summary[DQA.Summary$NA.Count>0,]),
               " variable(s) with NA.Count>0 and Fill.Rate<1.0."))
  DQA.Summary[DQA.Summary$NA.Count>0,]
} else {
  print("No missing observations noted.")
}
## [1] "No missing observations noted."
##################################
# Checking for zero or near-zero variance predictors
##################################
if (length(names(DQA.Predictors.Factor))==0) {
  print("No factor predictors noted.")
} else if (nrow(DQA.Predictors.Factor.Summary[as.numeric(as.character(DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio))>5,])>0){
  print(paste0("Low variance observed for ",
               nrow(DQA.Predictors.Factor.Summary[as.numeric(as.character(DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio))>5,]),
               " factor variable(s) with First.Second.Mode.Ratio>5."))
  DQA.Predictors.Factor.Summary[as.numeric(as.character(DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio))>5,]
} else {
  print("No low variance factor predictors due to high first-second mode ratio noted.")
}
## [1] "No factor predictors noted."
if (length(names(DQA.Predictors.Numeric))==0) {
  print("No numeric predictors noted.")
} else if (nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$First.Second.Mode.Ratio))>5,])>0){
  print(paste0("Low variance observed for ",
               nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$First.Second.Mode.Ratio))>5,]),
               " numeric variable(s) with First.Second.Mode.Ratio>5."))
  DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$First.Second.Mode.Ratio))>5,]
} else {
  print("No low variance numeric predictors due to high first-second mode ratio noted.")
}
## [1] "No low variance numeric predictors due to high first-second mode ratio noted."
if (length(names(DQA.Predictors.Numeric))==0) {
  print("No numeric predictors noted.")
} else if (nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Unique.Count.Ratio))<0.01,])>0){
  print(paste0("Low variance observed for ",
               nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Unique.Count.Ratio))<0.01,]),
               " numeric variable(s) with Unique.Count.Ratio<0.01."))
  DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Unique.Count.Ratio))<0.01,]
} else {
  print("No low variance numeric predictors due to low unique count ratio noted.")
}
## [1] "No low variance numeric predictors due to low unique count ratio noted."
##################################
# Checking for skewed predictors
##################################
if (length(names(DQA.Predictors.Numeric))==0) {
  print("No numeric predictors noted.")
} else if (nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))>3 |
                as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))<(-3),])>0){
  print(paste0("High skewness observed for ",
               nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))>3 |
                    as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))<(-3),]),
               " numeric variable(s) with Skewness>3 or Skewness<(-3)."))
  DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))>3 |
                                 as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))<(-3),]
} else {
  print("No skewed numeric predictors noted.")
}
## [1] "No skewed numeric predictors noted."
##################################
# Loading dataset
##################################
DPA <- Sonar_Train
##################################
# Listing all predictors
##################################
DPA.Predictors <- DPA[,!names(DPA) %in% c("Class")]
##################################
# Listing all numeric predictors
##################################
DPA.Predictors.Numeric <- DPA.Predictors[,sapply(DPA.Predictors, is.numeric)]
##################################
# Identifying outliers for the numeric predictors
##################################
OutlierCountList <- c()

for (i in 1:ncol(DPA.Predictors.Numeric)) {
  Outliers <- boxplot.stats(DPA.Predictors.Numeric[,i])$out
  OutlierCount <- length(Outliers)
  OutlierCountList <- append(OutlierCountList,OutlierCount)
  OutlierIndices <- which(DPA.Predictors.Numeric[,i] %in% c(Outliers))
  boxplot(DPA.Predictors.Numeric[,i],
          ylab = names(DPA.Predictors.Numeric)[i],
          main = names(DPA.Predictors.Numeric)[i],
          horizontal=TRUE)
  mtext(paste0(OutlierCount, " Outlier(s) Detected"))
}

OutlierCountSummary <- as.data.frame(cbind(names(DPA.Predictors.Numeric),(OutlierCountList)))
names(OutlierCountSummary) <- c("NumericPredictors","OutlierCount")
OutlierCountSummary$OutlierCount <- as.numeric(as.character(OutlierCountSummary$OutlierCount))
NumericPredictorWithOutlierCount <- nrow(OutlierCountSummary[OutlierCountSummary$OutlierCount>0,])
print(paste0(NumericPredictorWithOutlierCount, " numeric variable(s) were noted with outlier(s)." ))
## [1] "2 numeric variable(s) were noted with outlier(s)."
##################################
# Gathering descriptive statistics
##################################
(DPA_Skimmed <- skim(DPA.Predictors.Numeric))
Name | DPA.Predictors.Numeric |
Number of rows | 136 |
Number of columns | 2 |
_______________________ | |
Column type frequency: | |
numeric | 2 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
V1 | 0 | 1 | 0.03 | 0.03 | 0.00 | 0.02 | 0.02 | 0.04 | 0.14 | ▇▃▁▁▁ |
V11 | 0 | 1 | 0.26 | 0.13 | 0.05 | 0.18 | 0.25 | 0.32 | 0.73 | ▅▇▂▁▁ |
###################################
# Verifying the data dimensions
###################################
dim(DPA.Predictors.Numeric)
## [1] 136 2
##################################
# Loading dataset
##################################
DPA <- Sonar_Train
##################################
# Gathering descriptive statistics
##################################
(DPA_Skimmed <- skim(DPA))
Name | DPA |
Number of rows | 136 |
Number of columns | 3 |
_______________________ | |
Column type frequency: | |
factor | 1 |
numeric | 2 |
________________________ | |
Group variables | None |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
Class | 0 | 1 | FALSE | 2 | M: 111, R: 25 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
V1 | 0 | 1 | 0.03 | 0.03 | 0.00 | 0.02 | 0.02 | 0.04 | 0.14 | ▇▃▁▁▁ |
V11 | 0 | 1 | 0.26 | 0.13 | 0.05 | 0.18 | 0.25 | 0.32 | 0.73 | ▅▇▂▁▁ |
##################################
# Identifying columns with low variance
###################################
DPA_LowVariance <- nearZeroVar(DPA,
                               freqCut = 95/5,
                               uniqueCut = 10,
                               saveMetrics= TRUE)
(DPA_LowVariance[DPA_LowVariance$nzv,])
## [1] freqRatio percentUnique zeroVar nzv
## <0 rows> (or 0-length row.names)
if ((nrow(DPA_LowVariance[DPA_LowVariance$nzv,]))==0){
  print("No low variance predictors noted.")
} else {
  print(paste0("Low variance observed for ",
               nrow(DPA_LowVariance[DPA_LowVariance$nzv,]),
               " numeric variable(s) with First.Second.Mode.Ratio>4 and Unique.Count.Ratio<0.10."))

  DPA_LowVarianceForRemoval <- (nrow(DPA_LowVariance[DPA_LowVariance$nzv,]))

  print(paste0("Low variance can be resolved by removing ",
               nrow(DPA_LowVariance[DPA_LowVariance$nzv,]),
               " numeric variable(s)."))

  for (j in 1:DPA_LowVarianceForRemoval) {
    DPA_LowVarianceRemovedVariable <- rownames(DPA_LowVariance[DPA_LowVariance$nzv,])[j]
    print(paste0("Variable ",
                 j," for removal: ",
                 DPA_LowVarianceRemovedVariable))
  }

  DPA %>%
    skim() %>%
    dplyr::filter(skim_variable %in% rownames(DPA_LowVariance[DPA_LowVariance$nzv,]))

  ##################################
  # Filtering out columns with low variance
  ##################################
  DPA_ExcludedLowVariance <- DPA[,!names(DPA) %in% rownames(DPA_LowVariance[DPA_LowVariance$nzv,])]

  ##################################
  # Gathering descriptive statistics
  ##################################
  (DPA_ExcludedLowVariance_Skimmed <- skim(DPA_ExcludedLowVariance))

  ###################################
  # Verifying the data dimensions
  ###################################
  dim(DPA_ExcludedLowVariance)
}
## [1] "No low variance predictors noted."
##################################
# Loading dataset
##################################
DPA <- Sonar_Train
##################################
# Listing all predictors
##################################
DPA.Predictors <- DPA[,!names(DPA) %in% c("Class")]
##################################
# Listing all numeric predictors
##################################
DPA.Predictors.Numeric <- DPA.Predictors[,sapply(DPA.Predictors, is.numeric)]
##################################
# Visualizing pairwise correlation between predictors
##################################
DPA_CorrelationTest <- cor.mtest(DPA.Predictors.Numeric,
                                 method = "pearson",
                                 conf.level = .95)
corrplot(cor(DPA.Predictors.Numeric,
method = "pearson",
use="pairwise.complete.obs"),
method = "circle",
type = "upper",
order = "original",
tl.col = "black",
tl.cex = 0.75,
tl.srt = 90,
sig.level = 0.05,
p.mat = DPA_CorrelationTest$p,
insig = "blank")
##################################
# Identifying the highly correlated variables
##################################
DPA_Correlation <- cor(DPA.Predictors.Numeric,
                       method = "pearson",
                       use="pairwise.complete.obs")

(DPA_HighlyCorrelatedCount <- sum(abs(DPA_Correlation[upper.tri(DPA_Correlation)]) > 0.95))
## [1] 0
if (DPA_HighlyCorrelatedCount == 0) {
  print("No highly correlated predictors noted.")
} else {
  print(paste0("High correlation observed for ",
               (DPA_HighlyCorrelatedCount)," pairs of numeric variable(s) with Correlation.Coefficient>0.95."))

  (DPA_HighlyCorrelatedPairs <- corr_cross(DPA.Predictors.Numeric,
                                           max_pvalue = 0.05,
                                           top = DPA_HighlyCorrelatedCount,
                                           rm.na = TRUE,
                                           grid = FALSE
  ))
}
## [1] "No highly correlated predictors noted."
if (DPA_HighlyCorrelatedCount > 0) {
  DPA_HighlyCorrelated <- findCorrelation(DPA_Correlation, cutoff = 0.95)

  (DPA_HighlyCorrelatedForRemoval <- length(DPA_HighlyCorrelated))

  print(paste0("High correlation can be resolved by removing ",
               (DPA_HighlyCorrelatedForRemoval)," numeric variable(s)."))

  for (j in 1:DPA_HighlyCorrelatedForRemoval) {
    DPA_HighlyCorrelatedRemovedVariable <- colnames(DPA.Predictors.Numeric)[DPA_HighlyCorrelated[j]]
    print(paste0("Variable ",
                 j," for removal: ",
                 DPA_HighlyCorrelatedRemovedVariable))
  }

  ##################################
  # Filtering out columns with high correlation
  ##################################
  DPA_ExcludedHighCorrelation <- DPA[,-DPA_HighlyCorrelated]

  ##################################
  # Gathering descriptive statistics
  ##################################
  (DPA_ExcludedHighCorrelation_Skimmed <- skim(DPA_ExcludedHighCorrelation))

  ###################################
  # Verifying the data dimensions
  ###################################
  dim(DPA_ExcludedHighCorrelation)
}
##################################
# Loading dataset
##################################
DPA <- Sonar_Train
##################################
# Listing all predictors
##################################
DPA.Predictors <- DPA[,!names(DPA) %in% c("Class")]
##################################
# Listing all numeric predictors
##################################
DPA.Predictors.Numeric <- DPA.Predictors[,sapply(DPA.Predictors, is.numeric)]
##################################
# Identifying the linearly dependent variables
##################################
DPA_LinearlyDependent <- findLinearCombos(DPA.Predictors.Numeric)

(DPA_LinearlyDependentCount <- length(DPA_LinearlyDependent$linearCombos))
## [1] 0
if (DPA_LinearlyDependentCount == 0) {
  print("No linearly dependent predictors noted.")
} else {
  print(paste0("Linear dependency observed for ",
               (DPA_LinearlyDependentCount)," subset(s) of numeric variable(s)."))
  for (i in 1:DPA_LinearlyDependentCount) {
    DPA_LinearlyDependentSubset <- colnames(DPA.Predictors.Numeric)[DPA_LinearlyDependent$linearCombos[[i]]]
    print(paste0("Linear dependent variable(s) for subset ",
                 i," include: ",
                 DPA_LinearlyDependentSubset))
  }
}
## [1] "No linearly dependent predictors noted."
##################################
# Identifying the linearly dependent variables for removal
##################################
if (DPA_LinearlyDependentCount > 0) {
  DPA_LinearlyDependent <- findLinearCombos(DPA.Predictors.Numeric)

  DPA_LinearlyDependentForRemoval <- length(DPA_LinearlyDependent$remove)

  print(paste0("Linear dependency can be resolved by removing ",
               (DPA_LinearlyDependentForRemoval)," numeric variable(s)."))

  for (j in 1:DPA_LinearlyDependentForRemoval) {
    DPA_LinearlyDependentRemovedVariable <- colnames(DPA.Predictors.Numeric)[DPA_LinearlyDependent$remove[j]]
    print(paste0("Variable ",
                 j," for removal: ",
                 DPA_LinearlyDependentRemovedVariable))
  }

  ##################################
  # Filtering out columns with linear dependency
  ##################################
  DPA_ExcludedLinearlyDependent <- DPA[,-DPA_LinearlyDependent$remove]

  ##################################
  # Gathering descriptive statistics
  ##################################
  (DPA_ExcludedLinearlyDependent_Skimmed <- skim(DPA_ExcludedLinearlyDependent))

  ###################################
  # Verifying the data dimensions
  ###################################
  dim(DPA_ExcludedLinearlyDependent)
} else {
  ###################################
  # Verifying the data dimensions
  ###################################
  dim(DPA)
}
## [1] 136 3
##################################
# Loading dataset
##################################
DPA <- Sonar_Train
##################################
# Listing all predictors
##################################
DPA.Predictors <- DPA[,!names(DPA) %in% c("Class")]
##################################
# Listing all numeric predictors
##################################
DPA.Predictors.Numeric <- DPA.Predictors[,sapply(DPA.Predictors, is.numeric)]
##################################
# Applying a Box-Cox transformation
##################################
DPA_BoxCox <- preProcess(DPA.Predictors.Numeric, method = c("BoxCox"))
DPA_BoxCoxTransformed <- predict(DPA_BoxCox, DPA.Predictors.Numeric)
##################################
# Gathering descriptive statistics
##################################
(DPA_BoxCoxTransformedSkimmed <- skim(DPA_BoxCoxTransformed))
Name | DPA_BoxCoxTransformed |
Number of rows | 136 |
Number of columns | 2 |
_______________________ | |
Column type frequency: | |
numeric | 2 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
V1 | 0 | 1 | -3.73 | 0.78 | -6.50 | -4.17 | -3.74 | -3.24 | -1.99 | ▁▁▇▇▂ |
V11 | 0 | 1 | -1.07 | 0.29 | -1.73 | -1.25 | -1.06 | -0.91 | -0.29 | ▂▃▇▂▁ |
###################################
# Verifying the data dimensions
###################################
dim(DPA_BoxCoxTransformed)
## [1] 136 2
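##################################
# Supplementary sketch (not in the
# original output): inspecting the
# estimated Box-Cox lambda values
##################################
# Assuming the caret preProcess object stores one BoxCoxTrans object
# per transformed column in its bc element, the estimated exponents
# can be extracted as sketched below.
sapply(DPA_BoxCox$bc, function(x) x$lambda)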
##################################
# Loading dataset
##################################
DPA <- Sonar_Train
##################################
# Listing all predictors
##################################
DPA.Predictors <- DPA[,!names(DPA) %in% c("Class")]
##################################
# Listing all numeric predictors
##################################
DPA.Predictors.Numeric <- DPA.Predictors[,sapply(DPA.Predictors, is.numeric)]
##################################
# Applying a Box-Cox transformation
##################################
DPA_BoxCox <- preProcess(DPA.Predictors.Numeric, method = c("BoxCox"))
DPA_BoxCoxTransformed <- predict(DPA_BoxCox, DPA.Predictors.Numeric)
##################################
# Applying a center and scale data transformation
##################################
DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaled <- preProcess(DPA_BoxCoxTransformed, method = c("center","scale"))
DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed <- predict(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaled, DPA_BoxCoxTransformed)
##################################
# Gathering descriptive statistics
##################################
(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformedSkimmed <- skim(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed))
Name | DPA.Predictors.Numeric_Bo… |
Number of rows | 136 |
Number of columns | 2 |
_______________________ | |
Column type frequency: | |
numeric | 2 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
V1 | 0 | 1 | 0 | 1 | -3.56 | -0.56 | -0.02 | 0.63 | 2.23 | ▁▁▇▇▂ |
V11 | 0 | 1 | 0 | 1 | -2.27 | -0.59 | 0.04 | 0.57 | 2.71 | ▂▃▇▂▁ |
###################################
# Verifying the data dimensions
###################################
dim(DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed)
## [1] 136 2
##################################
# Creating the pre-modelling
# train set
##################################
Class <- DPA$Class
PMA.Predictors.Numeric <- DPA.Predictors.Numeric_BoxCoxTransformed_CenteredScaledTransformed
PMA_BoxCoxTransformed_CenteredScaledTransformed <- cbind(Class,PMA.Predictors.Numeric)
PMA_PreModelling_Train <- PMA_BoxCoxTransformed_CenteredScaledTransformed
##################################
# Gathering descriptive statistics
##################################
(PMA_PreModelling_Train_Skimmed <- skim(PMA_PreModelling_Train))
Name | PMA_PreModelling_Train |
Number of rows | 136 |
Number of columns | 3 |
_______________________ | |
Column type frequency: | |
factor | 1 |
numeric | 2 |
________________________ | |
Group variables | None |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
Class | 0 | 1 | FALSE | 2 | M: 111, R: 25 |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
V1 | 0 | 1 | 0 | 1 | -3.56 | -0.56 | -0.02 | 0.63 | 2.23 | ▁▁▇▇▂ |
V11 | 0 | 1 | 0 | 1 | -2.27 | -0.59 | 0.04 | 0.57 | 2.71 | ▂▃▇▂▁ |
###################################
# Verifying the data dimensions
# for the train set
###################################
dim(PMA_PreModelling_Train)
## [1] 136 3
##################################
# Loading dataset
##################################
EDA <- PMA_PreModelling_Train
##################################
# Listing all predictors
##################################
EDA.Predictors <- EDA[,!names(EDA) %in% c("Class")]
##################################
# Listing all numeric predictors
##################################
EDA.Predictors.Numeric <- EDA.Predictors[,sapply(EDA.Predictors, is.numeric)]
ncol(EDA.Predictors.Numeric)
## [1] 2
names(EDA.Predictors.Numeric)
## [1] "V1" "V11"
##################################
# Formulating the box plots
##################################
featurePlot(x = EDA.Predictors.Numeric,
y = EDA$Class,
plot = "box",
scales = list(x = list(relation="free", rot = 90),
y = list(relation="free")),
adjust = 1.5,
pch = "|")
##################################
# Creating a local object
# for the train set
##################################
PMA_PreModelling_Train_LR <- PMA_PreModelling_Train
PMA_PreModelling_Train_LR$Label <- rep("LR",nrow(PMA_PreModelling_Train_LR))
##################################
# Verifying the class distribution
# for the original data
##################################
table(PMA_PreModelling_Train_LR$Class)
##
## M R
## 111 25
##################################
# Visualizing the imbalanced data set
##################################
ggplot(PMA_PreModelling_Train, aes(V1, V11, color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
scale_x_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
scale_y_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
labs(title = "Original Imbalanced Data Set") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
##################################
# Formulating the structure of the
# Logistic Regression model
##################################
LR_Model <- glm(Class ~ V1 + V11,
                data = PMA_PreModelling_Train_LR,
                family = binomial)
##################################
# Consolidating the model results
##################################
summary(LR_Model)
##
## Call:
## glm(formula = Class ~ V1 + V11, family = binomial, data = PMA_PreModelling_Train_LR)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.3152 0.3771 -6.139 8.28e-10 ***
## V1 -0.7399 0.3005 -2.462 0.0138 *
## V11 -1.5607 0.3450 -4.524 6.07e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 129.783 on 135 degrees of freedom
## Residual deviance: 88.923 on 133 degrees of freedom
## AIC: 94.923
##
## Number of Fisher Scoring iterations: 6
LR_Model_Coef <- (as.data.frame(LR_Model$coefficients))
LR_Model_Coef$Coef <- rownames(LR_Model_Coef)
LR_Model_Coef$Model <- rep("LR",nrow(LR_Model_Coef))
colnames(LR_Model_Coef) <- c("Estimates","Coefficients","Model")
print(LR_Model_Coef, rownames=FALSE)
## Estimates Coefficients Model
## (Intercept) -2.3152181 (Intercept) LR
## V1 -0.7399005 V1 LR
## V11 -1.5607048 V11 LR
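##################################
# Supplementary sketch (not in the
# original output): reading a probability
# off the fitted coefficients
##################################
# With Class levels c("M","R"), glm() models the log-odds of R. For a
# hypothetical observation at V1=0 and V11=0, the predicted rock
# probability is the inverse logit of the intercept alone:
plogis(coef(LR_Model)["(Intercept)"])
# i.e. exp(-2.3152)/(1+exp(-2.3152)), roughly 0.09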
##################################
# Computing the model predictions
##################################
(LR_Model_Probabilities <- predict(LR_Model,
                                   type = c("response")))
## 98 99 100 101 102 103
## 0.3637496537 0.0113931997 0.1319142547 0.0021783355 0.0037538060 0.0174606489
## 104 105 106 107 108 109
## 0.0725273351 0.0164891503 0.1569862922 0.4818797362 0.2583425943 0.1252724633
## 110 111 112 113 114 115
## 0.4112142881 0.4680556213 0.1332256210 0.0344698396 0.1348432397 0.1615191484
## 116 117 118 119 120 121
## 0.0662797277 0.2436188082 0.0780323151 0.1104267965 0.0515700587 0.1445535907
## 122 123 124 125 126 127
## 0.1731460430 0.0558634929 0.1081612509 0.0165343094 0.0369004305 0.0031146384
## 128 129 130 131 132 133
## 0.0221776030 0.0200623323 0.0028456905 0.0126580025 0.0050535349 0.0081982533
## 134 135 136 137 138 139
## 0.0025368869 0.0014777580 0.0799936459 0.0023418015 0.0008311849 0.0846443282
## 140 141 142 143 144 145
## 0.2606104200 0.1780753538 0.0394327767 0.0403610789 0.0364744270 0.7351055523
## 146 147 148 149 150 151
## 0.0306057253 0.0228736619 0.0552988212 0.0205374459 0.2190542623 0.2301893864
## 152 153 154 155 156 157
## 0.1183496315 0.1771452296 0.3898752302 0.5789364640 0.6505131954 0.1549759240
## 158 159 160 161 162 163
## 0.1418070654 0.0908007706 0.0533759775 0.0797056569 0.0996424369 0.1113892912
## 164 165 166 167 168 169
## 0.2325914484 0.0352838163 0.0868721918 0.2269151196 0.5811364344 0.6804246275
## 170 171 172 173 174 175
## 0.2377989316 0.1546431081 0.0039867863 0.0484288031 0.0350025026 0.0115543490
## 176 177 178 179 180 181
## 0.0124263395 0.0040952633 0.0623582127 0.2442298264 0.0289787190 0.0051456066
## 182 183 184 185 186 187
## 0.0192102410 0.0354944115 0.0420803608 0.0020214950 0.0046310125 0.0336330699
## 188 189 190 191 192 193
## 0.0359277781 0.1172950153 0.1419944753 0.1265297826 0.1047718509 0.2755585363
## 194 195 196 197 198 199
## 0.1390569085 0.0450510742 0.1550648867 0.2872791732 0.0821006438 0.0726099031
## 200 201 202 203 204 205
## 0.0792285656 0.0532267024 0.0299770042 0.0173915161 0.0553203581 0.0345232209
## 206 207 208 95 57 27
## 0.0333086574 0.0446830571 0.0622826685 0.8967905106 0.2407267465 0.6874636823
## 18 68 92 43 87 64
## 0.4352384578 0.4517614566 0.0687256353 0.6033006196 0.1583196380 0.7793277803
## 16 12 61 13 34 66
## 0.0727076286 0.8563207774 0.3983590501 0.7017484676 0.3096802387 0.8196244700
## 49 94 91 72 23 39
## 0.3431752805 0.0384599213 0.0861723774 0.8744789160 0.4935529921 0.5807289286
## 29 73 77 32
## 0.0739144263 0.5017946378 0.5358622581 0.3382071510
##################################
# Creating a classification index
# based from the model predictions
##################################
(LR_Model_Indices <- predict(LR_Model,
                             type = c("link")))
## 98 99 100 101 102 103
## -0.559126246 -4.463280018 -1.884138369 -6.127013509 -5.581224150 -4.030190682
## 104 105 106 107 108 109
## -2.548499795 -4.088426063 -1.680824728 -0.072512812 -1.054600830 -1.943421380
## 110 111 112 113 114 115
## -0.358947840 -0.127951795 -1.872734622 -3.332592613 -1.858797800 -1.646968043
## 116 117 118 119 120 121
## -2.645292815 -1.132940729 -2.469387136 -2.086388975 -2.911866678 -1.777973140
## 122 123 124 125 126 127
## -1.563492668 -2.827359673 -2.109662164 -4.085645174 -3.261933584 -5.768522713
## 128 129 130 131 132 133
## -3.786245149 -3.888644925 -5.859099785 -4.356726856 -5.282600962 -4.795602114
## 134 135 136 137 138 139
## -5.974277480 -6.515750374 -2.442433371 -6.054490239 -7.091826715 -2.380854599
## 140 141 142 143 144 145
## -1.042798305 -1.529441923 -3.192926600 -3.168691163 -3.273987644 1.020682662
## 146 147 148 149 150 151
## -3.455484324 -3.754629841 -2.838117074 -3.864754157 -1.271186197 -1.207242139
## 152 153 154 155 156 157
## -2.008152348 -1.535809819 -0.447836712 0.318408998 0.621295777 -1.696095344
## 158 159 160 161 162 163
## -1.800361500 -2.303896473 -2.875541209 -2.446352996 -2.201203830 -2.076628048
## 164 165 166 167 168 169
## -1.193735847 -3.308409550 -2.352437879 -1.225812824 0.327440308 0.755723902
## 170 171 172 173 174 175
## -1.164784899 -1.698638961 -5.520775067 -2.978019769 -3.316705945 -4.449071761
## 176 177 178 179 180 181
## -4.375432715 -5.493820583 -2.710472604 -1.129627633 -3.511786651 -5.264453127
## 182 183 184 185 186 187
## -3.932914599 -3.302240366 -3.125182749 -6.201894377 -5.370337994 -3.358033801
## 188 189 190 191 192 193
## -3.289655451 -2.018298780 -1.798822388 -1.931996317 -2.145293464 -0.966600884
## 194 195 196 197 198 199
## -1.823145144 -3.053861030 -1.695416182 -0.908635325 -2.414141893 -2.547272974
## 200 201 202 203 204 205
## -2.452874924 -2.878499479 -3.476889218 -4.034228249 -2.837704887 -3.330989885
## 206 207 208 95 57 27
## -3.368061909 -3.062448770 -2.711765358 2.162061490 -1.148699272 0.788288323
## 18 68 92 43 87 64
## -0.260509533 -0.193556201 -2.606431655 0.419236847 -1.670784310 1.261753296
## 16 12 61 13 34 66
## -2.545822600 1.785061852 -0.412307104 0.855637833 -0.801614636 1.513805316
## 49 94 91 72 23 39
## -0.649175855 -3.218919553 -2.361292279 1.941154440 -0.025789461 0.325766429
## 29 73 77 32
## -2.528058620 0.007178582 0.143695781 -0.671294010
max(LR_Model_Indices)
## [1] 2.162061
min(LR_Model_Indices)
## [1] -7.091827
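##################################
# Supplementary check (not in the
# original output): linking the two
# prediction scales
##################################
# type="response" is the inverse logit of type="link", so applying
# plogis() to the indices should reproduce the probabilities.
all.equal(plogis(LR_Model_Indices), LR_Model_Probabilities)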
##################################
# Consolidating the model probabilities
# and classification index
# based from the model predictions
##################################
LR_Model_Predictions <- as.data.frame(PMA_PreModelling_Train_LR)
LR_Model_Predictions$LR_Prob <- LR_Model_Probabilities
LR_Model_Predictions$LR_LP <- LR_Model_Indices
LR_Model_Predictions$Class <- as.factor(LR_Model_Predictions$Class)
LR_Model_Predictions$Label <- rep("LR",nrow(LR_Model_Predictions))
##################################
# Formulating the probability curve
# using the consolidated model predictions
##################################
LR_Model_Predictions %>%
  ggplot(aes(x = LR_LP ,
y = LR_Prob,
color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("Sonar Object Classification Index (Logit Values)") +
ylab("Estimated Rock Detection Probability") +
labs(color = "Class") +
scale_x_continuous( limits=c(-10,5), breaks=seq(-10,5,by=1)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
ggtitle("Estimated Rock Detection Probabilities Based on Classification Index : Logistic Regression") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
##################################
# Visualizing the imbalanced data set
##################################
ggplot(PMA_PreModelling_Train, aes(V1, V11, color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
scale_x_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
scale_y_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
labs(title = "Without Undersampling - Random Downsampling") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
##################################
# Implementing US_DOWNSAMPLE
# Visualizing the undersampled data using US_DOWNSAMPLE
##################################
recipe(Class ~ V1 + V11, data = PMA_PreModelling_Train) %>%
step_downsample(Class, seed=123456789) %>%
prep() %>%
bake(new_data = NULL) %>%
ggplot(aes(V1, V11, color = Class)) +
scale_x_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
scale_y_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
labs(title = "With Undersampling - Random Downsample") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
US_DOWNSAMPLE <- recipe(Class ~ V1 + V11, data = PMA_PreModelling_Train) %>%
  step_downsample(Class, seed=123456789) %>%
  prep()

PMA_PreModelling_Train_LR_US_DOWNSAMPLE <- US_DOWNSAMPLE %>%
  bake(new_data = NULL)

(PMA_PreModelling_Train_LR_US_DOWNSAMPLE <- as.data.frame(PMA_PreModelling_Train_LR_US_DOWNSAMPLE))
## V1 V11 Class
## 1 -0.295964244 1.50754826 M
## 2 -0.371994173 0.60103638 M
## 3 -0.789109306 -0.36302243 M
## 4 -0.256318859 -0.63813480 M
## 5 -0.779287302 0.73036007 M
## 6 0.537644499 -0.19150300 M
## 7 0.043933590 0.30731955 M
## 8 0.424204681 0.54321732 M
## 9 1.395977603 1.55085071 M
## 10 1.852613244 0.04399440 M
## 11 -0.507041623 0.38985083 M
## 12 -0.555415865 0.01776805 M
## 13 -2.093160440 0.59563328 M
## 14 0.207991857 -0.39104999 M
## 15 0.278484686 -2.26945660 M
## 16 -0.068999246 0.63930609 M
## 17 -0.935159101 0.03686239 M
## 18 0.424204681 1.89154083 M
## 19 0.625610136 0.17668535 M
## 20 0.256869245 1.19827720 M
## 21 0.695599819 -0.11827986 M
## 22 -0.499153812 0.87301303 M
## 23 0.345301912 -0.27257795 M
## 24 1.231717734 1.85842121 M
## 25 -0.957451233 0.02573746 M
## 26 -2.902302684 -1.49282840 R
## 27 -0.588710342 -0.46833442 R
## 28 -0.597170869 -1.70542147 R
## 29 -0.289270938 -1.17938825 R
## 30 0.544629613 -1.61762409 R
## 31 0.064360561 0.15607894 R
## 32 -0.168320027 -1.67226688 R
## 33 -0.316256467 -0.26298086 R
## 34 -1.638716948 -1.51501038 R
## 35 0.274190661 0.01776805 R
## 36 -0.860055136 -2.21946189 R
## 37 -0.789109306 -0.84516218 R
## 38 -1.427539411 -1.35491277 R
## 39 0.779483548 -1.33935885 R
## 40 -0.400799099 -2.26338258 R
## 41 -0.302692686 -0.92399268 R
## 42 0.827857790 0.18656255 R
## 43 -0.829167731 0.42261407 R
## 44 -2.434915803 -1.57286451 R
## 45 -1.138280881 -0.92728240 R
## 46 -0.860055136 -1.28443852 R
## 47 -1.125398710 0.66990478 R
## 48 -0.186674974 -1.39954451 R
## 49 -0.008605128 -1.57143555 R
## 50 -1.348878827 -0.41384379 R
$Label <- rep("LR_US_DOWNSAMPLE",nrow(PMA_PreModelling_Train_LR_US_DOWNSAMPLE))
PMA_PreModelling_Train_LR_US_DOWNSAMPLE
##################################
# Verifying the class distribution
# for the undersampled data using US_DOWNSAMPLE
##################################
table(PMA_PreModelling_Train_LR_US_DOWNSAMPLE$Class)
##
## M R
## 25 25
##################################
# Formulating the structure of the
# Logistic Regression model
##################################
LR_US_DOWNSAMPLE_Model <- glm(Class ~ V1 + V11,
                              data = PMA_PreModelling_Train_LR_US_DOWNSAMPLE,
                              family = binomial)
##################################
# Consolidating the model results
##################################
summary(LR_US_DOWNSAMPLE_Model)
##
## Call:
## glm(formula = Class ~ V1 + V11, family = binomial, data = PMA_PreModelling_Train_LR_US_DOWNSAMPLE)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.7625 0.4369 -1.745 0.080920 .
## V1 -0.7448 0.5075 -1.468 0.142179
## V11 -1.7181 0.5063 -3.393 0.000691 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 69.315 on 49 degrees of freedom
## Residual deviance: 43.344 on 47 degrees of freedom
## AIC: 49.344
##
## Number of Fisher Scoring iterations: 5
LR_US_DOWNSAMPLE_Model_Coef <- (as.data.frame(LR_US_DOWNSAMPLE_Model$coefficients))
LR_US_DOWNSAMPLE_Model_Coef$Coef <- rownames(LR_US_DOWNSAMPLE_Model_Coef)
LR_US_DOWNSAMPLE_Model_Coef$Model <- rep("LR_US_DOWNSAMPLE",nrow(LR_US_DOWNSAMPLE_Model_Coef))
colnames(LR_US_DOWNSAMPLE_Model_Coef) <- c("Estimates","Coefficients","Model")
print(LR_US_DOWNSAMPLE_Model_Coef, rownames=FALSE)
## Estimates Coefficients Model
## (Intercept) -0.7625171 (Intercept) LR_US_DOWNSAMPLE
## V1 -0.7448227 V1 LR_US_DOWNSAMPLE
## V11 -1.7180819 V11 LR_US_DOWNSAMPLE
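##################################
# Supplementary sketch (not in the
# original output): approximating the
# downsampling intercept shift
##################################
# Random undersampling mainly shifts the intercept while leaving the
# slopes roughly stable. The classical case-control offset suggests the
# refitted intercept should sit near the original intercept plus
# log(111/25); this is an approximation, not an exact identity.
coef(LR_Model)["(Intercept)"] + log(111/25)
# roughly -0.82, in the vicinity of the refitted value of about -0.76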
##################################
# Computing the model predictions
##################################
(LR_US_DOWNSAMPLE_Model_Probabilities <- predict(LR_US_DOWNSAMPLE_Model,
                                                 type = c("response")))
## 1 2 3 4 5 6
## 0.041799701 0.179744842 0.610385203 0.628260800 0.192025135 0.302810692
## 7 8 9 10 11 12
## 0.210279740 0.117973061 0.011354052 0.098148202 0.258331895 0.406281809
## 13 14 15 16 17 18
## 0.443534294 0.438912131 0.949269888 0.140697941 0.467714486 0.013018286
## 19 20 21 22 23 24
## 0.177694571 0.046860883 0.253998128 0.131171064 0.365540908 0.007593438
## 25 26 27 28 29 30
## 0.476615880 0.981366508 0.617890633 0.931650474 0.814455352 0.833555873
## 31 32 33 34 35 36
## 0.253769009 0.903431085 0.481223964 0.955252856 0.269478992 0.975667022
## 37 38 39 40 41 42
## 0.781987655 0.932679562 0.722728010 0.968467123 0.740857535 0.154510435
## 43 44 45 46 47 48
## 0.295042156 0.977098243 0.842698840 0.889421156 0.254406742 0.855827057
## 49 50
## 0.874763232 0.721753452
##################################
# Creating a classification index
# based from the model predictions
##################################
(LR_US_DOWNSAMPLE_Model_Indices <- predict(LR_US_DOWNSAMPLE_Model,
                                           type = c("link")))
## 1 2 3 4 5 6
## -3.13216766 -1.51807716 0.44893169 0.52476284 -1.43690467 -0.83394915
## 7 8 9 10 11 12
## -1.32324004 -2.01176629 -4.46676157 -2.21797160 -1.05465667 -0.37935775
## 13 14 15 16 17 18
## -0.22683042 -0.24557829 2.92917349 -1.80950517 -0.12932199 -4.32829655
## 19 20 21 22 23 24
## -1.53204571 -3.01257759 -1.07740121 -1.89064395 -0.55139462 -4.87284841
## 25 26 27 28 29 30
## -0.09360477 3.96398538 0.48060460 2.61232309 1.47922407 1.61104105
## 31 32 33 34 35 36
## -1.07861075 2.23594294 -0.07513948 3.06094845 -0.99726756 3.69128882
## 37 38 39 40 41 42
## 1.27728727 2.62859781 0.95803402 3.42468383 1.05043038 -1.69965419
## 43 44 45 46 47 48
## -0.87101976 3.75337359 1.67844746 2.08484208 -1.07524589 1.78105475
## 49 50
## 1.94374717 0.95317602
max(LR_US_DOWNSAMPLE_Model_Indices)
## [1] 3.963985
min(LR_US_DOWNSAMPLE_Model_Indices)
## [1] -4.872848
##################################
# Consolidating the model probabilities
# and classification index
# based from the model predictions
##################################
LR_US_DOWNSAMPLE_Model_Predictions <- as.data.frame(PMA_PreModelling_Train_LR_US_DOWNSAMPLE)
LR_US_DOWNSAMPLE_Model_Predictions$LR_US_DOWNSAMPLE_Prob <- LR_US_DOWNSAMPLE_Model_Probabilities
LR_US_DOWNSAMPLE_Model_Predictions$LR_US_DOWNSAMPLE_LP <- LR_US_DOWNSAMPLE_Model_Indices
LR_US_DOWNSAMPLE_Model_Predictions$Class <- as.factor(LR_US_DOWNSAMPLE_Model_Predictions$Class)
LR_US_DOWNSAMPLE_Model_Predictions$Label <- rep("LR_US_DOWNSAMPLE",nrow(LR_US_DOWNSAMPLE_Model_Predictions))
##################################
# Formulating the probability curve
# using the consolidated model predictions
##################################
LR_US_DOWNSAMPLE_Model_Predictions %>%
  ggplot(aes(x = LR_US_DOWNSAMPLE_LP ,
y = LR_US_DOWNSAMPLE_Prob,
color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("Sonar Object Classification Index (Logit Values)") +
ylab("Estimated Rock Detection Probability") +
labs(color = "Class") +
scale_x_continuous( limits=c(-10,5), breaks=seq(-10,5,by=1)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
ggtitle("Estimated Rock Detection Probabilities Based on Classification Index : Logistic Regression (US_DOWNSAMPLE)") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
##################################
# Visualizing the imbalanced data set
##################################
ggplot(PMA_PreModelling_Train, aes(V1, V11, color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
scale_x_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
scale_y_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
labs(title = "Without Oversampling - Random Upsampling") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
##################################
# Implementing OS_UPSAMPLE
# Visualizing the oversampled data using OS_UPSAMPLE
##################################
recipe(Class ~ V1 + V11, data = PMA_PreModelling_Train) %>%
step_upsample(Class, seed=123456789) %>%
prep() %>%
bake(new_data = NULL) %>%
ggplot(aes(V1, V11, color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
scale_x_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
scale_y_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
labs(title = "With Undersampling - Random Upsample") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
OS_UPSAMPLE <- recipe(Class ~ V1 + V11, data = PMA_PreModelling_Train) %>%
  step_upsample(Class, seed=123456789) %>%
  prep()

PMA_PreModelling_Train_LR_OS_UPSAMPLE <- OS_UPSAMPLE %>%
  bake(new_data = NULL)

(PMA_PreModelling_Train_LR_OS_UPSAMPLE <- as.data.frame(PMA_PreModelling_Train_LR_OS_UPSAMPLE))
## V1 V11 Class
## 1 0.914240973 -1.55861587 M
## 2 2.175018299 0.34520609 M
## 3 -0.230553865 -0.16690717 M
## 4 1.231717734 1.85842121 M
## 5 0.424204681 1.89154083 M
## 6 1.143139503 0.55690423 M
## 7 -0.507041623 0.38985083 M
## 8 0.312328639 0.98808941 M
## 9 -0.935159101 0.03686239 M
## 10 0.408807916 -1.63079048 M
## 11 0.738227771 -1.15770286 M
## 12 1.169078303 -0.79246144 M
## 13 0.118911997 -1.30982705 M
## 14 -0.174409184 -1.31877657 M
## 15 1.012209983 -0.76338471 M
## 16 0.813818599 0.26605270 M
## 17 0.207991857 -0.39104999 M
## 18 -0.957451233 0.02573746 M
## 19 0.695599819 -0.11827986 M
## 20 -1.204708453 -0.18639848 M
## 21 -0.068999246 0.13149291 M
## 22 0.527094931 -0.39650480 M
## 23 0.104263091 0.33286517 M
## 24 0.465616266 -0.56497212 M
## 25 -0.507041623 -0.24127887 M
## 26 0.043933590 0.30731955 M
## 27 0.147716923 -0.20173692 M
## 28 0.612463710 0.84401925 M
## 29 -0.068999246 0.63930609 M
## 30 1.395977603 1.55085071 M
## 31 -0.180527407 1.02812486 M
## 32 0.565359423 0.74012531 M
## 33 2.230423635 1.21329169 M
## 34 0.782380194 0.93715691 M
## 35 2.005116912 0.95072168 M
## 36 1.784287809 0.74337479 M
## 37 1.523834205 1.62207084 M
## 38 1.928176420 1.77732114 M
## 39 -1.204708453 0.65264083 M
## 40 1.934080462 1.47897609 M
## 41 0.744203377 2.70773324 M
## 42 1.424344244 -0.63319889 M
## 43 -0.491314245 -0.58236264 M
## 44 0.689392711 -0.83030322 M
## 45 1.381555338 -0.09258957 M
## 46 1.002499594 0.07158504 M
## 47 0.977896818 0.15071601 M
## 48 0.278484686 -2.26945660 M
## 49 1.406688795 0.06372536 M
## 50 1.852613244 0.04399440 M
## 51 1.281675939 -0.27257795 M
## 52 1.390588252 0.33359231 M
## 53 -0.192852168 -0.57752146 M
## 54 -0.180527407 -0.62433567 M
## 55 -0.052243905 -0.17198030 M
## 56 -0.779287302 -0.12994977 M
## 57 -0.041194136 -1.17696945 M
## 58 -0.924156756 -1.24933498 M
## 59 -0.168320027 -1.80173334 M
## 60 -2.093160440 0.59563328 M
## 61 -0.230553865 -0.22058604 M
## 62 -1.038676203 0.48516249 M
## 63 -0.030238811 0.37335493 M
## 64 0.089444831 0.04161877 M
## 65 0.303951059 -0.21715050 M
## 66 -0.132380328 -0.09011428 M
## 67 -1.546463816 0.01457488 M
## 68 -0.499153812 0.87301303 M
## 69 -0.108968439 0.07550792 M
## 70 0.686277848 -1.02337267 M
## 71 -0.721885235 -1.35101476 M
## 72 -3.557061230 -0.28132844 M
## 73 -0.789109306 -0.36302243 M
## 74 -0.750264961 -0.03937819 M
## 75 -0.379134945 2.23365699 M
## 76 -0.371994173 0.60103638 M
## 77 0.401039618 0.45156422 M
## 78 -0.295964244 1.50754826 M
## 79 0.256869245 1.19827720 M
## 80 1.243886483 1.44694263 M
## 81 -0.230553865 0.36255507 M
## 82 -0.256318859 -0.63813480 M
## 83 0.632133128 0.46700244 M
## 84 0.324793230 1.73570326 M
## 85 0.723165726 0.69367752 M
## 86 -1.191144673 1.19711994 M
## 87 -1.177722925 1.07730973 M
## 88 0.142960831 2.42255907 M
## 89 0.443194144 1.74741590 M
## 90 -0.180527407 0.75375436 M
## 91 0.544629613 0.36615870 M
## 92 -1.274767709 0.41409622 M
## 93 -0.539087424 -0.07530230 M
## 94 -0.555415865 0.01776805 M
## 95 0.345301912 -0.27257795 M
## 96 -1.868589923 0.02175517 M
## 97 -0.217863016 -0.21200423 M
## 98 0.625610136 0.17668535 M
## 99 -0.799007156 -0.01833539 M
## 100 -2.013850697 0.05347991 M
## 101 0.537644499 -0.19150300 M
## 102 -0.013979414 0.15531332 M
## 103 -0.935159101 0.53154288 M
## 104 -0.779287302 0.73036007 M
## 105 0.424204681 0.54321732 M
## 106 0.157176488 1.02691760 M
## 107 -0.323092564 0.48794823 M
## 108 0.377448180 0.47190071 M
## 109 0.992715078 0.20396774 M
## 110 0.295518362 0.33867807 M
## 111 0.099342681 0.20698569 M
## 112 -2.434915803 -1.57286451 R
## 113 -0.829167731 0.42261407 R
## 114 -0.829167731 0.42261407 R
## 115 -2.434915803 -1.57286451 R
## 116 -1.348878827 -0.41384379 R
## 117 -0.008605128 -1.57143555 R
## 118 -0.168320027 -1.67226688 R
## 119 -1.348878827 -0.41384379 R
## 120 -0.316256467 -0.26298086 R
## 121 -1.427539411 -1.35491277 R
## 122 -0.597170869 -1.70542147 R
## 123 -0.186674974 -1.39954451 R
## 124 -0.400799099 -2.26338258 R
## 125 0.544629613 -1.61762409 R
## 126 -0.789109306 -0.84516218 R
## 127 -0.316256467 -0.26298086 R
## 128 -0.597170869 -1.70542147 R
## 129 -0.588710342 -0.46833442 R
## 130 -0.186674974 -1.39954451 R
## 131 -2.434915803 -1.57286451 R
## 132 0.544629613 -1.61762409 R
## 133 0.827857790 0.18656255 R
## 134 -0.186674974 -1.39954451 R
## 135 -0.588710342 -0.46833442 R
## 136 0.544629613 -1.61762409 R
## 137 -0.186674974 -1.39954451 R
## 138 0.064360561 0.15607894 R
## 139 -0.789109306 -0.84516218 R
## 140 -0.302692686 -0.92399268 R
## 141 -0.860055136 -1.28443852 R
## 142 -0.289270938 -1.17938825 R
## 143 -0.302692686 -0.92399268 R
## 144 0.544629613 -1.61762409 R
## 145 -0.860055136 -1.28443852 R
## 146 -0.186674974 -1.39954451 R
## 147 0.064360561 0.15607894 R
## 148 -0.008605128 -1.57143555 R
## 149 -1.427539411 -1.35491277 R
## 150 -0.588710342 -0.46833442 R
## 151 0.779483548 -1.33935885 R
## 152 -0.860055136 -1.28443852 R
## 153 0.827857790 0.18656255 R
## 154 -0.186674974 -1.39954451 R
## 155 -0.186674974 -1.39954451 R
## 156 -0.588710342 -0.46833442 R
## 157 -2.434915803 -1.57286451 R
## 158 0.779483548 -1.33935885 R
## 159 -0.168320027 -1.67226688 R
## 160 -0.860055136 -2.21946189 R
## 161 -0.588710342 -0.46833442 R
## 162 -2.434915803 -1.57286451 R
## 163 -1.138280881 -0.92728240 R
## 164 -0.008605128 -1.57143555 R
## 165 -0.316256467 -0.26298086 R
## 166 -0.860055136 -2.21946189 R
## 167 0.544629613 -1.61762409 R
## 168 -0.289270938 -1.17938825 R
## 169 -1.638716948 -1.51501038 R
## 170 -1.638716948 -1.51501038 R
## 171 -0.829167731 0.42261407 R
## 172 -0.302692686 -0.92399268 R
## 173 -2.902302684 -1.49282840 R
## 174 -0.588710342 -0.46833442 R
## 175 -0.588710342 -0.46833442 R
## 176 0.064360561 0.15607894 R
## 177 -0.316256467 -0.26298086 R
## 178 -0.186674974 -1.39954451 R
## 179 -1.348878827 -0.41384379 R
## 180 -2.434915803 -1.57286451 R
## 181 -1.138280881 -0.92728240 R
## 182 -0.302692686 -0.92399268 R
## 183 -0.400799099 -2.26338258 R
## 184 -1.638716948 -1.51501038 R
## 185 -1.125398710 0.66990478 R
## 186 -0.860055136 -1.28443852 R
## 187 -0.789109306 -0.84516218 R
## 188 -0.588710342 -0.46833442 R
## 189 -1.138280881 -0.92728240 R
## 190 -0.302692686 -0.92399268 R
## 191 0.779483548 -1.33935885 R
## 192 0.779483548 -1.33935885 R
## 193 -0.860055136 -1.28443852 R
## 194 -1.638716948 -1.51501038 R
## 195 -0.597170869 -1.70542147 R
## 196 0.827857790 0.18656255 R
## 197 -0.302692686 -0.92399268 R
## 198 -1.138280881 -0.92728240 R
## 199 -0.400799099 -2.26338258 R
## 200 -1.348878827 -0.41384379 R
## 201 -0.302692686 -0.92399268 R
## 202 -0.860055136 -2.21946189 R
## 203 -1.348878827 -0.41384379 R
## 204 -0.789109306 -0.84516218 R
## 205 -0.829167731 0.42261407 R
## 206 -0.168320027 -1.67226688 R
## 207 -0.860055136 -2.21946189 R
## 208 -0.400799099 -2.26338258 R
## 209 -1.427539411 -1.35491277 R
## 210 -0.860055136 -2.21946189 R
## 211 -0.597170869 -1.70542147 R
## 212 -0.400799099 -2.26338258 R
## 213 -0.860055136 -1.28443852 R
## 214 -0.302692686 -0.92399268 R
## 215 -1.427539411 -1.35491277 R
## 216 -1.348878827 -0.41384379 R
## 217 0.064360561 0.15607894 R
## 218 -0.168320027 -1.67226688 R
## 219 -0.168320027 -1.67226688 R
## 220 -0.860055136 -1.28443852 R
## 221 -1.638716948 -1.51501038 R
## 222 -0.588710342 -0.46833442 R
$Label <- rep("LR_OS_UPSAMPLE",nrow(PMA_PreModelling_Train_LR_OS_UPSAMPLE))
PMA_PreModelling_Train_LR_OS_UPSAMPLE
##################################
# Verifying the class distribution
# for the oversampled data using OS_UPSAMPLE
##################################
table(PMA_PreModelling_Train_LR_OS_UPSAMPLE$Class)
##
## M R
## 111 111
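##################################
# Supplementary check (not in the
# original output): upsampling works
# by duplicating minority rows
##################################
# The 25 original R observations are resampled with replacement up to
# 111, so the upsampled data should contain many exact duplicates.
sum(duplicated(PMA_PreModelling_Train_LR_OS_UPSAMPLE[,c("V1","V11","Class")]))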
##################################
# Formulating the structure of the
# Logistic Regression model
##################################
LR_OS_UPSAMPLE_Model <- glm(Class ~ V1 + V11,
                            data = PMA_PreModelling_Train_LR_OS_UPSAMPLE,
                            family = binomial)
##################################
# Consolidating the model results
##################################
summary(LR_OS_UPSAMPLE_Model)
##
## Call:
## glm(formula = Class ~ V1 + V11, family = binomial, data = PMA_PreModelling_Train_LR_OS_UPSAMPLE)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.1647 0.2470 -4.715 2.42e-06 ***
## V1 -0.9110 0.2482 -3.670 0.000243 ***
## V11 -1.8812 0.2634 -7.142 9.17e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 307.76 on 221 degrees of freedom
## Residual deviance: 181.78 on 219 degrees of freedom
## AIC: 187.78
##
## Number of Fisher Scoring iterations: 5
LR_OS_UPSAMPLE_Model_Coef <- as.data.frame(LR_OS_UPSAMPLE_Model$coefficients)
LR_OS_UPSAMPLE_Model_Coef$Coef <- rownames(LR_OS_UPSAMPLE_Model_Coef)
LR_OS_UPSAMPLE_Model_Coef$Model <- rep("LR_OS_UPSAMPLE",nrow(LR_OS_UPSAMPLE_Model_Coef))
colnames(LR_OS_UPSAMPLE_Model_Coef) <- c("Estimates","Coefficients","Model")
print(LR_OS_UPSAMPLE_Model_Coef, rownames=FALSE)
## Estimates Coefficients Model
## (Intercept) -1.1646747 (Intercept) LR_OS_UPSAMPLE
## V1 -0.9110338 V1 LR_OS_UPSAMPLE
## V11 -1.8811681 V11 LR_OS_UPSAMPLE
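##################################
# Added sketch: exponentiating the coefficients expresses them as odds
# ratios; with levels c("M","R"), glm models the log-odds of class "R",
# so exp(-1.88) ~ 0.15 means a one-unit rise in V11 multiplies the
# odds of "R" by about 0.15
##################################
exp(LR_OS_UPSAMPLE_Model$coefficients)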
##################################
# Computing the model predictions
##################################
(LR_OS_UPSAMPLE_Model_Probabilities <- predict(LR_OS_UPSAMPLE_Model,
                                               type = c("response")))
## 1 2 3 4 5 6
## 0.7179750734 0.0219760571 0.3450992033 0.0030705786 0.0060031731 0.0371925877
## 7 8 9 10 11 12
## 0.1921472883 0.0352984771 0.4056307613 0.8221055779 0.5843383511 0.3232247505
## 13 14 15 16 17 18
## 0.7669193985 0.8138250288 0.3428144171 0.0826722516 0.3501155120 0.4156101313
## 19 20 21 22 23 24
## 0.1713790099 0.5704046636 0.2060053981 0.2892593757 0.1317198352 0.3714364633
## 25 26 27 28 29 30
## 0.4381049807 0.1439553447 0.2850090965 0.0352163999 0.0907545671 0.0047075528
## 31 32 33 34 35 36
## 0.0504845805 0.0442755776 0.0041558586 0.0255708393 0.0083271556 0.0149403855
## 37 38 39 40 41 42
## 0.0036683904 0.0018985803 0.2150293746 0.0033053895 0.0009708468 0.2190609887
## 43 44 45 46 47 48
## 0.5934995135 0.4425560636 0.0954235210 0.0986203238 0.0879365600 0.9453623871
## 49 50 51 52 53 54
## 0.0713526975 0.0504392980 0.1394866952 0.0448266150 0.5243395342 0.5434581342
## 55 56 57 58 59 60
## 0.3114058838 0.4476277500 0.7478035602 0.8836514796 0.9151368708 0.4065560302
## 61 62 63 64 65 66
## 0.3682636851 0.2439622644 0.1371140479 0.2100782444 0.2624859141 0.2943080924
## 67 68 69 70 71 72
## 0.5539858259 0.0868900350 0.2301536474 0.5337583288 0.8843692868 0.9311925798
## 73 74 75 76 77 78
## 0.5590079462 0.3996122680 0.0065536738 0.1238543705 0.0847490662 0.0234079574
## 79 80 81 82 83 84
## 0.0252623769 0.0065622381 0.1629201398 0.5669159737 0.0679205488 0.0087864263
## 85 86 87 88 89 90
## 0.0419497741 0.0885487004 0.1073279679 0.0028654573 0.0077243548 0.0817993402
## 91 92 93 94 95 96
## 0.0870930602 0.3138237976 0.3700758345 0.3335668084 0.2755803841 0.6216949898
## 97 98 99 100 101 102
## 0.3618419434 0.1123457119 0.4007694305 0.6386262692 0.2151364084 0.1909097870
## 103 104 105 106 107 108
## 0.2120463101 0.1383994535 0.0708949509 0.0376997727 0.1432887529 0.0834577765
## 109 110 111 112 113 114
## 0.0792362717 0.1119463627 0.1618463380 0.9822321769 0.2307141816 0.2307141816
## 115 116 117 118 119 120
## 0.9822321769 0.6990355243 0.8580636277 0.8942101026 0.6990355243 0.4056824362
## 121 122 123 124 125 126
## 0.9361166156 0.9300561845 0.8372859227 0.9694768165 0.7993396439 0.7584357774
## 127 128 129 130 131 132
## 0.4056824362 0.9300561845 0.5628350830 0.8372859227 0.9822321769 0.7993396439
## 133 134 135 136 137 138
## 0.0936520778 0.8372859227 0.5628350830 0.7993396439 0.8372859227 0.1799155074
## 139 140 141 142 143 144
## 0.7584357774 0.7004148673 0.8844343440 0.7887635614 0.7004148673 0.7993396439
## 145 146 147 148 149 150
## 0.8844343440 0.8372859227 0.1799155074 0.8580636277 0.9361166156 0.5628350830
## 151 152 153 154 155 156
## 0.6558261080 0.8844343440 0.0936520778 0.8372859227 0.8372859227 0.5628350830
## 157 158 159 160 161 162
## 0.9822321769 0.6558261080 0.8942101026 0.9779909273 0.5628350830 0.9822321769
## 163 164 165 166 167 168
## 0.8343411495 0.8580636277 0.4056824362 0.9779909273 0.7993396439 0.7887635614
## 169 170 171 172 173 174
## 0.9600071925 0.9600071925 0.2307141816 0.7004148673 0.9864494168 0.5628350830
## 175 176 177 178 179 180
## 0.5628350830 0.1799155074 0.4056824362 0.8372859227 0.6990355243 0.9822321769
## 181 182 183 184 185 186
## 0.8343411495 0.7004148673 0.9694768165 0.9600071925 0.1978792907 0.8844343440
## 187 188 189 190 191 192
## 0.7584357774 0.5628350830 0.8343411495 0.7004148673 0.6558261080 0.6558261080
## 193 194 195 196 197 198
## 0.8844343440 0.9600071925 0.9300561845 0.0936520778 0.7004148673 0.8343411495
## 199 200 201 202 203 204
## 0.9694768165 0.6990355243 0.7004148673 0.9779909273 0.6990355243 0.7584357774
## 205 206 207 208 209 210
## 0.2307141816 0.8942101026 0.9779909273 0.9694768165 0.9361166156 0.9779909273
## 211 212 213 214 215 216
## 0.9300561845 0.9694768165 0.8844343440 0.7004148673 0.9361166156 0.6990355243
## 217 218 219 220 221 222
## 0.1799155074 0.8942101026 0.8942101026 0.8844343440 0.9600071925 0.5628350830
##################################
# Creating a classification index
# based on the model predictions
##################################
(LR_OS_UPSAMPLE_Model_Indices <- predict(LR_OS_UPSAMPLE_Model,
                                         type = c("link")))
## 1 2 3 4 5 6
## 0.93443939 -3.79558061 -0.64065185 -5.78281396 -5.10944583 -3.25374392
## 7 8 9 10 11 12
## -1.43611755 -3.30797893 -0.38205745 1.53067859 0.34060860 -0.73899133
## 13 14 15 16 17 18
## 1.19099739 1.47505845 -0.65077721 -2.40658081 -0.61853150 -0.34082071
## 19 20 21 22 23 24
## -1.57588533 0.28350236 -1.34917429 -0.89898378 -1.88583723 -0.52605929
## 25 26 27 28 29 30
## -0.24885647 -1.78281940 -0.91974872 -3.31039195 -2.30445626 -5.35386842
## 31 32 33 34 35 36
## -2.93428382 -3.07203638 -5.47907171 -3.64039921 -4.77987133 -4.18863418
## 37 38 39 40 41 42
## -5.60432715 -6.26474851 -1.29487165 -5.70889009 -6.93637061 -1.27114688
## 43 44 45 46 47 48
## 0.37845127 -0.23079478 -2.24914176 -2.21264920 -2.33909390 2.85084581
## 49 50 51 52 53 54
## -2.56609386 -2.93522886 -1.81955986 -3.05909083 0.09743515 0.17427227
## 55 56 57 58 59 60
## -0.79355485 -0.21026021 1.08693202 2.02747256 2.37803393 -0.37822105
## 61 62 63 64 65 66
## -0.53967287 -1.13107773 -1.83946948 -1.32445384 -1.03308777 -0.87455160
## 67 68 69 70 71 72
## 0.21678837 -2.35221296 -1.20744383 0.13523906 2.03447311 2.60515452
## 73 74 75 76 77 78
## 0.23713683 -0.40708092 -5.02115426 -1.95642588 -2.37950354 -3.73099298
## 79 80 81 82 83 84
## -3.65285212 -5.01983969 -1.63665934 0.26927933 -2.61907944 -4.72572196
## 85 86 87 88 89 90
## -3.12842716 -2.33148548 -2.11832998 -5.85215776 -4.85562264 -2.41814677
## 91 92 93 94 95 96
## -2.34965674 -0.78230278 -0.53189150 -0.69209673 -0.96649144 0.49674883
## 97 98 99 100 101 102
## -0.56737849 -2.06700152 -0.40226017 0.56940673 -1.29423765 -1.44410943
## 103 104 105 106 107 108
## -1.31263462 -1.82864767 -2.57302259 -3.23967242 -1.78823908 -2.39626730
## 109 110 111 112 113 114
## -2.45276930 -2.07101229 -1.64455410 4.01243858 -1.20428294 -1.20428294
## 115 116 117 118 119 120
## 4.01243858 0.84270933 1.79929938 2.13448574 0.84270933 -0.38184312
## 121 122 123 124 125 126
## 2.68468076 2.58755271 1.63817108 3.45827005 1.38217223 1.14412276
## 127 128 129 130 131 132
## -0.38184312 2.58755271 0.25267615 1.63817108 4.01243858 1.38217223
## 133 134 135 136 137 138
## -2.26983664 1.63817108 0.25267615 1.38217223 1.63817108 -1.51692004
## 139 140 141 142 143 144
## 1.14412276 0.84927420 2.03510946 1.31748854 0.84927420 1.38217223
## 145 146 147 148 149 150
## 2.03510946 1.63817108 -1.51692004 1.79929938 2.68468076 0.25267615
## 151 152 153 154 155 156
## 0.64474864 2.03510946 -2.26983664 1.63817108 1.63817108 0.25267615
## 157 158 159 160 161 162
## 4.01243858 0.64474864 2.13448574 3.79404563 0.25267615 4.01243858
## 163 164 165 166 167 168
## 1.61671181 1.79929938 -0.38184312 3.79404563 1.38217223 1.31748854
## 169 170 171 172 173 174
## 3.17824115 3.17824115 -1.20428294 0.84927420 4.28768246 0.25267615
## 175 176 177 178 179 180
## 0.25267615 -1.51692004 -0.38184312 1.63817108 0.84270933 4.01243858
## 181 182 183 184 185 186
## 1.61671181 0.84927420 3.45827005 3.17824115 -1.39960190 2.03510946
## 187 188 189 190 191 192
## 1.14412276 0.25267615 1.61671181 0.84927420 0.64474864 0.64474864
## 193 194 195 196 197 198
## 2.03510946 3.17824115 2.58755271 -2.26983664 0.84927420 1.61671181
## 199 200 201 202 203 204
## 3.45827005 0.84270933 0.84927420 3.79404563 0.84270933 1.14412276
## 205 206 207 208 209 210
## -1.20428294 2.13448574 3.79404563 3.45827005 2.68468076 3.79404563
## 211 212 213 214 215 216
## 2.58755271 3.45827005 2.03510946 0.84927420 2.68468076 0.84270933
## 217 218 219 220 221 222
## -1.51692004 2.13448574 2.13448574 2.03510946 3.17824115 0.25267615
max(LR_OS_UPSAMPLE_Model_Indices)
## [1] 4.287682
min(LR_OS_UPSAMPLE_Model_Indices)
## [1] -6.936371
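##################################
# Added sketch: the classification index is the linear predictor (logit),
# so the inverse logit should reproduce the fitted probabilities exactly
##################################
# plogis() is the logistic CDF, i.e. 1 / (1 + exp(-x))
all.equal(plogis(LR_OS_UPSAMPLE_Model_Indices),
          LR_OS_UPSAMPLE_Model_Probabilities)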
##################################
# Consolidating the model probabilities
# and classification index
# based on the model predictions
##################################
LR_OS_UPSAMPLE_Model_Predictions <- as.data.frame(PMA_PreModelling_Train_LR_OS_UPSAMPLE)
LR_OS_UPSAMPLE_Model_Predictions$LR_OS_UPSAMPLE_Prob <- LR_OS_UPSAMPLE_Model_Probabilities
LR_OS_UPSAMPLE_Model_Predictions$LR_OS_UPSAMPLE_LP <- LR_OS_UPSAMPLE_Model_Indices
LR_OS_UPSAMPLE_Model_Predictions$Class <- as.factor(LR_OS_UPSAMPLE_Model_Predictions$Class)
LR_OS_UPSAMPLE_Model_Predictions$Label <- rep("LR_OS_UPSAMPLE",nrow(LR_OS_UPSAMPLE_Model_Predictions))
##################################
# Formulating the probability curve
# using the consolidated model predictions
##################################
LR_OS_UPSAMPLE_Model_Predictions %>%
  ggplot(aes(x = LR_OS_UPSAMPLE_LP,
y = LR_OS_UPSAMPLE_Prob,
color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("Sonar Object Classification Index (Logit Values)") +
ylab("Estimated Rock Detection Probability") +
labs(color = "Class") +
scale_x_continuous( limits=c(-10,5), breaks=seq(-10,5,by=1)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
ggtitle("Estimated Rock Detection Probabilities Based on Classification Index : Logistic Regression (OS_UPSAMPLE)") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
##################################
# Visualizing the imbalanced data set
##################################
ggplot(PMA_PreModelling_Train, aes(V1, V11, color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
scale_x_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
scale_y_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
labs(title = "Without Undersampling - Near Miss Algorithm") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
##################################
# Implementing US_NEARMISS
# Visualizing the undersampled data using US_NEARMISS
##################################
recipe(Class ~ V1 + V11, data = PMA_PreModelling_Train) %>%
step_nearmiss(Class, seed=123456789) %>%
prep() %>%
bake(new_data = NULL) %>%
ggplot(aes(V1, V11, color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
scale_x_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
scale_y_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
labs(title = "With Undersampling - Near Miss Algorithm") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
US_NEARMISS <- recipe(Class ~ V1 + V11, data = PMA_PreModelling_Train) %>%
  step_nearmiss(Class, seed=123456789) %>%
  prep()
PMA_PreModelling_Train_LR_US_NEARMISS <- US_NEARMISS %>%
  bake(new_data = NULL)
(PMA_PreModelling_Train_LR_US_NEARMISS <- as.data.frame(PMA_PreModelling_Train_LR_US_NEARMISS))
## V1 V11 Class
## 1 0.914240973 -1.55861587 M
## 2 1.143139503 0.55690423 M
## 3 -0.935159101 0.03686239 M
## 4 1.169078303 -0.79246144 M
## 5 -1.204708453 -0.18639848 M
## 6 -0.068999246 0.13149291 M
## 7 0.565359423 0.74012531 M
## 8 2.230423635 1.21329169 M
## 9 0.782380194 0.93715691 M
## 10 1.784287809 0.74337479 M
## 11 1.934080462 1.47897609 M
## 12 0.744203377 2.70773324 M
## 13 1.002499594 0.07158504 M
## 14 1.281675939 -0.27257795 M
## 15 1.390588252 0.33359231 M
## 16 -0.180527407 -0.62433567 M
## 17 -0.168320027 -1.80173334 M
## 18 -0.132380328 -0.09011428 M
## 19 -0.789109306 -0.36302243 M
## 20 0.401039618 0.45156422 M
## 21 1.243886483 1.44694263 M
## 22 0.142960831 2.42255907 M
## 23 0.625610136 0.17668535 M
## 24 0.537644499 -0.19150300 M
## 25 0.157176488 1.02691760 M
## 26 -2.902302684 -1.49282840 R
## 27 -0.588710342 -0.46833442 R
## 28 -0.597170869 -1.70542147 R
## 29 -0.289270938 -1.17938825 R
## 30 0.544629613 -1.61762409 R
## 31 0.064360561 0.15607894 R
## 32 -0.168320027 -1.67226688 R
## 33 -0.316256467 -0.26298086 R
## 34 -1.638716948 -1.51501038 R
## 35 0.274190661 0.01776805 R
## 36 -0.860055136 -2.21946189 R
## 37 -0.789109306 -0.84516218 R
## 38 -1.427539411 -1.35491277 R
## 39 0.779483548 -1.33935885 R
## 40 -0.400799099 -2.26338258 R
## 41 -0.302692686 -0.92399268 R
## 42 0.827857790 0.18656255 R
## 43 -0.829167731 0.42261407 R
## 44 -2.434915803 -1.57286451 R
## 45 -1.138280881 -0.92728240 R
## 46 -0.860055136 -1.28443852 R
## 47 -1.125398710 0.66990478 R
## 48 -0.186674974 -1.39954451 R
## 49 -0.008605128 -1.57143555 R
## 50 -1.348878827 -0.41384379 R
$Label <- rep("LR_US_NEARMISS",nrow(PMA_PreModelling_Train_LR_US_NEARMISS))
PMA_PreModelling_Train_LR_US_NEARMISS
##################################
# Verifying the class distribution
# for the undersampled data using US_NEARMISS
##################################
table(PMA_PreModelling_Train_LR_US_NEARMISS$Class)
##
## M R
## 25 25
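##################################
# Added sketch: NearMiss removes (rather than synthesizes) majority rows,
# so every retained "M" observation should appear verbatim in the original
# data; retained_M is a hypothetical helper name
##################################
retained_M <- subset(PMA_PreModelling_Train_LR_US_NEARMISS, Class == "M")
# Expected to return 25 if all kept majority rows are original observations
nrow(semi_join(retained_M, PMA_PreModelling_Train, by = c("V1", "V11")))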
##################################
# Formulating the structure of the
# Logistic Regression model
##################################
LR_US_NEARMISS_Model <- glm(Class ~ V1 + V11,
                            data = PMA_PreModelling_Train_LR_US_NEARMISS,
                            family = binomial)
##################################
# Consolidating the model results
##################################
summary(LR_US_NEARMISS_Model)
##
## Call:
## glm(formula = Class ~ V1 + V11, family = binomial, data = PMA_PreModelling_Train_LR_US_NEARMISS)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.5276 0.4512 -1.169 0.2423
## V1 -1.3388 0.5264 -2.543 0.0110 *
## V11 -1.2227 0.4795 -2.550 0.0108 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 69.315 on 49 degrees of freedom
## Residual deviance: 40.340 on 47 degrees of freedom
## AIC: 46.34
##
## Number of Fisher Scoring iterations: 5
LR_US_NEARMISS_Model_Coef <- as.data.frame(LR_US_NEARMISS_Model$coefficients)
LR_US_NEARMISS_Model_Coef$Coef <- rownames(LR_US_NEARMISS_Model_Coef)
LR_US_NEARMISS_Model_Coef$Model <- rep("LR_US_NEARMISS",nrow(LR_US_NEARMISS_Model_Coef))
colnames(LR_US_NEARMISS_Model_Coef) <- c("Estimates","Coefficients","Model")
print(LR_US_NEARMISS_Model_Coef, rownames=FALSE)
## Estimates Coefficients Model
## (Intercept) -0.5275579 (Intercept) LR_US_NEARMISS
## V1 -1.3388434 V1 LR_US_NEARMISS
## V11 -1.2227451 V11 LR_US_NEARMISS
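##################################
# Added sketch: the 50% decision boundary lies where the linear predictor
# is zero, i.e. V11 = -(Intercept + b_V1 * V1) / b_V11; boundary_V11 is a
# hypothetical helper, not part of the original workflow
##################################
b <- coef(LR_US_NEARMISS_Model)
boundary_V11 <- function(v1) -(b["(Intercept)"] + b["V1"] * v1) / b["V11"]
# Boundary values of V11 at three illustrative V1 positions
boundary_V11(c(-2, 0, 2))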
##################################
# Computing the model predictions
##################################
(LR_US_NEARMISS_Model_Probabilities <- predict(LR_US_NEARMISS_Model,
                                               type = c("response")))
## 1 2 3 4 5 6
## 0.538475404 0.060711209 0.663604862 0.245308514 0.788060819 0.355268290
## 7 8 9 10 11 12
## 0.100700058 0.006710875 0.061748607 0.021344245 0.007207500 0.007885623
## 13 14 15 16 17 18
## 0.123761916 0.128955243 0.057473709 0.617167616 0.869988474 0.440253186
## 19 20 21 22 23 24
## 0.725681187 0.165669455 0.018666733 0.024575272 0.170626580 0.266350095
## 25 26 27 28 29 30
## 0.119871133 0.994423044 0.697049432 0.913509256 0.786141426 0.672873462
## 31 32 33 34 35 36
## 0.309048509 0.851010973 0.554141527 0.971220746 0.285695645 0.965701640
## 37 38 39 40 41 42
## 0.826690849 0.954366549 0.516626398 0.941397563 0.732532124 0.134231886
## 43 44 45 46 47 48
## 0.516448710 0.990581586 0.893808927 0.899751107 0.539927799 0.807470453
## 49 50
## 0.803043836 0.856239957
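##################################
# Added sketch (hypothetical 0.50 cutoff): cross-tabulating thresholded
# probabilities against the actual classes gives an in-sample confusion
# matrix for the NearMiss model
##################################
LR_US_NEARMISS_Predicted <- factor(ifelse(LR_US_NEARMISS_Model_Probabilities >= 0.5, "R", "M"),
                                   levels = c("M", "R"))
table(Predicted = LR_US_NEARMISS_Predicted,
      Actual = PMA_PreModelling_Train_LR_US_NEARMISS$Class)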
##################################
# Creating a classification index
# based on the model predictions
##################################
(LR_US_NEARMISS_Model_Indices <- predict(LR_US_NEARMISS_Model,
                                         type = c("link")))
## 1 2 3 4 5 6
## 0.15420647 -2.73899464 0.67940041 -1.12379238 1.31327592 -0.59596102
## 7 8 9 10 11 12
## -2.18947025 -4.99729239 -2.72094650 -3.82539779 -4.92539958 -4.83479722
## 13 14 15 16 17 18
## -1.95727815 -1.91022797 -2.79723621 0.47754340 1.90085685 -0.24013458
## 19 20 21 22 23 24
## 0.97281980 -1.61663509 -3.96216914 -3.68113228 -1.58119307 -1.01322036
## 25 26 27 28 29 30
## -1.99365106 5.18351957 0.83328668 2.35725611 1.30182178 0.72121023
## 31 32 33 34 35 36
## -0.80457137 1.74255237 0.21741854 3.51889902 -0.91638207 3.33775738
## 37 38 39 40 41 42
## 1.56235380 3.04040680 0.06653012 2.77658926 1.00750772 -1.86404831
## 43 44 45 46 47 48
## 0.06581859 4.65562548 2.13025197 2.19446215 0.16005199 1.43365683
## 49 50
## 1.40542811 1.78440512
max(LR_US_NEARMISS_Model_Indices)
## [1] 5.18352
min(LR_US_NEARMISS_Model_Indices)
## [1] -4.997292
##################################
# Consolidating the model probabilities
# and classification index
# based on the model predictions
##################################
LR_US_NEARMISS_Model_Predictions <- as.data.frame(PMA_PreModelling_Train_LR_US_NEARMISS)
LR_US_NEARMISS_Model_Predictions$LR_US_NEARMISS_Prob <- LR_US_NEARMISS_Model_Probabilities
LR_US_NEARMISS_Model_Predictions$LR_US_NEARMISS_LP <- LR_US_NEARMISS_Model_Indices
LR_US_NEARMISS_Model_Predictions$Class <- as.factor(LR_US_NEARMISS_Model_Predictions$Class)
LR_US_NEARMISS_Model_Predictions$Label <- rep("LR_US_NEARMISS",nrow(LR_US_NEARMISS_Model_Predictions))
##################################
# Formulating the probability curve
# using the consolidated model predictions
##################################
LR_US_NEARMISS_Model_Predictions %>%
  ggplot(aes(x = LR_US_NEARMISS_LP,
y = LR_US_NEARMISS_Prob,
color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("Sonar Object Classification Index (Logit Values)") +
ylab("Estimated Rock Detection Probability") +
labs(color = "Class") +
scale_x_continuous( limits=c(-10,5), breaks=seq(-10,5,by=1)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
ggtitle("Estimated Rock Detection Probabilities Based on Classification Index : Logistic Regression (US_NEARMISS)") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
##################################
# Visualizing the imbalanced data set
##################################
ggplot(PMA_PreModelling_Train, aes(V1, V11, color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
scale_x_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
scale_y_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
labs(title = "Without Undersampling - Tomek Links") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
##################################
# Implementing US_TOMEK
# Visualizing the undersampled data using US_TOMEK
##################################
recipe(Class ~ V1 + V11, data = PMA_PreModelling_Train) %>%
step_tomek(Class, seed=123456789) %>%
prep() %>%
bake(new_data = NULL) %>%
ggplot(aes(V1, V11, color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
scale_x_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
scale_y_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
labs(title = "With Undersampling - Tomek Links") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
US_TOMEK <- recipe(Class ~ V1 + V11, data = PMA_PreModelling_Train) %>%
  step_tomek(Class, seed=123456789) %>%
  prep()
PMA_PreModelling_Train_LR_US_TOMEK <- US_TOMEK %>%
  bake(new_data = NULL)
(PMA_PreModelling_Train_LR_US_TOMEK <- as.data.frame(PMA_PreModelling_Train_LR_US_TOMEK))
## V1 V11 Class
## 1 0.914240973 -1.55861587 M
## 2 2.175018299 0.34520609 M
## 3 -0.230553865 -0.16690717 M
## 4 1.231717734 1.85842121 M
## 5 0.424204681 1.89154083 M
## 6 1.143139503 0.55690423 M
## 7 -0.507041623 0.38985083 M
## 8 0.312328639 0.98808941 M
## 9 -0.935159101 0.03686239 M
## 10 0.738227771 -1.15770286 M
## 11 1.169078303 -0.79246144 M
## 12 0.118911997 -1.30982705 M
## 13 1.012209983 -0.76338471 M
## 14 0.207991857 -0.39104999 M
## 15 -0.957451233 0.02573746 M
## 16 0.695599819 -0.11827986 M
## 17 -0.068999246 0.13149291 M
## 18 0.527094931 -0.39650480 M
## 19 0.104263091 0.33286517 M
## 20 0.465616266 -0.56497212 M
## 21 -0.507041623 -0.24127887 M
## 22 0.043933590 0.30731955 M
## 23 0.147716923 -0.20173692 M
## 24 0.612463710 0.84401925 M
## 25 -0.068999246 0.63930609 M
## 26 1.395977603 1.55085071 M
## 27 -0.180527407 1.02812486 M
## 28 0.565359423 0.74012531 M
## 29 2.230423635 1.21329169 M
## 30 0.782380194 0.93715691 M
## 31 2.005116912 0.95072168 M
## 32 1.784287809 0.74337479 M
## 33 1.523834205 1.62207084 M
## 34 1.928176420 1.77732114 M
## 35 1.934080462 1.47897609 M
## 36 0.744203377 2.70773324 M
## 37 1.424344244 -0.63319889 M
## 38 0.689392711 -0.83030322 M
## 39 1.381555338 -0.09258957 M
## 40 1.002499594 0.07158504 M
## 41 0.977896818 0.15071601 M
## 42 0.278484686 -2.26945660 M
## 43 1.406688795 0.06372536 M
## 44 1.852613244 0.04399440 M
## 45 1.281675939 -0.27257795 M
## 46 1.390588252 0.33359231 M
## 47 -0.192852168 -0.57752146 M
## 48 -0.180527407 -0.62433567 M
## 49 -0.052243905 -0.17198030 M
## 50 -0.779287302 -0.12994977 M
## 51 -0.041194136 -1.17696945 M
## 52 -2.093160440 0.59563328 M
## 53 -0.230553865 -0.22058604 M
## 54 -1.038676203 0.48516249 M
## 55 -0.030238811 0.37335493 M
## 56 0.089444831 0.04161877 M
## 57 0.303951059 -0.21715050 M
## 58 -0.132380328 -0.09011428 M
## 59 -1.546463816 0.01457488 M
## 60 -0.499153812 0.87301303 M
## 61 -0.108968439 0.07550792 M
## 62 0.686277848 -1.02337267 M
## 63 -0.721885235 -1.35101476 M
## 64 -3.557061230 -0.28132844 M
## 65 -0.789109306 -0.36302243 M
## 66 -0.750264961 -0.03937819 M
## 67 -0.379134945 2.23365699 M
## 68 -0.371994173 0.60103638 M
## 69 0.401039618 0.45156422 M
## 70 -0.295964244 1.50754826 M
## 71 0.256869245 1.19827720 M
## 72 1.243886483 1.44694263 M
## 73 -0.230553865 0.36255507 M
## 74 -0.256318859 -0.63813480 M
## 75 0.632133128 0.46700244 M
## 76 0.324793230 1.73570326 M
## 77 0.723165726 0.69367752 M
## 78 -1.191144673 1.19711994 M
## 79 -1.177722925 1.07730973 M
## 80 0.142960831 2.42255907 M
## 81 0.443194144 1.74741590 M
## 82 -0.180527407 0.75375436 M
## 83 0.544629613 0.36615870 M
## 84 -1.274767709 0.41409622 M
## 85 -0.539087424 -0.07530230 M
## 86 -0.555415865 0.01776805 M
## 87 0.345301912 -0.27257795 M
## 88 -1.868589923 0.02175517 M
## 89 -0.217863016 -0.21200423 M
## 90 0.625610136 0.17668535 M
## 91 -0.799007156 -0.01833539 M
## 92 -2.013850697 0.05347991 M
## 93 0.537644499 -0.19150300 M
## 94 -0.013979414 0.15531332 M
## 95 -0.935159101 0.53154288 M
## 96 -0.779287302 0.73036007 M
## 97 0.424204681 0.54321732 M
## 98 0.157176488 1.02691760 M
## 99 -0.323092564 0.48794823 M
## 100 0.377448180 0.47190071 M
## 101 0.992715078 0.20396774 M
## 102 0.295518362 0.33867807 M
## 103 -2.902302684 -1.49282840 R
## 104 -0.597170869 -1.70542147 R
## 105 -0.289270938 -1.17938825 R
## 106 -0.316256467 -0.26298086 R
## 107 -1.638716948 -1.51501038 R
## 108 0.274190661 0.01776805 R
## 109 -0.860055136 -2.21946189 R
## 110 -0.789109306 -0.84516218 R
## 111 -1.427539411 -1.35491277 R
## 112 0.779483548 -1.33935885 R
## 113 -0.400799099 -2.26338258 R
## 114 -0.302692686 -0.92399268 R
## 115 -0.829167731 0.42261407 R
## 116 -2.434915803 -1.57286451 R
## 117 -1.138280881 -0.92728240 R
## 118 -0.008605128 -1.57143555 R
$Label <- rep("LR_US_TOMEK",nrow(PMA_PreModelling_Train_LR_US_TOMEK))
PMA_PreModelling_Train_LR_US_TOMEK
##################################
# Verifying the class distribution
# for the undersampled data using US_TOMEK
##################################
table(PMA_PreModelling_Train_LR_US_TOMEK$Class)
##
## M R
## 102 16
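##################################
# Added note (sketch): Tomek-link removal only deletes the observations
# forming cross-class nearest-neighbour pairs, so it cleans the class
# boundary rather than balancing the data - the split above stays skewed
##################################
round(prop.table(table(PMA_PreModelling_Train_LR_US_TOMEK$Class)), 3)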
##################################
# Formulating the structure of the
# Logistic Regression model
##################################
LR_US_TOMEK_Model <- glm(Class ~ V1 + V11,
                         data = PMA_PreModelling_Train_LR_US_TOMEK,
                         family = binomial)
##################################
# Consolidating the model results
##################################
summary(LR_US_TOMEK_Model)
##
## Call:
## glm(formula = Class ~ V1 + V11, family = binomial, data = PMA_PreModelling_Train_LR_US_TOMEK)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.1192 0.5681 -5.491 4.00e-08 ***
## V1 -0.8550 0.3663 -2.334 0.0196 *
## V11 -2.1768 0.5199 -4.187 2.83e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 93.664 on 117 degrees of freedom
## Residual deviance: 51.097 on 115 degrees of freedom
## AIC: 57.097
##
## Number of Fisher Scoring iterations: 7
LR_US_TOMEK_Model_Coef <- as.data.frame(LR_US_TOMEK_Model$coefficients)
LR_US_TOMEK_Model_Coef$Coef <- rownames(LR_US_TOMEK_Model_Coef)
LR_US_TOMEK_Model_Coef$Model <- rep("LR_US_TOMEK",nrow(LR_US_TOMEK_Model_Coef))
colnames(LR_US_TOMEK_Model_Coef) <- c("Estimates","Coefficients","Model")
print(LR_US_TOMEK_Model_Coef, rownames=FALSE)
## Estimates Coefficients Model
## (Intercept) -3.1192250 (Intercept) LR_US_TOMEK
## V1 -0.8549776 V1 LR_US_TOMEK
## V11 -2.1767958 V11 LR_US_TOMEK
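##################################
# Added sketch: McFadden's pseudo-R-squared from the deviances reported
# in the summary above (about 0.45 here), a rough in-sample fit measure
##################################
1 - LR_US_TOMEK_Model$deviance / LR_US_TOMEK_Model$null.deviance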
##################################
# Computing the model predictions
##################################
(LR_US_TOMEK_Model_Probabilities <- predict(LR_US_TOMEK_Model,
                                            type = c("response")))
## 1 2 3 4 5 6
## 3.756409e-01 3.235729e-03 7.183859e-02 2.697569e-04 5.004968e-04 4.923252e-03
## 7 8 9 10 11 12
## 2.835075e-02 3.922375e-03 8.317828e-02 2.261271e-01 8.365176e-02 4.086217e-01
## 13 14 15 16 17 18
## 8.924332e-02 7.974495e-02 8.653858e-02 3.057631e-02 3.401110e-02 6.257509e-02
## 19 20 21 22 23 24
## 1.920963e-02 9.216455e-02 1.033540e-01 2.133670e-02 5.698041e-02 4.151500e-03
## 25 26 27 28 29 30
## 1.152242e-02 4.577910e-04 5.470520e-03 5.412073e-03 4.676720e-04 2.934903e-03
## 31 32 33 34 35 36
## 1.003676e-03 1.902036e-03 3.514870e-04 1.774511e-04 3.379585e-04 6.444174e-05
## 37 38 39 40 41 42
## 4.932868e-02 1.299682e-01 1.632055e-02 1.579461e-02 1.360818e-02 8.296036e-01
## 43 44 45 46 47 48
## 1.142309e-02 8.171200e-03 2.604136e-02 6.468579e-03 1.548320e-01 1.671688e-01
## 49 50 51 52 53 54
## 6.296205e-02 1.024699e-01 3.723887e-01 6.747167e-02 8.003027e-02 3.601015e-02
## 55 56 57 58 59 60
## 1.972224e-02 3.604439e-02 5.183789e-02 5.679248e-02 1.383861e-01 1.002269e-02
## 61 62 63 64 65 66
## 3.952765e-02 1.856837e-01 6.079859e-01 6.305035e-01 1.605248e-01 8.378117e-02
## 67 68 69 70 71 72
## 4.723640e-04 1.615066e-02 1.160016e-02 2.133604e-03 2.606214e-03 6.535624e-04
## 73 74 75 76 77 78
## 2.386204e-02 1.807923e-01 9.227894e-03 7.647569e-04 5.233073e-03 8.953573e-03
## 79 80 81 82 83 84
## 1.145983e-02 2.004250e-04 6.737929e-04 9.896313e-03 1.234693e-02 5.065520e-02
## 85 86 87 88 89 90
## 7.625172e-02 6.398192e-02 5.619436e-02 1.723620e-01 7.788281e-02 1.731500e-02
## 91 92 93 94 95 96
## 8.346450e-02 1.803700e-01 4.061963e-02 3.090759e-02 2.998119e-02 1.724570e-02
## 97 98 99 100 101 102
## 9.337053e-03 4.114982e-03 1.974037e-02 1.132709e-02 1.198585e-02 1.615689e-02
## 103 104 105 106 107 108
## 9.316147e-01 7.509522e-01 4.244291e-01 9.309838e-02 8.291634e-01 3.253605e-02
## 109 110 111 112 113 114
## 9.203724e-01 3.532443e-01 7.409012e-01 2.952220e-01 8.957034e-01 2.996246e-01
## 115 116 117 118
## 3.454790e-02 9.157827e-01 4.681641e-01 5.765988e-01
##################################
# Creating a classification index
# based on the model predictions
##################################
(LR_US_TOMEK_Model_Indices <- predict(LR_US_TOMEK_Model,
                                      type = c("link")))
## 1 2 3 4 5 6 7
## -0.5080921 -5.7302601 -2.5587838 -8.2177196 -7.5994087 -5.3088505 -3.5343414
## 8 9 10 11 12 13 14
## -5.5371279 -2.3999268 -1.2303105 -2.3937341 -0.3696661 -2.3229093 -2.4458174
## 15 16 17 18 19 20 21
## -2.3566509 -3.4564762 -3.3464654 -2.7067694 -3.9329471 -2.2874876 -2.1605009
## 22 23 24 25 26 27 28
## -3.8257592 -2.8063796 -5.4801254 -4.4518710 -7.6886399 -5.2028960 -5.2136964
## 29 30 31 32 33 34 35
## -7.6672756 -5.8281418 -6.9030821 -6.2629263 -7.9529862 -8.6366379 -7.9922495
## 36 37 38 39 40 41 42
## -9.6496846 -2.9586628 -1.9012398 -4.0988753 -4.1321657 -4.2833829 1.5828204
## 43 44 45 46 47 48 49
## -4.4606296 -4.7989347 -3.6216827 -5.0343092 -1.6971944 -1.6058269 -2.7001917
## 50 51 52 53 54 55 56
## -2.1700777 -0.5219828 -2.6261917 -2.4419359 -3.2872798 -3.9060890 -3.2862939
## 57 58 59 60 61 62 63
## -2.9064041 -2.8098824 -1.8287596 -4.5928308 -3.1904248 -1.4783039 0.4388540
## 64 65 66 67 68 69 70
## 0.5343773 -1.6543285 -2.3920470 -7.6572883 -4.1095118 -4.4450680 -6.1478070
## 71 72 73 74 75 76 77
## -5.9472473 -7.3324188 -3.7113150 -1.5109890 -4.6762536 -7.1751876 -5.2475099
## 78 79 80 81 82 83 84
## -4.7067087 -4.4573816 -8.5148698 -7.3019137 -4.6056475 -4.3819239 -2.9307301
## 85 86 87 88 89 90 91
## -2.4943996 -2.6830343 -2.8211039 -1.5689790 -2.4714671 -4.0387156 -2.3961794
## 92 93 94 95 96 97 98
## -1.5138426 -3.1620361 -3.4453583 -3.4767452 -4.0427966 -4.6643837 -5.4889973
## 99 100 101 102 103 104 105
## -3.9051518 -4.4691663 -4.4119703 -4.1091196 2.6117614 1.1036970 -0.3046174
## 106 107 108 109 110 111 112
## -2.2763772 1.5797095 -3.3923293 2.4474182 -0.6048087 1.0506577 -0.8701553
## 113 114 115 116 117 118
## 2.1503710 -0.8490861 -3.3302497 2.3863784 -0.1275159 0.3088265
max(LR_US_TOMEK_Model_Indices)
## [1] 2.611761
min(LR_US_TOMEK_Model_Indices)
## [1] -9.649685
##################################
# Consolidating the model probabilities
# and classification index
# based on the model predictions
##################################
LR_US_TOMEK_Model_Predictions <- as.data.frame(PMA_PreModelling_Train_LR_US_TOMEK)
LR_US_TOMEK_Model_Predictions$LR_US_TOMEK_Prob <- LR_US_TOMEK_Model_Probabilities
LR_US_TOMEK_Model_Predictions$LR_US_TOMEK_LP <- LR_US_TOMEK_Model_Indices
LR_US_TOMEK_Model_Predictions$Class <- as.factor(LR_US_TOMEK_Model_Predictions$Class)
LR_US_TOMEK_Model_Predictions$Label <- rep("LR_US_TOMEK",nrow(LR_US_TOMEK_Model_Predictions))
##################################
# Formulating the probability curve
# using the consolidated model predictions
##################################
LR_US_TOMEK_Model_Predictions %>%
  ggplot(aes(x = LR_US_TOMEK_LP,
y = LR_US_TOMEK_Prob,
color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("Sonar Object Classification Index (Logit Values)") +
ylab("Estimated Rock Detection Probability") +
labs(color = "Class") +
scale_x_continuous( limits=c(-10,5), breaks=seq(-10,5,by=1)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
ggtitle("Estimated Rock Detection Probabilities Based on Classification Index : Logistic Regression (US_TOMEK)") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
##################################
# Visualizing the imbalanced data set
##################################
ggplot(PMA_PreModelling_Train, aes(V1, V11, color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
labs(title = "Without Oversampling - Adaptive Synthetic Algorithm") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
##################################
# Implementing OS_ADASYN
# Visualizing the oversampled data using OS_ADASYN
##################################
recipe(Class ~ V1 + V11, data = PMA_PreModelling_Train) %>%
step_adasyn(Class, seed=123456789) %>%
prep() %>%
bake(new_data = NULL) %>%
ggplot(aes(V1, V11, color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
scale_x_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
scale_y_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
labs(title = "With Oversampling - Adaptive Synthetic Algorithm") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
OS_ADASYN <- recipe(Class ~ V1 + V11, data = PMA_PreModelling_Train) %>%
  step_adasyn(Class, seed=123456789) %>%
  prep()
PMA_PreModelling_Train_LR_OS_ADASYN <- OS_ADASYN %>%
  bake(new_data = NULL)
(PMA_PreModelling_Train_LR_OS_ADASYN <- as.data.frame(PMA_PreModelling_Train_LR_OS_ADASYN))
## V1 V11 Class
## 1 0.914240973 -1.558615871 M
## 2 2.175018299 0.345206089 M
## 3 -0.230553865 -0.166907171 M
## 4 1.231717734 1.858421213 M
## 5 0.424204681 1.891540835 M
## 6 1.143139503 0.556904232 M
## 7 -0.507041623 0.389850825 M
## 8 0.312328639 0.988089406 M
## 9 -0.935159101 0.036862390 M
## 10 0.408807916 -1.630790485 M
## 11 0.738227771 -1.157702863 M
## 12 1.169078303 -0.792461442 M
## 13 0.118911997 -1.309827049 M
## 14 -0.174409184 -1.318776567 M
## 15 1.012209983 -0.763384712 M
## 16 0.813818599 0.266052702 M
## 17 0.207991857 -0.391049992 M
## 18 -0.957451233 0.025737464 M
## 19 0.695599819 -0.118279862 M
## 20 -1.204708453 -0.186398477 M
## 21 -0.068999246 0.131492907 M
## 22 0.527094931 -0.396504803 M
## 23 0.104263091 0.332865171 M
## 24 0.465616266 -0.564972117 M
## 25 -0.507041623 -0.241278874 M
## 26 0.043933590 0.307319548 M
## 27 0.147716923 -0.201736917 M
## 28 0.612463710 0.844019251 M
## 29 -0.068999246 0.639306087 M
## 30 1.395977603 1.550850706 M
## 31 -0.180527407 1.028124858 M
## 32 0.565359423 0.740125313 M
## 33 2.230423635 1.213291690 M
## 34 0.782380194 0.937156911 M
## 35 2.005116912 0.950721679 M
## 36 1.784287809 0.743374785 M
## 37 1.523834205 1.622070841 M
## 38 1.928176420 1.777321144 M
## 39 -1.204708453 0.652640830 M
## 40 1.934080462 1.478976095 M
## 41 0.744203377 2.707733238 M
## 42 1.424344244 -0.633198890 M
## 43 -0.491314245 -0.582362640 M
## 44 0.689392711 -0.830303223 M
## 45 1.381555338 -0.092589575 M
## 46 1.002499594 0.071585037 M
## 47 0.977896818 0.150716010 M
## 48 0.278484686 -2.269456602 M
## 49 1.406688795 0.063725362 M
## 50 1.852613244 0.043994401 M
## 51 1.281675939 -0.272577947 M
## 52 1.390588252 0.333592312 M
## 53 -0.192852168 -0.577521460 M
## 54 -0.180527407 -0.624335667 M
## 55 -0.052243905 -0.171980296 M
## 56 -0.779287302 -0.129949769 M
## 57 -0.041194136 -1.176969454 M
## 58 -0.924156756 -1.249334984 M
## 59 -0.168320027 -1.801733345 M
## 60 -2.093160440 0.595633277 M
## 61 -0.230553865 -0.220586042 M
## 62 -1.038676203 0.485162490 M
## 63 -0.030238811 0.373354928 M
## 64 0.089444831 0.041618772 M
## 65 0.303951059 -0.217150498 M
## 66 -0.132380328 -0.090114281 M
## 67 -1.546463816 0.014574884 M
## 68 -0.499153812 0.873013033 M
## 69 -0.108968439 0.075507923 M
## 70 0.686277848 -1.023372673 M
## 71 -0.721885235 -1.351014760 M
## 72 -3.557061230 -0.281328435 M
## 73 -0.789109306 -0.363022432 M
## 74 -0.750264961 -0.039378188 M
## 75 -0.379134945 2.233656987 M
## 76 -0.371994173 0.601036378 M
## 77 0.401039618 0.451564218 M
## 78 -0.295964244 1.507548259 M
## 79 0.256869245 1.198277196 M
## 80 1.243886483 1.446942626 M
## 81 -0.230553865 0.362555066 M
## 82 -0.256318859 -0.638134800 M
## 83 0.632133128 0.467002438 M
## 84 0.324793230 1.735703263 M
## 85 0.723165726 0.693677522 M
## 86 -1.191144673 1.197119945 M
## 87 -1.177722925 1.077309728 M
## 88 0.142960831 2.422559073 M
## 89 0.443194144 1.747415902 M
## 90 -0.180527407 0.753754356 M
## 91 0.544629613 0.366158697 M
## 92 -1.274767709 0.414096217 M
## 93 -0.539087424 -0.075302304 M
## 94 -0.555415865 0.017768054 M
## 95 0.345301912 -0.272577947 M
## 96 -1.868589923 0.021755168 M
## 97 -0.217863016 -0.212004228 M
## 98 0.625610136 0.176685353 M
## 99 -0.799007156 -0.018335390 M
## 100 -2.013850697 0.053479912 M
## 101 0.537644499 -0.191503002 M
## 102 -0.013979414 0.155313323 M
## 103 -0.935159101 0.531542880 M
## 104 -0.779287302 0.730360068 M
## 105 0.424204681 0.543217319 M
## 106 0.157176488 1.026917595 M
## 107 -0.323092564 0.487948233 M
## 108 0.377448180 0.471900708 M
## 109 0.992715078 0.203967739 M
## 110 0.295518362 0.338678072 M
## 111 0.099342681 0.206985690 M
## 112 -2.902302684 -1.492828399 R
## 113 -0.588710342 -0.468334419 R
## 114 -0.597170869 -1.705421467 R
## 115 -0.289270938 -1.179388252 R
## 116 0.544629613 -1.617624095 R
## 117 0.064360561 0.156078935 R
## 118 -0.168320027 -1.672266879 R
## 119 -0.316256467 -0.262980859 R
## 120 -1.638716948 -1.515010380 R
## 121 0.274190661 0.017768054 R
## 122 -0.860055136 -2.219461886 R
## 123 -0.789109306 -0.845162176 R
## 124 -1.427539411 -1.354912772 R
## 125 0.779483548 -1.339358851 R
## 126 -0.400799099 -2.263382579 R
## 127 -0.302692686 -0.923992684 R
## 128 0.827857790 0.186562547 R
## 129 -0.829167731 0.422614069 R
## 130 -2.434915803 -1.572864509 R
## 131 -1.138280881 -0.927282397 R
## 132 -0.860055136 -1.284438518 R
## 133 -1.125398710 0.669904780 R
## 134 -0.186674974 -1.399544510 R
## 135 -0.008605128 -1.571435552 R
## 136 -1.348878827 -0.413843793 R
## 137 -0.390322558 -0.784388451 R
## 138 -0.295860799 -1.053993156 R
## 139 -0.017377454 -1.559181951 R
## 140 -0.191911171 -1.315384987 R
## 141 -0.290855568 -1.149235140 R
## 142 -0.035273087 -1.662069728 R
## 143 0.346506687 -1.632808882 R
## 144 0.682389831 -1.454399746 R
## 145 -0.356332622 -0.246153717 R
## 146 0.266972171 0.022526169 R
## 147 0.652376435 0.179556227 R
## 148 -0.342166539 0.277343988 R
## 149 0.104992266 0.129296282 R
## 150 0.128805610 0.158651987 R
## 151 -0.082002824 0.016138154 R
## 152 -0.096321170 0.002448115 R
## 153 -0.178557486 -1.673058340 R
## 154 -0.063390608 -1.606022761 R
## 155 -0.176865876 -1.545290531 R
## 156 -0.180838432 -1.486265301 R
## 157 -0.358706919 -2.156356393 R
## 158 -0.349557028 -0.303980855 R
## 159 -0.582661828 -0.590981902 R
## 160 0.038582729 -0.094260047 R
## 161 0.035702482 0.124526356 R
## 162 -0.574843937 -0.457883052 R
## 163 0.082506030 -0.073375165 R
## 164 -0.529153404 -0.423445246 R
## 165 -0.390201423 -0.354022656 R
## 166 -0.059449742 0.019763646 R
## 167 -0.557747942 -0.560307667 R
## 168 -0.308189318 -0.656122036 R
## 169 0.219359952 0.053910074 R
## 170 0.748334227 0.162318486 R
## 171 0.167225292 0.088274976 R
## 172 0.248268872 0.034854570 R
## 173 0.755598258 0.164533045 R
## 174 0.219609003 -0.008184720 R
## 175 -0.130244888 -0.642472049 R
## 176 -0.784077436 -2.070896097 R
## 177 -0.647882130 -1.804581595 R
## 178 -0.512560514 -2.252694339 R
## 179 -0.807753229 -0.960600033 R
## 180 -0.318364841 -0.921452795 R
## 181 -0.557028431 -1.000347302 R
## 182 -1.342933236 -1.229833763 R
## 183 -1.325016764 -1.203346635 R
## 184 -0.862607677 -0.903846499 R
## 185 0.045146002 -1.229443601 R
## 186 0.437074722 -1.360688788 R
## 187 -0.077403279 -1.640333136 R
## 188 -0.125638032 -1.657275187 R
## 189 -0.517275875 -0.889216525 R
## 190 -0.294167245 -1.086218926 R
## 191 -0.310113401 -0.562354563 R
## 192 -0.297566301 -1.021540043 R
## 193 -0.310870399 -0.525463291 R
## 194 0.703852721 0.148757571 R
## 195 -0.050300971 -0.158482083 R
## 196 0.450144116 0.038151940 R
## 197 0.112591704 -0.144113810 R
## 198 0.727653830 0.156013729 R
## 199 0.205014899 -0.058163811 R
## 200 0.820894004 -0.033103728 R
## 201 -0.725082580 0.036955271 R
## 202 -0.655946484 -0.219209453 R
## 203 -0.723124380 0.280868725 R
## 204 -1.121817738 0.666915420 R
## 205 -1.106401172 -0.023584008 R
## 206 -0.157000177 0.251678836 R
## 207 -1.123828064 0.666573667 R
## 208 -0.938882051 0.514202506 R
## 209 -0.930462819 0.256473662 R
## 210 -0.872827627 0.459060921 R
## 211 -0.171945746 -1.413762652 R
## 212 -0.180283482 -1.405714226 R
## 213 -0.016300987 -1.560685612 R
## 214 -0.083518808 -1.618730111 R
## 215 -0.505285997 -1.684504044 R
## 216 -1.174123534 -0.839897872 R
## 217 -1.279860500 -0.582110741 R
## 218 -1.377082750 -0.751266116 R
## 219 -0.970648251 -0.705281241 R
## 220 -1.190222435 -0.536093069 R
## 221 -1.196659723 -0.784954555 R
## 222 -1.253287340 -0.646896207 R
$Label <- rep("LR_OS_ADASYN",nrow(PMA_PreModelling_Train_LR_OS_ADASYN))
PMA_PreModelling_Train_LR_OS_ADASYN
##################################
# Verifying the class distribution
# for the oversampled data using OS_ADASYN
##################################
table(PMA_PreModelling_Train_LR_OS_ADASYN$Class)
##
## M R
## 111 111
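##################################
# Added sketch: unlike random upsampling, ADASYN synthesizes new minority
# points, generating more of them where minority rows have many majority
# neighbours; the count of synthetic rows is the increase in "R" cases
##################################
table(PMA_PreModelling_Train_LR_OS_ADASYN$Class)["R"] -
  table(PMA_PreModelling_Train$Class)["R"]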
##################################
# Formulating the structure of the
# Logistic Regression model
##################################
LR_OS_ADASYN_Model <- glm(Class ~ V1 + V11,
                          data = PMA_PreModelling_Train_LR_OS_ADASYN,
                          family = binomial)
##################################
# Consolidating the model results
##################################
summary(LR_OS_ADASYN_Model)
##
## Call:
## glm(formula = Class ~ V1 + V11, family = binomial, data = PMA_PreModelling_Train_LR_OS_ADASYN)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.4101 0.1704 -2.406 0.0161 *
## V1 -0.5944 0.2092 -2.841 0.0045 **
## V11 -1.2753 0.2148 -5.938 2.89e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 307.76 on 221 degrees of freedom
## Residual deviance: 238.51 on 219 degrees of freedom
## AIC: 244.51
##
## Number of Fisher Scoring iterations: 4
LR_OS_ADASYN_Model_Coef <- as.data.frame(LR_OS_ADASYN_Model$coefficients)
LR_OS_ADASYN_Model_Coef$Coef <- rownames(LR_OS_ADASYN_Model_Coef)
LR_OS_ADASYN_Model_Coef$Model <- rep("LR_OS_ADASYN",nrow(LR_OS_ADASYN_Model_Coef))
colnames(LR_OS_ADASYN_Model_Coef) <- c("Estimates","Coefficients","Model")
print(LR_OS_ADASYN_Model_Coef, rownames=FALSE)
## Estimates Coefficients Model
## (Intercept) -0.4101181 (Intercept) LR_OS_ADASYN
## V1 -0.5943822 V1 LR_OS_ADASYN
## V11 -1.2753114 V11 LR_OS_ADASYN
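##################################
# Added sketch: stacking the coefficient tables built so far allows a
# side-by-side view of how each resampling scheme shifts the fitted model
##################################
rbind(LR_OS_UPSAMPLE_Model_Coef,
      LR_US_NEARMISS_Model_Coef,
      LR_US_TOMEK_Model_Coef,
      LR_OS_ADASYN_Model_Coef)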
##################################
# Computing the model predictions
##################################
(LR_OS_ADASYN_Model_Probabilities <- predict(LR_OS_ADASYN_Model,
                                             type = c("response")))
## 1 2 3 4 5 6 7
## 0.73772818 0.10497255 0.48494896 0.02896442 0.04416885 0.14187466 0.35298982
## 8 9 10 11 12 13 14
## 0.13518371 0.52465818 0.80638030 0.65192786 0.47642692 0.76667670 0.79825096
## 15 16 17 18 19 20 21
## 0.49045006 0.22563277 0.49124234 0.53149589 0.33788658 0.63266232 0.36893216
## 22 23 24 25 26 27 28
## 0.44577737 0.28975032 0.50841003 0.54957759 0.30403436 0.44012839 0.13580915
## 29 30 31 32 33 34 35
## 0.23425830 0.03850579 0.16603467 0.15576953 0.03615320 0.11201542 0.05655150
## 36 37 38 39 40 41 42
## 0.08175770 0.03278329 0.02139795 0.37136132 0.03089268 0.01331233 0.38955167
## 43 44 45 46 47 48 49
## 0.65126495 0.55947074 0.24727303 0.25025003 0.23441048 0.91040741 0.20957344
## 50 51 52 53 54 55 56
## 0.17258950 0.30485130 0.15948123 0.60850432 0.62090861 0.46015049 0.55448352
## 57 58 59 60 61 62 63
## 0.75312881 0.84972878 0.87949954 0.51859174 0.50205872 0.39855712 0.29561056
## 64 65 66 67 68 69 70
## 0.37370392 0.42217609 0.44608300 0.62022067 0.22674314 0.39134899 0.61942054
## 71 72 73 74 75 76 77
## 0.85093106 0.88724633 0.62758752 0.52149811 0.04594066 0.27777607 0.22716764
## 78 79 80 81 82 83 84
## 0.10369789 0.10997728 0.04766295 0.32399955 0.63553899 0.20078099 0.05642670
## 85 86 87 88 89 90 91
## 0.15127834 0.22638738 0.25274867 0.02699838 0.05205260 0.22027108 0.23133247
## 92 93 94 95 96 97 98
## 0.45499203 0.50158494 0.47436036 0.43346259 0.66212847 0.49743683 0.26750888
## 99 100 101 102 103 104 105
## 0.52203094 0.67231525 0.38096402 0.35437073 0.37001350 0.29351771 0.20504940
## 106 107 108 109 110 111 112
## 0.14025414 0.30145821 0.22508296 0.22092352 0.26547781 0.32450675 0.96153492
## 113 114 115 116 117 118 119
## 0.63113128 0.89281022 0.78004124 0.79069806 0.34357069 0.86087655 0.52828019
## 120 121 122 123 124 125 126
## 0.92386207 0.35531618 0.94938606 0.75708417 0.89718416 0.69734156 0.93788428
## 127 128 129 130 131 132 133
## 0.72074811 0.24230065 0.38787815 0.95448129 0.80984905 0.85057811 0.35536511
## 134 135 136 137 138 139 140
## 0.81543098 0.83188786 0.71492212 0.69470786 0.75211155 0.83042655 0.79922795
## 141 142 143 144 145 146 147
## 0.77353736 0.84947689 0.81248892 0.73867519 0.52886844 0.35490911 0.26369154
## 148 149 150 151 152 153 154
## 0.36344558 0.34583074 0.33425366 0.40565468 0.41193095 0.86172405 0.84233953
## 155 156 157 158 159 160 161
## 0.84100629 0.83101251 0.92778263 0.54619851 0.66594284 0.42242166 0.35660069
## 162 163 164 165 166 167 168
## 0.62609548 0.40960899 0.60931311 0.56790283 0.40131546 0.65384058 0.64790067
## 169 170 171 172 173 174 175
## 0.35222942 0.25694247 0.34930583 0.35385536 0.25558129 0.37046751 0.61931673
## 176 177 178 179 180 181 182
## 0.93684708 0.90690144 0.94088997 0.78500246 0.72196940 0.76794150 0.87615569
## 183 184 185 186 187 188 189
## 0.87125377 0.77821361 0.75601813 0.74372008 0.84913396 0.85546221 0.73718131
## 190 191 192 193 194 195 196
## 0.75951039 0.62043265 0.74450785 0.60939910 0.26538054 0.45559081 0.32600030
## 197 198 199 200 201 202 203
## 0.42720825 0.26084358 0.38750825 0.29821230 0.49343253 0.56447039 0.41617366
## 204 205 206 207 208 209 210
## 0.35575095 0.56895381 0.34575106 0.35612479 0.37570197 0.45409182 0.38301500
## 211 212 213 214 215 216 217
## 0.81683819 0.81604264 0.83060641 0.84604459 0.88477862 0.79557946 0.74894259
## 218 219 220 221 222
## 0.79681306 0.74388715 0.72730948 0.78620781 0.76129777
##################################
# Creating a classification index
# based on the model predictions
##################################
(LR_OS_ADASYN_Model_Indices <- predict(LR_OS_ADASYN_Model,
                                       type = c("link")))
## 1 2 3 4 5 6
## 1.034194007 -2.143155516 -0.060222356 -3.512295020 -3.074561474 -1.799806198
## 7 8 9 10 11 12
## -0.605922800 -1.855882401 0.098712797 1.426659526 0.627524174 -0.094362275
## 13 14 15 16 17 18
## 1.189640156 1.375398467 -0.038204425 -1.233137433 -0.035034220 0.126150588
## 19 20 21 22 23 24
## -0.672726578 0.543655270 -0.536800580 -0.217746823 -0.896596981 0.033643295
## 25 26 27 28 29 30
## 0.198964127 -0.828159573 -0.240641004 -1.850543025 -1.184420539 -3.217679974
## 31 32 33 34 35 36
## -1.613995214 -1.690047948 -3.283166961 -2.070317881 -2.814390118 -2.418701365
## 37 38 39 40 41 42
## -3.384503515 -3.822829815 -0.526381159 -3.445856218 -4.305662512 -0.449197159
## 43 44 45 46 47 48
## 0.624604084 0.239014356 -1.113209442 -1.097279217 -1.183572402 2.318619541
## 49 50 51 52 53 54
## -1.327498549 -1.567384980 -0.824301675 -1.662093081 0.441029528 0.493406603
## 55 56 57 58 59 60
## -0.159736808 0.218802829 1.115369581 1.732475427 1.987699485 0.074401266
## 61 62 63 64 65 66
## 0.008234922 -0.411480728 -0.868288495 -0.516359404 -0.313846676 -0.216509810
## 67 68 69 70 71 72
## 0.490484945 -1.226793467 -0.441645313 0.487089454 1.741922218 2.062917134
## 73 74 75 76 77 78
## 0.521881088 0.086045493 -3.033375348 -0.955519951 -1.224373916 -2.156795762
## 79 80 81 82 83 84
## -2.090973219 -2.994764558 -0.735451606 0.556053884 -1.381420321 -2.816731639
## 85 86 87 88 89 90
## -1.724609805 -1.228823674 -1.084005982 -3.584608772 -2.902044296 -1.264087373
## 91 92 93 94 95 96
## -1.200802614 -0.180520511 0.006339761 -0.102648596 -0.267737627 0.672793868
## 97 98 99 100 101 102
## -0.010252778 -1.007298471 0.088180865 0.718675356 -0.485458441 -0.599881837
## 103 104 105 106 107 108
## -0.532158893 -0.878360151 -1.355029065 -1.813180759 -0.840363591 -1.236286943
## 109 110 111 112 113 114
## -1.260292650 -1.017688967 -0.733136834 3.218780081 0.537073094 2.119773149
## 115 116 117 118 119 120
## 1.265906734 1.329138280 -0.647422117 1.822589419 0.113241617 2.496016154
## 121 122 123 124 125 126
## -0.595751943 2.931588504 1.136759420 2.166321675 0.834670434 2.714627456
## 127 128 129 130 131 132
## 0.948175492 -1.140107373 -0.456240118 3.043044607 1.449029645 1.739142503
## 133 134 135 136 137 138
## -0.595538372 1.485693315 1.599066382 0.919411187 0.822222252 1.109905829
## 139 140 141 142 143 144
## 1.588653339 1.381476015 1.228394001 1.730504144 1.466264352 1.039094181
## 145 146 147 148 149 150
## 0.115602324 -0.597529479 -1.026869141 -0.560440357 -0.637416654 -0.689008549
## 151 152 153 154 155 156
## -0.381958248 -0.355988614 1.829683742 1.675739359 1.665734531 1.592820196
## 157 158 159 160 161 162
## 2.553116895 0.185322642 0.689891703 -0.312840064 -0.590148900 0.515502402
## 163 164 165 166 167 168
## -0.365582021 0.444425835 0.273299828 -0.399987029 0.635964130 0.609824089
## 169 170 171 172 173 174
## -0.609253877 -1.061921255 -0.622091916 -0.602135123 -1.069063118 -0.530211707
## 175 176 177 178 179 180
## 0.486649104 2.696961062 2.276375065 2.767425616 1.295060256 0.954251593
## 181 182 183 184 185 186
## 1.196724048 1.956518579 1.912090011 1.255286332 1.130971418 1.065394456
## 187 188 189 190 191 192
## 1.727824656 1.778100926 1.031369484 1.149997104 0.491384999 1.069531722
## 193 194 195 196 197 198
## 0.444787084 -1.018187850 -0.178106078 -0.726331346 -0.293250606 -1.041588666
## 199 200 201 202 203 204
## -0.457798326 -0.855825310 -0.026271400 0.259325140 -0.338500938 -0.593854473
## 205 206 207 208 209 210
## 0.277584017 -0.637768884 -0.592223728 -0.507831661 -0.184151359 -0.476770541
## 211 212 213 214 215 216
## 1.495071084 1.489762635 1.589931142 1.703909032 2.038482188 1.358891395
## 217 218 219 220 221 222
## 1.092980686 1.366493646 1.066271179 0.981014553 1.302216663 1.159807719
max(LR_OS_ADASYN_Model_Indices)
## [1] 3.21878
min(LR_OS_ADASYN_Model_Indices)
## [1] -4.305663
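##################################
# Added sketch (in-sample only): area under the ROC curve for the ADASYN
# model via pROC; optimistic because it is computed on the same resampled
# data the model was fit to
##################################
pROC::roc(response = PMA_PreModelling_Train_LR_OS_ADASYN$Class,
          predictor = LR_OS_ADASYN_Model_Probabilities,
          levels = c("M", "R"))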
##################################
# Consolidating the model probabilities
# and classification index
# based on the model predictions
##################################
LR_OS_ADASYN_Model_Predictions <- as.data.frame(PMA_PreModelling_Train_LR_OS_ADASYN)
LR_OS_ADASYN_Model_Predictions$LR_OS_ADASYN_Prob <- LR_OS_ADASYN_Model_Probabilities
LR_OS_ADASYN_Model_Predictions$LR_OS_ADASYN_LP <- LR_OS_ADASYN_Model_Indices
LR_OS_ADASYN_Model_Predictions$Class <- as.factor(LR_OS_ADASYN_Model_Predictions$Class)
LR_OS_ADASYN_Model_Predictions$Label <- rep("LR_OS_ADASYN",nrow(LR_OS_ADASYN_Model_Predictions))
##################################
# Formulating the probability curve
# using the consolidated model predictions
##################################
LR_OS_ADASYN_Model_Predictions %>%
  ggplot(aes(x = LR_OS_ADASYN_LP,
y = LR_OS_ADASYN_Prob,
color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("Sonar Object Classification Index (Logit Values)") +
ylab("Estimated Rock Detection Probability") +
labs(color = "Class") +
scale_x_continuous( limits=c(-10,5), breaks=seq(-10,5,by=1)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
ggtitle("Estimated Rock Detection Probabilities Based on Classification Index : Logistic Regression (OS_ADASYN)") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
##################################
# Visualizing the imbalanced data set
##################################
ggplot(PMA_PreModelling_Train, aes(V1, V11, color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
scale_x_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
scale_y_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
labs(title = "Without Oversampling - Borderline Synthetic Minority Oversampling Technique") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
##################################
# Implementing OS_BSMOTE
# Visualizing the oversampled data using OS_BSMOTE
##################################
recipe(Class ~ V1 + V11, data = PMA_PreModelling_Train) %>%
step_bsmote(Class, seed=123456789) %>%
prep() %>%
bake(new_data = NULL) %>%
ggplot(aes(V1, V11, color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
scale_x_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
scale_y_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
labs(title = "With Oversampling - Borderline Synthetic Minority Oversampling Technique") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
OS_BSMOTE <- recipe(Class ~ V1 + V11, data = PMA_PreModelling_Train) %>%
  step_bsmote(Class, seed=123456789) %>%
  prep()
PMA_PreModelling_Train_LR_OS_BSMOTE <- OS_BSMOTE %>%
  bake(new_data = NULL)
(PMA_PreModelling_Train_LR_OS_BSMOTE <- as.data.frame(PMA_PreModelling_Train_LR_OS_BSMOTE))
## V1 V11 Class
## 1 0.914240973 -1.55861587 M
## 2 2.175018299 0.34520609 M
## 3 -0.230553865 -0.16690717 M
## 4 1.231717734 1.85842121 M
## 5 0.424204681 1.89154083 M
## 6 1.143139503 0.55690423 M
## 7 -0.507041623 0.38985083 M
## 8 0.312328639 0.98808941 M
## 9 -0.935159101 0.03686239 M
## 10 0.408807916 -1.63079048 M
## 11 0.738227771 -1.15770286 M
## 12 1.169078303 -0.79246144 M
## 13 0.118911997 -1.30982705 M
## 14 -0.174409184 -1.31877657 M
## 15 1.012209983 -0.76338471 M
## 16 0.813818599 0.26605270 M
## 17 0.207991857 -0.39104999 M
## 18 -0.957451233 0.02573746 M
## 19 0.695599819 -0.11827986 M
## 20 -1.204708453 -0.18639848 M
## 21 -0.068999246 0.13149291 M
## 22 0.527094931 -0.39650480 M
## 23 0.104263091 0.33286517 M
## 24 0.465616266 -0.56497212 M
## 25 -0.507041623 -0.24127887 M
## 26 0.043933590 0.30731955 M
## 27 0.147716923 -0.20173692 M
## 28 0.612463710 0.84401925 M
## 29 -0.068999246 0.63930609 M
## 30 1.395977603 1.55085071 M
## 31 -0.180527407 1.02812486 M
## 32 0.565359423 0.74012531 M
## 33 2.230423635 1.21329169 M
## 34 0.782380194 0.93715691 M
## 35 2.005116912 0.95072168 M
## 36 1.784287809 0.74337479 M
## 37 1.523834205 1.62207084 M
## 38 1.928176420 1.77732114 M
## 39 -1.204708453 0.65264083 M
## 40 1.934080462 1.47897609 M
## 41 0.744203377 2.70773324 M
## 42 1.424344244 -0.63319889 M
## 43 -0.491314245 -0.58236264 M
## 44 0.689392711 -0.83030322 M
## 45 1.381555338 -0.09258957 M
## 46 1.002499594 0.07158504 M
## 47 0.977896818 0.15071601 M
## 48 0.278484686 -2.26945660 M
## 49 1.406688795 0.06372536 M
## 50 1.852613244 0.04399440 M
## 51 1.281675939 -0.27257795 M
## 52 1.390588252 0.33359231 M
## 53 -0.192852168 -0.57752146 M
## 54 -0.180527407 -0.62433567 M
## 55 -0.052243905 -0.17198030 M
## 56 -0.779287302 -0.12994977 M
## 57 -0.041194136 -1.17696945 M
## 58 -0.924156756 -1.24933498 M
## 59 -0.168320027 -1.80173334 M
## 60 -2.093160440 0.59563328 M
## 61 -0.230553865 -0.22058604 M
## 62 -1.038676203 0.48516249 M
## 63 -0.030238811 0.37335493 M
## 64 0.089444831 0.04161877 M
## 65 0.303951059 -0.21715050 M
## 66 -0.132380328 -0.09011428 M
## 67 -1.546463816 0.01457488 M
## 68 -0.499153812 0.87301303 M
## 69 -0.108968439 0.07550792 M
## 70 0.686277848 -1.02337267 M
## 71 -0.721885235 -1.35101476 M
## 72 -3.557061230 -0.28132844 M
## 73 -0.789109306 -0.36302243 M
## 74 -0.750264961 -0.03937819 M
## 75 -0.379134945 2.23365699 M
## 76 -0.371994173 0.60103638 M
## 77 0.401039618 0.45156422 M
## 78 -0.295964244 1.50754826 M
## 79 0.256869245 1.19827720 M
## 80 1.243886483 1.44694263 M
## 81 -0.230553865 0.36255507 M
## 82 -0.256318859 -0.63813480 M
## 83 0.632133128 0.46700244 M
## 84 0.324793230 1.73570326 M
## 85 0.723165726 0.69367752 M
## 86 -1.191144673 1.19711994 M
## 87 -1.177722925 1.07730973 M
## 88 0.142960831 2.42255907 M
## 89 0.443194144 1.74741590 M
## 90 -0.180527407 0.75375436 M
## 91 0.544629613 0.36615870 M
## 92 -1.274767709 0.41409622 M
## 93 -0.539087424 -0.07530230 M
## 94 -0.555415865 0.01776805 M
## 95 0.345301912 -0.27257795 M
## 96 -1.868589923 0.02175517 M
## 97 -0.217863016 -0.21200423 M
## 98 0.625610136 0.17668535 M
## 99 -0.799007156 -0.01833539 M
## 100 -2.013850697 0.05347991 M
## 101 0.537644499 -0.19150300 M
## 102 -0.013979414 0.15531332 M
## 103 -0.935159101 0.53154288 M
## 104 -0.779287302 0.73036007 M
## 105 0.424204681 0.54321732 M
## 106 0.157176488 1.02691760 M
## 107 -0.323092564 0.48794823 M
## 108 0.377448180 0.47190071 M
## 109 0.992715078 0.20396774 M
## 110 0.295518362 0.33867807 M
## 111 0.099342681 0.20698569 M
## 112 -2.902302684 -1.49282840 R
## 113 -0.588710342 -0.46833442 R
## 114 -0.597170869 -1.70542147 R
## 115 -0.289270938 -1.17938825 R
## 116 0.544629613 -1.61762409 R
## 117 0.064360561 0.15607894 R
## 118 -0.168320027 -1.67226688 R
## 119 -0.316256467 -0.26298086 R
## 120 -1.638716948 -1.51501038 R
## 121 0.274190661 0.01776805 R
## 122 -0.860055136 -2.21946189 R
## 123 -0.789109306 -0.84516218 R
## 124 -1.427539411 -1.35491277 R
## 125 0.779483548 -1.33935885 R
## 126 -0.400799099 -2.26338258 R
## 127 -0.302692686 -0.92399268 R
## 128 0.827857790 0.18656255 R
## 129 -0.829167731 0.42261407 R
## 130 -2.434915803 -1.57286451 R
## 131 -1.138280881 -0.92728240 R
## 132 -0.860055136 -1.28443852 R
## 133 -1.125398710 0.66990478 R
## 134 -0.186674974 -1.39954451 R
## 135 -0.008605128 -1.57143555 R
## 136 -1.348878827 -0.41384379 R
## 137 -2.110893443 -0.94312757 R
## 138 -2.045433423 -1.50787056 R
## 139 -2.727906546 -1.49588989 R
## 140 -1.331613148 -0.98926479 R
## 141 -2.884342616 -1.48707040 R
## 142 -1.692543814 -0.65254827 R
## 143 -2.543559671 -1.55426015 R
## 144 -0.200710576 -1.41000300 R
## 145 -0.579994800 -1.75422471 R
## 146 -0.726952717 -1.49758877 R
## 147 -0.427587806 -2.18726646 R
## 148 -0.234409462 -1.43511339 R
## 149 -0.638609937 -1.78645087 R
## 150 -0.424385927 -2.19636412 R
## 151 -0.539891272 -1.86817291 R
## 152 -0.209046173 -1.77581970 R
## 153 -0.198503193 -1.74901247 R
## 154 -0.085576490 -1.62002917 R
## 155 -0.158312844 -1.66594914 R
## 156 -0.255554034 -1.31678566 R
## 157 -0.101553943 -1.63011607 R
## 158 -0.175951690 -1.55887373 R
## 159 -1.536305519 -1.43737038 R
## 160 -1.482954163 -1.39692372 R
## 161 -1.569819776 -1.57734131 R
## 162 -2.179855466 -1.55433108 R
## 163 -1.554224066 -1.59145067 R
## 164 -1.806165345 -1.52717767 R
## 165 -1.620621314 -1.53138141 R
## 166 -0.960871406 -2.12825390 R
## 167 -0.769103007 -2.22816004 R
## 168 -0.703740137 -2.09580937 R
## 169 -1.116503328 -1.82876902 R
## 170 -0.405250123 -1.85968973 R
## 171 -0.708435397 -2.09952354 R
## 172 -0.659278473 -1.82686585 R
## 173 -0.626623939 -2.03480683 R
## 174 -0.336673771 -1.14769148 R
## 175 -0.711430840 -0.69909654 R
## 176 -0.754841345 -0.85071577 R
## 177 -0.842929035 -0.85781982 R
## 178 -0.495358659 -0.89276851 R
## 179 -0.831157170 -1.10551056 R
## 180 -0.652182669 -0.58768701 R
## 181 -1.157096603 -1.32132725 R
## 182 -1.122671198 -1.31705206 R
## 183 -1.243061435 -1.08218655 R
## 184 -0.939653544 -1.29432362 R
## 185 -1.367768189 -1.34748995 R
## 186 -1.542847586 -1.44233004 R
## 187 -1.000539548 -1.01397735 R
## 188 -0.773256346 -2.22776284 R
## 189 -0.226926303 -1.82128283 R
## 190 -0.350132468 -2.05897887 R
## 191 -0.691041322 -2.23562543 R
## 192 -0.250215562 -1.88049954 R
## 193 -0.065672120 -1.67211873 R
## 194 -0.090517759 -1.71595384 R
## 195 -1.736178623 -1.42168866 R
## 196 -2.664395207 -1.53356807 R
## 197 -1.663602473 -1.51681863 R
## 198 -2.158723504 -1.55279557 R
## 199 -2.306693552 -1.43602548 R
## 200 -1.551549377 -0.63013419 R
## 201 -2.133114929 -1.25078205 R
## 202 -1.307953653 -1.17812109 R
## 203 -0.959054286 -1.15735412 R
## 204 -0.801121364 -0.84798724 R
## 205 -1.361056552 -1.25662668 R
## 206 -0.979418863 -0.88992030 R
## 207 -1.031861506 -0.83841126 R
## 208 -1.114796484 -0.95742913 R
## 209 -0.732133486 -1.26089511 R
## 210 -0.999679111 -1.30177802 R
## 211 -0.858361528 -1.27395218 R
## 212 -1.042843755 -1.04979426 R
## 213 -0.827023587 -1.07991659 R
## 214 -0.811668799 -0.98484412 R
## 215 -1.087905903 -0.99194835 R
## 216 -0.185382331 -1.41875093 R
## 217 -0.417948424 -1.57187563 R
## 218 -0.079660886 -1.50284534 R
## 219 -0.022012673 -1.55849323 R
## 220 -0.284049367 -1.19059300 R
## 221 -0.174278835 -1.58372942 R
## 222 -0.277331844 -1.02794573 R
PMA_PreModelling_Train_LR_OS_BSMOTE$Label <- rep("LR_OS_BSMOTE",nrow(PMA_PreModelling_Train_LR_OS_BSMOTE))
##################################
# Verifying the class distribution
# for the oversampled data using OS_BSMOTE
##################################
table(PMA_PreModelling_Train_LR_OS_BSMOTE$Class)
##
## M R
## 111 111
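##################################
# Illustrative sketch (not part of
# the original run): expressing the
# BSMOTE class distribution as
# proportions
##################################
# After BSMOTE balancing, both classes should sit at roughly 0.50.
prop.table(table(PMA_PreModelling_Train_LR_OS_BSMOTE$Class))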
##################################
# Formulating the structure of the
# Logistic Regression model
##################################
LR_OS_BSMOTE_Model <- glm(Class ~ V1 + V11,
data = PMA_PreModelling_Train_LR_OS_BSMOTE,
family = binomial)
##################################
# Consolidating the model results
##################################
summary(LR_OS_BSMOTE_Model)
##
## Call:
## glm(formula = Class ~ V1 + V11, family = binomial, data = PMA_PreModelling_Train_LR_OS_BSMOTE)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.3748 0.4136 -5.742 9.36e-09 ***
## V1 -1.3384 0.3235 -4.138 3.50e-05 ***
## V11 -2.5707 0.3536 -7.271 3.58e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 307.76 on 221 degrees of freedom
## Residual deviance: 126.59 on 219 degrees of freedom
## AIC: 132.59
##
## Number of Fisher Scoring iterations: 6
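##################################
# Illustrative sketch (not part of
# the original run): likelihood-ratio
# test of the overall model fit
##################################
# The drop from the null deviance (307.76 on 221 df) to the residual
# deviance (126.59 on 219 df) can be referred to a chi-square
# distribution with 2 degrees of freedom.
pchisq(307.76 - 126.59, df = 2, lower.tail = FALSE)
# Equivalently, taken directly from the fitted object:
with(LR_OS_BSMOTE_Model,
pchisq(null.deviance - deviance, df.null - df.residual, lower.tail = FALSE))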
LR_OS_BSMOTE_Model_Coef <- (as.data.frame(LR_OS_BSMOTE_Model$coefficients))
LR_OS_BSMOTE_Model_Coef$Coef <- rownames(LR_OS_BSMOTE_Model_Coef)
LR_OS_BSMOTE_Model_Coef$Model <- rep("LR_OS_BSMOTE",nrow(LR_OS_BSMOTE_Model_Coef))
colnames(LR_OS_BSMOTE_Model_Coef) <- c("Estimates","Coefficients","Model")
print(LR_OS_BSMOTE_Model_Coef, rownames=FALSE)
## Estimates Coefficients Model
## (Intercept) -2.374787 (Intercept) LR_OS_BSMOTE
## V1 -1.338444 V1 LR_OS_BSMOTE
## V11 -2.570724 V11 LR_OS_BSMOTE
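##################################
# Illustrative sketch (not part of
# the original run): expressing the
# logit coefficients as odds ratios
##################################
# exp() of the coefficients gives multiplicative effects on the odds of
# the modeled level (Class = R); confint.default() yields Wald intervals.
exp(coef(LR_OS_BSMOTE_Model))
exp(confint.default(LR_OS_BSMOTE_Model))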
##################################
# Computing the model predictions
##################################
(LR_OS_BSMOTE_Model_Probabilities <- predict(LR_OS_BSMOTE_Model,
type = c("response")))
## 1 2 3 4 5 6
## 6.006859e-01 2.079892e-03 1.628557e-01 1.505806e-04 4.074472e-04 4.789811e-03
## 7 8 9 10 11 12
## 6.307140e-02 4.806695e-03 2.283076e-01 7.808042e-01 4.045050e-01 1.298446e-01
## 13 14 15 16 17 18
## 6.970438e-01 7.771026e-01 1.459006e-01 1.555055e-02 1.613942e-01 2.387661e-01
## 19 20 21 22 23 24
## 4.734715e-02 4.296759e-01 6.783271e-02 1.129486e-01 3.324495e-02 1.757225e-01
## 25 26 27 28 29 30
## 2.542867e-01 3.828633e-02 1.136601e-01 4.659037e-03 1.934256e-02 2.664560e-04
## 31 32 33 34 35 36
## 8.357633e-03 6.469618e-03 2.077176e-04 2.926166e-03 5.513619e-04 1.261781e-03
## 37 38 39 40 41 42
## 1.869932e-04 7.303152e-05 8.016464e-02 1.560020e-04 3.258266e-05 6.577898e-02
## 43 44 45 46 47 48
## 4.451968e-01 2.381189e-01 1.823696e-02 1.982859e-02 1.677255e-02 9.563376e-01
## 49 50 51 52 53 54
## 1.187490e-02 6.912386e-03 3.262472e-02 6.098557e-03 3.470533e-01 3.709446e-01
## 55 56 57 58 59 60
## 1.343831e-01 2.693959e-01 6.695195e-01 8.883345e-01 9.228887e-01 2.489130e-01
## 61 62 63 64 65 66
## 1.825541e-01 9.693127e-02 3.577391e-02 6.904229e-02 9.767097e-02 1.228251e-01
## 67 68 69 70 71 72
## 4.152266e-01 1.887292e-02 8.143187e-02 3.401731e-01 8.874048e-01 9.572771e-01
## 73 74 75 76 77 78
## 4.048304e-01 2.193652e-01 4.954973e-04 3.161516e-02 1.675101e-02 2.859779e-03
## 79 80 81 82 83 84
## 3.021328e-03 4.265516e-04 4.750576e-02 4.034164e-01 1.187473e-02 6.945722e-04
## 85 86 87 88 89 90
## 5.905442e-03 2.067431e-02 2.743980e-02 1.516378e-04 5.752660e-04 1.677652e-02
## 91 92 93 94 95 96
## 1.720777e-02 1.501903e-01 1.885182e-01 1.574837e-01 1.056248e-01 5.175650e-01
## 97 98 99 100 101 102
## 1.767992e-01 2.493234e-02 2.212795e-01 5.456617e-01 6.900384e-02 5.978552e-02
## 103 104 105 106 107 108
## 7.659193e-02 3.881689e-02 1.288117e-02 5.351133e-03 3.928910e-02 1.641340e-02
## 109 110 111 112 113 114
## 1.437443e-02 2.555662e-02 4.565748e-02 9.952622e-01 4.054342e-01 9.431421e-01
## 115 116 117 118 119 120
## 7.396726e-01 7.416806e-01 5.405582e-02 8.956161e-01 2.183267e-01 9.761822e-01
## 121 122 123 124 125 126
## 5.800597e-02 9.888154e-01 7.014203e-01 9.534252e-01 5.062597e-01 9.816606e-01
## 127 128 129 130 131 132
## 6.000517e-01 1.866196e-02 8.695320e-02 9.928083e-01 8.223755e-01 8.887747e-01
## 133 134 135 136 137 138
## 6.974294e-02 8.134993e-01 8.424349e-01 6.211635e-01 9.465990e-01 9.857863e-01
## 139 140 141 142 143 144
## 9.940706e-01 8.755153e-01 9.950755e-01 8.275139e-01 9.934726e-01 8.203295e-01
## 145 146 147 148 149 150
## 9.483940e-01 9.204214e-01 9.785506e-01 8.359281e-01 9.557410e-01 9.789479e-01
## 151 152 153 154 155 156
## 9.589239e-01 9.220227e-01 9.158404e-01 8.703888e-01 8.928129e-01 7.945136e-01
## 157 158 159 160 161 162
## 8.756336e-01 8.662439e-01 9.669637e-01 9.608783e-01 9.777136e-01 9.894245e-01
## 163 164 165 166 167 168
## 9.780466e-01 9.814510e-01 9.765941e-01 9.876594e-01 9.876613e-01 9.811992e-01
## 169 170 171 172 173 174
## 9.785606e-01 9.501862e-01 9.814891e-01 9.609841e-01 9.757512e-01 7.361844e-01
## 175 176 177 178 179 180
## 5.925718e-01 6.947628e-01 7.228503e-01 6.418228e-01 8.291525e-01 5.022260e-01
## 181 182 183 184 185 186
## 9.289512e-01 9.250913e-01 8.880508e-01 9.011649e-01 9.488224e-01 9.676439e-01
## 187 188 189 190 191 192
## 8.279311e-01 9.877165e-01 9.315634e-01 9.672930e-01 9.865775e-01 9.423702e-01
## 193 194 195 196 197 198
## 8.820166e-01 8.963778e-01 9.735002e-01 9.941401e-01 9.770490e-01 9.890819e-01
## 199 200 201 202 203 204
## 9.879220e-01 7.894770e-01 9.757711e-01 9.171668e-01 8.680804e-01 7.062853e-01
## 205 206 207 208 209 210
## 9.356769e-01 7.727403e-01 7.616280e-01 8.290050e-01 8.637211e-01 9.096805e-01
## 211 212 213 214 215 216
## 8.858527e-01 8.480944e-01 8.188099e-01 7.761464e-01 8.363531e-01 8.206201e-01
## 217 218 219 220 221 222
## 9.025137e-01 8.313514e-01 8.403899e-01 7.438513e-01 8.732280e-01 6.545208e-01
##################################
# Creating a classification index
# based on the model predictions
##################################
(LR_OS_BSMOTE_Model_Indices <- predict(LR_OS_BSMOTE_Model,
type = c("link")))
## 1 2 3 4 5
## 0.408324003 -6.173357428 -1.637131633 -8.800861401 -7.805191778
## 6 7 8 9 10
## -5.336462967 -2.698339590 -5.332927345 -1.217892324 1.270358909
## 11 12 13 14 15
## -0.386729015 -1.902333371 0.833259849 1.248860586 -1.767122159
## 16 17 18 19 20
## -4.147986246 -1.647891134 -1.159456434 -3.001743931 -0.283173443
## 21 22 23 24 25
## -2.620467808 -2.060969930 -3.370042340 -1.545601145 -1.075879089
## 26 27 28 29 30
## -3.223623920 -2.053888243 -5.364276686 -3.925915527 -8.230035045
## 31 32 33 34 35
## -4.776187215 -5.034147554 -8.479123183 -5.831131646 -7.502567613
## 36 37 38 39 40
## -6.673968501 -8.584251291 -9.524546462 -2.440112242 -8.765485802
## 41 42 43 44 45
## -10.331697835 -2.653412674 -0.220096954 -1.163020255 -3.985899663
## 46 47 48 49 50
## -3.900602432 -4.071096916 3.086623816 -4.421382008 -4.967504071
## 51 52 53 54 55
## -3.389516181 -5.093585877 -0.632017084 -0.528166665 -1.862747933
## 56 57 58 59 60
## -0.997689924 0.706012696 2.073840593 2.482259349 -1.104418259
## 61 62 63 64 65
## -1.499138052 -2.231796468 -3.294107073 -2.601494709 -2.223374824
## 66 67 68 69 70
## -1.965944774 -0.342400130 -3.950973850 -2.423049316 -0.662522844
## 71 72 73 74 75
## 2.064502155 3.109357894 -0.385378145 -1.269369274 -7.609452965
## 76 77 78 79 80
## -3.421992894 -4.072403649 -5.854146889 -5.799032924 -7.759350558
## 81 82 83 84 85
## -2.998233112 -0.391250280 -4.421396791 -7.271519668 -5.125957993
## 86 87 88 89 90
## -3.857972335 -3.567937550 -8.793864147 -7.460102633 -4.070856278
## 91 92 93 94 95
## -4.045036769 -1.733109397 -1.459667592 -1.677071125 -2.136231934
## 96 97 98 99 100
## 0.070289110 -1.538185522 -3.666340925 -1.258225807 0.183156955
## 101 102 103 104 105
## -2.602093063 -2.755344499 -2.489579517 -3.209309392 -4.339023651
## 106 107 108 109 110
## -5.225081444 -3.196726510 -4.093107340 -4.227825845 -3.640970185
## 111 112 113 114 115
## -3.039855195 5.347432700 -0.382872854 2.808660917 1.044267677
## 116 117 118 119 120
## 1.054722007 -2.862166349 2.149436749 -1.275444526 3.713217647
## 121 122 123 124 125
## -2.787453045 4.481973033 0.854070247 3.019001507 0.025039910
## 126 127 128 129 130
## 3.980192518 0.405680323 -3.962429636 -2.351417088 4.927612286
## 131 132 133 134 135
## 1.532525319 2.078285669 -2.590644755 1.472909778 1.676457742
## 136 137 138 139 140
## 0.494489753 2.875046379 4.239230380 5.121883467 1.950629387
## 141 142 143 144 145
## 5.308591914 1.568109532 5.025199351 1.518581542 2.911131389
## 146 147 148 149 150
## 2.448086088 3.820374146 1.628237508 3.072429056 3.839476193
## 151 152 153 154 155
## 3.150384469 2.470152182 2.387126982 1.904400400 2.119801522
## 156 157 158 159 160
## 1.352350344 1.951715959 1.868148771 3.376554627 3.201169629
## 161 162 163 164 165
## 3.781238284 4.538583963 3.796635579 3.968616703 3.731083062
## 166 167 168 169 170
## 4.382439385 4.382599258 3.954877615 3.820850921 2.948366914
## 171 172 173 174 175
## 3.970710070 3.203988514 3.694841183 1.026230052 0.374607475
## 176 177 178 179 180
## 0.822481260 0.958644246 0.583284207 1.579632898 0.008903908
## 181 182 183 184 185
## 2.570689819 2.513622999 2.070984108 2.210235552 2.919919051
## 186 187 188 189 190
## 3.398060745 1.571035089 4.387137173 2.610956908 3.386912469
## 191 192 193 194 195
## 4.297309508 2.794358127 2.011667419 2.157609899 3.603760225
## 196 197 198 199 200
## 5.133737324 3.751174055 4.506352641 4.404218553 1.321775934
## 201 202 203 204 205
## 3.695683464 2.404459967 1.884091524 0.877410183 2.677351471
## 206 207 208 209 210
## 1.223849729 1.161625750 1.578591698 1.846546094 2.309739633
## 211 212 213 214 215
## 2.049061392 1.719732291 1.508305278 1.243348656 1.631339559
## 216 217 218 219 220
## 1.520554047 2.225472173 1.595235372 1.661131855 1.066083204
## 221 222
## 1.929806883 0.638970888
max(LR_OS_BSMOTE_Model_Indices)
## [1] 5.347433
min(LR_OS_BSMOTE_Model_Indices)
## [1] -10.3317
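##################################
# Illustrative sketch (not part of
# the original run): confirming that
# the classification index and the
# estimated probabilities agree
##################################
# For a binomial GLM with the default logit link, the response-scale
# predictions are the inverse logit of the link-scale predictions.
all.equal(as.numeric(LR_OS_BSMOTE_Model_Probabilities),
as.numeric(plogis(LR_OS_BSMOTE_Model_Indices)))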
##################################
# Consolidating the model probabilities
# and classification index
# based on the model predictions
##################################
LR_OS_BSMOTE_Model_Predictions <- as.data.frame(PMA_PreModelling_Train_LR_OS_BSMOTE)
LR_OS_BSMOTE_Model_Predictions$LR_OS_BSMOTE_Prob <- LR_OS_BSMOTE_Model_Probabilities
LR_OS_BSMOTE_Model_Predictions$LR_OS_BSMOTE_LP <- LR_OS_BSMOTE_Model_Indices
LR_OS_BSMOTE_Model_Predictions$Class <- as.factor(LR_OS_BSMOTE_Model_Predictions$Class)
LR_OS_BSMOTE_Model_Predictions$Label <- rep("LR_OS_BSMOTE",nrow(LR_OS_BSMOTE_Model_Predictions))
##################################
# Formulating the probability curve
# using the consolidated model predictions
##################################
LR_OS_BSMOTE_Model_Predictions %>%
ggplot(aes(x = LR_OS_BSMOTE_LP,
y = LR_OS_BSMOTE_Prob,
color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("Sonar Object Classification Index (Logit Values)") +
ylab("Estimated Rock Detection Probability") +
labs(color = "Class") +
scale_x_continuous( limits=c(-10,5), breaks=seq(-10,5,by=1)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
ggtitle("Estimated Rock Detection Probabilities Based on Classification Index : Logistic Regression (OS_BSMOTE)") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
##################################
# Visualizing the imbalanced data set
##################################
ggplot(PMA_PreModelling_Train, aes(V1, V11, color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
scale_x_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
scale_y_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
labs(title = "Without Oversampling - Synthetic Minority Oversampling Technique") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
##################################
# Implementing OS_SMOTE
# Visualizing the oversampled data using OS_SMOTE
##################################
recipe(Class ~ V1 + V11, data = PMA_PreModelling_Train) %>%
step_smote(Class, seed=123456789) %>%
prep() %>%
bake(new_data = NULL) %>%
ggplot(aes(V1, V11, color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
scale_x_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
scale_y_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
labs(title = "With Oversampling - Synthetic Minority Oversampling Technique") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
OS_SMOTE <- recipe(Class ~ V1 + V11, data = PMA_PreModelling_Train) %>%
step_smote(Class, seed=123456789) %>%
prep()
PMA_PreModelling_Train_LR_OS_SMOTE <- OS_SMOTE %>%
bake(new_data = NULL)
(PMA_PreModelling_Train_LR_OS_SMOTE <- as.data.frame(PMA_PreModelling_Train_LR_OS_SMOTE))
## V1 V11 Class
## 1 0.914240973 -1.558615871 M
## 2 2.175018299 0.345206089 M
## 3 -0.230553865 -0.166907171 M
## 4 1.231717734 1.858421213 M
## 5 0.424204681 1.891540835 M
## 6 1.143139503 0.556904232 M
## 7 -0.507041623 0.389850825 M
## 8 0.312328639 0.988089406 M
## 9 -0.935159101 0.036862390 M
## 10 0.408807916 -1.630790485 M
## 11 0.738227771 -1.157702863 M
## 12 1.169078303 -0.792461442 M
## 13 0.118911997 -1.309827049 M
## 14 -0.174409184 -1.318776567 M
## 15 1.012209983 -0.763384712 M
## 16 0.813818599 0.266052702 M
## 17 0.207991857 -0.391049992 M
## 18 -0.957451233 0.025737464 M
## 19 0.695599819 -0.118279862 M
## 20 -1.204708453 -0.186398477 M
## 21 -0.068999246 0.131492907 M
## 22 0.527094931 -0.396504803 M
## 23 0.104263091 0.332865171 M
## 24 0.465616266 -0.564972117 M
## 25 -0.507041623 -0.241278874 M
## 26 0.043933590 0.307319548 M
## 27 0.147716923 -0.201736917 M
## 28 0.612463710 0.844019251 M
## 29 -0.068999246 0.639306087 M
## 30 1.395977603 1.550850706 M
## 31 -0.180527407 1.028124858 M
## 32 0.565359423 0.740125313 M
## 33 2.230423635 1.213291690 M
## 34 0.782380194 0.937156911 M
## 35 2.005116912 0.950721679 M
## 36 1.784287809 0.743374785 M
## 37 1.523834205 1.622070841 M
## 38 1.928176420 1.777321144 M
## 39 -1.204708453 0.652640830 M
## 40 1.934080462 1.478976095 M
## 41 0.744203377 2.707733238 M
## 42 1.424344244 -0.633198890 M
## 43 -0.491314245 -0.582362640 M
## 44 0.689392711 -0.830303223 M
## 45 1.381555338 -0.092589575 M
## 46 1.002499594 0.071585037 M
## 47 0.977896818 0.150716010 M
## 48 0.278484686 -2.269456602 M
## 49 1.406688795 0.063725362 M
## 50 1.852613244 0.043994401 M
## 51 1.281675939 -0.272577947 M
## 52 1.390588252 0.333592312 M
## 53 -0.192852168 -0.577521460 M
## 54 -0.180527407 -0.624335667 M
## 55 -0.052243905 -0.171980296 M
## 56 -0.779287302 -0.129949769 M
## 57 -0.041194136 -1.176969454 M
## 58 -0.924156756 -1.249334984 M
## 59 -0.168320027 -1.801733345 M
## 60 -2.093160440 0.595633277 M
## 61 -0.230553865 -0.220586042 M
## 62 -1.038676203 0.485162490 M
## 63 -0.030238811 0.373354928 M
## 64 0.089444831 0.041618772 M
## 65 0.303951059 -0.217150498 M
## 66 -0.132380328 -0.090114281 M
## 67 -1.546463816 0.014574884 M
## 68 -0.499153812 0.873013033 M
## 69 -0.108968439 0.075507923 M
## 70 0.686277848 -1.023372673 M
## 71 -0.721885235 -1.351014760 M
## 72 -3.557061230 -0.281328435 M
## 73 -0.789109306 -0.363022432 M
## 74 -0.750264961 -0.039378188 M
## 75 -0.379134945 2.233656987 M
## 76 -0.371994173 0.601036378 M
## 77 0.401039618 0.451564218 M
## 78 -0.295964244 1.507548259 M
## 79 0.256869245 1.198277196 M
## 80 1.243886483 1.446942626 M
## 81 -0.230553865 0.362555066 M
## 82 -0.256318859 -0.638134800 M
## 83 0.632133128 0.467002438 M
## 84 0.324793230 1.735703263 M
## 85 0.723165726 0.693677522 M
## 86 -1.191144673 1.197119945 M
## 87 -1.177722925 1.077309728 M
## 88 0.142960831 2.422559073 M
## 89 0.443194144 1.747415902 M
## 90 -0.180527407 0.753754356 M
## 91 0.544629613 0.366158697 M
## 92 -1.274767709 0.414096217 M
## 93 -0.539087424 -0.075302304 M
## 94 -0.555415865 0.017768054 M
## 95 0.345301912 -0.272577947 M
## 96 -1.868589923 0.021755168 M
## 97 -0.217863016 -0.212004228 M
## 98 0.625610136 0.176685353 M
## 99 -0.799007156 -0.018335390 M
## 100 -2.013850697 0.053479912 M
## 101 0.537644499 -0.191503002 M
## 102 -0.013979414 0.155313323 M
## 103 -0.935159101 0.531542880 M
## 104 -0.779287302 0.730360068 M
## 105 0.424204681 0.543217319 M
## 106 0.157176488 1.026917595 M
## 107 -0.323092564 0.487948233 M
## 108 0.377448180 0.471900708 M
## 109 0.992715078 0.203967739 M
## 110 0.295518362 0.338678072 M
## 111 0.099342681 0.206985690 M
## 112 -2.902302684 -1.492828399 R
## 113 -0.588710342 -0.468334419 R
## 114 -0.597170869 -1.705421467 R
## 115 -0.289270938 -1.179388252 R
## 116 0.544629613 -1.617624095 R
## 117 0.064360561 0.156078935 R
## 118 -0.168320027 -1.672266879 R
## 119 -0.316256467 -0.262980859 R
## 120 -1.638716948 -1.515010380 R
## 121 0.274190661 0.017768054 R
## 122 -0.860055136 -2.219461886 R
## 123 -0.789109306 -0.845162176 R
## 124 -1.427539411 -1.354912772 R
## 125 0.779483548 -1.339358851 R
## 126 -0.400799099 -2.263382579 R
## 127 -0.302692686 -0.923992684 R
## 128 0.827857790 0.186562547 R
## 129 -0.829167731 0.422614069 R
## 130 -2.434915803 -1.572864509 R
## 131 -1.138280881 -0.927282397 R
## 132 -0.860055136 -1.284438518 R
## 133 -1.125398710 0.669904780 R
## 134 -0.186674974 -1.399544510 R
## 135 -0.008605128 -1.571435552 R
## 136 -1.348878827 -0.413843793 R
## 137 -1.599032158 -1.370950282 R
## 138 -2.657432813 -1.322745508 R
## 139 -1.790490183 -1.512346033 R
## 140 -0.810443562 -0.452440072 R
## 141 -0.538605201 -0.548157539 R
## 142 -0.660062049 -0.527920440 R
## 143 -0.982531013 -0.440104448 R
## 144 -0.584866894 -1.740381378 R
## 145 -0.455540713 -2.107842443 R
## 146 -0.515081007 -1.938667573 R
## 147 -0.418862256 -1.691636374 R
## 148 -0.239516596 -1.286153950 R
## 149 -0.200058557 -1.542931405 R
## 150 -0.280193092 -1.198868010 R
## 151 0.168622255 -1.586231937 R
## 152 0.570113701 -1.587429430 R
## 153 0.594021690 -1.559102196 R
## 154 0.043595498 0.162273052 R
## 155 0.729004935 0.182615719 R
## 156 -0.112595852 0.208864171 R
## 157 -0.265229694 -1.679758994 R
## 158 -0.273378159 -1.939394197 R
## 159 -0.321171380 -2.060916205 R
## 160 -0.133312506 -1.650165905 R
## 161 0.134694842 -0.048560153 R
## 162 -0.475824201 -0.459442307 R
## 163 0.218194825 -0.008857141 R
## 164 -1.947338832 -1.537435745 R
## 165 -1.603461320 -1.473605052 R
## 166 -1.761439442 -1.523927754 R
## 167 -0.074193522 -0.550968323 R
## 168 0.602335677 0.117808437 R
## 169 -0.315403865 -0.314371272 R
## 170 -0.641190054 -2.240392925 R
## 171 -0.860055136 -1.717141529 R
## 172 -0.418893061 -1.870481935 R
## 173 -0.634044436 -1.777523552 R
## 174 -0.796581793 -0.891429685 R
## 175 -0.827847489 -1.085017958 R
## 176 -0.655076888 -0.593129261 R
## 177 -0.846646467 -1.201415865 R
## 178 -1.003113777 -1.302204563 R
## 179 -1.477508813 -1.392795503 R
## 180 -1.024062139 -1.032758823 R
## 181 -1.564325317 -1.458612705 R
## 182 -0.046092087 -1.390787072 R
## 183 -0.066053607 -1.212799303 R
## 184 0.616583983 -1.532369401 R
## 185 -0.286655632 -1.973154324 R
## 186 -0.020863313 -1.593062643 R
## 187 -0.264751485 -2.023354050 R
## 188 -0.360121149 -0.914685611 R
## 189 -0.291775634 -1.131727665 R
## 190 -0.270452304 -1.056144682 R
## 191 -0.234639216 -1.202941057 R
## 192 -0.084662664 -0.235306944 R
## 193 -0.539978141 -0.445804920 R
## 194 -0.263127896 -0.317813628 R
## 195 -0.719767227 0.017260698 R
## 196 -0.729847094 0.289854799 R
## 197 -0.854171908 0.443487311 R
## 198 -2.209147026 -1.524018122 R
## 199 -2.549911790 -1.553172404 R
## 200 -2.408990100 -1.545196549 R
## 201 -0.955492262 -1.161926657 R
## 202 -0.882407262 -0.713601583 R
## 203 -1.281912481 -0.577108003 R
## 204 -0.852329589 -0.860030693 R
## 205 -0.879649125 -1.259285873 R
## 206 -0.820084244 -1.036950160 R
## 207 -0.817419038 -1.020447965 R
## 208 -1.332052173 -0.332244311 R
## 209 -0.357437358 -0.215502108 R
## 210 -0.321886669 0.322888990 R
## 211 -1.300027287 -0.176942239 R
## 212 -0.158828325 -1.426424914 R
## 213 -0.255897661 -1.251002529 R
## 214 -0.177300888 -1.538827013 R
## 215 -0.255677280 -1.116706900 R
## 216 -0.162403256 -1.606447414 R
## 217 -0.161098689 -1.424233330 R
## 218 -0.099380030 -1.483810427 R
## 219 -1.358596322 -0.530100664 R
## 220 -0.896995634 0.313447296 R
## 221 -1.278607918 -0.418880974 R
## 222 -1.404025395 -1.073598925 R
PMA_PreModelling_Train_LR_OS_SMOTE$Label <- rep("LR_OS_SMOTE",nrow(PMA_PreModelling_Train_LR_OS_SMOTE))
##################################
# Verifying the class distribution
# for the oversampled data using OS_SMOTE
##################################
table(PMA_PreModelling_Train_LR_OS_SMOTE$Class)
##
## M R
## 111 111
##################################
# Formulating the structure of the
# Logistic Regression model
##################################
LR_OS_SMOTE_Model <- glm(Class ~ V1 + V11,
data = PMA_PreModelling_Train_LR_OS_SMOTE,
family = binomial)
##################################
# Consolidating the model results
##################################
summary(LR_OS_SMOTE_Model)
##
## Call:
## glm(formula = Class ~ V1 + V11, family = binomial, data = PMA_PreModelling_Train_LR_OS_SMOTE)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.0415 0.2324 -4.481 7.43e-06 ***
## V1 -0.9329 0.2481 -3.760 0.00017 ***
## V11 -1.7443 0.2506 -6.961 3.38e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 307.76 on 221 degrees of freedom
## Residual deviance: 189.34 on 219 degrees of freedom
## AIC: 195.34
##
## Number of Fisher Scoring iterations: 5
LR_OS_SMOTE_Model_Coef <- (as.data.frame(LR_OS_SMOTE_Model$coefficients))
LR_OS_SMOTE_Model_Coef$Coef <- rownames(LR_OS_SMOTE_Model_Coef)
LR_OS_SMOTE_Model_Coef$Model <- rep("LR_OS_SMOTE",nrow(LR_OS_SMOTE_Model_Coef))
colnames(LR_OS_SMOTE_Model_Coef) <- c("Estimates","Coefficients","Model")
print(LR_OS_SMOTE_Model_Coef, rownames=FALSE)
## Estimates Coefficients Model
## (Intercept) -1.0415029 (Intercept) LR_OS_SMOTE
## V1 -0.9328753 V1 LR_OS_SMOTE
## V11 -1.7442581 V11 LR_OS_SMOTE
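##################################
# Illustrative sketch (not part of
# the original run): plotting the 50%
# decision boundary implied by the
# SMOTE model coefficients
##################################
# The linear predictor is zero where the estimated probability equals
# 0.50, giving the line V11 = -(b0 + b1*V1)/b2 in predictor space.
b <- coef(LR_OS_SMOTE_Model)
ggplot(PMA_PreModelling_Train_LR_OS_SMOTE, aes(V1, V11, color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_abline(intercept = -b[1]/b[3], slope = -b[2]/b[3], linetype = "dashed") +
theme_bw()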
##################################
# Computing the model predictions
##################################
(LR_OS_SMOTE_Model_Probabilities <- predict(LR_OS_SMOTE_Model,
type = c("response")))
## 1 2 3 4 5 6
## 0.695138300 0.024779368 0.369282594 0.004354817 0.008692463 0.043969471
## 7 8 9 10 11 12
## 0.222960465 0.044944135 0.441910102 0.805596919 0.571789299 0.320853998
## 13 14 15 16 17 18
## 0.756252682 0.805571240 0.342037247 0.094083981 0.365066375 0.451846444
## 19 20 21 22 23 24
## 0.184808902 0.600480342 0.230322198 0.301195793 0.151952308 0.379797538
## 25 26 27 28 29 30
## 0.463155988 0.165407411 0.304190208 0.043728465 0.109852360 0.006375734
## 31 32 33 34 35 36
## 0.064984856 0.054172728 0.005280164 0.032108175 0.010248414 0.017939314
## 37 38 39 40 41 42
## 0.005004700 0.002624183 0.258068057 0.004383673 0.001564322 0.219981147
## 43 44 45 46 47 48
## 0.606499788 0.441184242 0.102585458 0.108944043 0.098265568 0.934454823
## 49 50 51 52 53 54
## 0.078354180 0.054862637 0.146577492 0.051143629 0.536373281 0.553768583
## 55 56 57 58 59 60
## 0.333413190 0.478049348 0.740748894 0.880774271 0.905356078 0.468097562
## 61 62 63 64 65 66
## 0.391344086 0.285203257 0.159156007 0.231913539 0.279632581 0.318466956
## 67 68 69 70 71 72
## 0.592843463 0.109231538 0.255107619 0.525804921 0.879578471 0.940894174
## 73 74 75 76 77 78
## 0.581234332 0.432191970 0.010111774 0.148952395 0.099457455 0.032453398
## 79 80 81 82 83 84
## 0.033206650 0.008786310 0.188648708 0.577051822 0.079747910 0.012468425
## 85 86 87 88 89 90
## 0.050879338 0.117284577 0.139201194 0.004494323 0.010955560 0.100848196
## 91 92 93 94 95 96
## 0.100810641 0.360177402 0.399570525 0.364853162 0.291485499 0.660093722
## 97 98 99 100 101 102
## 0.384977539 0.126385323 0.434344509 0.677845021 0.229873159 0.214271334
## 103 104 105 106 107 108
## 0.250440555 0.169600465 0.084343448 0.048368075 0.169214359 0.098258011
## 109 110 111 112 113 114
## 0.089207115 0.129213957 0.183140465 0.986209131 0.580441619 0.923457474
## 115 116 117 118 119 120
## 0.783385955 0.781081414 0.202008970 0.884155454 0.428550245 0.958104757
## 121 122 123 124 125 126
## 0.209443900 0.974222449 0.762931417 0.934228287 0.638191570 0.963747328
## 127 128 129 130 131 132
## 0.701102490 0.105344361 0.267931877 0.981538824 0.837234144 0.880924439
## 133 134 135 136 137 138
## 0.238643107 0.828325850 0.846513532 0.718833604 0.944876905 0.976904705
## 139 140 141 142 143 144
## 0.963269404 0.623331302 0.602783546 0.621302933 0.655370721 0.926883871
## 145 146 147 148 149 150
## 0.955215764 0.943775731 0.908868243 0.806171172 0.862522074 0.787683378
## 151 152 153 154 155 156
## 0.827497084 0.767730195 0.754697624 0.203393521 0.115056286 0.214034101
## 157 158 159 160 161 162
## 0.894339710 0.930624937 0.945467349 0.876660196 0.253041716 0.550766391
## 163 164 165 166 167 168
## 0.226251030 0.969433469 0.953676457 0.963024518 0.497185366 0.140771687
## 169 170 171 172 173 174
## 0.450431899 0.969659778 0.940249029 0.931621958 0.934038188 0.778421640
## 175 176 177 178 179 180
## 0.835253446 0.646610366 0.863408362 0.897122923 0.940823077 0.847511773
## 181 182 183 184 185 186
## 0.950828270 0.806493833 0.756862021 0.741953895 0.935087595 0.852792408
## 187 188 189 190 191 192
## 0.939045080 0.708868092 0.769362054 0.741347551 0.781704288 0.365380305
## 193 194 195 196 197 198
## 0.559671314 0.439870312 0.401274633 0.296039750 0.265373614 0.975340798
## 199 200 201 202 203 204
## 0.982814940 0.980179251 0.867214606 0.736213169 0.761511998 0.777944845
## 205 206 207 208 209 210
## 0.878213438 0.822334132 0.817719367 0.685821378 0.417713421 0.213421318
## 211 212 213 214 215 216
## 0.617722703 0.831278762 0.798876785 0.859120974 0.758566641 0.871250176
## 217 218 219 220 221 222
## 0.831039532 0.837447101 0.759607794 0.320505259 0.707218982 0.894816419
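##################################
# Illustrative sketch (not part of
# the original run): cross-tabulating
# predicted against observed classes
# at a 0.50 cutoff
##################################
# glm() models the probability of the second factor level ("R"), so
# probabilities at or above 0.50 are labeled "R".
LR_OS_SMOTE_Predicted <- ifelse(LR_OS_SMOTE_Model_Probabilities >= 0.50, "R", "M")
table(Predicted = LR_OS_SMOTE_Predicted,
Observed = PMA_PreModelling_Train_LR_OS_SMOTE$Class)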
##################################
# Creating a classification index
# based on the model predictions
##################################
(LR_OS_SMOTE_Model_Indices <- predict(LR_OS_SMOTE_Model,
type = c("link")))
## 1 2 3 4 5 6
## 0.82425259 -3.67265237 -0.53529572 -5.43210828 -4.73656844 -3.07929429
## 7 8 9 10 11 12
## -1.24849676 -3.05634956 -0.23341359 1.42164977 0.28915520 -0.74984995
## 13 14 15 16 17 18
## 1.13224345 1.42148582 -0.65422868 -2.26475881 -0.55344128 -0.19321306
## 19 20 21 22 23 24
## -1.48410023 0.40746693 -1.20649280 -0.84161007 -1.71937016 -0.49040766
## 25 26 27 28 29 30
## -0.14764367 -1.61853210 -0.82742315 -3.08504263 -2.09225005 -5.04885991
## 31 32 33 34 35 36
## -2.66640847 -2.85988236 -5.23850398 -3.40600965 -4.57033103 -4.00265851
## 37 38 39 40 41 42
## -5.29236048 -5.94035796 -1.05603418 -5.42547492 -6.45873755 -1.26577624
## 43 44 45 46 47 48
## 0.43262277 -0.23635725 -2.16882171 -2.10157285 -2.21664637 2.65722365
## 49 50 51 52 53 54
## -2.46492169 -2.84649772 -1.76170050 -2.92061950 0.14575059 0.21590919
## 55 56 57 58 59 60
## -0.69278785 -0.08785908 1.04986458 1.99978279 2.25820667 -0.12778335
## 61 62 63 64 65 66
## -0.44166592 -0.91879612 -1.66452124 -1.19753768 -0.94628485 -0.76082602
## 67 68 69 70 71 72
## 0.37573267 -2.09861470 -1.07155426 0.10331148 1.98844445 2.76750117
## 73 74 75 76 77 78
## 0.32784250 -0.27291352 -4.58389159 -1.74284131 -2.20326744 -3.39495845
## 79 80 81 82 83 84
## -3.37123462 -4.72573535 -1.45881452 0.31068241 -2.44577712 -4.37200900
## 85 86 87 88 89 90
## -2.92607903 -2.01839960 -1.82194048 -5.40043585 -4.50289216 -2.18783550
## 91 92 93 94 95 96
## -2.18824973 -0.57459425 -0.40725491 -0.55436124 -0.88818027 0.66371190
## 97 98 99 100 101 102
## -0.46847380 -1.93330405 -0.26414720 0.74388595 -1.20902755 -1.29936840
## 103 104 105 106 107 108
## -1.09626404 -1.58846149 -2.38474422 -2.97933833 -1.59120550 -2.21673166
## 109 110 111 112 113 114
## -2.32335472 -1.90792669 -1.49521373 4.26986170 0.32458654 2.49027827
## 115 116 117 118 119 120
## 1.28550832 1.27197948 -1.37378525 2.03238373 -0.28776857 3.12978483
## 121 122 123 124 125 126
## -1.32828070 3.63213568 1.16881866 2.65353097 0.56752363 3.28031608
## 127 128 129 130 131 132
## 0.85255334 -2.13920427 -1.00514081 3.97345164 1.63779107 2.00121359
## 133 134 135 136 137 138
## -1.16013307 1.57380841 1.70751378 0.93868325 2.84148588 3.74476018
## 139 140 141 142 143 144
## 3.26672303 0.50371215 0.41707681 0.49508219 0.64273177 2.53977930
## 145 146 147 148 149 150
## 3.06008104 2.82054007 2.29989380 1.42532065 1.83639738 1.31101754
## 151 152 153 154 155 156
## 1.56799145 1.19553871 1.12382553 -1.36521818 -2.04010260 -1.30077805
## 157 158 159 160 161 162
## 2.13585655 2.59632886 2.85287973 1.96117628 -1.08245498 0.20376771
## 163 164 165 166 167 168
## -1.22960236 3.45680620 3.02467415 3.25982382 -0.01125866 -1.80889534
## 169 170 171 172 173 174
## -0.19892580 3.46447098 2.75595933 2.61187535 2.65044135 1.25649204
## 175 176 177 178 179 180
## 1.62332695 0.60417267 1.84389204 2.16565804 2.76622345 1.71521733
## 181 182 183 184 185 186
## 2.96201461 1.42738687 1.13555187 1.05614883 2.66760147 1.75667237
## 187 188 189 190 191 192
## 2.73472891 0.88989300 1.20471252 1.05298428 1.27562590 -0.55208717
## 193 194 195 196 197 198
## 0.23982821 -0.24168840 -0.40015694 -0.86622815 -1.01822335 3.67763681
## 199 200 201 202 203 204
## 4.04638045 3.90100613 1.87655223 1.02637839 1.16098692 1.25372983
## 205 206 207 208 209 210
## 1.97561964 1.53224216 1.50097177 0.78065553 -0.33216713 -1.30442453
## 211 212 213 214 215 216
## 0.47989340 1.59471732 1.37928899 1.80800819 1.14483716 1.91205799
## 217 218 219 220 221 222
## 1.59301260 1.63935462 1.15053046 -0.75145081 0.88191540 2.14091136
max(LR_OS_SMOTE_Model_Indices)
## [1] 4.269862
min(LR_OS_SMOTE_Model_Indices)
## [1] -6.458738
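##################################
# Illustrative sketch (not part of
# the original run): scoring the
# probability estimates with the
# Brier score
##################################
# Mean squared difference between the estimated probabilities and the
# 0/1 indicator for the modeled level ("R"); lower is better.
mean((LR_OS_SMOTE_Model_Probabilities -
as.numeric(PMA_PreModelling_Train_LR_OS_SMOTE$Class == "R"))^2)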
##################################
# Consolidating the model probabilities
# and classification index
# based on the model predictions
##################################
LR_OS_SMOTE_Model_Predictions <- as.data.frame(PMA_PreModelling_Train_LR_OS_SMOTE)
LR_OS_SMOTE_Model_Predictions$LR_OS_SMOTE_Prob <- LR_OS_SMOTE_Model_Probabilities
LR_OS_SMOTE_Model_Predictions$LR_OS_SMOTE_LP <- LR_OS_SMOTE_Model_Indices
LR_OS_SMOTE_Model_Predictions$Class <- as.factor(LR_OS_SMOTE_Model_Predictions$Class)
LR_OS_SMOTE_Model_Predictions$Label <- rep("LR_OS_SMOTE",nrow(LR_OS_SMOTE_Model_Predictions))
##################################
# Formulating the probability curve
# using the consolidated model predictions
##################################
LR_OS_SMOTE_Model_Predictions %>%
ggplot(aes(x = LR_OS_SMOTE_LP,
y = LR_OS_SMOTE_Prob,
color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("Sonar Object Classification Index (Logit Values)") +
ylab("Estimated Rock Detection Probability") +
labs(color = "Class") +
scale_x_continuous( limits=c(-10,5), breaks=seq(-10,5,by=1)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
ggtitle("Estimated Rock Detection Probabilities Based on Classification Index : Logistic Regression (OS_SMOTE)") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
##################################
# Visualizing the imbalanced data set
##################################
ggplot(PMA_PreModelling_Train, aes(V1, V11, color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
scale_x_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
scale_y_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
labs(title = "Without Oversampling - Random Oversampling Examples") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
##################################
# Implementing OS_ROSE
# Visualizing the oversampled data using OS_ROSE
##################################
recipe(Class ~ V1 + V11, data = PMA_PreModelling_Train) %>%
step_rose(Class, seed=123456789) %>%
prep() %>%
bake(new_data = NULL) %>%
ggplot(aes(V1, V11, color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
scale_x_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
scale_y_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
labs(title = "With Oversampling - Random Oversampling Examples") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
OS_ROSE <- recipe(Class ~ V1 + V11, data = PMA_PreModelling_Train) %>%
step_rose(Class, seed=123456789) %>%
prep()
PMA_PreModelling_Train_LR_OS_ROSE <- OS_ROSE %>%
bake(new_data = NULL)
(PMA_PreModelling_Train_LR_OS_ROSE <- as.data.frame(PMA_PreModelling_Train_LR_OS_ROSE))
## V1 V11 Class
## 1 0.453829274 -0.420221366 M
## 2 0.127920224 0.686476420 M
## 3 1.075595629 -0.235554656 M
## 4 2.137901379 -1.178271820 M
## 5 1.092110905 0.456926231 M
## 6 0.857402441 1.398298653 M
## 7 -1.907459325 -0.021176979 M
## 8 0.309616027 -0.332458449 M
## 9 0.693229730 -2.341819879 M
## 10 1.716214396 -0.555582217 M
## 11 -0.770935156 -0.187998880 M
## 12 -1.496837087 1.378598435 M
## 13 -0.106000306 1.534889807 M
## 14 -0.916802330 -1.457363172 M
## 15 0.157063822 0.775286340 M
## 16 -0.064112224 -1.717694803 M
## 17 -0.112461451 -0.079932050 M
## 18 0.125906565 -1.196069844 M
## 19 -0.946156465 0.057997744 M
## 20 -1.748418397 -0.678614715 M
## 21 0.690260404 0.140155995 M
## 22 -0.560768697 0.384716767 M
## 23 0.250861459 -0.498669324 M
## 24 -2.438990535 0.731196382 M
## 25 1.529501791 -1.522171442 M
## 26 2.558447121 1.985131848 M
## 27 -0.387603764 0.364725766 M
## 28 -0.961430471 -0.770858863 M
## 29 0.863038442 2.196168690 M
## 30 1.039157671 0.133492560 M
## 31 -1.697870849 -1.143428379 M
## 32 -0.610125646 -0.127814520 M
## 33 -0.749263527 -0.150861865 M
## 34 -0.279227167 0.078775816 M
## 35 0.194985084 0.683020200 M
## 36 0.210531378 1.454231491 M
## 37 -0.147592914 0.362929787 M
## 38 -0.146821909 -1.873438319 M
## 39 -0.268162038 0.392433523 M
## 40 1.087154437 -1.567477864 M
## 41 -3.258143427 -0.310093040 M
## 42 0.221639048 2.228731592 M
## 43 -1.339735538 1.934676727 M
## 44 1.124251223 2.209600237 M
## 45 -1.920302036 0.658912503 M
## 46 0.320103472 0.002252489 M
## 47 -0.373684831 0.377203482 M
## 48 0.416501911 0.873626212 M
## 49 -0.337904242 0.283054835 M
## 50 -0.488958504 0.548829476 M
## 51 0.837780447 0.174524037 M
## 52 -0.818636727 1.462313516 M
## 53 0.522647205 0.301663855 M
## 54 0.077677433 -0.395192354 M
## 55 0.808685352 0.089522951 M
## 56 0.121157987 -1.487662966 M
## 57 0.930609853 -0.001536725 M
## 58 1.606666273 -0.460572860 M
## 59 1.879030954 1.415353475 M
## 60 -0.259700391 1.886523777 M
## 61 -1.163566178 -0.350578696 M
## 62 -0.473028462 -0.152114025 M
## 63 -0.270091603 0.103561905 M
## 64 -0.381874467 1.008824831 M
## 65 0.108606139 2.879619917 M
## 66 1.200948103 -0.411763531 M
## 67 -0.143328184 -1.493160633 M
## 68 1.381475831 0.613253958 M
## 69 1.396345117 0.826444551 M
## 70 0.096154862 -1.106932315 M
## 71 0.697201801 0.765645056 M
## 72 0.280450179 -0.779498730 M
## 73 -0.819993172 0.582228816 M
## 74 0.113508607 1.089077908 M
## 75 0.324148150 1.032218252 M
## 76 1.331741998 -0.388969403 M
## 77 -0.504012381 -0.624175476 M
## 78 -0.278104210 0.190564663 M
## 79 1.514692613 0.102141162 M
## 80 -1.332434679 0.332392981 M
## 81 0.748018715 2.546676685 M
## 82 -1.401265796 0.653245938 M
## 83 -3.555046462 -0.437014632 M
## 84 -0.797376264 0.590342435 M
## 85 -1.153805197 0.874627800 M
## 86 1.207193590 -0.900335700 M
## 87 -0.892535628 -0.053046654 M
## 88 0.025673058 -0.103210450 M
## 89 0.708863775 0.305835422 M
## 90 -0.479304016 -0.828521934 M
## 91 0.531855505 -1.020076195 M
## 92 0.843587712 -0.127072250 M
## 93 0.321447635 -0.242383581 M
## 94 -0.104688089 -0.955018789 M
## 95 -1.223682265 -0.993723911 M
## 96 -0.060035584 0.193030555 M
## 97 -0.184412189 -0.355696298 M
## 98 0.739698901 -0.008225890 M
## 99 1.751934661 0.060003736 M
## 100 0.012753639 -0.683233510 M
## 101 0.704272106 1.556907557 M
## 102 0.349253294 -1.364315460 M
## 103 -0.054249920 -0.279637524 M
## 104 0.422806210 1.856196052 M
## 105 -0.378916586 -1.774888727 M
## 106 0.896085827 -0.207757879 M
## 107 0.724946782 -1.974278177 M
## 108 1.177536942 1.427597354 M
## 109 0.417306877 0.674306556 M
## 110 0.119668249 0.770502124 M
## 111 -0.295729614 -1.403169748 R
## 112 -0.003104477 -1.303129281 R
## 113 -1.119225438 0.383400942 R
## 114 -1.205336093 -0.269591889 R
## 115 -0.031385116 -0.727154048 R
## 116 -0.262140197 -2.421432946 R
## 117 -0.004372316 -0.720426193 R
## 118 -0.652857082 -0.170061116 R
## 119 -0.243474578 0.765583966 R
## 120 -0.862558574 -2.394683708 R
## 121 -0.084987329 -0.315983611 R
## 122 0.335663272 -1.450942265 R
## 123 -0.547564677 -1.300644608 R
## 124 -0.085770445 -0.386014589 R
## 125 1.158708949 0.194640216 R
## 126 0.838758747 0.838624142 R
## 127 0.129363261 -1.283480849 R
## 128 1.185903335 0.067746005 R
## 129 -0.411610808 -1.398984345 R
## 130 -1.584645597 -3.066092331 R
## 131 -1.195552409 -0.055875719 R
## 132 -2.077490954 -1.016881169 R
## 133 1.123569804 -1.294911355 R
## 134 -1.696277599 1.577388230 R
## 135 -2.606714342 -2.430876734 R
## 136 -0.936576706 -0.967689409 R
## 137 -0.653422361 -0.737952271 R
## 138 1.399478608 -2.336802156 R
## 139 -0.439107621 -0.751356400 R
## 140 0.682169711 -0.645106739 R
## 141 -0.391226812 -1.228763737 R
## 142 -0.175404459 -1.171822064 R
## 143 0.149961923 -0.632053488 R
## 144 -1.306586950 0.989132759 R
## 145 0.192756837 -1.588485717 R
## 146 -1.637705898 -1.115797066 R
## 147 -1.035313367 -0.044642241 R
## 148 -0.454662737 0.327593121 R
## 149 -0.398721880 -2.531360086 R
## 150 -0.521649915 -1.203285944 R
## 151 -1.036925438 -1.419544783 R
## 152 -0.766892871 -1.525224785 R
## 153 -2.249201887 -1.166084343 R
## 154 -0.868992543 0.374276284 R
## 155 -3.122277505 -1.914384957 R
## 156 -1.711225949 0.095212802 R
## 157 0.846502796 -0.576583436 R
## 158 1.436542798 -0.686131512 R
## 159 -0.842933030 -0.712232667 R
## 160 -3.122732053 -1.680571529 R
## 161 0.635603354 -1.077316478 R
## 162 -0.654996459 -2.908798364 R
## 163 0.418427304 -0.144379560 R
## 164 -1.041169997 -1.922001515 R
## 165 -0.441540025 -0.259070590 R
## 166 -1.655681734 0.576523808 R
## 167 0.953177477 -1.711512314 R
## 168 -3.210760176 -1.284814307 R
## 169 -1.217750244 -1.591163354 R
## 170 0.259282153 -0.574268729 R
## 171 -0.921207702 -0.875473417 R
## 172 -0.865572661 -2.663360159 R
## 173 -0.947803456 -0.948766125 R
## 174 -0.460461844 0.286061177 R
## 175 0.419542632 -1.381617010 R
## 176 -0.621694247 -1.065107860 R
## 177 0.222233446 -0.308422959 R
## 178 -0.643581074 0.498325789 R
## 179 -0.260344829 0.290918714 R
## 180 -1.089617028 1.086009599 R
## 181 -0.474076557 -1.474554870 R
## 182 -1.113328689 -0.745280710 R
## 183 -1.367306542 -1.446652698 R
## 184 -0.227162341 -1.792986420 R
## 185 0.869250328 0.385747269 R
## 186 -0.828656357 1.245036664 R
## 187 -0.712628500 -2.041821701 R
## 188 -0.314242395 -0.861387324 R
## 189 -1.931922510 -1.283036428 R
## 190 -1.152889515 0.238829964 R
## 191 -1.757284170 -0.475428583 R
## 192 0.091169879 -0.475918031 R
## 193 0.853052045 -0.195251645 R
## 194 -1.030858895 -1.444796537 R
## 195 -0.458061630 -2.392889498 R
## 196 -0.757335077 -0.976940053 R
## 197 -1.825122741 -1.449896879 R
## 198 0.548521112 -1.625042438 R
## 199 0.815956664 0.127893714 R
## 200 -0.802403884 -1.389200876 R
## 201 -0.504679908 0.215394678 R
## 202 -0.828050004 0.276042407 R
## 203 -0.015700504 -1.362342936 R
## 204 -1.790699584 1.276762042 R
## 205 -0.319616516 -0.271949278 R
## 206 -0.497131472 -0.653931589 R
## 207 -0.434923007 -1.431687637 R
## 208 -2.126603097 -1.447703954 R
## 209 -1.508349941 -1.887116236 R
## 210 -2.644025006 -1.683878336 R
## 211 -1.292563792 0.557925461 R
## 212 0.956404116 0.448577768 R
## 213 0.851149954 -0.762481606 R
## 214 -0.031079759 -1.607449284 R
## 215 -0.131473949 -1.989738433 R
## 216 -0.987534049 -1.880077457 R
## 217 1.350301010 -0.132513674 R
## 218 -1.488226564 -1.509045712 R
## 219 1.732090761 -1.215001202 R
## 220 -1.010449867 -0.143822342 R
## 221 -0.443012681 -2.279874936 R
## 222 -0.918668643 -1.722236931 R
PMA_PreModelling_Train_LR_OS_ROSE$Label <- rep("LR_OS_ROSE",nrow(PMA_PreModelling_Train_LR_OS_ROSE))
##################################
# Verifying the class distribution
# for the oversampled data using OS_ROSE
##################################
table(PMA_PreModelling_Train_LR_OS_ROSE$Class)
##
## M R
## 110 112
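##################################
# Note: unlike SMOTE and BSMOTE,
# ROSE draws an entirely synthetic
# sample in which each row's class is
# selected with roughly equal
# probability, so the class counts
# (here 110 versus 112) are only
# approximately balanced rather than
# exactly equal.
##################################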
##################################
# Formulating the structure of the
# Logistic Regression model
##################################
LR_OS_ROSE_Model <- glm(Class ~ V1 + V11,
data = PMA_PreModelling_Train_LR_OS_ROSE,
family = binomial)
##################################
# Consolidating the model results
##################################
summary(LR_OS_ROSE_Model)
##
## Call:
## glm(formula = Class ~ V1 + V11, family = binomial, data = PMA_PreModelling_Train_LR_OS_ROSE)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.4555 0.1701 -2.678 0.007398 **
## V1 -0.5284 0.1593 -3.317 0.000908 ***
## V11 -0.9132 0.1650 -5.535 3.11e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 307.74 on 221 degrees of freedom
## Residual deviance: 249.98 on 219 degrees of freedom
## AIC: 255.98
##
## Number of Fisher Scoring iterations: 4
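##################################
# Illustrative sketch (not part of
# the original run): McFadden's
# pseudo R-squared for the ROSE model
##################################
# One minus the ratio of residual to null deviance (1 - 249.98/307.74).
1 - (LR_OS_ROSE_Model$deviance / LR_OS_ROSE_Model$null.deviance)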
LR_OS_ROSE_Model_Coef <- (as.data.frame(LR_OS_ROSE_Model$coefficients))
LR_OS_ROSE_Model_Coef$Coef <- rownames(LR_OS_ROSE_Model_Coef)
LR_OS_ROSE_Model_Coef$Model <- rep("LR_OS_ROSE",nrow(LR_OS_ROSE_Model_Coef))
colnames(LR_OS_ROSE_Model_Coef) <- c("Estimates","Coefficients","Model")
print(LR_OS_ROSE_Model_Coef, rownames=FALSE)
## Estimates Coefficients Model
## (Intercept) -0.4555392 (Intercept) LR_OS_ROSE
## V1 -0.5283578 V1 LR_OS_ROSE
## V11 -0.9131847 V11 LR_OS_ROSE
##################################
# Computing the model predictions
##################################
(LR_OS_ROSE_Model_Probabilities <- predict(LR_OS_ROSE_Model,
type = c("response")))
## 1 2 3 4 5 6 7
## 0.42272822 0.24048787 0.30816041 0.37539602 0.19002887 0.10106385 0.63913697
## 8 9 10 11 12 13 14
## 0.42176519 0.78863241 0.29839231 0.53082787 0.28422724 0.14170857 0.79571130
## 15 16 17 18 19 20 21
## 0.22330577 0.75894563 0.41991396 0.63880235 0.49785184 0.74799586 0.27924052
## 22 23 24 25 26 27 28
## 0.37505987 0.46687195 0.54125677 0.53154810 0.02608136 0.35805690 0.68056655
## 29 30 31 32 33 34 35
## 0.05131761 0.24480950 0.81543281 0.49588602 0.51951627 0.40614035 0.23464164
## 36 37 38 39 40 41 42
## 0.13070302 0.32982442 0.79130846 0.33800037 0.59903625 0.82478350 0.06863251
## 43 44 45 46 47 48 49
## 0.18028811 0.04447561 0.48934125 0.34825343 0.35375905 0.18642890 0.36923822
## 50 51 52 53 54 55 56
## 0.33217220 0.25777606 0.20450560 0.26753625 0.46612769 0.27596505 0.69824553
## 57 58 59 60 61 62 63
## 0.27972403 0.29238147 0.06060725 0.11495790 0.61760221 0.48333050 0.39953443
## 64 65 66 67 68 69 70
## 0.23594946 0.04138595 0.32870381 0.72784589 0.14861986 0.12477099 0.62351399
## 71 72 73 74 75 76 77
## 0.17900745 0.52700098 0.36494339 0.18093261 0.17230032 0.30917596 0.59405330
## 78 79 80 81 82 83 84
## 0.38163331 0.20601548 0.48623520 0.04006559 0.42269894 0.86079066 0.36046859
## 85 86 87 88 89 90 91
## 0.34420429 0.43261336 0.51611451 0.40736875 0.24799075 0.63513227 0.54858837
## 92 93 94 95 96 97 98
## 0.31319703 0.40034265 0.61582913 0.74997077 0.35432276 0.49167898 0.30177638
## 99 100 101 102 103 104 105
## 0.19216831 0.54032242 0.09540200 0.64698261 0.45722602 0.08517833 0.79664657
## 106 107 108 109 110 111 112
## 0.32316339 0.72398394 0.08460516 0.21554948 0.22751830 0.72751777 0.67614219
## 113 114 115 116 117 118 119
## 0.44662821 0.60527575 0.55603100 0.86923142 0.55098545 0.51117317 0.26385700
## 120 121 122 123 124 125 126
## 0.89907501 0.46951683 0.66643143 0.73526461 0.48557423 0.22348101 0.15915581
## 127 128 129 130 131 132 133
## 0.65659701 0.24158944 0.73874767 0.96013708 0.55654831 0.82788619 0.53327752
## 134 135 136 137 138 139 140
## 0.26899165 0.95857559 0.71565046 0.63728241 0.71889102 0.61362934 0.44352519
## 141 142 143 144 145 146 147
## 0.70542292 0.66978816 0.51060062 0.33884294 0.70954950 0.80669624 0.53301273
## 148 149 150 151 152 153 154
## 0.37414679 0.88762701 0.71481811 0.80037425 0.79289528 0.85786807 0.41625077
## 155 156 157 158 159 160 161
## 0.94989962 0.58944065 0.40702723 0.35709778 0.65480568 0.93871938 0.54795805
## 162 163 164 165 166 167 168
## 0.92735848 0.36707788 0.86409346 0.50358274 0.47322076 0.64651810 0.91790101
## 169 170 171 172 173 174 175
## 0.83765665 0.48297670 0.69649384 0.91937851 0.71333541 0.38379354 0.64209386
## 176 177 178 179 180 181 182
## 0.69963871 0.42768298 0.36110724 0.35809396 0.29493003 0.75795197 0.69280709
## 183 184 185 186 187 188 189
## 0.83032735 0.78613971 0.21975873 0.23963883 0.85638009 0.62177733 0.85028733
## 190 191 192 193 194 195 196
## 0.48388136 0.71240426 0.48272976 0.32564362 0.80352755 0.87778623 0.69777637
## 197 198 199 200 201 202 203
## 0.86209523 0.67669167 0.26826426 0.77504386 0.40478111 0.43288003 0.68929534
## 204 205 206 207 208 209 210
## 0.33730469 0.49041932 0.59971660 0.74680630 0.87975259 0.88742407 0.92266675
## 211 212 213 214 215 216 217
## 0.42994096 0.20253953 0.44794817 0.73667934 0.80704537 0.85607802 0.25961475
## 218 219 220 221 222
## 0.84668245 0.43507308 0.55222785 0.86535253 0.83237685
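##################################
# Illustrative sketch (not part of
# the original run): summarizing
# discrimination with the area under
# the ROC curve via pROC
##################################
# "M" is treated as the control level and "R" as the case level, with
# cases expected to score higher on the estimated probability.
LR_OS_ROSE_ROC <- roc(response = PMA_PreModelling_Train_LR_OS_ROSE$Class,
predictor = LR_OS_ROSE_Model_Probabilities,
levels = c("M","R"),
direction = "<")
auc(LR_OS_ROSE_ROC)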
##################################
# Creating a classification index
# based on the model predictions
##################################
(LR_OS_ROSE_Model_Indices <- predict(LR_OS_ROSE_Model,
type = c("link")))
## 1 2 3 4 5 6
## -0.311583732 -1.150006656 -0.808733670 -0.509136303 -1.449822617 -2.185459486
## 7 8 9 10 11 12
## 0.571620329 -0.315531291 1.316701585 -0.854965324 0.123468107 -0.923588677
## 13 14 15 16 17 18
## -1.801171064 1.359702258 -1.246504767 1.146907641 -0.323126605 0.570169780
## 19 20 21 22 23 24
## -0.008592705 1.087951913 -0.948232019 -0.510570169 -0.132706621 0.165403137
## 25 26 27 28 29 30
## 0.126360262 -3.620106864 -0.583807739 0.756376634 -2.917040046 -1.126489672
## 31 32 33 34 35 36
## 1.485705461 -0.016456294 0.078104777 -0.379944235 -1.182284732 -1.894757113
## 37 38 39 40 41 42
## -0.708979289 1.332830551 -0.672218011 0.401451079 1.549098576 -2.607887604
## 43 44 45 46 47 48
## -1.514396714 -3.067319346 -0.042641454 -0.626725333 -0.602556377 -1.473383381
## 49 50 51 52 53 54
## -0.535486224 -0.698376867 -1.057559759 -1.358368477 -1.007158785 -0.135697076
## 55 56 57 58 59 60
## -0.964565444 0.838957115 -0.945830902 -0.883845811 -2.740819106 -2.041069195
## 61 62 63 64 65 66
## 0.479383185 -0.066702727 -0.407405360 -1.175016290 -3.142547062 -0.714053376
## 67 68 69 70 71 72
## 0.983720838 -1.745466933 -1.948005631 0.504490295 -1.523086620 0.108109072
## 73 74 75 76 77 78
## -0.553971878 -1.510041697 -1.569411378 -0.803974603 0.380747178 -0.482621426
## 79 80 81 82 83 84
## -1.349112661 -0.055073129 -3.176347023 -0.311703691 1.821872477 -0.573330931
## 85 86 87 88 89 90
## -0.644613971 -0.271196583 0.064480356 -0.374853574 -1.109357179 0.554298385
## 91 92 93 94 95 96
## 0.194968769 -0.785214949 -0.404037608 0.471882126 1.098456379 -0.600091505
## 97 98 99 100 101 102
## -0.033287169 -0.838853164 -1.435982099 0.161640704 -2.249391106 0.605802116
## 103 104 105 106 107 108
## -0.171515133 -2.373982082 1.365465607 -0.739271854 0.964310162 -2.381360181
## 109 110 111 112 113 114
## -1.291793024 -1.222377651 0.982065023 0.736098817 -0.214303591 0.427496730
## 115 116 117 118 119 120
## 0.225069325 1.894180197 0.204653127 0.044700141 -1.026017110 2.186988947
## 121 122 123 124 125 126
## -0.122084091 0.692088786 1.021499657 -0.057719106 -1.245494630 -1.664522726
## 127 128 129 130 131 132
## 0.648165803 -1.143985141 1.039469713 3.181629377 0.227165101 1.570719734
## 133 134 135 136 137 138
## 0.133307162 -0.999744528 3.141578211 0.922987602 0.563588342 0.938967358
## 139 140 141 142 143 144
## 0.462593918 -0.226867299 0.873256809 0.707227114 0.042408819 -0.668454716
## 145 146 147 148 149 150
## 0.893197099 1.428684346 0.132243309 -0.514467642 2.066727983 0.918900944
## 151 152 153 154 155 156
## 1.388635067 1.342466612 1.797694609 -0.338183600 2.942327636 0.361653519
## 157 158 159 160 161 162
## -0.376268405 -0.587983024 0.640231037 2.729052948 0.192423733 2.546803534
## 163 164 165 166 167 168
## -0.544773350 1.849713529 0.014331213 -0.107219562 0.603768915 2.414163843
## 169 170 171 172 173 174
## 1.640894726 -0.068119539 0.830657032 2.433932696 0.911638889 -0.473477302
## 175 176 177 178 179 180
## 0.584463706 0.845578033 -0.291310863 -0.570561627 -0.583646521 -0.871558923
## 181 182 183 184 185 186
## 1.141483829 0.813275667 1.587949042 1.301811601 -1.267072947 -1.154660623
## 187 188 189 190 191 192
## 1.785544023 0.497098959 1.736856426 -0.064496902 0.907089741 -0.069108460
## 193 194 195 196 197 198
## -0.727955121 1.408489278 1.971631378 0.836731432 1.832802349 0.738609301
## 199 200 201 202 203 204
## -1.003446893 1.237014177 -0.385582774 -0.270110234 0.796827032 -0.675328688
## 205 206 207 208 209 210
## -0.038327405 0.404284424 1.081651042 1.990089307 2.064695003 2.479144060
## 211 212 213 214 215 216
## -0.282092041 -1.370497185 -0.208964397 1.028780156 1.430924825 1.783090147
## 217 218 219 220 221 222
## -1.047971859 1.708814429 -0.261182377 0.209676238 1.860476975 1.602567012
max(LR_OS_ROSE_Model_Indices)
## [1] 3.181629
min(LR_OS_ROSE_Model_Indices)
## [1] -3.620107
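##################################
# Illustrative sketch (not part of
# the original run): stacking the
# coefficient tables of the three
# oversampling variants for a
# side-by-side comparison
##################################
rbind(LR_OS_BSMOTE_Model_Coef,
LR_OS_SMOTE_Model_Coef,
LR_OS_ROSE_Model_Coef)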
##################################
# Consolidating the model probabilities
# and classification index
# based on the model predictions
##################################
LR_OS_ROSE_Model_Predictions <- as.data.frame(PMA_PreModelling_Train_LR_OS_ROSE)
LR_OS_ROSE_Model_Predictions$LR_OS_ROSE_Prob <- LR_OS_ROSE_Model_Probabilities
LR_OS_ROSE_Model_Predictions$LR_OS_ROSE_LP <- LR_OS_ROSE_Model_Indices
LR_OS_ROSE_Model_Predictions$Class <- as.factor(LR_OS_ROSE_Model_Predictions$Class)
LR_OS_ROSE_Model_Predictions$Label <- rep("LR_OS_ROSE",nrow(LR_OS_ROSE_Model_Predictions))
##################################
# Formulating the probability curve
# using the consolidated model predictions
##################################
LR_OS_ROSE_Model_Predictions %>%
ggplot(aes(x = LR_OS_ROSE_LP,
y = LR_OS_ROSE_Prob,
color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("Sonar Object Classification Index (Logit Values)") +
ylab("Estimated Rock Detection Probability") +
labs(color = "Class") +
scale_x_continuous( limits=c(-10,5), breaks=seq(-10,5,by=1)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
ggtitle("Estimated Rock Detection Probabilities Based on Classification Index : Logistic Regression (OS_ROSE)") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
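As an independent visual reference (a sketch, not in the original code), the theoretical logistic curve over the same axis range can be drawn directly with base graphics, since the point cloud above should trace exactly this function.
##################################
# Reference logistic curve over the same
# index range (sketch only)
##################################
curve(plogis(x), from = -10, to = 5,
      xlab = "Sonar Object Classification Index (Logit Values)",
      ylab = "Estimated Rock Detection Probability",
      main = "Logistic Link: plogis(x) = 1 / (1 + exp(-x))")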
##################################
# Visualizing the imbalanced data set
# alongside the undersampled and oversampled data
##################################
LR_ClassDistribution <- PMA_PreModelling_Train_LR %>%
ggplot(aes(x = V1,
y = V11,
color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
scale_x_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
scale_y_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
theme_bw() +
facet_grid(. ~ Label) +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=10, face="bold"),
axis.title.y = element_text(color="black", size=10, face="bold"),
legend.position="top")
LR_US_DOWNSAMPLE_ClassDistribution <- PMA_PreModelling_Train_LR_US_DOWNSAMPLE %>%
ggplot(aes(x = V1,
y = V11,
color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
scale_x_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
scale_y_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
theme_bw() +
facet_grid(. ~ Label) +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
LR_OS_UPSAMPLE_ClassDistribution <- PMA_PreModelling_Train_LR_OS_UPSAMPLE %>%
ggplot(aes(x = V1,
y = V11,
color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
scale_x_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
scale_y_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
theme_bw() +
facet_grid(. ~ Label) +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
LR_US_NEARMISS_ClassDistribution <- PMA_PreModelling_Train_LR_US_NEARMISS %>%
ggplot(aes(x = V1,
y = V11,
color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
scale_x_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
scale_y_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
theme_bw() +
facet_grid(. ~ Label) +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
LR_US_TOMEK_ClassDistribution <- PMA_PreModelling_Train_LR_US_TOMEK %>%
ggplot(aes(x = V1,
y = V11,
color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
scale_x_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
scale_y_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
theme_bw() +
facet_grid(. ~ Label) +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
LR_OS_ADASYN_ClassDistribution <- PMA_PreModelling_Train_LR_OS_ADASYN %>%
ggplot(aes(x = V1,
y = V11,
color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
scale_x_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
scale_y_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
theme_bw() +
facet_grid(. ~ Label) +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
LR_OS_BSMOTE_ClassDistribution <- PMA_PreModelling_Train_LR_OS_BSMOTE %>%
ggplot(aes(x = V1,
y = V11,
color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
scale_x_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
scale_y_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
theme_bw() +
facet_grid(. ~ Label) +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
LR_OS_SMOTE_ClassDistribution <- PMA_PreModelling_Train_LR_OS_SMOTE %>%
ggplot(aes(x = V1,
y = V11,
color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
scale_x_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
scale_y_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
theme_bw() +
facet_grid(. ~ Label) +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
LR_OS_ROSE_ClassDistribution <- PMA_PreModelling_Train_LR_OS_ROSE %>%
ggplot(aes(x = V1,
y = V11,
color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
scale_x_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
scale_y_continuous( limits=c(-4,4), breaks=seq(-4,4,by=1)) +
theme_bw() +
facet_grid(. ~ Label) +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
RDD_ClassDistribution <- ggarrange(LR_ClassDistribution,
LR_US_DOWNSAMPLE_ClassDistribution,
LR_OS_UPSAMPLE_ClassDistribution,
LR_US_NEARMISS_ClassDistribution,
LR_US_TOMEK_ClassDistribution,
LR_OS_ADASYN_ClassDistribution,
LR_OS_BSMOTE_ClassDistribution,
LR_OS_SMOTE_ClassDistribution,
LR_OS_ROSE_ClassDistribution,ncol=3, nrow=3)
annotate_figure(RDD_ClassDistribution,
top = text_grob("Class Distribution",
color = "black",
face = "bold",
size = 14))
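The nine scatter-plot blocks above differ only in the input data frame; a hypothetical wrapper like the one below (assuming ggplot2 is attached, as caret loads it) would remove the duplication. The function name is illustrative, not part of the original pipeline.
##################################
# Hypothetical wrapper for the class
# distribution scatter plots (sketch only)
##################################
plot_class_distribution <- function(df) {
  df %>%
    ggplot(aes(x = V1, y = V11, color = Class)) +
    scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
    geom_point(size = 5) +
    scale_x_continuous(limits = c(-4, 4), breaks = seq(-4, 4, by = 1)) +
    scale_y_continuous(limits = c(-4, 4), breaks = seq(-4, 4, by = 1)) +
    theme_bw() +
    facet_grid(. ~ Label) +
    theme(axis.title.x = element_text(color = "black", size = 12, face = "bold"),
          axis.title.y = element_text(color = "black", size = 12, face = "bold"),
          legend.position = "top")
}
# Example: plot_class_distribution(PMA_PreModelling_Train_LR_OS_ROSE)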
##################################
# Replotting the logistic curves
##################################
LR_LogisticCurvePlot <- LR_Model_Predictions %>%
ggplot(aes(x = LR_LP,
y = LR_Prob,
color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("Sonar Object Classification Index (Logit Values)") +
ylab("Estimated Rock Detection Probability") +
labs(color = "Class") +
scale_x_continuous( limits=c(-10,5), breaks=seq(-10,5,by=1)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
facet_grid(. ~ Label) +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=10, face="bold"),
axis.title.y = element_text(color="black", size=10, face="bold"),
legend.position="top")
LR_US_DOWNSAMPLE_LogisticCurvePlot <- LR_US_DOWNSAMPLE_Model_Predictions %>%
ggplot(aes(x = LR_US_DOWNSAMPLE_LP,
y = LR_US_DOWNSAMPLE_Prob,
color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("Sonar Object Classification Index (Logit Values)") +
ylab("Estimated Rock Detection Probability") +
labs(color = "Class") +
scale_x_continuous( limits=c(-10,5), breaks=seq(-10,5,by=1)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
facet_grid(. ~ Label) +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=10, face="bold"),
axis.title.y = element_text(color="black", size=10, face="bold"),
legend.position="top")
LR_OS_UPSAMPLE_LogisticCurvePlot <- LR_OS_UPSAMPLE_Model_Predictions %>%
ggplot(aes(x = LR_OS_UPSAMPLE_LP,
y = LR_OS_UPSAMPLE_Prob,
color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("Sonar Object Classification Index (Logit Values)") +
ylab("Estimated Rock Detection Probability") +
labs(color = "Class") +
scale_x_continuous( limits=c(-10,5), breaks=seq(-10,5,by=1)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
facet_grid(. ~ Label) +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=10, face="bold"),
axis.title.y = element_text(color="black", size=10, face="bold"),
legend.position="top")
LR_US_NEARMISS_LogisticCurvePlot <- LR_US_NEARMISS_Model_Predictions %>%
ggplot(aes(x = LR_US_NEARMISS_LP,
y = LR_US_NEARMISS_Prob,
color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("Sonar Object Classification Index (Logit Values)") +
ylab("Estimated Rock Detection Probability") +
labs(color = "Class") +
scale_x_continuous( limits=c(-10,5), breaks=seq(-10,5,by=1)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
facet_grid(. ~ Label) +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=10, face="bold"),
axis.title.y = element_text(color="black", size=10, face="bold"),
legend.position="top")
LR_US_TOMEK_LogisticCurvePlot <- LR_US_TOMEK_Model_Predictions %>%
ggplot(aes(x = LR_US_TOMEK_LP,
y = LR_US_TOMEK_Prob,
color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("Sonar Object Classification Index (Logit Values)") +
ylab("Estimated Rock Detection Probability") +
labs(color = "Class") +
scale_x_continuous( limits=c(-10,5), breaks=seq(-10,5,by=1)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
facet_grid(. ~ Label) +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=10, face="bold"),
axis.title.y = element_text(color="black", size=10, face="bold"),
legend.position="top")
LR_OS_ADASYN_LogisticCurvePlot <- LR_OS_ADASYN_Model_Predictions %>%
ggplot(aes(x = LR_OS_ADASYN_LP,
y = LR_OS_ADASYN_Prob,
color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("Sonar Object Classification Index (Logit Values)") +
ylab("Estimated Rock Detection Probability") +
labs(color = "Class") +
scale_x_continuous( limits=c(-10,5), breaks=seq(-10,5,by=1)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
facet_grid(. ~ Label) +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=10, face="bold"),
axis.title.y = element_text(color="black", size=10, face="bold"),
legend.position="top")
LR_OS_BSMOTE_LogisticCurvePlot <- LR_OS_BSMOTE_Model_Predictions %>%
ggplot(aes(x = LR_OS_BSMOTE_LP,
y = LR_OS_BSMOTE_Prob,
color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("Sonar Object Classification Index (Logit Values)") +
ylab("Estimated Rock Detection Probability") +
labs(color = "Class") +
scale_x_continuous( limits=c(-10,5), breaks=seq(-10,5,by=1)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
facet_grid(. ~ Label) +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=10, face="bold"),
axis.title.y = element_text(color="black", size=10, face="bold"),
legend.position="top")
LR_OS_SMOTE_LogisticCurvePlot <- LR_OS_SMOTE_Model_Predictions %>%
ggplot(aes(x = LR_OS_SMOTE_LP,
y = LR_OS_SMOTE_Prob,
color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("Sonar Object Classification Index (Logit Values)") +
ylab("Estimated Rock Detection Probability") +
labs(color = "Class") +
scale_x_continuous( limits=c(-10,5), breaks=seq(-10,5,by=1)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
facet_grid(. ~ Label) +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=10, face="bold"),
axis.title.y = element_text(color="black", size=10, face="bold"),
legend.position="top")
LR_OS_ROSE_LogisticCurvePlot <- LR_OS_ROSE_Model_Predictions %>%
ggplot(aes(x = LR_OS_ROSE_LP,
y = LR_OS_ROSE_Prob,
color = Class)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("Sonar Object Classification Index (Logit Values)") +
ylab("Estimated Rock Detection Probability") +
labs(color = "Class") +
scale_x_continuous( limits=c(-10,5), breaks=seq(-10,5,by=1)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
facet_grid(. ~ Label) +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=10, face="bold"),
axis.title.y = element_text(color="black", size=10, face="bold"),
legend.position="top")
RLR_LogisticCurvePlot <- ggarrange(LR_LogisticCurvePlot,
LR_US_DOWNSAMPLE_LogisticCurvePlot,
LR_OS_UPSAMPLE_LogisticCurvePlot,
LR_US_NEARMISS_LogisticCurvePlot,
LR_US_TOMEK_LogisticCurvePlot,
LR_OS_ADASYN_LogisticCurvePlot,
LR_OS_BSMOTE_LogisticCurvePlot,
LR_OS_SMOTE_LogisticCurvePlot,
LR_OS_ROSE_LogisticCurvePlot,ncol=3, nrow=3)
annotate_figure(RLR_LogisticCurvePlot,
top = text_grob("Estimated Rock Detection Probabilities Based on Classification Index",
color = "black",
face = "bold",
size = 14))
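Similarly, the nine logistic-curve blocks vary only in the data frame and the two column names; a hypothetical variant of the wrapper using ggplot2's .data pronoun for column indirection could cover them all. Again, the function name is illustrative only.
##################################
# Hypothetical wrapper for the logistic
# curve plots (sketch only)
##################################
plot_logistic_curve <- function(df, lp_col, prob_col) {
  df %>%
    ggplot(aes(x = .data[[lp_col]], y = .data[[prob_col]], color = Class)) +
    scale_colour_manual(values = c("#1846BA55", "#B8000055")) +
    geom_point(size = 5) +
    geom_line(color = "black") +
    xlab("Sonar Object Classification Index (Logit Values)") +
    ylab("Estimated Rock Detection Probability") +
    scale_x_continuous(limits = c(-10, 5), breaks = seq(-10, 5, by = 1)) +
    scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, by = 0.1),
                       labels = scales::percent) +
    facet_grid(. ~ Label) +
    theme_bw() +
    theme(legend.position = "top")
}
# Example: plot_logistic_curve(LR_OS_ROSE_Model_Predictions,
#                              "LR_OS_ROSE_LP", "LR_OS_ROSE_Prob")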