##################################
# Loading R libraries
##################################
library(AppliedPredictiveModeling)
library(caret)
library(rpart)
library(lattice)
library(dplyr)
library(tidyr)
library(moments)
library(skimr)
library(RANN)
library(pls)
library(corrplot)
library(tidyverse)
library(lares)
library(DMwR2)
library(gridExtra)
library(rattle)
library(rpart.plot)
library(RColorBrewer)
library(stats)
library(nnet)
library(elasticnet)
library(earth)
library(party)
library(kernlab)
library(randomForest)
library(Cubist)
library(pROC)
library(mda)
library(klaR)
library(pamr)
library(OptimalCutpoints)
library(broom)
library(PRROC)
library(ggpubr)
library(Hmisc)
library(logistf)
library(arm)
library(glmnet)
##################################
# Loading source and
# formulating the train set
##################################
data(sex2)
# Setting the diaphragm-use predictor equal to the response and
# flipping a single record to induce quasi-complete separation
sex2$dia <- sex2$case
sex2$dia[1] <- 0
UTI_Train <- as.data.frame(sex2)
##################################
# Applying verbose descriptions
# for the response and predictor variables
##################################
colnames(UTI_Train) <- c("UTI",
"Age_24",
"Contraceptive",
"Condom",
"LubricatedCondom",
"Spermicide",
"Diaphragm")
# Keeping a numeric 0/1 copy of the renamed data for model formulation
UTI <- UTI_Train
# Recoding the response and predictors of the train set as Yes/No factors
UTI_Train <- lapply(UTI_Train, function(x) ifelse(x==1,"Yes","No"))
UTI_Train <- lapply(UTI_Train, function(x) as.factor(as.character(x)))
UTI_Train <- as.data.frame(UTI_Train)
##################################
# Performing a general exploration of the train set
##################################
dim(UTI_Train)
## [1] 239 7
str(UTI_Train)
## 'data.frame': 239 obs. of 7 variables:
## $ UTI : Factor w/ 2 levels "No","Yes": 2 2 2 2 2 2 2 2 2 2 ...
## $ Age_24 : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ Contraceptive : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ Condom : Factor w/ 2 levels "No","Yes": 1 2 2 2 2 2 2 2 2 2 ...
## $ LubricatedCondom: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ Spermicide : Factor w/ 2 levels "No","Yes": 2 1 1 1 1 1 1 1 1 1 ...
## $ Diaphragm : Factor w/ 2 levels "No","Yes": 1 2 2 2 2 2 2 2 2 2 ...
summary(UTI_Train)
## UTI Age_24 Contraceptive Condom LubricatedCondom Spermicide
## No :109 No :201 No : 83 No :112 No :159 No :180
## Yes:130 Yes: 38 Yes:156 Yes:127 Yes: 80 Yes: 59
## Diaphragm
## No :110
## Yes:129
describe(UTI_Train)
## UTI_Train
##
## 7 Variables 239 Observations
## --------------------------------------------------------------------------------
## UTI
## n missing distinct
## 239 0 2
##
## Value No Yes
## Frequency 109 130
## Proportion 0.456 0.544
## --------------------------------------------------------------------------------
## Age_24
## n missing distinct
## 239 0 2
##
## Value No Yes
## Frequency 201 38
## Proportion 0.841 0.159
## --------------------------------------------------------------------------------
## Contraceptive
## n missing distinct
## 239 0 2
##
## Value No Yes
## Frequency 83 156
## Proportion 0.347 0.653
## --------------------------------------------------------------------------------
## Condom
## n missing distinct
## 239 0 2
##
## Value No Yes
## Frequency 112 127
## Proportion 0.469 0.531
## --------------------------------------------------------------------------------
## LubricatedCondom
## n missing distinct
## 239 0 2
##
## Value No Yes
## Frequency 159 80
## Proportion 0.665 0.335
## --------------------------------------------------------------------------------
## Spermicide
## n missing distinct
## 239 0 2
##
## Value No Yes
## Frequency 180 59
## Proportion 0.753 0.247
## --------------------------------------------------------------------------------
## Diaphragm
## n missing distinct
## 239 0 2
##
## Value No Yes
## Frequency 110 129
## Proportion 0.46 0.54
## --------------------------------------------------------------------------------
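Because the Diaphragm predictor was derived directly from the response (with a single record flipped), a quick cross-tabulation makes the quasi-complete separation explicit; this is an optional illustrative check, not part of the original pipeline.
# Optional check: the near-empty off-diagonal cell is the quasi-complete
# separation that motivates the penalized models fitted later on.
with(UTI_Train, table(Diaphragm, UTI))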
##################################
# Formulating a data type assessment summary
##################################
PDA <- UTI_Train
(PDA.Summary <- data.frame(
Column.Index=c(1:length(names(PDA))),
Column.Name= names(PDA),
Column.Type=sapply(PDA, function(x) class(x)),
row.names=NULL)
)
## Column.Index Column.Name Column.Type
## 1 1 UTI factor
## 2 2 Age_24 factor
## 3 3 Contraceptive factor
## 4 4 Condom factor
## 5 5 LubricatedCondom factor
## 6 6 Spermicide factor
## 7 7 Diaphragm factor
##################################
# Loading dataset
##################################
DQA <- UTI_Train
##################################
# Formulating an overall data quality assessment summary
##################################
(DQA.Summary <- data.frame(
Column.Index=c(1:length(names(DQA))),
Column.Name= names(DQA),
Column.Type=sapply(DQA, function(x) class(x)),
Row.Count=sapply(DQA, function(x) nrow(DQA)),
NA.Count=sapply(DQA,function(x)sum(is.na(x))),
Fill.Rate=sapply(DQA,function(x)format(round((sum(!is.na(x))/nrow(DQA)),3),nsmall=3)),
row.names=NULL)
)
## Column.Index Column.Name Column.Type Row.Count NA.Count Fill.Rate
## 1 1 UTI factor 239 0 1.000
## 2 2 Age_24 factor 239 0 1.000
## 3 3 Contraceptive factor 239 0 1.000
## 4 4 Condom factor 239 0 1.000
## 5 5 LubricatedCondom factor 239 0 1.000
## 6 6 Spermicide factor 239 0 1.000
## 7 7 Diaphragm factor 239 0 1.000
##################################
# Listing all predictors
##################################
DQA.Predictors <- DQA[,!names(DQA) %in% c("UTI")]
##################################
# Listing all numeric predictors
##################################
DQA.Predictors.Numeric <- DQA.Predictors[,sapply(DQA.Predictors, is.numeric)]

if (length(names(DQA.Predictors.Numeric))>0) {
  print(paste0("There are ",
               length(names(DQA.Predictors.Numeric)),
               " numeric predictor variable(s)."))
} else {
  print("There are no numeric predictor variables.")
}
## [1] "There are no numeric predictor variables."
##################################
# Listing all factor predictors
##################################
DQA.Predictors.Factor <- DQA.Predictors[,sapply(DQA.Predictors, is.factor)]

if (length(names(DQA.Predictors.Factor))>0) {
  print(paste0("There are ",
               length(names(DQA.Predictors.Factor)),
               " factor predictor variable(s)."))
} else {
  print("There are no factor predictor variables.")
}
## [1] "There are 6 factor predictor variable(s)."
##################################
# Formulating a data quality assessment summary for factor predictors
##################################
if (length(names(DQA.Predictors.Factor))>0) {
##################################
# Formulating a function to determine the first mode
##################################
FirstModes <- function(x) {
  ux <- unique(na.omit(x))
  tab <- tabulate(match(x, ux))
  ux[tab == max(tab)]
}
##################################
# Formulating a function to determine the second mode
##################################
SecondModes <- function(x) {
  ux <- unique(na.omit(x))
  tab <- tabulate(match(x, ux))
  fm <- ux[tab == max(tab)]
  sm <- x[!(x %in% fm)]
  usm <- unique(sm)
  tabsm <- tabulate(match(sm, usm))
  ifelse(is.na(usm[tabsm == max(tabsm)])==TRUE,
         return("x"),
         return(usm[tabsm == max(tabsm)]))
}
(DQA.Predictors.Factor.Summary <- data.frame(
Column.Name= names(DQA.Predictors.Factor),
Column.Type=sapply(DQA.Predictors.Factor, function(x) class(x)),
Unique.Count=sapply(DQA.Predictors.Factor, function(x) length(unique(x))),
First.Mode.Value=sapply(DQA.Predictors.Factor, function(x) as.character(FirstModes(x)[1])),
Second.Mode.Value=sapply(DQA.Predictors.Factor, function(x) as.character(SecondModes(x)[1])),
First.Mode.Count=sapply(DQA.Predictors.Factor, function(x) sum(na.omit(x) == FirstModes(x)[1])),
Second.Mode.Count=sapply(DQA.Predictors.Factor, function(x) sum(na.omit(x) == SecondModes(x)[1])),
Unique.Count.Ratio=sapply(DQA.Predictors.Factor, function(x) format(round((length(unique(x))/nrow(DQA.Predictors.Factor)),3), nsmall=3)),
First.Second.Mode.Ratio=sapply(DQA.Predictors.Factor, function(x) format(round((sum(na.omit(x) == FirstModes(x)[1])/sum(na.omit(x) == SecondModes(x)[1])),3), nsmall=3)),
row.names=NULL)
)
}
## Column.Name Column.Type Unique.Count First.Mode.Value Second.Mode.Value
## 1 Age_24 factor 2 No Yes
## 2 Contraceptive factor 2 Yes No
## 3 Condom factor 2 Yes No
## 4 LubricatedCondom factor 2 No Yes
## 5 Spermicide factor 2 No Yes
## 6 Diaphragm factor 2 Yes No
## First.Mode.Count Second.Mode.Count Unique.Count.Ratio First.Second.Mode.Ratio
## 1 201 38 0.008 5.289
## 2 156 83 0.008 1.880
## 3 127 112 0.008 1.134
## 4 159 80 0.008 1.988
## 5 180 59 0.008 3.051
## 6 129 110 0.008 1.173
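As a quick sanity check of the helper functions defined above, they can be applied directly to a single predictor; the results should match the corresponding row of the summary table. A minimal sketch:
# Spot check of the mode helpers against the Diaphragm row of the summary
FirstModes(DQA.Predictors.Factor$Diaphragm)[1]
SecondModes(DQA.Predictors.Factor$Diaphragm)[1]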
##################################
# Formulating a data quality assessment summary for numeric predictors
##################################
if (length(names(DQA.Predictors.Numeric))>0) {
##################################
# Formulating a function to determine the first mode
##################################
FirstModes <- function(x) {
  ux <- unique(na.omit(x))
  tab <- tabulate(match(x, ux))
  ux[tab == max(tab)]
}
##################################
# Formulating a function to determine the second mode
##################################
SecondModes <- function(x) {
  ux <- unique(na.omit(x))
  tab <- tabulate(match(x, ux))
  fm <- ux[tab == max(tab)]
  sm <- na.omit(x)[!(na.omit(x) %in% fm)]
  usm <- unique(sm)
  tabsm <- tabulate(match(sm, usm))
  ifelse(is.na(usm[tabsm == max(tabsm)])==TRUE,
         return(0.00001),
         return(usm[tabsm == max(tabsm)]))
}
(DQA.Predictors.Numeric.Summary <- data.frame(
Column.Name= names(DQA.Predictors.Numeric),
Column.Type=sapply(DQA.Predictors.Numeric, function(x) class(x)),
Unique.Count=sapply(DQA.Predictors.Numeric, function(x) length(unique(x))),
Unique.Count.Ratio=sapply(DQA.Predictors.Numeric, function(x) format(round((length(unique(x))/nrow(DQA.Predictors.Numeric)),3), nsmall=3)),
First.Mode.Value=sapply(DQA.Predictors.Numeric, function(x) format(round((FirstModes(x)[1]),3),nsmall=3)),
Second.Mode.Value=sapply(DQA.Predictors.Numeric, function(x) format(round((SecondModes(x)[1]),3),nsmall=3)),
First.Mode.Count=sapply(DQA.Predictors.Numeric, function(x) sum(na.omit(x) == FirstModes(x)[1])),
Second.Mode.Count=sapply(DQA.Predictors.Numeric, function(x) sum(na.omit(x) == SecondModes(x)[1])),
First.Second.Mode.Ratio=sapply(DQA.Predictors.Numeric, function(x) format(round((sum(na.omit(x) == FirstModes(x)[1])/sum(na.omit(x) == SecondModes(x)[1])),3), nsmall=3)),
Minimum=sapply(DQA.Predictors.Numeric, function(x) format(round(min(x,na.rm = TRUE),3), nsmall=3)),
Mean=sapply(DQA.Predictors.Numeric, function(x) format(round(mean(x,na.rm = TRUE),3), nsmall=3)),
Median=sapply(DQA.Predictors.Numeric, function(x) format(round(median(x,na.rm = TRUE),3), nsmall=3)),
Maximum=sapply(DQA.Predictors.Numeric, function(x) format(round(max(x,na.rm = TRUE),3), nsmall=3)),
Skewness=sapply(DQA.Predictors.Numeric, function(x) format(round(skewness(x,na.rm = TRUE),3), nsmall=3)),
Kurtosis=sapply(DQA.Predictors.Numeric, function(x) format(round(kurtosis(x,na.rm = TRUE),3), nsmall=3)),
Percentile25th=sapply(DQA.Predictors.Numeric, function(x) format(round(quantile(x,probs=0.25,na.rm = TRUE),3), nsmall=3)),
Percentile75th=sapply(DQA.Predictors.Numeric, function(x) format(round(quantile(x,probs=0.75,na.rm = TRUE),3), nsmall=3)),
row.names=NULL)
)
}
##################################
# Identifying potential data quality issues
##################################
##################################
# Checking for missing observations
##################################
if ((nrow(DQA.Summary[DQA.Summary$NA.Count>0,]))>0){
  print(paste0("Missing observations noted for ",
               nrow(DQA.Summary[DQA.Summary$NA.Count>0,]),
               " variable(s) with NA.Count>0 and Fill.Rate<1.0."))
  DQA.Summary[DQA.Summary$NA.Count>0,]
} else {
  print("No missing observations noted.")
}
## [1] "No missing observations noted."
##################################
# Checking for zero or near-zero variance predictors
##################################
if (length(names(DQA.Predictors.Factor))==0) {
  print("No factor predictors noted.")
} else if (nrow(DQA.Predictors.Factor.Summary[as.numeric(as.character(DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio))>5,])>0){
  print(paste0("Low variance observed for ",
               nrow(DQA.Predictors.Factor.Summary[as.numeric(as.character(DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio))>5,]),
               " factor variable(s) with First.Second.Mode.Ratio>5."))
  DQA.Predictors.Factor.Summary[as.numeric(as.character(DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio))>5,]
} else {
  print("No low variance factor predictors due to high first-second mode ratio noted.")
}
## [1] "Low variance observed for 1 factor variable(s) with First.Second.Mode.Ratio>5."
## Column.Name Column.Type Unique.Count First.Mode.Value Second.Mode.Value
## 1 Age_24 factor 2 No Yes
## First.Mode.Count Second.Mode.Count Unique.Count.Ratio First.Second.Mode.Ratio
## 1 201 38 0.008 5.289
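For comparison, caret (already loaded) offers a similar screen through nearZeroVar(), whose freqRatio column is analogous to First.Second.Mode.Ratio; a minimal sketch applied to the predictors, not part of the original pipeline:
# Cross-check with caret: freqRatio ~ First.Second.Mode.Ratio, and nzv
# flags near-zero-variance predictors under caret's default cutoffs.
nearZeroVar(DQA.Predictors, saveMetrics = TRUE)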
if (length(names(DQA.Predictors.Numeric))==0) {
  print("No numeric predictors noted.")
} else if (nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$First.Second.Mode.Ratio))>5,])>0){
  print(paste0("Low variance observed for ",
               nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$First.Second.Mode.Ratio))>5,]),
               " numeric variable(s) with First.Second.Mode.Ratio>5."))
  DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$First.Second.Mode.Ratio))>5,]
} else {
  print("No low variance numeric predictors due to high first-second mode ratio noted.")
}
## [1] "No numeric predictors noted."
if (length(names(DQA.Predictors.Numeric))==0) {
  print("No numeric predictors noted.")
} else if (nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Unique.Count.Ratio))<0.01,])>0){
  print(paste0("Low variance observed for ",
               nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Unique.Count.Ratio))<0.01,]),
               " numeric variable(s) with Unique.Count.Ratio<0.01."))
  DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Unique.Count.Ratio))<0.01,]
} else {
  print("No low variance numeric predictors due to low unique count ratio noted.")
}
## [1] "No numeric predictors noted."
##################################
# Checking for skewed predictors
##################################
if (length(names(DQA.Predictors.Numeric))==0) {
  print("No numeric predictors noted.")
} else if (nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))>3 |
                as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))<(-3),])>0){
  print(paste0("High skewness observed for ",
               nrow(DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))>3 |
                    as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))<(-3),]),
               " numeric variable(s) with Skewness>3 or Skewness<(-3)."))
  DQA.Predictors.Numeric.Summary[as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))>3 |
                                 as.numeric(as.character(DQA.Predictors.Numeric.Summary$Skewness))<(-3),]
} else {
  print("No skewed numeric predictors noted.")
}
## [1] "No numeric predictors noted."
##################################
# Creating the pre-modelling
# train set
##################################
PMA_PreModelling_Train <- UTI_Train
##################################
# Gathering descriptive statistics
##################################
(PMA_PreModelling_Train_Skimmed <- skim(PMA_PreModelling_Train))
Name | PMA_PreModelling_Train |
Number of rows | 239 |
Number of columns | 7 |
_______________________ | |
Column type frequency: | |
factor | 7 |
________________________ | |
Group variables | None |
Variable type: factor
skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
---|---|---|---|---|---|
UTI | 0 | 1 | FALSE | 2 | Yes: 130, No: 109 |
Age_24 | 0 | 1 | FALSE | 2 | No: 201, Yes: 38 |
Contraceptive | 0 | 1 | FALSE | 2 | Yes: 156, No: 83 |
Condom | 0 | 1 | FALSE | 2 | Yes: 127, No: 112 |
LubricatedCondom | 0 | 1 | FALSE | 2 | No: 159, Yes: 80 |
Spermicide | 0 | 1 | FALSE | 2 | No: 180, Yes: 59 |
Diaphragm | 0 | 1 | FALSE | 2 | Yes: 129, No: 110 |
###################################
# Verifying the data dimensions
# for the train set
###################################
dim(PMA_PreModelling_Train)
## [1] 239 7
##################################
# Restructuring the dataset
# for barchart analysis
##################################
EDA.Bar.Source <- PMA_PreModelling_Train
##################################
# Creating a function to formulate
# the proportions table
##################################
EDA.PropTable.Function <- function(FactorVar) {
  EDA.Bar.Source.FactorVar <- EDA.Bar.Source[,c("UTI",
                                                FactorVar)]
  EDA.Bar.Source.FactorVar.Prop <- as.data.frame(prop.table(table(EDA.Bar.Source.FactorVar), 2))
  names(EDA.Bar.Source.FactorVar.Prop)[2] <- "UTI"
  EDA.Bar.Source.FactorVar.Prop$Variable <- rep(FactorVar,nrow(EDA.Bar.Source.FactorVar.Prop))
  return(EDA.Bar.Source.FactorVar.Prop)
}

EDA.Bar.Source.FactorVar.Prop <- rbind(EDA.PropTable.Function("Age_24"),
                                       EDA.PropTable.Function("Contraceptive"),
                                       EDA.PropTable.Function("Condom"),
                                       EDA.PropTable.Function("LubricatedCondom"),
                                       EDA.PropTable.Function("Spermicide"),
                                       EDA.PropTable.Function("Diaphragm"))

(EDA.Barchart.FactorVar <- barchart(EDA.Bar.Source.FactorVar.Prop[,3] ~
                                      EDA.Bar.Source.FactorVar.Prop[,2] | EDA.Bar.Source.FactorVar.Prop[,4],
                                    data=EDA.Bar.Source.FactorVar.Prop,
                                    groups = EDA.Bar.Source.FactorVar.Prop[,1],
                                    stack=TRUE,
                                    ylab = "Proportion",
                                    xlab = "UTI",
                                    auto.key = list(adj=1, space="top", columns=2),
                                    layout=(c(3,2))))
##################################
# Creating a local object
# for the train set
##################################
PMA_PreModelling_Train_LR <- UTI
##################################
# Formulating the structure of the
# Logistic Regression model
##################################
LR_Model <- glm(UTI ~ Age_24 + Contraceptive + Condom + LubricatedCondom + Spermicide + Diaphragm,
                data = PMA_PreModelling_Train_LR,
                family = binomial)
##################################
# Consolidating the model results
##################################
summary(LR_Model)
##
## Call:
## glm(formula = UTI ~ Age_24 + Contraceptive + Condom + LubricatedCondom +
## Spermicide + Diaphragm, family = binomial, data = PMA_PreModelling_Train_LR)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.868 32506.120 0.000 1.000
## Age_24 -1.052 30909.202 0.000 1.000
## Contraceptive -21.885 28411.225 -0.001 0.999
## Condom -22.120 26985.686 -0.001 0.999
## LubricatedCondom -1.687 25786.443 0.000 1.000
## Spermicide 2.868 32506.120 0.000 1.000
## Diaphragm 71.283 30516.786 0.002 0.998
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 329.4768 on 238 degrees of freedom
## Residual deviance: 2.7726 on 232 degrees of freedom
## AIC: 16.773
##
## Number of Fisher Scoring iterations: 24
LR_Model_Coef <- (as.data.frame(LR_Model$coefficients))
LR_Model_Coef$Coef <- rownames(LR_Model_Coef)
LR_Model_Coef$Model <- rep("LR",nrow(LR_Model_Coef))
colnames(LR_Model_Coef) <- c("Estimates","Coefficients","Model")
print(LR_Model_Coef, rownames=FALSE)
## Estimates Coefficients Model
## (Intercept) -2.868285 (Intercept) LR
## Age_24 -1.051834 Age_24 LR
## Contraceptive -21.884704 Contraceptive LR
## Condom -22.119783 Condom LR
## LubricatedCondom -1.686565 LubricatedCondom LR
## Spermicide 2.868285 Spermicide LR
## Diaphragm 71.283177 Diaphragm LR
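The near-zero z-values alongside huge estimates and standard errors are the classic symptom of quasi-complete separation under maximum likelihood; exponentiating the coefficients (an optional check, not in the original pipeline) shows the Diaphragm odds ratio diverging.
# Optional check: odds ratios from the unpenalized fit; the Diaphragm
# estimate is effectively unbounded because of the separation.
exp(coef(LR_Model))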
##################################
# Computing the model predictions
##################################
(LR_Model_Probabilities <- predict(LR_Model,
                                   type = c("response")))
## 1 2 3 4 5 6
## 5.000000e-01 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## 7 8 9 10 11 12
## 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## 13 14 15 16 17 18
## 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## 19 20 21 22 23 24
## 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## 25 26 27 28 29 30
## 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## 31 32 33 34 35 36
## 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## 37 38 39 40 41 42
## 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## 43 44 45 46 47 48
## 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## 49 50 51 52 53 54
## 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## 55 56 57 58 59 60
## 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## 61 62 63 64 65 66
## 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## 67 68 69 70 71 72
## 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## 73 74 75 76 77 78
## 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## 79 80 81 82 83 84
## 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## 85 86 87 88 89 90
## 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## 91 92 93 94 95 96
## 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## 97 98 99 100 101 102
## 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## 103 104 105 106 107 108
## 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## 109 110 111 112 113 114
## 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## 115 116 117 118 119 120
## 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## 121 122 123 124 125 126
## 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00
## 127 128 129 130 131 132
## 1.000000e+00 1.000000e+00 1.000000e+00 1.000000e+00 5.000000e-01 1.405464e-11
## 133 134 135 136 137 138
## 1.405464e-11 2.474572e-10 2.602280e-12 2.602280e-12 2.602280e-12 2.602280e-12
## 139 140 141 142 143 144
## 2.602280e-12 2.602280e-12 2.602280e-12 2.602280e-12 4.581779e-11 4.581779e-11
## 145 146 147 148 149 150
## 4.581779e-11 4.581779e-11 4.581779e-11 4.581779e-11 4.581779e-11 4.581779e-11
## 151 152 153 154 155 156
## 4.581779e-11 4.581779e-11 4.581779e-11 4.581779e-11 4.581779e-11 4.581779e-11
## 157 158 159 160 161 162
## 4.581779e-11 4.581779e-11 4.581779e-11 4.581779e-11 1.777926e-11 1.777926e-11
## 163 164 165 166 167 168
## 1.777926e-11 1.777926e-11 1.777926e-11 1.777926e-11 1.777926e-11 1.777926e-11
## 169 170 171 172 173 174
## 1.777926e-11 1.777926e-11 1.777926e-11 1.777926e-11 1.777926e-11 1.777926e-11
## 175 176 177 178 179 180
## 1.777926e-11 1.777926e-11 1.777926e-11 1.777926e-11 1.777926e-11 1.777926e-11
## 181 182 183 184 185 186
## 1.777926e-11 1.777926e-11 1.777926e-11 1.777926e-11 1.777926e-11 1.777926e-11
## 187 188 189 190 191 192
## 1.777926e-11 1.777926e-11 1.777926e-11 1.777926e-11 1.777926e-11 1.777926e-11
## 193 194 195 196 197 198
## 1.777926e-11 1.777926e-11 1.777926e-11 1.777926e-11 1.777926e-11 1.777926e-11
## 199 200 201 202 203 204
## 1.777926e-11 1.777926e-11 1.777926e-11 1.777926e-11 3.130357e-10 2.220446e-16
## 205 206 207 208 209 210
## 2.220446e-16 2.220446e-16 2.220446e-16 2.220446e-16 2.220446e-16 2.220446e-16
## 211 212 213 214 215 216
## 2.220446e-16 2.220446e-16 2.220446e-16 2.220446e-16 2.220446e-16 9.089676e-13
## 217 218 219 220 221 222
## 9.089676e-13 1.600400e-11 1.600400e-11 1.600400e-11 1.600400e-11 6.210236e-12
## 223 224 225 226 227 228
## 6.210236e-12 6.210236e-12 6.210236e-12 6.210236e-12 6.210236e-12 6.210236e-12
## 229 230 231 232 233 234
## 6.210236e-12 6.210236e-12 6.210236e-12 6.210236e-12 6.210236e-12 6.210236e-12
## 235 236 237 238 239
## 6.210236e-12 1.093423e-10 2.220446e-16 2.220446e-16 2.220446e-16
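As an optional illustration (not in the original pipeline), the fitted probabilities can be dichotomized at a hypothetical 0.50 cutoff and cross-tabulated against the observed response; LR_Model_Classes is an assumed name.
# Illustrative only: 0.50 cutoff and LR_Model_Classes are assumptions
LR_Model_Classes <- ifelse(LR_Model_Probabilities >= 0.50, 1, 0)
table(Predicted = LR_Model_Classes, Observed = PMA_PreModelling_Train_LR$UTI)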
##################################
# Creating a classification index
# based on the model predictions
##################################
(LR_Model_Indices <- predict(LR_Model,
                             type = c("link")))
## 1 2 3 4 5
## -4.085621e-14 4.629511e+01 4.629511e+01 4.629511e+01 4.629511e+01
## 6 7 8 9 10
## 4.629511e+01 4.629511e+01 4.629511e+01 4.629511e+01 4.629511e+01
## 11 12 13 14 15
## 4.629511e+01 4.629511e+01 4.629511e+01 4.629511e+01 4.629511e+01
## 16 17 18 19 20
## 4.916339e+01 4.916339e+01 4.916339e+01 4.460854e+01 4.460854e+01
## 21 22 23 24 25
## 4.460854e+01 4.460854e+01 4.460854e+01 4.460854e+01 4.460854e+01
## 26 27 28 29 30
## 4.460854e+01 4.460854e+01 4.460854e+01 4.747683e+01 4.747683e+01
## 31 32 33 34 35
## 4.747683e+01 4.747683e+01 4.747683e+01 4.747683e+01 4.747683e+01
## 36 37 38 39 40
## 4.747683e+01 4.747683e+01 4.747683e+01 4.747683e+01 4.747683e+01
## 41 42 43 44 45
## 4.747683e+01 4.653019e+01 4.653019e+01 4.653019e+01 4.653019e+01
## 46 47 48 49 50
## 4.653019e+01 4.653019e+01 4.653019e+01 4.653019e+01 4.653019e+01
## 51 52 53 54 55
## 4.653019e+01 4.653019e+01 4.653019e+01 4.653019e+01 4.653019e+01
## 56 57 58 59 60
## 4.653019e+01 4.653019e+01 4.653019e+01 4.653019e+01 4.653019e+01
## 61 62 63 64 65
## 4.653019e+01 4.653019e+01 4.653019e+01 4.653019e+01 4.653019e+01
## 66 67 68 69 70
## 4.653019e+01 4.653019e+01 4.653019e+01 4.653019e+01 4.653019e+01
## 71 72 73 74 75
## 4.653019e+01 4.653019e+01 4.653019e+01 4.653019e+01 4.653019e+01
## 76 77 78 79 80
## 4.653019e+01 4.653019e+01 4.653019e+01 4.653019e+01 4.653019e+01
## 81 82 83 84 85
## 4.653019e+01 4.653019e+01 4.653019e+01 4.653019e+01 4.653019e+01
## 86 87 88 89 90
## 4.653019e+01 4.653019e+01 4.653019e+01 2.441040e+01 2.441040e+01
## 91 92 93 94 95
## 2.441040e+01 2.441040e+01 2.441040e+01 2.441040e+01 2.441040e+01
## 96 97 98 99 100
## 2.441040e+01 2.441040e+01 2.441040e+01 2.441040e+01 2.441040e+01
## 101 102 103 104 105
## 2.441040e+01 2.441040e+01 2.441040e+01 2.441040e+01 2.727869e+01
## 106 107 108 109 110
## 2.727869e+01 2.272384e+01 2.272384e+01 2.272384e+01 2.272384e+01
## 111 112 113 114 115
## 2.272384e+01 2.272384e+01 2.272384e+01 2.559212e+01 2.559212e+01
## 116 117 118 119 120
## 2.559212e+01 4.524327e+01 4.524327e+01 4.811156e+01 4.811156e+01
## 121 122 123 124 125
## 4.355671e+01 4.642499e+01 4.547835e+01 4.547835e+01 4.547835e+01
## 126 127 128 129 130
## 4.547835e+01 4.547835e+01 2.335857e+01 2.335857e+01 2.335857e+01
## 131 132 133 134 135
## -4.085621e-14 -2.498807e+01 -2.498807e+01 -2.211978e+01 -2.667463e+01
## 136 137 138 139 140
## -2.667463e+01 -2.667463e+01 -2.667463e+01 -2.667463e+01 -2.667463e+01
## 141 142 143 144 145
## -2.667463e+01 -2.667463e+01 -2.380635e+01 -2.380635e+01 -2.380635e+01
## 146 147 148 149 150
## -2.380635e+01 -2.380635e+01 -2.380635e+01 -2.380635e+01 -2.380635e+01
## 151 152 153 154 155
## -2.380635e+01 -2.380635e+01 -2.380635e+01 -2.380635e+01 -2.380635e+01
## 156 157 158 159 160
## -2.380635e+01 -2.380635e+01 -2.380635e+01 -2.380635e+01 -2.380635e+01
## 161 162 163 164 165
## -2.475299e+01 -2.475299e+01 -2.475299e+01 -2.475299e+01 -2.475299e+01
## 166 167 168 169 170
## -2.475299e+01 -2.475299e+01 -2.475299e+01 -2.475299e+01 -2.475299e+01
## 171 172 173 174 175
## -2.475299e+01 -2.475299e+01 -2.475299e+01 -2.475299e+01 -2.475299e+01
## 176 177 178 179 180
## -2.475299e+01 -2.475299e+01 -2.475299e+01 -2.475299e+01 -2.475299e+01
## 181 182 183 184 185
## -2.475299e+01 -2.475299e+01 -2.475299e+01 -2.475299e+01 -2.475299e+01
## 186 187 188 189 190
## -2.475299e+01 -2.475299e+01 -2.475299e+01 -2.475299e+01 -2.475299e+01
## 191 192 193 194 195
## -2.475299e+01 -2.475299e+01 -2.475299e+01 -2.475299e+01 -2.475299e+01
## 196 197 198 199 200
## -2.475299e+01 -2.475299e+01 -2.475299e+01 -2.475299e+01 -2.475299e+01
## 201 202 203 204 205
## -2.475299e+01 -2.475299e+01 -2.188470e+01 -4.687277e+01 -4.855934e+01
## 206 207 208 209 210
## -4.855934e+01 -4.855934e+01 -4.855934e+01 -4.855934e+01 -4.569105e+01
## 211 212 213 214 215
## -4.569105e+01 -4.569105e+01 -4.569105e+01 -4.569105e+01 -4.569105e+01
## 216 217 218 219 220
## -2.772647e+01 -2.772647e+01 -2.485818e+01 -2.485818e+01 -2.485818e+01
## 221 222 223 224 225
## -2.485818e+01 -2.580482e+01 -2.580482e+01 -2.580482e+01 -2.580482e+01
## 226 227 228 229 230
## -2.580482e+01 -2.580482e+01 -2.580482e+01 -2.580482e+01 -2.580482e+01
## 231 232 233 234 235
## -2.580482e+01 -2.580482e+01 -2.580482e+01 -2.580482e+01 -2.580482e+01
## 236 237 238 239
## -2.293654e+01 -4.792461e+01 -4.674289e+01 -4.674289e+01
max(LR_Model_Indices)
## [1] 49.16339
min(LR_Model_Indices)
## [1] -48.55934
##################################
# Consolidating the model probabilities
# and classification index
# based on the model predictions
##################################
LR_Model_Predictions <- as.data.frame(PMA_PreModelling_Train_LR)
LR_Model_Predictions$LR_Prob <- LR_Model_Probabilities
LR_Model_Predictions$LR_LP <- LR_Model_Indices
LR_Model_Predictions$UTI <- as.factor(LR_Model_Predictions$UTI)
LR_Model_Predictions$Label <- rep("LR",nrow(LR_Model_Predictions))
##################################
# Formulating the probability curve
# using the consolidated model predictions
##################################
LR_Model_Predictions %>%
  ggplot(aes(x = LR_LP ,
y = LR_Prob,
color = UTI)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("UTI Classification Index (Logit Values)") +
ylab("Estimated UTI Probability") +
labs(color = "UTI") +
scale_x_continuous( limits=c(-50,50), breaks=seq(-50,50,by=5)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
ggtitle("Estimated UTI Probabilities Based on Classification Index : Logistic Regression") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
##################################
# Formulating the corresponding
# receiver operating characteristic (ROC) curve
# using the model predictions
##################################
LR_Prob_Low <- LR_Model_Predictions[LR_Model_Predictions$UTI==0,
                                    c("LR_Prob")]
LR_Prob_High <- LR_Model_Predictions[LR_Model_Predictions$UTI==1,
                                     c("LR_Prob")]
LR_Model_Prob_ROC <- roc.curve(scores.class1 = LR_Prob_Low,
                               scores.class0 = LR_Prob_High,
                               curve = TRUE)
plot(LR_Model_Prob_ROC,
xlab="1-Specificity",
ylab="Sensitivity",
main="ROC Curve of the UTI Probabilities : Logistic Regression",
color=TRUE,
lwd=8,
legend=3)
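The area under the curve can also be cross-checked with pROC (already loaded); this sketch assumes the default level ordering and is not part of the original pipeline, and LR_Model_Prob_AUC is an assumed name.
# Optional AUC cross-check with pROC
LR_Model_Prob_AUC <- pROC::roc(response = LR_Model_Predictions$UTI,
                               predictor = LR_Model_Predictions$LR_Prob)
pROC::auc(LR_Model_Prob_AUC)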
##################################
# Creating a local object
# for the train set
##################################
PMA_PreModelling_Train_FBRLR <- UTI
##################################
# Formulating the structure of the
# Firth's Bias-Reduced Logistic Regression model
##################################
FBRLR_Model <- logistf(UTI ~ Age_24 + Contraceptive + Condom + LubricatedCondom + Spermicide + Diaphragm,
                       data = PMA_PreModelling_Train_FBRLR,
                       control=logistf.control(maxit = 10000))
##################################
# Consolidating the model results
##################################
summary(FBRLR_Model)
## logistf(formula = UTI ~ Age_24 + Contraceptive + Condom + LubricatedCondom +
## Spermicide + Diaphragm, data = PMA_PreModelling_Train_FBRLR,
## control = logistf.control(maxit = 10000))
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95 Chisq p
## (Intercept) -3.345993 1.773653 -8.977236 5.673493 0.0000000 1.0000000
## Age_24 -1.539877 1.501847 -10.674608 6.075439 0.0322714 0.8574333
## Contraceptive -1.014937 1.295321 -10.851515 2.273439 0.3216179 0.5706369
## Condom -2.048751 1.745776 -8.554568 2.953225 0.5499878 0.4583227
## LubricatedCondom -1.214703 1.504503 -9.510967 4.335792 0.2212636 0.6380788
## Spermicide 3.019690 1.735450 -5.362560 8.573758 0.0000000 1.0000000
## Diaphragm 10.409803 1.879626 7.071215 17.006177 Inf 0.0000000
## method
## (Intercept) 2
## Age_24 2
## Contraceptive 2
## Condom 2
## LubricatedCondom 2
## Spermicide 2
## Diaphragm 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=300.1278 on 6 df, p=0, n=239
## Wald test = 45.31742 on 6 df, p = 4.04738e-08
FBRLR_Model_Coef <- (as.data.frame(FBRLR_Model$coefficients))
FBRLR_Model_Coef$Coef <- rownames(FBRLR_Model_Coef)
FBRLR_Model_Coef$Model <- rep("FBRLR",nrow(FBRLR_Model_Coef))
colnames(FBRLR_Model_Coef) <- c("Estimates","Coefficients","Model")
print(FBRLR_Model_Coef, rownames=FALSE)
## Estimates Coefficients Model
## (Intercept) -3.345993 (Intercept) FBRLR
## Age_24 -1.539877 Age_24 FBRLR
## Contraceptive -1.014937 Contraceptive FBRLR
## Condom -2.048751 Condom FBRLR
## LubricatedCondom -1.214703 LubricatedCondom FBRLR
## Spermicide 3.019690 Spermicide FBRLR
## Diaphragm 10.409803 Diaphragm FBRLR
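In contrast to the maximum likelihood fit, the penalized estimates stay finite; exponentiating them (an optional check, not in the original pipeline) gives bounded odds ratios, including for Diaphragm.
# Optional check: finite odds ratios under Firth's penalization
exp(FBRLR_Model$coefficients)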
##################################
# Computing the model predictions
##################################
(FBRLR_Model_Probabilities <- predict(FBRLR_Model,
                                      type = c("response")))
## [1] 0.4191404846 0.9934065218 0.9934065218 0.9934065218 0.9934065218
## [6] 0.9934065218 0.9934065218 0.9934065218 0.9934065218 0.9934065218
## [11] 0.9934065218 0.9934065218 0.9934065218 0.9934065218 0.9934065218
## [16] 0.9996760990 0.9996760990 0.9996760990 0.9781263548 0.9781263548
## [21] 0.9781263548 0.9781263548 0.9781263548 0.9781263548 0.9781263548
## [26] 0.9781263548 0.9781263548 0.9781263548 0.9989095199 0.9989095199
## [31] 0.9989095199 0.9989095199 0.9989095199 0.9989095199 0.9989095199
## [36] 0.9989095199 0.9989095199 0.9989095199 0.9989095199 0.9989095199
## [41] 0.9989095199 0.9976450392 0.9976450392 0.9976450392 0.9976450392
## [46] 0.9976450392 0.9976450392 0.9976450392 0.9976450392 0.9976450392
## [51] 0.9976450392 0.9976450392 0.9976450392 0.9976450392 0.9976450392
## [56] 0.9976450392 0.9976450392 0.9976450392 0.9976450392 0.9976450392
## [61] 0.9976450392 0.9976450392 0.9976450392 0.9976450392 0.9976450392
## [66] 0.9976450392 0.9976450392 0.9976450392 0.9976450392 0.9976450392
## [71] 0.9976450392 0.9976450392 0.9976450392 0.9976450392 0.9976450392
## [76] 0.9976450392 0.9976450392 0.9976450392 0.9976450392 0.9976450392
## [81] 0.9976450392 0.9976450392 0.9976450392 0.9976450392 0.9976450392
## [86] 0.9976450392 0.9976450392 0.9976450392 0.9820159482 0.9820159482
## [91] 0.9820159482 0.9820159482 0.9820159482 0.9820159482 0.9820159482
## [96] 0.9820159482 0.9820159482 0.9820159482 0.9820159482 0.9820159482
## [101] 0.9820159482 0.9820159482 0.9820159482 0.9820159482 0.9991068050
## [106] 0.9991068050 0.9418828221 0.9418828221 0.9418828221 0.9418828221
## [111] 0.9418828221 0.9418828221 0.9418828221 0.9969969205 0.9969969205
## [116] 0.9969969205 0.9699733189 0.9699733189 0.9984911116 0.9984911116
## [121] 0.9055506591 0.9949342246 0.9891103889 0.9891103889 0.9891103889
## [126] 0.9891103889 0.9891103889 0.9213074590 0.9213074590 0.9213074590
## [131] 0.4191404846 0.0045198613 0.0045198613 0.0850948364 0.0013457643
## [136] 0.0013457643 0.0013457643 0.0013457643 0.0013457643 0.0013457643
## [141] 0.0013457643 0.0013457643 0.0268634795 0.0268634795 0.0268634795
## [146] 0.0268634795 0.0268634795 0.0268634795 0.0268634795 0.0268634795
## [151] 0.0268634795 0.0268634795 0.0268634795 0.0268634795 0.0268634795
## [156] 0.0268634795 0.0268634795 0.0268634795 0.0268634795 0.0268634795
## [161] 0.0126055879 0.0126055879 0.0126055879 0.0126055879 0.0126055879
## [166] 0.0126055879 0.0126055879 0.0126055879 0.0126055879 0.0126055879
## [171] 0.0126055879 0.0126055879 0.0126055879 0.0126055879 0.0126055879
## [176] 0.0126055879 0.0126055879 0.0126055879 0.0126055879 0.0126055879
## [181] 0.0126055879 0.0126055879 0.0126055879 0.0126055879 0.0126055879
## [186] 0.0126055879 0.0126055879 0.0126055879 0.0126055879 0.0126055879
## [191] 0.0126055879 0.0126055879 0.0126055879 0.0126055879 0.0126055879
## [196] 0.0126055879 0.0126055879 0.0126055879 0.0126055879 0.0126055879
## [201] 0.0126055879 0.0126055879 0.2073062892 0.0016428466 0.0004881580
## [206] 0.0004881580 0.0004881580 0.0004881580 0.0004881580 0.0099056649
## [211] 0.0099056649 0.0099056649 0.0099056649 0.0099056649 0.0099056649
## [216] 0.0002888474 0.0002888474 0.0058839046 0.0058839046 0.0058839046
## [221] 0.0058839046 0.0027297654 0.0027297654 0.0027297654 0.0027297654
## [226] 0.0027297654 0.0027297654 0.0027297654 0.0027297654 0.0027297654
## [231] 0.0027297654 0.0027297654 0.0027297654 0.0027297654 0.0027297654
## [236] 0.0530949852 0.0003526939 0.0021405061 0.0021405061
##################################
# Creating a classification index
# based on the model predictions
##################################
(FBRLR_Model_Indices <- predict(FBRLR_Model,
                                type = c("link")))
## [1] -0.3263028 5.0150590 5.0150590 5.0150590 5.0150590 5.0150590
## [7] 5.0150590 5.0150590 5.0150590 5.0150590 5.0150590 5.0150590
## [13] 5.0150590 5.0150590 5.0150590 8.0347488 8.0347488 8.0347488
## [19] 3.8003564 3.8003564 3.8003564 3.8003564 3.8003564 3.8003564
## [25] 3.8003564 3.8003564 3.8003564 3.8003564 6.8200462 6.8200462
## [31] 6.8200462 6.8200462 6.8200462 6.8200462 6.8200462 6.8200462
## [37] 6.8200462 6.8200462 6.8200462 6.8200462 6.8200462 6.0488735
## [43] 6.0488735 6.0488735 6.0488735 6.0488735 6.0488735 6.0488735
## [49] 6.0488735 6.0488735 6.0488735 6.0488735 6.0488735 6.0488735
## [55] 6.0488735 6.0488735 6.0488735 6.0488735 6.0488735 6.0488735
## [61] 6.0488735 6.0488735 6.0488735 6.0488735 6.0488735 6.0488735
## [67] 6.0488735 6.0488735 6.0488735 6.0488735 6.0488735 6.0488735
## [73] 6.0488735 6.0488735 6.0488735 6.0488735 6.0488735 6.0488735
## [79] 6.0488735 6.0488735 6.0488735 6.0488735 6.0488735 6.0488735
## [85] 6.0488735 6.0488735 6.0488735 6.0488735 4.0001222 4.0001222
## [91] 4.0001222 4.0001222 4.0001222 4.0001222 4.0001222 4.0001222
## [97] 4.0001222 4.0001222 4.0001222 4.0001222 4.0001222 4.0001222
## [103] 4.0001222 4.0001222 7.0198120 7.0198120 2.7854196 2.7854196
## [109] 2.7854196 2.7854196 2.7854196 2.7854196 2.7854196 5.8051094
## [115] 5.8051094 5.8051094 3.4751822 3.4751822 6.4948720 6.4948720
## [121] 2.2604796 5.2801694 4.5089967 4.5089967 4.5089967 4.5089967
## [127] 4.5089967 2.4602454 2.4602454 2.4602454 -0.3263028 -5.3947439
## [133] -5.3947439 -2.3750541 -6.6094465 -6.6094465 -6.6094465 -6.6094465
## [139] -6.6094465 -6.6094465 -6.6094465 -6.6094465 -3.5897567 -3.5897567
## [145] -3.5897567 -3.5897567 -3.5897567 -3.5897567 -3.5897567 -3.5897567
## [151] -3.5897567 -3.5897567 -3.5897567 -3.5897567 -3.5897567 -3.5897567
## [157] -3.5897567 -3.5897567 -3.5897567 -3.5897567 -4.3609294 -4.3609294
## [163] -4.3609294 -4.3609294 -4.3609294 -4.3609294 -4.3609294 -4.3609294
## [169] -4.3609294 -4.3609294 -4.3609294 -4.3609294 -4.3609294 -4.3609294
## [175] -4.3609294 -4.3609294 -4.3609294 -4.3609294 -4.3609294 -4.3609294
## [181] -4.3609294 -4.3609294 -4.3609294 -4.3609294 -4.3609294 -4.3609294
## [187] -4.3609294 -4.3609294 -4.3609294 -4.3609294 -4.3609294 -4.3609294
## [193] -4.3609294 -4.3609294 -4.3609294 -4.3609294 -4.3609294 -4.3609294
## [199] -4.3609294 -4.3609294 -4.3609294 -4.3609294 -1.3412395 -6.4096806
## [205] -7.6243832 -7.6243832 -7.6243832 -7.6243832 -7.6243832 -4.6046934
## [211] -4.6046934 -4.6046934 -4.6046934 -4.6046934 -4.6046934 -8.1493232
## [217] -8.1493232 -5.1296334 -5.1296334 -5.1296334 -5.1296334 -5.9008061
## [223] -5.9008061 -5.9008061 -5.9008061 -5.9008061 -5.9008061 -5.9008061
## [229] -5.9008061 -5.9008061 -5.9008061 -5.9008061 -5.9008061 -5.9008061
## [235] -5.9008061 -2.8811163 -7.9495574 -6.1445702 -6.1445702
max(FBRLR_Model_Indices)
## [1] 8.034749
min(FBRLR_Model_Indices)
## [1] -8.149323
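The probabilities and the classification index are linked by the inverse logit, so the extreme indices above should map back onto the most extreme fitted probabilities; a quick optional spot check:
# plogis() is the inverse logit: index range -> probability range
plogis(c(min(FBRLR_Model_Indices), max(FBRLR_Model_Indices)))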
##################################
# Consolidating the model probabilities
# and classification index
# based on the model predictions
##################################
FBRLR_Model_Predictions <- as.data.frame(PMA_PreModelling_Train_FBRLR)
FBRLR_Model_Predictions$FBRLR_Prob <- FBRLR_Model_Probabilities
FBRLR_Model_Predictions$FBRLR_LP <- FBRLR_Model_Indices
FBRLR_Model_Predictions$UTI <- as.factor(FBRLR_Model_Predictions$UTI)
FBRLR_Model_Predictions$Label <- rep("FBRLR",nrow(FBRLR_Model_Predictions))
##################################
# Formulating the probability curve
# using the consolidated model predictions
##################################
FBRLR_Model_Predictions %>%
  ggplot(aes(x = FBRLR_LP ,
y = FBRLR_Prob,
color = UTI)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("UTI Classification Index (Logit Values)") +
ylab("Estimated UTI Probability") +
labs(color = "UTI") +
scale_x_continuous( limits=c(-10,10), breaks=seq(-10,10,by=1)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
ggtitle("Estimated UTI Probabilities Based on Classification Index : Firth's Bias-Reduced Logistic Regression") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
##################################
# Formulating the corresponding
# receiver operating characteristic (ROC) curve
# using the model predictions
##################################
FBRLR_Prob_Low <- FBRLR_Model_Predictions[FBRLR_Model_Predictions$UTI==0,
                                          c("FBRLR_Prob")]
FBRLR_Prob_High <- FBRLR_Model_Predictions[FBRLR_Model_Predictions$UTI==1,
                                           c("FBRLR_Prob")]
FBRLR_Model_Prob_ROC <- roc.curve(scores.class1 = FBRLR_Prob_Low,
                                  scores.class0 = FBRLR_Prob_High,
                                  curve = TRUE)
plot(FBRLR_Model_Prob_ROC,
xlab="1-Specificity",
ylab="Sensitivity",
main="ROC Curve of the UTI Probabilities : Firth's Bias-Reduced Logistic Regression",
color=TRUE,
lwd=8,
legend=3)
##################################
# Creating a local object
# for the train set
##################################
PMA_PreModelling_Train_FLAC <- UTI
##################################
# Formulating the structure of the
# Firth's Logistic Regression With Added Covariate model
##################################
FLAC_Model <- flac(UTI ~ Age_24 + Contraceptive + Condom + LubricatedCondom + Spermicide + Diaphragm,
                   data = PMA_PreModelling_Train_FLAC,
                   control=logistf.control(maxit = 10000))
##################################
# Consolidating the model results
##################################
summary(FLAC_Model)
## Firth's logistic regression with added covariate
##
## flac.default(formula = UTI ~ Age_24 + Contraceptive + Condom +
## LubricatedCondom + Spermicide + Diaphragm, data = PMA_PreModelling_Train_FLAC,
## control = logistf.control(maxit = 10000))
##
## Model fitted by Standard ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95 Chisq p
## (Intercept) -3.302290 1.782586 -7.760055 0.09953485 3.6234507 0.05697059
## Age_24 -1.487618 1.497439 -4.646277 1.36417805 1.0134856 0.31406924
## Contraceptive -1.042335 1.302286 -4.111464 1.31085064 0.7064407 0.40062796
## Condom -2.115792 1.778198 -6.237843 1.15049726 1.5751351 0.20946284
## LubricatedCondom -1.231671 1.508294 -4.369073 2.00803070 0.6514910 0.41958010
## Spermicide 3.115554 1.757872 -0.114788 7.55438640 3.5591064 0.05921961
## Diaphragm 10.507036 1.920601 7.523219 15.41416178 Inf 0.00000000
## method
## (Intercept) 2
## Age_24 2
## Contraceptive 2
## Condom 2
## LubricatedCondom 2
## Spermicide 2
## Diaphragm 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=305.0325 on 6 df, p=0, n=239
## Wald test = 44.74216 on 6 df, p = 5.266057e-08
FLAC_Model_Coef <- (as.data.frame(FLAC_Model$coefficients))
FLAC_Model_Coef$Coef <- rownames(FLAC_Model_Coef)
FLAC_Model_Coef$Model <- rep("FLAC",nrow(FLAC_Model_Coef))
colnames(FLAC_Model_Coef) <- c("Estimates","Coefficients","Model")
print(FLAC_Model_Coef, rownames=FALSE)
## Estimates Coefficients Model
## (Intercept) -3.302290 (Intercept) FLAC
## Age_24 -1.487618 Age_24 FLAC
## Contraceptive -1.042335 Contraceptive FLAC
## Condom -2.115792 Condom FLAC
## LubricatedCondom -1.231671 LubricatedCondom FLAC
## Spermicide 3.115554 Spermicide FLAC
## Diaphragm 10.507036 Diaphragm FLAC
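Since each model's coefficients were collected into a frame with the same columns, the estimates obtained so far can be stacked for a side-by-side view; an optional sketch, not part of the original pipeline:
# Optional comparison of the coefficient estimates across the fitted models
rbind(LR_Model_Coef, FBRLR_Model_Coef, FLAC_Model_Coef)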
##################################
# Computing the model predictions
##################################
(FLAC_Model_Probabilities <- predict(FLAC_Model,
                                     type = c("response")))
## [1] 0.4534510426 0.9938733034 0.9938733034 0.9938733034 0.9938733034
## [6] 0.9938733034 0.9938733034 0.9938733034 0.9938733034 0.9938733034
## [11] 0.9938733034 0.9938733034 0.9938733034 0.9938733034 0.9938733034
## [16] 0.9997266564 0.9997266564 0.9997266564 0.9793117349 0.9793117349
## [21] 0.9793117349 0.9793117349 0.9793117349 0.9793117349 0.9793117349
## [26] 0.9793117349 0.9793117349 0.9793117349 0.9990638862 0.9990638862
## [31] 0.9990638862 0.9990638862 0.9990638862 0.9990638862 0.9990638862
## [36] 0.9990638862 0.9990638862 0.9990638862 0.9990638862 0.9990638862
## [41] 0.9990638862 0.9978972638 0.9978972638 0.9978972638 0.9978972638
## [46] 0.9978972638 0.9978972638 0.9978972638 0.9978972638 0.9978972638
## [51] 0.9978972638 0.9978972638 0.9978972638 0.9978972638 0.9978972638
## [56] 0.9978972638 0.9978972638 0.9978972638 0.9978972638 0.9978972638
## [61] 0.9978972638 0.9978972638 0.9978972638 0.9978972638 0.9978972638
## [66] 0.9978972638 0.9978972638 0.9978972638 0.9978972638 0.9978972638
## [71] 0.9978972638 0.9978972638 0.9978972638 0.9978972638 0.9978972638
## [76] 0.9978972638 0.9978972638 0.9978972638 0.9978972638 0.9978972638
## [81] 0.9978972638 0.9978972638 0.9978972638 0.9978972638 0.9978972638
## [86] 0.9978972638 0.9978972638 0.9978972638 0.9828189634 0.9828189634
## [91] 0.9828189634 0.9828189634 0.9828189634 0.9828189634 0.9828189634
## [96] 0.9828189634 0.9828189634 0.9828189634 0.9828189634 0.9828189634
## [101] 0.9828189634 0.9828189634 0.9828189634 0.9828189634 0.9992252323
## [106] 0.9992252323 0.9434782637 0.9434782637 0.9434782637 0.9434782637
## [111] 0.9434782637 0.9434782637 0.9434782637 0.9973498929 0.9973498929
## [116] 0.9973498929 0.9734375625 0.9734375625 0.9987911656 0.9987911656
## [121] 0.9144846628 0.9958695031 0.9907587369 0.9907587369 0.9907587369
## [126] 0.9907587369 0.9907587369 0.9281758381 0.9281758381 0.9281758381
## [131] 0.4534510426 0.0044160565 0.0044160565 0.0909137438 0.0012926686
## [136] 0.0012926686 0.0012926686 0.0012926686 0.0012926686 0.0012926686
## [141] 0.0012926686 0.0012926686 0.0283546611 0.0283546611 0.0283546611
## [146] 0.0283546611 0.0283546611 0.0283546611 0.0283546611 0.0283546611
## [151] 0.0283546611 0.0283546611 0.0283546611 0.0283546611 0.0283546611
## [156] 0.0283546611 0.0283546611 0.0283546611 0.0283546611 0.0283546611
## [161] 0.0128101384 0.0128101384 0.0128101384 0.0128101384 0.0128101384
## [166] 0.0128101384 0.0128101384 0.0128101384 0.0128101384 0.0128101384
## [171] 0.0128101384 0.0128101384 0.0128101384 0.0128101384 0.0128101384
## [176] 0.0128101384 0.0128101384 0.0128101384 0.0128101384 0.0128101384
## [181] 0.0128101384 0.0128101384 0.0128101384 0.0128101384 0.0128101384
## [186] 0.0128101384 0.0128101384 0.0128101384 0.0128101384 0.0128101384
## [191] 0.0128101384 0.0128101384 0.0128101384 0.0128101384 0.0128101384
## [196] 0.0128101384 0.0128101384 0.0128101384 0.0128101384 0.0128101384
## [201] 0.0128101384 0.0128101384 0.2263439047 0.0015616995 0.0004562158
## [206] 0.0004562158 0.0004562158 0.0004562158 0.0004562158 0.0101856780
## [211] 0.0101856780 0.0101856780 0.0101856780 0.0101856780 0.0101856780
## [216] 0.0002923193 0.0002923193 0.0065493547 0.0065493547 0.0065493547
## [221] 0.0065493547 0.0029229231 0.0029229231 0.0029229231 0.0029229231
## [226] 0.0029229231 0.0029229231 0.0029229231 0.0029229231 0.0029229231
## [231] 0.0029229231 0.0029229231 0.0029229231 0.0029229231 0.0029229231
## [236] 0.0619956518 0.0003532306 0.0023193340 0.0023193340
##################################
# Creating a classification index
# based on the model predictions
##################################
(FLAC_Model_Indices <- predict(FLAC_Model,
                               type = c("link")))
## [1] -0.1867366 5.0889540 5.0889540 5.0889540 5.0889540 5.0889540
## [7] 5.0889540 5.0889540 5.0889540 5.0889540 5.0889540 5.0889540
## [13] 5.0889540 5.0889540 5.0889540 8.2045076 8.2045076 8.2045076
## [19] 3.8572834 3.8572834 3.8572834 3.8572834 3.8572834 3.8572834
## [25] 3.8572834 3.8572834 3.8572834 3.8572834 6.9728369 6.9728369
## [31] 6.9728369 6.9728369 6.9728369 6.9728369 6.9728369 6.9728369
## [37] 6.9728369 6.9728369 6.9728369 6.9728369 6.9728369 6.1624109
## [43] 6.1624109 6.1624109 6.1624109 6.1624109 6.1624109 6.1624109
## [49] 6.1624109 6.1624109 6.1624109 6.1624109 6.1624109 6.1624109
## [55] 6.1624109 6.1624109 6.1624109 6.1624109 6.1624109 6.1624109
## [61] 6.1624109 6.1624109 6.1624109 6.1624109 6.1624109 6.1624109
## [67] 6.1624109 6.1624109 6.1624109 6.1624109 6.1624109 6.1624109
## [73] 6.1624109 6.1624109 6.1624109 6.1624109 6.1624109 6.1624109
## [79] 6.1624109 6.1624109 6.1624109 6.1624109 6.1624109 6.1624109
## [85] 6.1624109 6.1624109 6.1624109 6.1624109 4.0466187 4.0466187
## [91] 4.0466187 4.0466187 4.0466187 4.0466187 4.0466187 4.0466187
## [97] 4.0466187 4.0466187 4.0466187 4.0466187 4.0466187 4.0466187
## [103] 4.0466187 4.0466187 7.1621722 7.1621722 2.8149480 2.8149480
## [109] 2.8149480 2.8149480 2.8149480 2.8149480 2.8149480 5.9305016
## [115] 5.9305016 5.9305016 3.6013356 3.6013356 6.7168891 6.7168891
## [121] 2.3696650 5.4852185 4.6747925 4.6747925 4.6747925 4.6747925
## [127] 4.6747925 2.5590003 2.5590003 2.5590003 -0.1867366 -5.4180823
## [133] -5.4180823 -2.3025288 -6.6497530 -6.6497530 -6.6497530 -6.6497530
## [139] -6.6497530 -6.6497530 -6.6497530 -6.6497530 -3.5341994 -3.5341994
## [145] -3.5341994 -3.5341994 -3.5341994 -3.5341994 -3.5341994 -3.5341994
## [151] -3.5341994 -3.5341994 -3.5341994 -3.5341994 -3.5341994 -3.5341994
## [157] -3.5341994 -3.5341994 -3.5341994 -3.5341994 -4.3446255 -4.3446255
## [163] -4.3446255 -4.3446255 -4.3446255 -4.3446255 -4.3446255 -4.3446255
## [169] -4.3446255 -4.3446255 -4.3446255 -4.3446255 -4.3446255 -4.3446255
## [175] -4.3446255 -4.3446255 -4.3446255 -4.3446255 -4.3446255 -4.3446255
## [181] -4.3446255 -4.3446255 -4.3446255 -4.3446255 -4.3446255 -4.3446255
## [187] -4.3446255 -4.3446255 -4.3446255 -4.3446255 -4.3446255 -4.3446255
## [193] -4.3446255 -4.3446255 -4.3446255 -4.3446255 -4.3446255 -4.3446255
## [199] -4.3446255 -4.3446255 -4.3446255 -4.3446255 -1.2290719 -6.4604177
## [205] -7.6920883 -7.6920883 -7.6920883 -7.6920883 -7.6920883 -4.5765348
## [211] -4.5765348 -4.5765348 -4.5765348 -4.5765348 -4.5765348 -8.1373714
## [217] -8.1373714 -5.0218179 -5.0218179 -5.0218179 -5.0218179 -5.8322439
## [223] -5.8322439 -5.8322439 -5.8322439 -5.8322439 -5.8322439 -5.8322439
## [229] -5.8322439 -5.8322439 -5.8322439 -5.8322439 -5.8322439 -5.8322439
## [235] -5.8322439 -2.7166903 -7.9480361 -6.0641532 -6.0641532
max(FLAC_Model_Indices)
## [1] 8.204508
min(FLAC_Model_Indices)
## [1] -8.137371
##################################
# Consolidating the model probabilities
# and classification index
# based on the model predictions
##################################
FLAC_Model_Predictions <- as.data.frame(PMA_PreModelling_Train_FLAC)
FLAC_Model_Predictions$FLAC_Prob <- FLAC_Model_Probabilities
FLAC_Model_Predictions$FLAC_LP <- FLAC_Model_Indices
FLAC_Model_Predictions$UTI <- as.factor(FLAC_Model_Predictions$UTI)
FLAC_Model_Predictions$Label <- rep("FLAC",nrow(FLAC_Model_Predictions))
##################################
# Formulating the probability curve
# using the consolidated model predictions
##################################
FLAC_Model_Predictions %>%
  ggplot(aes(x = FLAC_LP ,
y = FLAC_Prob,
color = UTI)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("UTI Classification Index (Logit Values)") +
ylab("Estimated UTI Probability") +
labs(color = "UTI") +
scale_x_continuous( limits=c(-10,10), breaks=seq(-10,10,by=1)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
ggtitle("Estimated UTI Probabilities Based on Classification Index : Firth's Logistic Regression With Added Covariate") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
##################################
# Formulating the corresponding
# receiver operating characteristic (ROC) curve
# using the model predictions
##################################
FLAC_Prob_Low <- FLAC_Model_Predictions[FLAC_Model_Predictions$UTI==0,
                                        c("FLAC_Prob")]
FLAC_Prob_High <- FLAC_Model_Predictions[FLAC_Model_Predictions$UTI==1,
                                         c("FLAC_Prob")]
FLAC_Model_Prob_ROC <- roc.curve(scores.class1 = FLAC_Prob_Low,
                                 scores.class0 = FLAC_Prob_High,
                                 curve = TRUE)
plot(FLAC_Model_Prob_ROC,
xlab="1-Specificity",
ylab="Sensitivity",
main="ROC Curve of the UTI Probabilities : Firth's Logistic Regression With Added Covariate",
color=TRUE,
lwd=8,
legend=3)
##################################
# Creating a local object
# for the train set
##################################
PMA_PreModelling_Train_FLIC <- UTI
##################################
# Formulating the structure of the
# Firth's Logistic Regression With Intercept Correction model
##################################
FLIC_Model <- flic(UTI ~ Age_24 + Contraceptive + Condom + LubricatedCondom + Spermicide + Diaphragm,
                   data = PMA_PreModelling_Train_FLIC,
                   control=logistf.control(maxit = 10000))
##################################
# Consolidating the model results
##################################
summary(FLIC_Model)
## Firth's logistic regression with intercept correction
##
## flic.default(formula = UTI ~ Age_24 + Contraceptive + Condom +
## LubricatedCondom + Spermicide + Diaphragm, data = PMA_PreModelling_Train_FLIC,
## control = logistf.control(maxit = 10000))
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95 Chisq
## (Intercept) -3.274102 0.6801791 -6.859687 0.3114838 37.0547096
## Age_24 -1.539877 0.2894472 -10.674608 6.0754390 0.0322714
## Contraceptive -1.014937 0.2432554 -10.851515 2.2734388 0.3216179
## Condom -2.048751 0.2402000 -8.554568 2.9532245 0.5499878
## LubricatedCondom -1.214703 0.2497985 -9.510967 4.3357916 0.2212636
## Spermicide 3.019690 0.2665983 -5.362560 8.5737577 0.0000000
## Diaphragm 10.409803 0.6658350 7.071215 17.0061765 Inf
## p method
## (Intercept) 1.148608e-09 1
## Age_24 8.574333e-01 2
## Contraceptive 5.706369e-01 2
## Condom 4.583227e-01 2
## LubricatedCondom 6.380788e-01 2
## Spermicide 1.000000e+00 2
## Diaphragm 0.000000e+00 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=300.1144 on 6 df, p=0, n=239
## Wald test = 3885.076 on 6 df, p = 0
FLIC_Model_Coef <- (as.data.frame(FLIC_Model$coefficients))
FLIC_Model_Coef$Coef <- rownames(FLIC_Model_Coef)
FLIC_Model_Coef$Model <- rep("FLIC",nrow(FLIC_Model_Coef))
colnames(FLIC_Model_Coef) <- c("Estimates","Coefficients","Model")
print(FLIC_Model_Coef, rownames=FALSE)
## Estimates Coefficients Model
## (Intercept) -3.274102 (Intercept) FLIC
## Age_24 -1.539877 Age_24 FLIC
## Contraceptive -1.014937 Contraceptive FLIC
## Condom -2.048751 Condom FLIC
## LubricatedCondom -1.214703 LubricatedCondom FLIC
## Spermicide 3.019690 Spermicide FLIC
## Diaphragm 10.409803 Diaphragm FLIC
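FLIC keeps the Firth-penalized slope estimates and only re-estimates the intercept, which the summaries above already suggest; an optional check comparing the slopes directly:
# Optional check: FLIC slopes should equal the FBRLR (Firth) slopes exactly
cbind(FBRLR = FBRLR_Model$coefficients[-1],
      FLIC  = FLIC_Model$coefficients[-1])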
##################################
# Computing the model predictions
##################################
(FLIC_Model_Probabilities <- predict(FLIC_Model,
                                     type = c("response")))
## [1] 0.4367378818 0.9938610876 0.9938610876 0.9938610876 0.9938610876
## [6] 0.9938610876 0.9938610876 0.9938610876 0.9938610876 0.9938610876
## [11] 0.9938610876 0.9938610876 0.9938610876 0.9938610876 0.9938610876
## [16] 0.9996985605 0.9996985605 0.9996985605 0.9796127423 0.9796127423
## [21] 0.9796127423 0.9796127423 0.9796127423 0.9796127423 0.9796127423
## [26] 0.9796127423 0.9796127423 0.9796127423 0.9989850871 0.9989850871
## [31] 0.9989850871 0.9989850871 0.9989850871 0.9989850871 0.9989850871
## [36] 0.9989850871 0.9989850871 0.9989850871 0.9989850871 0.9989850871
## [41] 0.9989850871 0.9978080391 0.9978080391 0.9978080391 0.9978080391
## [46] 0.9978080391 0.9978080391 0.9978080391 0.9978080391 0.9978080391
## [51] 0.9978080391 0.9978080391 0.9978080391 0.9978080391 0.9978080391
## [56] 0.9978080391 0.9978080391 0.9978080391 0.9978080391 0.9978080391
## [61] 0.9978080391 0.9978080391 0.9978080391 0.9978080391 0.9978080391
## [66] 0.9978080391 0.9978080391 0.9978080391 0.9978080391 0.9978080391
## [71] 0.9978080391 0.9978080391 0.9978080391 0.9978080391 0.9978080391
## [76] 0.9978080391 0.9978080391 0.9978080391 0.9978080391 0.9978080391
## [81] 0.9978080391 0.9978080391 0.9978080391 0.9978080391 0.9978080391
## [86] 0.9978080391 0.9978080391 0.9978080391 0.9832425530 0.9832425530
## [91] 0.9832425530 0.9832425530 0.9832425530 0.9832425530 0.9832425530
## [96] 0.9832425530 0.9832425530 0.9832425530 0.9832425530 0.9832425530
## [101] 0.9832425530 0.9832425530 0.9832425530 0.9832425530 0.9991687122
## [106] 0.9991687122 0.9456953430 0.9456953430 0.9456953430 0.9456953430
## [111] 0.9456953430 0.9456953430 0.9456953430 0.9972046545 0.9972046545
## [116] 0.9972046545 0.9719978717 0.9719978717 0.9985956325 0.9985956325
## [121] 0.9115227016 0.9952839679 0.9898581137 0.9898581137 0.9898581137
## [126] 0.9898581137 0.9898581137 0.9263642127 0.9263642127 0.9263642127
## [131] 0.4367378818 0.0048551274 0.0048551274 0.0908613275 0.0014459300
## [136] 0.0014459300 0.0014459300 0.0014459300 0.0014459300 0.0014459300
## [141] 0.0014459300 0.0014459300 0.0288081480 0.0288081480 0.0288081480
## [146] 0.0288081480 0.0288081480 0.0288081480 0.0288081480 0.0288081480
## [151] 0.0288081480 0.0288081480 0.0288081480 0.0288081480 0.0288081480
## [156] 0.0288081480 0.0288081480 0.0288081480 0.0288081480 0.0288081480
## [161] 0.0135324694 0.0135324694 0.0135324694 0.0135324694 0.0135324694
## [166] 0.0135324694 0.0135324694 0.0135324694 0.0135324694 0.0135324694
## [171] 0.0135324694 0.0135324694 0.0135324694 0.0135324694 0.0135324694
## [176] 0.0135324694 0.0135324694 0.0135324694 0.0135324694 0.0135324694
## [181] 0.0135324694 0.0135324694 0.0135324694 0.0135324694 0.0135324694
## [186] 0.0135324694 0.0135324694 0.0135324694 0.0135324694 0.0135324694
## [191] 0.0135324694 0.0135324694 0.0135324694 0.0135324694 0.0135324694
## [196] 0.0135324694 0.0135324694 0.0135324694 0.0135324694 0.0135324694
## [201] 0.0135324694 0.0135324694 0.2193687712 0.0017650851 0.0005245252
## [206] 0.0005245252 0.0005245252 0.0005245252 0.0005245252 0.0106361610
## [211] 0.0106361610 0.0106361610 0.0106361610 0.0106361610 0.0106361610
## [216] 0.0003103708 0.0003103708 0.0063197079 0.0063197079 0.0063197079
## [221] 0.0063197079 0.0029326402 0.0029326402 0.0029326402 0.0029326402
## [226] 0.0029326402 0.0029326402 0.0029326402 0.0029326402 0.0029326402
## [231] 0.0029326402 0.0029326402 0.0029326402 0.0029326402 0.0029326402
## [236] 0.0568276830 0.0003789730 0.0022996884 0.0022996884
##################################
# Creating a classification index
# based on the model predictions
##################################
(FLIC_Model_Indices <- predict(FLIC_Model,
                               type = c("link")))
## 1 2 3 4 5 6 7
## -0.2544119 5.0869499 5.0869499 5.0869499 5.0869499 5.0869499 5.0869499
## 8 9 10 11 12 13 14
## 5.0869499 5.0869499 5.0869499 5.0869499 5.0869499 5.0869499 5.0869499
## 15 16 17 18 19 20 21
## 5.0869499 8.1066397 8.1066397 8.1066397 3.8722473 3.8722473 3.8722473
## 22 23 24 25 26 27 28
## 3.8722473 3.8722473 3.8722473 3.8722473 3.8722473 3.8722473 3.8722473
## 29 30 31 32 33 34 35
## 6.8919371 6.8919371 6.8919371 6.8919371 6.8919371 6.8919371 6.8919371
## 36 37 38 39 40 41 42
## 6.8919371 6.8919371 6.8919371 6.8919371 6.8919371 6.8919371 6.1207644
## 43 44 45 46 47 48 49
## 6.1207644 6.1207644 6.1207644 6.1207644 6.1207644 6.1207644 6.1207644
## 50 51 52 53 54 55 56
## 6.1207644 6.1207644 6.1207644 6.1207644 6.1207644 6.1207644 6.1207644
## 57 58 59 60 61 62 63
## 6.1207644 6.1207644 6.1207644 6.1207644 6.1207644 6.1207644 6.1207644
## 64 65 66 67 68 69 70
## 6.1207644 6.1207644 6.1207644 6.1207644 6.1207644 6.1207644 6.1207644
## 71 72 73 74 75 76 77
## 6.1207644 6.1207644 6.1207644 6.1207644 6.1207644 6.1207644 6.1207644
## 78 79 80 81 82 83 84
## 6.1207644 6.1207644 6.1207644 6.1207644 6.1207644 6.1207644 6.1207644
## 85 86 87 88 89 90 91
## 6.1207644 6.1207644 6.1207644 6.1207644 4.0720131 4.0720131 4.0720131
## 92 93 94 95 96 97 98
## 4.0720131 4.0720131 4.0720131 4.0720131 4.0720131 4.0720131 4.0720131
## 99 100 101 102 103 104 105
## 4.0720131 4.0720131 4.0720131 4.0720131 4.0720131 4.0720131 7.0917029
## 106 107 108 109 110 111 112
## 7.0917029 2.8573105 2.8573105 2.8573105 2.8573105 2.8573105 2.8573105
## 113 114 115 116 117 118 119
## 2.8573105 5.8770003 5.8770003 5.8770003 3.5470731 3.5470731 6.5667629
## 120 121 122 123 124 125 126
## 6.5667629 2.3323705 5.3520603 4.5808876 4.5808876 4.5808876 4.5808876
## 127 128 129 130 131 132 133
## 4.5808876 2.5321363 2.5321363 2.5321363 -0.2544119 -5.3228530 -5.3228530
## 134 135 136 137 138 139 140
## -2.3031632 -6.5375556 -6.5375556 -6.5375556 -6.5375556 -6.5375556 -6.5375556
## 141 142 143 144 145 146 147
## -6.5375556 -6.5375556 -3.5178658 -3.5178658 -3.5178658 -3.5178658 -3.5178658
## 148 149 150 151 152 153 154
## -3.5178658 -3.5178658 -3.5178658 -3.5178658 -3.5178658 -3.5178658 -3.5178658
## 155 156 157 158 159 160 161
## -3.5178658 -3.5178658 -3.5178658 -3.5178658 -3.5178658 -3.5178658 -4.2890385
## 162 163 164 165 166 167 168
## -4.2890385 -4.2890385 -4.2890385 -4.2890385 -4.2890385 -4.2890385 -4.2890385
## 169 170 171 172 173 174 175
## -4.2890385 -4.2890385 -4.2890385 -4.2890385 -4.2890385 -4.2890385 -4.2890385
## 176 177 178 179 180 181 182
## -4.2890385 -4.2890385 -4.2890385 -4.2890385 -4.2890385 -4.2890385 -4.2890385
## 183 184 185 186 187 188 189
## -4.2890385 -4.2890385 -4.2890385 -4.2890385 -4.2890385 -4.2890385 -4.2890385
## 190 191 192 193 194 195 196
## -4.2890385 -4.2890385 -4.2890385 -4.2890385 -4.2890385 -4.2890385 -4.2890385
## 197 198 199 200 201 202 203
## -4.2890385 -4.2890385 -4.2890385 -4.2890385 -4.2890385 -4.2890385 -1.2693487
## 204 205 206 207 208 209 210
## -6.3377898 -7.5524924 -7.5524924 -7.5524924 -7.5524924 -7.5524924 -4.5328025
## 211 212 213 214 215 216 217
## -4.5328025 -4.5328025 -4.5328025 -4.5328025 -4.5328025 -8.0774323 -8.0774323
## 218 219 220 221 222 223 224
## -5.0577425 -5.0577425 -5.0577425 -5.0577425 -5.8289152 -5.8289152 -5.8289152
## 225 226 227 228 229 230 231
## -5.8289152 -5.8289152 -5.8289152 -5.8289152 -5.8289152 -5.8289152 -5.8289152
## 232 233 234 235 236 237 238
## -5.8289152 -5.8289152 -5.8289152 -5.8289152 -2.8092254 -7.8776665 -6.0726793
## 239
## -6.0726793
max(FLIC_Model_Indices)
## [1] 8.10664
min(FLIC_Model_Indices)
## [1] -8.077432
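##################################
# (Optional sketch, not part of the original workflow)
# Sanity check that the classification index maps back to the
# estimated probabilities through the inverse-logit transform;
# assumes FLIC_Model_Indices and FLIC_Model_Probabilities
# computed above are still available
##################################
all.equal(as.numeric(plogis(FLIC_Model_Indices)),
          as.numeric(FLIC_Model_Probabilities))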
##################################
# Consolidating the model probabilities
# and classification index
# based on the model predictions
##################################
FLIC_Model_Predictions <- as.data.frame(PMA_PreModelling_Train_FLIC)
FLIC_Model_Predictions$FLIC_Prob <- FLIC_Model_Probabilities
FLIC_Model_Predictions$FLIC_LP <- FLIC_Model_Indices
FLIC_Model_Predictions$UTI <- as.factor(FLIC_Model_Predictions$UTI)
FLIC_Model_Predictions$Label <- rep("FLIC",nrow(FLIC_Model_Predictions))
FLIC_Model_Predictions
##################################
# Formulating the probability curve
# using the consolidated model predictions
##################################
FLIC_Model_Predictions %>%
  ggplot(aes(x = FLIC_LP,
y = FLIC_Prob,
color = UTI)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("UTI Classification Index (Logit Values)") +
ylab("Estimated UTI Probability") +
labs(color = "UTI") +
scale_x_continuous( limits=c(-10,10), breaks=seq(-10,10,by=1)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
ggtitle("Estimated UTI Probabilities Based on Classification Index : Firth's Logistic Regression With Intercept Correction") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
##################################
# Formulating the corresponding
# receiver operating characteristic (ROC) curve
# using the model predictions
##################################
FLIC_Prob_Low <- FLIC_Model_Predictions[FLIC_Model_Predictions$UTI==0,
                                        c("FLIC_Prob")]
FLIC_Prob_High <- FLIC_Model_Predictions[FLIC_Model_Predictions$UTI==1,
                                         c("FLIC_Prob")]
FLIC_Model_Prob_ROC <- roc.curve(scores.class1 = FLIC_Prob_Low,
                                 scores.class0 = FLIC_Prob_High,
                                 curve = TRUE)
plot(FLIC_Model_Prob_ROC,
xlab="1-Specificity",
ylab="Sensitivity",
main="ROC Curve of the UTI Probabilities : Firth's Logistic Regression With Intercept Correction",
color=TRUE,
lwd=8,
legend=3)
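##################################
# (Optional sketch, not part of the original workflow)
# Cross-checking the area under the ROC curve with the pROC
# package; assumes FLIC_Model_Predictions from the consolidation
# step above and a UTI factor coded with levels "0" and "1"
##################################
FLIC_Model_Prob_pROC <- roc(response = FLIC_Model_Predictions$UTI,
                            predictor = FLIC_Model_Predictions$FLIC_Prob,
                            levels = c("0", "1"),
                            direction = "<")
auc(FLIC_Model_Prob_pROC)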
##################################
# Creating a local object
# for the train set
##################################
PMA_PreModelling_Train_BGLM_CP <- UTI
##################################
# Formulating the structure of the
# Bayesian Generalized Linear Model With Cauchy Priors model
##################################
BGLM_CP_Model <- bayesglm(UTI ~ Age_24 + Contraceptive + Condom + LubricatedCondom + Spermicide + Diaphragm,
                          data = PMA_PreModelling_Train_BGLM_CP,
                          family = binomial(link="logit"))
##################################
# Consolidating the model results
##################################
summary(BGLM_CP_Model)
##
## Call:
## bayesglm(formula = UTI ~ Age_24 + Contraceptive + Condom + LubricatedCondom +
## Spermicide + Diaphragm, family = binomial(link = "logit"),
## data = PMA_PreModelling_Train_BGLM_CP)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.051 1.924 -1.586 0.113
## Age_24 -0.489 1.753 -0.279 0.780
## Contraceptive -1.935 1.797 -1.077 0.281
## Condom -1.510 1.766 -0.855 0.392
## LubricatedCondom -1.306 1.743 -0.750 0.453
## Spermicide 1.540 1.730 0.891 0.373
## Diaphragm 12.604 2.860 4.407 1.05e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 329.4768 on 238 degrees of freedom
## Residual deviance: 5.6929 on 232 degrees of freedom
## AIC: 19.693
##
## Number of Fisher Scoring iterations: 28
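##################################
# (Optional sketch, not part of the original workflow)
# The arm::bayesglm() defaults already correspond to independent
# Cauchy priors (prior.df = 1) with scale 2.5 for the coefficients
# and 10 for the intercept; the code below only illustrates how a
# tighter prior scale could be specified explicitly, and
# BGLM_CP_Model_TightPrior is a new helper object
##################################
BGLM_CP_Model_TightPrior <- bayesglm(UTI ~ Age_24 + Contraceptive + Condom +
                                       LubricatedCondom + Spermicide + Diaphragm,
                                     data = PMA_PreModelling_Train_BGLM_CP,
                                     family = binomial(link="logit"),
                                     prior.scale = 1.0,
                                     prior.df = 1)
coef(BGLM_CP_Model_TightPrior)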
BGLM_CP_Model_Coef <- (as.data.frame(BGLM_CP_Model$coefficients))
BGLM_CP_Model_Coef$Coef <- rownames(BGLM_CP_Model_Coef)
BGLM_CP_Model_Coef$Model <- rep("BGLM_CP",nrow(BGLM_CP_Model_Coef))
colnames(BGLM_CP_Model_Coef) <- c("Estimates","Coefficients","Model")
print(BGLM_CP_Model_Coef, rownames=FALSE)
## Estimates Coefficients Model
## (Intercept) -3.0513140 (Intercept) BGLM_CP
## Age_24 -0.4890448 Age_24 BGLM_CP
## Contraceptive -1.9351325 Contraceptive BGLM_CP
## Condom -1.5103889 Condom BGLM_CP
## LubricatedCondom -1.3064324 LubricatedCondom BGLM_CP
## Spermicide 1.5402574 Spermicide BGLM_CP
## Diaphragm 12.6043657 Diaphragm BGLM_CP
##################################
# Computing the model predictions
##################################
(BGLM_CP_Model_Probabilities <- predict(BGLM_CP_Model,
                                        type = c("response")))
## 1 2 3 4 5 6
## 0.1807822594 0.9996786514 0.9996786514 0.9996786514 0.9996786514 0.9996786514
## 7 8 9 10 11 12
## 0.9996786514 0.9996786514 0.9996786514 0.9996786514 0.9996786514 0.9996786514
## 13 14 15 16 17 18
## 0.9996786514 0.9996786514 0.9996786514 0.9999311093 0.9999311093 0.9999311093
## 19 20 21 22 23 24
## 0.9988142938 0.9988142938 0.9988142938 0.9988142938 0.9988142938 0.9988142938
## 25 26 27 28 29 30
## 0.9988142938 0.9988142938 0.9988142938 0.9988142938 0.9997456355 0.9997456355
## 31 32 33 34 35 36
## 0.9997456355 0.9997456355 0.9997456355 0.9997456355 0.9997456355 0.9997456355
## 37 38 39 40 41 42
## 0.9997456355 0.9997456355 0.9997456355 0.9997456355 0.9997456355 0.9995086779
## 43 44 45 46 47 48
## 0.9995086779 0.9995086779 0.9995086779 0.9995086779 0.9995086779 0.9995086779
## 49 50 51 52 53 54
## 0.9995086779 0.9995086779 0.9995086779 0.9995086779 0.9995086779 0.9995086779
## 55 56 57 58 59 60
## 0.9995086779 0.9995086779 0.9995086779 0.9995086779 0.9995086779 0.9995086779
## 61 62 63 64 65 66
## 0.9995086779 0.9995086779 0.9995086779 0.9995086779 0.9995086779 0.9995086779
## 67 68 69 70 71 72
## 0.9995086779 0.9995086779 0.9995086779 0.9995086779 0.9995086779 0.9995086779
## 73 74 75 76 77 78
## 0.9995086779 0.9995086779 0.9995086779 0.9995086779 0.9995086779 0.9995086779
## 79 80 81 82 83 84
## 0.9995086779 0.9995086779 0.9995086779 0.9995086779 0.9995086779 0.9995086779
## 85 86 87 88 89 90
## 0.9995086779 0.9995086779 0.9995086779 0.9995086779 0.9977789026 0.9977789026
## 91 92 93 94 95 96
## 0.9977789026 0.9977789026 0.9977789026 0.9977789026 0.9977789026 0.9977789026
## 97 98 99 100 101 102
## 0.9977789026 0.9977789026 0.9977789026 0.9977789026 0.9977789026 0.9977789026
## 103 104 105 106 107 108
## 0.9977789026 0.9977789026 0.9995231291 0.9995231291 0.9918463122 0.9918463122
## 109 110 111 112 113 114
## 0.9918463122 0.9918463122 0.9918463122 0.9918463122 0.9918463122 0.9982411861
## 115 116 117 118 119 120
## 0.9982411861 0.9982411861 0.9994760645 0.9994760645 0.9998876608 0.9998876608
## 121 122 123 124 125 126
## 0.9980678455 0.9995852596 0.9991990209 0.9991990209 0.9991990209 0.9991990209
## 127 128 129 130 131 132
## 0.9991990209 0.9963829954 0.9963829954 0.9963829954 0.1807822594 0.0103363031
## 133 134 135 136 137 138
## 0.0103363031 0.0464663861 0.0028201661 0.0028201661 0.0028201661 0.0028201661
## 139 140 141 142 143 144
## 0.0028201661 0.0028201661 0.0028201661 0.0028201661 0.0130236654 0.0130236654
## 145 146 147 148 149 150
## 0.0130236654 0.0130236654 0.0130236654 0.0130236654 0.0130236654 0.0130236654
## 151 152 153 154 155 156
## 0.0130236654 0.0130236654 0.0130236654 0.0130236654 0.0130236654 0.0130236654
## 157 158 159 160 161 162
## 0.0130236654 0.0130236654 0.0130236654 0.0130236654 0.0067835607 0.0067835607
## 163 164 165 166 167 168
## 0.0067835607 0.0067835607 0.0067835607 0.0067835607 0.0067835607 0.0067835607
## 169 170 171 172 173 174
## 0.0067835607 0.0067835607 0.0067835607 0.0067835607 0.0067835607 0.0067835607
## 175 176 177 178 179 180
## 0.0067835607 0.0067835607 0.0067835607 0.0067835607 0.0067835607 0.0067835607
## 181 182 183 184 185 186
## 0.0067835607 0.0067835607 0.0067835607 0.0067835607 0.0067835607 0.0067835607
## 187 188 189 190 191 192
## 0.0067835607 0.0067835607 0.0067835607 0.0067835607 0.0067835607 0.0067835607
## 193 194 195 196 197 198
## 0.0067835607 0.0067835607 0.0067835607 0.0067835607 0.0067835607 0.0067835607
## 199 200 201 202 203 204
## 0.0067835607 0.0067835607 0.0067835607 0.0067835607 0.0308827134 0.0015059333
## 205 206 207 208 209 210
## 0.0004082315 0.0004082315 0.0004082315 0.0004082315 0.0004082315 0.0019018768
## 211 212 213 214 215 216
## 0.0019018768 0.0019018768 0.0019018768 0.0019018768 0.0019018768 0.0017312477
## 217 218 219 220 221 222
## 0.0017312477 0.0080266998 0.0080266998 0.0080266998 0.0080266998 0.0041707030
## 223 224 225 226 227 228
## 0.0041707030 0.0041707030 0.0041707030 0.0041707030 0.0041707030 0.0041707030
## 229 230 231 232 233 234
## 0.0041707030 0.0041707030 0.0041707030 0.0041707030 0.0041707030 0.0041707030
## 235 236 237 238 239
## 0.0041707030 0.0191665924 0.0009239944 0.0011671120 0.0011671120
##################################
# Creating a classification index
# based on the model predictions
##################################
(BGLM_CP_Model_Indices <- predict(BGLM_CP_Model,
                                  type = c("link")))
## 1 2 3 4 5 6 7 8
## -1.511057 8.042663 8.042663 8.042663 8.042663 8.042663 8.042663 8.042663
## 9 10 11 12 13 14 15 16
## 8.042663 8.042663 8.042663 8.042663 8.042663 8.042663 8.042663 9.582920
## 17 18 19 20 21 22 23 24
## 9.582920 9.582920 6.736230 6.736230 6.736230 6.736230 6.736230 6.736230
## 25 26 27 28 29 30 31 32
## 6.736230 6.736230 6.736230 6.736230 8.276488 8.276488 8.276488 8.276488
## 33 34 35 36 37 38 39 40
## 8.276488 8.276488 8.276488 8.276488 8.276488 8.276488 8.276488 8.276488
## 41 42 43 44 45 46 47 48
## 8.276488 7.617919 7.617919 7.617919 7.617919 7.617919 7.617919 7.617919
## 49 50 51 52 53 54 55 56
## 7.617919 7.617919 7.617919 7.617919 7.617919 7.617919 7.617919 7.617919
## 57 58 59 60 61 62 63 64
## 7.617919 7.617919 7.617919 7.617919 7.617919 7.617919 7.617919 7.617919
## 65 66 67 68 69 70 71 72
## 7.617919 7.617919 7.617919 7.617919 7.617919 7.617919 7.617919 7.617919
## 73 74 75 76 77 78 79 80
## 7.617919 7.617919 7.617919 7.617919 7.617919 7.617919 7.617919 7.617919
## 81 82 83 84 85 86 87 88
## 7.617919 7.617919 7.617919 7.617919 7.617919 7.617919 7.617919 7.617919
## 89 90 91 92 93 94 95 96
## 6.107530 6.107530 6.107530 6.107530 6.107530 6.107530 6.107530 6.107530
## 97 98 99 100 101 102 103 104
## 6.107530 6.107530 6.107530 6.107530 6.107530 6.107530 6.107530 6.107530
## 105 106 107 108 109 110 111 112
## 7.647788 7.647788 4.801098 4.801098 4.801098 4.801098 4.801098 4.801098
## 113 114 115 116 117 118 119 120
## 4.801098 6.341355 6.341355 6.341355 7.553618 7.553618 9.093875 9.093875
## 121 122 123 124 125 126 127 128
## 6.247186 7.787443 7.128874 7.128874 7.128874 7.128874 7.128874 5.618486
## 129 130 131 132 133 134 135 136
## 5.618486 5.618486 -1.511057 -4.561703 -4.561703 -3.021446 -5.868135 -5.868135
## 137 138 139 140 141 142 143 144
## -5.868135 -5.868135 -5.868135 -5.868135 -5.868135 -5.868135 -4.327878 -4.327878
## 145 146 147 148 149 150 151 152
## -4.327878 -4.327878 -4.327878 -4.327878 -4.327878 -4.327878 -4.327878 -4.327878
## 153 154 155 156 157 158 159 160
## -4.327878 -4.327878 -4.327878 -4.327878 -4.327878 -4.327878 -4.327878 -4.327878
## 161 162 163 164 165 166 167 168
## -4.986446 -4.986446 -4.986446 -4.986446 -4.986446 -4.986446 -4.986446 -4.986446
## 169 170 171 172 173 174 175 176
## -4.986446 -4.986446 -4.986446 -4.986446 -4.986446 -4.986446 -4.986446 -4.986446
## 177 178 179 180 181 182 183 184
## -4.986446 -4.986446 -4.986446 -4.986446 -4.986446 -4.986446 -4.986446 -4.986446
## 185 186 187 188 189 190 191 192
## -4.986446 -4.986446 -4.986446 -4.986446 -4.986446 -4.986446 -4.986446 -4.986446
## 193 194 195 196 197 198 199 200
## -4.986446 -4.986446 -4.986446 -4.986446 -4.986446 -4.986446 -4.986446 -4.986446
## 201 202 203 204 205 206 207 208
## -4.986446 -4.986446 -3.446189 -6.496835 -7.803268 -7.803268 -7.803268 -7.803268
## 209 210 211 212 213 214 215 216
## -7.803268 -6.263010 -6.263010 -6.263010 -6.263010 -6.263010 -6.263010 -6.357180
## 217 218 219 220 221 222 223 224
## -6.357180 -4.816923 -4.816923 -4.816923 -4.816923 -5.475491 -5.475491 -5.475491
## 225 226 227 228 229 230 231 232
## -5.475491 -5.475491 -5.475491 -5.475491 -5.475491 -5.475491 -5.475491 -5.475491
## 233 234 235 236 237 238 239
## -5.475491 -5.475491 -5.475491 -3.935234 -6.985880 -6.752055 -6.752055
max(BGLM_CP_Model_Indices)
## [1] 9.58292
min(BGLM_CP_Model_Indices)
## [1] -7.803268
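##################################
# (Optional sketch, not part of the original workflow)
# Confirming that the response-scale predictions equal the
# inverse-link transform of the classification index; assumes
# BGLM_CP_Model, BGLM_CP_Model_Indices and
# BGLM_CP_Model_Probabilities computed above are still available
##################################
all.equal(as.numeric(BGLM_CP_Model$family$linkinv(BGLM_CP_Model_Indices)),
          as.numeric(BGLM_CP_Model_Probabilities))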
##################################
# Consolidating the model probabilities
# and classification index
# based on the model predictions
##################################
BGLM_CP_Model_Predictions <- as.data.frame(PMA_PreModelling_Train_BGLM_CP)
BGLM_CP_Model_Predictions$BGLM_CP_Prob <- BGLM_CP_Model_Probabilities
BGLM_CP_Model_Predictions$BGLM_CP_LP <- BGLM_CP_Model_Indices
BGLM_CP_Model_Predictions$UTI <- as.factor(BGLM_CP_Model_Predictions$UTI)
BGLM_CP_Model_Predictions$Label <- rep("BGLM_CP",nrow(BGLM_CP_Model_Predictions))
BGLM_CP_Model_Predictions
##################################
# Formulating the probability curve
# using the consolidated model predictions
##################################
BGLM_CP_Model_Predictions %>%
  ggplot(aes(x = BGLM_CP_LP,
y = BGLM_CP_Prob,
color = UTI)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("UTI Classification Index (Logit Values)") +
ylab("Estimated UTI Probability") +
labs(color = "UTI") +
scale_x_continuous( limits=c(-10,10), breaks=seq(-10,10,by=1)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
ggtitle("Estimated UTI Probabilities Based on Classification Index : Bayesian Generalized Linear Model With Cauchy Priors") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
##################################
# Formulating the corresponding
# receiver operating characteristic (ROC) curve
# using the model predictions
##################################
BGLM_CP_Prob_Low <- BGLM_CP_Model_Predictions[BGLM_CP_Model_Predictions$UTI==0,
                                              c("BGLM_CP_Prob")]
BGLM_CP_Prob_High <- BGLM_CP_Model_Predictions[BGLM_CP_Model_Predictions$UTI==1,
                                               c("BGLM_CP_Prob")]
BGLM_CP_Model_Prob_ROC <- roc.curve(scores.class1 = BGLM_CP_Prob_Low,
                                    scores.class0 = BGLM_CP_Prob_High,
                                    curve = TRUE)
plot(BGLM_CP_Model_Prob_ROC,
xlab="1-Specificity",
ylab="Sensitivity",
main="ROC Curve of the UTI Probabilities : Bayesian Generalized Linear Model With Cauchy Priors",
color=TRUE,
lwd=8,
legend=3)
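##################################
# (Optional sketch, not part of the original workflow)
# Complementing the ROC analysis with a precision-recall curve
# from the PRROC package; assumes BGLM_CP_Prob_High and
# BGLM_CP_Prob_Low from the step above are still available
##################################
BGLM_CP_Model_Prob_PR <- pr.curve(scores.class0 = BGLM_CP_Prob_High,
                                  scores.class1 = BGLM_CP_Prob_Low,
                                  curve = TRUE)
plot(BGLM_CP_Model_Prob_PR,
     main = "PR Curve of the UTI Probabilities : Bayesian Generalized Linear Model With Cauchy Priors")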
##################################
# Creating a local object
# for the train set
##################################
PMA_PreModelling_Train_PLR_R <- UTI
PLR_R_Response <- PMA_PreModelling_Train_PLR_R[,c("UTI")]
PLR_R_Predictors <- model.matrix(UTI ~ .,
                                 PMA_PreModelling_Train_PLR_R)[,2:ncol(PMA_PreModelling_Train_PLR_R)]
##################################
# Conducting hyperparameter tuning
# of the lambda parameter
# using cross-validation
##################################
(PLR_R_LambdaCV <- cv.glmnet(PLR_R_Predictors,
                             PLR_R_Response,
                             family = "binomial",
                             alpha = 0))
##
## Call: cv.glmnet(x = PLR_R_Predictors, y = PLR_R_Response, family = "binomial", alpha = 0)
##
## Measure: Binomial Deviance
##
## Lambda Index Measure SE Nonzero
## min 0.04939 100 0.2442 0.01824 6
## 1se 0.05420 99 0.2580 0.01770 6
PLR_R_LambdaCV$lambda
## [1] 493.88644807 450.01093723 410.03320585 373.60698594 340.41677102
## [6] 310.17508331 282.61998378 257.51280336 234.63607564 213.79165337
## [11] 194.79899212 177.49358655 161.72554551 147.35829378 134.26738910
## [16] 122.33944431 111.47114526 101.56835594 92.54530313 84.32383347
## [21] 76.83273651 70.00712796 63.78788766 58.12114752 52.95782497
## [26] 48.25319775 43.96651666 40.06065249 36.50177454 33.25905749
## [31] 30.30441449 27.61225383 25.15925730 22.92417822 20.88765741
## [36] 19.03205550 17.34130014 15.80074683 14.39705203 13.11805761
## [41] 11.95268553 10.89084189 9.92332951 9.04176827 8.23852249
## [46] 7.50663486 6.83976611 6.23214014 5.67849399 5.17403223
## [51] 4.71438547 4.29557246 3.91396564 3.56625972 3.24944303
## [56] 2.96077147 2.69774469 2.45808449 2.23971504 2.04074493
## [61] 1.85945077 1.69426229 1.54374870 1.40660632 1.28164729
## [66] 1.16778928 1.06404610 0.96951917 0.88338976 0.80491185
## [71] 0.73340570 0.66825196 0.60888630 0.55479452 0.50550810
## [76] 0.46060015 0.41968171 0.38239834 0.34842713 0.31747382
## [81] 0.28927032 0.26357234 0.24015730 0.21882239 0.19938281
## [86] 0.18167019 0.16553111 0.15082579 0.13742684 0.12521822
## [91] 0.11409418 0.10395836 0.09472299 0.08630806 0.07864069
## [96] 0.07165447 0.06528888 0.05948880 0.05420398 0.04938864
plot(PLR_R_LambdaCV)
(PLR_R_MinLambda <- PLR_R_LambdaCV$lambda.min)
## [1] 0.04938864
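##################################
# (Optional sketch, not part of the original workflow)
# The one-standard-error rule gives a more heavily regularized
# alternative to lambda.min; assumes PLR_R_LambdaCV from the
# cross-validation step above is still available, and
# PLR_R_1SELambda is a new helper object
##################################
(PLR_R_1SELambda <- PLR_R_LambdaCV$lambda.1se)
coef(PLR_R_LambdaCV, s = "lambda.1se")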
##################################
# Formulating the structure of the
# Penalized Logistic Regression - Ridge model
##################################
PLR_R_Model <- glmnet(PLR_R_Predictors,
                      PLR_R_Response,
                      alpha = 0,
                      family = "binomial",
                      lambda = PLR_R_MinLambda)
##################################
# Consolidating the model results
##################################
summary(PLR_R_Model)
## Length Class Mode
## a0 1 -none- numeric
## beta 6 dgCMatrix S4
## df 1 -none- numeric
## dim 2 -none- numeric
## lambda 1 -none- numeric
## dev.ratio 1 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## classnames 2 -none- character
## call 6 -none- call
## nobs 1 -none- numeric
coef(PLR_R_Model)
## 7 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) -1.7196340
## Age_24 -0.3633108
## Contraceptive -0.1858195
## Condom 0.3240962
## LubricatedCondom -0.4720128
## Spermicide -0.1491506
## Diaphragm 4.1293922
PLR_R_Model_Coef <- (as.data.frame(coef(PLR_R_Model)[1:7]))
PLR_R_Model_Coef$Coef <- rownames(coef(PLR_R_Model))
PLR_R_Model_Coef$Model <- rep("PLR_R",nrow(PLR_R_Model_Coef))
colnames(PLR_R_Model_Coef) <- c("Estimates","Coefficients","Model")
print(PLR_R_Model_Coef, rownames=FALSE)
## Estimates Coefficients Model
## 1 -1.7196340 (Intercept) PLR_R
## 2 -0.3633108 Age_24 PLR_R
## 3 -0.1858195 Contraceptive PLR_R
## 4 0.3240962 Condom PLR_R
## 5 -0.4720128 LubricatedCondom PLR_R
## 6 -0.1491506 Spermicide PLR_R
## 7 4.1293922 Diaphragm PLR_R
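##################################
# (Optional sketch, not part of the original workflow)
# Expressing the ridge-penalized coefficients as odds ratios for
# a rough comparison with the unpenalized fits; assumes the
# PLR_R_Model object fitted above is still available
##################################
round(exp(as.matrix(coef(PLR_R_Model))), 4)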
##################################
# Computing the model predictions
##################################
(PLR_R_Model_Probabilities <- PLR_R_Model %>%
   predict(PLR_R_Predictors, type = c("response")) %>%
   as.vector())
## [1] 0.13368241 0.93899500 0.93899500 0.93899500 0.93899500 0.93899500
## [7] 0.93899500 0.93899500 0.93899500 0.93899500 0.93899500 0.93899500
## [13] 0.93899500 0.93899500 0.93899500 0.92987063 0.92987063 0.92987063
## [19] 0.90566709 0.90566709 0.90566709 0.90566709 0.90566709 0.90566709
## [25] 0.90566709 0.90566709 0.90566709 0.90566709 0.89213057 0.89213057
## [31] 0.89213057 0.89213057 0.89213057 0.89213057 0.89213057 0.89213057
## [37] 0.89213057 0.89213057 0.89213057 0.89213057 0.89213057 0.90237871
## [43] 0.90237871 0.90237871 0.90237871 0.90237871 0.90237871 0.90237871
## [49] 0.90237871 0.90237871 0.90237871 0.90237871 0.90237871 0.90237871
## [55] 0.90237871 0.90237871 0.90237871 0.90237871 0.90237871 0.90237871
## [61] 0.90237871 0.90237871 0.90237871 0.90237871 0.90237871 0.90237871
## [67] 0.90237871 0.90237871 0.90237871 0.90237871 0.90237871 0.90237871
## [73] 0.90237871 0.90237871 0.90237871 0.90237871 0.90237871 0.90237871
## [79] 0.90237871 0.90237871 0.90237871 0.90237871 0.90237871 0.90237871
## [85] 0.90237871 0.90237871 0.90237871 0.90237871 0.92744138 0.92744138
## [91] 0.92744138 0.92744138 0.92744138 0.92744138 0.92744138 0.92744138
## [97] 0.92744138 0.92744138 0.92744138 0.92744138 0.92744138 0.92744138
## [103] 0.92744138 0.92744138 0.91674218 0.91674218 0.88855071 0.88855071
## [109] 0.88855071 0.88855071 0.88855071 0.88855071 0.88855071 0.87290273
## [115] 0.87290273 0.87290273 0.91455335 0.91455335 0.90215422 0.90215422
## [121] 0.86972515 0.85187460 0.86537012 0.86537012 0.86537012 0.86537012
## [127] 0.86537012 0.89886931 0.89886931 0.89886931 0.13368241 0.19852515
## [133] 0.19852515 0.17585474 0.13382539 0.13382539 0.13382539 0.13382539
## [139] 0.13382539 0.13382539 0.13382539 0.13382539 0.11746052 0.11746052
## [145] 0.11746052 0.11746052 0.11746052 0.11746052 0.11746052 0.11746052
## [151] 0.11746052 0.11746052 0.11746052 0.11746052 0.11746052 0.11746052
## [157] 0.11746052 0.11746052 0.11746052 0.11746052 0.12949249 0.12949249
## [163] 0.12949249 0.12949249 0.12949249 0.12949249 0.12949249 0.12949249
## [169] 0.12949249 0.12949249 0.12949249 0.12949249 0.12949249 0.12949249
## [175] 0.12949249 0.12949249 0.12949249 0.12949249 0.12949249 0.12949249
## [181] 0.12949249 0.12949249 0.12949249 0.12949249 0.12949249 0.12949249
## [187] 0.12949249 0.12949249 0.12949249 0.12949249 0.12949249 0.12949249
## [193] 0.12949249 0.12949249 0.12949249 0.12949249 0.12949249 0.12949249
## [199] 0.12949249 0.12949249 0.12949249 0.12949249 0.11358798 0.17060333
## [205] 0.11371229 0.11371229 0.11371229 0.11371229 0.11371229 0.09952435
## [211] 0.09952435 0.09952435 0.09952435 0.09952435 0.09952435 0.09701315
## [217] 0.09701315 0.08470963 0.08470963 0.08470963 0.08470963 0.09374314
## [223] 0.09374314 0.09374314 0.09374314 0.09374314 0.09374314 0.09374314
## [229] 0.09374314 0.09374314 0.09374314 0.09374314 0.09374314 0.09374314
## [235] 0.09374314 0.08181675 0.12513591 0.07137008 0.07137008
##################################
# Creating a classification index
# based on the model predictions
##################################
(PLR_R_Model_Indices <- PLR_R_Model %>%
   predict(PLR_R_Predictors, type = c("link")) %>%
   as.vector())
## [1] -1.868785 2.733854 2.733854 2.733854 2.733854 2.733854 2.733854
## [8] 2.733854 2.733854 2.733854 2.733854 2.733854 2.733854 2.733854
## [15] 2.733854 2.584704 2.584704 2.584704 2.261842 2.261842 2.261842
## [22] 2.261842 2.261842 2.261842 2.261842 2.261842 2.261842 2.261842
## [29] 2.112691 2.112691 2.112691 2.112691 2.112691 2.112691 2.112691
## [36] 2.112691 2.112691 2.112691 2.112691 2.112691 2.112691 2.223939
## [43] 2.223939 2.223939 2.223939 2.223939 2.223939 2.223939 2.223939
## [50] 2.223939 2.223939 2.223939 2.223939 2.223939 2.223939 2.223939
## [57] 2.223939 2.223939 2.223939 2.223939 2.223939 2.223939 2.223939
## [64] 2.223939 2.223939 2.223939 2.223939 2.223939 2.223939 2.223939
## [71] 2.223939 2.223939 2.223939 2.223939 2.223939 2.223939 2.223939
## [78] 2.223939 2.223939 2.223939 2.223939 2.223939 2.223939 2.223939
## [85] 2.223939 2.223939 2.223939 2.223939 2.548035 2.548035 2.548035
## [92] 2.548035 2.548035 2.548035 2.548035 2.548035 2.548035 2.548035
## [99] 2.548035 2.548035 2.548035 2.548035 2.548035 2.548035 2.398884
## [106] 2.398884 2.076022 2.076022 2.076022 2.076022 2.076022 2.076022
## [113] 2.076022 1.926871 1.926871 1.926871 2.370544 2.370544 2.221393
## [120] 2.221393 1.898531 1.749380 1.860628 1.860628 1.860628 1.860628
## [127] 1.860628 2.184724 2.184724 2.184724 -1.868785 -1.395538 -1.395538
## [134] -1.544688 -1.867551 -1.867551 -1.867551 -1.867551 -1.867551 -1.867551
## [141] -1.867551 -1.867551 -2.016701 -2.016701 -2.016701 -2.016701 -2.016701
## [148] -2.016701 -2.016701 -2.016701 -2.016701 -2.016701 -2.016701 -2.016701
## [155] -2.016701 -2.016701 -2.016701 -2.016701 -2.016701 -2.016701 -1.905454
## [162] -1.905454 -1.905454 -1.905454 -1.905454 -1.905454 -1.905454 -1.905454
## [169] -1.905454 -1.905454 -1.905454 -1.905454 -1.905454 -1.905454 -1.905454
## [176] -1.905454 -1.905454 -1.905454 -1.905454 -1.905454 -1.905454 -1.905454
## [183] -1.905454 -1.905454 -1.905454 -1.905454 -1.905454 -1.905454 -1.905454
## [190] -1.905454 -1.905454 -1.905454 -1.905454 -1.905454 -1.905454 -1.905454
## [197] -1.905454 -1.905454 -1.905454 -1.905454 -1.905454 -1.905454 -2.054604
## [204] -1.581357 -2.053370 -2.053370 -2.053370 -2.053370 -2.053370 -2.202521
## [211] -2.202521 -2.202521 -2.202521 -2.202521 -2.202521 -2.230861 -2.230861
## [218] -2.380012 -2.380012 -2.380012 -2.380012 -2.268764 -2.268764 -2.268764
## [225] -2.268764 -2.268764 -2.268764 -2.268764 -2.268764 -2.268764 -2.268764
## [232] -2.268764 -2.268764 -2.268764 -2.268764 -2.417915 -1.944668 -2.565832
## [239] -2.565832
max(PLR_R_Model_Indices)
## [1] 2.733854
min(PLR_R_Model_Indices)
## [1] -2.565832
##################################
# Consolidating the model probabilities
# and classification index
# based on the model predictions
##################################
PLR_R_Model_Predictions <- as.data.frame(PMA_PreModelling_Train_PLR_R)
PLR_R_Model_Predictions$PLR_R_Prob <- PLR_R_Model_Probabilities
PLR_R_Model_Predictions$PLR_R_LP <- PLR_R_Model_Indices
PLR_R_Model_Predictions$UTI <- as.factor(PLR_R_Model_Predictions$UTI)
PLR_R_Model_Predictions$Label <- rep("PLR_R",nrow(PLR_R_Model_Predictions))
PLR_R_Model_Predictions
##################################
# Formulating the probability curve
# using the consolidated model predictions
##################################
PLR_R_Model_Predictions %>%
  ggplot(aes(x = PLR_R_LP,
y = PLR_R_Prob,
color = UTI)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("UTI Classification Index (Logit Values)") +
ylab("Estimated UTI Probability") +
labs(color = "UTI") +
scale_x_continuous( limits=c(-3,3), breaks=seq(-3,3,by=1)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
ggtitle("Estimated UTI Probabilities Based on Classification Index : Penalized Logistic Regression - Ridge") +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="top")
##################################
# Formulating the corresponding
# receiver operating characteristic (ROC) curve
# using the model predictions
##################################
PLR_R_Prob_Low <- PLR_R_Model_Predictions[PLR_R_Model_Predictions$UTI==0,
                                          c("PLR_R_Prob")]
PLR_R_Prob_High <- PLR_R_Model_Predictions[PLR_R_Model_Predictions$UTI==1,
                                           c("PLR_R_Prob")]
PLR_R_Model_Prob_ROC <- roc.curve(scores.class1 = PLR_R_Prob_Low,
                                  scores.class0 = PLR_R_Prob_High,
                                  curve = TRUE)
plot(PLR_R_Model_Prob_ROC,
xlab="1-Specificity",
ylab="Sensitivity",
main="ROC Curve of the UTI Probabilities : Penalized Logistic Regression - Ridge",
color=TRUE,
lwd=8,
legend=3)
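##################################
# (Optional sketch, not part of the original workflow)
# Summarizing classification performance at a 0.50 probability
# cutoff with caret::confusionMatrix; assumes
# PLR_R_Model_Predictions from the consolidation step above and
# a UTI factor coded with levels "0" and "1"; PLR_R_PredClass is
# a new helper object
##################################
PLR_R_PredClass <- factor(ifelse(PLR_R_Model_Predictions$PLR_R_Prob >= 0.50, "1", "0"),
                          levels = levels(PLR_R_Model_Predictions$UTI))
confusionMatrix(data = PLR_R_PredClass,
                reference = PLR_R_Model_Predictions$UTI,
                positive = "1")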
##################################
# Replotting the logistic curves
##################################
LR_LogisticCurvePlot <- LR_Model_Predictions %>%
  ggplot(aes(x = LR_LP,
y = LR_Prob,
color = UTI)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("UTI Classification Index (Logit Values)") +
ylab("Estimated UTI Probability") +
labs(color = "UTI") +
scale_x_continuous( limits=c(-50,50), breaks=seq(-50,50,by=5)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
facet_grid(. ~ Label) +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="none")
FBRLR_LogisticCurvePlot <- FBRLR_Model_Predictions %>%
  ggplot(aes(x = FBRLR_LP,
y = FBRLR_Prob,
color = UTI)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("UTI Classification Index (Logit Values)") +
ylab("Estimated UTI Probability") +
labs(color = "UTI") +
scale_x_continuous( limits=c(-50,50), breaks=seq(-50,50,by=5)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
facet_grid(. ~ Label) +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="none")
FLAC_LogisticCurvePlot <- FLAC_Model_Predictions %>%
  ggplot(aes(x = FLAC_LP,
y = FLAC_Prob,
color = UTI)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("UTI Classification Index (Logit Values)") +
ylab("Estimated UTI Probability") +
labs(color = "UTI") +
scale_x_continuous( limits=c(-50,50), breaks=seq(-50,50,by=5)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
facet_grid(. ~ Label) +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="none")
FLIC_LogisticCurvePlot <- FLIC_Model_Predictions %>%
  ggplot(aes(x = FLIC_LP,
y = FLIC_Prob,
color = UTI)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("UTI Classification Index (Logit Values)") +
ylab("Estimated UTI Probability") +
labs(color = "UTI") +
scale_x_continuous( limits=c(-50,50), breaks=seq(-50,50,by=5)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
facet_grid(. ~ Label) +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="none")
BGLM_CP_LogisticCurvePlot <- BGLM_CP_Model_Predictions %>%
  ggplot(aes(x = BGLM_CP_LP,
y = BGLM_CP_Prob,
color = UTI)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("UTI Classification Index (Logit Values)") +
ylab("Estimated UTI Probability") +
labs(color = "UTI") +
scale_x_continuous( limits=c(-50,50), breaks=seq(-50,50,by=5)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
facet_grid(. ~ Label) +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="none")
PLR_R_LogisticCurvePlot <- PLR_R_Model_Predictions %>%
  ggplot(aes(x = PLR_R_LP,
y = PLR_R_Prob,
color = UTI)) +
scale_colour_manual(values=c("#1846BA55","#B8000055")) +
geom_point(size=5) +
geom_line(color="black") +
xlab("UTI Classification Index (Logit Values)") +
ylab("Estimated UTI Probability") +
labs(color = "UTI") +
scale_x_continuous( limits=c(-50,50), breaks=seq(-50,50,by=5)) +
scale_y_continuous( limits=c(0,1), breaks=seq(0,1,by=0.1),labels = scales::percent) +
facet_grid(. ~ Label) +
theme_bw() +
theme(plot.title = element_text(color="black", size=14, face="bold", hjust=0.50),
axis.title.x = element_text(color="black", size=12, face="bold"),
axis.title.y = element_text(color="black", size=12, face="bold"),
legend.position="none")
RLR_LogisticCurvePlot <- ggarrange(LR_LogisticCurvePlot,
                                   FBRLR_LogisticCurvePlot,
                                   FLAC_LogisticCurvePlot,
                                   FLIC_LogisticCurvePlot,
                                   BGLM_CP_LogisticCurvePlot,
                                   PLR_R_LogisticCurvePlot,
                                   ncol=2, nrow=3)
annotate_figure(RLR_LogisticCurvePlot,
top = text_grob("Estimated UTI Probabilities Based on Classification Index",
color = "black",
face = "bold",
size = 14))
##################################
# Formulating the logistic regression model
# coefficient comparison plot
##################################
RLRCoefficient_Summary <- rbind(LR_Model_Coef,
                                FBRLR_Model_Coef,
                                FLAC_Model_Coef,
                                FLIC_Model_Coef,
                                BGLM_CP_Model_Coef,
                                PLR_R_Model_Coef)
RLRCoefficient_Summary <- as.data.frame(RLRCoefficient_Summary)
RLRCoefficient_Summary$Estimates <- as.numeric(as.character(RLRCoefficient_Summary$Estimates))
RLRCoefficient_Summary$Coefficients <- factor(RLRCoefficient_Summary$Coefficients,
                                              levels = c("Diaphragm",
                                                         "Spermicide",
                                                         "LubricatedCondom",
                                                         "Condom",
                                                         "Contraceptive",
                                                         "Age_24",
                                                         "(Intercept)"))
RLRCoefficient_Summary$Model <- factor(RLRCoefficient_Summary$Model,
                                       levels = c("PLR_R",
                                                  "BGLM_CP",
                                                  "FLIC",
                                                  "FLAC",
                                                  "FBRLR",
                                                  "LR"))
print(RLRCoefficient_Summary, row.names=FALSE)
## Estimates Coefficients Model
## -2.8682847 (Intercept) LR
## -1.0518337 Age_24 LR
## -21.8847038 Contraceptive LR
## -22.1197835 Condom LR
## -1.6865651 LubricatedCondom LR
## 2.8682847 Spermicide LR
## 71.2831767 Diaphragm LR
## -3.3459926 (Intercept) FBRLR
## -1.5398768 Age_24 FBRLR
## -1.0149368 Contraceptive FBRLR
## -2.0487513 Condom FBRLR
## -1.2147026 LubricatedCondom FBRLR
## 3.0196898 Spermicide FBRLR
## 10.4098028 Diaphragm FBRLR
## -3.3022901 (Intercept) FLAC
## -1.4876184 Age_24 FLAC
## -1.0423353 Contraceptive FLAC
## -2.1157922 Condom FLAC
## -1.2316706 LubricatedCondom FLAC
## 3.1155536 Spermicide FLAC
## 10.5070364 Diaphragm FLAC
## -3.2741017 (Intercept) FLIC
## -1.5398768 Age_24 FLIC
## -1.0149368 Contraceptive FLIC
## -2.0487513 Condom FLIC
## -1.2147026 LubricatedCondom FLIC
## 3.0196898 Spermicide FLIC
## 10.4098028 Diaphragm FLIC
## -3.0513140 (Intercept) BGLM_CP
## -0.4890448 Age_24 BGLM_CP
## -1.9351325 Contraceptive BGLM_CP
## -1.5103889 Condom BGLM_CP
## -1.3064324 LubricatedCondom BGLM_CP
## 1.5402574 Spermicide BGLM_CP
## 12.6043657 Diaphragm BGLM_CP
## -1.7196340 (Intercept) PLR_R
## -0.3633108 Age_24 PLR_R
## -0.1858195 Contraceptive PLR_R
## 0.3240962 Condom PLR_R
## -0.4720128 LubricatedCondom PLR_R
## -0.1491506 Spermicide PLR_R
## 4.1293922 Diaphragm PLR_R
(ROCCurveAUC_Plot <- dotplot(Model ~ Estimates | Coefficients,
                             data = RLRCoefficient_Summary,
                             main = "Model Coefficient Estimation Comparison",
                             ylab = "Model",
                             xlab = "Coefficient Estimates",
                             auto.key = list(adj=1),
                             type = c("p", "h"),
                             origin = 0,
                             alpha = 0.45,
                             pch = 16,
                             cex = 2,
                             layout = c(3,3)))
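##################################
# (Optional sketch, not part of the original workflow)
# A ggplot2 alternative to the lattice dotplot for comparing the
# coefficient estimates across models; assumes
# RLRCoefficient_Summary from the consolidation step above
##################################
RLRCoefficient_Summary %>%
  ggplot(aes(x = Estimates, y = Model)) +
  geom_segment(aes(x = 0, xend = Estimates, yend = Model), alpha = 0.45) +
  geom_point(size = 3, alpha = 0.45) +
  facet_wrap(~ Coefficients, ncol = 3) +
  xlab("Coefficient Estimates") +
  ylab("Model") +
  ggtitle("Model Coefficient Estimation Comparison") +
  theme_bw() +
  theme(plot.title = element_text(face = "bold", hjust = 0.50))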