##################################
# Loading R libraries
##################################
library(AppliedPredictiveModeling)
library(performance)
library(parameters)
library(HH)
library(tidyr)
library(caret)
library(psych)
library(lattice)
library(dplyr)
library(moments)
library(skimr)
library(RANN)
library(pls)
library(corrplot)
library(lares)
library(DMwR2)
library(gridExtra)
library(rattle)
library(RColorBrewer)
library(stats)
library(factoextra)
library(FactoMineR)
library(gplots)
library(qgraph)
library(ggplot2)
library(GGally)
library(psych)
library(nFactors)
library(MBESS)
library(mice)
library(DandEFA)
library(EFAtools)
library(tidyverse)
library(lavaan)
library(semPlot)
library(semoutput)
library(sjPlot)
library(GPArotation)
library(semTools)
##################################
# Defining file paths
##################################
<- file.path("datasets","original")
DATASETS_ORIGINAL_PATH
##################################
# Loading source and
# formulating the analysis set
##################################
<- read.csv(file.path("..", DATASETS_ORIGINAL_PATH, "ChronicDiseaseIndicators.csv"),
CDI na.strings=c("NA","NaN"," ",""),
stringsAsFactors = FALSE)
<- as.data.frame(CDI)
CDI
##################################
# Performing a general exploration of the data set
##################################
dim(CDI)
## [1] 50 16
str(CDI)
## 'data.frame': 50 obs. of 16 variables:
## $ STATE : chr "Alabama" "Alaska" "Arizona" "Arkansas" ...
## $ LDMORT: num 11.8 10.7 14.3 10.4 12 12.7 8.5 9.3 11.8 8.6 ...
## $ ARTINC: num 31.7 22.5 23.2 28.1 19.9 22.1 21.4 23.8 23.1 24.6 ...
## $ ASMORT: num 10.7 NA 11.7 11 10 7.8 8.7 NA 8.6 9.9 ...
## $ PSMUSE: num 83 78.5 79.8 77.8 83.7 84.8 87.6 86.8 79.9 84.5 ...
## $ RDMORT: num 60.1 49.2 37.3 65.8 51 44.2 47.7 58.7 39.1 55.1 ...
## $ PDMORT: num 148.3 105.7 116.5 163 87.8 ...
## $ CDMORT: num 292 193 183 279 194 ...
## $ HDMORT: num 224 147 136 218 142 ...
## $ STMORT: num 48.3 32.3 28.3 45.4 33.9 33.4 26.3 38.8 33 42.6 ...
## $ DBMORT: num 59.5 59.3 55.1 71.6 69.9 52 43.8 57.2 49.3 55.2 ...
## $ INFVAC: num 37.7 35 32.6 39.6 35.7 44.2 41.3 39.4 29 34.1 ...
## $ MEUNDA: num 4.7 3 3.7 4.4 3.6 3.2 3.7 3.6 3.8 4 ...
## $ OVWINC: num 66.9 65.2 64 70.7 59.8 57.2 59.6 67.3 61.5 65.7 ...
## $ HLTINS: num 17.9 17.1 18.1 20.3 17.8 15.5 11 11.2 23.1 25.2 ...
## $ SMPREV: num 21.7 19.5 16.9 25.4 12.8 15.8 16.1 20.3 18.6 17.4 ...
summary(CDI)
## STATE LDMORT ARTINC ASMORT
## Length:50 Min. : 6.80 Min. :19.00 Min. : 7.50
## Class :character 1st Qu.: 8.70 1st Qu.:23.12 1st Qu.: 9.45
## Mode :character Median :10.20 Median :24.15 Median :11.00
## Mean :10.54 Mean :24.78 Mean :11.32
## 3rd Qu.:11.95 3rd Qu.:25.98 3rd Qu.:13.30
## Max. :22.50 Max. :35.90 Max. :16.70
## NA's :11
## PSMUSE RDMORT PDMORT CDMORT
## Min. :76.20 Min. :37.30 Min. : 43.9 Min. :164.7
## 1st Qu.:80.53 1st Qu.:49.30 1st Qu.:102.3 1st Qu.:195.2
## Median :82.70 Median :54.90 Median :119.8 Median :208.8
## Mean :82.59 Mean :55.54 Mean :120.0 Mean :220.0
## 3rd Qu.:85.33 3rd Qu.:59.98 3rd Qu.:139.6 3rd Qu.:242.1
## Max. :87.90 Max. :78.50 Max. :177.7 Max. :300.5
##
## HDMORT STMORT DBMORT INFVAC
## Min. :116.5 Min. :25.60 Min. : 42.30 Min. :29.00
## 1st Qu.:147.8 1st Qu.:33.50 1st Qu.: 57.05 1st Qu.:35.88
## Median :158.1 Median :36.75 Median : 64.85 Median :39.20
## Mean :167.0 Mean :36.86 Mean : 67.37 Mean :39.01
## 3rd Qu.:182.3 3rd Qu.:41.45 3rd Qu.: 75.55 3rd Qu.:42.02
## Max. :229.9 Max. :48.80 Max. :117.20 Max. :49.00
##
## MEUNDA OVWINC HLTINS SMPREV
## Min. :2.800 Min. :57.20 Min. : 5.40 Min. : 9.50
## 1st Qu.:3.325 1st Qu.:62.60 1st Qu.:12.55 1st Qu.:16.62
## Median :3.700 Median :65.10 Median :15.70 Median :18.90
## Mean :3.686 Mean :64.54 Mean :15.84 Mean :19.02
## 3rd Qu.:4.000 3rd Qu.:66.75 3rd Qu.:18.32 3rd Qu.:21.25
## Max. :4.900 Max. :70.80 Max. :29.10 Max. :28.20
##
##################################
# Formulating a data type assessment summary
##################################
<- CDI
PDA <- data.frame(
(PDA.Summary Column.Index=c(1:length(names(PDA))),
Column.Name= names(PDA),
Column.Type=sapply(PDA, function(x) class(x)),
row.names=NULL)
)
## Column.Index Column.Name Column.Type
## 1 1 STATE character
## 2 2 LDMORT numeric
## 3 3 ARTINC numeric
## 4 4 ASMORT numeric
## 5 5 PSMUSE numeric
## 6 6 RDMORT numeric
## 7 7 PDMORT numeric
## 8 8 CDMORT numeric
## 9 9 HDMORT numeric
## 10 10 STMORT numeric
## 11 11 DBMORT numeric
## 12 12 INFVAC numeric
## 13 13 MEUNDA numeric
## 14 14 OVWINC numeric
## 15 15 HLTINS numeric
## 16 16 SMPREV numeric
##################################
# Loading dataset
##################################
<- CDI
DQA
##################################
# Formulating an overall data quality assessment summary
##################################
<- data.frame(
(DQA.Summary Column.Name= names(DQA),
Column.Type=sapply(DQA, function(x) class(x)),
Row.Count=sapply(DQA, function(x) nrow(DQA)),
NA.Count=sapply(DQA,function(x)sum(is.na(x))),
Fill.Rate=sapply(DQA,function(x)format(round((sum(!is.na(x))/nrow(DQA)),3),nsmall=3)),
row.names=NULL)
)
## Column.Name Column.Type Row.Count NA.Count Fill.Rate
## 1 STATE character 50 0 1.000
## 2 LDMORT numeric 50 0 1.000
## 3 ARTINC numeric 50 0 1.000
## 4 ASMORT numeric 50 11 0.780
## 5 PSMUSE numeric 50 0 1.000
## 6 RDMORT numeric 50 0 1.000
## 7 PDMORT numeric 50 0 1.000
## 8 CDMORT numeric 50 0 1.000
## 9 HDMORT numeric 50 0 1.000
## 10 STMORT numeric 50 0 1.000
## 11 DBMORT numeric 50 0 1.000
## 12 INFVAC numeric 50 0 1.000
## 13 MEUNDA numeric 50 0 1.000
## 14 OVWINC numeric 50 0 1.000
## 15 HLTINS numeric 50 0 1.000
## 16 SMPREV numeric 50 0 1.000
##################################
# Listing all descriptors
##################################
<- DQA[,colnames(DQA)!="State"]
DQA.Descriptors
##################################
# Listing all numeric Descriptors
##################################
<- DQA.Descriptors[,sapply(DQA.Descriptors, is.numeric)]
DQA.Descriptors.Numeric
if (length(names(DQA.Descriptors.Numeric))>0) {
print(paste0("There are ",
length(names(DQA.Descriptors.Numeric))),
(" numeric descriptor variable(s)."))
else {
} print("There are no numeric descriptor variables.")
}
## [1] "There are 15 numeric descriptor variable(s)."
##################################
# Listing all factor Descriptors
##################################
<- DQA.Descriptors[,sapply(DQA.Descriptors, is.factor)]
DQA.Descriptors.Factor
if (length(names(DQA.Descriptors.Factor))>0) {
print(paste0("There are ",
length(names(DQA.Descriptors.Factor))),
(" factor descriptor variable(s)."))
else {
} print("There are no factor descriptor variables.")
}
## [1] "There are no factor descriptor variables."
##################################
# Formulating a data quality assessment summary for factor Descriptors
##################################
if (length(names(DQA.Descriptors.Factor))>0) {
##################################
# Formulating a function to determine the first mode
##################################
<- function(x) {
FirstModes <- unique(na.omit(x))
ux <- tabulate(match(x, ux))
tab == max(tab)]
ux[tab
}
##################################
# Formulating a function to determine the second mode
##################################
<- function(x) {
SecondModes <- unique(na.omit(x))
ux <- tabulate(match(x, ux))
tab = ux[tab == max(tab)]
fm = x[!(x %in% fm)]
sm <- unique(sm)
usm <- tabulate(match(sm, usm))
tabsm ifelse(is.na(usm[tabsm == max(tabsm)])==TRUE,
return("x"),
return(usm[tabsm == max(tabsm)]))
}
<- data.frame(
(DQA.Descriptors.Factor.Summary Column.Name= names(DQA.Descriptors.Factor),
Column.Type=sapply(DQA.Descriptors.Factor, function(x) class(x)),
Unique.Count=sapply(DQA.Descriptors.Factor, function(x) length(unique(x))),
First.Mode.Value=sapply(DQA.Descriptors.Factor, function(x) as.character(FirstModes(x)[1])),
Second.Mode.Value=sapply(DQA.Descriptors.Factor, function(x) as.character(SecondModes(x)[1])),
First.Mode.Count=sapply(DQA.Descriptors.Factor, function(x) sum(na.omit(x) == FirstModes(x)[1])),
Second.Mode.Count=sapply(DQA.Descriptors.Factor, function(x) sum(na.omit(x) == SecondModes(x)[1])),
Unique.Count.Ratio=sapply(DQA.Descriptors.Factor, function(x) format(round((length(unique(x))/nrow(DQA.Descriptors.Factor)),3), nsmall=3)),
First.Second.Mode.Ratio=sapply(DQA.Descriptors.Factor, function(x) format(round((sum(na.omit(x) == FirstModes(x)[1])/sum(na.omit(x) == SecondModes(x)[1])),3), nsmall=3)),
row.names=NULL)
)
}
##################################
# Formulating a data quality assessment summary for numeric Descriptors
##################################
if (length(names(DQA.Descriptors.Numeric))>0) {
##################################
# Formulating a function to determine the first mode
##################################
<- function(x) {
FirstModes <- unique(na.omit(x))
ux <- tabulate(match(x, ux))
tab == max(tab)]
ux[tab
}
##################################
# Formulating a function to determine the second mode
##################################
<- function(x) {
SecondModes <- unique(na.omit(x))
ux <- tabulate(match(x, ux))
tab = ux[tab == max(tab)]
fm = na.omit(x)[!(na.omit(x) %in% fm)]
sm <- unique(sm)
usm <- tabulate(match(sm, usm))
tabsm ifelse(is.na(usm[tabsm == max(tabsm)])==TRUE,
return(0.00001),
return(usm[tabsm == max(tabsm)]))
}
<- data.frame(
(DQA.Descriptors.Numeric.Summary Column.Name= names(DQA.Descriptors.Numeric),
Column.Type=sapply(DQA.Descriptors.Numeric, function(x) class(x)),
Unique.Count=sapply(DQA.Descriptors.Numeric, function(x) length(unique(x))),
Unique.Count.Ratio=sapply(DQA.Descriptors.Numeric, function(x) format(round((length(unique(x))/nrow(DQA.Descriptors.Numeric)),3), nsmall=3)),
First.Mode.Value=sapply(DQA.Descriptors.Numeric, function(x) format(round((FirstModes(x)[1]),3),nsmall=3)),
Second.Mode.Value=sapply(DQA.Descriptors.Numeric, function(x) format(round((SecondModes(x)[1]),3),nsmall=3)),
First.Mode.Count=sapply(DQA.Descriptors.Numeric, function(x) sum(na.omit(x) == FirstModes(x)[1])),
Second.Mode.Count=sapply(DQA.Descriptors.Numeric, function(x) sum(na.omit(x) == SecondModes(x)[1])),
First.Second.Mode.Ratio=sapply(DQA.Descriptors.Numeric, function(x) format(round((sum(na.omit(x) == FirstModes(x)[1])/sum(na.omit(x) == SecondModes(x)[1])),3), nsmall=3)),
Minimum=sapply(DQA.Descriptors.Numeric, function(x) format(round(min(x,na.rm = TRUE),3), nsmall=3)),
Mean=sapply(DQA.Descriptors.Numeric, function(x) format(round(mean(x,na.rm = TRUE),3), nsmall=3)),
Median=sapply(DQA.Descriptors.Numeric, function(x) format(round(median(x,na.rm = TRUE),3), nsmall=3)),
Maximum=sapply(DQA.Descriptors.Numeric, function(x) format(round(max(x,na.rm = TRUE),3), nsmall=3)),
Skewness=sapply(DQA.Descriptors.Numeric, function(x) format(round(skewness(x,na.rm = TRUE),3), nsmall=3)),
Kurtosis=sapply(DQA.Descriptors.Numeric, function(x) format(round(moments::kurtosis(x,na.rm = TRUE),3), nsmall=3)),
Percentile25th=sapply(DQA.Descriptors.Numeric, function(x) format(round(quantile(x,probs=0.25,na.rm = TRUE),3), nsmall=3)),
Percentile75th=sapply(DQA.Descriptors.Numeric, function(x) format(round(quantile(x,probs=0.75,na.rm = TRUE),3), nsmall=3)),
row.names=NULL)
)
}
## Column.Name Column.Type Unique.Count Unique.Count.Ratio First.Mode.Value
## 1 LDMORT numeric 37 0.740 11.800
## 2 ARTINC numeric 40 0.800 23.900
## 3 ASMORT numeric 33 0.660 10.000
## 4 PSMUSE numeric 42 0.840 83.000
## 5 RDMORT numeric 48 0.960 54.400
## 6 PDMORT numeric 49 0.980 108.100
## 7 CDMORT numeric 49 0.980 242.100
## 8 HDMORT numeric 49 0.980 147.900
## 9 STMORT numeric 44 0.880 38.800
## 10 DBMORT numeric 46 0.920 69.900
## 11 INFVAC numeric 45 0.900 37.700
## 12 MEUNDA numeric 19 0.380 3.700
## 13 OVWINC numeric 39 0.780 66.900
## 14 HLTINS numeric 47 0.940 18.000
## 15 SMPREV numeric 43 0.860 21.700
## Second.Mode.Value First.Mode.Count Second.Mode.Count First.Second.Mode.Ratio
## 1 14.300 3 2 1.500
## 2 22.500 3 2 1.500
## 3 10.700 2 1 2.000
## 4 78.500 2 1 2.000
## 5 60.100 2 1 2.000
## 6 148.300 2 1 2.000
## 7 292.100 2 1 2.000
## 8 224.000 2 1 2.000
## 9 48.300 2 1 2.000
## 10 59.500 2 1 2.000
## 11 44.200 3 2 1.500
## 12 3.600 8 5 1.600
## 13 64.000 2 1 2.000
## 14 17.900 2 1 2.000
## 15 25.400 2 1 2.000
## Minimum Mean Median Maximum Skewness Kurtosis Percentile25th
## 1 6.800 10.540 10.200 22.500 1.690 7.736 8.700
## 2 19.000 24.784 24.150 35.900 0.992 4.597 23.125
## 3 7.500 11.323 11.000 16.700 0.195 2.112 9.450
## 4 76.200 82.594 82.700 87.900 -0.196 2.035 80.525
## 5 37.300 55.544 54.900 78.500 0.400 3.148 49.300
## 6 43.900 119.990 119.850 177.700 -0.059 3.045 102.325
## 7 164.700 219.974 208.750 300.500 0.752 2.704 195.150
## 8 116.500 166.954 158.050 229.900 0.677 2.653 147.825
## 9 25.600 36.858 36.750 48.800 0.085 2.430 33.500
## 10 42.300 67.372 64.850 117.200 0.903 4.207 57.050
## 11 29.000 39.010 39.200 49.000 -0.060 2.833 35.875
## 12 2.800 3.686 3.700 4.900 0.209 2.442 3.325
## 13 57.200 64.540 65.100 70.800 -0.260 2.664 62.600
## 14 5.400 15.844 15.700 29.100 0.363 3.068 12.550
## 15 9.500 19.020 18.900 28.200 0.194 3.201 16.625
## Percentile75th
## 1 11.950
## 2 25.975
## 3 13.300
## 4 85.325
## 5 59.975
## 6 139.600
## 7 242.100
## 8 182.300
## 9 41.450
## 10 75.550
## 11 42.025
## 12 4.000
## 13 66.750
## 14 18.325
## 15 21.250
##################################
# Identifying potential data quality issues
##################################
##################################
# Checking for missing observations
##################################
if ((nrow(DQA.Summary[DQA.Summary$NA.Count>0,]))>0){
print(paste0("Missing observations noted for ",
nrow(DQA.Summary[DQA.Summary$NA.Count>0,])),
(" variable(s) with NA.Count>0 and Fill.Rate<1.0."))
$NA.Count>0,]
DQA.Summary[DQA.Summaryelse {
} print("No missing observations noted.")
}
## [1] "Missing observations noted for 1 variable(s) with NA.Count>0 and Fill.Rate<1.0."
## Column.Name Column.Type Row.Count NA.Count Fill.Rate
## 4 ASMORT numeric 50 11 0.780
##################################
# Checking for zero or near-zero variance Descriptors
##################################
if (length(names(DQA.Descriptors.Factor))==0) {
print("No factor descriptors noted.")
else if (nrow(DQA.Descriptors.Factor.Summary[as.numeric(as.character(DQA.Descriptors.Factor.Summary$First.Second.Mode.Ratio))>5,])>0){
} print(paste0("Low variance observed for ",
nrow(DQA.Descriptors.Factor.Summary[as.numeric(as.character(DQA.Descriptors.Factor.Summary$First.Second.Mode.Ratio))>5,])),
(" factor variable(s) with First.Second.Mode.Ratio>5."))
as.numeric(as.character(DQA.Descriptors.Factor.Summary$First.Second.Mode.Ratio))>5,]
DQA.Descriptors.Factor.Summary[else {
} print("No low variance factor descriptors due to high first-second mode ratio noted.")
}
## [1] "No factor descriptors noted."
if (length(names(DQA.Descriptors.Numeric))==0) {
print("No numeric descriptors noted.")
else if (nrow(DQA.Descriptors.Numeric.Summary[as.numeric(as.character(DQA.Descriptors.Numeric.Summary$First.Second.Mode.Ratio))>5,])>0){
} print(paste0("Low variance observed for ",
nrow(DQA.Descriptors.Numeric.Summary[as.numeric(as.character(DQA.Descriptors.Numeric.Summary$First.Second.Mode.Ratio))>5,])),
(" numeric variable(s) with First.Second.Mode.Ratio>5."))
as.numeric(as.character(DQA.Descriptors.Numeric.Summary$First.Second.Mode.Ratio))>5,]
DQA.Descriptors.Numeric.Summary[else {
} print("No low variance numeric descriptors due to high first-second mode ratio noted.")
}
## [1] "No low variance numeric descriptors due to high first-second mode ratio noted."
if (length(names(DQA.Descriptors.Numeric))==0) {
print("No numeric descriptors noted.")
else if (nrow(DQA.Descriptors.Numeric.Summary[as.numeric(as.character(DQA.Descriptors.Numeric.Summary$Unique.Count.Ratio))<0.01,])>0){
} print(paste0("Low variance observed for ",
nrow(DQA.Descriptors.Numeric.Summary[as.numeric(as.character(DQA.Descriptors.Numeric.Summary$Unique.Count.Ratio))<0.01,])),
(" numeric variable(s) with Unique.Count.Ratio<0.01."))
as.numeric(as.character(DQA.Descriptors.Numeric.Summary$Unique.Count.Ratio))<0.01,]
DQA.Descriptors.Numeric.Summary[else {
} print("No low variance numeric descriptors due to low unique count ratio noted.")
}
## [1] "No low variance numeric descriptors due to low unique count ratio noted."
##################################
# Checking for skewed Descriptors
##################################
if (length(names(DQA.Descriptors.Numeric))==0) {
print("No numeric descriptors noted.")
else if (nrow(DQA.Descriptors.Numeric.Summary[as.numeric(as.character(DQA.Descriptors.Numeric.Summary$Skewness))>3 |
} as.numeric(as.character(DQA.Descriptors.Numeric.Summary$Skewness))<(-3),])>0){
print(paste0("High skewness observed for ",
nrow(DQA.Descriptors.Numeric.Summary[as.numeric(as.character(DQA.Descriptors.Numeric.Summary$Skewness))>3 |
(as.numeric(as.character(DQA.Descriptors.Numeric.Summary$Skewness))<(-3),])),
" numeric variable(s) with Skewness>3 or Skewness<(-3)."))
as.numeric(as.character(DQA.Descriptors.Numeric.Summary$Skewness))>3 |
DQA.Descriptors.Numeric.Summary[as.numeric(as.character(DQA.Descriptors.Numeric.Summary$Skewness))<(-3),]
else {
} print("No skewed numeric descriptors noted.")
}
## [1] "No skewed numeric descriptors noted."
##################################
# Visualizing the missing data patterns
# prior to imputation
##################################
summary(DQA.Descriptors.Numeric)
## LDMORT ARTINC ASMORT PSMUSE
## Min. : 6.80 Min. :19.00 Min. : 7.50 Min. :76.20
## 1st Qu.: 8.70 1st Qu.:23.12 1st Qu.: 9.45 1st Qu.:80.53
## Median :10.20 Median :24.15 Median :11.00 Median :82.70
## Mean :10.54 Mean :24.78 Mean :11.32 Mean :82.59
## 3rd Qu.:11.95 3rd Qu.:25.98 3rd Qu.:13.30 3rd Qu.:85.33
## Max. :22.50 Max. :35.90 Max. :16.70 Max. :87.90
## NA's :11
## RDMORT PDMORT CDMORT HDMORT
## Min. :37.30 Min. : 43.9 Min. :164.7 Min. :116.5
## 1st Qu.:49.30 1st Qu.:102.3 1st Qu.:195.2 1st Qu.:147.8
## Median :54.90 Median :119.8 Median :208.8 Median :158.1
## Mean :55.54 Mean :120.0 Mean :220.0 Mean :167.0
## 3rd Qu.:59.98 3rd Qu.:139.6 3rd Qu.:242.1 3rd Qu.:182.3
## Max. :78.50 Max. :177.7 Max. :300.5 Max. :229.9
##
## STMORT DBMORT INFVAC MEUNDA
## Min. :25.60 Min. : 42.30 Min. :29.00 Min. :2.800
## 1st Qu.:33.50 1st Qu.: 57.05 1st Qu.:35.88 1st Qu.:3.325
## Median :36.75 Median : 64.85 Median :39.20 Median :3.700
## Mean :36.86 Mean : 67.37 Mean :39.01 Mean :3.686
## 3rd Qu.:41.45 3rd Qu.: 75.55 3rd Qu.:42.02 3rd Qu.:4.000
## Max. :48.80 Max. :117.20 Max. :49.00 Max. :4.900
##
## OVWINC HLTINS SMPREV
## Min. :57.20 Min. : 5.40 Min. : 9.50
## 1st Qu.:62.60 1st Qu.:12.55 1st Qu.:16.62
## Median :65.10 Median :15.70 Median :18.90
## Mean :64.54 Mean :15.84 Mean :19.02
## 3rd Qu.:66.75 3rd Qu.:18.32 3rd Qu.:21.25
## Max. :70.80 Max. :29.10 Max. :28.20
##
::vis_miss(DQA.Descriptors.Numeric, sort_miss = FALSE) visdat
##################################
# Conducting missing data imputation
# using Multivariate Imputation by Chained Equations
##################################
<- mice(DQA.Descriptors.Numeric, method='pmm', seed = 123) MICE_Model
##
## iter imp variable
## 1 1 ASMORT
## 1 2 ASMORT
## 1 3 ASMORT
## 1 4 ASMORT
## 1 5 ASMORT
## 2 1 ASMORT
## 2 2 ASMORT
## 2 3 ASMORT
## 2 4 ASMORT
## 2 5 ASMORT
## 3 1 ASMORT
## 3 2 ASMORT
## 3 3 ASMORT
## 3 4 ASMORT
## 3 5 ASMORT
## 4 1 ASMORT
## 4 2 ASMORT
## 4 3 ASMORT
## 4 4 ASMORT
## 4 5 ASMORT
## 5 1 ASMORT
## 5 2 ASMORT
## 5 3 ASMORT
## 5 4 ASMORT
## 5 5 ASMORT
<- complete(MICE_Model)
DQA.Descriptors.Numeric
##################################
# Visualizing the missing data patterns
# after imputation
##################################
::vis_miss(DQA.Descriptors.Numeric, sort_miss = FALSE) visdat
##################################
# Loading dataset
##################################
<- DQA.Descriptors.Numeric
DPA
##################################
# Gathering descriptive statistics
##################################
<- skim(DPA)) (DPA_Skimmed
Name | DPA |
Number of rows | 50 |
Number of columns | 15 |
_______________________ | |
Column type frequency: | |
numeric | 15 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
LDMORT | 0 | 1 | 10.54 | 2.82 | 6.8 | 8.70 | 10.20 | 11.95 | 22.5 | ▇▇▂▁▁ |
ARTINC | 0 | 1 | 24.78 | 3.29 | 19.0 | 23.13 | 24.15 | 25.98 | 35.9 | ▃▇▃▁▁ |
ASMORT | 0 | 1 | 11.05 | 2.48 | 7.5 | 8.70 | 10.90 | 12.90 | 16.7 | ▇▆▆▅▁ |
PSMUSE | 0 | 1 | 82.59 | 3.09 | 76.2 | 80.53 | 82.70 | 85.33 | 87.9 | ▃▃▇▆▅ |
RDMORT | 0 | 1 | 55.54 | 9.27 | 37.3 | 49.30 | 54.90 | 59.98 | 78.5 | ▂▇▇▃▁ |
PDMORT | 0 | 1 | 119.99 | 27.81 | 43.9 | 102.33 | 119.85 | 139.60 | 177.7 | ▁▃▇▇▂ |
CDMORT | 0 | 1 | 219.97 | 33.08 | 164.7 | 195.15 | 208.75 | 242.10 | 300.5 | ▃▇▅▂▂ |
HDMORT | 0 | 1 | 166.95 | 27.69 | 116.5 | 147.83 | 158.05 | 182.30 | 229.9 | ▃▇▅▃▂ |
STMORT | 0 | 1 | 36.86 | 5.77 | 25.6 | 33.50 | 36.75 | 41.45 | 48.8 | ▃▇▇▅▃ |
DBMORT | 0 | 1 | 67.37 | 14.99 | 42.3 | 57.05 | 64.85 | 75.55 | 117.2 | ▆▇▅▂▁ |
INFVAC | 0 | 1 | 39.01 | 3.94 | 29.0 | 35.88 | 39.20 | 42.03 | 49.0 | ▂▆▇▇▁ |
MEUNDA | 0 | 1 | 3.69 | 0.53 | 2.8 | 3.32 | 3.70 | 4.00 | 4.9 | ▆▆▇▅▂ |
OVWINC | 0 | 1 | 64.54 | 3.20 | 57.2 | 62.60 | 65.10 | 66.75 | 70.8 | ▃▃▇▇▃ |
HLTINS | 0 | 1 | 15.84 | 4.73 | 5.4 | 12.55 | 15.70 | 18.32 | 29.1 | ▂▇▇▃▁ |
SMPREV | 0 | 1 | 19.02 | 3.68 | 9.5 | 16.62 | 18.90 | 21.25 | 28.2 | ▁▆▇▅▂ |
##################################
# Outlier Detection
##################################
##################################
# Listing all Descriptors
##################################
<- DPA
DPA.Descriptors
##################################
# Listing all numeric Descriptors
##################################
<- DPA.Descriptors[,sapply(DPA.Descriptors, is.numeric)]
DPA.Descriptors.Numeric
##################################
# Identifying outliers for the numeric Descriptors
##################################
<- c()
OutlierCountList
for (i in 1:ncol(DPA.Descriptors.Numeric)) {
<- boxplot.stats(DPA.Descriptors.Numeric[,i])$out
Outliers <- length(Outliers)
OutlierCount <- append(OutlierCountList,OutlierCount)
OutlierCountList <- which(DPA.Descriptors.Numeric[,i] %in% c(Outliers))
OutlierIndices print(
ggplot(DPA.Descriptors.Numeric, aes(x=DPA.Descriptors.Numeric[,i])) +
geom_boxplot() +
theme_bw() +
theme(axis.text.y=element_blank(),
axis.ticks.y=element_blank()) +
xlab(names(DPA.Descriptors.Numeric)[i]) +
labs(title=names(DPA.Descriptors.Numeric)[i],
subtitle=paste0(OutlierCount, " Outlier(s) Detected")))
}
##################################
# Zero and Near-Zero Variance
##################################
##################################
# Identifying columns with low variance
###################################
<- nearZeroVar(DPA,
DPA_LowVariance freqCut = 80/20,
uniqueCut = 10,
saveMetrics= TRUE)
$nzv,]) (DPA_LowVariance[DPA_LowVariance
## [1] freqRatio percentUnique zeroVar nzv
## <0 rows> (or 0-length row.names)
if ((nrow(DPA_LowVariance[DPA_LowVariance$nzv,]))==0){
print("No low variance descriptors noted.")
else {
}
print(paste0("Low variance observed for ",
nrow(DPA_LowVariance[DPA_LowVariance$nzv,])),
(" numeric variable(s) with First.Second.Mode.Ratio>4 and Unique.Count.Ratio<0.10."))
<- (nrow(DPA_LowVariance[DPA_LowVariance$nzv,]))
DPA_LowVarianceForRemoval
print(paste0("Low variance can be resolved by removing ",
nrow(DPA_LowVariance[DPA_LowVariance$nzv,])),
(" numeric variable(s)."))
for (j in 1:DPA_LowVarianceForRemoval) {
<- rownames(DPA_LowVariance[DPA_LowVariance$nzv,])[j]
DPA_LowVarianceRemovedVariable print(paste0("Variable ",
j," for removal: ",
DPA_LowVarianceRemovedVariable))
}
%>%
DPA skim() %>%
::filter(skim_variable %in% rownames(DPA_LowVariance[DPA_LowVariance$nzv,]))
dplyr
}
## [1] "No low variance descriptors noted."
##################################
# Measuring pairwise correlation between descriptors
##################################
<- cor(DPA.Descriptors.Numeric,
(DPA_Correlation method = "pearson",
use="pairwise.complete.obs"))
## LDMORT ARTINC ASMORT PSMUSE RDMORT
## LDMORT 1.00000000 0.173093335 -0.232025288 -0.345100550 0.02923731
## ARTINC 0.17309333 1.000000000 -0.042746459 0.024546154 0.60290597
## ASMORT -0.23202529 -0.042746459 1.000000000 -0.087053707 0.02466696
## PSMUSE -0.34510055 0.024546154 -0.087053707 1.000000000 -0.11152216
## RDMORT 0.02923731 0.602905967 0.024666956 -0.111522163 1.00000000
## PDMORT 0.45267604 0.662922162 -0.210484947 -0.286784690 0.50529506
## CDMORT 0.07404928 0.667228833 0.023618891 -0.214153256 0.58858032
## HDMORT 0.04934776 0.636548347 -0.001775156 -0.206862030 0.52444923
## STMORT 0.17126776 0.582853439 0.066770232 -0.219881708 0.70019055
## DBMORT 0.17609150 0.481600436 0.191885492 -0.124643846 0.61452185
## INFVAC -0.10505851 0.103088137 0.100706735 0.399251809 0.24772090
## MEUNDA 0.22133327 0.803258627 -0.017665903 0.009246146 0.41867094
## OVWINC 0.16735471 0.498314042 0.149667042 -0.217877006 0.63784116
## HLTINS 0.37829219 0.002972526 -0.108605067 -0.488950184 0.12993910
## SMPREV 0.23811659 0.823678167 -0.099197602 -0.055348594 0.59858835
## PDMORT CDMORT HDMORT STMORT DBMORT INFVAC
## LDMORT 0.45267604 0.07404928 0.049347761 0.17126776 0.17609150 -0.10505851
## ARTINC 0.66292216 0.66722883 0.636548347 0.58285344 0.48160044 0.10308814
## ASMORT -0.21048495 0.02361889 -0.001775156 0.06677023 0.19188549 0.10070674
## PSMUSE -0.28678469 -0.21415326 -0.206862030 -0.21988171 -0.12464385 0.39925181
## RDMORT 0.50529506 0.58858032 0.524449231 0.70019055 0.61452185 0.24772090
## PDMORT 1.00000000 0.61855185 0.581638625 0.56453130 0.51296109 -0.05135669
## CDMORT 0.61855185 1.00000000 0.986330154 0.79569366 0.46503091 -0.09852403
## HDMORT 0.58163863 0.98633015 1.000000000 0.69775225 0.41225978 -0.11351086
## STMORT 0.56453130 0.79569366 0.697752254 1.00000000 0.50413319 0.02353408
## DBMORT 0.51296109 0.46503091 0.412259779 0.50413319 1.00000000 0.24952376
## INFVAC -0.05135669 -0.09852403 -0.113510863 0.02353408 0.24952376 1.00000000
## MEUNDA 0.48590617 0.64341929 0.640170352 0.49206776 0.35231401 -0.17366823
## OVWINC 0.60733512 0.67870794 0.621196381 0.68784352 0.49793894 -0.06691508
## HLTINS 0.36864746 0.41605763 0.378207992 0.45854866 0.01034802 -0.51094161
## SMPREV 0.73851432 0.66017408 0.630688330 0.59808334 0.49613487 0.06481756
## MEUNDA OVWINC HLTINS SMPREV
## LDMORT 0.221333267 0.16735471 0.378292185 0.23811659
## ARTINC 0.803258627 0.49831404 0.002972526 0.82367817
## ASMORT -0.017665903 0.14966704 -0.108605067 -0.09919760
## PSMUSE 0.009246146 -0.21787701 -0.488950184 -0.05534859
## RDMORT 0.418670943 0.63784116 0.129939097 0.59858835
## PDMORT 0.485906174 0.60733512 0.368647458 0.73851432
## CDMORT 0.643419286 0.67870794 0.416057632 0.66017408
## HDMORT 0.640170352 0.62119638 0.378207992 0.63068833
## STMORT 0.492067763 0.68784352 0.458548660 0.59808334
## DBMORT 0.352314013 0.49793894 0.010348016 0.49613487
## INFVAC -0.173668230 -0.06691508 -0.510941607 0.06481756
## MEUNDA 1.000000000 0.27967009 0.152816174 0.60964853
## OVWINC 0.279670088 1.00000000 0.378877840 0.67261164
## HLTINS 0.152816174 0.37887784 1.000000000 0.13686509
## SMPREV 0.609648533 0.67261164 0.136865092 1.00000000
##################################
# Testing pairwise correlation between descriptors
##################################
<- cor.mtest(DPA.Descriptors.Numeric,
DPA_CorrelationTest method = "pearson",
conf.level = 0.95)
##################################
# Visualizing pairwise correlation between descriptors
##################################
corrplot(cor(DPA.Descriptors.Numeric,
method = "pearson",
use="pairwise.complete.obs"),
method = "circle",
type = "upper",
order = "original",
tl.col = "black",
tl.cex = 0.75,
tl.srt = 90,
sig.level = 0.05,
p.mat = DPA_CorrelationTest$p,
insig = "blank")
corrplot(cor(DPA.Descriptors.Numeric,
method = "pearson",
use="pairwise.complete.obs"),
method = "number",
type = "upper",
order = "original",
tl.col = "black",
tl.cex = 0.75,
tl.srt = 90,
sig.level = 0.05,
p.mat = DPA_CorrelationTest$p,
insig = "blank")
##################################
# Identifying the highly correlated variables
##################################
<- sum(abs(DPA_Correlation[upper.tri(DPA_Correlation)])>0.90)) (DPA_HighlyCorrelatedCount
## [1] 1
if (DPA_HighlyCorrelatedCount > 0) {
<- findCorrelation(DPA_Correlation, cutoff = 0.90)
DPA_HighlyCorrelated
<- length(DPA_HighlyCorrelated))
(DPA_HighlyCorrelatedForRemoval
print(paste0("High correlation can be resolved by removing ",
(DPA_HighlyCorrelatedForRemoval)," numeric variable(s)."))
for (j in 1:DPA_HighlyCorrelatedForRemoval) {
<- colnames(DPA.Descriptors.Numeric)[DPA_HighlyCorrelated[j]]
DPA_HighlyCorrelatedRemovedVariable print(paste0("Variable ",
j," for removal: ",
DPA_HighlyCorrelatedRemovedVariable))
}
}
## [1] "High correlation can be resolved by removing 1 numeric variable(s)."
## [1] "Variable 1 for removal: CDMORT"
if (DPA_HighlyCorrelatedCount == 0) {
print("No highly correlated predictors noted.")
else {
} print(paste0("High correlation observed for ",
(DPA_HighlyCorrelatedCount)," pairs of numeric variable(s) with Correlation.Coefficient>0.90."))
<- corr_cross(DPA.Descriptors.Numeric,
(DPA_HighlyCorrelatedPairs max_pvalue = 0.05,
top = DPA_HighlyCorrelatedCount,
rm.na = TRUE,
grid = FALSE
))
}
## [1] "High correlation observed for 1 pairs of numeric variable(s) with Correlation.Coefficient>0.90."
##################################
# Removing individual descriptors
# among the highly correlated pair
##################################
<- DPA.Descriptors.Numeric[,!colnames(DPA.Descriptors.Numeric) %in% c("CDMORT")] DPA.Descriptors.Numeric
##################################
# Linear Dependencies
##################################
##################################
# Finding linear dependencies
##################################
<- findLinearCombos(DPA.Descriptors.Numeric)
DPA_LinearlyDependent
##################################
# Identifying the linearly dependent variables
##################################
<- findLinearCombos(DPA.Descriptors.Numeric)
DPA_LinearlyDependent
<- length(DPA_LinearlyDependent$linearCombos)) (DPA_LinearlyDependentCount
## [1] 0
if (DPA_LinearlyDependentCount == 0) {
print("No linearly dependent predictors noted.")
else {
} print(paste0("Linear dependency observed for ",
(DPA_LinearlyDependentCount)," subset(s) of numeric variable(s)."))
for (i in 1:DPA_LinearlyDependentCount) {
<- colnames(DPA.Descriptors.Numeric)[DPA_LinearlyDependent$linearCombos[[i]]]
DPA_LinearlyDependentSubset print(paste0("Linear dependent variable(s) for subset ",
i," include: ",
DPA_LinearlyDependentSubset))
}
}
## [1] "No linearly dependent predictors noted."
##################################
# Identifying the linearly dependent variables for removal
##################################
if (DPA_LinearlyDependentCount > 0) {
<- findLinearCombos(DPA.Descriptors.Numeric)
DPA_LinearlyDependent
<- length(DPA_LinearlyDependent$remove)
DPA_LinearlyDependentForRemoval
print(paste0("Linear dependency can be resolved by removing ",
(DPA_LinearlyDependentForRemoval)," numeric variable(s)."))
for (j in 1:DPA_LinearlyDependentForRemoval) {
<- colnames(DPA.Descriptors.Numeric)[DPA_LinearlyDependent$remove[j]]
DPA_LinearlyDependentRemovedVariable print(paste0("Variable ",
j," for removal: ",
DPA_LinearlyDependentRemovedVariable))
}
}
##################################
# Distributional Shape
##################################
##################################
# Formulating the histogram
# for the numeric descriptors
##################################
for (i in 1:ncol(DPA.Descriptors.Numeric)) {
<- format(round(median(DPA.Descriptors.Numeric[,i],na.rm = TRUE),2), nsmall=2)
Median <- format(round(mean(DPA.Descriptors.Numeric[,i],na.rm = TRUE),2), nsmall=2)
Mean <- format(round(skewness(DPA.Descriptors.Numeric[,i],na.rm = TRUE),2), nsmall=2)
Skewness print(
ggplot(DPA.Descriptors.Numeric, aes(x=DPA.Descriptors.Numeric[,i])) +
geom_histogram(binwidth=1,color="black", fill="white") +
geom_vline(aes(xintercept=mean(DPA.Descriptors.Numeric[,i])),
color="blue", size=1) +
geom_vline(aes(xintercept=median(DPA.Descriptors.Numeric[,i])),
color="red", size=1) +
theme_bw() +
ylab("Count") +
xlab(names(DPA.Descriptors.Numeric)[i]) +
labs(title=names(DPA.Descriptors.Numeric)[i],
subtitle=paste0("Median = ", Median,
", Mean = ", Mean,
", Skewness = ", Skewness)))
}
##################################
# Data Normalization
##################################
<- preProcess(DPA.Descriptors.Numeric, method = c("center","scale"))
DPA.Descriptors.Numeric.CenteredScaled <- predict(DPA.Descriptors.Numeric.CenteredScaled, DPA.Descriptors.Numeric) DPA.Descriptors.Numeric
##################################
# Calculating the Kaiser-Meyer-Olkin Factor Adequacy
##################################
<- KMO(DPA.Descriptors.Numeric)) (DPA_KMOFactorAdequacy
##
## ── Kaiser-Meyer-Olkin criterion (KMO) ──────────────────────────────────────────
##
## ! The overall KMO value for your data is mediocre.
## These data are probably suitable for factor analysis.
##
## Overall: 0.679
##
## For each variable:
## LDMORT ARTINC ASMORT PSMUSE RDMORT PDMORT HDMORT STMORT DBMORT INFVAC MEUNDA
## 0.359 0.800 0.152 0.391 0.716 0.873 0.721 0.891 0.849 0.284 0.547
## OVWINC HLTINS SMPREV
## 0.645 0.670 0.900
##################################
# Removing individual descriptors
# with low KMO values
##################################
<- DPA.Descriptors.Numeric[,!colnames(DPA.Descriptors.Numeric) %in% c("ASMORT")]
DPA.Descriptors.Numeric
##################################
# Calculating the Kaiser-Meyer-Olkin Factor Adequacy
##################################
<- KMO(DPA.Descriptors.Numeric)) (DPA_KMOFactorAdequacy
##
## ── Kaiser-Meyer-Olkin criterion (KMO) ──────────────────────────────────────────
##
## ✔ The overall KMO value for your data is middling.
## These data are probably suitable for factor analysis.
##
## Overall: 0.752
##
## For each variable:
## LDMORT ARTINC PSMUSE RDMORT PDMORT HDMORT STMORT DBMORT INFVAC MEUNDA OVWINC
## 0.434 0.790 0.546 0.832 0.855 0.785 0.882 0.851 0.350 0.603 0.763
## HLTINS SMPREV
## 0.655 0.906
##################################
# Removing individual descriptors
# with low KMO values
##################################
<- DPA.Descriptors.Numeric[,!colnames(DPA.Descriptors.Numeric) %in% c("INFVAC")]
DPA.Descriptors.Numeric
##################################
# Calculating the Kaiser-Meyer-Olkin Factor Adequacy
##################################
<- KMO(DPA.Descriptors.Numeric)) (DPA_KMOFactorAdequacy
##
## ── Kaiser-Meyer-Olkin criterion (KMO) ──────────────────────────────────────────
##
## ✔ The overall KMO value for your data is meritorious.
## These data are probably suitable for factor analysis.
##
## Overall: 0.81
##
## For each variable:
## LDMORT ARTINC PSMUSE RDMORT PDMORT HDMORT STMORT DBMORT MEUNDA OVWINC HLTINS
## 0.587 0.780 0.738 0.880 0.851 0.841 0.875 0.840 0.703 0.854 0.562
## SMPREV
## 0.900
##################################
# Removing individual descriptors
# with low KMO values
##################################
<- DPA.Descriptors.Numeric[,!colnames(DPA.Descriptors.Numeric) %in% c("HLTINS")]
DPA.Descriptors.Numeric
##################################
# Calculating the Kaiser-Meyer-Olkin Factor Adequacy
##################################
<- KMO(DPA.Descriptors.Numeric)) (DPA_KMOFactorAdequacy
##
## ── Kaiser-Meyer-Olkin criterion (KMO) ──────────────────────────────────────────
##
## ✔ The overall KMO value for your data is meritorious.
## These data are probably suitable for factor analysis.
##
## Overall: 0.832
##
## For each variable:
## LDMORT ARTINC PSMUSE RDMORT PDMORT HDMORT STMORT DBMORT MEUNDA OVWINC SMPREV
## 0.469 0.822 0.645 0.865 0.878 0.805 0.902 0.927 0.734 0.857 0.885
##################################
# Removing individual descriptors
# with low KMO values
##################################
<- DPA.Descriptors.Numeric[,!colnames(DPA.Descriptors.Numeric) %in% c("LDMORT")]
DPA.Descriptors.Numeric
##################################
# Calculating the Kaiser-Meyer-Olkin Factor Adequacy
##################################
<- KMO(DPA.Descriptors.Numeric)) (DPA_KMOFactorAdequacy
##
## ── Kaiser-Meyer-Olkin criterion (KMO) ──────────────────────────────────────────
##
## ✔ The overall KMO value for your data is meritorious.
## These data are probably suitable for factor analysis.
##
## Overall: 0.866
##
## For each variable:
## ARTINC PSMUSE RDMORT PDMORT HDMORT STMORT DBMORT MEUNDA OVWINC SMPREV
## 0.828 0.589 0.880 0.901 0.892 0.908 0.921 0.785 0.857 0.878
##################################
# Removing individual descriptors
# with low KMO values
##################################
<- DPA.Descriptors.Numeric[,!colnames(DPA.Descriptors.Numeric) %in% c("PSMUSE")]
DPA.Descriptors.Numeric
##################################
# Calculating the Kaiser-Meyer-Olkin Factor Adequacy
##################################
<- KMO(DPA.Descriptors.Numeric)) (DPA_KMOFactorAdequacy
##
## ── Kaiser-Meyer-Olkin criterion (KMO) ──────────────────────────────────────────
##
## ✔ The overall KMO value for your data is meritorious.
## These data are probably suitable for factor analysis.
##
## Overall: 0.871
##
## For each variable:
## ARTINC RDMORT PDMORT HDMORT STMORT DBMORT MEUNDA OVWINC SMPREV
## 0.828 0.879 0.935 0.888 0.904 0.919 0.782 0.853 0.876
##################################
# Calculating the Bartlett's Test of Sphericity
##################################
<- cortest.bartlett(DPA.Descriptors.Numeric,
(DPA_BartlettTest n=nrow(DPA.Descriptors.Numeric)))
## $chisq
## [1] 321.9186
##
## $p.value
## [1] 1.28086e-47
##
## $df
## [1] 36
##################################
# Measuring pairwise correlation between descriptors
##################################
<- cor(DPA.Descriptors.Numeric,
(DPA_Correlation method = "pearson",
use="pairwise.complete.obs"))
## ARTINC RDMORT PDMORT HDMORT STMORT DBMORT MEUNDA
## ARTINC 1.0000000 0.6029060 0.6629222 0.6365483 0.5828534 0.4816004 0.8032586
## RDMORT 0.6029060 1.0000000 0.5052951 0.5244492 0.7001906 0.6145218 0.4186709
## PDMORT 0.6629222 0.5052951 1.0000000 0.5816386 0.5645313 0.5129611 0.4859062
## HDMORT 0.6365483 0.5244492 0.5816386 1.0000000 0.6977523 0.4122598 0.6401704
## STMORT 0.5828534 0.7001906 0.5645313 0.6977523 1.0000000 0.5041332 0.4920678
## DBMORT 0.4816004 0.6145218 0.5129611 0.4122598 0.5041332 1.0000000 0.3523140
## MEUNDA 0.8032586 0.4186709 0.4859062 0.6401704 0.4920678 0.3523140 1.0000000
## OVWINC 0.4983140 0.6378412 0.6073351 0.6211964 0.6878435 0.4979389 0.2796701
## SMPREV 0.8236782 0.5985883 0.7385143 0.6306883 0.5980833 0.4961349 0.6096485
## OVWINC SMPREV
## ARTINC 0.4983140 0.8236782
## RDMORT 0.6378412 0.5985883
## PDMORT 0.6073351 0.7385143
## HDMORT 0.6211964 0.6306883
## STMORT 0.6878435 0.5980833
## DBMORT 0.4979389 0.4961349
## MEUNDA 0.2796701 0.6096485
## OVWINC 1.0000000 0.6726116
## SMPREV 0.6726116 1.0000000
##################################
# Identifying the minimally correlated variables
##################################
<- sum(abs(DPA_Correlation[upper.tri(DPA_Correlation)])>0.30)) (DPA_MinimallyCorrelatedCount
## [1] 35
<- length(DPA.Descriptors.Numeric)*(length(DPA.Descriptors.Numeric)-1)/2) (DPA_AllPairs
## [1] 36
<- DPA_MinimallyCorrelatedCount/DPA_AllPairs) (DPA_MinimallyCorrelatedCountPercentage
## [1] 0.9722222
qgraph(cor(DPA.Descriptors.Numeric),
cut=0.30,
details=FALSE,
posCol="#2F75B5",
negCol="#FF5050",
labels=names(DPA.Descriptors.Numeric))
##################################
# Computing the determinant of the correlation matrix
##################################
<- det(cor(DPA.Descriptors.Numeric))) (DPA_CorrelationMatrixDeterminant
## [1] 0.0008028455
##################################
# Gathering descriptive statistics
##################################
<- skim(DPA.Descriptors.Numeric)) (DPA_Skimmed
Name | DPA.Descriptors.Numeric |
Number of rows | 50 |
Number of columns | 9 |
_______________________ | |
Column type frequency: | |
numeric | 9 |
________________________ | |
Group variables | None |
Variable type: numeric
skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
---|---|---|---|---|---|---|---|---|---|---|
ARTINC | 0 | 1 | 0 | 1 | -1.76 | -0.50 | -0.19 | 0.36 | 3.38 | ▃▇▃▁▁ |
RDMORT | 0 | 1 | 0 | 1 | -1.97 | -0.67 | -0.07 | 0.48 | 2.48 | ▂▇▇▃▁ |
PDMORT | 0 | 1 | 0 | 1 | -2.74 | -0.64 | -0.01 | 0.71 | 2.08 | ▁▃▇▇▂ |
HDMORT | 0 | 1 | 0 | 1 | -1.82 | -0.69 | -0.32 | 0.55 | 2.27 | ▃▇▅▃▂ |
STMORT | 0 | 1 | 0 | 1 | -1.95 | -0.58 | -0.02 | 0.80 | 2.07 | ▃▇▇▅▃ |
DBMORT | 0 | 1 | 0 | 1 | -1.67 | -0.69 | -0.17 | 0.55 | 3.32 | ▆▇▅▂▁ |
MEUNDA | 0 | 1 | 0 | 1 | -1.66 | -0.68 | 0.03 | 0.59 | 2.28 | ▆▆▇▅▂ |
OVWINC | 0 | 1 | 0 | 1 | -2.29 | -0.61 | 0.18 | 0.69 | 1.96 | ▃▃▇▇▃ |
SMPREV | 0 | 1 | 0 | 1 | -2.59 | -0.65 | -0.03 | 0.61 | 2.50 | ▁▆▇▅▂ |
##################################
# Formulating the histogram
# for the numeric descriptors
##################################
for (i in 1:ncol(DPA.Descriptors.Numeric)) {
<- format(round(median(DPA.Descriptors.Numeric[,i],na.rm = TRUE),2), nsmall=2)
Median <- format(round(mean(DPA.Descriptors.Numeric[,i],na.rm = TRUE),2), nsmall=2)
Mean <- format(round(skewness(DPA.Descriptors.Numeric[,i],na.rm = TRUE),2), nsmall=2)
Skewness print(
ggplot(DPA.Descriptors.Numeric, aes(x=DPA.Descriptors.Numeric[,i])) +
geom_histogram(binwidth=0.25, color="black", fill="white", breaks = seq(-4, 4, by = 1)) +
scale_x_continuous(breaks = seq(-4, 4, 1)) +
geom_vline(aes(xintercept=mean(DPA.Descriptors.Numeric[,i])),
color="blue", size=1) +
geom_vline(aes(xintercept=median(DPA.Descriptors.Numeric[,i])),
color="red", size=1) +
theme_bw() +
ylab("Count") +
xlab(names(DPA.Descriptors.Numeric)[i]) +
labs(title=names(DPA.Descriptors.Numeric)[i],
subtitle=paste0("Median = ", Median,
", Mean = ", Mean,
", Skewness = ", Skewness)))
}
##################################
# Plotting the scatterplot matrix
# between descriptors
##################################
<- ggpairs(DPA.Descriptors.Numeric,
DPA_ScatterplotMatrix upper=list(continuous="points"),
lower=list(continuous="smooth_loess"),
diag=list(continuous="barDiag"))
##################################
# Measuring pairwise correlation
# between the updated descriptors
##################################
<- cor(DPA.Descriptors.Numeric,
(DPA_Correlation method = "pearson",
use="pairwise.complete.obs"))
## ARTINC RDMORT PDMORT HDMORT STMORT DBMORT MEUNDA
## ARTINC 1.0000000 0.6029060 0.6629222 0.6365483 0.5828534 0.4816004 0.8032586
## RDMORT 0.6029060 1.0000000 0.5052951 0.5244492 0.7001906 0.6145218 0.4186709
## PDMORT 0.6629222 0.5052951 1.0000000 0.5816386 0.5645313 0.5129611 0.4859062
## HDMORT 0.6365483 0.5244492 0.5816386 1.0000000 0.6977523 0.4122598 0.6401704
## STMORT 0.5828534 0.7001906 0.5645313 0.6977523 1.0000000 0.5041332 0.4920678
## DBMORT 0.4816004 0.6145218 0.5129611 0.4122598 0.5041332 1.0000000 0.3523140
## MEUNDA 0.8032586 0.4186709 0.4859062 0.6401704 0.4920678 0.3523140 1.0000000
## OVWINC 0.4983140 0.6378412 0.6073351 0.6211964 0.6878435 0.4979389 0.2796701
## SMPREV 0.8236782 0.5985883 0.7385143 0.6306883 0.5980833 0.4961349 0.6096485
## OVWINC SMPREV
## ARTINC 0.4983140 0.8236782
## RDMORT 0.6378412 0.5985883
## PDMORT 0.6073351 0.7385143
## HDMORT 0.6211964 0.6306883
## STMORT 0.6878435 0.5980833
## DBMORT 0.4979389 0.4961349
## MEUNDA 0.2796701 0.6096485
## OVWINC 1.0000000 0.6726116
## SMPREV 0.6726116 1.0000000
##################################
# Setting the correlation breaks
##################################
= seq(0.25, 1.00, 0.01)
DPA_Breaks
##################################
# Setting the correlation binnings
##################################
<- .bincode(DPA_Correlation,
DPA_CorrelationBins
DPA_Breaks, include.lowest = T)
##################################
# Transforming the correlation values
# into color ranges
##################################
<- matrix(factor(DPA_CorrelationBins,
DPA_ScatterplotMatrixColorRange levels = 1:length(DPA_Breaks),
labels = rev(colorpanel(length(DPA_Breaks),"#DA2A2A","#F2B2B2","#FBE5E5"))),
$nrow)
DPA_ScatterplotMatrix
##################################
# Updating the scatterplot matrix
# through color-coding depending
# on the correlation values
##################################
for(i in 1:DPA_ScatterplotMatrix$nrow) {
for(j in 1:DPA_ScatterplotMatrix$ncol){
<- DPA_ScatterplotMatrix[i,j] +
DPA_ScatterplotMatrix[i,j] theme(panel.background= element_rect(fill=DPA_ScatterplotMatrixColorRange[i,j]))
if(i == j){
<- DPA_ScatterplotMatrix[i,j] +
DPA_ScatterplotMatrix[i,j] theme(panel.background= element_rect(fill="#EBECF0"))
}
}}
##################################
# Plotting the scatterplot matrix
# color-coded by correlation values
##################################
DPA_ScatterplotMatrix
##################################
# Testing pairwise correlation between descriptors
##################################
<- cor.mtest(DPA.Descriptors.Numeric,
DPA_CorrelationTest method = "pearson",
conf.level = 0.95)
##################################
# Plotting the pairwise association
# between descriptors
##################################
corrplot(cor(DPA.Descriptors.Numeric,
method = "pearson",
use="pairwise.complete.obs"),
method = "number",
type = "upper",
order = "original",
tl.col = "black",
tl.cex = 0.75,
tl.srt = 90,
sig.level = 0.05,
p.mat = DPA_CorrelationTest$p,
insig = "blank")
##################################
# Implementing various procedures for determining
# factor retention based on
# the maximum consensus between methods
##################################
<- parameters::n_factors(DPA.Descriptors.Numeric,
(FA_PA_V_MethodAgreementProcedure algorithm = "pa",
rotation = "varimax"))
## # Method Agreement Procedure:
##
## The choice of 1 dimensions is supported by 6 (42.86%) methods out of 14 (Optimal coordinates, Acceleration factor, Parallel analysis, Kaiser criterion, Scree (SE), Scree (R2)).
as.data.frame(FA_PA_V_MethodAgreementProcedure)
## n_Factors Method Family
## 1 1 Optimal coordinates Scree
## 2 1 Acceleration factor Scree
## 3 1 Parallel analysis Scree
## 4 1 Kaiser criterion Scree
## 5 1 Scree (SE) Scree_SE
## 6 1 Scree (R2) Scree_SE
## 7 3 Bentler Bentler
## 8 3 CNG CNG
## 9 3 t Multiple_regression
## 10 3 p Multiple_regression
## 11 4 Bartlett Barlett
## 12 4 beta Multiple_regression
## 13 5 Anderson Barlett
## 14 5 Lawley Barlett
##################################
# Conducting exploratory factor analysis
# using Principal Axes extraction
# and Varimax rotation
# with 3 factors
##################################
<- fa(DPA.Descriptors.Numeric,
(FA_PA_V_3F nfactors = 3,
fm="pa",
rotate = "varimax",
residuals=TRUE,
SMC=TRUE,
n.obs=nrow(DPA.Descriptors.Numeric)))
## Factor Analysis using method = pa
## Call: fa(r = DPA.Descriptors.Numeric, nfactors = 3, n.obs = nrow(DPA.Descriptors.Numeric),
## rotate = "varimax", residuals = TRUE, SMC = TRUE, fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PA1 PA2 PA3 h2 u2 com
## ARTINC 0.35 0.62 0.58 0.84 0.16 2.6
## RDMORT 0.70 0.22 0.31 0.63 0.37 1.6
## PDMORT 0.45 0.27 0.60 0.63 0.37 2.3
## HDMORT 0.57 0.46 0.30 0.62 0.38 2.5
## STMORT 0.82 0.31 0.18 0.80 0.20 1.4
## DBMORT 0.52 0.17 0.33 0.41 0.59 1.9
## MEUNDA 0.19 0.99 0.23 1.06 -0.06 1.2
## OVWINC 0.73 0.05 0.44 0.72 0.28 1.6
## SMPREV 0.41 0.36 0.77 0.90 0.10 2.0
##
## PA1 PA2 PA3
## SS loadings 2.81 1.94 1.86
## Proportion Var 0.31 0.22 0.21
## Cumulative Var 0.31 0.53 0.73
## Proportion Explained 0.43 0.29 0.28
## Cumulative Proportion 0.43 0.72 1.00
##
## Mean item complexity = 1.9
## Test of the hypothesis that 3 factors are sufficient.
##
## df null model = 36 with the objective function = 7.13 with Chi Square = 321.92
## df of the model are 12 and the objective function was 0.31
##
## The root mean square of the residuals (RMSR) is 0.03
## The df corrected root mean square of the residuals is 0.06
##
## The harmonic n.obs is 50 with the empirical chi square 3.79 with prob < 0.99
## The total n.obs was 50 with Likelihood Chi Square = 13.46 with prob < 0.34
##
## Tucker Lewis Index of factoring reliability = 0.984
## RMSEA index = 0.045 and the 90 % confidence intervals are 0 0.158
## BIC = -33.48
## Fit based upon off diagonal values = 1
<- FA_PA_V_3F %>%
(FA_PA_V_3F_Summary model_parameters(sort = TRUE, threshold = "max"))
## # Rotated loadings from Factor Analysis (varimax-rotation)
##
## Variable | PA1 | PA2 | PA3 | Complexity | Uniqueness
## -------------------------------------------------------
## STMORT | 0.82 | | | 1.39 | 0.20
## OVWINC | 0.73 | | | 1.65 | 0.28
## RDMORT | 0.70 | | | 1.60 | 0.37
## HDMORT | 0.57 | | | 2.47 | 0.38
## DBMORT | 0.52 | | | 1.93 | 0.59
## MEUNDA | | 0.99 | | 1.19 | -0.06
## ARTINC | | 0.62 | | 2.58 | 0.16
## SMPREV | | | 0.77 | 1.99 | 0.10
## PDMORT | | | 0.60 | 2.30 | 0.37
##
## The 3 latent factors (varimax rotation) accounted for 73.47% of the total variance of the original data (PA1 = 31.23%, PA2 = 21.53%, PA3 = 20.71%).
summary(FA_PA_V_3F_Summary)
## # (Explained) Variance of Components
##
## Parameter | PA1 | PA2 | PA3
## -------------------------------------------------------
## Eigenvalues | 5.397 | 0.854 | 0.361
## Variance Explained | 0.312 | 0.215 | 0.207
## Variance Explained (Cumulative) | 0.312 | 0.528 | 0.735
## Variance Explained (Proportion) | 0.425 | 0.293 | 0.282
##################################
# Extracting the residuals
# from the Exploratory Factor Analysis
##################################
<- residuals(FA_PA_V_3F,
(FA_PA_V_3F_Residual diag=FALSE,
na.rm=TRUE))
## ARTIN RDMOR PDMOR HDMOR STMOR DBMOR MEUND OVWIN SMPRE
## ARTINC NA
## RDMORT 0.05 NA
## PDMORT 0.00 -0.05 NA
## HDMORT -0.02 -0.07 0.02 NA
## STMORT 0.00 0.00 0.00 0.03 NA
## DBMORT 0.01 0.11 0.04 -0.06 -0.04 NA
## MEUNDA 0.00 0.00 0.00 0.01 -0.01 0.01 NA
## OVWINC -0.04 -0.02 0.01 0.06 0.00 -0.03 0.00 NA
## SMPREV 0.01 -0.01 0.00 0.00 0.01 -0.03 0.00 0.02 NA
##################################
# Obtaining Fit Indices
##################################
<- FA_PA_V_3F$rms) (FA_PA_V_3F_RMS
## [1] 0.03244678
<- FA_PA_V_3F$TLI) (FA_PA_V_3F_TLI
## [1] 0.9838317
<- FA_PA_V_3F$BIC) (FA_PA_V_3F_BIC
## [1] -33.48016
<- max(abs(FA_PA_V_3F_Residual),na.rm=TRUE)) (FA_PA_V_3F_MaxResidual
## [1] 0.1120448
<- sum(FA_PA_V_3F_Residual>abs(0.05),na.rm=TRUE)) (FA_PA_V_3F_HighResidual
## [1] 4
<- length(DPA.Descriptors.Numeric)*(length(DPA.Descriptors.Numeric)-1)/2) (FA_PA_V_3F_TotalResidual
## [1] 36
<- FA_PA_V_3F_HighResidual/FA_PA_V_3F_TotalResidual) (FA_PA_V_3F_HighResidualRate
## [1] 0.1111111
##################################
# Graph the factor loading matrices
##################################
fa.diagram(FA_PA_V_3F,
sort=TRUE,
cut=0,
digits=3,
main="Principal Axes Factor Extraction + Varimax Rotation : 3 Factors",
cex=0.75)
##################################
# computing the internal consistency
# measure of reliability using the
# Cronbach's alpha coefficient
# for each factor
##################################
alpha(DPA.Descriptors.Numeric[,c("STMORT","OVWINC","RDMORT","HDMORT","DBMORT")],
check.keys=FALSE)
##
## Reliability analysis
## Call: alpha(x = DPA.Descriptors.Numeric[, c("STMORT", "OVWINC", "RDMORT",
## "HDMORT", "DBMORT")], check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.88 0.88 0.87 0.59 7.2 0.028 -2.6e-16 0.82 0.62
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.81 0.88 0.92
## Duhachek 0.82 0.88 0.93
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## STMORT 0.83 0.83 0.80 0.55 4.9 0.039 0.0079 0.57
## OVWINC 0.84 0.84 0.83 0.58 5.4 0.036 0.0133 0.57
## RDMORT 0.84 0.84 0.81 0.57 5.3 0.037 0.0134 0.56
## HDMORT 0.86 0.86 0.83 0.61 6.2 0.032 0.0077 0.63
## DBMORT 0.88 0.88 0.85 0.64 7.3 0.028 0.0046 0.66
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## STMORT 50 0.88 0.88 0.85 0.79 4.6e-16 1
## OVWINC 50 0.84 0.84 0.79 0.74 -1.8e-15 1
## RDMORT 50 0.85 0.85 0.81 0.75 3.3e-16 1
## HDMORT 50 0.79 0.79 0.73 0.67 -2.9e-16 1
## DBMORT 50 0.74 0.74 0.64 0.59 -1.6e-17 1
##################################
# computing the internal consistency
# measure of reliability using the
# Cronbach's alpha coefficient
# for each factor
##################################
alpha(DPA.Descriptors.Numeric[,c("MEUNDA","ARTINC")],
check.keys=FALSE)
##
## Reliability analysis
## Call: alpha(x = DPA.Descriptors.Numeric[, c("MEUNDA", "ARTINC")], check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.89 0.89 0.8 0.8 8.2 0.031 2.3e-16 0.95 0.8
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.81 0.89 0.94
## Duhachek 0.83 0.89 0.95
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## MEUNDA 0.8 0.8 0.65 0.8 4.1 NA 0 0.8
## ARTINC 0.8 0.8 0.65 0.8 4.1 NA 0 0.8
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## MEUNDA 50 0.95 0.95 0.85 0.8 1.5e-16 1
## ARTINC 50 0.95 0.95 0.85 0.8 3.0e-16 1
##################################
# computing the internal consistency
# measure of reliability using the
# Cronbach's alpha coefficient
# for each factor
##################################
alpha(DPA.Descriptors.Numeric[,c("SMPREV","PDMORT")],
check.keys=FALSE)
##
## Reliability analysis
## Call: alpha(x = DPA.Descriptors.Numeric[, c("SMPREV", "PDMORT")], check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.85 0.85 0.74 0.74 5.6 0.043 1.5e-16 0.93 0.74
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.73 0.85 0.91
## Duhachek 0.77 0.85 0.93
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## SMPREV 0.74 0.74 0.55 0.74 2.8 NA 0 0.74
## PDMORT 0.74 0.74 0.55 0.74 2.8 NA 0 0.74
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## SMPREV 50 0.93 0.93 0.8 0.74 1.2e-16 1
## PDMORT 50 0.93 0.93 0.8 0.74 1.6e-16 1
##################################
# Formulating the dandelion plot to
# visualize both factor variances and loadings
# from the factor loading matrices
##################################
<- factload(DPA.Descriptors.Numeric,
FA_PA_V_3F_FactorLoading cormeth = "pearson",
method = "prax",
nfac = 3,
rotation = "varimax")
<- rev(rainbow(100, start = 0, end = 0.2))
DandelionPlotPalette
dandelion(FA_PA_V_3F_FactorLoading,
bound=0,
mcex=c(1,1.2),
palet=DandelionPlotPalette)
##################################
# Conducting exploratory factor analysis
# using Principal Axes extraction
# and Varimax rotation
# with 4 factors
##################################
<- fa(DPA.Descriptors.Numeric,
(FA_PA_V_4F nfactors = 4,
fm="pa",
rotate = "varimax",
residuals=TRUE,
SMC=TRUE,
n.obs=nrow(DPA.Descriptors.Numeric)))
## Factor Analysis using method = pa
## Call: fa(r = DPA.Descriptors.Numeric, nfactors = 4, n.obs = nrow(DPA.Descriptors.Numeric),
## rotate = "varimax", residuals = TRUE, SMC = TRUE, fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PA2 PA3 PA1 PA4 h2 u2 com
## ARTINC 0.70 0.33 0.52 0.16 0.89 0.107 2.4
## RDMORT 0.22 0.89 0.22 0.26 0.95 0.054 1.4
## PDMORT 0.30 0.27 0.61 0.32 0.65 0.354 2.5
## HDMORT 0.46 0.21 0.26 0.68 0.79 0.214 2.4
## STMORT 0.27 0.49 0.25 0.60 0.73 0.267 2.8
## DBMORT 0.18 0.50 0.33 0.21 0.44 0.561 2.4
## MEUNDA 0.90 0.15 0.18 0.21 0.90 0.098 1.3
## OVWINC 0.00 0.42 0.52 0.59 0.79 0.215 2.8
## SMPREV 0.42 0.30 0.73 0.27 0.87 0.128 2.3
##
## PA2 PA3 PA1 PA4
## SS loadings 1.93 1.80 1.76 1.51
## Proportion Var 0.21 0.20 0.20 0.17
## Cumulative Var 0.21 0.41 0.61 0.78
## Proportion Explained 0.28 0.26 0.25 0.22
## Cumulative Proportion 0.28 0.53 0.78 1.00
##
## Mean item complexity = 2.3
## Test of the hypothesis that 4 factors are sufficient.
##
## df null model = 36 with the objective function = 7.13 with Chi Square = 321.92
## df of the model are 6 and the objective function was 0.05
##
## The root mean square of the residuals (RMSR) is 0.01
## The df corrected root mean square of the residuals is 0.03
##
## The harmonic n.obs is 50 with the empirical chi square 0.5 with prob < 1
## The total n.obs was 50 with Likelihood Chi Square = 2.29 with prob < 0.89
##
## Tucker Lewis Index of factoring reliability = 1.083
## RMSEA index = 0 and the 90 % confidence intervals are 0 0.085
## BIC = -21.18
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## PA2 PA3 PA1 PA4
## Correlation of (regression) scores with factors 0.94 0.95 0.88 0.85
## Multiple R square of scores with factors 0.89 0.90 0.78 0.72
## Minimum correlation of possible factor scores 0.78 0.81 0.56 0.45
<- FA_PA_V_4F %>%
(FA_PA_V_4F_Summary model_parameters(sort = TRUE, threshold = "max"))
## # Rotated loadings from Factor Analysis (varimax-rotation)
##
## Variable | PA2 | PA3 | PA1 | PA4 | Complexity | Uniqueness
## --------------------------------------------------------------
## MEUNDA | 0.90 | | | | 1.26 | 0.10
## ARTINC | 0.70 | | | | 2.44 | 0.11
## RDMORT | | 0.89 | | | 1.43 | 0.05
## DBMORT | | 0.50 | | | 2.44 | 0.56
## SMPREV | | | 0.73 | | 2.32 | 0.13
## PDMORT | | | 0.61 | | 2.50 | 0.35
## HDMORT | | | | 0.68 | 2.36 | 0.21
## STMORT | | | | 0.60 | 2.78 | 0.27
## OVWINC | | | | 0.59 | 2.81 | 0.21
##
## The 4 latent factors (varimax rotation) accounted for 77.81% of the total variance of the original data (PA2 = 21.45%, PA3 = 19.97%, PA1 = 19.58%, PA4 = 16.81%).
summary(FA_PA_V_4F_Summary)
## # (Explained) Variance of Components
##
## Parameter | PA2 | PA3 | PA1 | PA4
## ---------------------------------------------------------------
## Eigenvalues | 5.441 | 0.825 | 0.391 | 0.347
## Variance Explained | 0.215 | 0.200 | 0.196 | 0.168
## Variance Explained (Cumulative) | 0.215 | 0.414 | 0.610 | 0.778
## Variance Explained (Proportion) | 0.276 | 0.257 | 0.252 | 0.216
##################################
# Extracting the residuals
# from the Exploratory Factor Analysis
##################################
<- residuals(FA_PA_V_4F,
(FA_PA_V_4F_Residual diag=FALSE,
na.rm=TRUE))
## ARTIN RDMOR PDMOR HDMOR STMOR DBMOR MEUND OVWIN SMPRE
## ARTINC NA
## RDMORT 0.00 NA
## PDMORT -0.01 -0.02 NA
## HDMORT 0.00 0.00 0.01 NA
## STMORT 0.00 0.00 0.00 0.00 NA
## DBMORT -0.01 0.00 0.05 -0.01 0.00 NA
## MEUNDA 0.00 0.00 0.00 0.00 0.00 0.01 NA
## OVWINC 0.00 0.00 -0.01 0.00 0.00 -0.01 0.00 NA
## SMPREV 0.01 0.01 0.00 0.00 -0.01 -0.03 0.00 0.01 NA
##################################
# Obtaining Fit Indices
##################################
<- FA_PA_V_4F$rms) (FA_PA_V_4F_RMS
## [1] 0.01182674
<- FA_PA_V_4F$TLI) (FA_PA_V_4F_TLI
## [1] 1.083371
<- FA_PA_V_4F$BIC) (FA_PA_V_4F_BIC
## [1] -21.18092
<- max(abs(FA_PA_V_4F_Residual),na.rm=TRUE)) (FA_PA_V_4F_MaxResidual
## [1] 0.05035344
<- sum(FA_PA_V_4F_Residual>abs(0.05),na.rm=TRUE)) (FA_PA_V_4F_HighResidual
## [1] 2
<- length(DPA.Descriptors.Numeric)*(length(DPA.Descriptors.Numeric)-1)/2) (FA_PA_V_4F_TotalResidual
## [1] 36
<- FA_PA_V_4F_HighResidual/FA_PA_V_4F_TotalResidual) (FA_PA_V_4F_HighResidualRate
## [1] 0.05555556
##################################
# Graphing the factor loading matrices
##################################
fa.diagram(FA_PA_V_4F,
sort=TRUE,
cut=0,
digits=3,
main="Principal Axes Factor Extraction + Varimax Rotation : 4 Factors",
cex=0.75)
##################################
# Computing the internal consistency
# measure of reliability using the
# Cronbach's alpha coefficient
# for each factor
##################################
alpha(DPA.Descriptors.Numeric[,c("MEUNDA","ARTINC")],
check.keys=FALSE)
##
## Reliability analysis
## Call: alpha(x = DPA.Descriptors.Numeric[, c("MEUNDA", "ARTINC")], check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.89 0.89 0.8 0.8 8.2 0.031 2.3e-16 0.95 0.8
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.81 0.89 0.94
## Duhachek 0.83 0.89 0.95
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## MEUNDA 0.8 0.8 0.65 0.8 4.1 NA 0 0.8
## ARTINC 0.8 0.8 0.65 0.8 4.1 NA 0 0.8
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## MEUNDA 50 0.95 0.95 0.85 0.8 1.5e-16 1
## ARTINC 50 0.95 0.95 0.85 0.8 3.0e-16 1
##################################
# Computing the internal consistency
# measure of reliability using the
# Cronbach's alpha coefficient
# for each factor
##################################
alpha(DPA.Descriptors.Numeric[,c("RDMORT","DBMORT")],
check.keys=FALSE)
##
## Reliability analysis
## Call: alpha(x = DPA.Descriptors.Numeric[, c("RDMORT", "DBMORT")], check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.76 0.76 0.61 0.61 3.2 0.068 1.5e-16 0.9 0.61
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.58 0.76 0.86
## Duhachek 0.63 0.76 0.89
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## RDMORT 0.61 0.61 0.38 0.61 1.6 NA 0 0.61
## DBMORT 0.61 0.61 0.38 0.61 1.6 NA 0 0.61
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## RDMORT 50 0.9 0.9 0.7 0.61 3.3e-16 1
## DBMORT 50 0.9 0.9 0.7 0.61 -1.6e-17 1
##################################
# Computing the internal consistency
# measure of reliability using the
# Cronbach's alpha coefficient
# for each factor
##################################
alpha(DPA.Descriptors.Numeric[,c("SMPREV","PDMORT")],
check.keys=FALSE)
##
## Reliability analysis
## Call: alpha(x = DPA.Descriptors.Numeric[, c("SMPREV", "PDMORT")], check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.85 0.85 0.74 0.74 5.6 0.043 1.5e-16 0.93 0.74
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.73 0.85 0.91
## Duhachek 0.77 0.85 0.93
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## SMPREV 0.74 0.74 0.55 0.74 2.8 NA 0 0.74
## PDMORT 0.74 0.74 0.55 0.74 2.8 NA 0 0.74
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## SMPREV 50 0.93 0.93 0.8 0.74 1.2e-16 1
## PDMORT 50 0.93 0.93 0.8 0.74 1.6e-16 1
##################################
# Computing the internal consistency
# measure of reliability using the
# Cronbach's alpha coefficient
# for each factor
##################################
alpha(DPA.Descriptors.Numeric[,c("STMORT","OVWINC","HDMORT")],
check.keys=FALSE)
##
## Reliability analysis
## Call: alpha(x = DPA.Descriptors.Numeric[, c("STMORT", "OVWINC", "HDMORT")],
## check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.86 0.86 0.8 0.67 6.1 0.035 -5.4e-16 0.88 0.69
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.77 0.86 0.91
## Duhachek 0.79 0.86 0.93
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## STMORT 0.77 0.77 0.62 0.62 3.3 0.066 NA 0.62
## OVWINC 0.82 0.82 0.70 0.70 4.6 0.050 NA 0.70
## HDMORT 0.82 0.82 0.69 0.69 4.4 0.052 NA 0.69
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## STMORT 50 0.90 0.90 0.83 0.77 4.6e-16 1
## OVWINC 50 0.87 0.87 0.77 0.71 -1.8e-15 1
## HDMORT 50 0.88 0.88 0.78 0.72 -2.9e-16 1
##################################
# Formulating the dandelion plot to
# visualize both factor variances and loadings
# from the factor loading matrices
##################################
<- factload(DPA.Descriptors.Numeric,
FA_PA_V_4F_FactorLoading cormeth = "pearson",
method = "prax",
nfac = 4,
rotation = "varimax")
<- rev(rainbow(100, start = 0, end = 0.2))
DandelionPlotPalette
dandelion(FA_PA_V_4F_FactorLoading,
bound=0,
mcex=c(1,1.2),
palet=DandelionPlotPalette)
##################################
# Implementing various procedures for determining
# factor retention based on
# the maximum consensus between methods
##################################
<- parameters::n_factors(DPA.Descriptors.Numeric,
(FA_PA_P_MethodAgreementProcedure algorithm = "pa",
rotation = "promax"))
## # Method Agreement Procedure:
##
## The choice of 1 dimensions is supported by 6 (42.86%) methods out of 14 (Optimal coordinates, Acceleration factor, Parallel analysis, Kaiser criterion, Scree (SE), Scree (R2)).
as.data.frame(FA_PA_P_MethodAgreementProcedure)
## n_Factors Method Family
## 1 1 Optimal coordinates Scree
## 2 1 Acceleration factor Scree
## 3 1 Parallel analysis Scree
## 4 1 Kaiser criterion Scree
## 5 1 Scree (SE) Scree_SE
## 6 1 Scree (R2) Scree_SE
## 7 3 Bentler Bentler
## 8 3 CNG CNG
## 9 3 t Multiple_regression
## 10 3 p Multiple_regression
## 11 4 Bartlett Barlett
## 12 4 beta Multiple_regression
## 13 5 Anderson Barlett
## 14 5 Lawley Barlett
##################################
# Conducting exploratory factor analysis
# using Principal Axes extraction
# and Promax rotation
# with 3 factors
##################################
<- fa(DPA.Descriptors.Numeric,
(FA_PA_P_3F nfactors = 3,
fm="pa",
rotate = "promax",
residuals=TRUE,
SMC=TRUE,
n.obs=nrow(DPA.Descriptors.Numeric)))
## Factor Analysis using method = pa
## Call: fa(r = DPA.Descriptors.Numeric, nfactors = 3, n.obs = nrow(DPA.Descriptors.Numeric),
## rotate = "promax", residuals = TRUE, SMC = TRUE, fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PA1 PA3 PA2 h2 u2 com
## ARTINC 0.00 0.57 0.45 0.84 0.16 1.9
## RDMORT 0.74 0.07 0.00 0.63 0.37 1.0
## PDMORT 0.19 0.63 0.01 0.63 0.37 1.2
## HDMORT 0.52 0.06 0.32 0.62 0.38 1.7
## STMORT 0.98 -0.21 0.13 0.80 0.20 1.1
## DBMORT 0.48 0.21 -0.03 0.41 0.59 1.4
## MEUNDA -0.06 0.01 1.05 1.06 -0.06 1.0
## OVWINC 0.73 0.30 -0.26 0.72 0.28 1.6
## SMPREV 0.00 0.91 0.06 0.90 0.10 1.0
##
## PA1 PA3 PA2
## SS loadings 2.83 2.11 1.67
## Proportion Var 0.31 0.23 0.19
## Cumulative Var 0.31 0.55 0.73
## Proportion Explained 0.43 0.32 0.25
## Cumulative Proportion 0.43 0.75 1.00
##
## With factor correlations of
## PA1 PA3 PA2
## PA1 1.00 0.76 0.53
## PA3 0.76 1.00 0.60
## PA2 0.53 0.60 1.00
##
## Mean item complexity = 1.3
## Test of the hypothesis that 3 factors are sufficient.
##
## df null model = 36 with the objective function = 7.13 with Chi Square = 321.92
## df of the model are 12 and the objective function was 0.31
##
## The root mean square of the residuals (RMSR) is 0.03
## The df corrected root mean square of the residuals is 0.06
##
## The harmonic n.obs is 50 with the empirical chi square 3.79 with prob < 0.99
## The total n.obs was 50 with Likelihood Chi Square = 13.46 with prob < 0.34
##
## Tucker Lewis Index of factoring reliability = 0.984
## RMSEA index = 0.045 and the 90 % confidence intervals are 0 0.158
## BIC = -33.48
## Fit based upon off diagonal values = 1
<- FA_PA_P_3F %>%
(FA_PA_P_3F_Summary model_parameters(sort = TRUE, threshold = "max"))
## # Rotated loadings from Factor Analysis (promax-rotation)
##
## Variable | PA1 | PA3 | PA2 | Complexity | Uniqueness
## -------------------------------------------------------
## STMORT | 0.98 | | | 1.13 | 0.20
## RDMORT | 0.74 | | | 1.02 | 0.37
## OVWINC | 0.73 | | | 1.59 | 0.28
## HDMORT | 0.52 | | | 1.68 | 0.38
## DBMORT | 0.48 | | | 1.36 | 0.59
## SMPREV | | 0.91 | | 1.01 | 0.10
## PDMORT | | 0.63 | | 1.18 | 0.37
## ARTINC | | 0.57 | | 1.91 | 0.16
## MEUNDA | | | 1.05 | 1.01 | -0.06
##
## The 3 latent factors (promax rotation) accounted for 73.47% of the total variance of the original data (PA1 = 31.43%, PA3 = 23.45%, PA2 = 18.59%).
summary(FA_PA_P_3F_Summary)
## # (Explained) Variance of Components
##
## Parameter | PA1 | PA3 | PA2
## -------------------------------------------------------
## Eigenvalues | 5.397 | 0.854 | 0.361
## Variance Explained | 0.314 | 0.235 | 0.186
## Variance Explained (Cumulative) | 0.314 | 0.549 | 0.735
## Variance Explained (Proportion) | 0.428 | 0.319 | 0.253
##################################
# Extracting the residuals
# from the Exploratory Factor Analysis
##################################
<- residuals(FA_PA_P_3F,
(FA_PA_P_3F_Residual diag=FALSE,
na.rm=TRUE))
## ARTIN RDMOR PDMOR HDMOR STMOR DBMOR MEUND OVWIN SMPRE
## ARTINC NA
## RDMORT 0.05 NA
## PDMORT 0.00 -0.05 NA
## HDMORT -0.02 -0.07 0.02 NA
## STMORT 0.00 0.00 0.00 0.03 NA
## DBMORT 0.01 0.11 0.04 -0.06 -0.04 NA
## MEUNDA 0.00 0.00 0.00 0.01 -0.01 0.01 NA
## OVWINC -0.04 -0.02 0.01 0.06 0.00 -0.03 0.00 NA
## SMPREV 0.01 -0.01 0.00 0.00 0.01 -0.03 0.00 0.02 NA
##################################
# Obtaining Fit Indices
##################################
<- FA_PA_P_3F$rms) (FA_PA_P_3F_RMS
## [1] 0.03244678
<- FA_PA_P_3F$TLI) (FA_PA_P_3F_TLI
## [1] 0.9838317
<- FA_PA_P_3F$BIC) (FA_PA_P_3F_BIC
## [1] -33.48016
<- max(abs(FA_PA_P_3F_Residual),na.rm=TRUE)) (FA_PA_P_3F_MaxResidual
## [1] 0.1120448
<- sum(FA_PA_P_3F_Residual>abs(0.05),na.rm=TRUE)) (FA_PA_P_3F_HighResidual
## [1] 4
<- length(DPA.Descriptors.Numeric)*(length(DPA.Descriptors.Numeric)-1)/2) (FA_PA_P_3F_TotalResidual
## [1] 36
<- FA_PA_P_3F_HighResidual/FA_PA_P_3F_TotalResidual) (FA_PA_P_3F_HighResidualRate
## [1] 0.1111111
##################################
# Graph the factor loading matrices
##################################
fa.diagram(FA_PA_P_3F,
sort=TRUE,
cut=0,
digits=3,
main="Principal Axes Factor Extraction + Promax Rotation : 3 Factors",
cex=0.75)
##################################
# computing the internal consistency
# measure of reliability using the
# Cronbach's alpha coefficient
# for each factor
##################################
alpha(DPA.Descriptors.Numeric[,c("STMORT","OVWINC","RDMORT","HDMORT","DBMORT")],
check.keys=FALSE)
##
## Reliability analysis
## Call: alpha(x = DPA.Descriptors.Numeric[, c("STMORT", "OVWINC", "RDMORT",
## "HDMORT", "DBMORT")], check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.88 0.88 0.87 0.59 7.2 0.028 -2.6e-16 0.82 0.62
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.81 0.88 0.92
## Duhachek 0.82 0.88 0.93
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## STMORT 0.83 0.83 0.80 0.55 4.9 0.039 0.0079 0.57
## OVWINC 0.84 0.84 0.83 0.58 5.4 0.036 0.0133 0.57
## RDMORT 0.84 0.84 0.81 0.57 5.3 0.037 0.0134 0.56
## HDMORT 0.86 0.86 0.83 0.61 6.2 0.032 0.0077 0.63
## DBMORT 0.88 0.88 0.85 0.64 7.3 0.028 0.0046 0.66
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## STMORT 50 0.88 0.88 0.85 0.79 4.6e-16 1
## OVWINC 50 0.84 0.84 0.79 0.74 -1.8e-15 1
## RDMORT 50 0.85 0.85 0.81 0.75 3.3e-16 1
## HDMORT 50 0.79 0.79 0.73 0.67 -2.9e-16 1
## DBMORT 50 0.74 0.74 0.64 0.59 -1.6e-17 1
##################################
# computing the internal consistency
# measure of reliability using the
# Cronbach's alpha coefficient
# for each factor
##################################
alpha(DPA.Descriptors.Numeric[,c("MEUNDA","ARTINC")],
check.keys=FALSE)
##
## Reliability analysis
## Call: alpha(x = DPA.Descriptors.Numeric[, c("MEUNDA", "ARTINC")], check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.89 0.89 0.8 0.8 8.2 0.031 2.3e-16 0.95 0.8
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.81 0.89 0.94
## Duhachek 0.83 0.89 0.95
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## MEUNDA 0.8 0.8 0.65 0.8 4.1 NA 0 0.8
## ARTINC 0.8 0.8 0.65 0.8 4.1 NA 0 0.8
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## MEUNDA 50 0.95 0.95 0.85 0.8 1.5e-16 1
## ARTINC 50 0.95 0.95 0.85 0.8 3.0e-16 1
##################################
# computing the internal consistency
# measure of reliability using the
# Cronbach's alpha coefficient
# for each factor
##################################
alpha(DPA.Descriptors.Numeric[,c("SMPREV","PDMORT")],
check.keys=FALSE)
##
## Reliability analysis
## Call: alpha(x = DPA.Descriptors.Numeric[, c("SMPREV", "PDMORT")], check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.85 0.85 0.74 0.74 5.6 0.043 1.5e-16 0.93 0.74
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.73 0.85 0.91
## Duhachek 0.77 0.85 0.93
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## SMPREV 0.74 0.74 0.55 0.74 2.8 NA 0 0.74
## PDMORT 0.74 0.74 0.55 0.74 2.8 NA 0 0.74
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## SMPREV 50 0.93 0.93 0.8 0.74 1.2e-16 1
## PDMORT 50 0.93 0.93 0.8 0.74 1.6e-16 1
##################################
# Formulating the dandelion plot to
# visualize both factor variances and loadings
# from the factor loading matrices
##################################
<- factload(DPA.Descriptors.Numeric,
FA_PA_P_3F_FactorLoading cormeth = "pearson",
method = "prax",
nfac = 3,
rotation = "promax")
<- rev(rainbow(100, start = 0, end = 0.2))
DandelionPlotPalette
dandelion(FA_PA_P_3F_FactorLoading,
bound=0,
mcex=c(1,1.2),
palet=DandelionPlotPalette)
##################################
# Conducting exploratory factor analysis
# using Principal Axes extraction
# and Promax rotation
# with 4 factors
##################################
<- fa(DPA.Descriptors.Numeric,
(FA_PA_P_4F nfactors = 4,
fm="pa",
rotate = "promax",
residuals=TRUE,
SMC=TRUE,
n.obs=nrow(DPA.Descriptors.Numeric)))
## Factor Analysis using method = pa
## Call: fa(r = DPA.Descriptors.Numeric, nfactors = 4, n.obs = nrow(DPA.Descriptors.Numeric),
## rotate = "promax", residuals = TRUE, SMC = TRUE, fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
## PA1 PA4 PA2 PA3 h2 u2 com
## ARTINC 0.50 -0.11 0.56 0.13 0.89 0.107 2.2
## RDMORT -0.13 0.01 0.04 1.04 0.95 0.054 1.0
## PDMORT 0.71 0.13 0.05 -0.04 0.65 0.354 1.1
## HDMORT 0.02 0.78 0.31 -0.12 0.79 0.214 1.4
## STMORT -0.05 0.60 0.08 0.32 0.73 0.267 1.6
## DBMORT 0.22 0.02 0.01 0.47 0.44 0.561 1.4
## MEUNDA -0.03 0.13 0.91 0.00 0.90 0.098 1.0
## OVWINC 0.47 0.53 -0.31 0.12 0.79 0.215 2.7
## SMPREV 0.87 -0.01 0.16 -0.04 0.87 0.128 1.1
##
## PA1 PA4 PA2 PA3
## SS loadings 2.14 1.65 1.62 1.59
## Proportion Var 0.24 0.18 0.18 0.18
## Cumulative Var 0.24 0.42 0.60 0.78
## Proportion Explained 0.31 0.24 0.23 0.23
## Cumulative Proportion 0.31 0.54 0.77 1.00
##
## With factor correlations of
## PA1 PA4 PA2 PA3
## PA1 1.00 0.70 0.55 0.72
## PA4 0.70 1.00 0.44 0.69
## PA2 0.55 0.44 1.00 0.41
## PA3 0.72 0.69 0.41 1.00
##
## Mean item complexity = 1.5
## Test of the hypothesis that 4 factors are sufficient.
##
## df null model = 36 with the objective function = 7.13 with Chi Square = 321.92
## df of the model are 6 and the objective function was 0.05
##
## The root mean square of the residuals (RMSR) is 0.01
## The df corrected root mean square of the residuals is 0.03
##
## The harmonic n.obs is 50 with the empirical chi square 0.5 with prob < 1
## The total n.obs was 50 with Likelihood Chi Square = 2.29 with prob < 0.89
##
## Tucker Lewis Index of factoring reliability = 1.083
## RMSEA index = 0 and the 90 % confidence intervals are 0 0.085
## BIC = -21.18
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## PA1 PA4 PA2 PA3
## Correlation of (regression) scores with factors 0.96 0.94 0.96 0.98
## Multiple R square of scores with factors 0.92 0.87 0.92 0.96
## Minimum correlation of possible factor scores 0.85 0.75 0.85 0.91
<- FA_PA_P_4F %>%
(FA_PA_P_4F_Summary model_parameters(sort = TRUE, threshold = "max"))
## # Rotated loadings from Factor Analysis (promax-rotation)
##
## Variable | PA1 | PA4 | PA2 | PA3 | Complexity | Uniqueness
## --------------------------------------------------------------
## SMPREV | 0.87 | | | | 1.07 | 0.13
## PDMORT | 0.71 | | | | 1.08 | 0.35
## HDMORT | | 0.78 | | | 1.37 | 0.21
## STMORT | | 0.60 | | | 1.59 | 0.27
## OVWINC | | 0.53 | | | 2.73 | 0.21
## MEUNDA | | | 0.91 | | 1.04 | 0.10
## ARTINC | | | 0.56 | | 2.18 | 0.11
## RDMORT | | | | 1.04 | 1.03 | 0.05
## DBMORT | | | | 0.47 | 1.42 | 0.56
##
## The 4 latent factors (promax rotation) accounted for 77.81% of the total variance of the original data (PA1 = 23.77%, PA4 = 18.37%, PA2 = 17.98%, PA3 = 17.70%).
summary(FA_PA_P_4F_Summary)
## # (Explained) Variance of Components
##
## Parameter | PA1 | PA4 | PA2 | PA3
## ---------------------------------------------------------------
## Eigenvalues | 5.441 | 0.825 | 0.391 | 0.347
## Variance Explained | 0.238 | 0.184 | 0.180 | 0.177
## Variance Explained (Cumulative) | 0.238 | 0.421 | 0.601 | 0.778
## Variance Explained (Proportion) | 0.305 | 0.236 | 0.231 | 0.227
##################################
# Extracting the residuals
# from the Exploratory Factor Analysis
##################################
<- residuals(FA_PA_P_4F,
(FA_PA_P_4F_Residual diag=FALSE,
na.rm=TRUE))
## ARTIN RDMOR PDMOR HDMOR STMOR DBMOR MEUND OVWIN SMPRE
## ARTINC NA
## RDMORT 0.00 NA
## PDMORT -0.01 -0.02 NA
## HDMORT 0.00 0.00 0.01 NA
## STMORT 0.00 0.00 0.00 0.00 NA
## DBMORT -0.01 0.00 0.05 -0.01 0.00 NA
## MEUNDA 0.00 0.00 0.00 0.00 0.00 0.01 NA
## OVWINC 0.00 0.00 -0.01 0.00 0.00 -0.01 0.00 NA
## SMPREV 0.01 0.01 0.00 0.00 -0.01 -0.03 0.00 0.01 NA
##################################
# Obtaining Fit Indices
##################################
<- FA_PA_P_4F$rms) (FA_PA_P_4F_RMS
## [1] 0.01182674
<- FA_PA_P_4F$TLI) (FA_PA_P_4F_TLI
## [1] 1.083371
<- FA_PA_P_4F$BIC) (FA_PA_P_4F_BIC
## [1] -21.18092
<- max(abs(FA_PA_P_4F_Residual),na.rm=TRUE)) (FA_PA_P_4F_MaxResidual
## [1] 0.05035344
<- sum(FA_PA_P_4F_Residual>abs(0.05),na.rm=TRUE)) (FA_PA_P_4F_HighResidual
## [1] 2
<- length(DPA.Descriptors.Numeric)*(length(DPA.Descriptors.Numeric)-1)/2) (FA_PA_P_4F_TotalResidual
## [1] 36
<- FA_PA_P_4F_HighResidual/FA_PA_P_4F_TotalResidual) (FA_PA_P_4F_HighResidualRate
## [1] 0.05555556
##################################
# Graph the factor loading matrices
##################################
fa.diagram(FA_PA_P_4F,
sort=TRUE,
cut=0,
digits=3,
main="Principal Axes Factor Extraction + Promax Rotation : 4 Factors",
cex=0.75)
##################################
# computing the internal consistency
# measure of reliability using the
# Cronbach's alpha coefficient
# for each factor
##################################
alpha(DPA.Descriptors.Numeric[,c("MEUNDA","ARTINC")],
check.keys=FALSE)
##
## Reliability analysis
## Call: alpha(x = DPA.Descriptors.Numeric[, c("MEUNDA", "ARTINC")], check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.89 0.89 0.8 0.8 8.2 0.031 2.3e-16 0.95 0.8
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.81 0.89 0.94
## Duhachek 0.83 0.89 0.95
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## MEUNDA 0.8 0.8 0.65 0.8 4.1 NA 0 0.8
## ARTINC 0.8 0.8 0.65 0.8 4.1 NA 0 0.8
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## MEUNDA 50 0.95 0.95 0.85 0.8 1.5e-16 1
## ARTINC 50 0.95 0.95 0.85 0.8 3.0e-16 1
##################################
# computing the internal consistency
# measure of reliability using the
# Cronbach's alpha coefficient
# for each factor
##################################
alpha(DPA.Descriptors.Numeric[,c("RDMORT","DBMORT")],
check.keys=FALSE)
##
## Reliability analysis
## Call: alpha(x = DPA.Descriptors.Numeric[, c("RDMORT", "DBMORT")], check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.76 0.76 0.61 0.61 3.2 0.068 1.5e-16 0.9 0.61
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.58 0.76 0.86
## Duhachek 0.63 0.76 0.89
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## RDMORT 0.61 0.61 0.38 0.61 1.6 NA 0 0.61
## DBMORT 0.61 0.61 0.38 0.61 1.6 NA 0 0.61
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## RDMORT 50 0.9 0.9 0.7 0.61 3.3e-16 1
## DBMORT 50 0.9 0.9 0.7 0.61 -1.6e-17 1
##################################
# computing the internal consistency
# measure of reliability using the
# Cronbach's alpha coefficient
# for each factor
##################################
alpha(DPA.Descriptors.Numeric[,c("SMPREV","PDMORT")],
check.keys=FALSE)
##
## Reliability analysis
## Call: alpha(x = DPA.Descriptors.Numeric[, c("SMPREV", "PDMORT")], check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.85 0.85 0.74 0.74 5.6 0.043 1.5e-16 0.93 0.74
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.73 0.85 0.91
## Duhachek 0.77 0.85 0.93
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## SMPREV 0.74 0.74 0.55 0.74 2.8 NA 0 0.74
## PDMORT 0.74 0.74 0.55 0.74 2.8 NA 0 0.74
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## SMPREV 50 0.93 0.93 0.8 0.74 1.2e-16 1
## PDMORT 50 0.93 0.93 0.8 0.74 1.6e-16 1
##################################
# computing the internal consistency
# measure of reliability using the
# Cronbach's alpha coefficient
# for each factor
##################################
alpha(DPA.Descriptors.Numeric[,c("STMORT","OVWINC","HDMORT")],
check.keys=FALSE)
##
## Reliability analysis
## Call: alpha(x = DPA.Descriptors.Numeric[, c("STMORT", "OVWINC", "HDMORT")],
## check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.86 0.86 0.8 0.67 6.1 0.035 -5.4e-16 0.88 0.69
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.77 0.86 0.91
## Duhachek 0.79 0.86 0.93
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## STMORT 0.77 0.77 0.62 0.62 3.3 0.066 NA 0.62
## OVWINC 0.82 0.82 0.70 0.70 4.6 0.050 NA 0.70
## HDMORT 0.82 0.82 0.69 0.69 4.4 0.052 NA 0.69
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## STMORT 50 0.90 0.90 0.83 0.77 4.6e-16 1
## OVWINC 50 0.87 0.87 0.77 0.71 -1.8e-15 1
## HDMORT 50 0.88 0.88 0.78 0.72 -2.9e-16 1
##################################
# Formulating the dandelion plot to
# visualize both factor variances and loadings
# from the factor loading matrices
##################################
<- factload(DPA.Descriptors.Numeric,
FA_PA_P_4F_FactorLoading cormeth = "pearson",
method = "prax",
nfac = 4,
rotation = "promax")
<- rev(rainbow(100, start = 0, end = 0.2))
DandelionPlotPalette
dandelion(FA_PA_P_4F_FactorLoading,
bound=0,
mcex=c(1,1.2),
palet=DandelionPlotPalette)
##################################
# Implementing various procedures for determining
# factor retention based on
# the maximum consensus between methods
##################################
<- parameters::n_factors(DPA.Descriptors.Numeric,
(FA_ML_V_MethodAgreementProcedure algorithm = "mle",
rotation = "varimax"))
## # Method Agreement Procedure:
##
## The choice of 1 dimensions is supported by 8 (42.11%) methods out of 19 (Optimal coordinates, Acceleration factor, Parallel analysis, Kaiser criterion, Scree (SE), Scree (R2), VSS complexity 1, Velicer's MAP).
as.data.frame(FA_ML_V_MethodAgreementProcedure)
## n_Factors Method Family
## 1 1 Optimal coordinates Scree
## 2 1 Acceleration factor Scree
## 3 1 Parallel analysis Scree
## 4 1 Kaiser criterion Scree
## 5 1 Scree (SE) Scree_SE
## 6 1 Scree (R2) Scree_SE
## 7 1 VSS complexity 1 VSS
## 8 1 Velicer's MAP Velicers_MAP
## 9 2 VSS complexity 2 VSS
## 10 2 BIC BIC
## 11 3 Bentler Bentler
## 12 3 CNG CNG
## 13 3 t Multiple_regression
## 14 3 p Multiple_regression
## 15 4 Bartlett Barlett
## 16 4 beta Multiple_regression
## 17 4 BIC (adjusted) BIC
## 18 5 Anderson Barlett
## 19 5 Lawley Barlett
##################################
# Conducting exploratory factor analysis
# using Maximum Likelihood extraction
# and Varimax rotation
# with 3 factors
##################################
<- fa(DPA.Descriptors.Numeric,
(FA_ML_V_3F nfactors = 3,
fm="ml",
rotate = "varimax",
residuals=TRUE,
SMC=TRUE,
n.obs=nrow(DPA.Descriptors.Numeric)))
## Factor Analysis using method = ml
## Call: fa(r = DPA.Descriptors.Numeric, nfactors = 3, n.obs = nrow(DPA.Descriptors.Numeric),
## rotate = "varimax", residuals = TRUE, SMC = TRUE, fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML3 ML1 ML2 h2 u2 com
## ARTINC 0.34 0.64 0.56 0.84 0.155 2.5
## RDMORT 0.68 0.23 0.30 0.61 0.389 1.6
## PDMORT 0.46 0.28 0.56 0.61 0.388 2.4
## HDMORT 0.60 0.49 0.25 0.67 0.335 2.3
## STMORT 0.81 0.31 0.19 0.79 0.210 1.4
## DBMORT 0.51 0.20 0.29 0.38 0.617 1.9
## MEUNDA 0.20 0.95 0.23 1.00 0.005 1.2
## OVWINC 0.74 0.04 0.44 0.75 0.255 1.6
## SMPREV 0.41 0.37 0.79 0.93 0.069 2.0
##
## ML3 ML1 ML2
## SS loadings 2.85 1.95 1.77
## Proportion Var 0.32 0.22 0.20
## Cumulative Var 0.32 0.53 0.73
## Proportion Explained 0.43 0.30 0.27
## Cumulative Proportion 0.43 0.73 1.00
##
## Mean item complexity = 1.9
## Test of the hypothesis that 3 factors are sufficient.
##
## df null model = 36 with the objective function = 7.13 with Chi Square = 321.92
## df of the model are 12 and the objective function was 0.29
##
## The root mean square of the residuals (RMSR) is 0.03
## The df corrected root mean square of the residuals is 0.06
##
## The harmonic n.obs is 50 with the empirical chi square 4.2 with prob < 0.98
## The total n.obs was 50 with Likelihood Chi Square = 12.57 with prob < 0.4
##
## Tucker Lewis Index of factoring reliability = 0.994
## RMSEA index = 0.023 and the 90 % confidence intervals are 0 0.151
## BIC = -34.38
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## ML3 ML1 ML2
## Correlation of (regression) scores with factors 0.91 0.99 0.92
## Multiple R square of scores with factors 0.83 0.99 0.84
## Minimum correlation of possible factor scores 0.66 0.97 0.68
<- FA_ML_V_3F %>%
(FA_ML_V_3F_Summary model_parameters(sort = TRUE, threshold = "max"))
## # Rotated loadings from Factor Analysis (varimax-rotation)
##
## Variable | ML3 | ML1 | ML2 | Complexity | Uniqueness
## -------------------------------------------------------
## STMORT | 0.81 | | | 1.39 | 0.21
## OVWINC | 0.74 | | | 1.62 | 0.25
## RDMORT | 0.68 | | | 1.63 | 0.39
## HDMORT | 0.60 | | | 2.29 | 0.33
## DBMORT | 0.51 | | | 1.90 | 0.62
## MEUNDA | | 0.95 | | 1.20 | 5.00e-03
## ARTINC | | 0.64 | | 2.53 | 0.16
## SMPREV | | | 0.79 | 1.98 | 0.07
## PDMORT | | | 0.56 | 2.44 | 0.39
##
## The 3 latent factors (varimax rotation) accounted for 73.08% of the total variance of the original data (ML3 = 31.68%, ML1 = 21.70%, ML2 = 19.70%).
summary(FA_ML_V_3F_Summary)
## # (Explained) Variance of Components
##
## Parameter | ML3 | ML1 | ML2
## -------------------------------------------------------
## Eigenvalues | 5.396 | 0.822 | 0.364
## Variance Explained | 0.317 | 0.217 | 0.197
## Variance Explained (Cumulative) | 0.317 | 0.534 | 0.731
## Variance Explained (Proportion) | 0.434 | 0.297 | 0.270
##################################
# Extracting the residuals
# from the Exploratory Factor Analysis
##################################
<- residuals(FA_ML_V_3F,
(FA_ML_V_3F_Residual diag=FALSE,
na.rm=TRUE))
## ARTIN RDMOR PDMOR HDMOR STMOR DBMOR MEUND OVWIN SMPRE
## ARTINC NA
## RDMORT 0.05 NA
## PDMORT 0.01 -0.05 NA
## HDMORT -0.02 -0.08 0.02 NA
## STMORT 0.00 0.02 0.00 0.01 NA
## DBMORT 0.02 0.13 0.06 -0.06 -0.03 NA
## MEUNDA 0.00 0.00 0.00 0.00 0.00 0.00 NA
## OVWINC -0.03 -0.01 0.01 0.04 -0.01 -0.02 0.00 NA
## SMPREV 0.00 -0.01 0.00 0.00 0.00 -0.01 0.00 0.01 NA
##################################
# Obtaining Fit Indices
##################################
<- FA_ML_V_3F$rms) (FA_ML_V_3F_RMS
## [1] 0.03416164
<- FA_ML_V_3F$TLI) (FA_ML_V_3F_TLI
## [1] 0.9937238
<- FA_ML_V_3F$BIC) (FA_ML_V_3F_BIC
## [1] -34.37594
<- max(abs(FA_ML_V_3F_Residual),na.rm=TRUE)) (FA_ML_V_3F_MaxResidual
## [1] 0.132959
<- sum(FA_ML_V_3F_Residual>abs(0.05),na.rm=TRUE)) (FA_ML_V_3F_HighResidual
## [1] 6
<- length(DPA.Descriptors.Numeric)*(length(DPA.Descriptors.Numeric)-1)/2) (FA_ML_V_3F_TotalResidual
## [1] 36
<- FA_ML_V_3F_HighResidual/FA_ML_V_3F_TotalResidual) (FA_ML_V_3F_HighResidualRate
## [1] 0.1666667
##################################
# Graph the factor loading matrices
##################################
fa.diagram(FA_ML_V_3F,
sort=TRUE,
cut=0,
digits=3,
main="Maximum Likelihood Factor Extraction + Varimax Rotation : 3 Factors",
cex=0.75)
##################################
# computing the internal consistency
# measure of reliability using the
# Cronbach's alpha coefficient
# for each factor
##################################
alpha(DPA.Descriptors.Numeric[,c("STMORT","OVWINC","RDMORT","HDMORT","DBMORT")],
check.keys=FALSE)
##
## Reliability analysis
## Call: alpha(x = DPA.Descriptors.Numeric[, c("STMORT", "OVWINC", "RDMORT",
## "HDMORT", "DBMORT")], check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.88 0.88 0.87 0.59 7.2 0.028 -2.6e-16 0.82 0.62
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.81 0.88 0.92
## Duhachek 0.82 0.88 0.93
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## STMORT 0.83 0.83 0.80 0.55 4.9 0.039 0.0079 0.57
## OVWINC 0.84 0.84 0.83 0.58 5.4 0.036 0.0133 0.57
## RDMORT 0.84 0.84 0.81 0.57 5.3 0.037 0.0134 0.56
## HDMORT 0.86 0.86 0.83 0.61 6.2 0.032 0.0077 0.63
## DBMORT 0.88 0.88 0.85 0.64 7.3 0.028 0.0046 0.66
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## STMORT 50 0.88 0.88 0.85 0.79 4.6e-16 1
## OVWINC 50 0.84 0.84 0.79 0.74 -1.8e-15 1
## RDMORT 50 0.85 0.85 0.81 0.75 3.3e-16 1
## HDMORT 50 0.79 0.79 0.73 0.67 -2.9e-16 1
## DBMORT 50 0.74 0.74 0.64 0.59 -1.6e-17 1
##################################
# computing the internal consistency
# measure of reliability using the
# Cronbach's alpha coefficient
# for each factor
##################################
alpha(DPA.Descriptors.Numeric[,c("MEUNDA","ARTINC")],
check.keys=FALSE)
##
## Reliability analysis
## Call: alpha(x = DPA.Descriptors.Numeric[, c("MEUNDA", "ARTINC")], check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.89 0.89 0.8 0.8 8.2 0.031 2.3e-16 0.95 0.8
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.81 0.89 0.94
## Duhachek 0.83 0.89 0.95
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## MEUNDA 0.8 0.8 0.65 0.8 4.1 NA 0 0.8
## ARTINC 0.8 0.8 0.65 0.8 4.1 NA 0 0.8
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## MEUNDA 50 0.95 0.95 0.85 0.8 1.5e-16 1
## ARTINC 50 0.95 0.95 0.85 0.8 3.0e-16 1
##################################
# computing the internal consistency
# measure of reliability using the
# Cronbach's alpha coefficient
# for each factor
##################################
alpha(DPA.Descriptors.Numeric[,c("SMPREV","PDMORT")],
check.keys=FALSE)
##
## Reliability analysis
## Call: alpha(x = DPA.Descriptors.Numeric[, c("SMPREV", "PDMORT")], check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.85 0.85 0.74 0.74 5.6 0.043 1.5e-16 0.93 0.74
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.73 0.85 0.91
## Duhachek 0.77 0.85 0.93
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## SMPREV 0.74 0.74 0.55 0.74 2.8 NA 0 0.74
## PDMORT 0.74 0.74 0.55 0.74 2.8 NA 0 0.74
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## SMPREV 50 0.93 0.93 0.8 0.74 1.2e-16 1
## PDMORT 50 0.93 0.93 0.8 0.74 1.6e-16 1
##################################
# Formulating the dandelion plot to
# visualize both factor variances and loadings
# from the factor loading matrices
##################################
<- factload(DPA.Descriptors.Numeric,
FA_ML_V_3F_FactorLoading cormeth = "pearson",
method = "mle",
nfac = 3,
rotation = "varimax")
<- rev(rainbow(100, start = 0, end = 0.2))
DandelionPlotPalette
dandelion(FA_ML_V_3F_FactorLoading,
bound=0,
mcex=c(1,1.2),
palet=DandelionPlotPalette)
##################################
# Conducting exploratory factor analysis
# using Maximum Likelihood extraction
# and Varimax rotation
# with 4 factors
##################################
<- fa(DPA.Descriptors.Numeric,
(FA_ML_V_4F nfactors = 4,
fm="ml",
rotate = "varimax",
residuals=TRUE,
SMC=TRUE,
n.obs=nrow(DPA.Descriptors.Numeric)))
## Factor Analysis using method = ml
## Call: fa(r = DPA.Descriptors.Numeric, nfactors = 4, n.obs = nrow(DPA.Descriptors.Numeric),
## rotate = "varimax", residuals = TRUE, SMC = TRUE, fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML2 ML1 ML3 ML4 h2 u2 com
## ARTINC 0.69 0.34 0.52 0.15 0.89 0.106 2.5
## RDMORT 0.21 0.90 0.22 0.25 0.96 0.040 1.4
## PDMORT 0.30 0.25 0.60 0.34 0.63 0.370 2.6
## HDMORT 0.47 0.23 0.26 0.66 0.77 0.226 2.4
## STMORT 0.28 0.49 0.24 0.60 0.73 0.267 2.8
## DBMORT 0.19 0.51 0.29 0.23 0.43 0.574 2.4
## MEUNDA 0.90 0.16 0.18 0.21 0.90 0.096 1.3
## OVWINC -0.01 0.42 0.52 0.60 0.80 0.201 2.8
## SMPREV 0.41 0.31 0.75 0.26 0.90 0.105 2.2
##
## ML2 ML1 ML3 ML4
## SS loadings 1.92 1.82 1.75 1.52
## Proportion Var 0.21 0.20 0.19 0.17
## Cumulative Var 0.21 0.42 0.61 0.78
## Proportion Explained 0.27 0.26 0.25 0.22
## Cumulative Proportion 0.27 0.53 0.78 1.00
##
## Mean item complexity = 2.3
## Test of the hypothesis that 4 factors are sufficient.
##
## df null model = 36 with the objective function = 7.13 with Chi Square = 321.92
## df of the model are 6 and the objective function was 0.04
##
## The root mean square of the residuals (RMSR) is 0.01
## The df corrected root mean square of the residuals is 0.03
##
## The harmonic n.obs is 50 with the empirical chi square 0.69 with prob < 0.99
## The total n.obs was 50 with Likelihood Chi Square = 1.83 with prob < 0.93
##
## Tucker Lewis Index of factoring reliability = 1.094
## RMSEA index = 0 and the 90 % confidence intervals are 0 0.049
## BIC = -21.64
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## ML2 ML1 ML3 ML4
## Correlation of (regression) scores with factors 0.95 0.96 0.90 0.85
## Multiple R square of scores with factors 0.89 0.92 0.80 0.73
## Minimum correlation of possible factor scores 0.79 0.84 0.61 0.46
<- FA_ML_V_4F %>%
(FA_ML_V_4F_Summary model_parameters(sort = TRUE, threshold = "max"))
## # Rotated loadings from Factor Analysis (varimax-rotation)
##
## Variable | ML2 | ML1 | ML3 | ML4 | Complexity | Uniqueness
## --------------------------------------------------------------
## MEUNDA | 0.90 | | | | 1.26 | 0.10
## ARTINC | 0.69 | | | | 2.49 | 0.11
## RDMORT | | 0.90 | | | 1.41 | 0.04
## DBMORT | | 0.51 | | | 2.38 | 0.57
## SMPREV | | | 0.75 | | 2.24 | 0.10
## PDMORT | | | 0.60 | | 2.56 | 0.37
## HDMORT | | | | 0.66 | 2.44 | 0.23
## OVWINC | | | | 0.60 | 2.77 | 0.20
## STMORT | | | | 0.60 | 2.76 | 0.27
##
## The 4 latent factors (varimax rotation) accounted for 77.96% of the total variance of the original data (ML2 = 21.30%, ML1 = 20.28%, ML3 = 19.47%, ML4 = 16.91%).
summary(FA_ML_V_4F_Summary)
## # (Explained) Variance of Components
##
## Parameter | ML2 | ML1 | ML3 | ML4
## ---------------------------------------------------------------
## Eigenvalues | 5.443 | 0.831 | 0.401 | 0.345
## Variance Explained | 0.213 | 0.203 | 0.195 | 0.169
## Variance Explained (Cumulative) | 0.213 | 0.416 | 0.610 | 0.780
## Variance Explained (Proportion) | 0.273 | 0.260 | 0.250 | 0.217
##################################
# Extracting the residuals
# from the Exploratory Factor Analysis
##################################
<- residuals(FA_ML_V_4F,
(FA_ML_V_4F_Residual diag=FALSE,
na.rm=TRUE))
## ARTIN RDMOR PDMOR HDMOR STMOR DBMOR MEUND OVWIN SMPRE
## ARTINC NA
## RDMORT 0.00 NA
## PDMORT 0.00 0.00 NA
## HDMORT 0.00 0.00 0.00 NA
## STMORT 0.01 0.00 0.01 0.00 NA
## DBMORT 0.00 0.00 0.08 -0.02 0.00 NA
## MEUNDA 0.00 0.00 0.00 0.00 0.00 0.01 NA
## OVWINC 0.00 0.00 -0.01 0.00 0.00 0.00 0.00 NA
## SMPREV 0.00 0.00 0.00 0.00 -0.01 -0.01 0.00 0.00 NA
##################################
# Obtaining Fit Indices
##################################
<- FA_ML_V_4F$rms) (FA_ML_V_4F_RMS
## [1] 0.01385925
<- FA_ML_V_4F$TLI) (FA_ML_V_4F_TLI
## [1] 1.093667
<- FA_ML_V_4F$BIC) (FA_ML_V_4F_BIC
## [1] -21.63894
<- max(abs(FA_ML_V_4F_Residual),na.rm=TRUE)) (FA_ML_V_4F_MaxResidual
## [1] 0.07801115
<- sum(FA_ML_V_4F_Residual>abs(0.05),na.rm=TRUE)) (FA_ML_V_4F_HighResidual
## [1] 2
<- length(DPA.Descriptors.Numeric)*(length(DPA.Descriptors.Numeric)-1)/2) (FA_ML_V_4F_TotalResidual
## [1] 36
<- FA_ML_V_4F_HighResidual/FA_ML_V_4F_TotalResidual) (FA_ML_V_4F_HighResidualRate
## [1] 0.05555556
##################################
# Graph the factor loading matrices
##################################
fa.diagram(FA_ML_V_4F,
sort=TRUE,
cut=0,
digits=3,
main="Maximum Likelihood Factor Extraction + Varimax Rotation : 4 Factors",
cex=0.75)
##################################
# computing the internal consistency
# measure of reliability using the
# Cronbach's alpha coefficient
# for each factor
##################################
alpha(DPA.Descriptors.Numeric[,c("MEUNDA","ARTINC")],
check.keys=FALSE)
##
## Reliability analysis
## Call: alpha(x = DPA.Descriptors.Numeric[, c("MEUNDA", "ARTINC")], check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.89 0.89 0.8 0.8 8.2 0.031 2.3e-16 0.95 0.8
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.81 0.89 0.94
## Duhachek 0.83 0.89 0.95
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## MEUNDA 0.8 0.8 0.65 0.8 4.1 NA 0 0.8
## ARTINC 0.8 0.8 0.65 0.8 4.1 NA 0 0.8
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## MEUNDA 50 0.95 0.95 0.85 0.8 1.5e-16 1
## ARTINC 50 0.95 0.95 0.85 0.8 3.0e-16 1
##################################
# computing the internal consistency
# measure of reliability using the
# Cronbach's alpha coefficient
# for each factor
##################################
alpha(DPA.Descriptors.Numeric[,c("RDMORT","DBMORT")],
check.keys=FALSE)
##
## Reliability analysis
## Call: alpha(x = DPA.Descriptors.Numeric[, c("RDMORT", "DBMORT")], check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.76 0.76 0.61 0.61 3.2 0.068 1.5e-16 0.9 0.61
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.58 0.76 0.86
## Duhachek 0.63 0.76 0.89
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## RDMORT 0.61 0.61 0.38 0.61 1.6 NA 0 0.61
## DBMORT 0.61 0.61 0.38 0.61 1.6 NA 0 0.61
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## RDMORT 50 0.9 0.9 0.7 0.61 3.3e-16 1
## DBMORT 50 0.9 0.9 0.7 0.61 -1.6e-17 1
##################################
# computing the internal consistency
# measure of reliability using the
# Cronbach's alpha coefficient
# for each factor
##################################
alpha(DPA.Descriptors.Numeric[,c("SMPREV","PDMORT")],
check.keys=FALSE)
##
## Reliability analysis
## Call: alpha(x = DPA.Descriptors.Numeric[, c("SMPREV", "PDMORT")], check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.85 0.85 0.74 0.74 5.6 0.043 1.5e-16 0.93 0.74
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.73 0.85 0.91
## Duhachek 0.77 0.85 0.93
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## SMPREV 0.74 0.74 0.55 0.74 2.8 NA 0 0.74
## PDMORT 0.74 0.74 0.55 0.74 2.8 NA 0 0.74
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## SMPREV 50 0.93 0.93 0.8 0.74 1.2e-16 1
## PDMORT 50 0.93 0.93 0.8 0.74 1.6e-16 1
##################################
# computing the internal consistency
# measure of reliability using the
# Cronbach's alpha coefficient
# for each factor
##################################
alpha(DPA.Descriptors.Numeric[,c("STMORT","OVWINC","HDMORT")],
check.keys=FALSE)
##
## Reliability analysis
## Call: alpha(x = DPA.Descriptors.Numeric[, c("STMORT", "OVWINC", "HDMORT")],
## check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.86 0.86 0.8 0.67 6.1 0.035 -5.4e-16 0.88 0.69
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.77 0.86 0.91
## Duhachek 0.79 0.86 0.93
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## STMORT 0.77 0.77 0.62 0.62 3.3 0.066 NA 0.62
## OVWINC 0.82 0.82 0.70 0.70 4.6 0.050 NA 0.70
## HDMORT 0.82 0.82 0.69 0.69 4.4 0.052 NA 0.69
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## STMORT 50 0.90 0.90 0.83 0.77 4.6e-16 1
## OVWINC 50 0.87 0.87 0.77 0.71 -1.8e-15 1
## HDMORT 50 0.88 0.88 0.78 0.72 -2.9e-16 1
##################################
# Formulating the dandelion plot to
# visualize both factor variances and loadings
# from the factor loading matrices
##################################
<- factload(DPA.Descriptors.Numeric,
FA_ML_V_4F_FactorLoading cormeth = "pearson",
method = "mle",
nfac = 4,
rotation = "varimax")
<- rev(rainbow(100, start = 0, end = 0.2))
DandelionPlotPalette
dandelion(FA_ML_V_4F_FactorLoading,
bound=0,
mcex=c(1,1.2),
palet=DandelionPlotPalette)
##################################
# Implementing various procedures for determining
# factor retention based on
# the maximum consensus between methods
##################################
<- parameters::n_factors(DPA.Descriptors.Numeric,
(FA_ML_P_MethodAgreementProcedure algorithm = "mle",
rotation = "promax"))
## # Method Agreement Procedure:
##
## The choice of 1 dimensions is supported by 8 (42.11%) methods out of 19 (Optimal coordinates, Acceleration factor, Parallel analysis, Kaiser criterion, Scree (SE), Scree (R2), VSS complexity 1, Velicer's MAP).
as.data.frame(FA_ML_P_MethodAgreementProcedure)
## n_Factors Method Family
## 1 1 Optimal coordinates Scree
## 2 1 Acceleration factor Scree
## 3 1 Parallel analysis Scree
## 4 1 Kaiser criterion Scree
## 5 1 Scree (SE) Scree_SE
## 6 1 Scree (R2) Scree_SE
## 7 1 VSS complexity 1 VSS
## 8 1 Velicer's MAP Velicers_MAP
## 9 2 VSS complexity 2 VSS
## 10 2 BIC BIC
## 11 3 Bentler Bentler
## 12 3 CNG CNG
## 13 3 t Multiple_regression
## 14 3 p Multiple_regression
## 15 4 Bartlett Barlett
## 16 4 beta Multiple_regression
## 17 4 BIC (adjusted) BIC
## 18 5 Anderson Barlett
## 19 5 Lawley Barlett
##################################
# Conducting exploratory factor analysis
# using Maximum Likelihood extraction
# and Promax rotation
# with 3 factors
##################################
<- fa(DPA.Descriptors.Numeric,
(FA_ML_P_3F nfactors = 3,
fm="ml",
rotate = "promax",
residuals=TRUE,
SMC=TRUE,
n.obs=nrow(DPA.Descriptors.Numeric)))
## Factor Analysis using method = ml
## Call: fa(r = DPA.Descriptors.Numeric, nfactors = 3, n.obs = nrow(DPA.Descriptors.Numeric),
## rotate = "promax", residuals = TRUE, SMC = TRUE, fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML3 ML2 ML1 h2 u2 com
## ARTINC -0.01 0.54 0.50 0.84 0.155 2.0
## RDMORT 0.72 0.08 0.00 0.61 0.389 1.0
## PDMORT 0.24 0.56 0.03 0.61 0.388 1.4
## HDMORT 0.58 -0.03 0.36 0.67 0.335 1.7
## STMORT 0.96 -0.18 0.11 0.79 0.210 1.1
## DBMORT 0.50 0.14 0.01 0.38 0.617 1.2
## MEUNDA -0.07 0.02 1.02 1.00 0.005 1.0
## OVWINC 0.76 0.30 -0.29 0.75 0.255 1.6
## SMPREV 0.01 0.91 0.07 0.93 0.069 1.0
##
## ML3 ML2 ML1
## SS loadings 2.91 1.97 1.70
## Proportion Var 0.32 0.22 0.19
## Cumulative Var 0.32 0.54 0.73
## Proportion Explained 0.44 0.30 0.26
## Cumulative Proportion 0.44 0.74 1.00
##
## With factor correlations of
## ML3 ML2 ML1
## ML3 1.00 0.75 0.56
## ML2 0.75 1.00 0.60
## ML1 0.56 0.60 1.00
##
## Mean item complexity = 1.3
## Test of the hypothesis that 3 factors are sufficient.
##
## df null model = 36 with the objective function = 7.13 with Chi Square = 321.92
## df of the model are 12 and the objective function was 0.29
##
## The root mean square of the residuals (RMSR) is 0.03
## The df corrected root mean square of the residuals is 0.06
##
## The harmonic n.obs is 50 with the empirical chi square 4.2 with prob < 0.98
## The total n.obs was 50 with Likelihood Chi Square = 12.57 with prob < 0.4
##
## Tucker Lewis Index of factoring reliability = 0.994
## RMSEA index = 0.023 and the 90 % confidence intervals are 0 0.151
## BIC = -34.38
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## ML3 ML2 ML1
## Correlation of (regression) scores with factors 0.96 0.97 1.00
## Multiple R square of scores with factors 0.92 0.94 0.99
## Minimum correlation of possible factor scores 0.83 0.89 0.99
<- FA_ML_P_3F %>%
(FA_ML_P_3F_Summary model_parameters(sort = TRUE, threshold = "max"))
## # Rotated loadings from Factor Analysis (promax-rotation)
##
## Variable | ML3 | ML2 | ML1 | Complexity | Uniqueness
## -------------------------------------------------------
## STMORT | 0.96 | | | 1.10 | 0.21
## OVWINC | 0.76 | | | 1.60 | 0.25
## RDMORT | 0.72 | | | 1.02 | 0.39
## HDMORT | 0.58 | | | 1.66 | 0.33
## DBMORT | 0.50 | | | 1.16 | 0.62
## SMPREV | | 0.91 | | 1.01 | 0.07
## PDMORT | | 0.56 | | 1.36 | 0.39
## ARTINC | | 0.54 | | 1.99 | 0.16
## MEUNDA | | | 1.02 | 1.01 | 5.00e-03
##
## The 3 latent factors (promax rotation) accounted for 73.08% of the total variance of the original data (ML3 = 32.37%, ML2 = 21.87%, ML1 = 18.85%).
summary(FA_ML_P_3F_Summary)
## # (Explained) Variance of Components
##
## Parameter | ML3 | ML2 | ML1
## -------------------------------------------------------
## Eigenvalues | 5.396 | 0.822 | 0.364
## Variance Explained | 0.324 | 0.219 | 0.188
## Variance Explained (Cumulative) | 0.324 | 0.542 | 0.731
## Variance Explained (Proportion) | 0.443 | 0.299 | 0.258
##################################
# Extracting the residuals
# from the Exploratory Factor Analysis
##################################
<- residuals(FA_ML_P_3F,
(FA_ML_P_3F_Residual diag=FALSE,
na.rm=TRUE))
## ARTIN RDMOR PDMOR HDMOR STMOR DBMOR MEUND OVWIN SMPRE
## ARTINC NA
## RDMORT 0.05 NA
## PDMORT 0.01 -0.05 NA
## HDMORT -0.02 -0.08 0.02 NA
## STMORT 0.00 0.02 0.00 0.01 NA
## DBMORT 0.02 0.13 0.06 -0.06 -0.03 NA
## MEUNDA 0.00 0.00 0.00 0.00 0.00 0.00 NA
## OVWINC -0.03 -0.01 0.01 0.04 -0.01 -0.02 0.00 NA
## SMPREV 0.00 -0.01 0.00 0.00 0.00 -0.01 0.00 0.01 NA
##################################
# Obtaining Fit Indices
##################################
<- FA_ML_P_3F$rms) (FA_ML_P_3F_RMS
## [1] 0.03416164
<- FA_ML_P_3F$TLI) (FA_ML_P_3F_TLI
## [1] 0.9937238
<- FA_ML_P_3F$BIC) (FA_ML_P_3F_BIC
## [1] -34.37594
<- max(abs(FA_ML_P_3F_Residual),na.rm=TRUE)) (FA_ML_P_3F_MaxResidual
## [1] 0.132959
<- sum(FA_ML_P_3F_Residual>abs(0.05),na.rm=TRUE)) (FA_ML_P_3F_HighResidual
## [1] 6
<- length(DPA.Descriptors.Numeric)*(length(DPA.Descriptors.Numeric)-1)/2) (FA_ML_P_3F_TotalResidual
## [1] 36
<- FA_ML_P_3F_HighResidual/FA_ML_P_3F_TotalResidual) (FA_ML_P_3F_HighResidualRate
## [1] 0.1666667
##################################
# Graph the factor loading matrices
##################################
fa.diagram(FA_ML_P_3F,
sort=TRUE,
cut=0,
digits=3,
main="Maximum Likelihood Factor Extraction + Promax Rotation : 3 Factors",
cex=0.75)
##################################
# computing the internal consistency
# measure of reliability using the
# Cronbach's alpha coefficient
# for each factor
##################################
alpha(DPA.Descriptors.Numeric[,c("STMORT","OVWINC","RDMORT","HDMORT","DBMORT")],
check.keys=FALSE)
##
## Reliability analysis
## Call: alpha(x = DPA.Descriptors.Numeric[, c("STMORT", "OVWINC", "RDMORT",
## "HDMORT", "DBMORT")], check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.88 0.88 0.87 0.59 7.2 0.028 -2.6e-16 0.82 0.62
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.81 0.88 0.92
## Duhachek 0.82 0.88 0.93
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## STMORT 0.83 0.83 0.80 0.55 4.9 0.039 0.0079 0.57
## OVWINC 0.84 0.84 0.83 0.58 5.4 0.036 0.0133 0.57
## RDMORT 0.84 0.84 0.81 0.57 5.3 0.037 0.0134 0.56
## HDMORT 0.86 0.86 0.83 0.61 6.2 0.032 0.0077 0.63
## DBMORT 0.88 0.88 0.85 0.64 7.3 0.028 0.0046 0.66
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## STMORT 50 0.88 0.88 0.85 0.79 4.6e-16 1
## OVWINC 50 0.84 0.84 0.79 0.74 -1.8e-15 1
## RDMORT 50 0.85 0.85 0.81 0.75 3.3e-16 1
## HDMORT 50 0.79 0.79 0.73 0.67 -2.9e-16 1
## DBMORT 50 0.74 0.74 0.64 0.59 -1.6e-17 1
##################################
# computing the internal consistency
# measure of reliability using the
# Cronbach's alpha coefficient
# for each factor
##################################
alpha(DPA.Descriptors.Numeric[,c("MEUNDA","ARTINC")],
check.keys=FALSE)
##
## Reliability analysis
## Call: alpha(x = DPA.Descriptors.Numeric[, c("MEUNDA", "ARTINC")], check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.89 0.89 0.8 0.8 8.2 0.031 2.3e-16 0.95 0.8
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.81 0.89 0.94
## Duhachek 0.83 0.89 0.95
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## MEUNDA 0.8 0.8 0.65 0.8 4.1 NA 0 0.8
## ARTINC 0.8 0.8 0.65 0.8 4.1 NA 0 0.8
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## MEUNDA 50 0.95 0.95 0.85 0.8 1.5e-16 1
## ARTINC 50 0.95 0.95 0.85 0.8 3.0e-16 1
##################################
# computing the internal consistency
# measure of reliability using the
# Cronbach's alpha coefficient
# for each factor
##################################
alpha(DPA.Descriptors.Numeric[,c("SMPREV","PDMORT")],
check.keys=FALSE)
##
## Reliability analysis
## Call: alpha(x = DPA.Descriptors.Numeric[, c("SMPREV", "PDMORT")], check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.85 0.85 0.74 0.74 5.6 0.043 1.5e-16 0.93 0.74
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.73 0.85 0.91
## Duhachek 0.77 0.85 0.93
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## SMPREV 0.74 0.74 0.55 0.74 2.8 NA 0 0.74
## PDMORT 0.74 0.74 0.55 0.74 2.8 NA 0 0.74
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## SMPREV 50 0.93 0.93 0.8 0.74 1.2e-16 1
## PDMORT 50 0.93 0.93 0.8 0.74 1.6e-16 1
##################################
# Formulating the dandelion plot to
# visualize both factor variances and loadings
# from the factor loading matrices
##################################
<- factload(DPA.Descriptors.Numeric,
FA_ML_P_3F_FactorLoading cormeth = "pearson",
method = "mle",
nfac = 3,
rotation = "promax")
<- rev(rainbow(100, start = 0, end = 0.2))
DandelionPlotPalette
dandelion(FA_ML_P_3F_FactorLoading,
bound=0,
mcex=c(1,1.2),
palet=DandelionPlotPalette)
##################################
# Conducting exploratory factor analysis
# using Maximum Likelihood extraction
# and Promax rotation
# with 4 factors
##################################
<- fa(DPA.Descriptors.Numeric,
(FA_ML_P_4F nfactors = 4,
fm="ml",
rotate = "promax",
residuals=TRUE,
SMC=TRUE,
n.obs=nrow(DPA.Descriptors.Numeric)))
## Factor Analysis using method = ml
## Call: fa(r = DPA.Descriptors.Numeric, nfactors = 4, n.obs = nrow(DPA.Descriptors.Numeric),
## rotate = "promax", residuals = TRUE, SMC = TRUE, fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML3 ML4 ML2 ML1 h2 u2 com
## ARTINC 0.50 -0.13 0.55 0.14 0.89 0.106 2.2
## RDMORT -0.11 0.00 0.02 1.05 0.96 0.040 1.0
## PDMORT 0.67 0.17 0.06 -0.06 0.63 0.370 1.2
## HDMORT 0.01 0.76 0.32 -0.11 0.77 0.226 1.4
## STMORT -0.06 0.61 0.09 0.31 0.73 0.267 1.5
## DBMORT 0.15 0.06 0.02 0.48 0.43 0.574 1.2
## MEUNDA -0.03 0.12 0.91 -0.01 0.90 0.096 1.0
## OVWINC 0.46 0.56 -0.32 0.11 0.80 0.201 2.7
## SMPREV 0.89 -0.03 0.14 -0.01 0.90 0.105 1.1
##
## ML3 ML4 ML2 ML1
## SS loadings 2.08 1.70 1.62 1.62
## Proportion Var 0.23 0.19 0.18 0.18
## Cumulative Var 0.23 0.42 0.60 0.78
## Proportion Explained 0.30 0.24 0.23 0.23
## Cumulative Proportion 0.30 0.54 0.77 1.00
##
## With factor correlations of
## ML3 ML4 ML2 ML1
## ML3 1.00 0.71 0.56 0.71
## ML4 0.71 1.00 0.44 0.69
## ML2 0.56 0.44 1.00 0.43
## ML1 0.71 0.69 0.43 1.00
##
## Mean item complexity = 1.5
## Test of the hypothesis that 4 factors are sufficient.
##
## df null model = 36 with the objective function = 7.13 with Chi Square = 321.92
## df of the model are 6 and the objective function was 0.04
##
## The root mean square of the residuals (RMSR) is 0.01
## The df corrected root mean square of the residuals is 0.03
##
## The harmonic n.obs is 50 with the empirical chi square 0.69 with prob < 0.99
## The total n.obs was 50 with Likelihood Chi Square = 1.83 with prob < 0.93
##
## Tucker Lewis Index of factoring reliability = 1.094
## RMSEA index = 0 and the 90 % confidence intervals are 0 0.049
## BIC = -21.64
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## ML3 ML4 ML2 ML1
## Correlation of (regression) scores with factors 0.97 0.94 0.96 0.98
## Multiple R square of scores with factors 0.93 0.88 0.93 0.97
## Minimum correlation of possible factor scores 0.86 0.76 0.86 0.93
<- FA_ML_P_4F %>%
(FA_ML_P_4F_Summary model_parameters(sort = TRUE, threshold = "max"))
## # Rotated loadings from Factor Analysis (promax-rotation)
##
## Variable | ML3 | ML4 | ML2 | ML1 | Complexity | Uniqueness
## --------------------------------------------------------------
## SMPREV | 0.89 | | | | 1.05 | 0.10
## PDMORT | 0.67 | | | | 1.17 | 0.37
## HDMORT | | 0.76 | | | 1.40 | 0.23
## STMORT | | 0.61 | | | 1.55 | 0.27
## OVWINC | | 0.56 | | | 2.68 | 0.20
## MEUNDA | | | 0.91 | | 1.04 | 0.10
## ARTINC | | | 0.55 | | 2.24 | 0.11
## RDMORT | | | | 1.05 | 1.02 | 0.04
## DBMORT | | | | 0.48 | 1.23 | 0.57
##
## The 4 latent factors (promax rotation) accounted for 77.96% of the total variance of the original data (ML3 = 23.13%, ML4 = 18.86%, ML2 = 18.00%, ML1 = 17.97%).
summary(FA_ML_P_4F_Summary)
## # (Explained) Variance of Components
##
## Parameter | ML3 | ML4 | ML2 | ML1
## ---------------------------------------------------------------
## Eigenvalues | 5.443 | 0.831 | 0.401 | 0.345
## Variance Explained | 0.231 | 0.189 | 0.180 | 0.180
## Variance Explained (Cumulative) | 0.231 | 0.420 | 0.600 | 0.780
## Variance Explained (Proportion) | 0.297 | 0.242 | 0.231 | 0.231
##################################
# Extracting the residuals
# from the Exploratory Factor Analysis
##################################
<- residuals(FA_ML_P_4F,
(FA_ML_P_4F_Residual diag=FALSE,
na.rm=TRUE))
## ARTIN RDMOR PDMOR HDMOR STMOR DBMOR MEUND OVWIN SMPRE
## ARTINC NA
## RDMORT 0.00 NA
## PDMORT 0.00 0.00 NA
## HDMORT 0.00 0.00 0.00 NA
## STMORT 0.01 0.00 0.01 0.00 NA
## DBMORT 0.00 0.00 0.08 -0.02 0.00 NA
## MEUNDA 0.00 0.00 0.00 0.00 0.00 0.01 NA
## OVWINC 0.00 0.00 -0.01 0.00 0.00 0.00 0.00 NA
## SMPREV 0.00 0.00 0.00 0.00 -0.01 -0.01 0.00 0.00 NA
##################################
# Obtaining Fit Indices
##################################
<- FA_ML_P_4F$rms) (FA_ML_P_4F_RMS
## [1] 0.01385925
<- FA_ML_P_4F$TLI) (FA_ML_P_4F_TLI
## [1] 1.093667
<- FA_ML_P_4F$BIC) (FA_ML_P_4F_BIC
## [1] -21.63894
<- max(abs(FA_ML_P_4F_Residual),na.rm=TRUE)) (FA_ML_P_4F_MaxResidual
## [1] 0.07801115
<- sum(FA_ML_P_4F_Residual>abs(0.05),na.rm=TRUE)) (FA_ML_P_4F_HighResidual
## [1] 2
<- length(DPA.Descriptors.Numeric)*(length(DPA.Descriptors.Numeric)-1)/2) (FA_ML_P_4F_TotalResidual
## [1] 36
<- FA_ML_P_4F_HighResidual/FA_ML_P_4F_TotalResidual) (FA_ML_P_4F_HighResidualRate
## [1] 0.05555556
##################################
# Graph the factor loading matrices
##################################
fa.diagram(FA_ML_P_4F,
sort=TRUE,
cut=0,
digits=3,
main="Maximum Likelihood Factor Extraction + Promax Rotation : 4 Factors",
cex=0.75)
##################################
# computing the internal consistency
# measure of reliability using the
# Cronbach's alpha coefficient
# for each factor
##################################
alpha(DPA.Descriptors.Numeric[,c("MEUNDA","ARTINC")],
check.keys=FALSE)
##
## Reliability analysis
## Call: alpha(x = DPA.Descriptors.Numeric[, c("MEUNDA", "ARTINC")], check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.89 0.89 0.8 0.8 8.2 0.031 2.3e-16 0.95 0.8
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.81 0.89 0.94
## Duhachek 0.83 0.89 0.95
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## MEUNDA 0.8 0.8 0.65 0.8 4.1 NA 0 0.8
## ARTINC 0.8 0.8 0.65 0.8 4.1 NA 0 0.8
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## MEUNDA 50 0.95 0.95 0.85 0.8 1.5e-16 1
## ARTINC 50 0.95 0.95 0.85 0.8 3.0e-16 1
##################################
# computing the internal consistency
# measure of reliability using the
# Cronbach's alpha coefficient
# for each factor
##################################
alpha(DPA.Descriptors.Numeric[,c("RDMORT","DBMORT")],
check.keys=FALSE)
##
## Reliability analysis
## Call: alpha(x = DPA.Descriptors.Numeric[, c("RDMORT", "DBMORT")], check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.76 0.76 0.61 0.61 3.2 0.068 1.5e-16 0.9 0.61
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.58 0.76 0.86
## Duhachek 0.63 0.76 0.89
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## RDMORT 0.61 0.61 0.38 0.61 1.6 NA 0 0.61
## DBMORT 0.61 0.61 0.38 0.61 1.6 NA 0 0.61
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## RDMORT 50 0.9 0.9 0.7 0.61 3.3e-16 1
## DBMORT 50 0.9 0.9 0.7 0.61 -1.6e-17 1
##################################
# computing the internal consistency
# measure of reliability using the
# Cronbach's alpha coefficient
# for each factor
##################################
alpha(DPA.Descriptors.Numeric[,c("SMPREV","PDMORT")],
check.keys=FALSE)
##
## Reliability analysis
## Call: alpha(x = DPA.Descriptors.Numeric[, c("SMPREV", "PDMORT")], check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.85 0.85 0.74 0.74 5.6 0.043 1.5e-16 0.93 0.74
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.73 0.85 0.91
## Duhachek 0.77 0.85 0.93
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## SMPREV 0.74 0.74 0.55 0.74 2.8 NA 0 0.74
## PDMORT 0.74 0.74 0.55 0.74 2.8 NA 0 0.74
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## SMPREV 50 0.93 0.93 0.8 0.74 1.2e-16 1
## PDMORT 50 0.93 0.93 0.8 0.74 1.6e-16 1
##################################
# computing the internal consistency
# measure of reliability using the
# Cronbach's alpha coefficient
# for each factor
##################################
alpha(DPA.Descriptors.Numeric[,c("STMORT","OVWINC","HDMORT")],
check.keys=FALSE)
##
## Reliability analysis
## Call: alpha(x = DPA.Descriptors.Numeric[, c("STMORT", "OVWINC", "HDMORT")],
## check.keys = FALSE)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.86 0.86 0.8 0.67 6.1 0.035 -5.4e-16 0.88 0.69
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.77 0.86 0.91
## Duhachek 0.79 0.86 0.93
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## STMORT 0.77 0.77 0.62 0.62 3.3 0.066 NA 0.62
## OVWINC 0.82 0.82 0.70 0.70 4.6 0.050 NA 0.70
## HDMORT 0.82 0.82 0.69 0.69 4.4 0.052 NA 0.69
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## STMORT 50 0.90 0.90 0.83 0.77 4.6e-16 1
## OVWINC 50 0.87 0.87 0.77 0.71 -1.8e-15 1
## HDMORT 50 0.88 0.88 0.78 0.72 -2.9e-16 1
##################################
# Formulating the dandelion plot to
# visualize both factor variances and loadings
# from the factor loading matrices
##################################
<- factload(DPA.Descriptors.Numeric,
FA_ML_P_4F_FactorLoading cormeth = "pearson",
method = "mle",
nfac = 4,
rotation = "promax")
<- rev(rainbow(100, start = 0, end = 0.2))
DandelionPlotPalette
dandelion(FA_ML_P_4F_FactorLoading,
bound=0,
mcex=c(1,1.2),
palet=DandelionPlotPalette)
##################################
# Consolidating the fit indices
# from the exploratory factor analysis
##################################
<- c(FA_PA_V_3F_RMS,
FA_RMSSummary
FA_PA_V_4F_RMS,
FA_PA_P_3F_RMS,
FA_PA_P_4F_RMS,
FA_ML_V_3F_RMS,
FA_ML_V_4F_RMS,
FA_ML_P_3F_RMS,
FA_ML_P_4F_RMS)
<- c(FA_PA_V_3F_TLI,
FA_TLISummary
FA_PA_V_4F_TLI,
FA_PA_P_3F_TLI,
FA_PA_P_4F_TLI,
FA_ML_V_3F_TLI,
FA_ML_V_4F_TLI,
FA_ML_P_3F_TLI,
FA_ML_P_4F_TLI)
<- c(FA_PA_V_3F_BIC,
FA_BICSummary
FA_PA_V_4F_BIC,
FA_PA_P_3F_BIC,
FA_PA_P_4F_BIC,
FA_ML_V_3F_BIC,
FA_ML_V_4F_BIC,
FA_ML_P_3F_BIC,
FA_ML_P_4F_BIC)
<- c(FA_PA_V_3F_HighResidualRate,
FA_HighResidualRateSummary
FA_PA_V_4F_HighResidualRate,
FA_PA_P_3F_HighResidualRate,
FA_PA_P_4F_HighResidualRate,
FA_ML_V_3F_HighResidualRate,
FA_ML_V_4F_HighResidualRate,
FA_ML_P_3F_HighResidualRate,
FA_ML_P_4F_HighResidualRate)
<- c("3-Factor Principal Axes Extraction + Varimax Rotation",
FA_AlgorithmSummary "4-Factor Principal Axes Extraction + Varimax Rotation",
"3-Factor Principal Axes Extraction + Promax Rotation",
"4-Factor Principal Axes Extraction + Promax Rotation",
"3-Factor Maximum Likelihood Extraction + Varimax Rotation",
"4-Factor Maximum Likelihood Extraction + Varimax Rotation",
"3-Factor Maximum Likelihood Extraction + Promax Rotation",
"4-Factor Maximum Likelihood Extraction + Promax Rotation")
<- cbind(FA_RMSSummary,
FA_Summary
FA_TLISummary,
FA_BICSummary,
FA_HighResidualRateSummary,
FA_AlgorithmSummary)
<- as.data.frame(FA_Summary)
FA_Summary names(FA_Summary) <- c("RMS",
"TLI",
"BIC",
"HighResidualRate",
"Algorithm")
$RMS <- as.numeric(as.character(FA_Summary$RMS))
FA_Summary$TLI <- as.numeric(as.character(FA_Summary$TLI))
FA_Summary$BIC <- as.numeric(as.character(FA_Summary$BIC))
FA_Summary$HighResidualRate <- as.numeric(as.character(FA_Summary$HighResidualRate))
FA_Summary
$Algorithm <- factor(FA_Summary$Algorithm ,
FA_Summarylevels = c("3-Factor Principal Axes Extraction + Varimax Rotation",
"4-Factor Principal Axes Extraction + Varimax Rotation",
"3-Factor Principal Axes Extraction + Promax Rotation",
"4-Factor Principal Axes Extraction + Promax Rotation",
"3-Factor Maximum Likelihood Extraction + Varimax Rotation",
"4-Factor Maximum Likelihood Extraction + Varimax Rotation",
"3-Factor Maximum Likelihood Extraction + Promax Rotation",
"4-Factor Maximum Likelihood Extraction + Promax Rotation"))
print(FA_Summary, row.names=FALSE)
## RMS TLI BIC HighResidualRate
## 0.03244678 0.9838317 -33.48016 0.11111111
## 0.01182674 1.0833708 -21.18092 0.05555556
## 0.03244678 0.9838317 -33.48016 0.11111111
## 0.01182674 1.0833708 -21.18092 0.05555556
## 0.03416164 0.9937238 -34.37594 0.16666667
## 0.01385925 1.0936669 -21.63894 0.05555556
## 0.03416164 0.9937238 -34.37594 0.16666667
## 0.01385925 1.0936669 -21.63894 0.05555556
## Algorithm
## 3-Factor Principal Axes Extraction + Varimax Rotation
## 4-Factor Principal Axes Extraction + Varimax Rotation
## 3-Factor Principal Axes Extraction + Promax Rotation
## 4-Factor Principal Axes Extraction + Promax Rotation
## 3-Factor Maximum Likelihood Extraction + Varimax Rotation
## 4-Factor Maximum Likelihood Extraction + Varimax Rotation
## 3-Factor Maximum Likelihood Extraction + Promax Rotation
## 4-Factor Maximum Likelihood Extraction + Promax Rotation
##################################
# Consolidating all calculated values
# for the Standardized Root Mean Square of the Residual
##################################
<- dotplot(Algorithm ~ RMS,
(RMS_Plot data = FA_Summary,
main = "Algorithm Comparison : Standardized Root Mean Square of the Residual",
ylab = "Algorithm",
xlab = "RMSR",
auto.key = list(adj = 1),
type=c("p", "h"),
origin = 0,
alpha = 0.45,
pch = 16,
cex = 2))
##################################
# Consolidating all calculated values
# for the Tucker-Lewis Fit Index
##################################
<- dotplot(Algorithm ~ TLI,
(TLI_Plot data = FA_Summary,
main = "Algorithm Comparison : Tucker-Lewis Fit Index",
ylab = "Algorithm",
xlab = "TLI",
auto.key = list(adj = 1),
type=c("p", "h"),
origin = 0,
alpha = 0.45,
pch = 16,
cex = 2))
##################################
# Consolidating all calculated values
# for the Bayesian Information Criterion
##################################
<- dotplot(Algorithm ~ BIC,
(BIC_Plot data = FA_Summary,
main = "Algorithm Comparison : Bayesian Information Criterion",
ylab = "Algorithm",
xlab = "BIC",
auto.key = list(adj = 1),
type=c("p", "h"),
origin = 0,
alpha = 0.45,
pch = 16,
cex = 2))
##################################
# Consolidating all calculated values
# for the High Residual Rate
##################################
<- dotplot(Algorithm ~ HighResidualRate,
(HighResidualRate_Plot data = FA_Summary,
main = "Algorithm Comparison : High Residual Rate ",
ylab = "Algorithm",
xlab = "High Residual Rate",
auto.key = list(adj = 1),
type=c("p", "h"),
origin = 0,
alpha = 0.45,
pch = 16,
cex = 2))
##################################
# Plotting the Factor Loading Diagram
# for the optimal EFA models
##################################
fa.diagram(FA_ML_V_4F,
sort=TRUE,
cut=0,
digits=3,
main="Maximum Likelihood Factor Extraction + Varimax Rotation : 4 Factors",
cex=0.75)
fa.diagram(FA_ML_V_3F,
sort=TRUE,
cut=0,
digits=3,
main="Maximum Likelihood Factor Extraction + Varimax Rotation : 3 Factors",
cex=0.75)
##################################
# Plotting the Dandelion Plot
# for the optimal EFA models
##################################
dandelion(FA_ML_V_4F_FactorLoading,
bound=0,
mcex=c(1,1.2),
palet=DandelionPlotPalette)
dandelion(FA_ML_V_3F_FactorLoading,
bound=0,
mcex=c(1,1.2),
palet=DandelionPlotPalette)
##################################
# Specifying the model structure
# for confirmatory factor analysis
##################################
<- 'UNHLIF =~ HDMORT + STMORT + OVWINC
CFA_4F TOBUSE =~ SMPREV + PDMORT
PINACT =~ MEUNDA + ARTINC
PNUTRI =~ RDMORT + DBMORT'
##################################
# Fitting the model
# for confirmatory factor analysis
##################################
<- cfa(CFA_4F, data = DPA.Descriptors.Numeric, mimic =c("MPlus"), std.lv = TRUE)
CFA_4F_MODEL
<- modindices(CFA_4F_MODEL, sort = TRUE)
CFA_4F_MODEL_CHANGE head(CFA_4F_MODEL_CHANGE, 15)
## lhs op rhs mi epc sepc.lv sepc.all sepc.nox
## 73 HDMORT ~~ MEUNDA 7.270 0.150 0.150 0.375 0.375
## 86 OVWINC ~~ MEUNDA 4.286 -0.106 -0.106 -0.301 -0.301
## 78 STMORT ~~ SMPREV 3.332 -0.084 -0.084 -0.458 -0.458
## 84 OVWINC ~~ SMPREV 2.860 0.079 0.079 0.409 0.409
## 75 HDMORT ~~ RDMORT 2.501 -0.103 -0.103 -0.372 -0.372
## 63 PNUTRI =~ STMORT 2.316 0.449 0.449 0.454 0.454
## 55 PINACT =~ HDMORT 2.286 0.188 0.188 0.189 0.189
## 57 PINACT =~ OVWINC 2.088 -0.173 -0.173 -0.174 -0.174
## 98 PDMORT ~~ DBMORT 1.946 0.097 0.097 0.222 0.222
## 82 STMORT ~~ RDMORT 1.936 0.086 0.086 0.375 0.375
## 48 TOBUSE =~ HDMORT 1.923 0.328 0.328 0.331 0.331
## 69 HDMORT ~~ STMORT 1.857 0.096 0.096 0.292 0.292
## 49 TOBUSE =~ STMORT 1.828 -0.316 -0.316 -0.319 -0.319
## 62 PNUTRI =~ HDMORT 1.513 -0.366 -0.366 -0.370 -0.370
## 58 PINACT =~ SMPREV 1.145 0.291 0.291 0.294 0.294
##################################
# Generating a model performance summary
# for confirmatory factor analysis
##################################
summary(CFA_4F_MODEL, fit.measures=TRUE)
## lavaan 0.6-19 ended normally after 30 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 33
##
## Number of observations 50
## Number of missing patterns 1
##
## Model Test User Model:
##
## Test statistic 32.593
## Degrees of freedom 21
## P-value (Chi-square) 0.051
##
## Model Test Baseline Model:
##
## Test statistic 356.367
## Degrees of freedom 36
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.964
## Tucker-Lewis Index (TLI) 0.938
##
## Robust Comparative Fit Index (CFI) 0.964
## Robust Tucker-Lewis Index (TLI) 0.938
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -472.090
## Loglikelihood unrestricted model (H1) -455.793
##
## Akaike (AIC) 1010.179
## Bayesian (BIC) 1073.276
## Sample-size adjusted Bayesian (SABIC) 969.694
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.105
## 90 Percent confidence interval - lower 0.000
## 90 Percent confidence interval - upper 0.172
## P-value H_0: RMSEA <= 0.050 0.114
## P-value H_0: RMSEA >= 0.080 0.743
##
## Robust RMSEA 0.105
## 90 Percent confidence interval - lower 0.000
## 90 Percent confidence interval - upper 0.172
## P-value H_0: Robust RMSEA <= 0.050 0.114
## P-value H_0: Robust RMSEA >= 0.080 0.743
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.053
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Observed
## Observed information based on Hessian
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## UNHLIF =~
## HDMORT 0.764 0.123 6.220 0.000
## STMORT 0.840 0.118 7.141 0.000
## OVWINC 0.823 0.118 6.948 0.000
## TOBUSE =~
## SMPREV 0.926 0.109 8.454 0.000
## PDMORT 0.782 0.119 6.568 0.000
## PINACT =~
## MEUNDA 0.757 0.121 6.242 0.000
## ARTINC 1.039 0.102 10.182 0.000
## PNUTRI =~
## RDMORT 0.887 0.124 7.159 0.000
## DBMORT 0.679 0.131 5.186 0.000
##
## Covariances:
## Estimate Std.Err z-value P(>|z|)
## UNHLIF ~~
## TOBUSE 0.836 0.072 11.672 0.000
## PINACT 0.646 0.095 6.803 0.000
## PNUTRI 0.857 0.079 10.912 0.000
## TOBUSE ~~
## PINACT 0.830 0.064 12.948 0.000
## PNUTRI 0.731 0.102 7.140 0.000
## PINACT ~~
## PNUTRI 0.649 0.099 6.591 0.000
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|)
## .HDMORT -0.000 0.140 -0.000 1.000
## .STMORT 0.000 0.140 0.000 1.000
## .OVWINC -0.000 0.140 -0.000 1.000
## .SMPREV 0.000 0.140 0.000 1.000
## .PDMORT 0.000 0.140 0.000 1.000
## .MEUNDA 0.000 0.140 0.000 1.000
## .ARTINC 0.000 0.140 0.000 1.000
## .RDMORT 0.000 0.140 0.000 1.000
## .DBMORT -0.000 0.140 -0.000 1.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .HDMORT 0.396 0.097 4.081 0.000
## .STMORT 0.275 0.081 3.376 0.001
## .OVWINC 0.303 0.083 3.637 0.000
## .SMPREV 0.123 0.062 1.975 0.048
## .PDMORT 0.369 0.084 4.373 0.000
## .MEUNDA 0.406 0.093 4.387 0.000
## .ARTINC -0.100 0.086 -1.164 0.244
## .RDMORT 0.193 0.114 1.692 0.091
## .DBMORT 0.519 0.121 4.282 0.000
## UNHLIF 1.000
## TOBUSE 1.000
## PINACT 1.000
## PNUTRI 1.000
##################################
# Testing the null hypothesis
# that the matrix reproduced by the data
# and the specified model are statistically the same
# as the input or analysis matrix
# (Ideal Criterion: P-Value=>0.05)
##################################
fitMeasures(CFA_4F_MODEL, c("chisq", "df", "pvalue"))
## chisq df pvalue
## 32.593 21.000 0.051
##################################
# Measuring the discrepancy between
# the model-based and observed correlation matrices
# (Ideal Criterion: RMSEA<0.08)
##################################
fitMeasures(CFA_4F_MODEL, c("rmsea", "rmsea.ci.lower", "rmsea.ci.upper", "rmsea.pvalue"))
## rmsea rmsea.ci.lower rmsea.ci.upper rmsea.pvalue
## 0.105 0.000 0.172 0.114
<- fitMeasures(CFA_4F_MODEL, c("rmsea"))) (CFA_4F_MODEL_RMSEA
## rmsea
## 0.105
##################################
# Assessing model adequacy by comparing
# the hypothesized model
# to a baseline model
# (Ideal Criterion: CFI>0.90)
##################################
<- fitMeasures(CFA_4F_MODEL, c("cfi"))) (CFA_4F_MODEL_CFI
## cfi
## 0.964
##################################
# Assessing model adequacy by evaluating
# the improvement in fit of the hypothesized model
# relative to a baseline model
# (Ideal Criterion: TLI>0.90)
##################################
<- fitMeasures(CFA_4F_MODEL, c("tli"))) (CFA_4F_MODEL_TLI
## tli
## 0.938
##################################
# Measuring the actual differences (discrepancies)
# between the model-based correlations and the actual correlations
# (Ideal Criterion: SRMR<0.08)
##################################
<- fitMeasures(CFA_4F_MODEL, c("srmr"))) (CFA_4F_MODEL_SRMR
## srmr
## 0.053
##################################
# Evaluating correlation residuals
# using the pairwise coefficients
# to provide details about potential model misfit
# (Ideal Criterion: Residuals<0.10)
##################################
residuals(CFA_4F_MODEL)$cov
## HDMORT STMORT OVWINC SMPREV PDMORT MEUNDA ARTINC RDMORT DBMORT
## HDMORT 0.000
## STMORT 0.042 0.000
## OVWINC -0.020 -0.017 0.000
## SMPREV 0.026 -0.064 0.022 0.000
## PDMORT 0.070 0.004 0.057 0.000 0.000
## MEUNDA 0.254 0.072 -0.128 0.016 -0.015 0.000
## ARTINC 0.111 0.008 -0.064 0.009 -0.025 0.000 0.000
## RDMORT -0.068 0.047 -0.001 -0.014 -0.012 -0.026 -0.008 0.000
## DBMORT -0.041 0.005 0.009 0.027 0.115 0.011 0.014 0.000 0.000
##################################
# Evaluating loading magnitude estimates
# and fit statistics
# (Ideal Criterion: Loading Estimates>0.50)
# (Ideal Criterion: P-Value<0.05)
##################################
parameterEstimates(CFA_4F_MODEL) %>%
filter(op == "=~")
## lhs op rhs est se z pvalue ci.lower ci.upper
## 1 UNHLIF =~ HDMORT 0.764 0.123 6.220 0 0.524 1.005
## 2 UNHLIF =~ STMORT 0.840 0.118 7.141 0 0.609 1.070
## 3 UNHLIF =~ OVWINC 0.823 0.118 6.948 0 0.591 1.055
## 4 TOBUSE =~ SMPREV 0.926 0.109 8.454 0 0.711 1.140
## 5 TOBUSE =~ PDMORT 0.782 0.119 6.568 0 0.549 1.015
## 6 PINACT =~ MEUNDA 0.757 0.121 6.242 0 0.520 0.995
## 7 PINACT =~ ARTINC 1.039 0.102 10.182 0 0.839 1.239
## 8 PNUTRI =~ RDMORT 0.887 0.124 7.159 0 0.644 1.130
## 9 PNUTRI =~ DBMORT 0.679 0.131 5.186 0 0.422 0.935
##################################
# Evaluating the measurement reliability
# of the hypothesized model latent factors
# (Ideal Criterion: Cronbach's Alpha>0.70)
##################################
::compRelSEM(CFA_4F_MODEL, tau.eq=T, obs.var=T, return.total=T) semTools
## UNHLIF TOBUSE PINACT PNUTRI total
## 0.858 0.850 0.891 0.761 0.925
##################################
# Formulating a path diagram to visualize
# the relationships and paths between latent factors
# and observed chronic disease indicators
# from the hypothesized model
##################################
<- CFA_4F_MODEL@pta$vnames$lv[[1]]
CFA_4F_MODEL_FACTORS <- .65
size ::semPaths(CFA_4F_MODEL, latents = CFA_4F_MODEL_FACTORS, whatLabels = "std", layout = "tree2",
semPlotrotation = 4, style = "lisrel", optimizeLatRes = TRUE,
structural = FALSE, layoutSplit = FALSE,
intercepts = FALSE, residuals = FALSE,
curve = 1, curvature = 3, nCharNodes = 8,
sizeLat = 11 * size, sizeMan = 11 * size, sizeMan2 = 4 * size,
edge.label.cex = 1.2 * size,
edge.color = "#000000", edge.label.position = .40)
##################################
# Specifying the model structure
# for confirmatory factor analysis
##################################
<- 'UNHLIF =~ HDMORT + STMORT + OVWINC + RDMORT + DBMORT
CFA_3F TOBUSE =~ SMPREV + PDMORT + ARTINC
POMENT =~ MEUNDA'
##################################
# Fitting the model
# for confirmatory factor analysis
##################################
<- cfa(CFA_3F, data = DPA.Descriptors.Numeric, mimic =c("MPlus"), std.lv = TRUE)
CFA_3F_MODEL
##################################
# Generating a model performance summary
# for confirmatory factor analysis
##################################
summary(CFA_3F_MODEL, fit.measures = TRUE)
## lavaan 0.6-19 ended normally after 28 iterations
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 29
##
## Number of observations 50
## Number of missing patterns 1
##
## Model Test User Model:
##
## Test statistic 55.303
## Degrees of freedom 25
## P-value (Chi-square) 0.000
##
## Model Test Baseline Model:
##
## Test statistic 356.367
## Degrees of freedom 36
## P-value 0.000
##
## User Model versus Baseline Model:
##
## Comparative Fit Index (CFI) 0.905
## Tucker-Lewis Index (TLI) 0.864
##
## Robust Comparative Fit Index (CFI) 0.905
## Robust Tucker-Lewis Index (TLI) 0.864
##
## Loglikelihood and Information Criteria:
##
## Loglikelihood user model (H0) -483.445
## Loglikelihood unrestricted model (H1) -455.793
##
## Akaike (AIC) 1024.889
## Bayesian (BIC) 1080.338
## Sample-size adjusted Bayesian (SABIC) 989.312
##
## Root Mean Square Error of Approximation:
##
## RMSEA 0.156
## 90 Percent confidence interval - lower 0.100
## 90 Percent confidence interval - upper 0.211
## P-value H_0: RMSEA <= 0.050 0.003
## P-value H_0: RMSEA >= 0.080 0.984
##
## Robust RMSEA 0.156
## 90 Percent confidence interval - lower 0.100
## 90 Percent confidence interval - upper 0.211
## P-value H_0: Robust RMSEA <= 0.050 0.003
## P-value H_0: Robust RMSEA >= 0.080 0.984
##
## Standardized Root Mean Square Residual:
##
## SRMR 0.063
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Observed
## Observed information based on Hessian
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## UNHLIF =~
## HDMORT 0.757 0.123 6.153 0.000
## STMORT 0.844 0.116 7.270 0.000
## OVWINC 0.792 0.120 6.597 0.000
## RDMORT 0.794 0.120 6.618 0.000
## DBMORT 0.631 0.131 4.830 0.000
## TOBUSE =~
## SMPREV 0.876 0.112 7.789 0.000
## PDMORT 0.741 0.123 6.002 0.000
## ARTINC 0.925 0.108 8.533 0.000
## POMENT =~
## MEUNDA 0.990 0.099 10.000 0.000
##
## Covariances:
## Estimate Std.Err z-value P(>|z|)
## UNHLIF ~~
## TOBUSE 0.817 0.068 12.046 0.000
## POMENT 0.558 0.108 5.183 0.000
## TOBUSE ~~
## POMENT 0.785 0.065 11.994 0.000
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|)
## .HDMORT -0.000 0.140 -0.000 1.000
## .STMORT 0.000 0.140 0.000 1.000
## .OVWINC -0.000 0.140 -0.000 1.000
## .RDMORT 0.000 0.140 0.000 1.000
## .DBMORT -0.000 0.140 -0.000 1.000
## .SMPREV 0.000 0.140 0.000 1.000
## .PDMORT 0.000 0.140 0.000 1.000
## .ARTINC 0.000 0.140 0.000 1.000
## .MEUNDA 0.000 0.140 0.000 1.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .HDMORT 0.406 0.098 4.157 0.000
## .STMORT 0.268 0.075 3.557 0.000
## .OVWINC 0.352 0.088 4.002 0.000
## .RDMORT 0.349 0.088 3.992 0.000
## .DBMORT 0.582 0.126 4.618 0.000
## .SMPREV 0.213 0.063 3.368 0.001
## .PDMORT 0.431 0.100 4.327 0.000
## .ARTINC 0.125 0.055 2.289 0.022
## .MEUNDA 0.000
## UNHLIF 1.000
## TOBUSE 1.000
## POMENT 1.000
##################################
# Testing the null hypothesis
# that the matrix reproduced by the data
# and the specified model are statistically the same
# as the input or analysis matrix
# (Ideal Criterion: P-Value=>0.05)
##################################
fitMeasures(CFA_3F_MODEL, c("chisq", "df", "pvalue"))
## chisq df pvalue
## 55.303 25.000 0.000
##################################
# Measuring the discrepancy between
# the model-based and observed correlation matrices
# (Ideal Criterion: RMSEA<0.08)
##################################
fitMeasures(CFA_3F_MODEL, c("rmsea", "rmsea.ci.lower", "rmsea.ci.upper", "rmsea.pvalue"))
## rmsea rmsea.ci.lower rmsea.ci.upper rmsea.pvalue
## 0.156 0.100 0.211 0.003
<- fitMeasures(CFA_3F_MODEL, c("rmsea"))) (CFA_3F_MODEL_RMSEA
## rmsea
## 0.156
##################################
# Assessing model adequacy by comparing
# the hypothesized model
# to a baseline model
# (Ideal Criterion: CFI>0.90)
##################################
<- fitMeasures(CFA_3F_MODEL, c("cfi"))) (CFA_3F_MODEL_CFI
## cfi
## 0.905
##################################
# Assessing model adequacy by evaluating
# the improvement in fit of the hypothesized model
# relative to a baseline model
# (Ideal Criterion: TLI>0.90)
##################################
<- fitMeasures(CFA_3F_MODEL, c("tli"))) (CFA_3F_MODEL_TLI
## tli
## 0.864
##################################
# Measuring the actual differences (discrepancies)
# between the model-based correlations and the actual correlations
# (Ideal Criterion: SRMR<0.08)
##################################
<- fitMeasures(CFA_3F_MODEL, c("srmr"))) (CFA_3F_MODEL_SRMR
## srmr
## 0.063
##################################
# Evaluating correlation residuals
# using the pairwise coefficients
# to provide details about potential model misfit
# (Ideal Criterion: Residuals<0.10)
##################################
residuals(CFA_3F_MODEL)$cov
## HDMORT STMORT OVWINC RDMORT DBMORT SMPREV PDMORT ARTINC MEUNDA
## HDMORT 0.000
## STMORT 0.045 0.000
## OVWINC 0.009 0.006 0.000
## RDMORT -0.087 0.016 -0.004 0.000
## DBMORT -0.074 -0.039 -0.012 0.101 0.000
## SMPREV 0.076 -0.018 0.092 0.018 0.035 0.000
## PDMORT 0.111 0.042 0.115 0.014 0.121 0.075 0.000
## ARTINC 0.052 -0.066 -0.110 -0.009 -0.005 -0.003 -0.035 0.000
## MEUNDA 0.209 0.016 -0.164 -0.029 -0.004 -0.083 -0.099 0.069 0.000
##################################
# Evaluating loading magnitude estimates
# and fit statistics
# (Ideal Criterion: Loading Estimates>0.50)
# (Ideal Criterion: P-Value<0.05)
##################################
parameterEstimates(CFA_3F_MODEL) %>%
filter(op == "=~")
## lhs op rhs est se z pvalue ci.lower ci.upper
## 1 UNHLIF =~ HDMORT 0.757 0.123 6.153 0 0.516 0.999
## 2 UNHLIF =~ STMORT 0.844 0.116 7.270 0 0.616 1.071
## 3 UNHLIF =~ OVWINC 0.792 0.120 6.597 0 0.557 1.028
## 4 UNHLIF =~ RDMORT 0.794 0.120 6.618 0 0.559 1.029
## 5 UNHLIF =~ DBMORT 0.631 0.131 4.830 0 0.375 0.887
## 6 TOBUSE =~ SMPREV 0.876 0.112 7.789 0 0.655 1.096
## 7 TOBUSE =~ PDMORT 0.741 0.123 6.002 0 0.499 0.983
## 8 TOBUSE =~ ARTINC 0.925 0.108 8.533 0 0.712 1.137
## 9 POMENT =~ MEUNDA 0.990 0.099 10.000 0 0.796 1.184
##################################
# Evaluating the measurement reliability
# of the hypothesized model latent factors
# (Ideal Criterion: Cronbach's Alpha>0.70)
##################################
::compRelSEM(CFA_3F_MODEL, tau.eq=T, obs.var=T, return.total=T) semTools
## UNHLIF TOBUSE total
## 0.878 0.896 0.922
##################################
# Formulating a path diagram to visualize
# the relationships and paths between latent factors
# and observed chronic disease indicators
# from the hypothesized model
##################################
<- CFA_3F_MODEL@pta$vnames$lv[[1]]
CFA_3F_MODEL_FACTORS <- .65
size ::semPaths(CFA_3F_MODEL, latents = CFA_3F_MODEL_FACTORS, whatLabels = "std", layout = "tree2",
semPlotrotation = 4, style = "lisrel", optimizeLatRes = TRUE,
structural = FALSE, layoutSplit = FALSE,
intercepts = FALSE, residuals = FALSE,
curve = 1, curvature = 3, nCharNodes = 8,
sizeLat = 11 * size, sizeMan = 11 * size, sizeMan2 = 4 * size,
edge.label.cex = 1.2 * size,
edge.color = "#000000", edge.label.position = .40)
##################################
# Consolidating the fit indices
# from the confirmatory factor analysis
##################################
<- c(CFA_4F_MODEL_RMSEA,
CFA_RMSEASummary
CFA_3F_MODEL_RMSEA)
<- c(CFA_4F_MODEL_CFI,
CFA_CFISummary
CFA_3F_MODEL_CFI)
<- c(CFA_4F_MODEL_TLI,
CFA_TLISummary
CFA_3F_MODEL_TLI)
<- c(CFA_4F_MODEL_SRMR,
CFA_SRMRSummary
CFA_3F_MODEL_SRMR)
<- c("4-Factor Confirmatory Factor Analysis",
CFA_AlgorithmSummary "3-Factor Confirmatory Factor Analysis")
<- cbind(CFA_RMSEASummary,
CFA_Summary
CFA_CFISummary,
CFA_TLISummary,
CFA_SRMRSummary,
CFA_AlgorithmSummary)
<- as.data.frame(CFA_Summary)
CFA_Summary names(CFA_Summary) <- c("RMSEA",
"CFI",
"TLI",
"SRMR",
"Algorithm")
$RMSEA <- as.numeric(as.character(CFA_Summary$RMSEA))
CFA_Summary$CFI <- as.numeric(as.character(CFA_Summary$CFI))
CFA_Summary$TLI <- as.numeric(as.character(CFA_Summary$TLI))
CFA_Summary$SRMR <- as.numeric(as.character(CFA_Summary$SRMR))
CFA_Summary
$Algorithm <- factor(CFA_Summary$Algorithm,
CFA_Summarylevels = c("4-Factor Confirmatory Factor Analysis",
"3-Factor Confirmatory Factor Analysis"))
print(CFA_Summary, row.names=FALSE)
## RMSEA CFI TLI SRMR Algorithm
## 0.1050763 0.9638131 0.9379654 0.05258369 4-Factor Confirmatory Factor Analysis
## 0.1557009 0.9054103 0.8637909 0.06286673 3-Factor Confirmatory Factor Analysis
##################################
# Consolidating all calculated values
# for the Root Mean Square Error of Approximation
##################################
<- dotplot(Algorithm ~ RMSEA,
(RMSEA_Plot data = CFA_Summary,
main = "Algorithm Comparison : Root Mean Square Error of Approximation",
ylab = "Algorithm",
xlab = "RMSEA",
auto.key = list(adj = 1),
type=c("p", "h"),
origin = 0,
alpha = 0.45,
pch = 16,
cex = 2))
##################################
# Consolidating all calculated values
# for the Tucker-Lewis Fit Index
##################################
<- dotplot(Algorithm ~ CFI,
(CFI_Plot data = CFA_Summary,
main = "Algorithm Comparison : Comparative Fit Index",
ylab = "Algorithm",
xlab = "CFI",
auto.key = list(adj = 1),
type=c("p", "h"),
origin = 0,
alpha = 0.45,
pch = 16,
cex = 2))
##################################
# Consolidating all calculated values
# for the Tucker-Lewis Fit Index
##################################
<- dotplot(Algorithm ~ TLI,
(TLI_Plot data = CFA_Summary,
main = "Algorithm Comparison : Tucker-Lewis Fit Index",
ylab = "Algorithm",
xlab = "TLI",
auto.key = list(adj = 1),
type=c("p", "h"),
origin = 0,
alpha = 0.45,
pch = 16,
cex = 2))
##################################
# Consolidating all calculated values
# for the Standardized Root Mean Square of the Residual
##################################
<- dotplot(Algorithm ~ SRMR,
(SRMR_Plot data = CFA_Summary,
main = "Algorithm Comparison : Standardized Root Mean Square of the Residual",
ylab = "Algorithm",
xlab = "SRMR",
auto.key = list(adj = 1),
type=c("p", "h"),
origin = 0,
alpha = 0.45,
pch = 16,
cex = 2))
##################################
# Plotting the Dandelion Plot
# for the optimal EFA model
##################################
dandelion(FA_ML_V_4F_FactorLoading,
bound=0,
mcex=c(1,1.2),
palet=DandelionPlotPalette)
##################################
# Plotting the Path Diagram
# for the optimal CFA model
##################################
<- CFA_4F_MODEL@pta$vnames$lv[[1]]
CFA_4F_MODEL_FACTORS <- .65
size ::semPaths(CFA_4F_MODEL, latents = CFA_4F_MODEL_FACTORS, whatLabels = "std", layout = "tree2",
semPlotrotation = 4, style = "lisrel", optimizeLatRes = TRUE,
structural = FALSE, layoutSplit = FALSE,
intercepts = FALSE, residuals = FALSE,
curve = 1, curvature = 3, nCharNodes = 8,
sizeLat = 11 * size, sizeMan = 11 * size, sizeMan2 = 4 * size,
edge.label.cex = 1.2 * size,
edge.color = "#000000", edge.label.position = .40)