##################################
# Loading R libraries
##################################
library(moments)
library(car)
library(stats)
library(multcomp)
library(effects)
library(psych)
library(ggplot2)
library(dplyr)
library(ggpubr)
library(rstatix)
library(ggfortify)
library(trend)
##################################
# Defining file paths
##################################
<- file.path("datasets")
DATASETS_PATH
##################################
# Reading and creating the complete dataset
##################################
<- read.csv(file.path("..", DATASETS_PATH, "DBP.csv"),
DBP.Complete na.strings=c("NA","NaN"," ",""),
stringsAsFactors = T)
##################################
# Performing a general exploration of the dataset
##################################
dim(DBP.Complete)
## [1] 40 9
str(DBP.Complete)
## 'data.frame': 40 obs. of 9 variables:
## $ Subject: int 1 2 3 4 5 6 7 8 9 10 ...
## $ TRT : Factor w/ 2 levels "A","B": 1 1 1 1 1 1 1 1 1 1 ...
## $ DBP1 : int 114 116 119 115 116 117 118 120 114 115 ...
## $ DBP2 : int 115 113 115 113 112 112 111 115 112 113 ...
## $ DBP3 : int 113 112 113 112 107 113 100 113 113 108 ...
## $ DBP4 : int 109 103 104 109 104 104 109 102 109 106 ...
## $ DBP5 : int 105 101 98 101 105 102 99 102 103 97 ...
## $ Age : int 43 51 48 42 49 47 50 61 43 51 ...
## $ Sex : Factor w/ 2 levels "F","M": 1 2 1 1 2 2 1 2 2 2 ...
summary(DBP.Complete)
## Subject TRT DBP1 DBP2 DBP3
## Min. : 1.00 A:20 Min. :114.0 Min. :111.0 Min. :100.0
## 1st Qu.:10.75 B:20 1st Qu.:115.0 1st Qu.:113.0 1st Qu.:112.0
## Median :20.50 Median :116.5 Median :115.0 Median :113.0
## Mean :20.50 Mean :116.7 Mean :114.3 Mean :112.4
## 3rd Qu.:30.25 3rd Qu.:118.0 3rd Qu.:115.0 3rd Qu.:113.0
## Max. :40.00 Max. :121.0 Max. :119.0 Max. :118.0
## DBP4 DBP5 Age Sex
## Min. :102.0 Min. : 97.0 Min. :38.00 F:18
## 1st Qu.:106.8 1st Qu.:101.8 1st Qu.:42.00 M:22
## Median :109.0 Median :106.5 Median :48.00
## Mean :109.3 Mean :106.7 Mean :47.83
## 3rd Qu.:113.2 3rd Qu.:112.0 3rd Qu.:51.25
## Max. :117.0 Max. :115.0 Max. :63.00
##################################
# Renaming the treatment categories to more verbose labels
##################################
$TRT <- ifelse(DBP.Complete$TRT=="A","New","Placebo")
DBP.Complete
##################################
# Setting the levels for the treatment categories
##################################
$TRT <- factor(DBP.Complete$TRT,
DBP.Completelevels = c("Placebo","New"))
##################################
# Creating an analysis dataset without the subject information column
##################################
<- DBP.Complete[,-1]
DBP.Analysis
##################################
# Creating a response-covariate dataset without the treatment column
##################################
<- DBP.Analysis[,-1]
DBP.Response.Covariate
##################################
# Formulating a data type assessment summary
##################################
<- DBP.Response.Covariate
PDA <- data.frame(
(PDA.Summary Column.Index=c(1:length(names(PDA))),
Column.Name= names(PDA),
Column.Type=sapply(PDA, function(x) class(x)),
row.names=NULL)
)
## Column.Index Column.Name Column.Type
## 1 1 DBP1 integer
## 2 2 DBP2 integer
## 3 3 DBP3 integer
## 4 4 DBP4 integer
## 5 5 DBP5 integer
## 6 6 Age integer
## 7 7 Sex factor
##################################
# Reusing the response-covariate dataset
##################################
<- DBP.Response.Covariate
DQA
##################################
# Formulating a data subset of response variables (numeric)
##################################
<- DQA[,names(DQA) %in% c("DBP1","DBP2","DBP3","DBP4","DBP5")]
DQA.Variables.Response
##################################
# Formulating a data subset of covariate variables (numeric)
##################################
<- DQA$Age
DQA.Variables.Covariate.Numeric
##################################
# Formulating a data subset of covariate variables (factor)
##################################
<- DQA$Sex
DQA.Variables.Covariate.Factor
##################################
# Formulating an overall data quality assessment summary
##################################
<- data.frame(
(DQA.Summary Column.Index=c(1:length(names(DQA))),
Column.Name= names(DQA),
Column.Type=sapply(DQA, function(x) class(x)),
Row.Count=sapply(DQA, function(x) nrow(DQA)),
NA.Count=sapply(DQA,function(x)sum(is.na(x))),
Fill.Rate=sapply(DQA,function(x)format(round((sum(!is.na(x))/nrow(DQA)),3),nsmall=3)),
row.names=NULL)
)
## Column.Index Column.Name Column.Type Row.Count NA.Count Fill.Rate
## 1 1 DBP1 integer 40 0 1.000
## 2 2 DBP2 integer 40 0 1.000
## 3 3 DBP3 integer 40 0 1.000
## 4 4 DBP4 integer 40 0 1.000
## 5 5 DBP5 integer 40 0 1.000
## 6 6 Age integer 40 0 1.000
## 7 7 Sex factor 40 0 1.000
##################################
# Checking for missing observations
##################################
if ((nrow(DQA.Summary[DQA.Summary$NA.Count>0,]))>0){
print(paste0("Missing observations noted for ",
nrow(DQA.Summary[DQA.Summary$NA.Count>0,])),
(" variable(s) with NA.Count>0 and Fill.Rate<1.0."))
$NA.Count>0,]
DQA.Summary[DQA.Summaryelse {
} print("No missing observations noted.")
}
## [1] "No missing observations noted."
##################################
# Formulating a data quality assessment summary for response variables (numeric)
##################################
if (length(names(DQA.Variables.Response))>0) {
##################################
# Formulating a function to determine the first mode
##################################
<- function(x) {
FirstModes <- unique(na.omit(x))
ux <- tabulate(match(x, ux))
tab == max(tab)]
ux[tab
}
##################################
# Formulating a function to determine the second mode
##################################
<- function(x) {
SecondModes <- unique(na.omit(x))
ux <- tabulate(match(x, ux))
tab = ux[tab == max(tab)]
fm = na.omit(x)[!(na.omit(x) %in% fm)]
sm <- unique(sm)
usm <- tabulate(match(sm, usm))
tabsm == max(tabsm)]
usm[tabsm
}
<- data.frame(
(DQA.Variables.Response.Summary Column.Name= names(DQA.Variables.Response),
Column.Type=sapply(DQA.Variables.Response, function(x) class(x)),
Unique.Count=sapply(DQA.Variables.Response, function(x) length(unique(x))),
Unique.Count.Ratio=sapply(DQA.Variables.Response, function(x) format(round((length(unique(x))/nrow(DQA.Variables.Response)),3), nsmall=3)),
First.Mode.Value=sapply(DQA.Variables.Response, function(x) format(round((FirstModes(x)[1]),3),nsmall=3)),
Second.Mode.Value=sapply(DQA.Variables.Response, function(x) format(round((SecondModes(x)[1]),3),nsmall=3)),
First.Mode.Count=sapply(DQA.Variables.Response, function(x) sum(na.omit(x) == FirstModes(x)[1])),
Second.Mode.Count=sapply(DQA.Variables.Response, function(x) sum(na.omit(x) == SecondModes(x)[1])),
First.Second.Mode.Ratio=sapply(DQA.Variables.Response, function(x) format(round((sum(na.omit(x) == FirstModes(x)[1])/sum(na.omit(x) == SecondModes(x)[1])),3), nsmall=3)),
Minimum=sapply(DQA.Variables.Response, function(x) format(round(min(x,na.rm = TRUE),3), nsmall=3)),
Mean=sapply(DQA.Variables.Response, function(x) format(round(mean(x,na.rm = TRUE),3), nsmall=3)),
Median=sapply(DQA.Variables.Response, function(x) format(round(median(x,na.rm = TRUE),3), nsmall=3)),
Maximum=sapply(DQA.Variables.Response, function(x) format(round(max(x,na.rm = TRUE),3), nsmall=3)),
Skewness=sapply(DQA.Variables.Response, function(x) format(round(skewness(x,na.rm = TRUE),3), nsmall=3)),
Kurtosis=sapply(DQA.Variables.Response, function(x) format(round(kurtosis(x,na.rm = TRUE),3), nsmall=3)),
Percentile25th=sapply(DQA.Variables.Response, function(x) format(round(quantile(x,probs=0.25,na.rm = TRUE),3), nsmall=3)),
Percentile75th=sapply(DQA.Variables.Response, function(x) format(round(quantile(x,probs=0.75,na.rm = TRUE),3), nsmall=3)),
row.names=NULL)
)
}
## Column.Name Column.Type Unique.Count Unique.Count.Ratio First.Mode.Value
## 1 DBP1 integer 8 0.200 114.000
## 2 DBP2 integer 8 0.200 115.000
## 3 DBP3 integer 13 0.325 113.000
## 4 DBP4 integer 13 0.325 109.000
## 5 DBP5 integer 16 0.400 115.000
## Second.Mode.Value First.Mode.Count Second.Mode.Count First.Second.Mode.Ratio
## 1 116.000 9 8 1.125
## 2 113.000 17 6 2.833
## 3 114.000 19 4 4.750
## 4 114.000 12 8 1.500
## 5 102.000 6 5 1.200
## Minimum Mean Median Maximum Skewness Kurtosis Percentile25th
## 1 114.000 116.650 116.500 121.000 0.244 2.074 115.000
## 2 111.000 114.350 115.000 119.000 0.357 3.139 113.000
## 3 100.000 112.375 113.000 118.000 -1.676 8.305 112.000
## 4 102.000 109.350 109.000 117.000 -0.077 2.114 106.750
## 5 97.000 106.650 106.500 115.000 0.053 1.552 101.750
## Percentile75th
## 1 118.000
## 2 115.000
## 3 113.000
## 4 113.250
## 5 112.000
##################################
# Identifying potential data quality issues for response variables (numeric)
##################################
##################################
# Checking for zero or near-zero variance predictors
##################################
if (length(names(DQA.Variables.Response))==0) {
print("No numeric predictors noted.")
else if (nrow(DQA.Variables.Response.Summary[as.numeric(as.character(DQA.Variables.Response.Summary$First.Second.Mode.Ratio))>5,])>0){
} print(paste0("Low variance observed for ",
nrow(DQA.Variables.Response.Summary[as.numeric(as.character(DQA.Variables.Response.Summary$First.Second.Mode.Ratio))>5,])),
(" numeric variable(s) with First.Second.Mode.Ratio>5."))
as.numeric(as.character(DQA.Variables.Response.Summary$First.Second.Mode.Ratio))>5,]
DQA.Variables.Response.Summary[else {
} print("No low variance numeric predictors due to high first-second mode ratio noted.")
}
## [1] "No low variance numeric predictors due to high first-second mode ratio noted."
if (length(names(DQA.Variables.Response))==0) {
print("No numeric predictors noted.")
else if (nrow(DQA.Variables.Response.Summary[as.numeric(as.character(DQA.Variables.Response.Summary$Unique.Count.Ratio))<0.01,])>0){
} print(paste0("Low variance observed for ",
nrow(DQA.Variables.Response.Summary[as.numeric(as.character(DQA.Variables.Response.Summary$Unique.Count.Ratio))<0.01,])),
(" numeric variable(s) with Unique.Count.Ratio<0.01."))
as.numeric(as.character(DQA.Variables.Response.Summary$Unique.Count.Ratio))<0.01,]
DQA.Variables.Response.Summary[else {
} print("No low variance numeric predictors due to low unique count ratio noted.")
}
## [1] "No low variance numeric predictors due to low unique count ratio noted."
##################################
# Checking for skewed response variables
##################################
if (length(names(DQA.Variables.Response))==0) {
print("No response variables noted.")
else if (nrow(DQA.Variables.Response.Summary[as.numeric(as.character(DQA.Variables.Response.Summary$Skewness))>3 |
} as.numeric(as.character(DQA.Variables.Response.Summary$Skewness))<(-3),])>0){
print(paste0("High skewness observed for ",
nrow(DQA.Variables.Response.Summary[as.numeric(as.character(DQA.Variables.Response.Summary$Skewness))>3 |
(as.numeric(as.character(DQA.Variables.Response.Summary$Skewness))<(-3),])),
" response variable(s) with Skewness>3 or Skewness<(-3)."))
as.numeric(as.character(DQA.Variables.Response.Summary$Skewness))>3 |
DQA.Variables.Response.Summary[as.numeric(as.character(DQA.Variables.Response.Summary$Skewness))<(-3),]
else {
} print("No skewed response variables noted.")
}
## [1] "No skewed response variables noted."
##################################
# Determining sample sizes
##################################
##################################
# Defining the range of possible effect sizes
##################################
##################################
# Defining a range of possible hypothesized values for the treatment mean
##################################
=c(-15,-14,-13,-12,-11,-10,-9,-8,-7,-6,-5,-4,-3)
mu_Treatment
##################################
# Defining a fixed hypothesized value for the control mean
##################################
=0
mu_Control
##################################
# Defining a fixed hypothesized value for the sample size ratio between the treatment and control groups
##################################
=1
kappa
##################################
# Defining a fixed hypothesized value for the standard deviation
##################################
=5
sd
##################################
# Defining a fixed hypothesized value for the Type I error
##################################
=0.05
alpha
##################################
# Defining a range of possible hypothesized values for the Type I error
##################################
=c(0.30,0.20,0.10)
beta
##################################
# Computing the range of samples sizes based on different levels type II error
##################################
=beta[1]
beta_1nB_Beta30=(1+1/kappa)*(sd*(qnorm(1-alpha/2)+qnorm(1-beta_1))/(mu_Treatment-mu_Control))^2) (
## [1] 1.371570 1.574507 1.826055 2.143079 2.550441 3.086033 3.809918
## [8] 4.821927 6.298028 8.572315 12.344134 19.287709 34.289261
ceiling(nB_Beta30)
## [1] 2 2 2 3 3 4 4 5 7 9 13 20 35
=(mu_Treatment-mu_Control)/(sd*sqrt((1+1/kappa)/nB_Beta30))
z_Beta30Power_Beta30=pnorm(z_Beta30-qnorm(1-alpha/2))+pnorm(-z_Beta30-qnorm(1-alpha/2))) (
## [1] 0.7000044 0.7000044 0.7000044 0.7000044 0.7000044 0.7000044 0.7000044
## [8] 0.7000044 0.7000044 0.7000044 0.7000044 0.7000044 0.7000044
=beta[2]
beta_2nB_Beta20=(1+1/kappa)*(sd*(qnorm(1-alpha/2)+qnorm(1-beta_2))/(mu_Treatment-mu_Control))^2) (
## [1] 1.744195 2.002265 2.322154 2.725305 3.243339 3.924440 4.844987
## [8] 6.131937 8.009061 10.901222 15.697759 24.527749 43.604887
ceiling(nB_Beta20)
## [1] 2 3 3 3 4 4 5 7 9 11 16 25 44
=(mu_Treatment-mu_Control)/(sd*sqrt((1+1/kappa)/nB_Beta20))
z_Beta20Power_Beta20=pnorm(z_Beta20-qnorm(1-alpha/2))+pnorm(-z_Beta20-qnorm(1-alpha/2))) (
## [1] 0.800001 0.800001 0.800001 0.800001 0.800001 0.800001 0.800001 0.800001
## [9] 0.800001 0.800001 0.800001 0.800001 0.800001
=beta[3]
beta_3nB_Beta10=(1+1/kappa)*(sd*(qnorm(1-alpha/2)+qnorm(1-beta_3))/(mu_Treatment-mu_Control))^2) (
## [1] 2.334983 2.680465 3.108705 3.648411 4.341910 5.253712 6.486064
## [8] 8.208924 10.721860 14.593643 21.014846 32.835697 58.374573
ceiling(nB_Beta10)
## [1] 3 3 4 4 5 6 7 9 11 15 22 33 59
=(mu_Treatment-mu_Control)/(sd*sqrt((1+1/kappa)/nB_Beta10))
z_Beta10Power_Beta10=pnorm(z_Beta10-qnorm(1-alpha/2))+pnorm(-z_Beta10-qnorm(1-alpha/2))) (
## [1] 0.9000001 0.9000001 0.9000001 0.9000001 0.9000001 0.9000001 0.9000001
## [8] 0.9000001 0.9000001 0.9000001 0.9000001 0.9000001 0.9000001
<- as.data.frame(cbind(mu_Treatment,nB_Beta30,nB_Beta20,nB_Beta10))
SampleSizePowerCurve names(SampleSizePowerCurve) <- c("DBP5.DBP1.Difference","Power=70%","Power=80%","Power=90%")
##################################
# Restructuring the data
##################################
<- gather(SampleSizePowerCurve,"Power=70%","Power=80%","Power=90%",
SampleSizePowerCurve.Reshaped key="Power",
value="Sample.Size")
##################################
# Plotting the sample size and power curve
##################################
<- ggplot(SampleSizePowerCurve.Reshaped,aes(x=DBP5.DBP1.Difference,
(DBP5DBP1Difference.SampleSize.LinePlot.ByPower y=Sample.Size,
color=Power)) +
geom_line(size=1)+
geom_point(size=4)+
theme_bw() +
scale_color_brewer(palette="Paired") +
scale_x_continuous(name="Hypothesized Diastolic Blood Pressure Change (DBP5-DBP1) for Treatment Group",limits=c(-15,-3),breaks=seq(-15,-3,by=1)) +
scale_y_continuous(name="Sample Size for Each Treatment Group",limits=c(0,80),breaks=seq(0,80,by=10)) +
theme(axis.title.x=element_text(color="black",face="bold",size=12),
legend.position="top",
legend.key.size = unit(1,"cm"),
legend.key.height = unit(1,"cm"),
legend.key.width = unit(1,"cm"),
legend.title = element_text(size=12),
legend.text = element_text(size=9),
text=element_text(size=9),
axis.text.y=element_text(color="black",hjust=0.25,size=9),
axis.text.x=element_text(color="black",size=9),
axis.title.y=element_text(color="black",face="bold",size=12),
axis.ticks.length=unit(0.25,"cm"),
plot.title=element_text(color="black",size=14,face="bold",hjust=0.50)) +
ggtitle("Sample Size Calculations for Different Power and Effect Size"))
##################################
# Reusing the dataset
##################################
head(DBP.Analysis)
## TRT DBP1 DBP2 DBP3 DBP4 DBP5 Age Sex
## 1 New 114 115 113 109 105 43 F
## 2 New 116 113 112 103 101 51 M
## 3 New 119 115 113 104 98 48 F
## 4 New 115 113 112 109 101 42 F
## 5 New 116 112 107 104 105 49 M
## 6 New 117 112 113 104 102 47 M
##################################
# Creating the DBP5.DBP1.Difference column
##################################
$DBP5.DBP1.Difference <- DBP.Analysis$DBP5-DBP.Analysis$DBP1
DBP.Analysis
##################################
# Printing a subset of the updated dataset
##################################
head(DBP.Analysis)
## TRT DBP1 DBP2 DBP3 DBP4 DBP5 Age Sex DBP5.DBP1.Difference
## 1 New 114 115 113 109 105 43 F -9
## 2 New 116 113 112 103 101 51 M -15
## 3 New 119 115 113 104 98 48 F -21
## 4 New 115 113 112 109 101 42 F -14
## 5 New 116 112 107 104 105 49 M -11
## 6 New 117 112 113 104 102 47 M -15
##################################
# Determining the max and min values for the DBP5.DBP1.Difference column
##################################
max(DBP.Analysis$DBP5.DBP1.Difference)
## [1] 1
min(DBP.Analysis$DBP5.DBP1.Difference)
## [1] -21
##################################
# Performing exploratory data analysis
##################################
<- ggplot(DBP.Analysis,aes(x=TRT,y=DBP5.DBP1.Difference,color=TRT)) +
(DBP5DBP1Difference.ViolinBoxplot.ByTreatment geom_violin(scale="width", trim=FALSE) +
stat_boxplot(geom="errorbar",lwd=2) +
geom_boxplot(lwd=2, outlier.size=3, width=0.4) +
theme_bw() +
scale_fill_manual(values=c("#3259A0","#FF5050")) +
scale_color_manual(values=c("#3259A0","#FF5050")) +
scale_x_discrete(name="Treatment (TRT)", limits=c("Placebo","New")) +
scale_y_continuous(name="Diastolic Blood Pressure Change (DBP5-DBP1)",limits=c(-25,5),breaks=seq(-25,5,by=5)) +
theme(axis.title.x=element_text(color="black",face="bold",size=12),
legend.position="top",
legend.key.size = unit(1,"cm"),
legend.key.height = unit(1,"cm"),
legend.key.width = unit(1,"cm"),
legend.title = element_text(size=12),
legend.text = element_text(size=9),
text=element_text(size=9),
axis.text.y=element_text(color="black",hjust=0.25,size=9),
axis.text.x=element_text(color="black",size=9),
axis.title.y=element_text(color="black",face="bold",size=12),
axis.ticks.length=unit(0.25,"cm"),
plot.title=element_text(color="black",size=14,face="bold",hjust=0.50)) +
stat_summary(fun=mean, geom="line",color="black",size=2,aes(group=1)) +
stat_summary(fun=mean, geom="point",color="black",size=5) +
ggtitle("Data Exploration Between Treatment Groups"))
##################################
# Computing summary statistics
##################################
<- DBP.Analysis %>%
DBP.Analysis.SummaryStatistics group_by(TRT) %>%
get_summary_stats(DBP5.DBP1.Difference, show = c("mean", "sd", "median", "q1", "q3", "iqr"))
as.data.frame(DBP.Analysis.SummaryStatistics)) (
## TRT variable n mean sd median q1 q3 iqr
## 1 Placebo DBP5.DBP1.Difference 20 -4.8 2.419 -5.5 -6.25 -3.75 2.50
## 2 New DBP5.DBP1.Difference 20 -15.2 2.966 -15.0 -17.25 -14.00 3.25
##################################
# Reusing the dataset
##################################
head(DBP.Analysis)
## TRT DBP1 DBP2 DBP3 DBP4 DBP5 Age Sex DBP5.DBP1.Difference
## 1 New 114 115 113 109 105 43 F -9
## 2 New 116 113 112 103 101 51 M -15
## 3 New 119 115 113 104 98 48 F -21
## 4 New 115 113 112 109 101 42 F -14
## 5 New 116 112 107 104 105 49 M -11
## 6 New 117 112 113 104 102 47 M -15
##################################
# Identifying extreme outliers
##################################
<- DBP.Analysis %>%
DBP.Analysis.ExtremeOutliers group_by(TRT) %>%
identify_outliers(DBP5.DBP1.Difference)
as.data.frame(DBP.Analysis.ExtremeOutliers)) (
## TRT DBP1 DBP2 DBP3 DBP4 DBP5 Age Sex DBP5.DBP1.Difference is.outlier
## 1 Placebo 114 115 113 114 115 38 M 1 TRUE
## 2 New 114 115 113 109 105 43 F -9 TRUE
## is.extreme
## 1 FALSE
## 2 FALSE
##################################
# Check normality of data distribution
##################################
<- DBP.Analysis %>%
DBP.Analysis.DistributionalNormality group_by(TRT) %>%
shapiro_test(DBP5.DBP1.Difference)
as.data.frame(DBP.Analysis.DistributionalNormality)) (
## TRT variable statistic p
## 1 Placebo DBP5.DBP1.Difference 0.9312024 0.1628639
## 2 New DBP5.DBP1.Difference 0.9740958 0.8378696
##################################
# Formulating the QQ-Plot
##################################
ggqqplot(DBP.Analysis,x="DBP5.DBP1.Difference",color="TRT",facet.by="TRT") +
theme_bw() +
scale_fill_manual(values=c("#3259A0","#FF5050")) +
scale_color_manual(values=c("#3259A0","#FF5050")) +
scale_y_continuous(name="Observed (DBP5-DBP1)") +
scale_x_continuous(name="Expected Normal (DBP5-DBP1)") +
theme(axis.title.x=element_text(color="black",face="bold",size=12),
legend.position="top",
legend.key.size = unit(1,"cm"),
legend.key.height = unit(1,"cm"),
legend.key.width = unit(1,"cm"),
legend.title = element_text(size=12),
legend.text = element_text(size=9),
text=element_text(size=9),
axis.text.y=element_text(color="black",hjust=0.25,size=9),
axis.text.x=element_text(color="black",size=9),
axis.title.y=element_text(color="black",face="bold",size=12),
axis.ticks.length=unit(0.25,"cm"),
plot.title=element_text(color="black",size=14,face="bold",hjust=0.50),
strip.text = element_text(color="black",face="bold",size=12)) +
ggtitle("QQ-Plot for Normality Assumption Evaluation")
##################################
# Checking the equality of variances
##################################
<- DBP.Analysis %>%
DBP.Analysis.VarianceEquality levene_test(DBP5.DBP1.Difference~TRT)
as.data.frame(DBP.Analysis.VarianceEquality)) (
## df1 df2 statistic p
## 1 1 38 0.285 0.596552
##################################
# Reusing the dataset
##################################
head(DBP.Analysis)
## TRT DBP1 DBP2 DBP3 DBP4 DBP5 Age Sex DBP5.DBP1.Difference
## 1 New 114 115 113 109 105 43 F -9
## 2 New 116 113 112 103 101 51 M -15
## 3 New 119 115 113 104 98 48 F -21
## 4 New 115 113 112 109 101 42 F -14
## 5 New 116 112 107 104 105 49 M -11
## 6 New 117 112 113 104 102 47 M -15
##################################
# Conducting Student's T-Test
##################################
<- DBP.Analysis %>%
DBP.Analysis.StudentTTest t_test(DBP5.DBP1.Difference ~ TRT, var.equal = TRUE, detailed = TRUE) %>%
add_significance()
as.data.frame(DBP.Analysis.StudentTTest)) (
## estimate estimate1 estimate2 .y. group1 group2 n1 n2
## 1 10.4 -4.8 -15.2 DBP5.DBP1.Difference Placebo New 20 20
## statistic p df conf.low conf.high method alternative p.signif
## 1 12.1504 1.17e-14 38 8.667242 12.13276 T-test two.sided ****
##################################
# Computing for the effect size
##################################
<- DBP.Analysis %>%
DBP.Analysis.StudentTTest.EffectSize cohens_d(DBP5.DBP1.Difference ~ TRT, var.equal = TRUE, hedges.correction = TRUE)
as.data.frame(DBP.Analysis.StudentTTest.EffectSize)) (
## .y. group1 group2 effsize n1 n2 magnitude
## 1 DBP5.DBP1.Difference Placebo New 3.765956 20 20 large
##################################
# Summarizing the hypothesis testing results
##################################
<- DBP.Analysis.StudentTTest %>% add_xy_position(x = "group")
DBP.Analysis.StudentTTest.Statistics
<- ggplot(DBP.Analysis,aes(x=TRT,y=DBP5.DBP1.Difference,color=TRT)) +
(DBP5DBP1Difference.ViolinBoxplot.ByTreatment geom_violin(scale="width", trim=FALSE) +
stat_boxplot(geom="errorbar",lwd=2) +
geom_boxplot(lwd=2, outlier.size=3, width=0.4) +
theme_bw() +
scale_fill_manual(values=c("#3259A0","#FF5050")) +
scale_color_manual(values=c("#3259A0","#FF5050")) +
scale_x_discrete(name="Treatment (TRT)", limits=c("Placebo","New")) +
scale_y_continuous(name="Diastolic Blood Pressure Change (DBP5-DBP1)",limits=c(-25,5),breaks=seq(-25,5,by=5)) +
theme(axis.title.x=element_text(color="black",face="bold",size=12),
legend.position="top",
legend.key.size = unit(1,"cm"),
legend.key.height = unit(1,"cm"),
legend.key.width = unit(1,"cm"),
legend.title = element_text(size=12),
legend.text = element_text(size=9),
text=element_text(size=9),
axis.text.y=element_text(color="black",hjust=0.25,size=9),
axis.text.x=element_text(color="black",size=9),
axis.title.y=element_text(color="black",face="bold",size=12),
axis.ticks.length=unit(0.25,"cm"),
plot.title=element_text(color="black",size=14,face="bold",hjust=0.50),
plot.subtitle = element_text(color="black",size=14,hjust=0.50)) +
stat_summary(fun=mean, geom="line",color="black",size=2,aes(group=1)) +
stat_summary(fun=mean, geom="point",color="black",size=5) +
labs(subtitle = "t(38.00)=12.15, p=<0.0001, n=40, Effect Size=3.84") +
ggtitle("Hypothesis Testing using Student's T-Test"))
##################################
# Reusing the dataset
##################################
head(DBP.Analysis)
## TRT DBP1 DBP2 DBP3 DBP4 DBP5 Age Sex DBP5.DBP1.Difference
## 1 New 114 115 113 109 105 43 F -9
## 2 New 116 113 112 103 101 51 M -15
## 3 New 119 115 113 104 98 48 F -21
## 4 New 115 113 112 109 101 42 F -14
## 5 New 116 112 107 104 105 49 M -11
## 6 New 117 112 113 104 102 47 M -15
##################################
# Conducting Welch T-Test
##################################
<- DBP.Analysis %>%
DBP.Analysis.WelchTTest t_test(DBP5.DBP1.Difference ~ TRT, var.equal = FALSE, detailed = TRUE) %>%
add_significance()
as.data.frame(DBP.Analysis.WelchTTest)) (
## estimate estimate1 estimate2 .y. group1 group2 n1 n2
## 1 10.4 -4.8 -15.2 DBP5.DBP1.Difference Placebo New 20 20
## statistic p df conf.low conf.high method alternative p.signif
## 1 12.1504 2.15e-14 36.52227 8.664937 12.13506 T-test two.sided ****
##################################
# Computing for the effect size
##################################
<- DBP.Analysis %>%
DBP.Analysis.WelchTTest.EffectSize cohens_d(DBP5.DBP1.Difference ~ TRT, var.equal = FALSE, hedges.correction = TRUE)
as.data.frame(DBP.Analysis.WelchTTest.EffectSize)) (
## .y. group1 group2 effsize n1 n2 magnitude
## 1 DBP5.DBP1.Difference Placebo New 3.765956 20 20 large
##################################
# Summarizing the hypothesis testing results
##################################
<- DBP.Analysis.WelchTTest %>% add_xy_position(x = "group")
DBP.Analysis.WelchTTest.Statistics
<- ggplot(DBP.Analysis,aes(x=TRT,y=DBP5.DBP1.Difference,color=TRT)) +
(DBP5DBP1Difference.ViolinBoxplot.ByTreatment geom_violin(scale="width", trim=FALSE) +
stat_boxplot(geom="errorbar",lwd=2) +
geom_boxplot(lwd=2, outlier.size=3, width=0.4) +
theme_bw() +
scale_fill_manual(values=c("#3259A0","#FF5050")) +
scale_color_manual(values=c("#3259A0","#FF5050")) +
scale_x_discrete(name="Treatment (TRT)", limits=c("Placebo","New")) +
scale_y_continuous(name="Diastolic Blood Pressure Change (DBP5-DBP1)",limits=c(-25,5),breaks=seq(-25,5,by=5)) +
theme(axis.title.x=element_text(color="black",face="bold",size=12),
legend.position="top",
legend.key.size = unit(1,"cm"),
legend.key.height = unit(1,"cm"),
legend.key.width = unit(1,"cm"),
legend.title = element_text(size=12),
legend.text = element_text(size=9),
text=element_text(size=9),
axis.text.y=element_text(color="black",hjust=0.25,size=9),
axis.text.x=element_text(color="black",size=9),
axis.title.y=element_text(color="black",face="bold",size=12),
axis.ticks.length=unit(0.25,"cm"),
plot.title=element_text(color="black",size=14,face="bold",hjust=0.50),
plot.subtitle = element_text(color="black",size=14,hjust=0.50)) +
stat_summary(fun=mean, geom="line",color="black",size=2,aes(group=1)) +
stat_summary(fun=mean, geom="point",color="black",size=5) +
labs(subtitle = "t(36.52)=12.15, p=<0.0001, n=40, Effect Size=3.84") +
ggtitle("Hypothesis Testing using Welch T-Test"))
##################################
# Reusing the dataset
##################################
head(DBP.Analysis)
## TRT DBP1 DBP2 DBP3 DBP4 DBP5 Age Sex DBP5.DBP1.Difference
## 1 New 114 115 113 109 105 43 F -9
## 2 New 116 113 112 103 101 51 M -15
## 3 New 119 115 113 104 98 48 F -21
## 4 New 115 113 112 109 101 42 F -14
## 5 New 116 112 107 104 105 49 M -11
## 6 New 117 112 113 104 102 47 M -15
##################################
# Conducting Welch T-Test
##################################
<- DBP.Analysis %>%
DBP.Analysis.WilcoxonRankSumTest wilcox_test(DBP5.DBP1.Difference ~ TRT, detailed = TRUE) %>%
add_significance()
as.data.frame(DBP.Analysis.WilcoxonRankSumTest)) (
## estimate .y. group1 group2 n1 n2 statistic p
## 1 10.00008 DBP5.DBP1.Difference Placebo New 20 20 400 6.29e-08
## conf.low conf.high method alternative p.signif
## 1 8.999927 12.00003 Wilcoxon two.sided ****
##################################
# Computing for the effect size
##################################
<- DBP.Analysis %>%
DBP.Analysis.WilcoxonRankSumTest.EffectSize wilcox_effsize(DBP5.DBP1.Difference ~ TRT)
as.data.frame(DBP.Analysis.WilcoxonRankSumTest.EffectSize)) (
## .y. group1 group2 effsize n1 n2 magnitude
## 1 DBP5.DBP1.Difference Placebo New 0.8576142 20 20 large
##################################
# Summarizing the hypothesis testing results
##################################
<- DBP.Analysis.WilcoxonRankSumTest %>% add_xy_position(x = "group")
DBP.Analysis.WilcoxonRankSumTest.Statistics
<- ggplot(DBP.Analysis,aes(x=TRT,y=DBP5.DBP1.Difference,color=TRT)) +
(DBP5DBP1Difference.ViolinBoxplot.ByTreatment geom_violin(scale="width", trim=FALSE) +
stat_boxplot(geom="errorbar",lwd=2) +
geom_boxplot(lwd=2, outlier.size=3, width=0.4) +
theme_bw() +
scale_fill_manual(values=c("#3259A0","#FF5050")) +
scale_color_manual(values=c("#3259A0","#FF5050")) +
scale_x_discrete(name="Treatment (TRT)", limits=c("Placebo","New")) +
scale_y_continuous(name="Diastolic Blood Pressure Change (DBP5-DBP1)",limits=c(-25,5),breaks=seq(-25,5,by=5)) +
theme(axis.title.x=element_text(color="black",face="bold",size=12),
legend.position="top",
legend.key.size = unit(1,"cm"),
legend.key.height = unit(1,"cm"),
legend.key.width = unit(1,"cm"),
legend.title = element_text(size=12),
legend.text = element_text(size=9),
text=element_text(size=9),
axis.text.y=element_text(color="black",hjust=0.25,size=9),
axis.text.x=element_text(color="black",size=9),
axis.title.y=element_text(color="black",face="bold",size=12),
axis.ticks.length=unit(0.25,"cm"),
plot.title=element_text(color="black",size=14,face="bold",hjust=0.50),
plot.subtitle = element_text(color="black",size=14,hjust=0.50)) +
stat_summary(fun=median, geom="line",color="black",size=2,aes(group=1)) +
stat_summary(fun=median, geom="point",color="black",size=5) +
labs(subtitle = "W=400.00, p=<0.0001, n=40, Effect Size=0.86") +
ggtitle("Hypothesis Testing using Wilcoxon Rank-Sum Test"))
##################################
# Reusing the dataset
##################################
head(DBP.Analysis)
## TRT DBP1 DBP2 DBP3 DBP4 DBP5 Age Sex DBP5.DBP1.Difference
## 1 New 114 115 113 109 105 43 F -9
## 2 New 116 113 112 103 101 51 M -15
## 3 New 119 115 113 104 98 48 F -21
## 4 New 115 113 112 109 101 42 F -14
## 5 New 116 112 107 104 105 49 M -11
## 6 New 117 112 113 104 102 47 M -15
##################################
# Conducting Welch T-Test
##################################
<- rrod.test(DBP5.DBP1.Difference ~ TRT, DBP.Analysis)) (DBP.Analysis.RobustRankOrderTest
##
## Robust Rank-Order Distributional Test
##
## data: DBP5.DBP1.Difference by TRT
## z = Inf, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
##################################
# Summarizing the hypothesis testing results
##################################
<- ggplot(DBP.Analysis,aes(x=TRT,y=DBP5.DBP1.Difference,color=TRT)) +
(DBP5DBP1Difference.ViolinBoxplot.ByTreatment geom_violin(scale="width", trim=FALSE) +
stat_boxplot(geom="errorbar",lwd=2) +
geom_boxplot(lwd=2, outlier.size=3, width=0.4) +
theme_bw() +
scale_fill_manual(values=c("#3259A0","#FF5050")) +
scale_color_manual(values=c("#3259A0","#FF5050")) +
scale_x_discrete(name="Treatment (TRT)", limits=c("Placebo","New")) +
scale_y_continuous(name="Diastolic Blood Pressure Change (DBP5-DBP1)",limits=c(-25,5),breaks=seq(-25,5,by=5)) +
theme(axis.title.x=element_text(color="black",face="bold",size=12),
legend.position="top",
legend.key.size = unit(1,"cm"),
legend.key.height = unit(1,"cm"),
legend.key.width = unit(1,"cm"),
legend.title = element_text(size=12),
legend.text = element_text(size=9),
text=element_text(size=9),
axis.text.y=element_text(color="black",hjust=0.25,size=9),
axis.text.x=element_text(color="black",size=9),
axis.title.y=element_text(color="black",face="bold",size=12),
axis.ticks.length=unit(0.25,"cm"),
plot.title=element_text(color="black",size=14,face="bold",hjust=0.50),
plot.subtitle = element_text(color="black",size=14,hjust=0.50)) +
stat_summary(fun=median, geom="line",color="black",size=2,aes(group=1)) +
stat_summary(fun=median, geom="point",color="black",size=5) +
labs(subtitle = "z=Inf, p=<0.0001, n=40, Effect Size=ND") +
ggtitle("Hypothesis Testing using Robust Rank-Order Test"))