##################################
# Loading R libraries
##################################
library("survival")
library("survminer")
library("mice")
library("foreign")
library("rms")
library("Hmisc")
library("VIM")
library("gridExtra")
library("finalfit")
library("knitr")
library("dplyr")
library("gtsummary")
library("tidyr")
library("purrr")
library("moments")
library("lattice")
##################################
# Defining file paths
##################################
# Relative name of the directory that holds the input datasets.
DATASETS_PATH <- file.path("datasets")
##################################
# Loading dataset
##################################
# Read the SPSS file into a data frame. Value labels are not applied
# (use.value.labels = FALSE), so coded variables arrive as raw numbers.
SMART <- read.spss(
  file.path("..", DATASETS_PATH, "SMARTst.sav"),
  use.value.labels = FALSE,
  to.data.frame = TRUE
)
##################################
# Performing a general exploration of the dataset
##################################
# Report the dataset dimensions (rows, columns).
dim(SMART)
## [1] 3873 29
# Print per-column summary statistics; columns with missing values also
# show an "NA's" count in the output.
summary(SMART)
## TEVENT EVENT SEX AGE
## Min. : 0.1 Min. :0.0000 Min. :1.000 Min. :19.00
## 1st Qu.: 555.0 1st Qu.:0.0000 1st Qu.:1.000 1st Qu.:52.00
## Median :1213.0 Median :0.0000 Median :1.000 Median :60.00
## Mean :1370.3 Mean :0.1188 Mean :1.252 Mean :59.56
## 3rd Qu.:2165.0 3rd Qu.:0.0000 3rd Qu.:2.000 3rd Qu.:68.00
## Max. :3466.0 Max. :1.0000 Max. :2.000 Max. :82.00
##
## DIABETES CEREBRAL CARDIAC AAA
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :1.0000 Median :0.0000
## Mean :0.2207 Mean :0.2962 Mean :0.5577 Mean :0.1074
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## NA's :40
## PERIPH STENOSIS SYSTBP DIASTBP
## Min. :0.0000 Min. :0.000 Min. : 96.0 Min. : 46.0
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:127.0 1st Qu.: 73.0
## Median :0.0000 Median :0.000 Median :139.0 Median : 79.0
## Mean :0.2427 Mean :0.191 Mean :141.3 Mean : 79.7
## 3rd Qu.:0.0000 3rd Qu.:0.000 3rd Qu.:154.0 3rd Qu.: 86.0
## Max. :1.0000 Max. :1.000 Max. :216.0 Max. :127.0
## NA's :93 NA's :1223 NA's :1221
## SYSTH DIASTH LENGTH WEIGHT BMI
## Min. : 79.0 Min. : 45.00 Min. :1.53 Min. : 50 Min. :18.70
## 1st Qu.:126.0 1st Qu.: 75.00 1st Qu.:1.68 1st Qu.: 72 1st Qu.:24.11
## Median :140.0 Median : 82.00 Median :1.75 Median : 80 Median :26.30
## Mean :142.2 Mean : 82.44 Mean :1.74 Mean : 81 Mean :26.70
## 3rd Qu.:156.0 3rd Qu.: 90.00 3rd Qu.:1.80 3rd Qu.: 89 3rd Qu.:28.73
## Max. :244.0 Max. :136.00 Max. :1.94 Max. :124 Max. :39.80
## NA's :1498 NA's :1499 NA's :1 NA's :2 NA's :3
## CHOL HDL LDL TRIG HOMOC
## Min. :2.800 Min. :0.58 Min. :1.100 Min. :0.560 Min. : 6.10
## 1st Qu.:4.400 1st Qu.:0.96 1st Qu.:2.390 1st Qu.:1.120 1st Qu.:10.32
## Median :5.100 Median :1.17 Median :3.060 Median :1.540 Median :12.80
## Mean :5.191 Mean :1.23 Mean :3.144 Mean :1.854 Mean :13.84
## 3rd Qu.:5.900 3rd Qu.:1.42 3rd Qu.:3.830 3rd Qu.:2.230 3rd Qu.:15.70
## Max. :9.400 Max. :2.51 Max. :6.600 Max. :8.960 Max. :38.30
## NA's :18 NA's :30 NA's :216 NA's :28 NA's :463
## GLUT CREAT IMT albumin
## Min. : 4.300 Min. : 54.00 Min. :0.4700 Min. :1.000
## 1st Qu.: 5.300 1st Qu.: 78.00 1st Qu.:0.7500 1st Qu.:1.000
## Median : 5.700 Median : 89.00 Median :0.8800 Median :1.000
## Mean : 6.333 Mean : 98.39 Mean :0.9346 Mean :1.241
## 3rd Qu.: 6.500 3rd Qu.:101.00 3rd Qu.:1.0700 3rd Qu.:1.000
## Max. :18.700 Max. :825.00 Max. :1.8300 Max. :3.000
## NA's :19 NA's :17 NA's :98 NA's :207
## SMOKING packyrs alcohol
## Min. :1.000 Min. : 0.00 Min. :1.000
## 1st Qu.:2.000 1st Qu.: 5.90 1st Qu.:2.000
## Median :2.000 Median : 19.50 Median :3.000
## Mean :1.935 Mean : 22.62 Mean :2.504
## 3rd Qu.:2.000 3rd Qu.: 34.20 3rd Qu.:3.000
## Max. :3.000 Max. :120.00 Max. :3.000
## NA's :25 NA's :21 NA's :25
# Print a detailed per-variable description (n, missing, distinct values,
# quantiles / frequency tables). Presumably Hmisc::describe - confirm that
# no later-attached package masks it.
describe(SMART)
## SMART
##
## 29 Variables 3873 Observations
## --------------------------------------------------------------------------------
## TEVENT
## n missing distinct Info Mean Gmd .05 .10
## 3873 0 1934 1 1370 1078 98.6 197.0
## .25 .50 .75 .90 .95
## 555.0 1213.0 2165.0 2762.4 3017.4
##
## lowest : 0.1 1 2 3 4 , highest: 3451 3452 3463 3465 3466
## --------------------------------------------------------------------------------
## EVENT
## n missing distinct Info Sum Mean Gmd
## 3873 0 2 0.314 460 0.1188 0.2094
##
## --------------------------------------------------------------------------------
## SEX
## n missing distinct Info Mean Gmd
## 3873 0 2 0.565 1.252 0.3771
##
## Value 1 2
## Frequency 2897 976
## Proportion 0.748 0.252
## --------------------------------------------------------------------------------
## AGE
## n missing distinct Info Mean Gmd .05 .10
## 3873 0 62 0.999 59.56 11.94 41 46
## .25 .50 .75 .90 .95
## 52 60 68 73 76
##
## lowest : 19 20 23 24 25, highest: 78 79 80 81 82
## --------------------------------------------------------------------------------
## DIABETES
## n missing distinct Info Sum Mean Gmd
## 3833 40 2 0.516 846 0.2207 0.3441
##
## --------------------------------------------------------------------------------
## CEREBRAL
## n missing distinct Info Sum Mean Gmd
## 3873 0 2 0.625 1147 0.2962 0.417
##
## --------------------------------------------------------------------------------
## CARDIAC
## n missing distinct Info Sum Mean Gmd
## 3873 0 2 0.74 2160 0.5577 0.4935
##
## --------------------------------------------------------------------------------
## AAA
## n missing distinct Info Sum Mean Gmd
## 3873 0 2 0.288 416 0.1074 0.1918
##
## --------------------------------------------------------------------------------
## PERIPH
## n missing distinct Info Sum Mean Gmd
## 3873 0 2 0.551 940 0.2427 0.3677
##
## --------------------------------------------------------------------------------
## STENOSIS
## n missing distinct Info Sum Mean Gmd
## 3780 93 2 0.464 722 0.191 0.3091
##
## --------------------------------------------------------------------------------
## SYSTBP
## n missing distinct Info Mean Gmd .05 .10
## 2650 1223 114 1 141.3 22.41 112 117
## .25 .50 .75 .90 .95
## 127 139 154 169 178
##
## lowest : 96 97 98 99 100, highest: 206 209 211 212 216
## --------------------------------------------------------------------------------
## DIASTBP
## n missing distinct Info Mean Gmd .05 .10
## 2652 1221 70 0.999 79.7 11.03 65 68
## .25 .50 .75 .90 .95
## 73 79 86 93 97
##
## lowest : 46 48 52 53 54, highest: 117 118 120 124 127
## --------------------------------------------------------------------------------
## SYSTH
## n missing distinct Info Mean Gmd .05 .10
## 2375 1498 132 1 142.2 24.91 110.0 116.0
## .25 .50 .75 .90 .95
## 126.0 140.0 156.0 172.0 182.3
##
## lowest : 79 88 91 93 94, highest: 222 223 228 242 244
## --------------------------------------------------------------------------------
## DIASTH
## n missing distinct Info Mean Gmd .05 .10
## 2374 1499 77 0.999 82.44 13.22 64 68
## .25 .50 .75 .90 .95
## 75 82 90 98 103
##
## lowest : 45 49 50 52 53, highest: 123 125 126 130 136
## --------------------------------------------------------------------------------
## LENGTH
## n missing distinct Info Mean Gmd .05 .10
## 3872 1 42 0.999 1.74 0.09863 1.59 1.62
## .25 .50 .75 .90 .95
## 1.68 1.75 1.80 1.85 1.88
##
## lowest : 1.53 1.54 1.55 1.56 1.57, highest: 1.9 1.91 1.92 1.93 1.94
## --------------------------------------------------------------------------------
## WEIGHT
## n missing distinct Info Mean Gmd .05 .10
## 3871 2 75 0.999 81 15.41 59 64
## .25 .50 .75 .90 .95
## 72 80 89 99 104
##
## lowest : 50 51 52 53 54, highest: 120 121 122 123 124
## --------------------------------------------------------------------------------
## BMI
## n missing distinct Info Mean Gmd .05 .10
## 3870 3 995 1 26.7 4.258 20.96 22.16
## .25 .50 .75 .90 .95
## 24.11 26.30 28.73 31.86 33.90
##
## lowest : 18.7 18.71 18.73 18.78 18.81, highest: 39.25 39.43 39.45 39.64 39.8
## --------------------------------------------------------------------------------
## CHOL
## n missing distinct Info Mean Gmd .05 .10
## 3855 18 65 0.999 5.191 1.283 3.5 3.8
## .25 .50 .75 .90 .95
## 4.4 5.1 5.9 6.7 7.2
##
## lowest : 2.8 2.9 3 3.1 3.2, highest: 8.8 8.9 9.1 9.2 9.4
## --------------------------------------------------------------------------------
## HDL
## n missing distinct Info Mean Gmd .05 .10
## 3843 30 188 1 1.23 0.4025 0.74 0.82
## .25 .50 .75 .90 .95
## 0.96 1.17 1.42 1.73 1.94
##
## lowest : 0.58 0.59 0.6 0.61 0.62, highest: 2.46 2.47 2.48 2.49 2.51
## --------------------------------------------------------------------------------
## LDL
## n missing distinct Info Mean Gmd .05 .10
## 3657 216 478 1 3.144 1.17 1.580 1.860
## .25 .50 .75 .90 .95
## 2.390 3.060 3.830 4.550 4.982
##
## lowest : 1.1 1.11 1.12 1.14 1.15, highest: 6.34 6.37 6.41 6.47 6.6
## --------------------------------------------------------------------------------
## TRIG
## n missing distinct Info Mean Gmd .05 .10
## 3845 28 461 1 1.854 1.095 0.760 0.880
## .25 .50 .75 .90 .95
## 1.120 1.540 2.230 3.130 3.918
##
## lowest : 0.56 0.57 0.58 0.59 0.6 , highest: 8.28 8.61 8.68 8.91 8.96
## --------------------------------------------------------------------------------
## HOMOC
## n missing distinct Info Mean Gmd .05 .10
## 3410 463 255 1 13.84 5.359 7.90 8.70
## .25 .50 .75 .90 .95
## 10.33 12.80 15.70 19.90 23.85
##
## lowest : 6.1 6.2 6.3 6.4 6.5 , highest: 35.5 35.9 36.1 37.1 38.3
## --------------------------------------------------------------------------------
## GLUT
## n missing distinct Info Mean Gmd .05 .10
## 3854 19 125 0.998 6.333 1.701 4.8 5.0
## .25 .50 .75 .90 .95
## 5.3 5.7 6.5 8.4 10.4
##
## lowest : 4.3 4.4 4.5 4.6 4.7 , highest: 17.6 17.7 17.9 18 18.7
## --------------------------------------------------------------------------------
## CREAT
## n missing distinct Info Mean Gmd .05 .10
## 3856 17 194 1 98.39 34.63 64.75 69.00
## .25 .50 .75 .90 .95
## 78.00 89.00 101.00 118.00 138.25
##
## lowest : 54 55 56 57 58, highest: 784 799 809 813 825
## --------------------------------------------------------------------------------
## IMT
## n missing distinct Info Mean Gmd .05 .10
## 3775 98 99 0.999 0.9346 0.2879 0.600 0.650
## .25 .50 .75 .90 .95
## 0.750 0.880 1.070 1.292 1.450
##
## lowest : 0.47 0.48 0.5 0.52 0.53, highest: 1.77 1.78 1.8 1.82 1.83
## --------------------------------------------------------------------------------
## albumin
## n missing distinct Info Mean Gmd
## 3666 207 3 0.501 1.241 0.3919
##
## Value 1 2 3
## Frequency 2897 655 114
## Proportion 0.790 0.179 0.031
##
## For the frequency table, variable is rounded to the nearest 0
## --------------------------------------------------------------------------------
## SMOKING
## n missing distinct Info Mean Gmd
## 3848 25 3 0.643 1.935 0.4996
##
## Value 1 2 3
## Frequency 693 2711 444
## Proportion 0.180 0.705 0.115
##
## For the frequency table, variable is rounded to the nearest 0
## --------------------------------------------------------------------------------
## packyrs
## n missing distinct Info Mean Gmd .05 .10
## 3852 21 263 0.994 22.62 22.26 0.0 0.0
## .25 .50 .75 .90 .95
## 5.9 19.5 34.2 50.4 62.0
##
## lowest : 0 0.3 0.6 0.7 0.8, highest: 100 102 104 110 120
## --------------------------------------------------------------------------------
## alcohol
## n missing distinct Info Mean Gmd
## 3848 25 3 0.65 2.504 0.7353
##
## Value 1 2 3
## Frequency 751 408 2689
## Proportion 0.195 0.106 0.699
##
## For the frequency table, variable is rounded to the nearest 0
## --------------------------------------------------------------------------------
##################################
# Performing re-categorization and re-grouping
# of factor variables
##################################
# Convert the numerically coded categorical columns to factors so that
# downstream summaries and models treat them as categories.
SMART$EVENT    <- as.factor(SMART$EVENT)
SMART$SEX      <- as.factor(SMART$SEX)
SMART$DIABETES <- as.factor(SMART$DIABETES)
SMART$CEREBRAL <- as.factor(SMART$CEREBRAL)
SMART$CARDIAC  <- as.factor(SMART$CARDIAC)
SMART$AAA      <- as.factor(SMART$AAA)
SMART$PERIPH   <- as.factor(SMART$PERIPH)
SMART$STENOSIS <- as.factor(SMART$STENOSIS)
SMART$albumin  <- as.factor(SMART$albumin)
SMART$SMOKING  <- as.factor(SMART$SMOKING)
SMART$alcohol  <- as.factor(SMART$alcohol)
##################################
# Formulating a data type assessment summary
##################################
# Work on a copy of the dataset for the data type assessment.
PDA <- SMART

# One row per column: its position, name and (first) class. The outer
# parentheses print the data frame as it is assigned.
(PDA.Summary <- data.frame(
  Column.Index = seq_along(names(PDA)),
  Column.Name = names(PDA),
  # class(x)[1] keeps one value per column even for multi-class columns
  Column.Type = vapply(PDA, function(x) class(x)[1], character(1)),
  row.names = NULL
))
## Column.Index Column.Name Column.Type
## 1 1 TEVENT numeric
## 2 2 EVENT factor
## 3 3 SEX factor
## 4 4 AGE numeric
## 5 5 DIABETES factor
## 6 6 CEREBRAL factor
## 7 7 CARDIAC factor
## 8 8 AAA factor
## 9 9 PERIPH factor
## 10 10 STENOSIS factor
## 11 11 SYSTBP numeric
## 12 12 DIASTBP numeric
## 13 13 SYSTH numeric
## 14 14 DIASTH numeric
## 15 15 LENGTH numeric
## 16 16 WEIGHT numeric
## 17 17 BMI numeric
## 18 18 CHOL numeric
## 19 19 HDL numeric
## 20 20 LDL numeric
## 21 21 TRIG numeric
## 22 22 HOMOC numeric
## 23 23 GLUT numeric
## 24 24 CREAT numeric
## 25 25 IMT numeric
## 26 26 albumin factor
## 27 27 SMOKING factor
## 28 28 packyrs numeric
## 29 29 alcohol factor
##################################
# Loading dataset
##################################
# Work on a copy of the dataset for the data quality assessment.
DQA <- SMART
##################################
# Listing all predictors
##################################
# Predictors are every column except the survival time and event indicator.
# drop = FALSE keeps a data frame even if only one column remains.
DQA.Predictors <- DQA[, !names(DQA) %in% c("TEVENT", "EVENT"), drop = FALSE]
##################################
# Formulating an overall data quality assessment summary
##################################
# One row per column of DQA: position, name, class, total row count,
# number of missing values and fill rate (share of non-missing values,
# formatted as a 3-decimal string). The outer parentheses print the result.
(DQA.Summary <- data.frame(
  Column.Index = seq_along(names(DQA)),
  Column.Name = names(DQA),
  Column.Type = vapply(DQA, function(x) class(x)[1], character(1)),
  Row.Count = vapply(DQA, function(x) nrow(DQA), integer(1)),
  NA.Count = vapply(DQA, function(x) sum(is.na(x)), integer(1)),
  Fill.Rate = vapply(
    DQA,
    function(x) format(round(sum(!is.na(x)) / nrow(DQA), 3), nsmall = 3),
    character(1)
  ),
  row.names = NULL
))
## Column.Index Column.Name Column.Type Row.Count NA.Count Fill.Rate
## 1 1 TEVENT numeric 3873 0 1.000
## 2 2 EVENT factor 3873 0 1.000
## 3 3 SEX factor 3873 0 1.000
## 4 4 AGE numeric 3873 0 1.000
## 5 5 DIABETES factor 3873 40 0.990
## 6 6 CEREBRAL factor 3873 0 1.000
## 7 7 CARDIAC factor 3873 0 1.000
## 8 8 AAA factor 3873 0 1.000
## 9 9 PERIPH factor 3873 0 1.000
## 10 10 STENOSIS factor 3873 93 0.976
## 11 11 SYSTBP numeric 3873 1223 0.684
## 12 12 DIASTBP numeric 3873 1221 0.685
## 13 13 SYSTH numeric 3873 1498 0.613
## 14 14 DIASTH numeric 3873 1499 0.613
## 15 15 LENGTH numeric 3873 1 1.000
## 16 16 WEIGHT numeric 3873 2 0.999
## 17 17 BMI numeric 3873 3 0.999
## 18 18 CHOL numeric 3873 18 0.995
## 19 19 HDL numeric 3873 30 0.992
## 20 20 LDL numeric 3873 216 0.944
## 21 21 TRIG numeric 3873 28 0.993
## 22 22 HOMOC numeric 3873 463 0.880
## 23 23 GLUT numeric 3873 19 0.995
## 24 24 CREAT numeric 3873 17 0.996
## 25 25 IMT numeric 3873 98 0.975
## 26 26 albumin factor 3873 207 0.947
## 27 27 SMOKING factor 3873 25 0.994
## 28 28 packyrs numeric 3873 21 0.995
## 29 29 alcohol factor 3873 25 0.994
##################################
# Listing all numeric predictors
##################################
# Keep only the numeric predictor columns. drop = FALSE keeps a data frame
# even when exactly one column qualifies, so names() below stays meaningful.
DQA.Predictors.Numeric <- DQA.Predictors[
  , vapply(DQA.Predictors, is.numeric, logical(1)), drop = FALSE
]

# Report how many numeric predictors were found.
if (length(names(DQA.Predictors.Numeric)) > 0) {
  print(paste0("There are ",
               length(names(DQA.Predictors.Numeric)),
               " numeric predictor variable(s)."))
} else {
  print("There are no numeric predictor variables.")
}
## [1] "There are 17 numeric predictor variable(s)."
##################################
# Listing all factor predictors
##################################
# Keep only the factor predictor columns. drop = FALSE keeps a data frame
# even when exactly one column qualifies, so names() below stays meaningful.
DQA.Predictors.Factor <- DQA.Predictors[
  , vapply(DQA.Predictors, is.factor, logical(1)), drop = FALSE
]

# Report how many factor predictors were found.
if (length(names(DQA.Predictors.Factor)) > 0) {
  print(paste0("There are ",
               length(names(DQA.Predictors.Factor)),
               " factor predictor variable(s)."))
} else {
  print("There are no factor predictor variables.")
}
## [1] "There are 10 factor predictor variable(s)."
##################################
# Formulating a data quality assessment summary for factor predictors
##################################
if (length(names(DQA.Predictors.Factor)) > 0) {

  ##################################
  # Formulating a function to determine the first mode
  ##################################
  # Returns the most frequent non-missing value(s) of x; ties return all
  # tied values.
  FirstModes <- function(x) {
    ux <- unique(na.omit(x))
    # match() maps NA to NA, which tabulate() ignores
    tab <- tabulate(match(x, ux))
    ux[tab == max(tab)]
  }

  ##################################
  # Formulating a function to determine the second mode
  ##################################
  # Returns the most frequent non-missing value(s) of x once the first
  # mode(s) have been removed. Missing values are dropped first so that NA
  # can never be reported as the second mode.
  SecondModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    fm <- ux[tab == max(tab)]
    sm <- na.omit(x)[!(na.omit(x) %in% fm)]
    usm <- unique(sm)
    tabsm <- tabulate(match(sm, usm))
    usm[tabsm == max(tabsm)]
  }

  # One row per factor predictor. Unique.Count uses unique(x) without
  # removing NA, so a missing value counts as one distinct value. The outer
  # parentheses print the data frame as it is assigned.
  (DQA.Predictors.Factor.Summary <- data.frame(
    Column.Name = names(DQA.Predictors.Factor),
    Column.Type = vapply(DQA.Predictors.Factor,
                         function(x) class(x)[1], character(1)),
    Unique.Count = vapply(DQA.Predictors.Factor,
                          function(x) length(unique(x)), integer(1)),
    First.Mode.Value = vapply(
      DQA.Predictors.Factor,
      function(x) as.character(FirstModes(x)[1]), character(1)
    ),
    Second.Mode.Value = vapply(
      DQA.Predictors.Factor,
      function(x) as.character(SecondModes(x)[1]), character(1)
    ),
    First.Mode.Count = vapply(
      DQA.Predictors.Factor,
      function(x) sum(na.omit(x) == FirstModes(x)[1]), integer(1)
    ),
    Second.Mode.Count = vapply(
      DQA.Predictors.Factor,
      function(x) sum(na.omit(x) == SecondModes(x)[1]), integer(1)
    ),
    Unique.Count.Ratio = vapply(
      DQA.Predictors.Factor,
      function(x) {
        format(round(length(unique(x)) / nrow(DQA.Predictors.Factor), 3),
               nsmall = 3)
      },
      character(1)
    ),
    First.Second.Mode.Ratio = vapply(
      DQA.Predictors.Factor,
      function(x) {
        format(round(sum(na.omit(x) == FirstModes(x)[1]) /
                       sum(na.omit(x) == SecondModes(x)[1]), 3),
               nsmall = 3)
      },
      character(1)
    ),
    row.names = NULL
  ))

}
## Column.Name Column.Type Unique.Count First.Mode.Value Second.Mode.Value
## 1 SEX factor 2 1 2
## 2 DIABETES factor 3 0 1
## 3 CEREBRAL factor 2 0 1
## 4 CARDIAC factor 2 1 0
## 5 AAA factor 2 0 1
## 6 PERIPH factor 2 0 1
## 7 STENOSIS factor 3 0 1
## 8 albumin factor 4 1 2
## 9 SMOKING factor 4 2 1
## 10 alcohol factor 4 3 1
## First.Mode.Count Second.Mode.Count Unique.Count.Ratio
## 1 2897 976 0.001
## 2 2987 846 0.001
## 3 2726 1147 0.001
## 4 2160 1713 0.001
## 5 3457 416 0.001
## 6 2933 940 0.001
## 7 3058 722 0.001
## 8 2897 655 0.001
## 9 2711 693 0.001
## 10 2689 751 0.001
## First.Second.Mode.Ratio
## 1 2.968
## 2 3.531
## 3 2.377
## 4 1.261
## 5 8.310
## 6 3.120
## 7 4.235
## 8 4.423
## 9 3.912
## 10 3.581
##################################
# Formulating a data quality assessment summary for numeric predictors
##################################
if (length(names(DQA.Predictors.Numeric)) > 0) {

  ##################################
  # Formulating a function to determine the first mode
  ##################################
  # Returns the most frequent non-missing value(s) of x; ties return all
  # tied values.
  FirstModes <- function(x) {
    ux <- unique(na.omit(x))
    # match() maps NA to NA, which tabulate() ignores
    tab <- tabulate(match(x, ux))
    ux[tab == max(tab)]
  }

  ##################################
  # Formulating a function to determine the second mode
  ##################################
  # Returns the most frequent non-missing value(s) of x once the first
  # mode(s) have been removed.
  SecondModes <- function(x) {
    ux <- unique(na.omit(x))
    tab <- tabulate(match(x, ux))
    fm <- ux[tab == max(tab)]
    sm <- na.omit(x)[!(na.omit(x) %in% fm)]
    usm <- unique(sm)
    tabsm <- tabulate(match(sm, usm))
    usm[tabsm == max(tabsm)]
  }

  # Formats a single number as a string with 3 decimals.
  FormatStat <- function(value) {
    format(round(unname(value), 3), nsmall = 3)
  }

  # One row per numeric predictor. Unique.Count uses unique(x) without
  # removing NA, so a missing value counts as one distinct value. All
  # statistics are stored as formatted strings. The outer parentheses print
  # the data frame as it is assigned.
  (DQA.Predictors.Numeric.Summary <- data.frame(
    Column.Name = names(DQA.Predictors.Numeric),
    Column.Type = vapply(DQA.Predictors.Numeric,
                         function(x) class(x)[1], character(1)),
    Unique.Count = vapply(DQA.Predictors.Numeric,
                          function(x) length(unique(x)), integer(1)),
    Unique.Count.Ratio = vapply(
      DQA.Predictors.Numeric,
      function(x) FormatStat(length(unique(x)) / nrow(DQA.Predictors.Numeric)),
      character(1)
    ),
    First.Mode.Value = vapply(
      DQA.Predictors.Numeric,
      function(x) FormatStat(FirstModes(x)[1]), character(1)
    ),
    Second.Mode.Value = vapply(
      DQA.Predictors.Numeric,
      function(x) FormatStat(SecondModes(x)[1]), character(1)
    ),
    First.Mode.Count = vapply(
      DQA.Predictors.Numeric,
      function(x) sum(na.omit(x) == FirstModes(x)[1]), integer(1)
    ),
    Second.Mode.Count = vapply(
      DQA.Predictors.Numeric,
      function(x) sum(na.omit(x) == SecondModes(x)[1]), integer(1)
    ),
    First.Second.Mode.Ratio = vapply(
      DQA.Predictors.Numeric,
      function(x) {
        FormatStat(sum(na.omit(x) == FirstModes(x)[1]) /
                     sum(na.omit(x) == SecondModes(x)[1]))
      },
      character(1)
    ),
    Minimum = vapply(DQA.Predictors.Numeric,
                     function(x) FormatStat(min(x, na.rm = TRUE)),
                     character(1)),
    Mean = vapply(DQA.Predictors.Numeric,
                  function(x) FormatStat(mean(x, na.rm = TRUE)),
                  character(1)),
    Median = vapply(DQA.Predictors.Numeric,
                    function(x) FormatStat(median(x, na.rm = TRUE)),
                    character(1)),
    Maximum = vapply(DQA.Predictors.Numeric,
                     function(x) FormatStat(max(x, na.rm = TRUE)),
                     character(1)),
    Skewness = vapply(
      DQA.Predictors.Numeric,
      function(x) FormatStat(moments::skewness(x, na.rm = TRUE)),
      character(1)
    ),
    Kurtosis = vapply(
      DQA.Predictors.Numeric,
      function(x) FormatStat(moments::kurtosis(x, na.rm = TRUE)),
      character(1)
    ),
    Percentile25th = vapply(
      DQA.Predictors.Numeric,
      function(x) FormatStat(quantile(x, probs = 0.25, na.rm = TRUE)),
      character(1)
    ),
    Percentile75th = vapply(
      DQA.Predictors.Numeric,
      function(x) FormatStat(quantile(x, probs = 0.75, na.rm = TRUE)),
      character(1)
    ),
    row.names = NULL
  ))

}
## Column.Name Column.Type Unique.Count Unique.Count.Ratio First.Mode.Value
## 1 AGE numeric 62 0.016 58.000
## 2 SYSTBP numeric 115 0.030 127.000
## 3 DIASTBP numeric 71 0.018 77.000
## 4 SYSTH numeric 133 0.034 140.000
## 5 DIASTH numeric 78 0.020 80.000
## 6 LENGTH numeric 43 0.011 1.780
## 7 WEIGHT numeric 76 0.020 80.000
## 8 BMI numeric 996 0.257 28.090
## 9 CHOL numeric 66 0.017 4.800
## 10 HDL numeric 189 0.049 1.160
## 11 LDL numeric 479 0.124 1.100
## 12 TRIG numeric 462 0.119 1.100
## 13 HOMOC numeric 256 0.066 11.600
## 14 GLUT numeric 126 0.033 5.400
## 15 CREAT numeric 195 0.050 86.000
## 16 IMT numeric 100 0.026 0.870
## 17 packyrs numeric 264 0.068 0.000
## Second.Mode.Value First.Mode.Count Second.Mode.Count First.Second.Mode.Ratio
## 1 63.000 147 140 1.050
## 2 133.000 66 62 1.065
## 3 75.000 122 118 1.034
## 4 130.000 66 60 1.100
## 5 84.000 132 94 1.404
## 6 1.760 223 208 1.072
## 7 78.000 148 137 1.080
## 8 28.730 35 32 1.094
## 9 4.500 156 146 1.068
## 10 1.020 61 58 1.052
## 11 2.790 41 24 1.708
## 12 1.460 34 33 1.030
## 13 11.700 49 48 1.021
## 14 5.500 224 220 1.018
## 15 93.000 99 93 1.065
## 16 0.780 126 121 1.041
## 17 30.600 697 42 16.595
## Minimum Mean Median Maximum Skewness Kurtosis Percentile25th
## 1 19.000 59.557 60.000 82.000 -0.388 2.839 52.000
## 2 96.000 141.322 139.000 216.000 0.531 3.151 127.000
## 3 46.000 79.698 79.000 127.000 0.475 3.693 73.000
## 4 79.000 142.163 140.000 244.000 0.684 3.735 126.000
## 5 45.000 82.442 82.000 136.000 0.347 3.396 75.000
## 6 1.530 1.740 1.750 1.940 -0.195 2.662 1.680
## 7 50.000 81.001 80.000 124.000 0.366 3.292 72.000
## 8 18.700 26.696 26.300 39.800 0.647 3.648 24.110
## 9 2.800 5.191 5.100 9.400 0.457 3.238 4.400
## 10 0.580 1.230 1.170 2.510 0.927 3.934 0.960
## 11 1.100 3.144 3.060 6.600 0.392 2.857 2.390
## 12 0.560 1.854 1.540 8.960 2.618 13.156 1.120
## 13 6.100 13.843 12.800 38.300 1.845 7.874 10.325
## 14 4.300 6.333 5.700 18.700 2.929 13.442 5.300
## 15 54.000 98.394 89.000 825.000 8.636 88.940 78.000
## 16 0.470 0.935 0.880 1.830 1.099 4.398 0.750
## 17 0.000 22.623 19.500 120.000 0.965 3.650 5.900
## Percentile75th
## 1 68.000
## 2 154.000
## 3 86.000
## 4 156.000
## 5 90.000
## 6 1.800
## 7 89.000
## 8 28.730
## 9 5.900
## 10 1.420
## 11 3.830
## 12 2.230
## 13 15.700
## 14 6.500
## 15 101.000
## 16 1.070
## 17 34.200
##################################
# Identifying potential data quality issues
##################################
##################################
# Checking for missing observations
##################################
# Report the columns that contain missing values. When any exist, the
# matching rows of DQA.Summary are the value of the if-expression and are
# therefore printed at top level.
if (any(DQA.Summary$NA.Count > 0)) {
  print(paste0("Missing observations noted for ",
               sum(DQA.Summary$NA.Count > 0),
               " variable(s) with NA.Count>0 and Fill.Rate<1.0."))
  DQA.Summary[DQA.Summary$NA.Count > 0, ]
} else {
  print("No missing observations noted.")
}
## [1] "Missing observations noted for 21 variable(s) with NA.Count>0 and Fill.Rate<1.0."
## Column.Index Column.Name Column.Type Row.Count NA.Count Fill.Rate
## 5 5 DIABETES factor 3873 40 0.990
## 10 10 STENOSIS factor 3873 93 0.976
## 11 11 SYSTBP numeric 3873 1223 0.684
## 12 12 DIASTBP numeric 3873 1221 0.685
## 13 13 SYSTH numeric 3873 1498 0.613
## 14 14 DIASTH numeric 3873 1499 0.613
## 15 15 LENGTH numeric 3873 1 1.000
## 16 16 WEIGHT numeric 3873 2 0.999
## 17 17 BMI numeric 3873 3 0.999
## 18 18 CHOL numeric 3873 18 0.995
## 19 19 HDL numeric 3873 30 0.992
## 20 20 LDL numeric 3873 216 0.944
## 21 21 TRIG numeric 3873 28 0.993
## 22 22 HOMOC numeric 3873 463 0.880
## 23 23 GLUT numeric 3873 19 0.995
## 24 24 CREAT numeric 3873 17 0.996
## 25 25 IMT numeric 3873 98 0.975
## 26 26 albumin factor 3873 207 0.947
## 27 27 SMOKING factor 3873 25 0.994
## 28 28 packyrs numeric 3873 21 0.995
## 29 29 alcohol factor 3873 25 0.994
##################################
# Checking for zero or near-zero variance predictors
##################################
# Flag factor predictors whose most frequent level is more than 5 times as
# frequent as the runner-up. The ratio column is stored as text, hence the
# numeric conversion. which() drops ratios that are NA/NaN, so they cannot
# produce all-NA rows in the listing.
if (length(names(DQA.Predictors.Factor)) == 0) {
  print("No factor predictors noted.")
} else {
  DQA.Predictors.Factor.LowVariance <- DQA.Predictors.Factor.Summary[
    which(as.numeric(as.character(
      DQA.Predictors.Factor.Summary$First.Second.Mode.Ratio
    )) > 5),
  ]
  if (nrow(DQA.Predictors.Factor.LowVariance) > 0) {
    print(paste0("Low variance observed for ",
                 nrow(DQA.Predictors.Factor.LowVariance),
                 " factor variable(s) with First.Second.Mode.Ratio>5."))
    # Last value of the branch: printed at top level
    DQA.Predictors.Factor.LowVariance
  } else {
    print("No low variance factor predictors due to high first-second mode ratio noted.")
  }
}
## [1] "Low variance observed for 1 factor variable(s) with First.Second.Mode.Ratio>5."
## Column.Name Column.Type Unique.Count First.Mode.Value Second.Mode.Value
## 5 AAA factor 2 0 1
## First.Mode.Count Second.Mode.Count Unique.Count.Ratio First.Second.Mode.Ratio
## 5 3457 416 0.001 8.310
# Flag numeric predictors whose most frequent value is more than 5 times as
# frequent as the runner-up. The ratio column is stored as text, hence the
# numeric conversion. which() drops ratios that are NA/NaN, so they cannot
# produce all-NA rows in the listing.
if (length(names(DQA.Predictors.Numeric)) == 0) {
  print("No numeric predictors noted.")
} else {
  DQA.Predictors.Numeric.HighModeRatio <- DQA.Predictors.Numeric.Summary[
    which(as.numeric(as.character(
      DQA.Predictors.Numeric.Summary$First.Second.Mode.Ratio
    )) > 5),
  ]
  if (nrow(DQA.Predictors.Numeric.HighModeRatio) > 0) {
    print(paste0("Low variance observed for ",
                 nrow(DQA.Predictors.Numeric.HighModeRatio),
                 " numeric variable(s) with First.Second.Mode.Ratio>5."))
    # Last value of the branch: printed at top level
    DQA.Predictors.Numeric.HighModeRatio
  } else {
    print("No low variance numeric predictors due to high first-second mode ratio noted.")
  }
}
## [1] "Low variance observed for 1 numeric variable(s) with First.Second.Mode.Ratio>5."
## Column.Name Column.Type Unique.Count Unique.Count.Ratio First.Mode.Value
## 17 packyrs numeric 264 0.068 0.000
## Second.Mode.Value First.Mode.Count Second.Mode.Count First.Second.Mode.Ratio
## 17 30.600 697 42 16.595
## Minimum Mean Median Maximum Skewness Kurtosis Percentile25th
## 17 0.000 22.623 19.500 120.000 0.965 3.650 5.900
## Percentile75th
## 17 34.200
# Flag numeric predictors whose number of distinct values is below 1% of
# the row count. The ratio column is stored as text, hence the numeric
# conversion. which() drops ratios that are NA, so they cannot produce
# all-NA rows in the listing.
if (length(names(DQA.Predictors.Numeric)) == 0) {
  print("No numeric predictors noted.")
} else {
  DQA.Predictors.Numeric.LowUniqueRatio <- DQA.Predictors.Numeric.Summary[
    which(as.numeric(as.character(
      DQA.Predictors.Numeric.Summary$Unique.Count.Ratio
    )) < 0.01),
  ]
  if (nrow(DQA.Predictors.Numeric.LowUniqueRatio) > 0) {
    print(paste0("Low variance observed for ",
                 nrow(DQA.Predictors.Numeric.LowUniqueRatio),
                 " numeric variable(s) with Unique.Count.Ratio<0.01."))
    # Last value of the branch: printed at top level
    DQA.Predictors.Numeric.LowUniqueRatio
  } else {
    print("No low variance numeric predictors due to low unique count ratio noted.")
  }
}
## [1] "No low variance numeric predictors due to low unique count ratio noted."
##################################
# Checking for skewed predictors
##################################
# Flag numeric predictors with skewness above 3 or below -3. The skewness
# column is stored as text, hence the numeric conversion. which() drops
# NA/NaN skewness values, so they cannot produce all-NA rows in the listing.
if (length(names(DQA.Predictors.Numeric)) == 0) {
  print("No numeric predictors noted.")
} else {
  DQA.Predictors.Numeric.Skewed <- DQA.Predictors.Numeric.Summary[
    which(abs(as.numeric(as.character(
      DQA.Predictors.Numeric.Summary$Skewness
    ))) > 3),
  ]
  if (nrow(DQA.Predictors.Numeric.Skewed) > 0) {
    print(paste0("High skewness observed for ",
                 nrow(DQA.Predictors.Numeric.Skewed),
                 " numeric variable(s) with Skewness>3 or Skewness<(-3)."))
    # Last value of the branch: printed at top level
    DQA.Predictors.Numeric.Skewed
  } else {
    print("No skewed numeric predictors noted.")
  }
}
## [1] "High skewness observed for 1 numeric variable(s) with Skewness>3 or Skewness<(-3)."
## Column.Name Column.Type Unique.Count Unique.Count.Ratio First.Mode.Value
## 15 CREAT numeric 195 0.050 86.000
## Second.Mode.Value First.Mode.Count Second.Mode.Count First.Second.Mode.Ratio
## 15 93.000 99 93 1.065
## Minimum Mean Median Maximum Skewness Kurtosis Percentile25th
## 15 54.000 98.394 89.000 825.000 8.636 88.940 78.000
## Percentile75th
## 15 101.000
##################################
# Loading dataset
##################################
# Work on a copy of the dataset for the predictor-coding comparison.
PC <- SMART
# NOTE(review): EVENT is a factor by this point in the script, so
# as.numeric() returns the level codes (1/2), not the original 0/1 values.
# The Surv() calls in this script still work because survival::Surv accepts
# 1/2 status coding (2 = event); use as.numeric(as.character(...)) if 0/1
# is needed elsewhere.
PC$EVENT <- as.numeric(PC$EVENT)
##################################
# Evaluating the impact of predictor coding
# for AGE as applied in a univariate Cox regression model
#################################
##################################
# Relationship of AGE with prognostic outcome = Linear
# (TEVENT,EVENT) ~ AGE
#################################
# Univariate Cox model with AGE entered as a linear term.
AGE_CoxPHFit_Linear <- cph(Surv(TEVENT, EVENT) ~ AGE, data = PC)
anova(AGE_CoxPHFit_Linear)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## AGE 97.17 1 <.0001
## TOTAL 97.17 1 <.0001
# Keep the first element of the ANOVA table (the Wald chi-square shown in
# the AGE row); the outer parentheses print it.
(AGE_CoxPHFit_Linear_WaldX2 <- anova(AGE_CoxPHFit_Linear)[1])
## [1] 97.17258
##################################
# Relationship of AGE with prognostic outcome = Squared
# (TEVENT,EVENT) ~ AGE +AGE^2
#################################
# Univariate Cox model with AGE entered as a second-degree polynomial.
AGE_CoxPHFit_Squared <- cph(Surv(TEVENT, EVENT) ~ pol(AGE, 2), data = PC)
anova(AGE_CoxPHFit_Squared)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## AGE 124.99 2 <.0001
## Nonlinear 15.00 1 1e-04
## TOTAL 124.99 2 <.0001
# Keep the first element of the ANOVA table (the Wald chi-square shown in
# the AGE row); the outer parentheses print it.
(AGE_CoxPHFit_Squared_WaldX2 <- anova(AGE_CoxPHFit_Squared)[1])
## [1] 124.9852
##################################
# Relationship of AGE with prognostic outcome = Linear after 55
# (TEVENT,EVENT) ~ if (AGE>55) then (AGE-55) else 0
#################################
# Univariate Cox model where AGE has no effect up to 55 and a linear
# effect in the years above 55.
AGE_CoxPHFit_LinearAfter55 <- cph(
  Surv(TEVENT, EVENT) ~ ifelse(AGE > 55, (AGE - 55), 0),
  data = PC
)
anova(AGE_CoxPHFit_LinearAfter55)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## AGE 118.59 1 <.0001
## TOTAL 118.59 1 <.0001
# Keep the first element of the ANOVA table (the Wald chi-square shown in
# the AGE row); the outer parentheses print it.
(AGE_CoxPHFit_LinearAfter55_WaldX2 <- anova(AGE_CoxPHFit_LinearAfter55)[1])
## [1] 118.591
##################################
# Relationship of AGE with prognostic outcome = Squared after 50
# (TEVENT,EVENT) ~ if (AGE>50) then (AGE-50)^2 else 0
#################################
# Univariate Cox model where AGE has no effect up to 50 and a quadratic
# effect in the years above 50.
AGE_CoxPHFit_SquaredAfter50 <- cph(
  Surv(TEVENT, EVENT) ~ ifelse(AGE > 50, (AGE - 50)^2, 0),
  data = PC
)
anova(AGE_CoxPHFit_SquaredAfter50)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## AGE 129.64 1 <.0001
## TOTAL 129.64 1 <.0001
# Keep the first element of the ANOVA table (the Wald chi-square shown in
# the AGE row); the outer parentheses print it.
(AGE_CoxPHFit_SquaredAfter50_WaldX2 <- anova(AGE_CoxPHFit_SquaredAfter50)[1])
## [1] 129.6405
##################################
# Relationship of AGE with prognostic outcome = Restricted cubic spline (4 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(AGE,4)
#################################
# Univariate Cox model with AGE entered as a restricted cubic spline
# with 4 knots.
AGE_CoxPHFit_RCS4Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(AGE, 4), data = PC)
anova(AGE_CoxPHFit_RCS4Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## AGE 125.30 3 <.0001
## Nonlinear 13.77 2 0.001
## TOTAL 125.30 3 <.0001
# Keep the first element of the ANOVA table (the Wald chi-square shown in
# the AGE row); the outer parentheses print it.
(AGE_CoxPHFit_RCS4Knots_WaldX2 <- anova(AGE_CoxPHFit_RCS4Knots)[1])
## [1] 125.3017
##################################
# Relationship of AGE with prognostic outcome = Quartiles
# (TEVENT,EVENT) ~ AGE(Quartile1, Quartile2, Quartile3, Quartile4)
#################################
# Mean AGE within the four age bands (<50, 50-59, 60-69, 70+).
tapply(PC$AGE,
       ifelse(PC$AGE < 50, 1,
              ifelse(PC$AGE < 60, 2,
                     ifelse(PC$AGE < 70, 3, 4))),
       mean)
## 1 2 3 4
## 43.20362 54.78531 64.53566 73.59654
# Code each band by a representative age (close to the band means printed
# above) and store it as a factor. A single as.factor() is enough; the
# original applied it twice with no additional effect.
PC$AGEQuartile <- as.factor(ifelse(PC$AGE < 50, 43,
                                   ifelse(PC$AGE < 60, 55,
                                          ifelse(PC$AGE < 70, 65, 74))))
# Univariate Cox model with AGE entered as the 4-level categorical band.
AGE_CoxPHFit_Quartile <- cph(Surv(TEVENT, EVENT) ~ AGEQuartile, data = PC)
anova(AGE_CoxPHFit_Quartile)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## AGEQuartile 93.22 3 <.0001
## TOTAL 93.22 3 <.0001
# Keep the first element of the ANOVA table (the Wald chi-square shown in
# the AGEQuartile row); the outer parentheses print it.
(AGE_CoxPHFit_Quartile_WaldX2 <- anova(AGE_CoxPHFit_Quartile)[1])
## [1] 93.22204
##################################
# Relationship of AGE with prognostic outcome = Dichotomized
# (TEVENT,EVENT) ~ AGE(Half1, Half2)
#################################
# cut2(..., g=2) splits AGE into two groups at 61; the printed group means,
# rounded, are used as the two coded values (51 and 69).
tapply(PC$AGE, cut2(PC$AGE, g = 2), mean)
## [19,61) [61,82]
## 51.29891 68.50350
# AGE is taken from the model data (PC), like every other model in this
# script. The original referenced SMART$AGE, which bypasses `data = PC` and
# only works while SMART and PC share the same row order.
AGE_CoxPHFit_Dichotomized <- cph(Surv(TEVENT, EVENT) ~ ifelse(AGE < 61, 51, 69), data = PC)
anova(AGE_CoxPHFit_Dichotomized)
# NOTE(review): the output below was recorded with the SMART$AGE formula, hence
# the factor label "SMART"; re-run to refresh the label and confirm the values.
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## SMART 72.47 1 <.0001
## TOTAL 72.47 1 <.0001
# Store element [1] of the Wald table (the predictor chi-square); the outer
# parentheses echo the stored value.
(AGE_CoxPHFit_Dichotomized_WaldX2 <- anova(AGE_CoxPHFit_Dichotomized)[1])
## [1] 72.46705
##################################
# Evaluating the impact of predictor coding
# for CREAT as applied in a univariate Cox regression model
#################################
# For every coding below: fit with cph() on PC, print the Wald table with
# anova(), then store element [1] of that table (the CREAT chi-square) in the
# matching *_WaldX2 object; the outer parentheses echo the stored value.
# Lines starting with "##" are recorded output.
##################################
# Relationship of CREAT with prognostic outcome = Linear
# (TEVENT,EVENT) ~ CREAT
#################################
CREAT_CoxPHFit_Linear <- cph(Surv(TEVENT, EVENT) ~ CREAT, data = PC)
anova(CREAT_CoxPHFit_Linear)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## CREAT 93.42 1 <.0001
## TOTAL 93.42 1 <.0001
(CREAT_CoxPHFit_Linear_WaldX2 <- anova(CREAT_CoxPHFit_Linear)[1])
## [1] 93.42011
##################################
# Relationship of CREAT with prognostic outcome = Restricted cubic spline (4 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(CREAT,4)
#################################
CREAT_CoxPHFit_RCS4Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(CREAT, 4), data = PC)
anova(CREAT_CoxPHFit_RCS4Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## CREAT 116.1 3 <.0001
## Nonlinear 42.6 2 <.0001
## TOTAL 116.1 3 <.0001
(CREAT_CoxPHFit_RCS4Knots_WaldX2 <- anova(CREAT_CoxPHFit_RCS4Knots)[1])
## [1] 116.102
##################################
# Relationship of CREAT with prognostic outcome = Restricted cubic spline (3 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(CREAT,3)
#################################
CREAT_CoxPHFit_RCS3Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(CREAT, 3), data = PC)
anova(CREAT_CoxPHFit_RCS3Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## CREAT 99.03 2 <.0001
## Nonlinear 20.32 1 <.0001
## TOTAL 99.03 2 <.0001
(CREAT_CoxPHFit_RCS3Knots_WaldX2 <- anova(CREAT_CoxPHFit_RCS3Knots)[1])
## [1] 99.02755
##################################
# Relationship of CREAT with prognostic outcome = Logarithm
# (TEVENT,EVENT) ~ Logarithm(CREAT)
#################################
CREAT_CoxPHFit_Log <- cph(Surv(TEVENT, EVENT) ~ log(CREAT), data = PC)
anova(CREAT_CoxPHFit_Log)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## CREAT 131.01 1 <.0001
## TOTAL 131.01 1 <.0001
(CREAT_CoxPHFit_Log_WaldX2 <- anova(CREAT_CoxPHFit_Log)[1])
## [1] 131.0105
##################################
# Evaluating the impact of predictor coding
# for SYSTH as applied in a univariate Cox regression model
#################################
# For every coding below: fit with cph() on PC, print the Wald table with
# anova(), then store element [1] of that table (the SYSTH chi-square) in the
# matching *_WaldX2 object; the outer parentheses echo the stored value.
# Lines starting with "##" are recorded output.
##################################
# Relationship of SYSTH with prognostic outcome = Linear
# (TEVENT,EVENT) ~ SYSTH
#################################
SYSTH_CoxPHFit_Linear <- cph(Surv(TEVENT, EVENT) ~ SYSTH, data = PC)
anova(SYSTH_CoxPHFit_Linear)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## SYSTH 15.49 1 1e-04
## TOTAL 15.49 1 1e-04
(SYSTH_CoxPHFit_Linear_WaldX2 <- anova(SYSTH_CoxPHFit_Linear)[1])
## [1] 15.49085
##################################
# Relationship of SYSTH with prognostic outcome = Restricted cubic spline (4 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(SYSTH,4)
#################################
SYSTH_CoxPHFit_RCS4Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(SYSTH, 4), data = PC)
anova(SYSTH_CoxPHFit_RCS4Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## SYSTH 15.60 3 0.0014
## Nonlinear 0.25 2 0.8841
## TOTAL 15.60 3 0.0014
(SYSTH_CoxPHFit_RCS4Knots_WaldX2 <- anova(SYSTH_CoxPHFit_RCS4Knots)[1])
## [1] 15.5989
##################################
# Relationship of SYSTH with prognostic outcome = Restricted cubic spline (3 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(SYSTH,3)
#################################
SYSTH_CoxPHFit_RCS3Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(SYSTH, 3), data = PC)
anova(SYSTH_CoxPHFit_RCS3Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## SYSTH 15.17 2 0.0005
## Nonlinear 0.05 1 0.8261
## TOTAL 15.17 2 0.0005
(SYSTH_CoxPHFit_RCS3Knots_WaldX2 <- anova(SYSTH_CoxPHFit_RCS3Knots)[1])
## [1] 15.16692
##################################
# Relationship of SYSTH with prognostic outcome = Logarithm
# (TEVENT,EVENT) ~ Logarithm(SYSTH)
#################################
SYSTH_CoxPHFit_Log <- cph(Surv(TEVENT, EVENT) ~ log(SYSTH), data = PC)
anova(SYSTH_CoxPHFit_Log)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## SYSTH 14.39 1 1e-04
## TOTAL 14.39 1 1e-04
(SYSTH_CoxPHFit_Log_WaldX2 <- anova(SYSTH_CoxPHFit_Log)[1])
## [1] 14.39016
##################################
# Evaluating the impact of predictor coding
# for DIASTH as applied in a univariate Cox regression model
#################################
# For every coding below: fit with cph() on PC, print the Wald table with
# anova(), then store element [1] of that table (the DIASTH chi-square) in the
# matching *_WaldX2 object; the outer parentheses echo the stored value.
# Lines starting with "##" are recorded output.
##################################
# Relationship of DIASTH with prognostic outcome = Linear
# (TEVENT,EVENT) ~ DIASTH
#################################
DIASTH_CoxPHFit_Linear <- cph(Surv(TEVENT, EVENT) ~ DIASTH, data = PC)
anova(DIASTH_CoxPHFit_Linear)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## DIASTH 0.73 1 0.3916
## TOTAL 0.73 1 0.3916
(DIASTH_CoxPHFit_Linear_WaldX2 <- anova(DIASTH_CoxPHFit_Linear)[1])
## [1] 0.7339729
##################################
# Relationship of DIASTH with prognostic outcome = Restricted cubic spline (4 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(DIASTH,4)
#################################
DIASTH_CoxPHFit_RCS4Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(DIASTH, 4), data = PC)
anova(DIASTH_CoxPHFit_RCS4Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## DIASTH 2.62 3 0.4540
## Nonlinear 1.91 2 0.3851
## TOTAL 2.62 3 0.4540
(DIASTH_CoxPHFit_RCS4Knots_WaldX2 <- anova(DIASTH_CoxPHFit_RCS4Knots)[1])
## [1] 2.620047
##################################
# Relationship of DIASTH with prognostic outcome = Restricted cubic spline (3 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(DIASTH,3)
#################################
DIASTH_CoxPHFit_RCS3Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(DIASTH, 3), data = PC)
anova(DIASTH_CoxPHFit_RCS3Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## DIASTH 2.08 2 0.3528
## Nonlinear 1.24 1 0.2659
## TOTAL 2.08 2 0.3528
(DIASTH_CoxPHFit_RCS3Knots_WaldX2 <- anova(DIASTH_CoxPHFit_RCS3Knots)[1])
## [1] 2.083467
##################################
# Relationship of DIASTH with prognostic outcome = Logarithm
# (TEVENT,EVENT) ~ Logarithm(DIASTH)
#################################
DIASTH_CoxPHFit_Log <- cph(Surv(TEVENT, EVENT) ~ log(DIASTH), data = PC)
anova(DIASTH_CoxPHFit_Log)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## DIASTH 0.47 1 0.492
## TOTAL 0.47 1 0.492
(DIASTH_CoxPHFit_Log_WaldX2 <- anova(DIASTH_CoxPHFit_Log)[1])
## [1] 0.4721145
##################################
# Evaluating the impact of predictor coding
# for SYSTBP as applied in a univariate Cox regression model
#################################
# For every coding below: fit with cph() on PC, print the Wald table with
# anova(), then store element [1] of that table (the SYSTBP chi-square) in the
# matching *_WaldX2 object; the outer parentheses echo the stored value.
# Lines starting with "##" are recorded output.
##################################
# Relationship of SYSTBP with prognostic outcome = Linear
# (TEVENT,EVENT) ~ SYSTBP
#################################
SYSTBP_CoxPHFit_Linear <- cph(Surv(TEVENT, EVENT) ~ SYSTBP, data = PC)
anova(SYSTBP_CoxPHFit_Linear)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## SYSTBP 23.22 1 <.0001
## TOTAL 23.22 1 <.0001
(SYSTBP_CoxPHFit_Linear_WaldX2 <- anova(SYSTBP_CoxPHFit_Linear)[1])
## [1] 23.22008
##################################
# Relationship of SYSTBP with prognostic outcome = Restricted cubic spline (4 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(SYSTBP,4)
#################################
SYSTBP_CoxPHFit_RCS4Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(SYSTBP, 4), data = PC)
anova(SYSTBP_CoxPHFit_RCS4Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## SYSTBP 26.27 3 <.0001
## Nonlinear 1.73 2 0.4219
## TOTAL 26.27 3 <.0001
(SYSTBP_CoxPHFit_RCS4Knots_WaldX2 <- anova(SYSTBP_CoxPHFit_RCS4Knots)[1])
## [1] 26.27116
##################################
# Relationship of SYSTBP with prognostic outcome = Restricted cubic spline (3 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(SYSTBP,3)
#################################
SYSTBP_CoxPHFit_RCS3Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(SYSTBP, 3), data = PC)
anova(SYSTBP_CoxPHFit_RCS3Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## SYSTBP 26.26 2 <.0001
## Nonlinear 1.37 1 0.2422
## TOTAL 26.26 2 <.0001
(SYSTBP_CoxPHFit_RCS3Knots_WaldX2 <- anova(SYSTBP_CoxPHFit_RCS3Knots)[1])
## [1] 26.26421
##################################
# Relationship of SYSTBP with prognostic outcome = Logarithm
# (TEVENT,EVENT) ~ Logarithm(SYSTBP)
#################################
SYSTBP_CoxPHFit_Log <- cph(Surv(TEVENT, EVENT) ~ log(SYSTBP), data = PC)
anova(SYSTBP_CoxPHFit_Log)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## SYSTBP 21.2 1 <.0001
## TOTAL 21.2 1 <.0001
(SYSTBP_CoxPHFit_Log_WaldX2 <- anova(SYSTBP_CoxPHFit_Log)[1])
## [1] 21.20011
##################################
# Evaluating the impact of predictor coding
# for DIASTBP as applied in a univariate Cox regression model
#################################
# For every coding below: fit with cph() on PC, print the Wald table with
# anova(), then store element [1] of that table (the DIASTBP chi-square) in
# the matching *_WaldX2 object; the outer parentheses echo the stored value.
# Lines starting with "##" are recorded output.
##################################
# Relationship of DIASTBP with prognostic outcome = Linear
# (TEVENT,EVENT) ~ DIASTBP
#################################
DIASTBP_CoxPHFit_Linear <- cph(Surv(TEVENT, EVENT) ~ DIASTBP, data = PC)
anova(DIASTBP_CoxPHFit_Linear)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## DIASTBP 1.67 1 0.1962
## TOTAL 1.67 1 0.1962
(DIASTBP_CoxPHFit_Linear_WaldX2 <- anova(DIASTBP_CoxPHFit_Linear)[1])
## [1] 1.670766
##################################
# Relationship of DIASTBP with prognostic outcome = Restricted cubic spline (4 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(DIASTBP,4)
#################################
DIASTBP_CoxPHFit_RCS4Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(DIASTBP, 4), data = PC)
anova(DIASTBP_CoxPHFit_RCS4Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## DIASTBP 25.15 3 <.0001
## Nonlinear 23.22 2 <.0001
## TOTAL 25.15 3 <.0001
(DIASTBP_CoxPHFit_RCS4Knots_WaldX2 <- anova(DIASTBP_CoxPHFit_RCS4Knots)[1])
## [1] 25.14634
##################################
# Relationship of DIASTBP with prognostic outcome = Restricted cubic spline (3 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(DIASTBP,3)
#################################
DIASTBP_CoxPHFit_RCS3Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(DIASTBP, 3), data = PC)
anova(DIASTBP_CoxPHFit_RCS3Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## DIASTBP 24.84 2 <.0001
## Nonlinear 22.29 1 <.0001
## TOTAL 24.84 2 <.0001
(DIASTBP_CoxPHFit_RCS3Knots_WaldX2 <- anova(DIASTBP_CoxPHFit_RCS3Knots)[1])
## [1] 24.84403
##################################
# Relationship of DIASTBP with prognostic outcome = Logarithm
# (TEVENT,EVENT) ~ Logarithm(DIASTBP)
#################################
DIASTBP_CoxPHFit_Log <- cph(Surv(TEVENT, EVENT) ~ log(DIASTBP), data = PC)
anova(DIASTBP_CoxPHFit_Log)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## DIASTBP 0.68 1 0.4092
## TOTAL 0.68 1 0.4092
(DIASTBP_CoxPHFit_Log_WaldX2 <- anova(DIASTBP_CoxPHFit_Log)[1])
## [1] 0.6810459
##################################
# Evaluating the impact of predictor coding
# for CHOL as applied in a univariate Cox regression model
#################################
# For every coding below: fit with cph() on PC, print the Wald table with
# anova(), then store element [1] of that table (the CHOL chi-square) in the
# matching *_WaldX2 object; the outer parentheses echo the stored value.
# Lines starting with "##" are recorded output.
##################################
# Relationship of CHOL with prognostic outcome = Linear
# (TEVENT,EVENT) ~ CHOL
#################################
CHOL_CoxPHFit_Linear <- cph(Surv(TEVENT, EVENT) ~ CHOL, data = PC)
anova(CHOL_CoxPHFit_Linear)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## CHOL 0.32 1 0.5717
## TOTAL 0.32 1 0.5717
(CHOL_CoxPHFit_Linear_WaldX2 <- anova(CHOL_CoxPHFit_Linear)[1])
## [1] 0.3199142
##################################
# Relationship of CHOL with prognostic outcome = Restricted cubic spline (4 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(CHOL,4)
#################################
CHOL_CoxPHFit_RCS4Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(CHOL, 4), data = PC)
anova(CHOL_CoxPHFit_RCS4Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## CHOL 0.60 3 0.8964
## Nonlinear 0.27 2 0.8720
## TOTAL 0.60 3 0.8964
(CHOL_CoxPHFit_RCS4Knots_WaldX2 <- anova(CHOL_CoxPHFit_RCS4Knots)[1])
## [1] 0.5999714
##################################
# Relationship of CHOL with prognostic outcome = Restricted cubic spline (3 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(CHOL,3)
#################################
CHOL_CoxPHFit_RCS3Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(CHOL, 3), data = PC)
anova(CHOL_CoxPHFit_RCS3Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## CHOL 0.60 2 0.7399
## Nonlinear 0.25 1 0.6161
## TOTAL 0.60 2 0.7399
(CHOL_CoxPHFit_RCS3Knots_WaldX2 <- anova(CHOL_CoxPHFit_RCS3Knots)[1])
## [1] 0.6024457
##################################
# Relationship of CHOL with prognostic outcome = Logarithm
# (TEVENT,EVENT) ~ Logarithm(CHOL)
#################################
CHOL_CoxPHFit_Log <- cph(Surv(TEVENT, EVENT) ~ log(CHOL), data = PC)
anova(CHOL_CoxPHFit_Log)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## CHOL 0.25 1 0.6171
## TOTAL 0.25 1 0.6171
(CHOL_CoxPHFit_Log_WaldX2 <- anova(CHOL_CoxPHFit_Log)[1])
## [1] 0.2499827
##################################
# Evaluating the impact of predictor coding
# for HDL as applied in a univariate Cox regression model
#################################
# For every coding below: fit with cph() on PC, print the Wald table with
# anova(), then store element [1] of that table (the HDL chi-square) in the
# matching *_WaldX2 object; the outer parentheses echo the stored value.
# Lines starting with "##" are recorded output.
##################################
# Relationship of HDL with prognostic outcome = Linear
# (TEVENT,EVENT) ~ HDL
#################################
HDL_CoxPHFit_Linear <- cph(Surv(TEVENT, EVENT) ~ HDL, data = PC)
anova(HDL_CoxPHFit_Linear)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## HDL 11.83 1 6e-04
## TOTAL 11.83 1 6e-04
(HDL_CoxPHFit_Linear_WaldX2 <- anova(HDL_CoxPHFit_Linear)[1])
## [1] 11.83362
##################################
# Relationship of HDL with prognostic outcome = Restricted cubic spline (4 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(HDL,4)
#################################
HDL_CoxPHFit_RCS4Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(HDL, 4), data = PC)
anova(HDL_CoxPHFit_RCS4Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## HDL 11.94 3 0.0076
## Nonlinear 0.63 2 0.7302
## TOTAL 11.94 3 0.0076
(HDL_CoxPHFit_RCS4Knots_WaldX2 <- anova(HDL_CoxPHFit_RCS4Knots)[1])
## [1] 11.93661
##################################
# Relationship of HDL with prognostic outcome = Restricted cubic spline (3 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(HDL,3)
#################################
HDL_CoxPHFit_RCS3Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(HDL, 3), data = PC)
anova(HDL_CoxPHFit_RCS3Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## HDL 11.61 2 0.0030
## Nonlinear 0.08 1 0.7741
## TOTAL 11.61 2 0.0030
(HDL_CoxPHFit_RCS3Knots_WaldX2 <- anova(HDL_CoxPHFit_RCS3Knots)[1])
## [1] 11.60599
##################################
# Relationship of HDL with prognostic outcome = Logarithm
# (TEVENT,EVENT) ~ Logarithm(HDL)
#################################
HDL_CoxPHFit_Log <- cph(Surv(TEVENT, EVENT) ~ log(HDL), data = PC)
anova(HDL_CoxPHFit_Log)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## HDL 11.8 1 6e-04
## TOTAL 11.8 1 6e-04
(HDL_CoxPHFit_Log_WaldX2 <- anova(HDL_CoxPHFit_Log)[1])
## [1] 11.79606
##################################
# Evaluating the impact of predictor coding
# for LDL as applied in a univariate Cox regression model
#################################
# For every coding below: fit with cph() on PC, print the Wald table with
# anova(), then store element [1] of that table (the LDL chi-square) in the
# matching *_WaldX2 object; the outer parentheses echo the stored value.
# Lines starting with "##" are recorded output.
##################################
# Relationship of LDL with prognostic outcome = Linear
# (TEVENT,EVENT) ~ LDL
#################################
LDL_CoxPHFit_Linear <- cph(Surv(TEVENT, EVENT) ~ LDL, data = PC)
anova(LDL_CoxPHFit_Linear)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## LDL 1.28 1 0.2584
## TOTAL 1.28 1 0.2584
(LDL_CoxPHFit_Linear_WaldX2 <- anova(LDL_CoxPHFit_Linear)[1])
## [1] 1.2771
##################################
# Relationship of LDL with prognostic outcome = Restricted cubic spline (4 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(LDL,4)
#################################
LDL_CoxPHFit_RCS4Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(LDL, 4), data = PC)
anova(LDL_CoxPHFit_RCS4Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## LDL 1.52 3 0.6777
## Nonlinear 0.25 2 0.8812
## TOTAL 1.52 3 0.6777
(LDL_CoxPHFit_RCS4Knots_WaldX2 <- anova(LDL_CoxPHFit_RCS4Knots)[1])
## [1] 1.519874
##################################
# Relationship of LDL with prognostic outcome = Restricted cubic spline (3 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(LDL,3)
#################################
LDL_CoxPHFit_RCS3Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(LDL, 3), data = PC)
anova(LDL_CoxPHFit_RCS3Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## LDL 1.28 2 0.5260
## Nonlinear 0.00 1 0.9537
## TOTAL 1.28 2 0.5260
(LDL_CoxPHFit_RCS3Knots_WaldX2 <- anova(LDL_CoxPHFit_RCS3Knots)[1])
## [1] 1.284805
##################################
# Relationship of LDL with prognostic outcome = Logarithm
# (TEVENT,EVENT) ~ Logarithm(LDL)
#################################
LDL_CoxPHFit_Log <- cph(Surv(TEVENT, EVENT) ~ log(LDL), data = PC)
anova(LDL_CoxPHFit_Log)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## LDL 1.15 1 0.2839
## TOTAL 1.15 1 0.2839
(LDL_CoxPHFit_Log_WaldX2 <- anova(LDL_CoxPHFit_Log)[1])
## [1] 1.148225
##################################
# Evaluating the impact of predictor coding
# for TRIG as applied in a univariate Cox regression model
#################################
# For every coding below: fit with cph() on PC, print the Wald table with
# anova(), then store element [1] of that table (the TRIG chi-square) in the
# matching *_WaldX2 object; the outer parentheses echo the stored value.
# Lines starting with "##" are recorded output.
##################################
# Relationship of TRIG with prognostic outcome = Linear
# (TEVENT,EVENT) ~ TRIG
#################################
TRIG_CoxPHFit_Linear <- cph(Surv(TEVENT, EVENT) ~ TRIG, data = PC)
anova(TRIG_CoxPHFit_Linear)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## TRIG 1.4 1 0.2366
## TOTAL 1.4 1 0.2366
(TRIG_CoxPHFit_Linear_WaldX2 <- anova(TRIG_CoxPHFit_Linear)[1])
## [1] 1.400792
##################################
# Relationship of TRIG with prognostic outcome = Restricted cubic spline (4 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(TRIG,4)
#################################
TRIG_CoxPHFit_RCS4Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(TRIG, 4), data = PC)
anova(TRIG_CoxPHFit_RCS4Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## TRIG 3.51 3 0.3192
## Nonlinear 2.12 2 0.3456
## TOTAL 3.51 3 0.3192
(TRIG_CoxPHFit_RCS4Knots_WaldX2 <- anova(TRIG_CoxPHFit_RCS4Knots)[1])
## [1] 3.512266
##################################
# Relationship of TRIG with prognostic outcome = Restricted cubic spline (3 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(TRIG,3)
#################################
TRIG_CoxPHFit_RCS3Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(TRIG, 3), data = PC)
anova(TRIG_CoxPHFit_RCS3Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## TRIG 3.25 2 0.1965
## Nonlinear 1.94 1 0.1642
## TOTAL 3.25 2 0.1965
(TRIG_CoxPHFit_RCS3Knots_WaldX2 <- anova(TRIG_CoxPHFit_RCS3Knots)[1])
## [1] 3.253776
##################################
# Relationship of TRIG with prognostic outcome = Logarithm
# (TEVENT,EVENT) ~ Logarithm(TRIG)
#################################
TRIG_CoxPHFit_Log <- cph(Surv(TEVENT, EVENT) ~ log(TRIG), data = PC)
anova(TRIG_CoxPHFit_Log)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## TRIG 2.47 1 0.1164
## TOTAL 2.47 1 0.1164
(TRIG_CoxPHFit_Log_WaldX2 <- anova(TRIG_CoxPHFit_Log)[1])
## [1] 2.46539
##################################
# Evaluating the impact of predictor coding
# for HOMOC as applied in a univariate Cox regression model
#################################
# For every coding below: fit with cph() on PC, print the Wald table with
# anova(), then store element [1] of that table (the HOMOC chi-square) in the
# matching *_WaldX2 object; the outer parentheses echo the stored value.
# Lines starting with "##" are recorded output.
##################################
# Relationship of HOMOC with prognostic outcome = Linear
# (TEVENT,EVENT) ~ HOMOC
#################################
HOMOC_CoxPHFit_Linear <- cph(Surv(TEVENT, EVENT) ~ HOMOC, data = PC)
anova(HOMOC_CoxPHFit_Linear)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## HOMOC 49.59 1 <.0001
## TOTAL 49.59 1 <.0001
(HOMOC_CoxPHFit_Linear_WaldX2 <- anova(HOMOC_CoxPHFit_Linear)[1])
## [1] 49.59385
##################################
# Relationship of HOMOC with prognostic outcome = Restricted cubic spline (4 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(HOMOC,4)
#################################
HOMOC_CoxPHFit_RCS4Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(HOMOC, 4), data = PC)
anova(HOMOC_CoxPHFit_RCS4Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## HOMOC 50.68 3 <.0001
## Nonlinear 7.97 2 0.0186
## TOTAL 50.68 3 <.0001
(HOMOC_CoxPHFit_RCS4Knots_WaldX2 <- anova(HOMOC_CoxPHFit_RCS4Knots)[1])
## [1] 50.67529
##################################
# Relationship of HOMOC with prognostic outcome = Restricted cubic spline (3 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(HOMOC,3)
#################################
HOMOC_CoxPHFit_RCS3Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(HOMOC, 3), data = PC)
anova(HOMOC_CoxPHFit_RCS3Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## HOMOC 47.32 2 <.0001
## Nonlinear 3.64 1 0.0563
## TOTAL 47.32 2 <.0001
(HOMOC_CoxPHFit_RCS3Knots_WaldX2 <- anova(HOMOC_CoxPHFit_RCS3Knots)[1])
## [1] 47.32265
##################################
# Relationship of HOMOC with prognostic outcome = Logarithm
# (TEVENT,EVENT) ~ Logarithm(HOMOC)
#################################
HOMOC_CoxPHFit_Log <- cph(Surv(TEVENT, EVENT) ~ log(HOMOC), data = PC)
anova(HOMOC_CoxPHFit_Log)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## HOMOC 52.46 1 <.0001
## TOTAL 52.46 1 <.0001
(HOMOC_CoxPHFit_Log_WaldX2 <- anova(HOMOC_CoxPHFit_Log)[1])
## [1] 52.46239
##################################
# Evaluating the impact of predictor coding
# for GLUT as applied in a univariate Cox regression model
#################################
# For every coding below: fit with cph() on PC, print the Wald table with
# anova(), then store element [1] of that table (the GLUT chi-square) in the
# matching *_WaldX2 object; the outer parentheses echo the stored value.
# Lines starting with "##" are recorded output.
##################################
# Relationship of GLUT with prognostic outcome = Linear
# (TEVENT,EVENT) ~ GLUT
#################################
GLUT_CoxPHFit_Linear <- cph(Surv(TEVENT, EVENT) ~ GLUT, data = PC)
anova(GLUT_CoxPHFit_Linear)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## GLUT 12.69 1 4e-04
## TOTAL 12.69 1 4e-04
(GLUT_CoxPHFit_Linear_WaldX2 <- anova(GLUT_CoxPHFit_Linear)[1])
## [1] 12.68661
##################################
# Relationship of GLUT with prognostic outcome = Restricted cubic spline (4 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(GLUT,4)
#################################
GLUT_CoxPHFit_RCS4Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(GLUT, 4), data = PC)
anova(GLUT_CoxPHFit_RCS4Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## GLUT 15.43 3 0.0015
## Nonlinear 3.16 2 0.2059
## TOTAL 15.43 3 0.0015
(GLUT_CoxPHFit_RCS4Knots_WaldX2 <- anova(GLUT_CoxPHFit_RCS4Knots)[1])
## [1] 15.42745
##################################
# Relationship of GLUT with prognostic outcome = Restricted cubic spline (3 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(GLUT,3)
#################################
GLUT_CoxPHFit_RCS3Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(GLUT, 3), data = PC)
anova(GLUT_CoxPHFit_RCS3Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## GLUT 13.03 2 0.0015
## Nonlinear 0.11 1 0.7407
## TOTAL 13.03 2 0.0015
(GLUT_CoxPHFit_RCS3Knots_WaldX2 <- anova(GLUT_CoxPHFit_RCS3Knots)[1])
## [1] 13.03222
##################################
# Relationship of GLUT with prognostic outcome = Logarithm
# (TEVENT,EVENT) ~ Logarithm(GLUT)
#################################
GLUT_CoxPHFit_Log <- cph(Surv(TEVENT, EVENT) ~ log(GLUT), data = PC)
anova(GLUT_CoxPHFit_Log)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## GLUT 12.08 1 5e-04
## TOTAL 12.08 1 5e-04
(GLUT_CoxPHFit_Log_WaldX2 <- anova(GLUT_CoxPHFit_Log)[1])
## [1] 12.08488
##################################
# Evaluating the impact of predictor coding
# for LENGTH as applied in a univariate Cox regression model
#################################
# For every coding below: fit with cph() on PC, print the Wald table with
# anova(), then store element [1] of that table (the LENGTH chi-square) in the
# matching *_WaldX2 object; the outer parentheses echo the stored value.
# Lines starting with "##" are recorded output.
##################################
# Relationship of LENGTH with prognostic outcome = Linear
# (TEVENT,EVENT) ~ LENGTH
#################################
LENGTH_CoxPHFit_Linear <- cph(Surv(TEVENT, EVENT) ~ LENGTH, data = PC)
anova(LENGTH_CoxPHFit_Linear)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## LENGTH 0.01 1 0.9284
## TOTAL 0.01 1 0.9284
(LENGTH_CoxPHFit_Linear_WaldX2 <- anova(LENGTH_CoxPHFit_Linear)[1])
## [1] 0.008063822
##################################
# Relationship of LENGTH with prognostic outcome = Restricted cubic spline (4 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(LENGTH,4)
#################################
LENGTH_CoxPHFit_RCS4Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(LENGTH, 4), data = PC)
anova(LENGTH_CoxPHFit_RCS4Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## LENGTH 4.19 3 0.2417
## Nonlinear 4.17 2 0.1243
## TOTAL 4.19 3 0.2417
(LENGTH_CoxPHFit_RCS4Knots_WaldX2 <- anova(LENGTH_CoxPHFit_RCS4Knots)[1])
## [1] 4.189518
##################################
# Relationship of LENGTH with prognostic outcome = Restricted cubic spline (3 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(LENGTH,3)
#################################
LENGTH_CoxPHFit_RCS3Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(LENGTH, 3), data = PC)
anova(LENGTH_CoxPHFit_RCS3Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## LENGTH 4.11 2 0.1280
## Nonlinear 4.11 1 0.0427
## TOTAL 4.11 2 0.1280
(LENGTH_CoxPHFit_RCS3Knots_WaldX2 <- anova(LENGTH_CoxPHFit_RCS3Knots)[1])
## [1] 4.111306
##################################
# Relationship of LENGTH with prognostic outcome = Logarithm
# (TEVENT,EVENT) ~ Logarithm(LENGTH)
#################################
LENGTH_CoxPHFit_Log <- cph(Surv(TEVENT, EVENT) ~ log(LENGTH), data = PC)
anova(LENGTH_CoxPHFit_Log)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## LENGTH 0 1 0.9751
## TOTAL 0 1 0.9751
(LENGTH_CoxPHFit_Log_WaldX2 <- anova(LENGTH_CoxPHFit_Log)[1])
## [1] 0.0009703188
##################################
# Evaluating the impact of predictor coding
# for WEIGHT as applied in a univariate Cox regression model
#################################
# For every coding below: fit with cph() on PC, print the Wald table with
# anova(), then store element [1] of that table (the WEIGHT chi-square) in the
# matching *_WaldX2 object; the outer parentheses echo the stored value.
# Lines starting with "##" are recorded output.
##################################
# Relationship of WEIGHT with prognostic outcome = Linear
# (TEVENT,EVENT) ~ WEIGHT
#################################
WEIGHT_CoxPHFit_Linear <- cph(Surv(TEVENT, EVENT) ~ WEIGHT, data = PC)
anova(WEIGHT_CoxPHFit_Linear)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## WEIGHT 3.21 1 0.073
## TOTAL 3.21 1 0.073
(WEIGHT_CoxPHFit_Linear_WaldX2 <- anova(WEIGHT_CoxPHFit_Linear)[1])
## [1] 3.213383
##################################
# Relationship of WEIGHT with prognostic outcome = Restricted cubic spline (4 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(WEIGHT,4)
#################################
WEIGHT_CoxPHFit_RCS4Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(WEIGHT, 4), data = PC)
anova(WEIGHT_CoxPHFit_RCS4Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## WEIGHT 4.55 3 0.2080
## Nonlinear 1.85 2 0.3966
## TOTAL 4.55 3 0.2080
(WEIGHT_CoxPHFit_RCS4Knots_WaldX2 <- anova(WEIGHT_CoxPHFit_RCS4Knots)[1])
## [1] 4.548998
##################################
# Relationship of WEIGHT with prognostic outcome = Restricted cubic spline (3 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(WEIGHT,3)
#################################
WEIGHT_CoxPHFit_RCS3Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(WEIGHT, 3), data = PC)
anova(WEIGHT_CoxPHFit_RCS3Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## WEIGHT 4.08 2 0.1300
## Nonlinear 1.14 1 0.2847
## TOTAL 4.08 2 0.1300
(WEIGHT_CoxPHFit_RCS3Knots_WaldX2 <- anova(WEIGHT_CoxPHFit_RCS3Knots)[1])
## [1] 4.080816
##################################
# Relationship of WEIGHT with prognostic outcome = Logarithm
# (TEVENT,EVENT) ~ Logarithm(WEIGHT)
#################################
WEIGHT_CoxPHFit_Log <- cph(Surv(TEVENT, EVENT) ~ log(WEIGHT), data = PC)
anova(WEIGHT_CoxPHFit_Log)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## WEIGHT 2.73 1 0.0982
## TOTAL 2.73 1 0.0982
(WEIGHT_CoxPHFit_Log_WaldX2 <- anova(WEIGHT_CoxPHFit_Log)[1])
## [1] 2.734381
##################################
# Evaluating the impact of predictor coding
# for BMI as applied in a univariate Cox regression model
#################################
# For every coding below: fit with cph() on PC, print the Wald table with
# anova(), then store element [1] of that table (the BMI chi-square) in the
# matching *_WaldX2 object; the outer parentheses echo the stored value.
# Lines starting with "##" are recorded output.
##################################
# Relationship of BMI with prognostic outcome = Linear
# (TEVENT,EVENT) ~ BMI
#################################
BMI_CoxPHFit_Linear <- cph(Surv(TEVENT, EVENT) ~ BMI, data = PC)
anova(BMI_CoxPHFit_Linear)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## BMI 3.87 1 0.0492
## TOTAL 3.87 1 0.0492
(BMI_CoxPHFit_Linear_WaldX2 <- anova(BMI_CoxPHFit_Linear)[1])
## [1] 3.867475
##################################
# Relationship of BMI with prognostic outcome = Restricted cubic spline (4 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(BMI,4)
#################################
BMI_CoxPHFit_RCS4Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(BMI, 4), data = PC)
anova(BMI_CoxPHFit_RCS4Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## BMI 7.64 3 0.0542
## Nonlinear 3.77 2 0.1516
## TOTAL 7.64 3 0.0542
(BMI_CoxPHFit_RCS4Knots_WaldX2 <- anova(BMI_CoxPHFit_RCS4Knots)[1])
## [1] 7.635769
##################################
# Relationship of BMI with prognostic outcome = Restricted cubic spline (3 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(BMI,3)
#################################
BMI_CoxPHFit_RCS3Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(BMI, 3), data = PC)
anova(BMI_CoxPHFit_RCS3Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## BMI 4.02 2 0.1342
## Nonlinear 0.08 1 0.7811
## TOTAL 4.02 2 0.1342
(BMI_CoxPHFit_RCS3Knots_WaldX2 <- anova(BMI_CoxPHFit_RCS3Knots)[1])
## [1] 4.01637
##################################
# Relationship of BMI with prognostic outcome = Logarithm
# (TEVENT,EVENT) ~ Logarithm(BMI)
#################################
BMI_CoxPHFit_Log <- cph(Surv(TEVENT, EVENT) ~ log(BMI), data = PC)
anova(BMI_CoxPHFit_Log)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## BMI 3.9 1 0.0482
## TOTAL 3.9 1 0.0482
(BMI_CoxPHFit_Log_WaldX2 <- anova(BMI_CoxPHFit_Log)[1])
## [1] 3.902606
##################################
# Evaluating the impact of predictor coding
# for PACKYRS as applied in a univariate Cox regression model
#################################
##################################
# Relationship of PACKYRS with prognostic outcome = Linear
# (TEVENT,EVENT) ~ PACKYRS
#################################
# Fitting a univariate Cox model with packyrs entered as a single linear term
# (the data column is lower-case "packyrs"; the object names use PACKYRS)
PACKYRS_CoxPHFit_Linear <- cph(Surv(TEVENT, EVENT) ~ packyrs, data = PC)
anova(PACKYRS_CoxPHFit_Linear)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## packyrs 12.95 1 3e-04
## TOTAL 12.95 1 3e-04
# Keeping the first anova() entry (the packyrs Wald chi-square shown above);
# the outer parentheses print the value as it is assigned
(PACKYRS_CoxPHFit_Linear_WaldX2 <- anova(PACKYRS_CoxPHFit_Linear)[1])
## [1] 12.95089
##################################
# Relationship of PACKYRS with prognostic outcome = Restricted cubic spline (4 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(PACKYRS,4)
#################################
# Fitting a univariate Cox model with packyrs entered through rcs(packyrs, 4)
PACKYRS_CoxPHFit_RCS4Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(packyrs, 4), data = PC)
anova(PACKYRS_CoxPHFit_RCS4Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## packyrs 14.85 3 0.0019
## Nonlinear 1.54 2 0.4640
## TOTAL 14.85 3 0.0019
# Keeping the first anova() entry (the overall packyrs Wald chi-square shown
# above); the outer parentheses print the value as it is assigned
(PACKYRS_CoxPHFit_RCS4Knots_WaldX2 <- anova(PACKYRS_CoxPHFit_RCS4Knots)[1])
## [1] 14.85005
##################################
# Relationship of PACKYRS with prognostic outcome = Restricted cubic spline (3 knots)
# (TEVENT,EVENT) ~ Restricted Cubic Spline(PACKYRS,3)
#################################
# Fitting a univariate Cox model with packyrs entered through rcs(packyrs, 3)
PACKYRS_CoxPHFit_RCS3Knots <- cph(Surv(TEVENT, EVENT) ~ rcs(packyrs, 3), data = PC)
anova(PACKYRS_CoxPHFit_RCS3Knots)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## packyrs 13.48 2 0.0012
## Nonlinear 0.13 1 0.7152
## TOTAL 13.48 2 0.0012
# Keeping the first anova() entry (the overall packyrs Wald chi-square shown
# above); the outer parentheses print the value as it is assigned
(PACKYRS_CoxPHFit_RCS3Knots_WaldX2 <- anova(PACKYRS_CoxPHFit_RCS3Knots)[1])
## [1] 13.47873
##################################
# Evaluating the impact of predictor coding
# for CEREBRAL, CARDIAC, AAA and PERIPH
# as applied in a univariate and multivariate Cox regression model
#################################
# Converting CEREBRAL, CARDIAC, AAA and PERIPH to numeric so they can be added
# together further down. Going through as.character() first means that, if a
# column is stored as a factor, its labels are converted rather than its
# internal level codes.
PC$CEREBRAL <- as.numeric(as.character(PC$CEREBRAL))
PC$CARDIAC <- as.numeric(as.character(PC$CARDIAC))
PC$AAA <- as.numeric(as.character(PC$AAA))
PC$PERIPH <- as.numeric(as.character(PC$PERIPH))
##################################
# Relationship of CEREBRAL, CARDIAC, AAA and PERIPH
# with prognostic outcome = Linear
# (TEVENT,EVENT) ~ CEREBRAL + CARDIAC + AAA + PERIPH
#################################
# Fitting one Cox model with the four indicators as separate linear terms
COMBINED_CoxPHFit_Linear <- cph(Surv(TEVENT, EVENT) ~ CEREBRAL + CARDIAC + AAA + PERIPH,
                                data = PC)
anova(COMBINED_CoxPHFit_Linear)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## CEREBRAL 35.73 1 <.0001
## CARDIAC 19.11 1 <.0001
## AAA 96.62 1 <.0001
## PERIPH 23.13 1 <.0001
## TOTAL 122.61 4 <.0001
# Entries 1-4 of the anova() result are the per-predictor Wald chi-squares and
# entry 5 is the TOTAL row, in the order printed above. The outer parentheses
# print each value as it is assigned.
(CEREBRAL_CoxPHFit_Linear_WaldX2 <- anova(COMBINED_CoxPHFit_Linear)[1])
## [1] 35.72751
(CARDIAC_CoxPHFit_Linear_WaldX2 <- anova(COMBINED_CoxPHFit_Linear)[2])
## [1] 19.11319
(AAA_CoxPHFit_Linear_WaldX2 <- anova(COMBINED_CoxPHFit_Linear)[3])
## [1] 96.62462
(PERIPH_CoxPHFit_Linear_WaldX2 <- anova(COMBINED_CoxPHFit_Linear)[4])
## [1] 23.12629
(COMBINED_CoxPHFit_Linear_WaldX2 <- anova(COMBINED_CoxPHFit_Linear)[5])
## [1] 122.6137
##################################
# Relationship of (CEREBRAL + CARDIAC + AAA + PERIPH)
# with prognostic outcome = Linear
# (TEVENT,EVENT) ~ (CEREBRAL + CARDIAC + AAA + PERIPH)
#################################
# Unweighted sum of the four indicators, stored as a new column of PC
PC$SUMSCORE_4LEVELS <- PC$CEREBRAL + PC$CARDIAC + PC$AAA + PC$PERIPH
# Fitting a univariate Cox model with the sum score as a single linear term
SUMSCORE_4LEVELS_CoxPHFit_Linear <- cph(Surv(TEVENT, EVENT) ~ SUMSCORE_4LEVELS, data = PC)
# Keeping the first anova() entry; the outer parentheses print the value
(SUMSCORE_4LEVELS_CoxPHFit_Linear_WaldX2 <- anova(SUMSCORE_4LEVELS_CoxPHFit_Linear)[1])
## [1] 95.98494
##################################
# Relationship of (CEREBRAL + CARDIAC + 2*AAA + PERIPH)
# with prognostic outcome = Linear
# (TEVENT,EVENT) ~ (CEREBRAL + CARDIAC + 2*AAA + PERIPH)
#################################
# Weighted sum of the four indicators with AAA counted twice, stored as a
# new column of PC
PC$SUMSCORE_5LEVELS <- PC$CEREBRAL + PC$CARDIAC + (2 * PC$AAA) + PC$PERIPH
# Fitting a univariate Cox model with the weighted score as a single linear term
SUMSCORE_5LEVELS_CoxPHFit_Linear <- cph(Surv(TEVENT, EVENT) ~ SUMSCORE_5LEVELS, data = PC)
# Keeping the first anova() entry; the outer parentheses print the value
(SUMSCORE_5LEVELS_CoxPHFit_Linear_WaldX2 <- anova(SUMSCORE_5LEVELS_CoxPHFit_Linear)[1])
## [1] 119.3857
##################################
# Consolidating all results
# of predictor coding evaluation
#################################
# Predictor label for each of the 73 rows of the summary table
PredictorInformation <- c(rep("AGE", 7),
                          rep("CREAT", 4),
                          rep("SYSTH", 4),
                          rep("DIASTH", 4),
                          rep("SYSTBP", 4),
                          rep("DIASTBP", 4),
                          rep("CHOL", 4),
                          rep("HDL", 4),
                          rep("LDL", 4),
                          rep("TRIG", 4),
                          rep("HOMOC", 4),
                          rep("GLUT", 4),
                          rep("LENGTH", 4),
                          rep("WEIGHT", 4),
                          rep("BMI", 4),
                          rep("PACKYRS", 3),
                          rep("A-CEREBRAL", 1),
                          rep("B-CARDIAC", 1),
                          rep("C-AAA", 1),
                          rep("D-PERIPH", 1),
                          rep("(A,B,C,D)", 1),
                          rep("(A+B+C+D)", 1),
                          rep("[A+B+(2*C)+D]", 1))
# Coding label for each row, in the same order as PredictorInformation
CodingInformation <- c(
  # AGE: seven codings
  "Linear",
  "Squared",
  "Linear Effect After 55",
  "Squared Effect After 50",
  "Restricted Cubic Spline (4 knots)",
  "Quartiles",
  "Dichotomized",
  # CREAT through BMI: the same four codings for each of the 14 predictors
  rep(c("Linear",
        "Restricted Cubic Spline (4 knots)",
        "Restricted Cubic Spline (3 knots)",
        "Logarithm"),
      times = 14),
  # PACKYRS: three codings (no logarithm)
  "Linear",
  "Restricted Cubic Spline (4 knots)",
  "Restricted Cubic Spline (3 knots)",
  # Four individual indicators, their joint model and the two sum scores
  rep("Linear", 7))
# Wald chi-square for each row, in the same order as the two label vectors
WaldX2Information <- c(AGE_CoxPHFit_Linear_WaldX2,
                       AGE_CoxPHFit_Squared_WaldX2,
                       AGE_CoxPHFit_LinearAfter55_WaldX2,
                       AGE_CoxPHFit_SquaredAfter50_WaldX2,
                       AGE_CoxPHFit_RCS4Knots_WaldX2,
                       AGE_CoxPHFit_Quartile_WaldX2,
                       AGE_CoxPHFit_Dichotomized_WaldX2,
                       CREAT_CoxPHFit_Linear_WaldX2,
                       CREAT_CoxPHFit_RCS4Knots_WaldX2,
                       CREAT_CoxPHFit_RCS3Knots_WaldX2,
                       CREAT_CoxPHFit_Log_WaldX2,
                       SYSTH_CoxPHFit_Linear_WaldX2,
                       SYSTH_CoxPHFit_RCS4Knots_WaldX2,
                       SYSTH_CoxPHFit_RCS3Knots_WaldX2,
                       SYSTH_CoxPHFit_Log_WaldX2,
                       DIASTH_CoxPHFit_Linear_WaldX2,
                       DIASTH_CoxPHFit_RCS4Knots_WaldX2,
                       DIASTH_CoxPHFit_RCS3Knots_WaldX2,
                       DIASTH_CoxPHFit_Log_WaldX2,
                       SYSTBP_CoxPHFit_Linear_WaldX2,
                       SYSTBP_CoxPHFit_RCS4Knots_WaldX2,
                       SYSTBP_CoxPHFit_RCS3Knots_WaldX2,
                       SYSTBP_CoxPHFit_Log_WaldX2,
                       DIASTBP_CoxPHFit_Linear_WaldX2,
                       DIASTBP_CoxPHFit_RCS4Knots_WaldX2,
                       DIASTBP_CoxPHFit_RCS3Knots_WaldX2,
                       DIASTBP_CoxPHFit_Log_WaldX2,
                       CHOL_CoxPHFit_Linear_WaldX2,
                       CHOL_CoxPHFit_RCS4Knots_WaldX2,
                       CHOL_CoxPHFit_RCS3Knots_WaldX2,
                       CHOL_CoxPHFit_Log_WaldX2,
                       HDL_CoxPHFit_Linear_WaldX2,
                       HDL_CoxPHFit_RCS4Knots_WaldX2,
                       HDL_CoxPHFit_RCS3Knots_WaldX2,
                       HDL_CoxPHFit_Log_WaldX2,
                       LDL_CoxPHFit_Linear_WaldX2,
                       LDL_CoxPHFit_RCS4Knots_WaldX2,
                       LDL_CoxPHFit_RCS3Knots_WaldX2,
                       LDL_CoxPHFit_Log_WaldX2,
                       TRIG_CoxPHFit_Linear_WaldX2,
                       TRIG_CoxPHFit_RCS4Knots_WaldX2,
                       TRIG_CoxPHFit_RCS3Knots_WaldX2,
                       TRIG_CoxPHFit_Log_WaldX2,
                       HOMOC_CoxPHFit_Linear_WaldX2,
                       HOMOC_CoxPHFit_RCS4Knots_WaldX2,
                       HOMOC_CoxPHFit_RCS3Knots_WaldX2,
                       HOMOC_CoxPHFit_Log_WaldX2,
                       GLUT_CoxPHFit_Linear_WaldX2,
                       GLUT_CoxPHFit_RCS4Knots_WaldX2,
                       GLUT_CoxPHFit_RCS3Knots_WaldX2,
                       GLUT_CoxPHFit_Log_WaldX2,
                       LENGTH_CoxPHFit_Linear_WaldX2,
                       LENGTH_CoxPHFit_RCS4Knots_WaldX2,
                       LENGTH_CoxPHFit_RCS3Knots_WaldX2,
                       LENGTH_CoxPHFit_Log_WaldX2,
                       WEIGHT_CoxPHFit_Linear_WaldX2,
                       WEIGHT_CoxPHFit_RCS4Knots_WaldX2,
                       WEIGHT_CoxPHFit_RCS3Knots_WaldX2,
                       WEIGHT_CoxPHFit_Log_WaldX2,
                       BMI_CoxPHFit_Linear_WaldX2,
                       BMI_CoxPHFit_RCS4Knots_WaldX2,
                       BMI_CoxPHFit_RCS3Knots_WaldX2,
                       BMI_CoxPHFit_Log_WaldX2,
                       PACKYRS_CoxPHFit_Linear_WaldX2,
                       PACKYRS_CoxPHFit_RCS4Knots_WaldX2,
                       PACKYRS_CoxPHFit_RCS3Knots_WaldX2,
                       CEREBRAL_CoxPHFit_Linear_WaldX2,
                       CARDIAC_CoxPHFit_Linear_WaldX2,
                       AAA_CoxPHFit_Linear_WaldX2,
                       PERIPH_CoxPHFit_Linear_WaldX2,
                       COMBINED_CoxPHFit_Linear_WaldX2,
                       SUMSCORE_4LEVELS_CoxPHFit_Linear_WaldX2,
                       SUMSCORE_5LEVELS_CoxPHFit_Linear_WaldX2)
# The three vectors are matched purely by position, so a length mismatch
# would silently misalign the table; fail loudly instead
stopifnot(length(PredictorInformation) == length(CodingInformation),
          length(CodingInformation) == length(WaldX2Information))
# Building the table with data.frame() keeps the chi-squares numeric, so they
# can be rounded directly instead of being round-tripped through character
PredictorCodingSummary <- data.frame(PredictorInformation,
                                     CodingInformation,
                                     WaldX2Information = round(WaldX2Information, 2),
                                     row.names = NULL,
                                     stringsAsFactors = FALSE)
colnames(PredictorCodingSummary) <- c("Predictor",
                                      "Coding",
                                      "Wald Chi-Squared")
##################################
# Summarizing all results
# of predictor coding evaluation
#################################
(PredictorCodingSummary)
## Predictor Coding Wald Chi-Squared
## 1 AGE Linear 97.17
## 2 AGE Squared 124.99
## 3 AGE Linear Effect After 55 118.59
## 4 AGE Squared Effect After 50 129.64
## 5 AGE Restricted Cubic Spline (4 knots) 125.30
## 6 AGE Quartiles 93.22
## 7 AGE Dichotomized 72.47
## 8 CREAT Linear 93.42
## 9 CREAT Restricted Cubic Spline (4 knots) 116.10
## 10 CREAT Restricted Cubic Spline (3 knots) 99.03
## 11 CREAT Logarithm 131.01
## 12 SYSTH Linear 15.49
## 13 SYSTH Restricted Cubic Spline (4 knots) 15.60
## 14 SYSTH Restricted Cubic Spline (3 knots) 15.17
## 15 SYSTH Logarithm 14.39
## 16 DIASTH Linear 0.73
## 17 DIASTH Restricted Cubic Spline (4 knots) 2.62
## 18 DIASTH Restricted Cubic Spline (3 knots) 2.08
## 19 DIASTH Logarithm 0.47
## 20 SYSTBP Linear 23.22
## 21 SYSTBP Restricted Cubic Spline (4 knots) 26.27
## 22 SYSTBP Restricted Cubic Spline (3 knots) 26.26
## 23 SYSTBP Logarithm 21.20
## 24 DIASTBP Linear 1.67
## 25 DIASTBP Restricted Cubic Spline (4 knots) 25.15
## 26 DIASTBP Restricted Cubic Spline (3 knots) 24.84
## 27 DIASTBP Logarithm 0.68
## 28 CHOL Linear 0.32
## 29 CHOL Restricted Cubic Spline (4 knots) 0.60
## 30 CHOL Restricted Cubic Spline (3 knots) 0.60
## 31 CHOL Logarithm 0.25
## 32 HDL Linear 11.83
## 33 HDL Restricted Cubic Spline (4 knots) 11.94
## 34 HDL Restricted Cubic Spline (3 knots) 11.61
## 35 HDL Logarithm 11.80
## 36 LDL Linear 1.28
## 37 LDL Restricted Cubic Spline (4 knots) 1.52
## 38 LDL Restricted Cubic Spline (3 knots) 1.28
## 39 LDL Logarithm 1.15
## 40 TRIG Linear 1.40
## 41 TRIG Restricted Cubic Spline (4 knots) 3.51
## 42 TRIG Restricted Cubic Spline (3 knots) 3.25
## 43 TRIG Logarithm 2.47
## 44 HOMOC Linear 49.59
## 45 HOMOC Restricted Cubic Spline (4 knots) 50.68
## 46 HOMOC Restricted Cubic Spline (3 knots) 47.32
## 47 HOMOC Logarithm 52.46
## 48 GLUT Linear 12.69
## 49 GLUT Restricted Cubic Spline (4 knots) 15.43
## 50 GLUT Restricted Cubic Spline (3 knots) 13.03
## 51 GLUT Logarithm 12.08
## 52 LENGTH Linear 0.01
## 53 LENGTH Restricted Cubic Spline (4 knots) 4.19
## 54 LENGTH Restricted Cubic Spline (3 knots) 4.11
## 55 LENGTH Logarithm 0.00
## 56 WEIGHT Linear 3.21
## 57 WEIGHT Restricted Cubic Spline (4 knots) 4.55
## 58 WEIGHT Restricted Cubic Spline (3 knots) 4.08
## 59 WEIGHT Logarithm 2.73
## 60 BMI Linear 3.87
## 61 BMI Restricted Cubic Spline (4 knots) 7.64
## 62 BMI Restricted Cubic Spline (3 knots) 4.02
## 63 BMI Logarithm 3.90
## 64 PACKYRS Linear 12.95
## 65 PACKYRS Restricted Cubic Spline (4 knots) 14.85
## 66 PACKYRS Restricted Cubic Spline (3 knots) 13.48
## 67 A-CEREBRAL Linear 35.73
## 68 B-CARDIAC Linear 19.11
## 69 C-AAA Linear 96.62
## 70 D-PERIPH Linear 23.13
## 71 (A,B,C,D) Linear 122.61
## 72 (A+B+C+D) Linear 95.98
## 73 [A+B+(2*C)+D] Linear 119.39
##################################
# Loading dataset
##################################
# Working on a copy named DPP: the later DPP$... assignments modify this copy
# and leave SMART itself unchanged
DPP <- SMART
##################################
# Exploring the missing data patterns
##################################
# Computing the missing-value aggregation without drawing it yet
DPP.Profile <- aggr(DPP,
                    plot = FALSE)
DPP.Profile.Summary <- summary(DPP.Profile)
# NOTE(review): the recorded output below is NULL, so the summary object does
# not appear to be a table with rows; confirm whether this call is still wanted
nrow(DPP.Profile.Summary)
## NULL
# Drawing the missingness profile; sortVars = TRUE also prints the per-variable
# table recorded below
plot(DPP.Profile,
     numbers = TRUE,
     prop = TRUE,
     sortVars = TRUE,
     col = c("green", "red"))
##
## Variables sorted by number of missings:
## Variable Count
## DIASTH 0.3870384715
## SYSTH 0.3867802737
## SYSTBP 0.3157758843
## DIASTBP 0.3152594888
## HOMOC 0.1195455719
## LDL 0.0557707204
## albumin 0.0534469404
## IMT 0.0253033824
## STENOSIS 0.0240123935
## DIABETES 0.0103279112
## HDL 0.0077459334
## TRIG 0.0072295378
## SMOKING 0.0064549445
## alcohol 0.0064549445
## packyrs 0.0054221534
## GLUT 0.0049057578
## CHOL 0.0046475600
## CREAT 0.0043893623
## BMI 0.0007745933
## WEIGHT 0.0005163956
## LENGTH 0.0002581978
## TEVENT 0.0000000000
## EVENT 0.0000000000
## SEX 0.0000000000
## AGE 0.0000000000
## CEREBRAL 0.0000000000
## CARDIAC 0.0000000000
## AAA 0.0000000000
## PERIPH 0.0000000000
# Single-panel layout for the missingness clustering plot
par(mfrow = c(1, 1))
DPP.Patterns <- naclus(DPP)
plot(DPP.Patterns,
     ylab = "Fraction of NAs in common",
     col = "red")
# Two side-by-side panels: missingness per variable and per observation
par(mfrow = c(1, 2))
naplot(DPP.Patterns, which = c('na per var'), col = "red")
naplot(DPP.Patterns, which = c('na per obs'), col = "red")
##################################
# Conducting missing data imputation
# using AREGIMPUTE
# (Multiple Imputation using Additive Regression, Bootstrapping, and Predictive Mean Matching)
##################################
# Converting CEREBRAL, CARDIAC, AAA and PERIPH to numeric so they can be added
# together. Going through as.character() first means that, if a column is
# stored as a factor, its labels are converted rather than its level codes.
DPP$CEREBRAL <- as.numeric(as.character(DPP$CEREBRAL))
DPP$CARDIAC <- as.numeric(as.character(DPP$CARDIAC))
DPP$AAA <- as.numeric(as.character(DPP$AAA))
DPP$PERIPH <- as.numeric(as.character(DPP$PERIPH))
# Weighted sum of the four indicators with AAA counted twice (same formula as
# PC$SUMSCORE_5LEVELS earlier in the file)
DPP$SUMSCORE_5LEVELS <- DPP$CEREBRAL +
  DPP$CARDIAC +
  (2 * DPP$AAA) +
  DPP$PERIPH
# Fixing the RNG seed immediately before the imputation call so the draws
# are reproducible
set.seed(123456789)
# Running aregImpute with 5 imputations over the variables listed below.
# NOTE(review): in aregImpute formulas I() is documented as forcing a term to
# be linear (untransformed); confirm against the Hmisc documentation.
# NOTE(review): SYSTBP and DIABETES were displaced in the garbled source; their
# positions here (start of the 2nd and 3rd term lines) are reconstructed.
# Confirm against the original notebook, since term order may affect the
# seeded draws.
DPP.AREGIMPUTED <- aregImpute(~I(TEVENT)+EVENT+SEX+I(AGE)+
                                SYSTBP+DIASTBP+SYSTH+DIASTH+
                                DIABETES+CEREBRAL+CARDIAC+AAA+PERIPH+I(SUMSCORE_5LEVELS)+STENOSIS+
                                I(LENGTH)+I(WEIGHT)+I(BMI)+
                                I(CHOL)+I(HDL)+I(LDL)+I(TRIG)+I(HOMOC)+I(GLUT)+I(CREAT)+I(IMT)+
                                as.factor(albumin) +as.factor(SMOKING) + I(packyrs) + as.factor(alcohol),
                              n.impute=5,data=DPP)
## Iteration 1 Iteration 2 Iteration 3 Iteration 4 Iteration 5 Iteration 6 Iteration 7 Iteration 8
##################################
# Evaluating the plausibility of the
# AREG-imputed values for the
# numerical variables
##################################
# Observed (non-missing) SYSTBP values
SYSTBP_complete <- (DPP$SYSTBP[complete.cases(DPP$SYSTBP)])
# Imputed SYSTBP values as a data frame; columns V1..V5 hold imputations 1..5
DPP.AREGIMPUTED_aregImpute_SYSTBP <- as.data.frame(DPP.AREGIMPUTED$imputed$SYSTBP)
SYSTBP_aregImpute_1 <- DPP.AREGIMPUTED_aregImpute_SYSTBP$V1
SYSTBP_aregImpute_2 <- DPP.AREGIMPUTED_aregImpute_SYSTBP$V2
SYSTBP_aregImpute_3 <- DPP.AREGIMPUTED_aregImpute_SYSTBP$V3
SYSTBP_aregImpute_4 <- DPP.AREGIMPUTED_aregImpute_SYSTBP$V4
SYSTBP_aregImpute_5 <- DPP.AREGIMPUTED_aregImpute_SYSTBP$V5
# Observed values followed by the five sets of imputed values
SYSTBP_values <- c(SYSTBP_complete,
                   SYSTBP_aregImpute_1,
                   SYSTBP_aregImpute_2,
                   SYSTBP_aregImpute_3,
                   SYSTBP_aregImpute_4,
                   SYSTBP_aregImpute_5)
# Source label for every element of SYSTBP_values, in the same order
SYSTBP_labels <- c(rep("Original", length(SYSTBP_complete)),
                   rep("aregImputed_1", length(SYSTBP_aregImpute_1)),
                   rep("aregImputed_2", length(SYSTBP_aregImpute_2)),
                   rep("aregImputed_3", length(SYSTBP_aregImpute_3)),
                   rep("aregImputed_4", length(SYSTBP_aregImpute_4)),
                   rep("aregImputed_5", length(SYSTBP_aregImpute_5)))
# Observed (non-missing) DIASTBP values
DIASTBP_complete <- (DPP$DIASTBP[complete.cases(DPP$DIASTBP)])
# Imputed DIASTBP values as a data frame; columns V1..V5 hold imputations 1..5
DPP.AREGIMPUTED_aregImpute_DIASTBP <- as.data.frame(DPP.AREGIMPUTED$imputed$DIASTBP)
DIASTBP_aregImpute_1 <- DPP.AREGIMPUTED_aregImpute_DIASTBP$V1
DIASTBP_aregImpute_2 <- DPP.AREGIMPUTED_aregImpute_DIASTBP$V2
DIASTBP_aregImpute_3 <- DPP.AREGIMPUTED_aregImpute_DIASTBP$V3
DIASTBP_aregImpute_4 <- DPP.AREGIMPUTED_aregImpute_DIASTBP$V4
DIASTBP_aregImpute_5 <- DPP.AREGIMPUTED_aregImpute_DIASTBP$V5
# Observed values followed by the five sets of imputed values
DIASTBP_values <- c(DIASTBP_complete,
                    DIASTBP_aregImpute_1,
                    DIASTBP_aregImpute_2,
                    DIASTBP_aregImpute_3,
                    DIASTBP_aregImpute_4,
                    DIASTBP_aregImpute_5)
# Source label for every element of DIASTBP_values, in the same order
DIASTBP_labels <- c(rep("Original", length(DIASTBP_complete)),
                    rep("aregImputed_1", length(DIASTBP_aregImpute_1)),
                    rep("aregImputed_2", length(DIASTBP_aregImpute_2)),
                    rep("aregImputed_3", length(DIASTBP_aregImpute_3)),
                    rep("aregImputed_4", length(DIASTBP_aregImpute_4)),
                    rep("aregImputed_5", length(DIASTBP_aregImpute_5)))
# Observed (non-missing) SYSTH values
SYSTH_complete <- (DPP$SYSTH[complete.cases(DPP$SYSTH)])
# Imputed SYSTH values as a data frame; columns V1..V5 hold imputations 1..5
DPP.AREGIMPUTED_aregImpute_SYSTH <- as.data.frame(DPP.AREGIMPUTED$imputed$SYSTH)
SYSTH_aregImpute_1 <- DPP.AREGIMPUTED_aregImpute_SYSTH$V1
SYSTH_aregImpute_2 <- DPP.AREGIMPUTED_aregImpute_SYSTH$V2
SYSTH_aregImpute_3 <- DPP.AREGIMPUTED_aregImpute_SYSTH$V3
SYSTH_aregImpute_4 <- DPP.AREGIMPUTED_aregImpute_SYSTH$V4
SYSTH_aregImpute_5 <- DPP.AREGIMPUTED_aregImpute_SYSTH$V5
# Observed values followed by the five sets of imputed values
SYSTH_values <- c(SYSTH_complete,
                  SYSTH_aregImpute_1,
                  SYSTH_aregImpute_2,
                  SYSTH_aregImpute_3,
                  SYSTH_aregImpute_4,
                  SYSTH_aregImpute_5)
# Source label for every element of SYSTH_values, in the same order
SYSTH_labels <- c(rep("Original", length(SYSTH_complete)),
                  rep("aregImputed_1", length(SYSTH_aregImpute_1)),
                  rep("aregImputed_2", length(SYSTH_aregImpute_2)),
                  rep("aregImputed_3", length(SYSTH_aregImpute_3)),
                  rep("aregImputed_4", length(SYSTH_aregImpute_4)),
                  rep("aregImputed_5", length(SYSTH_aregImpute_5)))
# Observed (non-missing) DIASTH values
DIASTH_complete <- (DPP$DIASTH[complete.cases(DPP$DIASTH)])
# Imputed DIASTH values as a data frame; columns V1..V5 hold imputations 1..5
DPP.AREGIMPUTED_aregImpute_DIASTH <- as.data.frame(DPP.AREGIMPUTED$imputed$DIASTH)
DIASTH_aregImpute_1 <- DPP.AREGIMPUTED_aregImpute_DIASTH$V1
DIASTH_aregImpute_2 <- DPP.AREGIMPUTED_aregImpute_DIASTH$V2
DIASTH_aregImpute_3 <- DPP.AREGIMPUTED_aregImpute_DIASTH$V3
DIASTH_aregImpute_4 <- DPP.AREGIMPUTED_aregImpute_DIASTH$V4
DIASTH_aregImpute_5 <- DPP.AREGIMPUTED_aregImpute_DIASTH$V5
# Observed values followed by the five sets of imputed values
DIASTH_values <- c(DIASTH_complete,
                   DIASTH_aregImpute_1,
                   DIASTH_aregImpute_2,
                   DIASTH_aregImpute_3,
                   DIASTH_aregImpute_4,
                   DIASTH_aregImpute_5)
# Source label for every element of DIASTH_values, in the same order
DIASTH_labels <- c(rep("Original", length(DIASTH_complete)),
                   rep("aregImputed_1", length(DIASTH_aregImpute_1)),
                   rep("aregImputed_2", length(DIASTH_aregImpute_2)),
                   rep("aregImputed_3", length(DIASTH_aregImpute_3)),
                   rep("aregImputed_4", length(DIASTH_aregImpute_4)),
                   rep("aregImputed_5", length(DIASTH_aregImpute_5)))
# Observed (non-missing) LENGTH values
LENGTH_complete <- (DPP$LENGTH[complete.cases(DPP$LENGTH)])
# Imputed LENGTH values as a data frame; columns V1..V5 hold imputations 1..5
DPP.AREGIMPUTED_aregImpute_LENGTH <- as.data.frame(DPP.AREGIMPUTED$imputed$LENGTH)
LENGTH_aregImpute_1 <- DPP.AREGIMPUTED_aregImpute_LENGTH$V1
LENGTH_aregImpute_2 <- DPP.AREGIMPUTED_aregImpute_LENGTH$V2
LENGTH_aregImpute_3 <- DPP.AREGIMPUTED_aregImpute_LENGTH$V3
LENGTH_aregImpute_4 <- DPP.AREGIMPUTED_aregImpute_LENGTH$V4
LENGTH_aregImpute_5 <- DPP.AREGIMPUTED_aregImpute_LENGTH$V5
# Observed values followed by the five sets of imputed values
LENGTH_values <- c(LENGTH_complete,
                   LENGTH_aregImpute_1,
                   LENGTH_aregImpute_2,
                   LENGTH_aregImpute_3,
                   LENGTH_aregImpute_4,
                   LENGTH_aregImpute_5)
# Source label for every element of LENGTH_values, in the same order
LENGTH_labels <- c(rep("Original", length(LENGTH_complete)),
                   rep("aregImputed_1", length(LENGTH_aregImpute_1)),
                   rep("aregImputed_2", length(LENGTH_aregImpute_2)),
                   rep("aregImputed_3", length(LENGTH_aregImpute_3)),
                   rep("aregImputed_4", length(LENGTH_aregImpute_4)),
                   rep("aregImputed_5", length(LENGTH_aregImpute_5)))
# Observed (non-missing) WEIGHT values
WEIGHT_complete <- (DPP$WEIGHT[complete.cases(DPP$WEIGHT)])
# Imputed WEIGHT values as a data frame; columns V1..V5 hold imputations 1..5
DPP.AREGIMPUTED_aregImpute_WEIGHT <- as.data.frame(DPP.AREGIMPUTED$imputed$WEIGHT)
WEIGHT_aregImpute_1 <- DPP.AREGIMPUTED_aregImpute_WEIGHT$V1
WEIGHT_aregImpute_2 <- DPP.AREGIMPUTED_aregImpute_WEIGHT$V2
WEIGHT_aregImpute_3 <- DPP.AREGIMPUTED_aregImpute_WEIGHT$V3
WEIGHT_aregImpute_4 <- DPP.AREGIMPUTED_aregImpute_WEIGHT$V4
WEIGHT_aregImpute_5 <- DPP.AREGIMPUTED_aregImpute_WEIGHT$V5
# Observed values followed by the five sets of imputed values
WEIGHT_values <- c(WEIGHT_complete,
                   WEIGHT_aregImpute_1,
                   WEIGHT_aregImpute_2,
                   WEIGHT_aregImpute_3,
                   WEIGHT_aregImpute_4,
                   WEIGHT_aregImpute_5)
# Source label for every element of WEIGHT_values, in the same order
WEIGHT_labels <- c(rep("Original", length(WEIGHT_complete)),
                   rep("aregImputed_1", length(WEIGHT_aregImpute_1)),
                   rep("aregImputed_2", length(WEIGHT_aregImpute_2)),
                   rep("aregImputed_3", length(WEIGHT_aregImpute_3)),
                   rep("aregImputed_4", length(WEIGHT_aregImpute_4)),
                   rep("aregImputed_5", length(WEIGHT_aregImpute_5)))
# Observed (non-missing) BMI values
BMI_complete <- (DPP$BMI[complete.cases(DPP$BMI)])
# Imputed BMI values as a data frame; columns V1..V5 hold imputations 1..5
DPP.AREGIMPUTED_aregImpute_BMI <- as.data.frame(DPP.AREGIMPUTED$imputed$BMI)
BMI_aregImpute_1 <- DPP.AREGIMPUTED_aregImpute_BMI$V1
BMI_aregImpute_2 <- DPP.AREGIMPUTED_aregImpute_BMI$V2
BMI_aregImpute_3 <- DPP.AREGIMPUTED_aregImpute_BMI$V3
BMI_aregImpute_4 <- DPP.AREGIMPUTED_aregImpute_BMI$V4
BMI_aregImpute_5 <- DPP.AREGIMPUTED_aregImpute_BMI$V5
# Observed values followed by the five sets of imputed values
BMI_values <- c(BMI_complete,
                BMI_aregImpute_1,
                BMI_aregImpute_2,
                BMI_aregImpute_3,
                BMI_aregImpute_4,
                BMI_aregImpute_5)
# Source label for every element of BMI_values, in the same order
BMI_labels <- c(rep("Original", length(BMI_complete)),
                rep("aregImputed_1", length(BMI_aregImpute_1)),
                rep("aregImputed_2", length(BMI_aregImpute_2)),
                rep("aregImputed_3", length(BMI_aregImpute_3)),
                rep("aregImputed_4", length(BMI_aregImpute_4)),
                rep("aregImputed_5", length(BMI_aregImpute_5)))
# Observed (non-missing) CHOL values
CHOL_complete <- (DPP$CHOL[complete.cases(DPP$CHOL)])
# Imputed CHOL values as a data frame; columns V1..V5 hold imputations 1..5
DPP.AREGIMPUTED_aregImpute_CHOL <- as.data.frame(DPP.AREGIMPUTED$imputed$CHOL)
CHOL_aregImpute_1 <- DPP.AREGIMPUTED_aregImpute_CHOL$V1
CHOL_aregImpute_2 <- DPP.AREGIMPUTED_aregImpute_CHOL$V2
CHOL_aregImpute_3 <- DPP.AREGIMPUTED_aregImpute_CHOL$V3
CHOL_aregImpute_4 <- DPP.AREGIMPUTED_aregImpute_CHOL$V4
CHOL_aregImpute_5 <- DPP.AREGIMPUTED_aregImpute_CHOL$V5
# Observed values followed by the five sets of imputed values
CHOL_values <- c(CHOL_complete,
                 CHOL_aregImpute_1,
                 CHOL_aregImpute_2,
                 CHOL_aregImpute_3,
                 CHOL_aregImpute_4,
                 CHOL_aregImpute_5)
# Source label for every element of CHOL_values, in the same order
CHOL_labels <- c(rep("Original", length(CHOL_complete)),
                 rep("aregImputed_1", length(CHOL_aregImpute_1)),
                 rep("aregImputed_2", length(CHOL_aregImpute_2)),
                 rep("aregImputed_3", length(CHOL_aregImpute_3)),
                 rep("aregImputed_4", length(CHOL_aregImpute_4)),
                 rep("aregImputed_5", length(CHOL_aregImpute_5)))
# Observed (non-missing) HDL values
HDL_complete <- (DPP$HDL[complete.cases(DPP$HDL)])
# Imputed HDL values as a data frame; columns V1..V5 hold imputations 1..5
DPP.AREGIMPUTED_aregImpute_HDL <- as.data.frame(DPP.AREGIMPUTED$imputed$HDL)
HDL_aregImpute_1 <- DPP.AREGIMPUTED_aregImpute_HDL$V1
HDL_aregImpute_2 <- DPP.AREGIMPUTED_aregImpute_HDL$V2
HDL_aregImpute_3 <- DPP.AREGIMPUTED_aregImpute_HDL$V3
HDL_aregImpute_4 <- DPP.AREGIMPUTED_aregImpute_HDL$V4
HDL_aregImpute_5 <- DPP.AREGIMPUTED_aregImpute_HDL$V5
# Observed values followed by the five sets of imputed values
HDL_values <- c(HDL_complete,
                HDL_aregImpute_1,
                HDL_aregImpute_2,
                HDL_aregImpute_3,
                HDL_aregImpute_4,
                HDL_aregImpute_5)
# Source label for every element of HDL_values, in the same order
HDL_labels <- c(rep("Original", length(HDL_complete)),
                rep("aregImputed_1", length(HDL_aregImpute_1)),
                rep("aregImputed_2", length(HDL_aregImpute_2)),
                rep("aregImputed_3", length(HDL_aregImpute_3)),
                rep("aregImputed_4", length(HDL_aregImpute_4)),
                rep("aregImputed_5", length(HDL_aregImpute_5)))
# Observed (non-missing) LDL values
LDL_complete <- (DPP$LDL[complete.cases(DPP$LDL)])
# Imputed LDL values as a data frame; columns V1..V5 hold imputations 1..5
DPP.AREGIMPUTED_aregImpute_LDL <- as.data.frame(DPP.AREGIMPUTED$imputed$LDL)
LDL_aregImpute_1 <- DPP.AREGIMPUTED_aregImpute_LDL$V1
LDL_aregImpute_2 <- DPP.AREGIMPUTED_aregImpute_LDL$V2
LDL_aregImpute_3 <- DPP.AREGIMPUTED_aregImpute_LDL$V3
LDL_aregImpute_4 <- DPP.AREGIMPUTED_aregImpute_LDL$V4
LDL_aregImpute_5 <- DPP.AREGIMPUTED_aregImpute_LDL$V5
# Observed values followed by the five sets of imputed values
LDL_values <- c(LDL_complete,
                LDL_aregImpute_1,
                LDL_aregImpute_2,
                LDL_aregImpute_3,
                LDL_aregImpute_4,
                LDL_aregImpute_5)
# Source label for every element of LDL_values, in the same order
LDL_labels <- c(rep("Original", length(LDL_complete)),
                rep("aregImputed_1", length(LDL_aregImpute_1)),
                rep("aregImputed_2", length(LDL_aregImpute_2)),
                rep("aregImputed_3", length(LDL_aregImpute_3)),
                rep("aregImputed_4", length(LDL_aregImpute_4)),
                rep("aregImputed_5", length(LDL_aregImpute_5)))
# Observed (non-missing) TRIG values
TRIG_complete <- (DPP$TRIG[complete.cases(DPP$TRIG)])
# Imputed TRIG values as a data frame; columns V1..V5 hold imputations 1..5
DPP.AREGIMPUTED_aregImpute_TRIG <- as.data.frame(DPP.AREGIMPUTED$imputed$TRIG)
TRIG_aregImpute_1 <- DPP.AREGIMPUTED_aregImpute_TRIG$V1
TRIG_aregImpute_2 <- DPP.AREGIMPUTED_aregImpute_TRIG$V2
TRIG_aregImpute_3 <- DPP.AREGIMPUTED_aregImpute_TRIG$V3
TRIG_aregImpute_4 <- DPP.AREGIMPUTED_aregImpute_TRIG$V4
TRIG_aregImpute_5 <- DPP.AREGIMPUTED_aregImpute_TRIG$V5
# Observed values followed by the five sets of imputed values
TRIG_values <- c(TRIG_complete,
                 TRIG_aregImpute_1,
                 TRIG_aregImpute_2,
                 TRIG_aregImpute_3,
                 TRIG_aregImpute_4,
                 TRIG_aregImpute_5)
# Source label for every element of TRIG_values, in the same order
TRIG_labels <- c(rep("Original", length(TRIG_complete)),
                 rep("aregImputed_1", length(TRIG_aregImpute_1)),
                 rep("aregImputed_2", length(TRIG_aregImpute_2)),
                 rep("aregImputed_3", length(TRIG_aregImpute_3)),
                 rep("aregImputed_4", length(TRIG_aregImpute_4)),
                 rep("aregImputed_5", length(TRIG_aregImpute_5)))
# Observed (non-missing) HOMOC values
HOMOC_complete <- (DPP$HOMOC[complete.cases(DPP$HOMOC)])
# Imputed HOMOC values as a data frame; columns V1..V5 hold imputations 1..5
DPP.AREGIMPUTED_aregImpute_HOMOC <- as.data.frame(DPP.AREGIMPUTED$imputed$HOMOC)
HOMOC_aregImpute_1 <- DPP.AREGIMPUTED_aregImpute_HOMOC$V1
HOMOC_aregImpute_2 <- DPP.AREGIMPUTED_aregImpute_HOMOC$V2
HOMOC_aregImpute_3 <- DPP.AREGIMPUTED_aregImpute_HOMOC$V3
HOMOC_aregImpute_4 <- DPP.AREGIMPUTED_aregImpute_HOMOC$V4
HOMOC_aregImpute_5 <- DPP.AREGIMPUTED_aregImpute_HOMOC$V5
# Observed values followed by the five sets of imputed values
HOMOC_values <- c(HOMOC_complete,
                  HOMOC_aregImpute_1,
                  HOMOC_aregImpute_2,
                  HOMOC_aregImpute_3,
                  HOMOC_aregImpute_4,
                  HOMOC_aregImpute_5)
# Source label for every element of HOMOC_values, in the same order
HOMOC_labels <- c(rep("Original", length(HOMOC_complete)),
                  rep("aregImputed_1", length(HOMOC_aregImpute_1)),
                  rep("aregImputed_2", length(HOMOC_aregImpute_2)),
                  rep("aregImputed_3", length(HOMOC_aregImpute_3)),
                  rep("aregImputed_4", length(HOMOC_aregImpute_4)),
                  rep("aregImputed_5", length(HOMOC_aregImpute_5)))
# Observed (non-missing) GLUT values
GLUT_complete <- (DPP$GLUT[complete.cases(DPP$GLUT)])
# Imputed GLUT values as a data frame; columns V1..V5 hold imputations 1..5
DPP.AREGIMPUTED_aregImpute_GLUT <- as.data.frame(DPP.AREGIMPUTED$imputed$GLUT)
GLUT_aregImpute_1 <- DPP.AREGIMPUTED_aregImpute_GLUT$V1
GLUT_aregImpute_2 <- DPP.AREGIMPUTED_aregImpute_GLUT$V2
GLUT_aregImpute_3 <- DPP.AREGIMPUTED_aregImpute_GLUT$V3
GLUT_aregImpute_4 <- DPP.AREGIMPUTED_aregImpute_GLUT$V4
GLUT_aregImpute_5 <- DPP.AREGIMPUTED_aregImpute_GLUT$V5
# Observed values followed by the five sets of imputed values
GLUT_values <- c(GLUT_complete,
                 GLUT_aregImpute_1,
                 GLUT_aregImpute_2,
                 GLUT_aregImpute_3,
                 GLUT_aregImpute_4,
                 GLUT_aregImpute_5)
# Source label for every element of GLUT_values, in the same order
GLUT_labels <- c(rep("Original", length(GLUT_complete)),
                 rep("aregImputed_1", length(GLUT_aregImpute_1)),
                 rep("aregImputed_2", length(GLUT_aregImpute_2)),
                 rep("aregImputed_3", length(GLUT_aregImpute_3)),
                 rep("aregImputed_4", length(GLUT_aregImpute_4)),
                 rep("aregImputed_5", length(GLUT_aregImpute_5)))
# Observed (non-missing) CREAT values
CREAT_complete <- (DPP$CREAT[complete.cases(DPP$CREAT)])
# Imputed CREAT values as a data frame; columns V1..V5 hold imputations 1..5
DPP.AREGIMPUTED_aregImpute_CREAT <- as.data.frame(DPP.AREGIMPUTED$imputed$CREAT)
CREAT_aregImpute_1 <- DPP.AREGIMPUTED_aregImpute_CREAT$V1
CREAT_aregImpute_2 <- DPP.AREGIMPUTED_aregImpute_CREAT$V2
CREAT_aregImpute_3 <- DPP.AREGIMPUTED_aregImpute_CREAT$V3
CREAT_aregImpute_4 <- DPP.AREGIMPUTED_aregImpute_CREAT$V4
CREAT_aregImpute_5 <- DPP.AREGIMPUTED_aregImpute_CREAT$V5
# Observed values followed by the five sets of imputed values
CREAT_values <- c(CREAT_complete,
                  CREAT_aregImpute_1,
                  CREAT_aregImpute_2,
                  CREAT_aregImpute_3,
                  CREAT_aregImpute_4,
                  CREAT_aregImpute_5)
# Source label for every element of CREAT_values, in the same order
CREAT_labels <- c(rep("Original", length(CREAT_complete)),
                  rep("aregImputed_1", length(CREAT_aregImpute_1)),
                  rep("aregImputed_2", length(CREAT_aregImpute_2)),
                  rep("aregImputed_3", length(CREAT_aregImpute_3)),
                  rep("aregImputed_4", length(CREAT_aregImpute_4)),
                  rep("aregImputed_5", length(CREAT_aregImpute_5)))
# Observed (non-missing) IMT values
IMT_complete <- (DPP$IMT[complete.cases(DPP$IMT)])
# Imputed IMT values as a data frame; columns V1..V5 hold imputations 1..5
DPP.AREGIMPUTED_aregImpute_IMT <- as.data.frame(DPP.AREGIMPUTED$imputed$IMT)
IMT_aregImpute_1 <- DPP.AREGIMPUTED_aregImpute_IMT$V1
IMT_aregImpute_2 <- DPP.AREGIMPUTED_aregImpute_IMT$V2
IMT_aregImpute_3 <- DPP.AREGIMPUTED_aregImpute_IMT$V3
IMT_aregImpute_4 <- DPP.AREGIMPUTED_aregImpute_IMT$V4
IMT_aregImpute_5 <- DPP.AREGIMPUTED_aregImpute_IMT$V5
# Observed values followed by the five sets of imputed values
IMT_values <- c(IMT_complete,
                IMT_aregImpute_1,
                IMT_aregImpute_2,
                IMT_aregImpute_3,
                IMT_aregImpute_4,
                IMT_aregImpute_5)
# Source label for every element of IMT_values, in the same order
IMT_labels <- c(rep("Original", length(IMT_complete)),
                rep("aregImputed_1", length(IMT_aregImpute_1)),
                rep("aregImputed_2", length(IMT_aregImpute_2)),
                rep("aregImputed_3", length(IMT_aregImpute_3)),
                rep("aregImputed_4", length(IMT_aregImpute_4)),
                rep("aregImputed_5", length(IMT_aregImpute_5)))
# Observed (non-missing) packyrs values (the data column is lower-case
# "packyrs"; the object names use PACKYRS)
PACKYRS_complete <- (DPP$packyrs[complete.cases(DPP$packyrs)])
# Imputed packyrs values as a data frame; columns V1..V5 hold imputations 1..5
DPP.AREGIMPUTED_aregImpute_PACKYRS <- as.data.frame(DPP.AREGIMPUTED$imputed$packyrs)
PACKYRS_aregImpute_1 <- DPP.AREGIMPUTED_aregImpute_PACKYRS$V1
PACKYRS_aregImpute_2 <- DPP.AREGIMPUTED_aregImpute_PACKYRS$V2
PACKYRS_aregImpute_3 <- DPP.AREGIMPUTED_aregImpute_PACKYRS$V3
PACKYRS_aregImpute_4 <- DPP.AREGIMPUTED_aregImpute_PACKYRS$V4
PACKYRS_aregImpute_5 <- DPP.AREGIMPUTED_aregImpute_PACKYRS$V5
# Observed values followed by the five sets of imputed values
PACKYRS_values <- c(PACKYRS_complete,
                    PACKYRS_aregImpute_1,
                    PACKYRS_aregImpute_2,
                    PACKYRS_aregImpute_3,
                    PACKYRS_aregImpute_4,
                    PACKYRS_aregImpute_5)
# Source label for every element of PACKYRS_values, in the same order
PACKYRS_labels <- c(rep("Original", length(PACKYRS_complete)),
                    rep("aregImputed_1", length(PACKYRS_aregImpute_1)),
                    rep("aregImputed_2", length(PACKYRS_aregImpute_2)),
                    rep("aregImputed_3", length(PACKYRS_aregImpute_3)),
                    rep("aregImputed_4", length(PACKYRS_aregImpute_4)),
                    rep("aregImputed_5", length(PACKYRS_aregImpute_5)))
# Variable name repeated once per pooled value, in the same variable order
# used for the label and value vectors below
VARIABLE_labels <- c(rep("SYSTBP", length(SYSTBP_labels)),
                     rep("DIASTBP", length(DIASTBP_labels)),
                     rep("SYSTH", length(SYSTH_labels)),
                     rep("DIASTH", length(DIASTH_labels)),
                     rep("LENGTH", length(LENGTH_labels)),
                     rep("WEIGHT", length(WEIGHT_labels)),
                     rep("BMI", length(BMI_labels)),
                     rep("CHOL", length(CHOL_labels)),
                     rep("HDL", length(HDL_labels)),
                     rep("LDL", length(LDL_labels)),
                     rep("TRIG", length(TRIG_labels)),
                     rep("HOMOC", length(HOMOC_labels)),
                     rep("GLUT", length(GLUT_labels)),
                     rep("CREAT", length(CREAT_labels)),
                     rep("IMT", length(IMT_labels)),
                     rep("PACKYRS", length(PACKYRS_labels)))
# Long-format table with one row per observed or imputed value. It is built
# with data.frame() rather than cbind() because cbind() on mixed character and
# numeric vectors yields a character matrix, which would turn every Value
# into text.
NUMERIC_VARIABLES_postimputation <- data.frame(
  Variable = VARIABLE_labels,
  Category = c(SYSTBP_labels,
               DIASTBP_labels,
               SYSTH_labels,
               DIASTH_labels,
               LENGTH_labels,
               WEIGHT_labels,
               BMI_labels,
               CHOL_labels,
               HDL_labels,
               LDL_labels,
               TRIG_labels,
               HOMOC_labels,
               GLUT_labels,
               CREAT_labels,
               IMT_labels,
               PACKYRS_labels),
  Value = c(SYSTBP_values,
            DIASTBP_values,
            SYSTH_values,
            DIASTH_values,
            LENGTH_values,
            WEIGHT_values,
            BMI_values,
            CHOL_values,
            HDL_values,
            LDL_values,
            TRIG_values,
            HOMOC_values,
            GLUT_values,
            CREAT_values,
            IMT_values,
            PACKYRS_values),
  stringsAsFactors = FALSE
)
colnames(NUMERIC_VARIABLES_postimputation) <- c("Variable",
                                                "Category",
                                                "Value")
# Fixing the level order of Variable explicitly instead of relying on the
# default alphabetical ordering
NUMERIC_VARIABLES_postimputation$Variable <- factor(NUMERIC_VARIABLES_postimputation$Variable,
                                                    levels = c("PACKYRS",
                                                               "IMT",
                                                               "CREAT",
                                                               "GLUT",
                                                               "HOMOC",
                                                               "TRIG",
                                                               "LDL",
                                                               "HDL",
                                                               "CHOL",
                                                               "BMI",
                                                               "WEIGHT",
                                                               "LENGTH",
                                                               "DIASTH",
                                                               "SYSTH",
                                                               "DIASTBP",
                                                               "SYSTBP"))
# Fixing the level order of Category so "Original" comes first, followed by
# the five imputations in order
NUMERIC_VARIABLES_postimputation$Category <- factor(NUMERIC_VARIABLES_postimputation$Category,
                                                    levels = c("Original",
                                                               "aregImputed_1",
                                                               "aregImputed_2",
                                                               "aregImputed_3",
                                                               "aregImputed_4",
                                                               "aregImputed_5"))
# Splitting the long-format table into one data frame per variable; each
# object name matches the Variable value it is filtered on
NUMERIC_VARIABLES_postimputation_SYSTBP <- NUMERIC_VARIABLES_postimputation[NUMERIC_VARIABLES_postimputation$Variable=="SYSTBP",]
NUMERIC_VARIABLES_postimputation_SYSTH <- NUMERIC_VARIABLES_postimputation[NUMERIC_VARIABLES_postimputation$Variable=="SYSTH",]
NUMERIC_VARIABLES_postimputation_DIASTBP <- NUMERIC_VARIABLES_postimputation[NUMERIC_VARIABLES_postimputation$Variable=="DIASTBP",]
NUMERIC_VARIABLES_postimputation_DIASTH <- NUMERIC_VARIABLES_postimputation[NUMERIC_VARIABLES_postimputation$Variable=="DIASTH",]
NUMERIC_VARIABLES_postimputation_LENGTH <- NUMERIC_VARIABLES_postimputation[NUMERIC_VARIABLES_postimputation$Variable=="LENGTH",]
NUMERIC_VARIABLES_postimputation_WEIGHT <- NUMERIC_VARIABLES_postimputation[NUMERIC_VARIABLES_postimputation$Variable=="WEIGHT",]
NUMERIC_VARIABLES_postimputation_BMI <- NUMERIC_VARIABLES_postimputation[NUMERIC_VARIABLES_postimputation$Variable=="BMI",]
NUMERIC_VARIABLES_postimputation_CHOL <- NUMERIC_VARIABLES_postimputation[NUMERIC_VARIABLES_postimputation$Variable=="CHOL",]
NUMERIC_VARIABLES_postimputation_HDL <- NUMERIC_VARIABLES_postimputation[NUMERIC_VARIABLES_postimputation$Variable=="HDL",]
NUMERIC_VARIABLES_postimputation_LDL <- NUMERIC_VARIABLES_postimputation[NUMERIC_VARIABLES_postimputation$Variable=="LDL",]
NUMERIC_VARIABLES_postimputation_TRIG <- NUMERIC_VARIABLES_postimputation[NUMERIC_VARIABLES_postimputation$Variable=="TRIG",]
NUMERIC_VARIABLES_postimputation_HOMOC <- NUMERIC_VARIABLES_postimputation[NUMERIC_VARIABLES_postimputation$Variable=="HOMOC",]
NUMERIC_VARIABLES_postimputation_GLUT <- NUMERIC_VARIABLES_postimputation[NUMERIC_VARIABLES_postimputation$Variable=="GLUT",]
NUMERIC_VARIABLES_postimputation_CREAT <- NUMERIC_VARIABLES_postimputation[NUMERIC_VARIABLES_postimputation$Variable=="CREAT",]
NUMERIC_VARIABLES_postimputation_IMT <- NUMERIC_VARIABLES_postimputation[NUMERIC_VARIABLES_postimputation$Variable=="IMT",]
NUMERIC_VARIABLES_postimputation_PACKYRS <- NUMERIC_VARIABLES_postimputation[NUMERIC_VARIABLES_postimputation$Variable=="PACKYRS",]
# Building one lattice density plot per variable, with one curve per Category
# (observed values and each of the five imputations). The plot objects are
# only stored here; nothing is drawn by these assignments.
DENSITY_SYSTBP <- densityplot(~ Value | Variable,
                              data = NUMERIC_VARIABLES_postimputation_SYSTBP,
                              groups = Category,
                              plot.points = FALSE,
                              ref = TRUE,
                              scales=list(relation="free"),
                              auto.key = list(adj=1, space="top", columns=1),
                              par.settings = list(superpose.line = list(lwd=2)))
DENSITY_SYSTH <- densityplot(~ Value | Variable,
                             data = NUMERIC_VARIABLES_postimputation_SYSTH,
                             groups = Category,
                             plot.points = FALSE,
                             ref = TRUE,
                             scales=list(relation="free"),
                             auto.key = list(adj=1, space="top", columns=1),
                             par.settings = list(superpose.line = list(lwd=2)))
DENSITY_DIASTBP <- densityplot(~ Value | Variable,
                               data = NUMERIC_VARIABLES_postimputation_DIASTBP,
                               groups = Category,
                               plot.points = FALSE,
                               ref = TRUE,
                               scales=list(relation="free"),
                               auto.key = list(adj=1, space="top", columns=1),
                               par.settings = list(superpose.line = list(lwd=2)))
DENSITY_DIASTH <- densityplot(~ Value | Variable,
                              data = NUMERIC_VARIABLES_postimputation_DIASTH,
                              groups = Category,
                              plot.points = FALSE,
                              ref = TRUE,
                              scales=list(relation="free"),
                              auto.key = list(adj=1, space="top", columns=1),
                              par.settings = list(superpose.line = list(lwd=2)))
DENSITY_LENGTH <- densityplot(~ Value | Variable,
                              data = NUMERIC_VARIABLES_postimputation_LENGTH,
                              groups = Category,
                              plot.points = FALSE,
                              ref = TRUE,
                              scales=list(relation="free"),
                              auto.key = list(adj=1, space="top", columns=1),
                              par.settings = list(superpose.line = list(lwd=2)))
DENSITY_WEIGHT <- densityplot(~ Value | Variable,
                              data = NUMERIC_VARIABLES_postimputation_WEIGHT,
                              groups = Category,
                              plot.points = FALSE,
                              ref = TRUE,
                              scales=list(relation="free"),
                              auto.key = list(adj=1, space="top", columns=1),
                              par.settings = list(superpose.line = list(lwd=2)))
DENSITY_BMI <- densityplot(~ Value | Variable,
                           data = NUMERIC_VARIABLES_postimputation_BMI,
                           groups = Category,
                           plot.points = FALSE,
                           ref = TRUE,
                           scales=list(relation="free"),
                           auto.key = list(adj=1, space="top", columns=1),
                           par.settings = list(superpose.line = list(lwd=2)))
DENSITY_CHOL <- densityplot(~ Value | Variable,
                            data = NUMERIC_VARIABLES_postimputation_CHOL,
                            groups = Category,
                            plot.points = FALSE,
                            ref = TRUE,
                            scales=list(relation="free"),
                            auto.key = list(adj=1, space="top", columns=1),
                            par.settings = list(superpose.line = list(lwd=2)))
<- densityplot(~ Value | Variable,
DENSITY_HDL data = NUMERIC_VARIABLES_postimputation_HDL,
groups = Category,
plot.points = FALSE,
ref = TRUE,
scales=list(relation="free"),
auto.key = list(adj=1, space="top", columns=1),
par.settings = list(superpose.line = list(lwd=2)))
<- densityplot(~ Value | Variable,
DENSITY_LDL data = NUMERIC_VARIABLES_postimputation_LDL,
groups = Category,
plot.points = FALSE,
ref = TRUE,
scales=list(relation="free"),
auto.key = list(adj=1, space="top", columns=1),
par.settings = list(superpose.line = list(lwd=2)))
<- densityplot(~ Value | Variable,
DENSITY_TRIG data = NUMERIC_VARIABLES_postimputation_TRIG,
groups = Category,
plot.points = FALSE,
ref = TRUE,
scales=list(relation="free"),
auto.key = list(adj=1, space="top", columns=1),
par.settings = list(superpose.line = list(lwd=2)))
<- densityplot(~ Value | Variable,
DENSITY_HOMOC data = NUMERIC_VARIABLES_postimputation_HOMOC,
groups = Category,
plot.points = FALSE,
ref = TRUE,
scales=list(relation="free"),
auto.key = list(adj=1, space="top", columns=1),
par.settings = list(superpose.line = list(lwd=2)))
<- densityplot(~ Value | Variable,
DENSITY_GLUT data = NUMERIC_VARIABLES_postimputation_GLUT,
groups = Category,
plot.points = FALSE,
ref = TRUE,
scales=list(relation="free"),
auto.key = list(adj=1, space="top", columns=1),
par.settings = list(superpose.line = list(lwd=2)))
<- densityplot(~ Value | Variable,
DENSITY_CREAT data = NUMERIC_VARIABLES_postimputation_CREAT,
groups = Category,
plot.points = FALSE,
ref = TRUE,
scales=list(relation="free"),
auto.key = list(adj=1, space="top", columns=1),
par.settings = list(superpose.line = list(lwd=2)))
<- densityplot(~ Value | Variable,
DENSITY_IMT data = NUMERIC_VARIABLES_postimputation_IMT,
groups = Category,
plot.points = FALSE,
ref = TRUE,
scales=list(relation="free"),
auto.key = list(adj=1, space="top", columns=1),
par.settings = list(superpose.line = list(lwd=2)))
<- densityplot(~ Value | Variable,
DENSITY_PACKYRS data = NUMERIC_VARIABLES_postimputation_PACKYRS,
groups = Category,
plot.points = FALSE,
ref = TRUE,
scales=list(relation="free"),
auto.key = list(adj=1, space="top", columns=1),
par.settings = list(superpose.line = list(lwd=2)))
grid.arrange(DENSITY_SYSTBP,
DENSITY_SYSTH,
DENSITY_DIASTBP,
DENSITY_DIASTH,
DENSITY_LENGTH,
DENSITY_WEIGHT,
DENSITY_BMI,
DENSITY_CHOL,
DENSITY_HDL,
DENSITY_LDL,
DENSITY_TRIG,
DENSITY_HOMOC,
DENSITY_GLUT,
DENSITY_CREAT,
DENSITY_IMT,
DENSITY_PACKYRS,ncol = 4)
##################################
# Evaluating the plausibility of the
# AREG-imputed values for the
# categorical variables
##################################
# Compare the distribution of the observed DIABETES values with the values
# drawn in each of the five imputations, as a stacked proportion bar chart.

# Non-missing DIABETES values from DPP.
DIABETES_complete <- DPP$DIABETES[complete.cases(DPP$DIABETES)]

# Imputed DIABETES values; columns V1..V5 are read as imputations 1..5.
DPP.AREGIMPUTED_aregImpute_DIABETES <- as.data.frame(DPP.AREGIMPUTED$imputed$DIABETES)
DIABETES_aregImpute_1 <- DPP.AREGIMPUTED_aregImpute_DIABETES$V1
DIABETES_aregImpute_2 <- DPP.AREGIMPUTED_aregImpute_DIABETES$V2
DIABETES_aregImpute_3 <- DPP.AREGIMPUTED_aregImpute_DIABETES$V3
DIABETES_aregImpute_4 <- DPP.AREGIMPUTED_aregImpute_DIABETES$V4
DIABETES_aregImpute_5 <- DPP.AREGIMPUTED_aregImpute_DIABETES$V5

# Stack the values and build a parallel vector labelling their source.
DIABETES_values <- c(DIABETES_complete,
                     DIABETES_aregImpute_1,
                     DIABETES_aregImpute_2,
                     DIABETES_aregImpute_3,
                     DIABETES_aregImpute_4,
                     DIABETES_aregImpute_5)
DIABETES_labels <- c(rep("Original",length(DIABETES_complete)),
                     rep("aregImputed_1",length(DIABETES_aregImpute_1)),
                     rep("aregImputed_2",length(DIABETES_aregImpute_2)),
                     rep("aregImputed_3",length(DIABETES_aregImpute_3)),
                     rep("aregImputed_4",length(DIABETES_aregImpute_4)),
                     rep("aregImputed_5",length(DIABETES_aregImpute_5)))
DIABETES_postimputation <- cbind(DIABETES_values,
                                 DIABETES_labels)
DIABETES_postimputation <- as.data.frame(DIABETES_postimputation)
colnames(DIABETES_postimputation) <- c("Value",
                                       "Category")

# Value-by-Category table normalised over margin 2, i.e. the proportion of
# each Value within each Category.
DIABETES_postimputation_proportion <- as.data.frame(prop.table(table(DIABETES_postimputation), 2))

# Fix the bar order: original data first, then imputations 1..5.
DIABETES_postimputation_proportion$Category <- factor(DIABETES_postimputation_proportion$Category,
                                                      levels=c("Original",
                                                               "aregImputed_1",
                                                               "aregImputed_2",
                                                               "aregImputed_3",
                                                               "aregImputed_4",
                                                               "aregImputed_5"))
# Constant column used as the conditioning variable (panel title).
DIABETES_postimputation_proportion$Variable <- rep("DIABETES",nrow(DIABETES_postimputation_proportion))

BAR_DIABETES <- barchart(Freq ~ Category | Variable,
                         data=DIABETES_postimputation_proportion,
                         groups = Value,
                         stack=TRUE,
                         ylab = "Proportion",
                         auto.key = list(adj=1, space="top", columns=2))
# Compare the distribution of the observed STENOSIS values with the values
# drawn in each of the five imputations, as a stacked proportion bar chart.

# Non-missing STENOSIS values from DPP.
STENOSIS_complete <- DPP$STENOSIS[complete.cases(DPP$STENOSIS)]

# Imputed STENOSIS values; columns V1..V5 are read as imputations 1..5.
DPP.AREGIMPUTED_aregImpute_STENOSIS <- as.data.frame(DPP.AREGIMPUTED$imputed$STENOSIS)
STENOSIS_aregImpute_1 <- DPP.AREGIMPUTED_aregImpute_STENOSIS$V1
STENOSIS_aregImpute_2 <- DPP.AREGIMPUTED_aregImpute_STENOSIS$V2
STENOSIS_aregImpute_3 <- DPP.AREGIMPUTED_aregImpute_STENOSIS$V3
STENOSIS_aregImpute_4 <- DPP.AREGIMPUTED_aregImpute_STENOSIS$V4
STENOSIS_aregImpute_5 <- DPP.AREGIMPUTED_aregImpute_STENOSIS$V5

# Stack the values and build a parallel vector labelling their source.
STENOSIS_values <- c(STENOSIS_complete,
                     STENOSIS_aregImpute_1,
                     STENOSIS_aregImpute_2,
                     STENOSIS_aregImpute_3,
                     STENOSIS_aregImpute_4,
                     STENOSIS_aregImpute_5)
STENOSIS_labels <- c(rep("Original",length(STENOSIS_complete)),
                     rep("aregImputed_1",length(STENOSIS_aregImpute_1)),
                     rep("aregImputed_2",length(STENOSIS_aregImpute_2)),
                     rep("aregImputed_3",length(STENOSIS_aregImpute_3)),
                     rep("aregImputed_4",length(STENOSIS_aregImpute_4)),
                     rep("aregImputed_5",length(STENOSIS_aregImpute_5)))
STENOSIS_postimputation <- cbind(STENOSIS_values,
                                 STENOSIS_labels)
STENOSIS_postimputation <- as.data.frame(STENOSIS_postimputation)
colnames(STENOSIS_postimputation) <- c("Value",
                                       "Category")

# Value-by-Category table normalised over margin 2, i.e. the proportion of
# each Value within each Category.
STENOSIS_postimputation_proportion <- as.data.frame(prop.table(table(STENOSIS_postimputation), 2))

# Fix the bar order: original data first, then imputations 1..5.
STENOSIS_postimputation_proportion$Category <- factor(STENOSIS_postimputation_proportion$Category,
                                                      levels=c("Original",
                                                               "aregImputed_1",
                                                               "aregImputed_2",
                                                               "aregImputed_3",
                                                               "aregImputed_4",
                                                               "aregImputed_5"))
# Constant column used as the conditioning variable (panel title).
STENOSIS_postimputation_proportion$Variable <- rep("STENOSIS",nrow(STENOSIS_postimputation_proportion))

BAR_STENOSIS <- barchart(Freq ~ Category | Variable,
                         data=STENOSIS_postimputation_proportion,
                         groups = Value,
                         stack=TRUE,
                         ylab = "Proportion",
                         auto.key = list(adj=1, space="top", columns=2))
# Compare the distribution of the observed albumin values with the values
# drawn in each of the five imputations, as a stacked proportion bar chart.

# Non-missing albumin values from DPP.
ALBUMIN_complete <- DPP$albumin[complete.cases(DPP$albumin)]

# Imputed albumin values; columns V1..V5 are read as imputations 1..5.
DPP.AREGIMPUTED_aregImpute_ALBUMIN <- as.data.frame(DPP.AREGIMPUTED$imputed$albumin)
ALBUMIN_aregImpute_1 <- DPP.AREGIMPUTED_aregImpute_ALBUMIN$V1
ALBUMIN_aregImpute_2 <- DPP.AREGIMPUTED_aregImpute_ALBUMIN$V2
ALBUMIN_aregImpute_3 <- DPP.AREGIMPUTED_aregImpute_ALBUMIN$V3
ALBUMIN_aregImpute_4 <- DPP.AREGIMPUTED_aregImpute_ALBUMIN$V4
ALBUMIN_aregImpute_5 <- DPP.AREGIMPUTED_aregImpute_ALBUMIN$V5

# Stack the values and build a parallel vector labelling their source.
ALBUMIN_values <- c(ALBUMIN_complete,
                    ALBUMIN_aregImpute_1,
                    ALBUMIN_aregImpute_2,
                    ALBUMIN_aregImpute_3,
                    ALBUMIN_aregImpute_4,
                    ALBUMIN_aregImpute_5)
ALBUMIN_labels <- c(rep("Original",length(ALBUMIN_complete)),
                    rep("aregImputed_1",length(ALBUMIN_aregImpute_1)),
                    rep("aregImputed_2",length(ALBUMIN_aregImpute_2)),
                    rep("aregImputed_3",length(ALBUMIN_aregImpute_3)),
                    rep("aregImputed_4",length(ALBUMIN_aregImpute_4)),
                    rep("aregImputed_5",length(ALBUMIN_aregImpute_5)))
ALBUMIN_postimputation <- cbind(ALBUMIN_values,
                                ALBUMIN_labels)
ALBUMIN_postimputation <- as.data.frame(ALBUMIN_postimputation)
colnames(ALBUMIN_postimputation) <- c("Value",
                                      "Category")

# Value-by-Category table normalised over margin 2, i.e. the proportion of
# each Value within each Category.
ALBUMIN_postimputation_proportion <- as.data.frame(prop.table(table(ALBUMIN_postimputation), 2))

# Fix the bar order: original data first, then imputations 1..5.
ALBUMIN_postimputation_proportion$Category <- factor(ALBUMIN_postimputation_proportion$Category,
                                                     levels=c("Original",
                                                              "aregImputed_1",
                                                              "aregImputed_2",
                                                              "aregImputed_3",
                                                              "aregImputed_4",
                                                              "aregImputed_5"))
# Constant column used as the conditioning variable (panel title).
ALBUMIN_postimputation_proportion$Variable <- rep("ALBUMIN",nrow(ALBUMIN_postimputation_proportion))

BAR_ALBUMIN <- barchart(Freq ~ Category | Variable,
                        data=ALBUMIN_postimputation_proportion,
                        groups = Value,
                        stack=TRUE,
                        ylab = "Proportion",
                        auto.key = list(adj=1, space="top", columns=3))
# Compare the distribution of the observed SMOKING values with the values
# drawn in each of the five imputations, as a stacked proportion bar chart.

# Non-missing SMOKING values from DPP.
SMOKING_complete <- DPP$SMOKING[complete.cases(DPP$SMOKING)]

# Imputed SMOKING values; columns V1..V5 are read as imputations 1..5.
DPP.AREGIMPUTED_aregImpute_SMOKING <- as.data.frame(DPP.AREGIMPUTED$imputed$SMOKING)
SMOKING_aregImpute_1 <- DPP.AREGIMPUTED_aregImpute_SMOKING$V1
SMOKING_aregImpute_2 <- DPP.AREGIMPUTED_aregImpute_SMOKING$V2
SMOKING_aregImpute_3 <- DPP.AREGIMPUTED_aregImpute_SMOKING$V3
SMOKING_aregImpute_4 <- DPP.AREGIMPUTED_aregImpute_SMOKING$V4
SMOKING_aregImpute_5 <- DPP.AREGIMPUTED_aregImpute_SMOKING$V5

# Stack the values and build a parallel vector labelling their source.
SMOKING_values <- c(SMOKING_complete,
                    SMOKING_aregImpute_1,
                    SMOKING_aregImpute_2,
                    SMOKING_aregImpute_3,
                    SMOKING_aregImpute_4,
                    SMOKING_aregImpute_5)
SMOKING_labels <- c(rep("Original",length(SMOKING_complete)),
                    rep("aregImputed_1",length(SMOKING_aregImpute_1)),
                    rep("aregImputed_2",length(SMOKING_aregImpute_2)),
                    rep("aregImputed_3",length(SMOKING_aregImpute_3)),
                    rep("aregImputed_4",length(SMOKING_aregImpute_4)),
                    rep("aregImputed_5",length(SMOKING_aregImpute_5)))
SMOKING_postimputation <- cbind(SMOKING_values,
                                SMOKING_labels)
SMOKING_postimputation <- as.data.frame(SMOKING_postimputation)
colnames(SMOKING_postimputation) <- c("Value",
                                      "Category")

# Value-by-Category table normalised over margin 2, i.e. the proportion of
# each Value within each Category.
SMOKING_postimputation_proportion <- as.data.frame(prop.table(table(SMOKING_postimputation), 2))

# Fix the bar order: original data first, then imputations 1..5.
SMOKING_postimputation_proportion$Category <- factor(SMOKING_postimputation_proportion$Category,
                                                     levels=c("Original",
                                                              "aregImputed_1",
                                                              "aregImputed_2",
                                                              "aregImputed_3",
                                                              "aregImputed_4",
                                                              "aregImputed_5"))
# Constant column used as the conditioning variable (panel title).
SMOKING_postimputation_proportion$Variable <- rep("SMOKING",nrow(SMOKING_postimputation_proportion))

BAR_SMOKING <- barchart(Freq ~ Category | Variable,
                        data=SMOKING_postimputation_proportion,
                        groups = Value,
                        stack=TRUE,
                        ylab = "Proportion",
                        auto.key = list(adj=1, space="top", columns=3))
# Compare the distribution of the observed alcohol values with the values
# drawn in each of the five imputations, as a stacked proportion bar chart.

# Non-missing alcohol values from DPP.
ALCOHOL_complete <- DPP$alcohol[complete.cases(DPP$alcohol)]

# Imputed alcohol values; columns V1..V5 are read as imputations 1..5.
DPP.AREGIMPUTED_aregImpute_ALCOHOL <- as.data.frame(DPP.AREGIMPUTED$imputed$alcohol)
ALCOHOL_aregImpute_1 <- DPP.AREGIMPUTED_aregImpute_ALCOHOL$V1
ALCOHOL_aregImpute_2 <- DPP.AREGIMPUTED_aregImpute_ALCOHOL$V2
ALCOHOL_aregImpute_3 <- DPP.AREGIMPUTED_aregImpute_ALCOHOL$V3
ALCOHOL_aregImpute_4 <- DPP.AREGIMPUTED_aregImpute_ALCOHOL$V4
ALCOHOL_aregImpute_5 <- DPP.AREGIMPUTED_aregImpute_ALCOHOL$V5

# Stack the values and build a parallel vector labelling their source.
ALCOHOL_values <- c(ALCOHOL_complete,
                    ALCOHOL_aregImpute_1,
                    ALCOHOL_aregImpute_2,
                    ALCOHOL_aregImpute_3,
                    ALCOHOL_aregImpute_4,
                    ALCOHOL_aregImpute_5)
ALCOHOL_labels <- c(rep("Original",length(ALCOHOL_complete)),
                    rep("aregImputed_1",length(ALCOHOL_aregImpute_1)),
                    rep("aregImputed_2",length(ALCOHOL_aregImpute_2)),
                    rep("aregImputed_3",length(ALCOHOL_aregImpute_3)),
                    rep("aregImputed_4",length(ALCOHOL_aregImpute_4)),
                    rep("aregImputed_5",length(ALCOHOL_aregImpute_5)))
ALCOHOL_postimputation <- cbind(ALCOHOL_values,
                                ALCOHOL_labels)
ALCOHOL_postimputation <- as.data.frame(ALCOHOL_postimputation)
colnames(ALCOHOL_postimputation) <- c("Value",
                                      "Category")

# Value-by-Category table normalised over margin 2, i.e. the proportion of
# each Value within each Category.
ALCOHOL_postimputation_proportion <- as.data.frame(prop.table(table(ALCOHOL_postimputation), 2))

# Fix the bar order: original data first, then imputations 1..5.
ALCOHOL_postimputation_proportion$Category <- factor(ALCOHOL_postimputation_proportion$Category,
                                                     levels=c("Original",
                                                              "aregImputed_1",
                                                              "aregImputed_2",
                                                              "aregImputed_3",
                                                              "aregImputed_4",
                                                              "aregImputed_5"))
# Constant column used as the conditioning variable (panel title).
ALCOHOL_postimputation_proportion$Variable <- rep("ALCOHOL",nrow(ALCOHOL_postimputation_proportion))

BAR_ALCOHOL <- barchart(Freq ~ Category | Variable,
                        data=ALCOHOL_postimputation_proportion,
                        groups = Value,
                        stack=TRUE,
                        ylab = "Proportion",
                        auto.key = list(adj=1, space="top", columns=3))
# Show the five stacked-proportion bar charts together on a two-column grid.
grid.arrange(
  BAR_DIABETES,
  BAR_STENOSIS,
  BAR_ALBUMIN,
  BAR_SMOKING,
  BAR_ALCOHOL,
  ncol = 2
)
##################################
# Loading the modelling dataset
# by combining the complete and imputed values
##################################
##################################
# Combining the complete and
# 1ST imputation results from AREG
##################################
# Extract imputation 1 from DPP.AREGIMPUTED as a list of variables
# (list.out=TRUE), with printing and checks switched off.
DPP.AREGIMPUTED_1 <- impute.transcan(DPP.AREGIMPUTED,
                                     imputation=1,
                                     data=DPP,
                                     list.out=TRUE,
                                     pr=FALSE,
                                     check=FALSE)

# Start from the full SMART data and overwrite the same-named columns with
# the versions returned for this imputation.
SMART.AREGIMPUTED_1 <- SMART
SMART.AREGIMPUTED_1[names(DPP.AREGIMPUTED_1)] <- DPP.AREGIMPUTED_1

# Coerce the four indicators to numeric via character, so that label values
# rather than internal codes are used should they be stored as factors.
SMART.AREGIMPUTED_1$CEREBRAL <- as.numeric(as.character(SMART.AREGIMPUTED_1$CEREBRAL))
SMART.AREGIMPUTED_1$CARDIAC <- as.numeric(as.character(SMART.AREGIMPUTED_1$CARDIAC))
SMART.AREGIMPUTED_1$AAA <- as.numeric(as.character(SMART.AREGIMPUTED_1$AAA))
SMART.AREGIMPUTED_1$PERIPH <- as.numeric(as.character(SMART.AREGIMPUTED_1$PERIPH))

# Sum score of the four indicators, with AAA weighted twice.
SMART.AREGIMPUTED_1$SUMSCORE_5LEVELS <- SMART.AREGIMPUTED_1$CEREBRAL +
  SMART.AREGIMPUTED_1$CARDIAC +
  (2*SMART.AREGIMPUTED_1$AAA) +
  SMART.AREGIMPUTED_1$PERIPH
##################################
# Combining the complete and
# 2ND imputation results from AREG
##################################
# Extract imputation 2 from DPP.AREGIMPUTED as a list of variables
# (list.out=TRUE), with printing and checks switched off.
DPP.AREGIMPUTED_2 <- impute.transcan(DPP.AREGIMPUTED,
                                     imputation=2,
                                     data=DPP,
                                     list.out=TRUE,
                                     pr=FALSE,
                                     check=FALSE)

# Start from the full SMART data and overwrite the same-named columns with
# the versions returned for this imputation.
SMART.AREGIMPUTED_2 <- SMART
SMART.AREGIMPUTED_2[names(DPP.AREGIMPUTED_2)] <- DPP.AREGIMPUTED_2

# Coerce the four indicators to numeric via character, so that label values
# rather than internal codes are used should they be stored as factors.
SMART.AREGIMPUTED_2$CEREBRAL <- as.numeric(as.character(SMART.AREGIMPUTED_2$CEREBRAL))
SMART.AREGIMPUTED_2$CARDIAC <- as.numeric(as.character(SMART.AREGIMPUTED_2$CARDIAC))
SMART.AREGIMPUTED_2$AAA <- as.numeric(as.character(SMART.AREGIMPUTED_2$AAA))
SMART.AREGIMPUTED_2$PERIPH <- as.numeric(as.character(SMART.AREGIMPUTED_2$PERIPH))

# Sum score of the four indicators, with AAA weighted twice.
SMART.AREGIMPUTED_2$SUMSCORE_5LEVELS <- SMART.AREGIMPUTED_2$CEREBRAL +
  SMART.AREGIMPUTED_2$CARDIAC +
  (2*SMART.AREGIMPUTED_2$AAA) +
  SMART.AREGIMPUTED_2$PERIPH
##################################
# Combining the complete and
# 3RD imputation results from AREG
##################################
# Extract imputation 3 from DPP.AREGIMPUTED as a list of variables
# (list.out=TRUE), with printing and checks switched off.
DPP.AREGIMPUTED_3 <- impute.transcan(DPP.AREGIMPUTED,
                                     imputation=3,
                                     data=DPP,
                                     list.out=TRUE,
                                     pr=FALSE,
                                     check=FALSE)

# Start from the full SMART data and overwrite the same-named columns with
# the versions returned for this imputation.
SMART.AREGIMPUTED_3 <- SMART
SMART.AREGIMPUTED_3[names(DPP.AREGIMPUTED_3)] <- DPP.AREGIMPUTED_3

# Coerce the four indicators to numeric via character, so that label values
# rather than internal codes are used should they be stored as factors.
SMART.AREGIMPUTED_3$CEREBRAL <- as.numeric(as.character(SMART.AREGIMPUTED_3$CEREBRAL))
SMART.AREGIMPUTED_3$CARDIAC <- as.numeric(as.character(SMART.AREGIMPUTED_3$CARDIAC))
SMART.AREGIMPUTED_3$AAA <- as.numeric(as.character(SMART.AREGIMPUTED_3$AAA))
SMART.AREGIMPUTED_3$PERIPH <- as.numeric(as.character(SMART.AREGIMPUTED_3$PERIPH))

# Sum score of the four indicators, with AAA weighted twice.
SMART.AREGIMPUTED_3$SUMSCORE_5LEVELS <- SMART.AREGIMPUTED_3$CEREBRAL +
  SMART.AREGIMPUTED_3$CARDIAC +
  (2*SMART.AREGIMPUTED_3$AAA) +
  SMART.AREGIMPUTED_3$PERIPH
##################################
# Combining the complete and
# 4TH imputation results from AREG
##################################
# Extract imputation 4 from DPP.AREGIMPUTED as a list of variables
# (list.out=TRUE), with printing and checks switched off.
DPP.AREGIMPUTED_4 <- impute.transcan(DPP.AREGIMPUTED,
                                     imputation=4,
                                     data=DPP,
                                     list.out=TRUE,
                                     pr=FALSE,
                                     check=FALSE)

# Start from the full SMART data and overwrite the same-named columns with
# the versions returned for this imputation.
SMART.AREGIMPUTED_4 <- SMART
SMART.AREGIMPUTED_4[names(DPP.AREGIMPUTED_4)] <- DPP.AREGIMPUTED_4

# Coerce the four indicators to numeric via character, so that label values
# rather than internal codes are used should they be stored as factors.
SMART.AREGIMPUTED_4$CEREBRAL <- as.numeric(as.character(SMART.AREGIMPUTED_4$CEREBRAL))
SMART.AREGIMPUTED_4$CARDIAC <- as.numeric(as.character(SMART.AREGIMPUTED_4$CARDIAC))
SMART.AREGIMPUTED_4$AAA <- as.numeric(as.character(SMART.AREGIMPUTED_4$AAA))
SMART.AREGIMPUTED_4$PERIPH <- as.numeric(as.character(SMART.AREGIMPUTED_4$PERIPH))

# Sum score of the four indicators, with AAA weighted twice.
SMART.AREGIMPUTED_4$SUMSCORE_5LEVELS <- SMART.AREGIMPUTED_4$CEREBRAL +
  SMART.AREGIMPUTED_4$CARDIAC +
  (2*SMART.AREGIMPUTED_4$AAA) +
  SMART.AREGIMPUTED_4$PERIPH
##################################
# Combining the complete and
# 5TH imputation results from AREG
##################################
# Extract imputation 5 from DPP.AREGIMPUTED as a list of variables
# (list.out=TRUE), with printing and checks switched off.
DPP.AREGIMPUTED_5 <- impute.transcan(DPP.AREGIMPUTED,
                                     imputation=5,
                                     data=DPP,
                                     list.out=TRUE,
                                     pr=FALSE,
                                     check=FALSE)

# Start from the full SMART data and overwrite the same-named columns with
# the versions returned for this imputation.
SMART.AREGIMPUTED_5 <- SMART
SMART.AREGIMPUTED_5[names(DPP.AREGIMPUTED_5)] <- DPP.AREGIMPUTED_5

# Coerce the four indicators to numeric via character, so that label values
# rather than internal codes are used should they be stored as factors.
SMART.AREGIMPUTED_5$CEREBRAL <- as.numeric(as.character(SMART.AREGIMPUTED_5$CEREBRAL))
SMART.AREGIMPUTED_5$CARDIAC <- as.numeric(as.character(SMART.AREGIMPUTED_5$CARDIAC))
SMART.AREGIMPUTED_5$AAA <- as.numeric(as.character(SMART.AREGIMPUTED_5$AAA))
SMART.AREGIMPUTED_5$PERIPH <- as.numeric(as.character(SMART.AREGIMPUTED_5$PERIPH))

# Sum score of the four indicators, with AAA weighted twice.
SMART.AREGIMPUTED_5$SUMSCORE_5LEVELS <- SMART.AREGIMPUTED_5$CEREBRAL +
  SMART.AREGIMPUTED_5$CARDIAC +
  (2*SMART.AREGIMPUTED_5$AAA) +
  SMART.AREGIMPUTED_5$PERIPH
##################################
# Exploring the single imputation
# results using AREG
##################################
# Print a per-variable summary of the dataset completed with imputation 1;
# the output that follows lists counts of missing and imputed values,
# distinct values and, for each variable, either quantiles or a frequency table.
describe(SMART.AREGIMPUTED_1)
## SMART.AREGIMPUTED_1
##
## 30 Variables 3873 Observations
## --------------------------------------------------------------------------------
## TEVENT
## n missing distinct Info Mean Gmd .05 .10
## 3873 0 1934 1 1370 1078 98.6 197.0
## .25 .50 .75 .90 .95
## 555.0 1213.0 2165.0 2762.4 3017.4
##
## lowest : 0.1 1 2 3 4 , highest: 3451 3452 3463 3465 3466
## --------------------------------------------------------------------------------
## EVENT
## n missing distinct
## 3873 0 2
##
## Value 0 1
## Frequency 3413 460
## Proportion 0.881 0.119
## --------------------------------------------------------------------------------
## SEX
## n missing distinct
## 3873 0 2
##
## Value 1 2
## Frequency 2897 976
## Proportion 0.748 0.252
## --------------------------------------------------------------------------------
## AGE
## n missing distinct Info Mean Gmd .05 .10
## 3873 0 62 0.999 59.56 11.94 41 46
## .25 .50 .75 .90 .95
## 52 60 68 73 76
##
## lowest : 19 20 23 24 25, highest: 78 79 80 81 82
## --------------------------------------------------------------------------------
## DIABETES
## n missing imputed distinct
## 3873 0 40 2
##
## Value 0 1
## Frequency 3024 849
## Proportion 0.781 0.219
## --------------------------------------------------------------------------------
## CEREBRAL
## n missing distinct Info Sum Mean Gmd
## 3873 0 2 0.625 1147 0.2962 0.417
##
## --------------------------------------------------------------------------------
## CARDIAC
## n missing distinct Info Sum Mean Gmd
## 3873 0 2 0.74 2160 0.5577 0.4935
##
## --------------------------------------------------------------------------------
## AAA
## n missing distinct Info Sum Mean Gmd
## 3873 0 2 0.288 416 0.1074 0.1918
##
## --------------------------------------------------------------------------------
## PERIPH
## n missing distinct Info Sum Mean Gmd
## 3873 0 2 0.551 940 0.2427 0.3677
##
## --------------------------------------------------------------------------------
## STENOSIS
## n missing imputed distinct
## 3873 0 93 2
##
## Value 0 1
## Frequency 3131 742
## Proportion 0.808 0.192
## --------------------------------------------------------------------------------
## SYSTBP
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 1223 114 1 140.8 22.31 112
## .10 .25 .50 .75 .90 .95
## 117 126 139 153 168 178
##
## lowest : 96 97 98 99 100, highest: 206 209 211 212 216
## --------------------------------------------------------------------------------
## DIASTBP
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 1221 70 0.999 79.72 10.94 65
## .10 .25 .50 .75 .90 .95
## 68 73 79 86 93 97
##
## lowest : 46 48 52 53 54, highest: 117 118 120 124 127
## --------------------------------------------------------------------------------
## SYSTH
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 1498 132 1 143.1 24.73 111
## .10 .25 .50 .75 .90 .95
## 117 127 140 157 173 183
##
## lowest : 79 88 91 93 94, highest: 222 223 228 242 244
## --------------------------------------------------------------------------------
## DIASTH
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 1499 77 0.999 81.93 12.92 64
## .10 .25 .50 .75 .90 .95
## 68 74 81 89 97 102
##
## lowest : 45 49 50 52 53, highest: 123 125 126 130 136
## --------------------------------------------------------------------------------
## LENGTH
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 1 42 0.999 1.74 0.09862 1.59
## .10 .25 .50 .75 .90 .95
## 1.62 1.68 1.75 1.80 1.85 1.88
##
## lowest : 1.53 1.54 1.55 1.56 1.57, highest: 1.9 1.91 1.92 1.93 1.94
## --------------------------------------------------------------------------------
## WEIGHT
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 2 75 0.999 80.99 15.41 59
## .10 .25 .50 .75 .90 .95
## 64 72 80 89 99 104
##
## lowest : 50 51 52 53 54, highest: 120 121 122 123 124
## --------------------------------------------------------------------------------
## BMI
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 3 995 1 26.69 4.259 20.96
## .10 .25 .50 .75 .90 .95
## 22.16 24.11 26.30 28.73 31.86 33.90
##
## lowest : 18.7 18.71 18.73 18.78 18.81, highest: 39.25 39.43 39.45 39.64 39.8
## --------------------------------------------------------------------------------
## CHOL
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 18 65 0.999 5.191 1.283 3.5
## .10 .25 .50 .75 .90 .95
## 3.8 4.4 5.1 5.9 6.7 7.2
##
## lowest : 2.8 2.9 3 3.1 3.2, highest: 8.8 8.9 9.1 9.2 9.4
## --------------------------------------------------------------------------------
## HDL
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 30 188 1 1.23 0.4031 0.74
## .10 .25 .50 .75 .90 .95
## 0.82 0.96 1.17 1.42 1.73 1.94
##
## lowest : 0.58 0.59 0.6 0.61 0.62, highest: 2.46 2.47 2.48 2.49 2.51
## --------------------------------------------------------------------------------
## LDL
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 216 478 1 3.129 1.184 1.52
## .10 .25 .50 .75 .90 .95
## 1.82 2.37 3.05 3.83 4.55 4.98
##
## lowest : 1.1 1.11 1.12 1.14 1.15, highest: 6.34 6.37 6.41 6.47 6.6
## --------------------------------------------------------------------------------
## TRIG
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 28 461 1 1.852 1.093 0.760
## .10 .25 .50 .75 .90 .95
## 0.880 1.120 1.530 2.230 3.120 3.904
##
## lowest : 0.56 0.57 0.58 0.59 0.6 , highest: 8.28 8.61 8.68 8.91 8.96
## --------------------------------------------------------------------------------
## HOMOC
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 463 255 1 14.17 5.609 8.0
## .10 .25 .50 .75 .90 .95
## 8.8 10.5 13.0 16.2 20.6 24.8
##
## lowest : 6.1 6.2 6.3 6.4 6.5 , highest: 35.5 35.9 36.1 37.1 38.3
## --------------------------------------------------------------------------------
## GLUT
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 19 125 0.998 6.333 1.702 4.8
## .10 .25 .50 .75 .90 .95
## 5.0 5.3 5.7 6.5 8.4 10.4
##
## lowest : 4.3 4.4 4.5 4.6 4.7 , highest: 17.6 17.7 17.9 18 18.7
## --------------------------------------------------------------------------------
## CREAT
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 17 194 1 98.35 34.53 65
## .10 .25 .50 .75 .90 .95
## 69 78 89 101 118 138
##
## lowest : 54 55 56 57 58, highest: 784 799 809 813 825
## --------------------------------------------------------------------------------
## IMT
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 98 99 0.999 0.936 0.2883 0.60
## .10 .25 .50 .75 .90 .95
## 0.65 0.75 0.88 1.07 1.30 1.45
##
## lowest : 0.47 0.48 0.5 0.52 0.53, highest: 1.77 1.78 1.8 1.82 1.83
## --------------------------------------------------------------------------------
## albumin
## n missing imputed distinct
## 3873 0 207 3
##
## Value 1 2 3
## Frequency 3045 696 132
## Proportion 0.786 0.180 0.034
## --------------------------------------------------------------------------------
## SMOKING
## n missing imputed distinct
## 3873 0 25 3
##
## Value 1 2 3
## Frequency 694 2733 446
## Proportion 0.179 0.706 0.115
## --------------------------------------------------------------------------------
## packyrs
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 21 263 0.994 22.64 22.26 0.0
## .10 .25 .50 .75 .90 .95
## 0.0 5.9 19.5 34.2 50.4 62.0
##
## lowest : 0 0.3 0.6 0.7 0.8, highest: 100 102 104 110 120
## --------------------------------------------------------------------------------
## alcohol
## n missing imputed distinct
## 3873 0 25 3
##
## Value 1 2 3
## Frequency 756 412 2705
## Proportion 0.195 0.106 0.698
## --------------------------------------------------------------------------------
## SUMSCORE_5LEVELS
## n missing distinct Info Mean Gmd
## 3873 0 5 0.532 1.311 0.511
##
## Value 1 2 3 4 5
## Frequency 2999 605 211 53 5
## Proportion 0.774 0.156 0.054 0.014 0.001
##
## For the frequency table, variable is rounded to the nearest 0
## --------------------------------------------------------------------------------
# Print a per-variable summary of the dataset completed with imputation 2;
# the output that follows lists counts of missing and imputed values,
# distinct values and, for each variable, either quantiles or a frequency table.
describe(SMART.AREGIMPUTED_2)
## SMART.AREGIMPUTED_2
##
## 30 Variables 3873 Observations
## --------------------------------------------------------------------------------
## TEVENT
## n missing distinct Info Mean Gmd .05 .10
## 3873 0 1934 1 1370 1078 98.6 197.0
## .25 .50 .75 .90 .95
## 555.0 1213.0 2165.0 2762.4 3017.4
##
## lowest : 0.1 1 2 3 4 , highest: 3451 3452 3463 3465 3466
## --------------------------------------------------------------------------------
## EVENT
## n missing distinct
## 3873 0 2
##
## Value 0 1
## Frequency 3413 460
## Proportion 0.881 0.119
## --------------------------------------------------------------------------------
## SEX
## n missing distinct
## 3873 0 2
##
## Value 1 2
## Frequency 2897 976
## Proportion 0.748 0.252
## --------------------------------------------------------------------------------
## AGE
## n missing distinct Info Mean Gmd .05 .10
## 3873 0 62 0.999 59.56 11.94 41 46
## .25 .50 .75 .90 .95
## 52 60 68 73 76
##
## lowest : 19 20 23 24 25, highest: 78 79 80 81 82
## --------------------------------------------------------------------------------
## DIABETES
## n missing imputed distinct
## 3873 0 40 2
##
## Value 0 1
## Frequency 3024 849
## Proportion 0.781 0.219
## --------------------------------------------------------------------------------
## CEREBRAL
## n missing distinct Info Sum Mean Gmd
## 3873 0 2 0.625 1147 0.2962 0.417
##
## --------------------------------------------------------------------------------
## CARDIAC
## n missing distinct Info Sum Mean Gmd
## 3873 0 2 0.74 2160 0.5577 0.4935
##
## --------------------------------------------------------------------------------
## AAA
## n missing distinct Info Sum Mean Gmd
## 3873 0 2 0.288 416 0.1074 0.1918
##
## --------------------------------------------------------------------------------
## PERIPH
## n missing distinct Info Sum Mean Gmd
## 3873 0 2 0.551 940 0.2427 0.3677
##
## --------------------------------------------------------------------------------
## STENOSIS
## n missing imputed distinct
## 3873 0 93 2
##
## Value 0 1
## Frequency 3130 743
## Proportion 0.808 0.192
## --------------------------------------------------------------------------------
## SYSTBP
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 1223 114 1 140.7 21.96 112
## .10 .25 .50 .75 .90 .95
## 117 126 138 153 167 177
##
## lowest : 96 97 98 99 100, highest: 206 209 211 212 216
## --------------------------------------------------------------------------------
## DIASTBP
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 1221 70 0.999 79.85 10.89 65
## .10 .25 .50 .75 .90 .95
## 68 73 79 86 93 97
##
## lowest : 46 48 52 53 54, highest: 117 118 120 124 127
## --------------------------------------------------------------------------------
## SYSTH
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 1498 132 1 143.1 24.78 111
## .10 .25 .50 .75 .90 .95
## 117 127 140 157 173 183
##
## lowest : 79 88 91 93 94, highest: 222 223 228 242 244
## --------------------------------------------------------------------------------
## DIASTH
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 1499 77 0.999 82.13 12.95 64
## .10 .25 .50 .75 .90 .95
## 68 74 81 90 97 102
##
## lowest : 45 49 50 52 53, highest: 123 125 126 130 136
## --------------------------------------------------------------------------------
## LENGTH
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 1 42 0.999 1.74 0.09862 1.59
## .10 .25 .50 .75 .90 .95
## 1.62 1.68 1.75 1.80 1.85 1.88
##
## lowest : 1.53 1.54 1.55 1.56 1.57, highest: 1.9 1.91 1.92 1.93 1.94
## --------------------------------------------------------------------------------
## WEIGHT
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 2 75 0.999 80.99 15.41 59
## .10 .25 .50 .75 .90 .95
## 64 72 80 89 99 104
##
## lowest : 50 51 52 53 54, highest: 120 121 122 123 124
## --------------------------------------------------------------------------------
## BMI
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 3 995 1 26.69 4.258 20.96
## .10 .25 .50 .75 .90 .95
## 22.16 24.11 26.30 28.73 31.86 33.90
##
## lowest : 18.7 18.71 18.73 18.78 18.81, highest: 39.25 39.43 39.45 39.64 39.8
## --------------------------------------------------------------------------------
## CHOL
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 18 65 0.999 5.191 1.283 3.5
## .10 .25 .50 .75 .90 .95
## 3.8 4.4 5.1 5.9 6.7 7.2
##
## lowest : 2.8 2.9 3 3.1 3.2, highest: 8.8 8.9 9.1 9.2 9.4
## --------------------------------------------------------------------------------
## HDL
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 30 188 1 1.23 0.4031 0.74
## .10 .25 .50 .75 .90 .95
## 0.82 0.96 1.17 1.42 1.73 1.94
##
## lowest : 0.58 0.59 0.6 0.61 0.62, highest: 2.46 2.47 2.48 2.49 2.51
## --------------------------------------------------------------------------------
## LDL
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 216 478 1 3.129 1.186 1.52
## .10 .25 .50 .75 .90 .95
## 1.82 2.37 3.05 3.83 4.55 4.97
##
## lowest : 1.1 1.11 1.12 1.14 1.15, highest: 6.34 6.37 6.41 6.47 6.6
## --------------------------------------------------------------------------------
## TRIG
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 28 461 1 1.852 1.093 0.760
## .10 .25 .50 .75 .90 .95
## 0.880 1.120 1.530 2.230 3.120 3.904
##
## lowest : 0.56 0.57 0.58 0.59 0.6 , highest: 8.28 8.61 8.68 8.91 8.96
## --------------------------------------------------------------------------------
## HOMOC
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 463 255 1 14.1 5.525 8.00
## .10 .25 .50 .75 .90 .95
## 8.80 10.50 12.90 16.00 20.48 24.60
##
## lowest : 6.1 6.2 6.3 6.4 6.5 , highest: 35.5 35.9 36.1 37.1 38.3
## --------------------------------------------------------------------------------
## GLUT
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 19 125 0.998 6.335 1.704 4.8
## .10 .25 .50 .75 .90 .95
## 5.0 5.3 5.7 6.5 8.4 10.4
##
## lowest : 4.3 4.4 4.5 4.6 4.7 , highest: 17.6 17.7 17.9 18 18.7
## --------------------------------------------------------------------------------
## CREAT
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 17 194 1 98.34 34.54 65.0
## .10 .25 .50 .75 .90 .95
## 69.2 78.0 89.0 101.0 118.0 138.0
##
## lowest : 54 55 56 57 58, highest: 784 799 809 813 825
## --------------------------------------------------------------------------------
## IMT
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 98 99 0.999 0.9359 0.2887 0.60
## .10 .25 .50 .75 .90 .95
## 0.65 0.75 0.88 1.07 1.30 1.47
##
## lowest : 0.47 0.48 0.5 0.52 0.53, highest: 1.77 1.78 1.8 1.82 1.83
## --------------------------------------------------------------------------------
## albumin
## n missing imputed distinct
## 3873 0 207 3
##
## Value 1 2 3
## Frequency 3040 695 138
## Proportion 0.785 0.179 0.036
## --------------------------------------------------------------------------------
## SMOKING
## n missing imputed distinct
## 3873 0 25 3
##
## Value 1 2 3
## Frequency 696 2731 446
## Proportion 0.180 0.705 0.115
## --------------------------------------------------------------------------------
## packyrs
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 21 263 0.994 22.7 22.32 0.0
## .10 .25 .50 .75 .90 .95
## 0.0 5.9 19.5 34.2 51.3 62.0
##
## lowest : 0 0.3 0.6 0.7 0.8, highest: 100 102 104 110 120
## --------------------------------------------------------------------------------
## alcohol
## n missing imputed distinct
## 3873 0 25 3
##
## Value 1 2 3
## Frequency 754 408 2711
## Proportion 0.195 0.105 0.700
## --------------------------------------------------------------------------------
## SUMSCORE_5LEVELS
## n missing distinct Info Mean Gmd
## 3873 0 5 0.532 1.311 0.511
##
## Value 1 2 3 4 5
## Frequency 2999 605 211 53 5
## Proportion 0.774 0.156 0.054 0.014 0.001
##
## For the frequency table, variable is rounded to the nearest 0
## --------------------------------------------------------------------------------
##################################
# Describing every variable of the
# SMART.AREGIMPUTED_3 dataset
# (presumably the 3RD imputation results from AREG)
##################################
describe(SMART.AREGIMPUTED_3)
## SMART.AREGIMPUTED_3
##
## 30 Variables 3873 Observations
## --------------------------------------------------------------------------------
## TEVENT
## n missing distinct Info Mean Gmd .05 .10
## 3873 0 1934 1 1370 1078 98.6 197.0
## .25 .50 .75 .90 .95
## 555.0 1213.0 2165.0 2762.4 3017.4
##
## lowest : 0.1 1 2 3 4 , highest: 3451 3452 3463 3465 3466
## --------------------------------------------------------------------------------
## EVENT
## n missing distinct
## 3873 0 2
##
## Value 0 1
## Frequency 3413 460
## Proportion 0.881 0.119
## --------------------------------------------------------------------------------
## SEX
## n missing distinct
## 3873 0 2
##
## Value 1 2
## Frequency 2897 976
## Proportion 0.748 0.252
## --------------------------------------------------------------------------------
## AGE
## n missing distinct Info Mean Gmd .05 .10
## 3873 0 62 0.999 59.56 11.94 41 46
## .25 .50 .75 .90 .95
## 52 60 68 73 76
##
## lowest : 19 20 23 24 25, highest: 78 79 80 81 82
## --------------------------------------------------------------------------------
## DIABETES
## n missing imputed distinct
## 3873 0 40 2
##
## Value 0 1
## Frequency 3024 849
## Proportion 0.781 0.219
## --------------------------------------------------------------------------------
## CEREBRAL
## n missing distinct Info Sum Mean Gmd
## 3873 0 2 0.625 1147 0.2962 0.417
##
## --------------------------------------------------------------------------------
## CARDIAC
## n missing distinct Info Sum Mean Gmd
## 3873 0 2 0.74 2160 0.5577 0.4935
##
## --------------------------------------------------------------------------------
## AAA
## n missing distinct Info Sum Mean Gmd
## 3873 0 2 0.288 416 0.1074 0.1918
##
## --------------------------------------------------------------------------------
## PERIPH
## n missing distinct Info Sum Mean Gmd
## 3873 0 2 0.551 940 0.2427 0.3677
##
## --------------------------------------------------------------------------------
## STENOSIS
## n missing imputed distinct
## 3873 0 93 2
##
## Value 0 1
## Frequency 3136 737
## Proportion 0.81 0.19
## --------------------------------------------------------------------------------
## SYSTBP
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 1223 114 1 140.7 22.07 112
## .10 .25 .50 .75 .90 .95
## 117 126 139 153 167 177
##
## lowest : 96 97 98 99 100, highest: 206 209 211 212 216
## --------------------------------------------------------------------------------
## DIASTBP
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 1221 70 0.999 79.71 10.75 65
## .10 .25 .50 .75 .90 .95
## 68 73 79 86 92 97
##
## lowest : 46 48 52 53 54, highest: 117 118 120 124 127
## --------------------------------------------------------------------------------
## SYSTH
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 1498 132 1 143 24.67 111
## .10 .25 .50 .75 .90 .95
## 117 127 141 156 172 182
##
## lowest : 79 88 91 93 94, highest: 222 223 228 242 244
## --------------------------------------------------------------------------------
## DIASTH
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 1499 77 0.999 82.15 13 64
## .10 .25 .50 .75 .90 .95
## 68 74 81 90 97 102
##
## lowest : 45 49 50 52 53, highest: 123 125 126 130 136
## --------------------------------------------------------------------------------
## LENGTH
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 1 42 0.999 1.74 0.09862 1.59
## .10 .25 .50 .75 .90 .95
## 1.62 1.68 1.75 1.80 1.85 1.88
##
## lowest : 1.53 1.54 1.55 1.56 1.57, highest: 1.9 1.91 1.92 1.93 1.94
## --------------------------------------------------------------------------------
## WEIGHT
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 2 75 0.999 80.99 15.41 59
## .10 .25 .50 .75 .90 .95
## 64 72 80 89 99 104
##
## lowest : 50 51 52 53 54, highest: 120 121 122 123 124
## --------------------------------------------------------------------------------
## BMI
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 3 995 1 26.69 4.257 20.96
## .10 .25 .50 .75 .90 .95
## 22.16 24.11 26.30 28.73 31.86 33.90
##
## lowest : 18.7 18.71 18.73 18.78 18.81, highest: 39.25 39.43 39.45 39.64 39.8
## --------------------------------------------------------------------------------
## CHOL
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 18 65 0.999 5.191 1.283 3.5
## .10 .25 .50 .75 .90 .95
## 3.8 4.4 5.1 5.9 6.7 7.2
##
## lowest : 2.8 2.9 3 3.1 3.2, highest: 8.8 8.9 9.1 9.2 9.4
## --------------------------------------------------------------------------------
## HDL
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 30 188 1 1.23 0.4028 0.74
## .10 .25 .50 .75 .90 .95
## 0.82 0.96 1.17 1.42 1.73 1.94
##
## lowest : 0.58 0.59 0.6 0.61 0.62, highest: 2.46 2.47 2.48 2.49 2.51
## --------------------------------------------------------------------------------
## LDL
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 216 478 1 3.128 1.186 1.520
## .10 .25 .50 .75 .90 .95
## 1.820 2.370 3.050 3.830 4.550 4.984
##
## lowest : 1.1 1.11 1.12 1.14 1.15, highest: 6.34 6.37 6.41 6.47 6.6
## --------------------------------------------------------------------------------
## TRIG
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 28 461 1 1.853 1.093 0.760
## .10 .25 .50 .75 .90 .95
## 0.880 1.130 1.540 2.230 3.120 3.914
##
## lowest : 0.56 0.57 0.58 0.59 0.6 , highest: 8.28 8.61 8.68 8.91 8.96
## --------------------------------------------------------------------------------
## HOMOC
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 463 255 1 14.12 5.548 8.00
## .10 .25 .50 .75 .90 .95
## 8.80 10.50 13.00 16.00 20.50 24.64
##
## lowest : 6.1 6.2 6.3 6.4 6.5 , highest: 35.5 35.9 36.1 37.1 38.3
## --------------------------------------------------------------------------------
## GLUT
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 19 125 0.998 6.33 1.698 4.8
## .10 .25 .50 .75 .90 .95
## 5.0 5.3 5.7 6.5 8.4 10.4
##
## lowest : 4.3 4.4 4.5 4.6 4.7 , highest: 17.6 17.7 17.9 18 18.7
## --------------------------------------------------------------------------------
## CREAT
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 17 194 1 98.34 34.56 64
## .10 .25 .50 .75 .90 .95
## 69 78 89 101 118 138
##
## lowest : 54 55 56 57 58, highest: 784 799 809 813 825
## --------------------------------------------------------------------------------
## IMT
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 98 99 0.999 0.9368 0.2895 0.60
## .10 .25 .50 .75 .90 .95
## 0.65 0.75 0.88 1.07 1.30 1.47
##
## lowest : 0.47 0.48 0.5 0.52 0.53, highest: 1.77 1.78 1.8 1.82 1.83
## --------------------------------------------------------------------------------
## albumin
## n missing imputed distinct
## 3873 0 207 3
##
## Value 1 2 3
## Frequency 3040 696 137
## Proportion 0.785 0.180 0.035
## --------------------------------------------------------------------------------
## SMOKING
## n missing imputed distinct
## 3873 0 25 3
##
## Value 1 2 3
## Frequency 695 2731 447
## Proportion 0.179 0.705 0.115
## --------------------------------------------------------------------------------
## packyrs
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 21 263 0.994 22.62 22.24 0.0
## .10 .25 .50 .75 .90 .95
## 0.0 5.9 19.5 34.2 50.4 62.0
##
## lowest : 0 0.3 0.6 0.7 0.8, highest: 100 102 104 110 120
## --------------------------------------------------------------------------------
## alcohol
## n missing imputed distinct
## 3873 0 25 3
##
## Value 1 2 3
## Frequency 759 410 2704
## Proportion 0.196 0.106 0.698
## --------------------------------------------------------------------------------
## SUMSCORE_5LEVELS
## n missing distinct Info Mean Gmd
## 3873 0 5 0.532 1.311 0.511
##
## Value 1 2 3 4 5
## Frequency 2999 605 211 53 5
## Proportion 0.774 0.156 0.054 0.014 0.001
##
## For the frequency table, variable is rounded to the nearest 0
## --------------------------------------------------------------------------------
##################################
# Describing every variable of the
# SMART.AREGIMPUTED_4 dataset
# (presumably the 4TH imputation results from AREG)
##################################
describe(SMART.AREGIMPUTED_4)
## SMART.AREGIMPUTED_4
##
## 30 Variables 3873 Observations
## --------------------------------------------------------------------------------
## TEVENT
## n missing distinct Info Mean Gmd .05 .10
## 3873 0 1934 1 1370 1078 98.6 197.0
## .25 .50 .75 .90 .95
## 555.0 1213.0 2165.0 2762.4 3017.4
##
## lowest : 0.1 1 2 3 4 , highest: 3451 3452 3463 3465 3466
## --------------------------------------------------------------------------------
## EVENT
## n missing distinct
## 3873 0 2
##
## Value 0 1
## Frequency 3413 460
## Proportion 0.881 0.119
## --------------------------------------------------------------------------------
## SEX
## n missing distinct
## 3873 0 2
##
## Value 1 2
## Frequency 2897 976
## Proportion 0.748 0.252
## --------------------------------------------------------------------------------
## AGE
## n missing distinct Info Mean Gmd .05 .10
## 3873 0 62 0.999 59.56 11.94 41 46
## .25 .50 .75 .90 .95
## 52 60 68 73 76
##
## lowest : 19 20 23 24 25, highest: 78 79 80 81 82
## --------------------------------------------------------------------------------
## DIABETES
## n missing imputed distinct
## 3873 0 40 2
##
## Value 0 1
## Frequency 3022 851
## Proportion 0.78 0.22
## --------------------------------------------------------------------------------
## CEREBRAL
## n missing distinct Info Sum Mean Gmd
## 3873 0 2 0.625 1147 0.2962 0.417
##
## --------------------------------------------------------------------------------
## CARDIAC
## n missing distinct Info Sum Mean Gmd
## 3873 0 2 0.74 2160 0.5577 0.4935
##
## --------------------------------------------------------------------------------
## AAA
## n missing distinct Info Sum Mean Gmd
## 3873 0 2 0.288 416 0.1074 0.1918
##
## --------------------------------------------------------------------------------
## PERIPH
## n missing distinct Info Sum Mean Gmd
## 3873 0 2 0.551 940 0.2427 0.3677
##
## --------------------------------------------------------------------------------
## STENOSIS
## n missing imputed distinct
## 3873 0 93 2
##
## Value 0 1
## Frequency 3135 738
## Proportion 0.809 0.191
## --------------------------------------------------------------------------------
## SYSTBP
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 1223 114 1 140.4 21.93 112.0
## .10 .25 .50 .75 .90 .95
## 117.0 126.0 138.0 152.0 167.0 176.4
##
## lowest : 96 97 98 99 100, highest: 206 209 211 212 216
## --------------------------------------------------------------------------------
## DIASTBP
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 1221 70 0.999 79.66 10.82 65
## .10 .25 .50 .75 .90 .95
## 68 73 79 86 92 97
##
## lowest : 46 48 52 53 54, highest: 117 118 120 124 127
## --------------------------------------------------------------------------------
## SYSTH
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 1498 132 1 143.2 24.87 111
## .10 .25 .50 .75 .90 .95
## 117 127 141 157 173 183
##
## lowest : 79 88 91 93 94, highest: 222 223 228 242 244
## --------------------------------------------------------------------------------
## DIASTH
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 1499 77 0.999 82.17 13.01 64
## .10 .25 .50 .75 .90 .95
## 68 74 81 90 97 102
##
## lowest : 45 49 50 52 53, highest: 123 125 126 130 136
## --------------------------------------------------------------------------------
## LENGTH
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 1 42 0.999 1.74 0.09862 1.59
## .10 .25 .50 .75 .90 .95
## 1.62 1.68 1.75 1.80 1.85 1.88
##
## lowest : 1.53 1.54 1.55 1.56 1.57, highest: 1.9 1.91 1.92 1.93 1.94
## --------------------------------------------------------------------------------
## WEIGHT
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 2 75 0.999 80.99 15.41 59
## .10 .25 .50 .75 .90 .95
## 64 72 80 89 99 104
##
## lowest : 50 51 52 53 54, highest: 120 121 122 123 124
## --------------------------------------------------------------------------------
## BMI
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 3 995 1 26.69 4.257 20.96
## .10 .25 .50 .75 .90 .95
## 22.17 24.11 26.30 28.73 31.86 33.90
##
## lowest : 18.7 18.71 18.73 18.78 18.81, highest: 39.25 39.43 39.45 39.64 39.8
## --------------------------------------------------------------------------------
## CHOL
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 18 65 0.999 5.191 1.283 3.5
## .10 .25 .50 .75 .90 .95
## 3.8 4.4 5.1 5.9 6.7 7.2
##
## lowest : 2.8 2.9 3 3.1 3.2, highest: 8.8 8.9 9.1 9.2 9.4
## --------------------------------------------------------------------------------
## HDL
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 30 188 1 1.23 0.4029 0.74
## .10 .25 .50 .75 .90 .95
## 0.82 0.96 1.17 1.42 1.73 1.94
##
## lowest : 0.58 0.59 0.6 0.61 0.62, highest: 2.46 2.47 2.48 2.49 2.51
## --------------------------------------------------------------------------------
## LDL
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 216 478 1 3.128 1.187 1.51
## .10 .25 .50 .75 .90 .95
## 1.82 2.37 3.05 3.83 4.55 4.98
##
## lowest : 1.1 1.11 1.12 1.14 1.15, highest: 6.34 6.37 6.41 6.47 6.6
## --------------------------------------------------------------------------------
## TRIG
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 28 461 1 1.853 1.093 0.760
## .10 .25 .50 .75 .90 .95
## 0.880 1.130 1.540 2.230 3.120 3.914
##
## lowest : 0.56 0.57 0.58 0.59 0.6 , highest: 8.28 8.61 8.68 8.91 8.96
## --------------------------------------------------------------------------------
## HOMOC
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 463 255 1 14.07 5.533 8.0
## .10 .25 .50 .75 .90 .95
## 8.8 10.5 12.9 16.0 20.4 24.8
##
## lowest : 6.1 6.2 6.3 6.4 6.5 , highest: 35.5 35.9 36.1 37.1 38.3
## --------------------------------------------------------------------------------
## GLUT
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 19 125 0.998 6.333 1.702 4.8
## .10 .25 .50 .75 .90 .95
## 5.0 5.3 5.7 6.5 8.4 10.4
##
## lowest : 4.3 4.4 4.5 4.6 4.7 , highest: 17.6 17.7 17.9 18 18.7
## --------------------------------------------------------------------------------
## CREAT
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 17 194 1 98.4 34.6 65.0
## .10 .25 .50 .75 .90 .95
## 69.2 78.0 89.0 101.0 118.0 138.4
##
## lowest : 54 55 56 57 58, highest: 784 799 809 813 825
## --------------------------------------------------------------------------------
## IMT
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 98 99 0.999 0.9366 0.2889 0.60
## .10 .25 .50 .75 .90 .95
## 0.65 0.75 0.88 1.07 1.30 1.47
##
## lowest : 0.47 0.48 0.5 0.52 0.53, highest: 1.77 1.78 1.8 1.82 1.83
## --------------------------------------------------------------------------------
## albumin
## n missing imputed distinct
## 3873 0 207 3
##
## Value 1 2 3
## Frequency 3046 689 138
## Proportion 0.786 0.178 0.036
## --------------------------------------------------------------------------------
## SMOKING
## n missing imputed distinct
## 3873 0 25 3
##
## Value 1 2 3
## Frequency 696 2731 446
## Proportion 0.180 0.705 0.115
## --------------------------------------------------------------------------------
## packyrs
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 21 263 0.994 22.64 22.26 0.00
## .10 .25 .50 .75 .90 .95
## 0.00 5.90 19.50 34.20 51.12 62.00
##
## lowest : 0 0.3 0.6 0.7 0.8, highest: 100 102 104 110 120
## --------------------------------------------------------------------------------
## alcohol
## n missing imputed distinct
## 3873 0 25 3
##
## Value 1 2 3
## Frequency 755 411 2707
## Proportion 0.195 0.106 0.699
## --------------------------------------------------------------------------------
## SUMSCORE_5LEVELS
## n missing distinct Info Mean Gmd
## 3873 0 5 0.532 1.311 0.511
##
## Value 1 2 3 4 5
## Frequency 2999 605 211 53 5
## Proportion 0.774 0.156 0.054 0.014 0.001
##
## For the frequency table, variable is rounded to the nearest 0
## --------------------------------------------------------------------------------
##################################
# Describing every variable of the
# SMART.AREGIMPUTED_5 dataset
# (presumably the 5TH imputation results from AREG)
##################################
describe(SMART.AREGIMPUTED_5)
## SMART.AREGIMPUTED_5
##
## 30 Variables 3873 Observations
## --------------------------------------------------------------------------------
## TEVENT
## n missing distinct Info Mean Gmd .05 .10
## 3873 0 1934 1 1370 1078 98.6 197.0
## .25 .50 .75 .90 .95
## 555.0 1213.0 2165.0 2762.4 3017.4
##
## lowest : 0.1 1 2 3 4 , highest: 3451 3452 3463 3465 3466
## --------------------------------------------------------------------------------
## EVENT
## n missing distinct
## 3873 0 2
##
## Value 0 1
## Frequency 3413 460
## Proportion 0.881 0.119
## --------------------------------------------------------------------------------
## SEX
## n missing distinct
## 3873 0 2
##
## Value 1 2
## Frequency 2897 976
## Proportion 0.748 0.252
## --------------------------------------------------------------------------------
## AGE
## n missing distinct Info Mean Gmd .05 .10
## 3873 0 62 0.999 59.56 11.94 41 46
## .25 .50 .75 .90 .95
## 52 60 68 73 76
##
## lowest : 19 20 23 24 25, highest: 78 79 80 81 82
## --------------------------------------------------------------------------------
## DIABETES
## n missing imputed distinct
## 3873 0 40 2
##
## Value 0 1
## Frequency 3023 850
## Proportion 0.781 0.219
## --------------------------------------------------------------------------------
## CEREBRAL
## n missing distinct Info Sum Mean Gmd
## 3873 0 2 0.625 1147 0.2962 0.417
##
## --------------------------------------------------------------------------------
## CARDIAC
## n missing distinct Info Sum Mean Gmd
## 3873 0 2 0.74 2160 0.5577 0.4935
##
## --------------------------------------------------------------------------------
## AAA
## n missing distinct Info Sum Mean Gmd
## 3873 0 2 0.288 416 0.1074 0.1918
##
## --------------------------------------------------------------------------------
## PERIPH
## n missing distinct Info Sum Mean Gmd
## 3873 0 2 0.551 940 0.2427 0.3677
##
## --------------------------------------------------------------------------------
## STENOSIS
## n missing imputed distinct
## 3873 0 93 2
##
## Value 0 1
## Frequency 3127 746
## Proportion 0.807 0.193
## --------------------------------------------------------------------------------
## SYSTBP
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 1223 114 1 140.3 21.97 112
## .10 .25 .50 .75 .90 .95
## 116 126 138 152 167 176
##
## lowest : 96 97 98 99 100, highest: 206 209 211 212 216
## --------------------------------------------------------------------------------
## DIASTBP
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 1221 70 0.999 79.78 10.93 65
## .10 .25 .50 .75 .90 .95
## 68 73 79 86 93 97
##
## lowest : 46 48 52 53 54, highest: 117 118 120 124 127
## --------------------------------------------------------------------------------
## SYSTH
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 1498 132 1 142.9 24.88 110
## .10 .25 .50 .75 .90 .95
## 117 127 140 156 172 183
##
## lowest : 79 88 91 93 94, highest: 222 223 228 242 244
## --------------------------------------------------------------------------------
## DIASTH
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 1499 77 0.999 82.12 12.93 64
## .10 .25 .50 .75 .90 .95
## 68 74 81 89 97 102
##
## lowest : 45 49 50 52 53, highest: 123 125 126 130 136
## --------------------------------------------------------------------------------
## LENGTH
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 1 42 0.999 1.74 0.09862 1.59
## .10 .25 .50 .75 .90 .95
## 1.62 1.68 1.75 1.80 1.85 1.88
##
## lowest : 1.53 1.54 1.55 1.56 1.57, highest: 1.9 1.91 1.92 1.93 1.94
## --------------------------------------------------------------------------------
## WEIGHT
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 2 75 0.999 81 15.41 59
## .10 .25 .50 .75 .90 .95
## 64 72 80 89 99 104
##
## lowest : 50 51 52 53 54, highest: 120 121 122 123 124
## --------------------------------------------------------------------------------
## BMI
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 3 995 1 26.69 4.257 20.96
## .10 .25 .50 .75 .90 .95
## 22.17 24.11 26.30 28.73 31.86 33.90
##
## lowest : 18.7 18.71 18.73 18.78 18.81, highest: 39.25 39.43 39.45 39.64 39.8
## --------------------------------------------------------------------------------
## CHOL
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 18 65 0.999 5.191 1.283 3.5
## .10 .25 .50 .75 .90 .95
## 3.8 4.4 5.1 5.9 6.7 7.2
##
## lowest : 2.8 2.9 3 3.1 3.2, highest: 8.8 8.9 9.1 9.2 9.4
## --------------------------------------------------------------------------------
## HDL
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 30 188 1 1.229 0.4026 0.74
## .10 .25 .50 .75 .90 .95
## 0.82 0.96 1.17 1.42 1.73 1.94
##
## lowest : 0.58 0.59 0.6 0.61 0.62, highest: 2.46 2.47 2.48 2.49 2.51
## --------------------------------------------------------------------------------
## LDL
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 216 478 1 3.129 1.185 1.52
## .10 .25 .50 .75 .90 .95
## 1.82 2.37 3.05 3.83 4.55 4.98
##
## lowest : 1.1 1.11 1.12 1.14 1.15, highest: 6.34 6.37 6.41 6.47 6.6
## --------------------------------------------------------------------------------
## TRIG
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 28 461 1 1.853 1.092 0.760
## .10 .25 .50 .75 .90 .95
## 0.880 1.130 1.540 2.230 3.120 3.914
##
## lowest : 0.56 0.57 0.58 0.59 0.6 , highest: 8.28 8.61 8.68 8.91 8.96
## --------------------------------------------------------------------------------
## HOMOC
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 463 255 1 14.19 5.641 8.0
## .10 .25 .50 .75 .90 .95
## 8.8 10.5 13.0 16.1 20.7 25.5
##
## lowest : 6.1 6.2 6.3 6.4 6.5 , highest: 35.5 35.9 36.1 37.1 38.3
## --------------------------------------------------------------------------------
## GLUT
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 19 125 0.998 6.334 1.701 4.80
## .10 .25 .50 .75 .90 .95
## 5.00 5.30 5.70 6.50 8.48 10.40
##
## lowest : 4.3 4.4 4.5 4.6 4.7 , highest: 17.6 17.7 17.9 18 18.7
## --------------------------------------------------------------------------------
## CREAT
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 17 194 1 98.38 34.6 64.0
## .10 .25 .50 .75 .90 .95
## 69.0 78.0 89.0 101.0 118.0 138.4
##
## lowest : 54 55 56 57 58, highest: 784 799 809 813 825
## --------------------------------------------------------------------------------
## IMT
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 98 99 0.999 0.9361 0.2889 0.60
## .10 .25 .50 .75 .90 .95
## 0.65 0.75 0.88 1.07 1.30 1.47
##
## lowest : 0.47 0.48 0.5 0.52 0.53, highest: 1.77 1.78 1.8 1.82 1.83
## --------------------------------------------------------------------------------
## albumin
## n missing imputed distinct
## 3873 0 207 3
##
## Value 1 2 3
## Frequency 3040 692 141
## Proportion 0.785 0.179 0.036
## --------------------------------------------------------------------------------
## SMOKING
## n missing imputed distinct
## 3873 0 25 3
##
## Value 1 2 3
## Frequency 694 2730 449
## Proportion 0.179 0.705 0.116
## --------------------------------------------------------------------------------
## packyrs
## n missing imputed distinct Info Mean Gmd .05
## 3873 0 21 263 0.994 22.65 22.28 0.00
## .10 .25 .50 .75 .90 .95
## 0.00 5.90 19.50 34.20 51.12 62.00
##
## lowest : 0 0.3 0.6 0.7 0.8, highest: 100 102 104 110 120
## --------------------------------------------------------------------------------
## alcohol
## n missing imputed distinct
## 3873 0 25 3
##
## Value 1 2 3
## Frequency 757 411 2705
## Proportion 0.195 0.106 0.698
## --------------------------------------------------------------------------------
## SUMSCORE_5LEVELS
## n missing distinct Info Mean Gmd
## 3873 0 5 0.532 1.311 0.511
##
## Value 1 2 3 4 5
## Frequency 2999 605 211 53 5
## Proportion 0.774 0.156 0.054 0.014 0.001
##
## For the frequency table, variable is rounded to the nearest 0
## --------------------------------------------------------------------------------
##################################
# Formulating the FULL
# Cox Proportional Hazards Model
# Using the combined complete and
# 1ST imputation results from AREG
##################################
# Prepare the 1st AREG-imputed dataset for modelling: the event indicator is
# coerced to numeric and the categorical predictors (SMOKING, alcohol,
# albumin) are coerced to factors.
SMART.AREGIMPUTED_1$EVENT <- as.numeric(SMART.AREGIMPUTED_1$EVENT)
SMART.AREGIMPUTED_1$SMOKING <- as.factor(SMART.AREGIMPUTED_1$SMOKING)
SMART.AREGIMPUTED_1$alcohol <- as.factor(SMART.AREGIMPUTED_1$alcohol)
SMART.AREGIMPUTED_1$albumin <- as.factor(SMART.AREGIMPUTED_1$albumin)
# Store the datadist summary of the dataset as `dd` and register it by name
# through options(); rms functions such as summary() look it up there.
dd <- datadist(SMART.AREGIMPUTED_1)
options(datadist = "dd")
##################################
# FULL COMPLETE
##################################
# Fit the full Cox proportional hazards model on the 1st AREG-imputed dataset.
# AGE enters as the squared excess over 50 (zero at or below 50) and CREAT
# enters on the log scale; every other predictor enters untransformed.
# x = T and y = T keep the design matrix and response in the fit, and
# surv = T stores the underlying survival estimates (see the rms cph docs).
# The arguments are written as T so the call echoed when the model is printed
# is unchanged.
COXPH.FULL.COMPLETE.AREGIMPUTED_1 <- cph(
  Surv(TEVENT, EVENT) ~ ifelse(AGE > 50, (AGE - 50)^2, 0) +
    SEX +
    SMOKING +
    alcohol +
    BMI +
    SYSTH +
    SYSTBP +
    DIASTH +
    DIASTBP +
    WEIGHT +
    LENGTH +
    CHOL +
    LDL +
    HDL +
    DIABETES +
    SUMSCORE_5LEVELS +
    HOMOC +
    log(CREAT) +
    albumin +
    STENOSIS +
    IMT +
    TRIG +
    GLUT +
    packyrs,
  data = SMART.AREGIMPUTED_1,
  x = T,
  y = T,
  surv = T
)
# Wald chi-square statistics for each factor of the fitted model.
anova(COXPH.FULL.COMPLETE.AREGIMPUTED_1)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## AGE 36.78 1 <.0001
## SEX 0.88 1 0.3477
## SMOKING 0.13 2 0.9381
## alcohol 1.83 2 0.4004
## BMI 0.70 1 0.4029
## SYSTH 0.27 1 0.6059
## SYSTBP 0.01 1 0.9269
## DIASTH 2.02 1 0.1556
## DIASTBP 0.18 1 0.6745
## WEIGHT 1.28 1 0.2575
## LENGTH 0.64 1 0.4242
## CHOL 0.14 1 0.7100
## LDL 0.19 1 0.6664
## HDL 0.00 1 0.9978
## DIABETES 0.17 1 0.6790
## SUMSCORE_5LEVELS 34.66 1 <.0001
## HOMOC 0.01 1 0.9157
## CREAT 18.21 1 <.0001
## albumin 9.55 2 0.0084
## STENOSIS 2.29 1 0.1302
## IMT 9.33 1 0.0023
## TRIG 0.14 1 0.7067
## GLUT 2.83 1 0.0922
## packyrs 5.29 1 0.0214
## TOTAL 328.92 27 <.0001
##################################
# Summarizing the estimated effects
# and hazard ratios of the
# FULL COMPLETE model
##################################
summary(COXPH.FULL.COMPLETE.AREGIMPUTED_1)
## Effects Response : Surv(TEVENT, EVENT)
##
## Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
## AGE 52.00 68.00 16.00 0.4359300 0.071877 0.2950500 0.576800
## Hazard Ratio 52.00 68.00 16.00 1.5464000 NA 1.3432000 1.780300
## BMI 24.11 28.73 4.62 0.4127800 0.493520 -0.5545000 1.380100
## Hazard Ratio 24.11 28.73 4.62 1.5110000 NA 0.5743600 3.975100
## SYSTH 127.00 157.00 30.00 -0.0614390 0.119090 -0.2948400 0.171960
## Hazard Ratio 127.00 157.00 30.00 0.9404100 NA 0.7446500 1.187600
## SYSTBP 126.00 153.00 27.00 -0.0105570 0.115110 -0.2361700 0.215050
## Hazard Ratio 126.00 153.00 27.00 0.9895000 NA 0.7896500 1.239900
## DIASTH 74.00 89.00 15.00 0.1512000 0.106460 -0.0574710 0.359860
## Hazard Ratio 74.00 89.00 15.00 1.1632000 NA 0.9441500 1.433100
## DIASTBP 73.00 86.00 13.00 -0.0410810 0.097805 -0.2327800 0.150610
## Hazard Ratio 73.00 86.00 13.00 0.9597500 NA 0.7923300 1.162500
## WEIGHT 72.00 89.00 17.00 -0.6868900 0.606660 -1.8759000 0.502160
## Hazard Ratio 72.00 89.00 17.00 0.5031400 NA 0.1532100 1.652300
## LENGTH 1.68 1.80 0.12 0.3213700 0.402110 -0.4667600 1.109500
## Hazard Ratio 1.68 1.80 0.12 1.3790000 NA 0.6270300 3.032800
## CHOL 4.40 5.90 1.50 -0.5446400 1.464600 -3.4152000 2.325900
## Hazard Ratio 4.40 5.90 1.50 0.5800500 NA 0.0328700 10.236000
## LDL 2.37 3.83 1.46 0.6199000 1.438000 -2.1985000 3.438300
## Hazard Ratio 2.37 3.83 1.46 1.8587000 NA 0.1109600 31.135000
## HDL 0.96 1.42 0.46 -0.0012456 0.455170 -0.8933600 0.890870
## Hazard Ratio 0.96 1.42 0.46 0.9987600 NA 0.4092800 2.437300
## SUMSCORE_5LEVELS 1.00 5.00 4.00 1.3307000 0.226020 0.8876800 1.773700
## Hazard Ratio 1.00 5.00 4.00 3.7836000 NA 2.4295000 5.892400
## HOMOC 10.50 16.20 5.70 -0.0052107 0.049239 -0.1017200 0.091297
## Hazard Ratio 10.50 16.20 5.70 0.9948000 NA 0.9032800 1.095600
## CREAT 78.00 101.00 23.00 0.1653600 0.038752 0.0894030 0.241310
## Hazard Ratio 78.00 101.00 23.00 1.1798000 NA 1.0935000 1.272900
## IMT 0.75 1.07 0.32 0.1772100 0.058007 0.0635150 0.290900
## Hazard Ratio 0.75 1.07 0.32 1.1939000 NA 1.0656000 1.337600
## TRIG 1.12 2.23 1.11 0.1800500 0.478510 -0.7578200 1.117900
## Hazard Ratio 1.12 2.23 1.11 1.1973000 NA 0.4686900 3.058500
## GLUT 5.30 6.50 1.20 0.0593030 0.035222 -0.0097303 0.128340
## Hazard Ratio 5.30 6.50 1.20 1.0611000 NA 0.9903200 1.136900
## packyrs 5.90 34.20 28.30 0.1585500 0.068926 0.0234600 0.293650
## Hazard Ratio 5.90 34.20 28.30 1.1718000 NA 1.0237000 1.341300
## SEX - 2:1 1.00 2.00 NA -0.1524500 0.162350 -0.4706500 0.165750
## Hazard Ratio 1.00 2.00 NA 0.8586000 NA 0.6246000 1.180300
## SMOKING - 1:2 2.00 1.00 NA -0.0262960 0.161320 -0.3424800 0.289890
## Hazard Ratio 2.00 1.00 NA 0.9740500 NA 0.7100000 1.336300
## SMOKING - 3:2 2.00 3.00 NA 0.0602720 0.201840 -0.3353300 0.455880
## Hazard Ratio 2.00 3.00 NA 1.0621000 NA 0.7151000 1.577600
## alcohol - 1:3 3.00 1.00 NA 0.1474700 0.121360 -0.0903840 0.385330
## Hazard Ratio 3.00 1.00 NA 1.1589000 NA 0.9135800 1.470100
## alcohol - 2:3 3.00 2.00 NA -0.0417340 0.148450 -0.3326900 0.249220
## Hazard Ratio 3.00 2.00 NA 0.9591200 NA 0.7169900 1.283000
## DIABETES - 1:0 1.00 2.00 NA 0.0645840 0.156090 -0.2413400 0.370510
## Hazard Ratio 1.00 2.00 NA 1.0667000 NA 0.7855700 1.448500
## albumin - 2:1 1.00 2.00 NA 0.2609800 0.117980 0.0297510 0.492210
## Hazard Ratio 1.00 2.00 NA 1.2982000 NA 1.0302000 1.635900
## albumin - 3:1 1.00 3.00 NA 0.5620000 0.208220 0.1538900 0.970110
## Hazard Ratio 1.00 3.00 NA 1.7542000 NA 1.1664000 2.638200
## STENOSIS - 1:0 1.00 2.00 NA 0.1637700 0.108210 -0.0483250 0.375870
## Hazard Ratio 1.00 2.00 NA 1.1779000 NA 0.9528200 1.456300
##################################
# Computing the variance inflation factors
# of the FULL COMPLETE model coefficients
##################################
vif(COXPH.FULL.COMPLETE.AREGIMPUTED_1)
## AGE SEX=2 SMOKING=2 SMOKING=3
## 1.422223 1.839738 1.917030 1.535045
## alcohol=2 alcohol=3 BMI SYSTH
## 1.439301 1.576363 63.830100 3.778977
## SYSTBP DIASTH DIASTBP WEIGHT
## 3.466886 3.098436 2.731506 89.258836
## LENGTH CHOL LDL HDL
## 34.868141 499.517309 446.035588 46.254999
## DIABETES=1 SUMSCORE_5LEVELS HOMOC CREAT
## 2.268709 1.173653 1.521747 1.920921
## albumin=2 albumin=3 STENOSIS=1 IMT
## 1.194679 1.573030 1.206110 1.329620
## TRIG GLUT packyrs
## 117.623369 2.191857 1.355974
##################################
# Printing the fitted FULL COMPLETE model
# (model tests, discrimination indexes
# and coefficient estimates)
##################################
COXPH.FULL.COMPLETE.AREGIMPUTED_1
## Cox Proportional Hazards Model
##
## cph(formula = Surv(TEVENT, EVENT) ~ ifelse(AGE > 50, (AGE - 50)^2,
## 0) + SEX + SMOKING + alcohol + BMI + SYSTH + SYSTBP + DIASTH +
## DIASTBP + WEIGHT + LENGTH + CHOL + LDL + HDL + DIABETES +
## SUMSCORE_5LEVELS + HOMOC + log(CREAT) + albumin + STENOSIS +
## IMT + TRIG + GLUT + packyrs, data = SMART.AREGIMPUTED_1,
## x = T, y = T, surv = T)
##
## Model Tests Discrimination
## Indexes
## Obs 3873 LR chi2 291.65 R2 0.087
## Events 460 d.f. 27 R2(27,3873)0.066
## Center 8.2501 Pr(> chi2) 0.0000 R2(27,460)0.437
## Score chi2 362.97 Dxy 0.395
## Pr(> chi2) 0.0000
##
## Coef S.E. Wald Z Pr(>|Z|)
## AGE 0.0014 0.0002 6.06 <0.0001
## SEX=2 -0.1524 0.1623 -0.94 0.3477
## SMOKING=2 0.0263 0.1613 0.16 0.8705
## SMOKING=3 0.0866 0.2445 0.35 0.7232
## alcohol=2 -0.1892 0.1673 -1.13 0.2581
## alcohol=3 -0.1475 0.1214 -1.22 0.2243
## BMI 0.0893 0.1068 0.84 0.4029
## SYSTH -0.0020 0.0040 -0.52 0.6059
## SYSTBP -0.0004 0.0043 -0.09 0.9269
## DIASTH 0.0101 0.0071 1.42 0.1556
## DIASTBP -0.0032 0.0075 -0.42 0.6745
## WEIGHT -0.0404 0.0357 -1.13 0.2575
## LENGTH 2.6781 3.3509 0.80 0.4242
## CHOL -0.3631 0.9764 -0.37 0.7100
## LDL 0.4246 0.9849 0.43 0.6664
## HDL -0.0027 0.9895 0.00 0.9978
## DIABETES=1 0.0646 0.1561 0.41 0.6790
## SUMSCORE_5LEVELS 0.3327 0.0565 5.89 <0.0001
## HOMOC -0.0009 0.0086 -0.11 0.9157
## CREAT 0.6399 0.1500 4.27 <0.0001
## albumin=2 0.2610 0.1180 2.21 0.0270
## albumin=3 0.5620 0.2082 2.70 0.0070
## STENOSIS=1 0.1638 0.1082 1.51 0.1302
## IMT 0.5538 0.1813 3.05 0.0023
## TRIG 0.1622 0.4311 0.38 0.7067
## GLUT 0.0494 0.0294 1.68 0.0922
## packyrs 0.0056 0.0024 2.30 0.0214
##################################
# FULL
# After removing variables which are:
# High multicollinearity contributors
# Non-standard risk factors
# Minimal predictors of survival outcome based on initial exploration
# Minimal predictors of survival outcome based on domain knowledge and literature
##################################
# Fit the reduced FULL Cox proportional hazards model on the 1st AREG-imputed
# dataset.
#   - AGE enters as a transformed term: 0 up to age 50, (AGE - 50)^2 beyond it.
#   - CREAT enters on the log scale.
#   - x, y and surv ask cph() to keep the design matrix, the response and the
#     underlying survival estimates in the fit object (see rms::cph).
COXPH.FULL.AREGIMPUTED_1 <- cph(
  Surv(TEVENT, EVENT) ~ ifelse(AGE > 50, (AGE - 50)^2, 0) +
    SEX +
    SMOKING +
    alcohol +
    BMI +
    SYSTH +
    HDL +
    DIABETES +
    SUMSCORE_5LEVELS +
    HOMOC +
    log(CREAT) +
    albumin +
    STENOSIS +
    IMT,
  data = SMART.AREGIMPUTED_1,
  x = T,
  y = T,
  surv = T
)
# Wald chi-square test for each factor in the model
anova(COXPH.FULL.AREGIMPUTED_1)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## AGE 37.92 1 <.0001
## SEX 0.15 1 0.6941
## SMOKING 1.83 2 0.3998
## alcohol 1.31 2 0.5189
## BMI 3.06 1 0.0802
## SYSTH 0.24 1 0.6232
## HDL 5.61 1 0.0178
## DIABETES 3.62 1 0.0570
## SUMSCORE_5LEVELS 35.39 1 <.0001
## HOMOC 0.01 1 0.9047
## CREAT 16.77 1 <.0001
## albumin 9.89 2 0.0071
## STENOSIS 2.56 1 0.1099
## IMT 8.73 1 0.0031
## TOTAL 314.86 17 <.0001
# Effect estimates and hazard ratios, with 0.95 confidence limits, for each
# predictor between its Low and High reference values
summary(COXPH.FULL.AREGIMPUTED_1)
## Effects Response : Surv(TEVENT, EVENT)
##
## Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
## AGE 52.00 68.00 16.00 0.422220 0.068568 0.2878300 0.556610
## Hazard Ratio 52.00 68.00 16.00 1.525300 NA 1.3335000 1.744800
## BMI 24.11 28.73 4.62 -0.114360 0.065359 -0.2424600 0.013741
## Hazard Ratio 24.11 28.73 4.62 0.891940 NA 0.7846900 1.013800
## SYSTH 127.00 157.00 30.00 0.032049 0.065222 -0.0957840 0.159880
## Hazard Ratio 127.00 157.00 30.00 1.032600 NA 0.9086600 1.173400
## HDL 0.96 1.42 0.46 -0.173710 0.073314 -0.3174000 -0.030015
## Hazard Ratio 0.96 1.42 0.46 0.840540 NA 0.7280400 0.970430
## SUMSCORE_5LEVELS 1.00 5.00 4.00 1.327400 0.223150 0.8900700 1.764800
## Hazard Ratio 1.00 5.00 4.00 3.771400 NA 2.4353000 5.840500
## HOMOC 10.50 16.20 5.70 0.005793 0.048361 -0.0889930 0.100580
## Hazard Ratio 10.50 16.20 5.70 1.005800 NA 0.9148500 1.105800
## CREAT 78.00 101.00 23.00 0.155040 0.037857 0.0808410 0.229240
## Hazard Ratio 78.00 101.00 23.00 1.167700 NA 1.0842000 1.257600
## IMT 0.75 1.07 0.32 0.168820 0.057133 0.0568450 0.280800
## Hazard Ratio 0.75 1.07 0.32 1.183900 NA 1.0585000 1.324200
## SEX - 2:1 1.00 2.00 NA -0.053230 0.135340 -0.3184900 0.212030
## Hazard Ratio 1.00 2.00 NA 0.948160 NA 0.7272400 1.236200
## SMOKING - 1:2 2.00 1.00 NA -0.183620 0.144760 -0.4673500 0.100120
## Hazard Ratio 2.00 1.00 NA 0.832250 NA 0.6266600 1.105300
## SMOKING - 3:2 2.00 3.00 NA 0.066860 0.200970 -0.3270300 0.460750
## Hazard Ratio 2.00 3.00 NA 1.069100 NA 0.7210600 1.585300
## alcohol - 1:3 3.00 1.00 NA 0.126340 0.120810 -0.1104500 0.363130
## Hazard Ratio 3.00 1.00 NA 1.134700 NA 0.8954300 1.437800
## alcohol - 2:3 3.00 2.00 NA -0.029509 0.147220 -0.3180500 0.259030
## Hazard Ratio 3.00 2.00 NA 0.970920 NA 0.7275700 1.295700
## DIABETES - 1:0 1.00 2.00 NA 0.208900 0.109740 -0.0061847 0.423990
## Hazard Ratio 1.00 2.00 NA 1.232300 NA 0.9938300 1.528000
## albumin - 2:1 1.00 2.00 NA 0.264540 0.117310 0.0346040 0.494470
## Hazard Ratio 1.00 2.00 NA 1.302800 NA 1.0352000 1.639600
## albumin - 3:1 1.00 3.00 NA 0.562210 0.205920 0.1586100 0.965820
## Hazard Ratio 1.00 3.00 NA 1.754600 NA 1.1719000 2.626900
## STENOSIS - 1:0 1.00 2.00 NA 0.170850 0.106860 -0.0385980 0.380290
## Hazard Ratio 1.00 2.00 NA 1.186300 NA 0.9621400 1.462700
# Variance inflation factor for every coefficient of the reduced model,
# used to confirm that the multicollinearity has been removed
vif(COXPH.FULL.AREGIMPUTED_1)
## AGE SEX=2 SMOKING=2 SMOKING=3
## 1.299339 1.278601 1.543592 1.412798
## alcohol=2 alcohol=3 BMI SYSTH
## 1.422216 1.562523 1.108005 1.152175
## HDL DIABETES=1 SUMSCORE_5LEVELS HOMOC
## 1.209756 1.121312 1.156664 1.473997
## CREAT albumin=2 albumin=3 STENOSIS=1
## 1.847485 1.181861 1.539938 1.175423
## IMT
## 1.303815
# Print the fitted model: call, model tests, discrimination indexes
# and the coefficient table
COXPH.FULL.AREGIMPUTED_1
## Cox Proportional Hazards Model
##
## cph(formula = Surv(TEVENT, EVENT) ~ ifelse(AGE > 50, (AGE - 50)^2,
## 0) + SEX + SMOKING + alcohol + BMI + SYSTH + HDL + DIABETES +
## SUMSCORE_5LEVELS + HOMOC + log(CREAT) + albumin + STENOSIS +
## IMT, data = SMART.AREGIMPUTED_1, x = T, y = T, surv = T)
##
## Model Tests Discrimination
## Indexes
## Obs 3873 LR chi2 276.30 R2 0.082
## Events 460 d.f. 17 R2(17,3873)0.065
## Center 3.1178 Pr(> chi2) 0.0000 R2(17,460)0.431
## Score chi2 347.73 Dxy 0.392
## Pr(> chi2) 0.0000
##
## Coef S.E. Wald Z Pr(>|Z|)
## AGE 0.0013 0.0002 6.16 <0.0001
## SEX=2 -0.0532 0.1353 -0.39 0.6941
## SMOKING=2 0.1836 0.1448 1.27 0.2047
## SMOKING=3 0.2505 0.2345 1.07 0.2855
## alcohol=2 -0.1559 0.1663 -0.94 0.3488
## alcohol=3 -0.1263 0.1208 -1.05 0.2957
## BMI -0.0248 0.0141 -1.75 0.0802
## SYSTH 0.0011 0.0022 0.49 0.6232
## HDL -0.3776 0.1594 -2.37 0.0178
## DIABETES=1 0.2089 0.1097 1.90 0.0570
## SUMSCORE_5LEVELS 0.3319 0.0558 5.95 <0.0001
## HOMOC 0.0010 0.0085 0.12 0.9047
## CREAT 0.6000 0.1465 4.10 <0.0001
## albumin=2 0.2645 0.1173 2.25 0.0241
## albumin=3 0.5622 0.2059 2.73 0.0063
## STENOSIS=1 0.1708 0.1069 1.60 0.1099
## IMT 0.5276 0.1785 2.95 0.0031
##################################
# Formulating the FULL
# Cox Proportional Hazards Model
# Using the combined complete and
# 2ND imputation results from AREG
##################################
# Prepare the 2nd AREG-imputed dataset for modelling: the event indicator is
# coerced to numeric and SMOKING, alcohol and albumin are coerced to factors.
SMART.AREGIMPUTED_2$EVENT <- as.numeric(SMART.AREGIMPUTED_2$EVENT)
SMART.AREGIMPUTED_2$SMOKING <- as.factor(SMART.AREGIMPUTED_2$SMOKING)
SMART.AREGIMPUTED_2$alcohol <- as.factor(SMART.AREGIMPUTED_2$alcohol)
SMART.AREGIMPUTED_2$albumin <- as.factor(SMART.AREGIMPUTED_2$albumin)
# Register the variable distributions of this dataset with rms; the option is
# looked up by name, so the object must be called "dd".
dd <- datadist(SMART.AREGIMPUTED_2)
options(datadist = "dd")
##################################
# FULL COMPLETE
##################################
# Fit the FULL COMPLETE Cox proportional hazards model (all candidate
# predictors) on the 2nd AREG-imputed dataset.
#   - AGE enters as a transformed term: 0 up to age 50, (AGE - 50)^2 beyond it.
#   - CREAT enters on the log scale.
#   - x, y and surv ask cph() to keep the design matrix, the response and the
#     underlying survival estimates in the fit object (see rms::cph).
COXPH.FULL.COMPLETE.AREGIMPUTED_2 <- cph(
  Surv(TEVENT, EVENT) ~ ifelse(AGE > 50, (AGE - 50)^2, 0) +
    SEX +
    SMOKING +
    alcohol +
    BMI +
    SYSTH +
    SYSTBP +
    DIASTH +
    DIASTBP +
    WEIGHT +
    LENGTH +
    CHOL +
    LDL +
    HDL +
    DIABETES +
    SUMSCORE_5LEVELS +
    HOMOC +
    log(CREAT) +
    albumin +
    STENOSIS +
    IMT +
    TRIG +
    GLUT +
    packyrs,
  data = SMART.AREGIMPUTED_2,
  x = T,
  y = T,
  surv = T
)
# Wald chi-square test for each factor in the model
anova(COXPH.FULL.COMPLETE.AREGIMPUTED_2)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## AGE 35.47 1 <.0001
## SEX 0.46 1 0.4990
## SMOKING 0.06 2 0.9682
## alcohol 1.70 2 0.4283
## BMI 0.75 1 0.3850
## SYSTH 1.60 1 0.2055
## SYSTBP 3.89 1 0.0485
## DIASTH 0.04 1 0.8409
## DIASTBP 1.88 1 0.1708
## WEIGHT 1.34 1 0.2479
## LENGTH 0.69 1 0.4045
## CHOL 0.08 1 0.7736
## LDL 0.12 1 0.7307
## HDL 0.01 1 0.9305
## DIABETES 0.09 1 0.7655
## SUMSCORE_5LEVELS 36.02 1 <.0001
## HOMOC 0.09 1 0.7672
## CREAT 15.96 1 0.0001
## albumin 11.13 2 0.0038
## STENOSIS 2.17 1 0.1411
## IMT 10.54 1 0.0012
## TRIG 0.09 1 0.7673
## GLUT 3.16 1 0.0753
## packyrs 5.60 1 0.0180
## TOTAL 332.03 27 <.0001
# Effect estimates and hazard ratios, with 0.95 confidence limits, for each
# predictor between its Low and High reference values
summary(COXPH.FULL.COMPLETE.AREGIMPUTED_2)
## Effects Response : Surv(TEVENT, EVENT)
##
## Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
## AGE 52.00 68.00 16.00 0.427070 0.071709 0.28652000 0.5676200
## Hazard Ratio 52.00 68.00 16.00 1.532800 NA 1.33180000 1.7641000
## BMI 24.11 28.73 4.62 0.430730 0.495820 -0.54105000 1.4025000
## Hazard Ratio 24.11 28.73 4.62 1.538400 NA 0.58214000 4.0654000
## SYSTH 127.00 157.00 30.00 0.151300 0.119500 -0.08291300 0.3855200
## Hazard Ratio 127.00 157.00 30.00 1.163300 NA 0.92043000 1.4704000
## SYSTBP 126.00 153.00 27.00 -0.223730 0.113390 -0.44597000 -0.0014791
## Hazard Ratio 126.00 153.00 27.00 0.799530 NA 0.64020000 0.9985200
## DIASTH 74.00 90.00 16.00 -0.022334 0.111270 -0.24041000 0.1957500
## Hazard Ratio 74.00 90.00 16.00 0.977910 NA 0.78630000 1.2162000
## DIASTBP 73.00 86.00 13.00 0.128540 0.093857 -0.05542000 0.3124900
## Hazard Ratio 73.00 86.00 13.00 1.137200 NA 0.94609000 1.3668000
## WEIGHT 72.00 89.00 17.00 -0.704430 0.609600 -1.89920000 0.4903500
## Hazard Ratio 72.00 89.00 17.00 0.494390 NA 0.14968000 1.6329000
## LENGTH 1.68 1.80 0.12 0.336580 0.403770 -0.45478000 1.1279000
## Hazard Ratio 1.68 1.80 0.12 1.400200 NA 0.63458000 3.0893000
## CHOL 4.40 5.90 1.50 -0.422550 1.468700 -3.30110000 2.4560000
## Hazard Ratio 4.40 5.90 1.50 0.655370 NA 0.03684200 11.6580000
## LDL 2.37 3.83 1.46 0.495510 1.439800 -2.32650000 3.3175000
## Hazard Ratio 2.37 3.83 1.46 1.641300 NA 0.09763400 27.5930000
## HDL 0.96 1.42 0.46 -0.039806 0.456530 -0.93459000 0.8549800
## Hazard Ratio 0.96 1.42 0.46 0.960980 NA 0.39275000 2.3513000
## SUMSCORE_5LEVELS 1.00 5.00 4.00 1.357300 0.226150 0.91409000 1.8006000
## Hazard Ratio 1.00 5.00 4.00 3.885900 NA 2.49450000 6.0533000
## HOMOC 10.50 16.00 5.50 0.014223 0.048052 -0.07995600 0.1084000
## Hazard Ratio 10.50 16.00 5.50 1.014300 NA 0.92316000 1.1145000
## CREAT 78.00 101.00 23.00 0.155110 0.038821 0.07902000 0.2312000
## Hazard Ratio 78.00 101.00 23.00 1.167800 NA 1.08220000 1.2601000
## IMT 0.75 1.07 0.32 0.188340 0.058019 0.07462800 0.3020600
## Hazard Ratio 0.75 1.07 0.32 1.207200 NA 1.07750000 1.3526000
## TRIG 1.12 2.23 1.11 0.142450 0.481340 -0.80096000 1.0859000
## Hazard Ratio 1.12 2.23 1.11 1.153100 NA 0.44890000 2.9620000
## GLUT 5.30 6.50 1.20 0.061953 0.034827 -0.00630670 0.1302100
## Hazard Ratio 5.30 6.50 1.20 1.063900 NA 0.99371000 1.1391000
## packyrs 5.90 34.20 28.30 0.165940 0.070146 0.02845400 0.3034200
## Hazard Ratio 5.90 34.20 28.30 1.180500 NA 1.02890000 1.3545000
## SEX - 2:1 1.00 2.00 NA -0.109690 0.162250 -0.42770000 0.2083200
## Hazard Ratio 1.00 2.00 NA 0.896110 NA 0.65201000 1.2316000
## SMOKING - 1:2 2.00 1.00 NA -0.017550 0.161910 -0.33488000 0.2997800
## Hazard Ratio 2.00 1.00 NA 0.982600 NA 0.71542000 1.3496000
## SMOKING - 3:2 2.00 3.00 NA 0.043922 0.201980 -0.35195000 0.4397900
## Hazard Ratio 2.00 3.00 NA 1.044900 NA 0.70332000 1.5524000
## alcohol - 1:3 3.00 1.00 NA 0.147610 0.121350 -0.09023100 0.3854400
## Hazard Ratio 3.00 1.00 NA 1.159100 NA 0.91372000 1.4703000
## alcohol - 2:3 3.00 2.00 NA -0.024662 0.148250 -0.31522000 0.2658900
## Hazard Ratio 3.00 2.00 NA 0.975640 NA 0.72963000 1.3046000
## DIABETES - 1:0 1.00 2.00 NA 0.046684 0.156520 -0.26009000 0.3534600
## Hazard Ratio 1.00 2.00 NA 1.047800 NA 0.77098000 1.4240000
## albumin - 2:1 1.00 2.00 NA 0.234650 0.119250 0.00093568 0.4683700
## Hazard Ratio 1.00 2.00 NA 1.264500 NA 1.00090000 1.5974000
## albumin - 3:1 1.00 3.00 NA 0.637920 0.202230 0.24155000 1.0343000
## Hazard Ratio 1.00 3.00 NA 1.892500 NA 1.27320000 2.8131000
## STENOSIS - 1:0 1.00 2.00 NA 0.159590 0.108420 -0.05291900 0.3721000
## Hazard Ratio 1.00 2.00 NA 1.173000 NA 0.94846000 1.4508000
# Variance inflation factor for every coefficient of the full complete model,
# used to screen for multicollinearity among the predictors
vif(COXPH.FULL.COMPLETE.AREGIMPUTED_2)
## AGE SEX=2 SMOKING=2 SMOKING=3
## 1.415309 1.837522 1.930927 1.547808
## alcohol=2 alcohol=3 BMI SYSTH
## 1.431119 1.572093 64.453293 3.876986
## SYSTBP DIASTH DIASTBP WEIGHT
## 3.362049 3.035763 2.601137 90.131708
## LENGTH CHOL LDL HDL
## 35.159350 502.529866 449.417395 46.213107
## DIABETES=1 SUMSCORE_5LEVELS HOMOC CREAT
## 2.271060 1.170919 1.542203 1.941807
## albumin=2 albumin=3 STENOSIS=1 IMT
## 1.184876 1.614102 1.209187 1.346155
## TRIG GLUT packyrs
## 119.680408 2.177683 1.362550
# Print the fitted model: call, model tests, discrimination indexes
# and the coefficient table
COXPH.FULL.COMPLETE.AREGIMPUTED_2
## Cox Proportional Hazards Model
##
## cph(formula = Surv(TEVENT, EVENT) ~ ifelse(AGE > 50, (AGE - 50)^2,
## 0) + SEX + SMOKING + alcohol + BMI + SYSTH + SYSTBP + DIASTH +
## DIASTBP + WEIGHT + LENGTH + CHOL + LDL + HDL + DIABETES +
## SUMSCORE_5LEVELS + HOMOC + log(CREAT) + albumin + STENOSIS +
## IMT + TRIG + GLUT + packyrs, data = SMART.AREGIMPUTED_2,
## x = T, y = T, surv = T)
##
## Model Tests Discrimination
## Indexes
## Obs 3873 LR chi2 298.32 R2 0.089
## Events 460 d.f. 27 R2(27,3873)0.068
## Center 8.4099 Pr(> chi2) 0.0000 R2(27,460)0.446
## Score chi2 372.48 Dxy 0.397
## Pr(> chi2) 0.0000
##
## Coef S.E. Wald Z Pr(>|Z|)
## AGE 0.0013 0.0002 5.96 <0.0001
## SEX=2 -0.1097 0.1623 -0.68 0.4990
## SMOKING=2 0.0175 0.1619 0.11 0.9137
## SMOKING=3 0.0615 0.2454 0.25 0.8022
## alcohol=2 -0.1723 0.1681 -1.02 0.3054
## alcohol=3 -0.1476 0.1213 -1.22 0.2238
## BMI 0.0932 0.1073 0.87 0.3850
## SYSTH 0.0050 0.0040 1.27 0.2055
## SYSTBP -0.0083 0.0042 -1.97 0.0485
## DIASTH -0.0014 0.0070 -0.20 0.8409
## DIASTBP 0.0099 0.0072 1.37 0.1708
## WEIGHT -0.0414 0.0359 -1.16 0.2479
## LENGTH 2.8049 3.3647 0.83 0.4045
## CHOL -0.2817 0.9791 -0.29 0.7736
## LDL 0.3394 0.9862 0.34 0.7307
## HDL -0.0865 0.9925 -0.09 0.9305
## DIABETES=1 0.0467 0.1565 0.30 0.7655
## SUMSCORE_5LEVELS 0.3393 0.0565 6.00 <0.0001
## HOMOC 0.0026 0.0087 0.30 0.7672
## CREAT 0.6002 0.1502 4.00 <0.0001
## albumin=2 0.2347 0.1192 1.97 0.0491
## albumin=3 0.6379 0.2022 3.15 0.0016
## STENOSIS=1 0.1596 0.1084 1.47 0.1411
## IMT 0.5886 0.1813 3.25 0.0012
## TRIG 0.1283 0.4336 0.30 0.7673
## GLUT 0.0516 0.0290 1.78 0.0753
## packyrs 0.0059 0.0025 2.37 0.0180
##################################
# FULL
# After removing variables which are:
# High multicollinearity contributors
# Non-standard risk factors
# Minimal predictors of survival outcome based on initial exploration
# Minimal predictors of survival outcome based on domain knowledge and literature
##################################
# Fit the reduced FULL Cox proportional hazards model on the 2nd AREG-imputed
# dataset.
#   - AGE enters as a transformed term: 0 up to age 50, (AGE - 50)^2 beyond it.
#   - CREAT enters on the log scale.
#   - x, y and surv ask cph() to keep the design matrix, the response and the
#     underlying survival estimates in the fit object (see rms::cph).
COXPH.FULL.AREGIMPUTED_2 <- cph(
  Surv(TEVENT, EVENT) ~ ifelse(AGE > 50, (AGE - 50)^2, 0) +
    SEX +
    SMOKING +
    alcohol +
    BMI +
    SYSTH +
    HDL +
    DIABETES +
    SUMSCORE_5LEVELS +
    HOMOC +
    log(CREAT) +
    albumin +
    STENOSIS +
    IMT,
  data = SMART.AREGIMPUTED_2,
  x = T,
  y = T,
  surv = T
)
# Wald chi-square test for each factor in the model
anova(COXPH.FULL.AREGIMPUTED_2)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## AGE 36.20 1 <.0001
## SEX 0.15 1 0.6996
## SMOKING 1.81 2 0.4040
## alcohol 1.25 2 0.5347
## BMI 2.77 1 0.0959
## SYSTH 1.01 1 0.3157
## HDL 5.80 1 0.0161
## DIABETES 2.84 1 0.0919
## SUMSCORE_5LEVELS 35.74 1 <.0001
## HOMOC 0.18 1 0.6730
## CREAT 14.49 1 0.0001
## albumin 11.33 2 0.0035
## STENOSIS 2.63 1 0.1046
## IMT 9.41 1 0.0022
## TOTAL 318.65 17 <.0001
# Effect estimates and hazard ratios, with 0.95 confidence limits, for each
# predictor between its Low and High reference values
summary(COXPH.FULL.AREGIMPUTED_2)
## Effects Response : Surv(TEVENT, EVENT)
##
## Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
## AGE 52.00 68.00 16.00 0.412890 0.068624 0.2783900 0.547400
## Hazard Ratio 52.00 68.00 16.00 1.511200 NA 1.3210000 1.728700
## BMI 24.11 28.73 4.62 -0.109170 0.065571 -0.2376900 0.019344
## Hazard Ratio 24.11 28.73 4.62 0.896580 NA 0.7884500 1.019500
## SYSTH 127.00 157.00 30.00 0.066168 0.065951 -0.0630930 0.195430
## Hazard Ratio 127.00 157.00 30.00 1.068400 NA 0.9388600 1.215800
## HDL 0.96 1.42 0.46 -0.176910 0.073475 -0.3209200 -0.032900
## Hazard Ratio 0.96 1.42 0.46 0.837860 NA 0.7254800 0.967640
## SUMSCORE_5LEVELS 1.00 5.00 4.00 1.333000 0.222970 0.8959500 1.770000
## Hazard Ratio 1.00 5.00 4.00 3.792300 NA 2.4496000 5.870800
## HOMOC 10.50 16.00 5.50 0.020056 0.047525 -0.0730920 0.113200
## Hazard Ratio 10.50 16.00 5.50 1.020300 NA 0.9295200 1.119900
## CREAT 78.00 101.00 23.00 0.144220 0.037890 0.0699530 0.218480
## Hazard Ratio 78.00 101.00 23.00 1.155100 NA 1.0725000 1.244200
## IMT 0.75 1.07 0.32 0.174920 0.057034 0.0631390 0.286710
## Hazard Ratio 0.75 1.07 0.32 1.191200 NA 1.0652000 1.332000
## SEX - 2:1 1.00 2.00 NA -0.052169 0.135210 -0.3171800 0.212840
## Hazard Ratio 1.00 2.00 NA 0.949170 NA 0.7282000 1.237200
## SMOKING - 1:2 2.00 1.00 NA -0.184270 0.144830 -0.4681400 0.099591
## Hazard Ratio 2.00 1.00 NA 0.831710 NA 0.6261700 1.104700
## SMOKING - 3:2 2.00 3.00 NA 0.060750 0.201060 -0.3333200 0.454820
## Hazard Ratio 2.00 3.00 NA 1.062600 NA 0.7165400 1.575900
## alcohol - 1:3 3.00 1.00 NA 0.119670 0.120890 -0.1172600 0.356600
## Hazard Ratio 3.00 1.00 NA 1.127100 NA 0.8893500 1.428500
## alcohol - 2:3 3.00 2.00 NA -0.040309 0.147650 -0.3297000 0.249080
## Hazard Ratio 3.00 2.00 NA 0.960490 NA 0.7191400 1.282900
## DIABETES - 1:0 1.00 2.00 NA 0.185690 0.110160 -0.0302130 0.401600
## Hazard Ratio 1.00 2.00 NA 1.204100 NA 0.9702400 1.494200
## albumin - 2:1 1.00 2.00 NA 0.230480 0.118490 -0.0017567 0.462710
## Hazard Ratio 1.00 2.00 NA 1.259200 NA 0.9982400 1.588400
## albumin - 3:1 1.00 3.00 NA 0.636100 0.199770 0.2445500 1.027700
## Hazard Ratio 1.00 3.00 NA 1.889100 NA 1.2771000 2.794500
## STENOSIS - 1:0 1.00 2.00 NA 0.174090 0.107260 -0.0361370 0.384320
## Hazard Ratio 1.00 2.00 NA 1.190200 NA 0.9645100 1.468600
# Variance inflation factor for every coefficient of the reduced model,
# used to confirm that the multicollinearity has been removed
vif(COXPH.FULL.AREGIMPUTED_2)
## AGE SEX=2 SMOKING=2 SMOKING=3
## 1.299593 1.276089 1.544960 1.418226
## alcohol=2 alcohol=3 BMI SYSTH
## 1.417444 1.560499 1.109009 1.190620
## HDL DIABETES=1 SUMSCORE_5LEVELS HOMOC
## 1.209404 1.124695 1.158816 1.507124
## CREAT albumin=2 albumin=3 STENOSIS=1
## 1.869956 1.170582 1.576933 1.182581
## IMT
## 1.309736
# Print the fitted model: call, model tests, discrimination indexes
# and the coefficient table
COXPH.FULL.AREGIMPUTED_2
## Cox Proportional Hazards Model
##
## cph(formula = Surv(TEVENT, EVENT) ~ ifelse(AGE > 50, (AGE - 50)^2,
## 0) + SEX + SMOKING + alcohol + BMI + SYSTH + HDL + DIABETES +
## SUMSCORE_5LEVELS + HOMOC + log(CREAT) + albumin + STENOSIS +
## IMT, data = SMART.AREGIMPUTED_2, x = T, y = T, surv = T)
##
## Model Tests Discrimination
## Indexes
## Obs 3873 LR chi2 280.67 R2 0.084
## Events 460 d.f. 17 R2(17,3873)0.066
## Center 3.1607 Pr(> chi2) 0.0000 R2(17,460)0.436
## Score chi2 355.08 Dxy 0.393
## Pr(> chi2) 0.0000
##
## Coef S.E. Wald Z Pr(>|Z|)
## AGE 0.0013 0.0002 6.02 <0.0001
## SEX=2 -0.0522 0.1352 -0.39 0.6996
## SMOKING=2 0.1843 0.1448 1.27 0.2033
## SMOKING=3 0.2450 0.2349 1.04 0.2969
## alcohol=2 -0.1600 0.1673 -0.96 0.3389
## alcohol=3 -0.1197 0.1209 -0.99 0.3222
## BMI -0.0236 0.0142 -1.66 0.0959
## SYSTH 0.0022 0.0022 1.00 0.3157
## HDL -0.3846 0.1597 -2.41 0.0161
## DIABETES=1 0.1857 0.1102 1.69 0.0919
## SUMSCORE_5LEVELS 0.3332 0.0557 5.98 <0.0001
## HOMOC 0.0036 0.0086 0.42 0.6730
## CREAT 0.5581 0.1466 3.81 0.0001
## albumin=2 0.2305 0.1185 1.95 0.0518
## albumin=3 0.6361 0.1998 3.18 0.0015
## STENOSIS=1 0.1741 0.1073 1.62 0.1046
## IMT 0.5466 0.1782 3.07 0.0022
##################################
# Formulating the FULL
# Cox Proportional Hazards Model
# Using the combined complete and
# 3RD imputation results from AREG
##################################
# Prepare the 3rd AREG-imputed dataset for modelling: the event indicator is
# coerced to numeric and SMOKING, alcohol and albumin are coerced to factors.
SMART.AREGIMPUTED_3$EVENT <- as.numeric(SMART.AREGIMPUTED_3$EVENT)
SMART.AREGIMPUTED_3$SMOKING <- as.factor(SMART.AREGIMPUTED_3$SMOKING)
SMART.AREGIMPUTED_3$alcohol <- as.factor(SMART.AREGIMPUTED_3$alcohol)
SMART.AREGIMPUTED_3$albumin <- as.factor(SMART.AREGIMPUTED_3$albumin)
# Register the variable distributions of this dataset with rms; the option is
# looked up by name, so the object must be called "dd".
dd <- datadist(SMART.AREGIMPUTED_3)
options(datadist = "dd")
##################################
# FULL COMPLETE
##################################
# Fit the FULL COMPLETE Cox proportional hazards model (all candidate
# predictors) on the 3rd AREG-imputed dataset.
#   - AGE enters as a transformed term: 0 up to age 50, (AGE - 50)^2 beyond it.
#   - CREAT enters on the log scale.
#   - x, y and surv ask cph() to keep the design matrix, the response and the
#     underlying survival estimates in the fit object (see rms::cph).
COXPH.FULL.COMPLETE.AREGIMPUTED_3 <- cph(
  Surv(TEVENT, EVENT) ~ ifelse(AGE > 50, (AGE - 50)^2, 0) +
    SEX +
    SMOKING +
    alcohol +
    BMI +
    SYSTH +
    SYSTBP +
    DIASTH +
    DIASTBP +
    WEIGHT +
    LENGTH +
    CHOL +
    LDL +
    HDL +
    DIABETES +
    SUMSCORE_5LEVELS +
    HOMOC +
    log(CREAT) +
    albumin +
    STENOSIS +
    IMT +
    TRIG +
    GLUT +
    packyrs,
  data = SMART.AREGIMPUTED_3,
  x = T,
  y = T,
  surv = T
)
# Wald chi-square test for each factor in the model
anova(COXPH.FULL.COMPLETE.AREGIMPUTED_3)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## AGE 36.90 1 <.0001
## SEX 0.75 1 0.3869
## SMOKING 0.35 2 0.8388
## alcohol 1.46 2 0.4817
## BMI 0.68 1 0.4083
## SYSTH 0.15 1 0.6983
## SYSTBP 0.44 1 0.5070
## DIASTH 0.66 1 0.4168
## DIASTBP 0.19 1 0.6599
## WEIGHT 1.27 1 0.2590
## LENGTH 0.59 1 0.4439
## CHOL 0.55 1 0.4581
## LDL 0.65 1 0.4218
## HDL 0.09 1 0.7588
## DIABETES 0.19 1 0.6617
## SUMSCORE_5LEVELS 34.76 1 <.0001
## HOMOC 0.06 1 0.8014
## CREAT 17.68 1 <.0001
## albumin 7.20 2 0.0274
## STENOSIS 3.01 1 0.0825
## IMT 7.65 1 0.0057
## TRIG 0.56 1 0.4530
## GLUT 2.49 1 0.1143
## packyrs 5.86 1 0.0155
## TOTAL 320.20 27 <.0001
# Effect estimates and hazard ratios, with 0.95 confidence limits, for each
# predictor between its Low and High reference values
summary(COXPH.FULL.COMPLETE.AREGIMPUTED_3)
## Effects Response : Surv(TEVENT, EVENT)
##
## Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
## AGE 52.00 68.00 16.00 0.435920 0.071766 0.295260 0.57658
## Hazard Ratio 52.00 68.00 16.00 1.546400 NA 1.343500 1.77990
## BMI 24.11 28.73 4.62 0.406580 0.491680 -0.557090 1.37030
## Hazard Ratio 24.11 28.73 4.62 1.501700 NA 0.572870 3.93640
## SYSTH 127.00 156.00 29.00 0.045244 0.116720 -0.183520 0.27401
## Hazard Ratio 127.00 156.00 29.00 1.046300 NA 0.832330 1.31520
## SYSTBP 126.00 153.00 27.00 -0.074237 0.111870 -0.293510 0.14503
## Hazard Ratio 126.00 153.00 27.00 0.928450 NA 0.745640 1.15610
## DIASTH 74.00 90.00 16.00 0.088751 0.109310 -0.125490 0.30299
## Hazard Ratio 74.00 90.00 16.00 1.092800 NA 0.882060 1.35390
## DIASTBP 73.00 86.00 13.00 0.041562 0.094439 -0.143530 0.22666
## Hazard Ratio 73.00 86.00 13.00 1.042400 NA 0.866290 1.25440
## WEIGHT 72.00 89.00 17.00 -0.681140 0.603430 -1.863800 0.50155
## Hazard Ratio 72.00 89.00 17.00 0.506040 NA 0.155080 1.65130
## LENGTH 1.68 1.80 0.12 0.306690 0.400570 -0.478400 1.09180
## Hazard Ratio 1.68 1.80 0.12 1.358900 NA 0.619770 2.97960
## CHOL 4.40 5.90 1.50 -0.991960 1.336800 -3.612000 1.62810
## Hazard Ratio 4.40 5.90 1.50 0.370850 NA 0.026997 5.09420
## LDL 2.37 3.83 1.46 1.054400 1.312500 -1.518100 3.62690
## Hazard Ratio 2.37 3.83 1.46 2.870100 NA 0.219120 37.59400
## HDL 0.96 1.42 0.46 0.127730 0.416070 -0.687750 0.94322
## Hazard Ratio 0.96 1.42 0.46 1.136200 NA 0.502700 2.56820
## SUMSCORE_5LEVELS 1.00 5.00 4.00 1.337900 0.226920 0.893180 1.78270
## Hazard Ratio 1.00 5.00 4.00 3.811200 NA 2.442900 5.94580
## HOMOC 10.50 16.00 5.50 0.011972 0.047590 -0.081302 0.10525
## Hazard Ratio 10.50 16.00 5.50 1.012000 NA 0.921920 1.11100
## CREAT 78.00 101.00 23.00 0.162680 0.038689 0.086847 0.23850
## Hazard Ratio 78.00 101.00 23.00 1.176700 NA 1.090700 1.26930
## IMT 0.75 1.07 0.32 0.160880 0.058184 0.046842 0.27492
## Hazard Ratio 0.75 1.07 0.32 1.174500 NA 1.048000 1.31640
## TRIG 1.13 2.23 1.10 0.327860 0.436890 -0.528420 1.18410
## Hazard Ratio 1.13 2.23 1.10 1.388000 NA 0.589540 3.26790
## GLUT 5.30 6.50 1.20 0.055650 0.035244 -0.013426 0.12473
## Hazard Ratio 5.30 6.50 1.20 1.057200 NA 0.986660 1.13280
## packyrs 5.90 34.20 28.30 0.172410 0.071214 0.032828 0.31198
## Hazard Ratio 5.90 34.20 28.30 1.188200 NA 1.033400 1.36610
## SEX - 2:1 1.00 2.00 NA -0.141230 0.163230 -0.461150 0.17869
## Hazard Ratio 1.00 2.00 NA 0.868290 NA 0.630560 1.19570
## SMOKING - 1:2 2.00 1.00 NA -0.024508 0.161910 -0.341850 0.29283
## Hazard Ratio 2.00 1.00 NA 0.975790 NA 0.710460 1.34020
## SMOKING - 3:2 2.00 3.00 NA 0.110050 0.198790 -0.279580 0.49967
## Hazard Ratio 2.00 3.00 NA 1.116300 NA 0.756100 1.64820
## alcohol - 1:3 3.00 1.00 NA 0.133760 0.120890 -0.103180 0.37071
## Hazard Ratio 3.00 1.00 NA 1.143100 NA 0.901960 1.44880
## alcohol - 2:3 3.00 2.00 NA -0.029839 0.147830 -0.319570 0.25990
## Hazard Ratio 3.00 2.00 NA 0.970600 NA 0.726460 1.29680
## DIABETES - 1:0 1.00 2.00 NA 0.068163 0.155790 -0.237180 0.37351
## Hazard Ratio 1.00 2.00 NA 1.070500 NA 0.788850 1.45280
## albumin - 2:1 1.00 2.00 NA 0.188130 0.119630 -0.046335 0.42259
## Hazard Ratio 1.00 2.00 NA 1.207000 NA 0.954720 1.52590
## albumin - 3:1 1.00 3.00 NA 0.532280 0.209570 0.121520 0.94303
## Hazard Ratio 1.00 3.00 NA 1.702800 NA 1.129200 2.56770
## STENOSIS - 1:0 1.00 2.00 NA 0.187470 0.107970 -0.024146 0.39908
## Hazard Ratio 1.00 2.00 NA 1.206200 NA 0.976140 1.49050
# Variance inflation factor for every coefficient of the full complete model,
# used to screen for multicollinearity among the predictors
vif(COXPH.FULL.COMPLETE.AREGIMPUTED_3)
## AGE SEX=2 SMOKING=2 SMOKING=3
## 1.420179 1.859980 1.945645 1.558441
## alcohol=2 alcohol=3 BMI SYSTH
## 1.426484 1.568374 63.258403 3.781325
## SYSTBP DIASTH DIASTBP WEIGHT
## 3.316955 3.055128 2.581655 88.682439
## LENGTH CHOL LDL HDL
## 34.592113 414.210018 374.656141 38.295659
## DIABETES=1 SUMSCORE_5LEVELS HOMOC CREAT
## 2.249795 1.180905 1.455960 1.929293
## albumin=2 albumin=3 STENOSIS=1 IMT
## 1.184010 1.626924 1.203558 1.339445
## TRIG GLUT packyrs
## 99.397839 2.164191 1.362993
# Print the fitted model: call, model tests, discrimination indexes
# and the coefficient table
COXPH.FULL.COMPLETE.AREGIMPUTED_3
## Cox Proportional Hazards Model
##
## cph(formula = Surv(TEVENT, EVENT) ~ ifelse(AGE > 50, (AGE - 50)^2,
## 0) + SEX + SMOKING + alcohol + BMI + SYSTH + SYSTBP + DIASTH +
## DIASTBP + WEIGHT + LENGTH + CHOL + LDL + HDL + DIABETES +
## SUMSCORE_5LEVELS + HOMOC + log(CREAT) + albumin + STENOSIS +
## IMT + TRIG + GLUT + packyrs, data = SMART.AREGIMPUTED_3,
## x = T, y = T, surv = T)
##
## Model Tests Discrimination
## Indexes
## Obs 3873 LR chi2 289.89 R2 0.086
## Events 460 d.f. 27 R2(27,3873)0.066
## Center 8.2833 Pr(> chi2) 0.0000 R2(27,460)0.435
## Score chi2 357.27 Dxy 0.393
## Pr(> chi2) 0.0000
##
## Coef S.E. Wald Z Pr(>|Z|)
## AGE 0.0014 0.0002 6.07 <0.0001
## SEX=2 -0.1412 0.1632 -0.87 0.3869
## SMOKING=2 0.0245 0.1619 0.15 0.8797
## SMOKING=3 0.1346 0.2424 0.56 0.5789
## alcohol=2 -0.1636 0.1666 -0.98 0.3260
## alcohol=3 -0.1338 0.1209 -1.11 0.2685
## BMI 0.0880 0.1064 0.83 0.4083
## SYSTH 0.0016 0.0040 0.39 0.6983
## SYSTBP -0.0027 0.0041 -0.66 0.5070
## DIASTH 0.0055 0.0068 0.81 0.4168
## DIASTBP 0.0032 0.0073 0.44 0.6599
## WEIGHT -0.0401 0.0355 -1.13 0.2590
## LENGTH 2.5558 3.3380 0.77 0.4439
## CHOL -0.6613 0.8912 -0.74 0.4581
## LDL 0.7222 0.8990 0.80 0.4218
## HDL 0.2777 0.9045 0.31 0.7588
## DIABETES=1 0.0682 0.1558 0.44 0.6617
## SUMSCORE_5LEVELS 0.3345 0.0567 5.90 <0.0001
## HOMOC 0.0022 0.0087 0.25 0.8014
## CREAT 0.6295 0.1497 4.20 <0.0001
## albumin=2 0.1881 0.1196 1.57 0.1158
## albumin=3 0.5323 0.2096 2.54 0.0111
## STENOSIS=1 0.1875 0.1080 1.74 0.0825
## IMT 0.5027 0.1818 2.77 0.0057
## TRIG 0.2981 0.3972 0.75 0.4530
## GLUT 0.0464 0.0294 1.58 0.1143
## packyrs 0.0061 0.0025 2.42 0.0155
##################################
# FULL
# After removing variables which are:
# High multicollinearity contributors
# Non-standard risk factors
# Minimal predictors of survival outcome based on initial exploration
# Minimal predictors of survival outcome based on domain knowledge and literature
##################################
# Fit the reduced FULL Cox proportional hazards model on the 3rd AREG-imputed
# dataset.
#   - AGE enters as a transformed term: 0 up to age 50, (AGE - 50)^2 beyond it.
#   - CREAT enters on the log scale.
#   - x, y and surv ask cph() to keep the design matrix, the response and the
#     underlying survival estimates in the fit object (see rms::cph).
COXPH.FULL.AREGIMPUTED_3 <- cph(
  Surv(TEVENT, EVENT) ~ ifelse(AGE > 50, (AGE - 50)^2, 0) +
    SEX +
    SMOKING +
    alcohol +
    BMI +
    SYSTH +
    HDL +
    DIABETES +
    SUMSCORE_5LEVELS +
    HOMOC +
    log(CREAT) +
    albumin +
    STENOSIS +
    IMT,
  data = SMART.AREGIMPUTED_3,
  x = T,
  y = T,
  surv = T
)
# Wald chi-square test for each factor in the model
anova(COXPH.FULL.AREGIMPUTED_3)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## AGE 37.94 1 <.0001
## SEX 0.12 1 0.7269
## SMOKING 2.25 2 0.3250
## alcohol 1.11 2 0.5753
## BMI 2.88 1 0.0899
## SYSTH 1.69 1 0.1939
## HDL 6.08 1 0.0137
## DIABETES 3.15 1 0.0761
## SUMSCORE_5LEVELS 36.05 1 <.0001
## HOMOC 0.24 1 0.6207
## CREAT 15.67 1 0.0001
## albumin 7.32 2 0.0257
## STENOSIS 3.43 1 0.0642
## IMT 6.57 1 0.0103
## TOTAL 311.06 17 <.0001
# Effect estimates and hazard ratios, with 0.95 confidence limits, for each
# predictor between its Low and High reference values
summary(COXPH.FULL.AREGIMPUTED_3)
## Effects Response : Surv(TEVENT, EVENT)
##
## Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
## AGE 52.00 68.00 16.00 0.421290 0.068396 0.287240 0.555340
## Hazard Ratio 52.00 68.00 16.00 1.523900 NA 1.332700 1.742500
## BMI 24.11 28.73 4.62 -0.111310 0.065642 -0.239960 0.017347
## Hazard Ratio 24.11 28.73 4.62 0.894660 NA 0.786660 1.017500
## SYSTH 127.00 156.00 29.00 0.083927 0.064609 -0.042704 0.210560
## Hazard Ratio 127.00 156.00 29.00 1.087500 NA 0.958200 1.234400
## HDL 0.96 1.42 0.46 -0.181520 0.073610 -0.325790 -0.037244
## Hazard Ratio 0.96 1.42 0.46 0.834000 NA 0.721960 0.963440
## SUMSCORE_5LEVELS 1.00 5.00 4.00 1.338700 0.222960 0.901750 1.775700
## Hazard Ratio 1.00 5.00 4.00 3.814200 NA 2.463900 5.904600
## HOMOC 10.50 16.00 5.50 0.023263 0.047004 -0.068862 0.115390
## Hazard Ratio 10.50 16.00 5.50 1.023500 NA 0.933460 1.122300
## CREAT 78.00 101.00 23.00 0.150200 0.037942 0.075835 0.224570
## Hazard Ratio 78.00 101.00 23.00 1.162100 NA 1.078800 1.251800
## IMT 0.75 1.07 0.32 0.146240 0.057032 0.034457 0.258020
## Hazard Ratio 0.75 1.07 0.32 1.157500 NA 1.035100 1.294400
## SEX - 2:1 1.00 2.00 NA -0.047213 0.135210 -0.312210 0.217790
## Hazard Ratio 1.00 2.00 NA 0.953880 NA 0.731830 1.243300
## SMOKING - 1:2 2.00 1.00 NA -0.193240 0.144660 -0.476770 0.090286
## Hazard Ratio 2.00 1.00 NA 0.824280 NA 0.620780 1.094500
## SMOKING - 3:2 2.00 3.00 NA 0.105120 0.197840 -0.282640 0.492880
## Hazard Ratio 2.00 3.00 NA 1.110800 NA 0.753790 1.637000
## alcohol - 1:3 3.00 1.00 NA 0.114520 0.120600 -0.121850 0.350880
## Hazard Ratio 3.00 1.00 NA 1.121300 NA 0.885280 1.420300
## alcohol - 2:3 3.00 2.00 NA -0.030120 0.147370 -0.318970 0.258730
## Hazard Ratio 3.00 2.00 NA 0.970330 NA 0.726900 1.295300
## DIABETES - 1:0 1.00 2.00 NA 0.195840 0.110430 -0.020590 0.412270
## Hazard Ratio 1.00 2.00 NA 1.216300 NA 0.979620 1.510200
## albumin - 2:1 1.00 2.00 NA 0.194770 0.118990 -0.038447 0.427990
## Hazard Ratio 1.00 2.00 NA 1.215000 NA 0.962280 1.534200
## albumin - 3:1 1.00 3.00 NA 0.525450 0.207680 0.118400 0.932500
## Hazard Ratio 1.00 3.00 NA 1.691200 NA 1.125700 2.540900
## STENOSIS - 1:0 1.00 2.00 NA 0.197860 0.106910 -0.011678 0.407410
## Hazard Ratio 1.00 2.00 NA 1.218800 NA 0.988390 1.502900
# Variance inflation factor for every coefficient of the reduced model,
# used to confirm that the multicollinearity has been removed
vif(COXPH.FULL.AREGIMPUTED_3)
## AGE SEX=2 SMOKING=2 SMOKING=3
## 1.295338 1.276244 1.553076 1.428452
## alcohol=2 alcohol=3 BMI SYSTH
## 1.414689 1.560807 1.108995 1.175784
## HDL DIABETES=1 SUMSCORE_5LEVELS HOMOC
## 1.214632 1.130052 1.153692 1.418524
## CREAT albumin=2 albumin=3 STENOSIS=1
## 1.872846 1.172256 1.599649 1.179656
## IMT
## 1.303127
# Print the fitted model: call, model tests, discrimination indexes
# and the coefficient table
COXPH.FULL.AREGIMPUTED_3
## Cox Proportional Hazards Model
##
## cph(formula = Surv(TEVENT, EVENT) ~ ifelse(AGE > 50, (AGE - 50)^2,
## 0) + SEX + SMOKING + alcohol + BMI + SYSTH + HDL + DIABETES +
## SUMSCORE_5LEVELS + HOMOC + log(CREAT) + albumin + STENOSIS +
## IMT, data = SMART.AREGIMPUTED_3, x = T, y = T, surv = T)
##
## Model Tests Discrimination
## Indexes
## Obs 3873 LR chi2 274.70 R2 0.082
## Events 460 d.f. 17 R2(17,3873)0.064
## Center 3.2859 Pr(> chi2) 0.0000 R2(17,460)0.429
## Score chi2 344.55 Dxy 0.391
## Pr(> chi2) 0.0000
##
## Coef S.E. Wald Z Pr(>|Z|)
## AGE 0.0013 0.0002 6.16 <0.0001
## SEX=2 -0.0472 0.1352 -0.35 0.7269
## SMOKING=2 0.1932 0.1447 1.34 0.1816
## SMOKING=3 0.2984 0.2320 1.29 0.1985
## alcohol=2 -0.1446 0.1659 -0.87 0.3833
## alcohol=3 -0.1145 0.1206 -0.95 0.3423
## BMI -0.0241 0.0142 -1.70 0.0899
## SYSTH 0.0029 0.0022 1.30 0.1939
## HDL -0.3946 0.1600 -2.47 0.0137
## DIABETES=1 0.1958 0.1104 1.77 0.0761
## SUMSCORE_5LEVELS 0.3347 0.0557 6.00 <0.0001
## HOMOC 0.0042 0.0085 0.49 0.6207
## CREAT 0.5812 0.1468 3.96 <0.0001
## albumin=2 0.1948 0.1190 1.64 0.1017
## albumin=3 0.5255 0.2077 2.53 0.0114
## STENOSIS=1 0.1979 0.1069 1.85 0.0642
## IMT 0.4570 0.1782 2.56 0.0103
##################################
# Formulating the FULL
# Cox Proportional Hazards Model
# Using the combined complete and
# 4TH imputation results from AREG
##################################
# Prepare the 4th AREG-imputed dataset for modelling: the event indicator is
# coerced to numeric and SMOKING, alcohol and albumin are coerced to factors.
SMART.AREGIMPUTED_4$EVENT <- as.numeric(SMART.AREGIMPUTED_4$EVENT)
SMART.AREGIMPUTED_4$SMOKING <- as.factor(SMART.AREGIMPUTED_4$SMOKING)
SMART.AREGIMPUTED_4$alcohol <- as.factor(SMART.AREGIMPUTED_4$alcohol)
SMART.AREGIMPUTED_4$albumin <- as.factor(SMART.AREGIMPUTED_4$albumin)
# Register the variable distributions of this dataset with rms; the option is
# looked up by name, so the object must be called "dd".
dd <- datadist(SMART.AREGIMPUTED_4)
options(datadist = "dd")
##################################
# FULL COMPLETE
##################################
# Fit the FULL COMPLETE Cox proportional hazards model (all candidate
# predictors) on the 4th AREG-imputed dataset.
#   - AGE enters as a transformed term: 0 up to age 50, (AGE - 50)^2 beyond it.
#   - CREAT enters on the log scale.
#   - x, y and surv ask cph() to keep the design matrix, the response and the
#     underlying survival estimates in the fit object (see rms::cph).
COXPH.FULL.COMPLETE.AREGIMPUTED_4 <- cph(
  Surv(TEVENT, EVENT) ~ ifelse(AGE > 50, (AGE - 50)^2, 0) +
    SEX +
    SMOKING +
    alcohol +
    BMI +
    SYSTH +
    SYSTBP +
    DIASTH +
    DIASTBP +
    WEIGHT +
    LENGTH +
    CHOL +
    LDL +
    HDL +
    DIABETES +
    SUMSCORE_5LEVELS +
    HOMOC +
    log(CREAT) +
    albumin +
    STENOSIS +
    IMT +
    TRIG +
    GLUT +
    packyrs,
  data = SMART.AREGIMPUTED_4,
  x = T,
  y = T,
  surv = T
)
# Wald chi-square test for each factor in the model
anova(COXPH.FULL.COMPLETE.AREGIMPUTED_4)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## AGE 32.02 1 <.0001
## SEX 0.65 1 0.4189
## SMOKING 0.06 2 0.9706
## alcohol 1.76 2 0.4157
## BMI 1.03 1 0.3107
## SYSTH 0.14 1 0.7122
## SYSTBP 0.48 1 0.4866
## DIASTH 1.49 1 0.2221
## DIASTBP 0.01 1 0.9432
## WEIGHT 1.72 1 0.1892
## LENGTH 0.92 1 0.3388
## CHOL 0.00 1 0.9449
## LDL 0.00 1 0.9841
## HDL 0.17 1 0.6797
## DIABETES 0.19 1 0.6617
## SUMSCORE_5LEVELS 32.02 1 <.0001
## HOMOC 2.03 1 0.1541
## CREAT 16.44 1 0.0001
## albumin 8.32 2 0.0156
## STENOSIS 1.89 1 0.1693
## IMT 9.93 1 0.0016
## TRIG 0.01 1 0.9395
## GLUT 3.35 1 0.0673
## packyrs 5.40 1 0.0202
## TOTAL 326.89 27 <.0001
# Effect estimates and hazard ratios between the Low and High reference
# values of each predictor (level contrasts for the categorical ones).
summary(COXPH.FULL.COMPLETE.AREGIMPUTED_4)
## Effects Response : Surv(TEVENT, EVENT)
##
## Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
## AGE 52.00 68.00 16.00 0.4076500 0.072045 0.2664400 0.54885
## Hazard Ratio 52.00 68.00 16.00 1.5033000 NA 1.3053000 1.73130
## BMI 24.11 28.73 4.62 0.5012000 0.494450 -0.4679000 1.47030
## Hazard Ratio 24.11 28.73 4.62 1.6507000 NA 0.6263100 4.35060
## SYSTH 127.00 157.00 30.00 0.0448550 0.121590 -0.1934600 0.28317
## Hazard Ratio 127.00 157.00 30.00 1.0459000 NA 0.8241100 1.32730
## SYSTBP 126.00 152.00 26.00 -0.0768050 0.110390 -0.2931600 0.13955
## Hazard Ratio 126.00 152.00 26.00 0.9260700 NA 0.7459000 1.14980
## DIASTH 74.00 90.00 16.00 0.1377800 0.112850 -0.0833960 0.35895
## Hazard Ratio 74.00 90.00 16.00 1.1477000 NA 0.9199900 1.43180
## DIASTBP 73.00 86.00 13.00 0.0068640 0.096422 -0.1821200 0.19585
## Hazard Ratio 73.00 86.00 13.00 1.0069000 NA 0.8335000 1.21630
## WEIGHT 72.00 89.00 17.00 -0.7990600 0.608650 -1.9920000 0.39387
## Hazard Ratio 72.00 89.00 17.00 0.4497500 NA 0.1364200 1.48270
## LENGTH 1.68 1.80 0.12 0.3853400 0.402830 -0.4041900 1.17490
## Hazard Ratio 1.68 1.80 0.12 1.4701000 NA 0.6675200 3.23770
## CHOL 4.40 5.90 1.50 0.1124800 1.626600 -3.0756000 3.30060
## Hazard Ratio 4.40 5.90 1.50 1.1191000 NA 0.0461620 27.12800
## LDL 2.37 3.83 1.46 -0.0317260 1.590600 -3.1492000 3.08580
## Hazard Ratio 2.37 3.83 1.46 0.9687700 NA 0.0428860 21.88400
## HDL 0.96 1.42 0.46 -0.2087600 0.505560 -1.1996000 0.78212
## Hazard Ratio 0.96 1.42 0.46 0.8115900 NA 0.3013000 2.18610
## SUMSCORE_5LEVELS 1.00 5.00 4.00 1.2765000 0.225590 0.8343900 1.71870
## Hazard Ratio 1.00 5.00 4.00 3.5842000 NA 2.3034000 5.57720
## HOMOC 10.50 16.00 5.50 0.0641290 0.045001 -0.0240710 0.15233
## Hazard Ratio 10.50 16.00 5.50 1.0662000 NA 0.9762200 1.16450
## CREAT 78.00 101.00 23.00 0.1523000 0.037558 0.0786910 0.22591
## Hazard Ratio 78.00 101.00 23.00 1.1645000 NA 1.0819000 1.25350
## IMT 0.75 1.07 0.32 0.1836600 0.058280 0.0694360 0.29789
## Hazard Ratio 0.75 1.07 0.32 1.2016000 NA 1.0719000 1.34700
## TRIG 1.13 2.23 1.10 -0.0404980 0.533860 -1.0869000 1.00590
## Hazard Ratio 1.13 2.23 1.10 0.9603100 NA 0.3372800 2.73420
## GLUT 5.30 6.50 1.20 0.0641980 0.035088 -0.0045735 0.13297
## Hazard Ratio 5.30 6.50 1.20 1.0663000 NA 0.9954400 1.14220
## packyrs 5.90 34.20 28.30 0.1662400 0.071570 0.0259670 0.30652
## Hazard Ratio 5.90 34.20 28.30 1.1809000 NA 1.0263000 1.35870
## SEX - 2:1 1.00 2.00 NA -0.1310400 0.162110 -0.4487700 0.18668
## Hazard Ratio 1.00 2.00 NA 0.8771800 NA 0.6384200 1.20520
## SMOKING - 1:2 2.00 1.00 NA -0.0013686 0.161460 -0.3178100 0.31508
## Hazard Ratio 2.00 1.00 NA 0.9986300 NA 0.7277400 1.37040
## SMOKING - 3:2 2.00 3.00 NA 0.0488110 0.201850 -0.3468100 0.44443
## Hazard Ratio 2.00 3.00 NA 1.0500000 NA 0.7069400 1.55960
## alcohol - 1:3 3.00 1.00 NA 0.1445200 0.120930 -0.0925020 0.38154
## Hazard Ratio 3.00 1.00 NA 1.1555000 NA 0.9116500 1.46450
## alcohol - 2:3 3.00 2.00 NA -0.0393420 0.147710 -0.3288400 0.25016
## Hazard Ratio 3.00 2.00 NA 0.9614200 NA 0.7197600 1.28420
## DIABETES - 1:0 1.00 2.00 NA 0.0680540 0.155510 -0.2367400 0.37285
## Hazard Ratio 1.00 2.00 NA 1.0704000 NA 0.7892000 1.45190
## albumin - 2:1 1.00 2.00 NA 0.2732000 0.117180 0.0435270 0.50287
## Hazard Ratio 1.00 2.00 NA 1.3142000 NA 1.0445000 1.65350
## albumin - 3:1 1.00 3.00 NA 0.4784600 0.209270 0.0683080 0.88862
## Hazard Ratio 1.00 3.00 NA 1.6136000 NA 1.0707000 2.43180
## STENOSIS - 1:0 1.00 2.00 NA 0.1494800 0.108760 -0.0636870 0.36266
## Hazard Ratio 1.00 2.00 NA 1.1612000 NA 0.9383000 1.43710
# Variance inflation factor per coefficient, used to spot multicollinearity.
vif(COXPH.FULL.COMPLETE.AREGIMPUTED_4)
## AGE SEX=2 SMOKING=2 SMOKING=3
## 1.423801 1.834715 1.936069 1.535325
## alcohol=2 alcohol=3 BMI SYSTH
## 1.429097 1.569112 64.065892 3.937466
## SYSTBP DIASTH DIASTBP WEIGHT
## 3.435346 3.206930 2.673371 89.404506
## LENGTH CHOL LDL HDL
## 35.040767 611.492448 544.551054 56.706677
## DIABETES=1 SUMSCORE_5LEVELS HOMOC CREAT
## 2.251743 1.164305 1.432342 1.777291
## albumin=2 albumin=3 STENOSIS=1 IMT
## 1.178710 1.552418 1.212514 1.352500
## TRIG GLUT packyrs
## 149.510494 2.164995 1.379450
# Print the fitted model: model tests, discrimination indexes and coefficients.
COXPH.FULL.COMPLETE.AREGIMPUTED_4
## Cox Proportional Hazards Model
##
## cph(formula = Surv(TEVENT, EVENT) ~ ifelse(AGE > 50, (AGE - 50)^2,
## 0) + SEX + SMOKING + alcohol + BMI + SYSTH + SYSTBP + DIASTH +
## DIASTBP + WEIGHT + LENGTH + CHOL + LDL + HDL + DIABETES +
## SUMSCORE_5LEVELS + HOMOC + log(CREAT) + albumin + STENOSIS +
## IMT + TRIG + GLUT + packyrs, data = SMART.AREGIMPUTED_4,
## x = T, y = T, surv = T)
##
## Model Tests Discrimination
## Indexes
## Obs 3873 LR chi2 296.57 R2 0.088
## Events 460 d.f. 27 R2(27,3873)0.067
## Center 9.3809 Pr(> chi2) 0.0000 R2(27,460)0.443
## Score chi2 363.32 Dxy 0.402
## Pr(> chi2) 0.0000
##
## Coef S.E. Wald Z Pr(>|Z|)
## AGE 0.0013 0.0002 5.66 <0.0001
## SEX=2 -0.1310 0.1621 -0.81 0.4189
## SMOKING=2 0.0014 0.1615 0.01 0.9932
## SMOKING=3 0.0502 0.2444 0.21 0.8373
## alcohol=2 -0.1839 0.1667 -1.10 0.2702
## alcohol=3 -0.1445 0.1209 -1.20 0.2321
## BMI 0.1085 0.1070 1.01 0.3107
## SYSTH 0.0015 0.0041 0.37 0.7122
## SYSTBP -0.0030 0.0042 -0.70 0.4866
## DIASTH 0.0086 0.0071 1.22 0.2221
## DIASTBP 0.0005 0.0074 0.07 0.9432
## WEIGHT -0.0470 0.0358 -1.31 0.1892
## LENGTH 3.2112 3.3569 0.96 0.3388
## CHOL 0.0750 1.0844 0.07 0.9449
## LDL -0.0217 1.0894 -0.02 0.9841
## HDL -0.4538 1.0990 -0.41 0.6797
## DIABETES=1 0.0681 0.1555 0.44 0.6617
## SUMSCORE_5LEVELS 0.3191 0.0564 5.66 <0.0001
## HOMOC 0.0117 0.0082 1.43 0.1541
## CREAT 0.5894 0.1453 4.06 <0.0001
## albumin=2 0.2732 0.1172 2.33 0.0197
## albumin=3 0.4785 0.2093 2.29 0.0222
## STENOSIS=1 0.1495 0.1088 1.37 0.1693
## IMT 0.5739 0.1821 3.15 0.0016
## TRIG -0.0368 0.4853 -0.08 0.9395
## GLUT 0.0535 0.0292 1.83 0.0673
## packyrs 0.0059 0.0025 2.32 0.0202
##################################
# FULL
# After removing variables which are:
# High multicollinearity contributors
# Non-standard risk factors
# Minimal predictors of survival outcome based from initial exploration
# Minimal predictors of survival outcome based from domain knowledge and literature
##################################
# Refit on the retained predictors of the 4TH imputation. AGE enters as a
# squared term above 50 years (zero at or below 50); CREAT is log-transformed.
COXPH.FULL.AREGIMPUTED_4 <- cph(
  Surv(TEVENT, EVENT) ~ ifelse(AGE > 50, (AGE - 50)^2, 0) +
    SEX +
    SMOKING +
    alcohol +
    BMI +
    SYSTH +
    HDL +
    DIABETES +
    SUMSCORE_5LEVELS +
    HOMOC +
    log(CREAT) +
    albumin +
    STENOSIS +
    IMT,
  data = SMART.AREGIMPUTED_4,
  x = TRUE,
  y = TRUE,
  surv = TRUE
)
# Wald chi-square test for each predictor and for the model as a whole.
anova(COXPH.FULL.AREGIMPUTED_4)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## AGE 31.66 1 <.0001
## SEX 0.08 1 0.7798
## SMOKING 1.53 2 0.4659
## alcohol 1.32 2 0.5159
## BMI 3.03 1 0.0816
## SYSTH 1.97 1 0.1606
## HDL 5.63 1 0.0177
## DIABETES 3.77 1 0.0521
## SUMSCORE_5LEVELS 33.70 1 <.0001
## HOMOC 3.03 1 0.0816
## CREAT 14.89 1 0.0001
## albumin 7.74 2 0.0208
## STENOSIS 2.04 1 0.1537
## IMT 8.53 1 0.0035
## TOTAL 315.74 17 <.0001
# Effect estimates and hazard ratios between the Low and High reference
# values of each predictor (level contrasts for the categorical ones).
summary(COXPH.FULL.AREGIMPUTED_4)
## Effects Response : Surv(TEVENT, EVENT)
##
## Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
## AGE 52.00 68.00 16.00 0.388080 0.068969 0.2529000 0.523250
## Hazard Ratio 52.00 68.00 16.00 1.474100 NA 1.2878000 1.687500
## BMI 24.11 28.73 4.62 -0.114050 0.065492 -0.2424100 0.014314
## Hazard Ratio 24.11 28.73 4.62 0.892210 NA 0.7847300 1.014400
## SYSTH 127.00 157.00 30.00 0.092473 0.065911 -0.0367100 0.221660
## Hazard Ratio 127.00 157.00 30.00 1.096900 NA 0.9639600 1.248100
## HDL 0.96 1.42 0.46 -0.174110 0.073384 -0.3179400 -0.030275
## Hazard Ratio 0.96 1.42 0.46 0.840210 NA 0.7276500 0.970180
## SUMSCORE_5LEVELS 1.00 5.00 4.00 1.289000 0.222030 0.8538400 1.724200
## Hazard Ratio 1.00 5.00 4.00 3.629200 NA 2.3487000 5.607900
## HOMOC 10.50 16.00 5.50 0.076810 0.044099 -0.0096231 0.163240
## Hazard Ratio 10.50 16.00 5.50 1.079800 NA 0.9904200 1.177300
## CREAT 78.00 101.00 23.00 0.143120 0.037091 0.0704260 0.215820
## Hazard Ratio 78.00 101.00 23.00 1.153900 NA 1.0730000 1.240900
## IMT 0.75 1.07 0.32 0.167110 0.057217 0.0549680 0.279250
## Hazard Ratio 0.75 1.07 0.32 1.181900 NA 1.0565000 1.322100
## SEX - 2:1 1.00 2.00 NA -0.037842 0.135330 -0.3030800 0.227390
## Hazard Ratio 1.00 2.00 NA 0.962860 NA 0.7385400 1.255300
## SMOKING - 1:2 2.00 1.00 NA -0.167830 0.143840 -0.4497600 0.114090
## Hazard Ratio 2.00 1.00 NA 0.845500 NA 0.6377800 1.120900
## SMOKING - 3:2 2.00 3.00 NA 0.056140 0.200990 -0.3377900 0.450070
## Hazard Ratio 2.00 3.00 NA 1.057700 NA 0.7133400 1.568400
## alcohol - 1:3 3.00 1.00 NA 0.126330 0.120170 -0.1092000 0.361860
## Hazard Ratio 3.00 1.00 NA 1.134700 NA 0.8965500 1.436000
## alcohol - 2:3 3.00 2.00 NA -0.029610 0.146650 -0.3170400 0.257820
## Hazard Ratio 3.00 2.00 NA 0.970820 NA 0.7283000 1.294100
## DIABETES - 1:0 1.00 2.00 NA 0.213620 0.109970 -0.0019123 0.429140
## Hazard Ratio 1.00 2.00 NA 1.238100 NA 0.9980900 1.535900
## albumin - 2:1 1.00 2.00 NA 0.273640 0.116500 0.0452980 0.501980
## Hazard Ratio 1.00 2.00 NA 1.314700 NA 1.0463000 1.652000
## albumin - 3:1 1.00 3.00 NA 0.431190 0.207600 0.0242960 0.838080
## Hazard Ratio 1.00 3.00 NA 1.539100 NA 1.0246000 2.311900
## STENOSIS - 1:0 1.00 2.00 NA 0.153770 0.107790 -0.0574970 0.365040
## Hazard Ratio 1.00 2.00 NA 1.166200 NA 0.9441200 1.440600
# Variance inflation factor per coefficient, used to spot multicollinearity.
vif(COXPH.FULL.AREGIMPUTED_4)
## AGE SEX=2 SMOKING=2 SMOKING=3
## 1.310056 1.278569 1.536609 1.410042
## alcohol=2 alcohol=3 BMI SYSTH
## 1.413500 1.549665 1.106940 1.173223
## HDL DIABETES=1 SUMSCORE_5LEVELS HOMOC
## 1.206997 1.125698 1.143414 1.378802
## CREAT albumin=2 albumin=3 STENOSIS=1
## 1.735913 1.165891 1.528349 1.190245
## IMT
## 1.315947
# Print the fitted model: model tests, discrimination indexes and coefficients.
COXPH.FULL.AREGIMPUTED_4
## Cox Proportional Hazards Model
##
## cph(formula = Surv(TEVENT, EVENT) ~ ifelse(AGE > 50, (AGE - 50)^2,
## 0) + SEX + SMOKING + alcohol + BMI + SYSTH + HDL + DIABETES +
## SUMSCORE_5LEVELS + HOMOC + log(CREAT) + albumin + STENOSIS +
## IMT, data = SMART.AREGIMPUTED_4, x = T, y = T, surv = T)
##
## Model Tests Discrimination
## Indexes
## Obs 3873 LR chi2 280.66 R2 0.084
## Events 460 d.f. 17 R2(17,3873)0.066
## Center 3.3281 Pr(> chi2) 0.0000 R2(17,460)0.436
## Score chi2 349.00 Dxy 0.402
## Pr(> chi2) 0.0000
##
## Coef S.E. Wald Z Pr(>|Z|)
## AGE 0.0012 0.0002 5.63 <0.0001
## SEX=2 -0.0378 0.1353 -0.28 0.7798
## SMOKING=2 0.1678 0.1438 1.17 0.2433
## SMOKING=3 0.2240 0.2342 0.96 0.3389
## alcohol=2 -0.1559 0.1658 -0.94 0.3470
## alcohol=3 -0.1263 0.1202 -1.05 0.2932
## BMI -0.0247 0.0142 -1.74 0.0816
## SYSTH 0.0031 0.0022 1.40 0.1606
## HDL -0.3785 0.1595 -2.37 0.0177
## DIABETES=1 0.2136 0.1100 1.94 0.0521
## SUMSCORE_5LEVELS 0.3223 0.0555 5.81 <0.0001
## HOMOC 0.0140 0.0080 1.74 0.0816
## CREAT 0.5539 0.1435 3.86 0.0001
## albumin=2 0.2736 0.1165 2.35 0.0188
## albumin=3 0.4312 0.2076 2.08 0.0378
## STENOSIS=1 0.1538 0.1078 1.43 0.1537
## IMT 0.5222 0.1788 2.92 0.0035
##################################
# Formulating the FULL
# Cox Proportional Hazards Model
# Using the combined complete and
# 5TH imputation results from AREG
##################################
# Coerce the event indicator to numeric and the multi-level
# categorical predictors to factors so that cph() expands them
# into one indicator column per non-reference level.
SMART.AREGIMPUTED_5$EVENT <- as.numeric(SMART.AREGIMPUTED_5$EVENT)
SMART.AREGIMPUTED_5$SMOKING <- as.factor(SMART.AREGIMPUTED_5$SMOKING)
SMART.AREGIMPUTED_5$alcohol <- as.factor(SMART.AREGIMPUTED_5$alcohol)
SMART.AREGIMPUTED_5$albumin <- as.factor(SMART.AREGIMPUTED_5$albumin)
# Register the predictor distributions of this imputed dataset with rms;
# summary() on the fits below takes its Low/High reference values from here.
dd <- datadist(SMART.AREGIMPUTED_5)
options(datadist = "dd")
##################################
# FULL COMPLETE
##################################
# Fit the Cox model on every candidate predictor of the 5TH imputation.
# AGE enters as a squared term above 50 years (zero at or below 50)
# and CREAT enters on the log scale.
COXPH.FULL.COMPLETE.AREGIMPUTED_5 <- cph(
  Surv(TEVENT, EVENT) ~ ifelse(AGE > 50, (AGE - 50)^2, 0) +
    SEX +
    SMOKING +
    alcohol +
    BMI +
    SYSTH +
    SYSTBP +
    DIASTH +
    DIASTBP +
    WEIGHT +
    LENGTH +
    CHOL +
    LDL +
    HDL +
    DIABETES +
    SUMSCORE_5LEVELS +
    HOMOC +
    log(CREAT) +
    albumin +
    STENOSIS +
    IMT +
    TRIG +
    GLUT +
    packyrs,
  data = SMART.AREGIMPUTED_5,
  x = TRUE,
  y = TRUE,
  surv = TRUE
)
# Wald chi-square test for each predictor and for the model as a whole.
anova(COXPH.FULL.COMPLETE.AREGIMPUTED_5)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## AGE 35.82 1 <.0001
## SEX 0.69 1 0.4062
## SMOKING 0.08 2 0.9595
## alcohol 2.09 2 0.3509
## BMI 0.80 1 0.3712
## SYSTH 0.53 1 0.4685
## SYSTBP 1.28 1 0.2581
## DIASTH 0.09 1 0.7600
## DIASTBP 0.44 1 0.5092
## WEIGHT 1.39 1 0.2376
## LENGTH 0.76 1 0.3844
## CHOL 0.26 1 0.6099
## LDL 0.21 1 0.6498
## HDL 0.74 1 0.3886
## DIABETES 0.10 1 0.7462
## SUMSCORE_5LEVELS 33.23 1 <.0001
## HOMOC 0.02 1 0.8773
## CREAT 15.93 1 0.0001
## albumin 14.22 2 0.0008
## STENOSIS 1.98 1 0.1590
## IMT 8.67 1 0.0032
## TRIG 0.25 1 0.6152
## GLUT 2.79 1 0.0950
## packyrs 4.68 1 0.0305
## TOTAL 328.34 27 <.0001
# Effect estimates and hazard ratios between the Low and High reference
# values of each predictor (level contrasts for the categorical ones).
summary(COXPH.FULL.COMPLETE.AREGIMPUTED_5)
## Effects Response : Surv(TEVENT, EVENT)
##
## Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
## AGE 52.00 68.00 16.00 0.4301400 0.071865 0.289280 0.570990
## Hazard Ratio 52.00 68.00 16.00 1.5375000 NA 1.335500 1.770000
## BMI 24.11 28.73 4.62 0.4403300 0.492380 -0.524720 1.405400
## Hazard Ratio 24.11 28.73 4.62 1.5532000 NA 0.591720 4.077100
## SYSTH 127.00 156.00 29.00 0.0844850 0.116530 -0.143920 0.312890
## Hazard Ratio 127.00 156.00 29.00 1.0882000 NA 0.865960 1.367400
## SYSTBP 126.00 152.00 26.00 -0.1252400 0.110750 -0.342300 0.091818
## Hazard Ratio 126.00 152.00 26.00 0.8822800 NA 0.710140 1.096200
## DIASTH 74.00 89.00 15.00 0.0319010 0.104430 -0.172780 0.236580
## Hazard Ratio 74.00 89.00 15.00 1.0324000 NA 0.841330 1.266900
## DIASTBP 73.00 86.00 13.00 0.0640560 0.097036 -0.126130 0.254240
## Hazard Ratio 73.00 86.00 13.00 1.0662000 NA 0.881500 1.289500
## WEIGHT 72.00 89.00 17.00 -0.7144200 0.604960 -1.900100 0.471290
## Hazard Ratio 72.00 89.00 17.00 0.4894800 NA 0.149550 1.602100
## LENGTH 1.68 1.80 0.12 0.3486800 0.400830 -0.436950 1.134300
## Hazard Ratio 1.68 1.80 0.12 1.4172000 NA 0.646010 3.109000
## CHOL 4.40 5.90 1.50 0.7892000 1.546700 -2.242200 3.820600
## Hazard Ratio 4.40 5.90 1.50 2.2016000 NA 0.106230 45.631000
## LDL 2.37 3.83 1.46 -0.6877900 1.515000 -3.657100 2.281500
## Hazard Ratio 2.37 3.83 1.46 0.5026900 NA 0.025808 9.791300
## HDL 0.96 1.42 0.46 -0.4164300 0.483040 -1.363200 0.530310
## Hazard Ratio 0.96 1.42 0.46 0.6594000 NA 0.255850 1.699500
## SUMSCORE_5LEVELS 1.00 5.00 4.00 1.3091000 0.227090 0.863980 1.754100
## Hazard Ratio 1.00 5.00 4.00 3.7027000 NA 2.372600 5.778500
## HOMOC 10.50 16.10 5.60 0.0071533 0.046316 -0.083623 0.097930
## Hazard Ratio 10.50 16.10 5.60 1.0072000 NA 0.919780 1.102900
## CREAT 78.00 101.00 23.00 0.1488100 0.037288 0.075728 0.221900
## Hazard Ratio 78.00 101.00 23.00 1.1605000 NA 1.078700 1.248400
## IMT 0.75 1.07 0.32 0.1701200 0.057789 0.056857 0.283390
## Hazard Ratio 0.75 1.07 0.32 1.1855000 NA 1.058500 1.327600
## TRIG 1.13 2.23 1.10 -0.2556900 0.508600 -1.252500 0.741160
## Hazard Ratio 1.13 2.23 1.10 0.7743900 NA 0.285780 2.098400
## GLUT 5.30 6.50 1.20 0.0588280 0.035239 -0.010239 0.127900
## Hazard Ratio 5.30 6.50 1.20 1.0606000 NA 0.989810 1.136400
## packyrs 5.90 34.20 28.30 0.1556900 0.071942 0.014681 0.296690
## Hazard Ratio 5.90 34.20 28.30 1.1685000 NA 1.014800 1.345400
## SEX - 2:1 1.00 2.00 NA -0.1345300 0.161970 -0.451990 0.182920
## Hazard Ratio 1.00 2.00 NA 0.8741200 NA 0.636360 1.200700
## SMOKING - 1:2 2.00 1.00 NA -0.0140640 0.161680 -0.330940 0.302820
## Hazard Ratio 2.00 1.00 NA 0.9860300 NA 0.718250 1.353700
## SMOKING - 3:2 2.00 3.00 NA 0.0520460 0.198480 -0.336970 0.441060
## Hazard Ratio 2.00 3.00 NA 1.0534000 NA 0.713930 1.554400
## alcohol - 1:3 3.00 1.00 NA 0.1590900 0.121030 -0.078122 0.396300
## Hazard Ratio 3.00 1.00 NA 1.1724000 NA 0.924850 1.486300
## alcohol - 2:3 3.00 2.00 NA -0.0402250 0.147920 -0.330130 0.249680
## Hazard Ratio 3.00 2.00 NA 0.9605700 NA 0.718830 1.283600
## DIABETES - 1:0 1.00 2.00 NA 0.0505060 0.156070 -0.255380 0.356390
## Hazard Ratio 1.00 2.00 NA 1.0518000 NA 0.774620 1.428200
## albumin - 2:1 1.00 2.00 NA 0.2777100 0.118500 0.045455 0.509960
## Hazard Ratio 1.00 2.00 NA 1.3201000 NA 1.046500 1.665200
## albumin - 3:1 1.00 3.00 NA 0.6964900 0.197790 0.308820 1.084200
## Hazard Ratio 1.00 3.00 NA 2.0067000 NA 1.361800 2.956900
## STENOSIS - 1:0 1.00 2.00 NA 0.1528100 0.108490 -0.059828 0.365450
## Hazard Ratio 1.00 2.00 NA 1.1651000 NA 0.941930 1.441200
# Variance inflation factor per coefficient, used to spot multicollinearity.
vif(COXPH.FULL.COMPLETE.AREGIMPUTED_5)
## AGE SEX=2 SMOKING=2 SMOKING=3
## 1.418819 1.831725 1.955859 1.544054
## alcohol=2 alcohol=3 BMI SYSTH
## 1.435126 1.571437 63.692998 3.876737
## SYSTBP DIASTH DIASTBP WEIGHT
## 3.436950 3.081005 2.682241 88.457160
## LENGTH CHOL LDL HDL
## 34.762923 558.992648 495.027328 52.120441
## DIABETES=1 SUMSCORE_5LEVELS HOMOC CREAT
## 2.257665 1.183474 1.509397 1.793220
## albumin=2 albumin=3 STENOSIS=1 IMT
## 1.192460 1.610680 1.211036 1.337456
## TRIG GLUT packyrs
## 134.677350 2.172496 1.385593
# Print the fitted model: model tests, discrimination indexes and coefficients.
COXPH.FULL.COMPLETE.AREGIMPUTED_5
## Cox Proportional Hazards Model
##
## cph(formula = Surv(TEVENT, EVENT) ~ ifelse(AGE > 50, (AGE - 50)^2,
## 0) + SEX + SMOKING + alcohol + BMI + SYSTH + SYSTBP + DIASTH +
## DIASTBP + WEIGHT + LENGTH + CHOL + LDL + HDL + DIABETES +
## SUMSCORE_5LEVELS + HOMOC + log(CREAT) + albumin + STENOSIS +
## IMT + TRIG + GLUT + packyrs, data = SMART.AREGIMPUTED_5,
## x = T, y = T, surv = T)
##
## Model Tests Discrimination
## Indexes
## Obs 3873 LR chi2 293.29 R2 0.087
## Events 460 d.f. 27 R2(27,3873)0.066
## Center 8.4236 Pr(> chi2) 0.0000 R2(27,460)0.439
## Score chi2 368.04 Dxy 0.396
## Pr(> chi2) 0.0000
##
## Coef S.E. Wald Z Pr(>|Z|)
## AGE 0.0013 0.0002 5.99 <0.0001
## SEX=2 -0.1345 0.1620 -0.83 0.4062
## SMOKING=2 0.0141 0.1617 0.09 0.9307
## SMOKING=3 0.0661 0.2412 0.27 0.7840
## alcohol=2 -0.1993 0.1671 -1.19 0.2329
## alcohol=3 -0.1591 0.1210 -1.31 0.1887
## BMI 0.0953 0.1066 0.89 0.3712
## SYSTH 0.0029 0.0040 0.72 0.4685
## SYSTBP -0.0048 0.0043 -1.13 0.2581
## DIASTH 0.0021 0.0070 0.31 0.7600
## DIASTBP 0.0049 0.0075 0.66 0.5092
## WEIGHT -0.0420 0.0356 -1.18 0.2376
## LENGTH 2.9056 3.3403 0.87 0.3844
## CHOL 0.5261 1.0311 0.51 0.6099
## LDL -0.4711 1.0376 -0.45 0.6498
## HDL -0.9053 1.0501 -0.86 0.3886
## DIABETES=1 0.0505 0.1561 0.32 0.7462
## SUMSCORE_5LEVELS 0.3273 0.0568 5.76 <0.0001
## HOMOC 0.0013 0.0083 0.15 0.8773
## CREAT 0.5759 0.1443 3.99 <0.0001
## albumin=2 0.2777 0.1185 2.34 0.0191
## albumin=3 0.6965 0.1978 3.52 0.0004
## STENOSIS=1 0.1528 0.1085 1.41 0.1590
## IMT 0.5316 0.1806 2.94 0.0032
## TRIG -0.2324 0.4624 -0.50 0.6152
## GLUT 0.0490 0.0294 1.67 0.0950
## packyrs 0.0055 0.0025 2.16 0.0305
##################################
# FULL
# After removing variables which are:
# High multicollinearity contributors
# Non-standard risk factors
# Minimal predictors of survival outcome based from initial exploration
# Minimal predictors of survival outcome based from domain knowledge and literature
##################################
# Refit on the retained predictors of the 5TH imputation. AGE enters as a
# squared term above 50 years (zero at or below 50); CREAT is log-transformed.
COXPH.FULL.AREGIMPUTED_5 <- cph(
  Surv(TEVENT, EVENT) ~ ifelse(AGE > 50, (AGE - 50)^2, 0) +
    SEX +
    SMOKING +
    alcohol +
    BMI +
    SYSTH +
    HDL +
    DIABETES +
    SUMSCORE_5LEVELS +
    HOMOC +
    log(CREAT) +
    albumin +
    STENOSIS +
    IMT,
  data = SMART.AREGIMPUTED_5,
  x = TRUE,
  y = TRUE,
  surv = TRUE
)
# Wald chi-square test for each predictor and for the model as a whole.
anova(COXPH.FULL.AREGIMPUTED_5)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## AGE 36.75 1 <.0001
## SEX 0.23 1 0.6330
## SMOKING 1.54 2 0.4621
## alcohol 1.65 2 0.4376
## BMI 2.85 1 0.0913
## SYSTH 0.73 1 0.3927
## HDL 5.79 1 0.0161
## DIABETES 2.87 1 0.0902
## SUMSCORE_5LEVELS 34.24 1 <.0001
## HOMOC 0.12 1 0.7297
## CREAT 14.31 1 0.0002
## albumin 14.00 2 0.0009
## STENOSIS 2.29 1 0.1301
## IMT 8.16 1 0.0043
## TOTAL 319.93 17 <.0001
# Effect estimates and hazard ratios between the Low and High reference
# values of each predictor (level contrasts for the categorical ones).
summary(COXPH.FULL.AREGIMPUTED_5)
## Effects Response : Surv(TEVENT, EVENT)
##
## Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
## AGE 52.00 68.00 16.00 0.417560 0.068876 0.282560 0.552550
## Hazard Ratio 52.00 68.00 16.00 1.518300 NA 1.326500 1.737700
## BMI 24.11 28.73 4.62 -0.110470 0.065425 -0.238700 0.017763
## Hazard Ratio 24.11 28.73 4.62 0.895420 NA 0.787650 1.017900
## SYSTH 127.00 156.00 29.00 0.054260 0.063487 -0.070172 0.178690
## Hazard Ratio 127.00 156.00 29.00 1.055800 NA 0.932230 1.195700
## HDL 0.96 1.42 0.46 -0.176580 0.073406 -0.320450 -0.032708
## Hazard Ratio 0.96 1.42 0.46 0.838130 NA 0.725820 0.967820
## SUMSCORE_5LEVELS 1.00 5.00 4.00 1.309800 0.223860 0.871080 1.748600
## Hazard Ratio 1.00 5.00 4.00 3.705600 NA 2.389500 5.746600
## HOMOC 10.50 16.10 5.60 0.015627 0.045224 -0.073011 0.104260
## Hazard Ratio 10.50 16.10 5.60 1.015700 NA 0.929590 1.109900
## CREAT 78.00 101.00 23.00 0.137640 0.036383 0.066330 0.208950
## Hazard Ratio 78.00 101.00 23.00 1.147600 NA 1.068600 1.232400
## IMT 0.75 1.07 0.32 0.161310 0.056455 0.050656 0.271960
## Hazard Ratio 0.75 1.07 0.32 1.175000 NA 1.052000 1.312500
## SEX - 2:1 1.00 2.00 NA -0.064458 0.135000 -0.329050 0.200140
## Hazard Ratio 1.00 2.00 NA 0.937580 NA 0.719600 1.221600
## SMOKING - 1:2 2.00 1.00 NA -0.167940 0.144060 -0.450300 0.114420
## Hazard Ratio 2.00 1.00 NA 0.845410 NA 0.637440 1.121200
## SMOKING - 3:2 2.00 3.00 NA 0.058064 0.197470 -0.328970 0.445100
## Hazard Ratio 2.00 3.00 NA 1.059800 NA 0.719670 1.560600
## alcohol - 1:3 3.00 1.00 NA 0.139280 0.120460 -0.096822 0.375380
## Hazard Ratio 3.00 1.00 NA 1.149400 NA 0.907720 1.455500
## alcohol - 2:3 3.00 2.00 NA -0.039480 0.147280 -0.328150 0.249190
## Hazard Ratio 3.00 2.00 NA 0.961290 NA 0.720260 1.283000
## DIABETES - 1:0 1.00 2.00 NA 0.186040 0.109820 -0.029193 0.401270
## Hazard Ratio 1.00 2.00 NA 1.204500 NA 0.971230 1.493700
## albumin - 2:1 1.00 2.00 NA 0.280910 0.117820 0.049979 0.511840
## Hazard Ratio 1.00 2.00 NA 1.324300 NA 1.051200 1.668400
## albumin - 3:1 1.00 3.00 NA 0.673130 0.195120 0.290710 1.055500
## Hazard Ratio 1.00 3.00 NA 1.960400 NA 1.337400 2.873500
## STENOSIS - 1:0 1.00 2.00 NA 0.162680 0.107470 -0.047954 0.373320
## Hazard Ratio 1.00 2.00 NA 1.176700 NA 0.953180 1.452500
# Variance inflation factor per coefficient, used to spot multicollinearity.
vif(COXPH.FULL.AREGIMPUTED_5)
## AGE SEX=2 SMOKING=2 SMOKING=3
## 1.309269 1.272356 1.552807 1.413467
## alcohol=2 alcohol=3 BMI SYSTH
## 1.422649 1.556864 1.108079 1.161651
## HDL DIABETES=1 SUMSCORE_5LEVELS HOMOC
## 1.212141 1.117513 1.165569 1.443350
## CREAT albumin=2 albumin=3 STENOSIS=1
## 1.723397 1.179223 1.567982 1.188038
## IMT
## 1.294708
# Print the fitted model: model tests, discrimination indexes and coefficients.
COXPH.FULL.AREGIMPUTED_5
## Cox Proportional Hazards Model
##
## cph(formula = Surv(TEVENT, EVENT) ~ ifelse(AGE > 50, (AGE - 50)^2,
## 0) + SEX + SMOKING + alcohol + BMI + SYSTH + HDL + DIABETES +
## SUMSCORE_5LEVELS + HOMOC + log(CREAT) + albumin + STENOSIS +
## IMT, data = SMART.AREGIMPUTED_5, x = T, y = T, surv = T)
##
## Model Tests Discrimination
## Indexes
## Obs 3873 LR chi2 279.73 R2 0.083
## Events 460 d.f. 17 R2(17,3873)0.066
## Center 2.9113 Pr(> chi2) 0.0000 R2(17,460)0.435
## Score chi2 356.53 Dxy 0.394
## Pr(> chi2) 0.0000
##
## Coef S.E. Wald Z Pr(>|Z|)
## AGE 0.0013 0.0002 6.06 <0.0001
## SEX=2 -0.0645 0.1350 -0.48 0.6330
## SMOKING=2 0.1679 0.1441 1.17 0.2437
## SMOKING=3 0.2260 0.2307 0.98 0.3273
## alcohol=2 -0.1788 0.1664 -1.07 0.2826
## alcohol=3 -0.1393 0.1205 -1.16 0.2476
## BMI -0.0239 0.0142 -1.69 0.0913
## SYSTH 0.0019 0.0022 0.85 0.3927
## HDL -0.3839 0.1596 -2.41 0.0161
## DIABETES=1 0.1860 0.1098 1.69 0.0902
## SUMSCORE_5LEVELS 0.3275 0.0560 5.85 <0.0001
## HOMOC 0.0028 0.0081 0.35 0.7297
## CREAT 0.5326 0.1408 3.78 0.0002
## albumin=2 0.2809 0.1178 2.38 0.0171
## albumin=3 0.6731 0.1951 3.45 0.0006
## STENOSIS=1 0.1627 0.1075 1.51 0.1301
## IMT 0.5041 0.1764 2.86 0.0043
##################################
# Consolidating all coefficients from the
# FULL Cox Proportional Hazards Model
# Using the combined complete and
# 1ST to 5TH imputation results from AREG
##################################
# Stack the five coefficient vectors row-wise (one row per imputation).
# The outer parentheses make R print the assigned matrix.
(COXPH.FULL.AREGIMPUTED_12345.Coefficients <- rbind(
  coef(COXPH.FULL.AREGIMPUTED_1),
  coef(COXPH.FULL.AREGIMPUTED_2),
  coef(COXPH.FULL.AREGIMPUTED_3),
  coef(COXPH.FULL.AREGIMPUTED_4),
  coef(COXPH.FULL.AREGIMPUTED_5)
))
## AGE SEX=2 SMOKING=2 SMOKING=3 alcohol=2 alcohol=3
## [1,] 0.001319438 -0.05323006 0.1836178 0.2504782 -0.1558516 -0.1263426
## [2,] 0.001290295 -0.05216943 0.1842736 0.2450232 -0.1599791 -0.1196699
## [3,] 0.001316533 -0.04721299 0.1932424 0.2983591 -0.1446361 -0.1145161
## [4,] 0.001212739 -0.03784236 0.1678314 0.2239712 -0.1559366 -0.1263267
## [5,] 0.001304874 -0.06445816 0.1679385 0.2260025 -0.1787601 -0.1392797
## BMI SYSTH HDL DIABETES=1 SUMSCORE_5LEVELS HOMOC
## [1,] -0.02475359 0.001068306 -0.3776243 0.2089031 0.3318611 0.001016311
## [2,] -0.02363049 0.002205609 -0.3845823 0.1856939 0.3332411 0.003646549
## [3,] -0.02409281 0.002894025 -0.3946006 0.1958399 0.3346853 0.004229653
## [4,] -0.02468578 0.003082448 -0.3784896 0.2136157 0.3222527 0.013965456
## [5,] -0.02391059 0.001871029 -0.3838719 0.1860407 0.3274619 0.002790467
## CREAT albumin=2 albumin=3 STENOSIS=1 IMT
## [1,] 0.5999677 0.2645359 0.5622142 0.1708450 0.5275760
## [2,] 0.5580850 0.2304765 0.6361029 0.1740932 0.5466388
## [3,] 0.5812481 0.1947722 0.5254523 0.1978645 0.4569895
## [4,] 0.5538597 0.2736406 0.4311872 0.1537693 0.5222199
## [5,] 0.5326395 0.2809080 0.6731279 0.1626805 0.5040841
# Label each row of the coefficient matrix with the imputation it came from.
rownames(COXPH.FULL.AREGIMPUTED_12345.Coefficients) <- c(
  "aregImpute_1",
  "aregImpute_2",
  "aregImpute_3",
  "aregImpute_4",
  "aregImpute_5"
)
# Compare the estimates across imputations, one panel per coefficient.
# Each panel gets its own x scale because the coefficients differ by
# orders of magnitude. The dangling trailing comma after xlab was removed
# so that no empty argument is passed on to lattice.
barchart(COXPH.FULL.AREGIMPUTED_12345.Coefficients,
  groups = rownames(COXPH.FULL.AREGIMPUTED_12345.Coefficients),
  scales = list(x = "free"),
  auto.key = list(columns = 5),
  layout = c(4, 5),
  xlab = "Estimated Coefficients"
)
##################################
# Loading the 3RD imputation results from AREG
# for the subsequent model specification
##################################
# Point rms back at the 3RD imputed dataset. The last datadist registered
# above describes the 5TH imputation, and summary() would otherwise take
# its Low/High reference values from the wrong dataset.
dd <- datadist(SMART.AREGIMPUTED_3)
options(datadist = "dd")
##################################
# FULL MODEL
# with 14 variables
##################################
# AGE enters as a squared term above 50 years (zero at or below 50) and
# CREAT on the log scale. x and y keep the design matrix and response on
# the fit; both are read back later via $x and $y.
COXPH.FULL.AREGIMPUTED_3 <- cph(
  Surv(TEVENT, EVENT) ~ ifelse(AGE > 50, (AGE - 50)^2, 0) +
    SEX +
    SMOKING +
    alcohol +
    BMI +
    SYSTH +
    HDL +
    DIABETES +
    SUMSCORE_5LEVELS +
    HOMOC +
    log(CREAT) +
    albumin +
    STENOSIS +
    IMT,
  data = SMART.AREGIMPUTED_3,
  x = TRUE,
  y = TRUE,
  surv = TRUE
)
# Wald chi-square test for each predictor and for the model as a whole.
anova(COXPH.FULL.AREGIMPUTED_3)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## AGE 37.94 1 <.0001
## SEX 0.12 1 0.7269
## SMOKING 2.25 2 0.3250
## alcohol 1.11 2 0.5753
## BMI 2.88 1 0.0899
## SYSTH 1.69 1 0.1939
## HDL 6.08 1 0.0137
## DIABETES 3.15 1 0.0761
## SUMSCORE_5LEVELS 36.05 1 <.0001
## HOMOC 0.24 1 0.6207
## CREAT 15.67 1 0.0001
## albumin 7.32 2 0.0257
## STENOSIS 3.43 1 0.0642
## IMT 6.57 1 0.0103
## TOTAL 311.06 17 <.0001
# Effect estimates and hazard ratios between the Low and High reference
# values of each predictor (level contrasts for the categorical ones).
summary(COXPH.FULL.AREGIMPUTED_3)
## Effects Response : Surv(TEVENT, EVENT)
##
## Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
## AGE 52.00 68.00 16.00 0.421290 0.068396 0.287240 0.555340
## Hazard Ratio 52.00 68.00 16.00 1.523900 NA 1.332700 1.742500
## BMI 24.11 28.73 4.62 -0.111310 0.065642 -0.239960 0.017347
## Hazard Ratio 24.11 28.73 4.62 0.894660 NA 0.786660 1.017500
## SYSTH 127.00 156.00 29.00 0.083927 0.064609 -0.042704 0.210560
## Hazard Ratio 127.00 156.00 29.00 1.087500 NA 0.958200 1.234400
## HDL 0.96 1.42 0.46 -0.181520 0.073610 -0.325790 -0.037244
## Hazard Ratio 0.96 1.42 0.46 0.834000 NA 0.721960 0.963440
## SUMSCORE_5LEVELS 1.00 5.00 4.00 1.338700 0.222960 0.901750 1.775700
## Hazard Ratio 1.00 5.00 4.00 3.814200 NA 2.463900 5.904600
## HOMOC 10.50 16.10 5.60 0.023686 0.047858 -0.070114 0.117490
## Hazard Ratio 10.50 16.10 5.60 1.024000 NA 0.932290 1.124700
## CREAT 78.00 101.00 23.00 0.150200 0.037942 0.075835 0.224570
## Hazard Ratio 78.00 101.00 23.00 1.162100 NA 1.078800 1.251800
## IMT 0.75 1.07 0.32 0.146240 0.057032 0.034457 0.258020
## Hazard Ratio 0.75 1.07 0.32 1.157500 NA 1.035100 1.294400
## SEX - 2:1 1.00 2.00 NA -0.047213 0.135210 -0.312210 0.217790
## Hazard Ratio 1.00 2.00 NA 0.953880 NA 0.731830 1.243300
## SMOKING - 1:2 2.00 1.00 NA -0.193240 0.144660 -0.476770 0.090286
## Hazard Ratio 2.00 1.00 NA 0.824280 NA 0.620780 1.094500
## SMOKING - 3:2 2.00 3.00 NA 0.105120 0.197840 -0.282640 0.492880
## Hazard Ratio 2.00 3.00 NA 1.110800 NA 0.753790 1.637000
## alcohol - 1:3 3.00 1.00 NA 0.114520 0.120600 -0.121850 0.350880
## Hazard Ratio 3.00 1.00 NA 1.121300 NA 0.885280 1.420300
## alcohol - 2:3 3.00 2.00 NA -0.030120 0.147370 -0.318970 0.258730
## Hazard Ratio 3.00 2.00 NA 0.970330 NA 0.726900 1.295300
## DIABETES - 1:0 1.00 2.00 NA 0.195840 0.110430 -0.020590 0.412270
## Hazard Ratio 1.00 2.00 NA 1.216300 NA 0.979620 1.510200
## albumin - 2:1 1.00 2.00 NA 0.194770 0.118990 -0.038447 0.427990
## Hazard Ratio 1.00 2.00 NA 1.215000 NA 0.962280 1.534200
## albumin - 3:1 1.00 3.00 NA 0.525450 0.207680 0.118400 0.932500
## Hazard Ratio 1.00 3.00 NA 1.691200 NA 1.125700 2.540900
## STENOSIS - 1:0 1.00 2.00 NA 0.197860 0.106910 -0.011678 0.407410
## Hazard Ratio 1.00 2.00 NA 1.218800 NA 0.988390 1.502900
# Print the fitted model: model tests, discrimination indexes and coefficients.
COXPH.FULL.AREGIMPUTED_3
## Cox Proportional Hazards Model
##
## cph(formula = Surv(TEVENT, EVENT) ~ ifelse(AGE > 50, (AGE - 50)^2,
## 0) + SEX + SMOKING + alcohol + BMI + SYSTH + HDL + DIABETES +
## SUMSCORE_5LEVELS + HOMOC + log(CREAT) + albumin + STENOSIS +
## IMT, data = SMART.AREGIMPUTED_3, x = T, y = T, surv = T)
##
## Model Tests Discrimination
## Indexes
## Obs 3873 LR chi2 274.70 R2 0.082
## Events 460 d.f. 17 R2(17,3873)0.064
## Center 3.2859 Pr(> chi2) 0.0000 R2(17,460)0.429
## Score chi2 344.55 Dxy 0.391
## Pr(> chi2) 0.0000
##
## Coef S.E. Wald Z Pr(>|Z|)
## AGE 0.0013 0.0002 6.16 <0.0001
## SEX=2 -0.0472 0.1352 -0.35 0.7269
## SMOKING=2 0.1932 0.1447 1.34 0.1816
## SMOKING=3 0.2984 0.2320 1.29 0.1985
## alcohol=2 -0.1446 0.1659 -0.87 0.3833
## alcohol=3 -0.1145 0.1206 -0.95 0.3423
## BMI -0.0241 0.0142 -1.70 0.0899
## SYSTH 0.0029 0.0022 1.30 0.1939
## HDL -0.3946 0.1600 -2.47 0.0137
## DIABETES=1 0.1958 0.1104 1.77 0.0761
## SUMSCORE_5LEVELS 0.3347 0.0557 6.00 <0.0001
## HOMOC 0.0042 0.0085 0.49 0.6207
## CREAT 0.5812 0.1468 3.96 <0.0001
## albumin=2 0.1948 0.1190 1.64 0.1017
## albumin=3 0.5255 0.2077 2.53 0.0114
## STENOSIS=1 0.1979 0.1069 1.85 0.0642
## IMT 0.4570 0.1782 2.56 0.0103
# Concordance (C index) of the full model. The linear predictor is negated
# because a higher predicted hazard should pair with a shorter survival time.
# The component is spelled out in full (linear.predictors) instead of relying
# on partial matching of `$linear.pred`.
rcorr.cens(
  -as.numeric(COXPH.FULL.AREGIMPUTED_3$linear.predictors),
  COXPH.FULL.AREGIMPUTED_3$y
)[1]
## C Index
## 0.6952528
##################################
# REDUCED MODEL
# with 14 variables evaluated
# using backward stepwise selection
# resulting in 9 variables
##################################
# Fast backward elimination on the full model, using the AIC stopping rule
# applied to individual terms.
BackwardElimination <- fastbw(COXPH.FULL.AREGIMPUTED_3,
  rule = "aic",
  type = "individual"
)
# Refit the model on only the design-matrix columns that survived elimination.
COXPH.BACKWARDELIMINATION.AREGIMPUTED_3 <- update(
  COXPH.FULL.AREGIMPUTED_3,
  ~ COXPH.FULL.AREGIMPUTED_3$x[, BackwardElimination$parms.kept]
)
# Print the refitted model.
COXPH.BACKWARDELIMINATION.AREGIMPUTED_3
## Cox Proportional Hazards Model
##
## cph(formula = Surv(TEVENT, EVENT) ~ COXPH.FULL.AREGIMPUTED_3$x[,
## BackwardElimination$parms.kept], data = SMART.AREGIMPUTED_3,
## x = T, y = T, surv = T)
##
## Model Tests Discrimination
## Indexes
## Obs 3873 LR chi2 269.65 R2 0.080
## Events 460 d.f. 10 R2(10,3873)0.065
## Center 2.9008 Pr(> chi2) 0.0000 R2(10,460)0.431
## Score chi2 340.60 Dxy 0.384
## Pr(> chi2) 0.0000
##
## Coef S.E. Wald Z Pr(>|Z|)
## AGE 0.0013 0.0002 6.47 <0.0001
## BMI -0.0249 0.0141 -1.76 0.0780
## HDL -0.4111 0.1528 -2.69 0.0071
## DIABETES=1 0.1896 0.1097 1.73 0.0840
## SUMSCORE_5LEVELS 0.3452 0.0540 6.39 <0.0001
## CREAT 0.6076 0.1334 4.56 <0.0001
## albumin=2 0.2321 0.1176 1.97 0.0485
## albumin=3 0.5644 0.2060 2.74 0.0061
## STENOSIS=1 0.2291 0.1051 2.18 0.0293
## IMT 0.5059 0.1749 2.89 0.0038
# Refit the reduced model with an explicit formula on the predictors kept by
# backward elimination. AGE enters as a squared term above 50 years (zero at
# or below 50) and CREAT on the log scale.
COXPH.REDUCED.AREGIMPUTED_3 <- cph(
  Surv(TEVENT, EVENT) ~ ifelse(AGE > 50, (AGE - 50)^2, 0) +
    BMI +
    HDL +
    DIABETES +
    SUMSCORE_5LEVELS +
    log(CREAT) +
    albumin +
    STENOSIS +
    IMT,
  data = SMART.AREGIMPUTED_3,
  x = TRUE,
  y = TRUE,
  surv = TRUE
)
# Wald chi-square test for each predictor and for the model as a whole.
anova(COXPH.REDUCED.AREGIMPUTED_3)
## Wald Statistics Response: Surv(TEVENT, EVENT)
##
## Factor Chi-Square d.f. P
## AGE 41.87 1 <.0001
## BMI 3.11 1 0.0780
## HDL 7.24 1 0.0071
## DIABETES 2.99 1 0.0840
## SUMSCORE_5LEVELS 40.87 1 <.0001
## CREAT 20.75 1 <.0001
## albumin 9.09 2 0.0106
## STENOSIS 4.75 1 0.0293
## IMT 8.37 1 0.0038
## TOTAL 309.72 10 <.0001
# Effect summary of the reduced model: for each predictor the output lists the
# Low/High values being contrasted, the effect on the log-hazard scale and the
# corresponding hazard ratio with 0.95 confidence limits.
summary(COXPH.REDUCED.AREGIMPUTED_3)
## Effects Response : Surv(TEVENT, EVENT)
##
## Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
## AGE 52.00 68.00 16.00 0.42636 0.065888 0.2972200 0.555500
## Hazard Ratio 52.00 68.00 16.00 1.53170 NA 1.3461000 1.742800
## BMI 24.11 28.73 4.62 -0.11510 0.065302 -0.2430900 0.012885
## Hazard Ratio 24.11 28.73 4.62 0.89127 NA 0.7842000 1.013000
## HDL 0.96 1.42 0.46 -0.18912 0.070306 -0.3269100 -0.051318
## Hazard Ratio 0.96 1.42 0.46 0.82769 NA 0.7211500 0.949980
## SUMSCORE_5LEVELS 1.00 5.00 4.00 1.38100 0.216010 0.9575800 1.804300
## Hazard Ratio 1.00 5.00 4.00 3.97870 NA 2.6054000 6.076000
## CREAT 78.00 101.00 23.00 0.15700 0.034464 0.0894550 0.224550
## Hazard Ratio 78.00 101.00 23.00 1.17000 NA 1.0936000 1.251800
## IMT 0.75 1.07 0.32 0.16190 0.055968 0.0522010 0.271590
## Hazard Ratio 0.75 1.07 0.32 1.17570 NA 1.0536000 1.312000
## DIABETES - 1:0 1.00 2.00 NA 0.18957 0.109700 -0.0254450 0.404580
## Hazard Ratio 1.00 2.00 NA 1.20870 NA 0.9748800 1.498700
## albumin - 2:1 1.00 2.00 NA 0.23207 0.117640 0.0014922 0.462640
## Hazard Ratio 1.00 2.00 NA 1.26120 NA 1.0015000 1.588300
## albumin - 3:1 1.00 3.00 NA 0.56442 0.206000 0.1606600 0.968180
## Hazard Ratio 1.00 3.00 NA 1.75840 NA 1.1743000 2.633100
## STENOSIS - 1:0 1.00 2.00 NA 0.22905 0.105070 0.0231120 0.434990
## Hazard Ratio 1.00 2.00 NA 1.25740 NA 1.0234000 1.545000
# Print the fitted reduced model: the echoed call, model tests, discrimination
# indexes and the coefficient table.
COXPH.REDUCED.AREGIMPUTED_3
## Cox Proportional Hazards Model
##
## cph(formula = Surv(TEVENT, EVENT) ~ ifelse(AGE > 50, (AGE - 50)^2,
## 0) + BMI + HDL + DIABETES + SUMSCORE_5LEVELS + log(CREAT) +
## albumin + STENOSIS + IMT, data = SMART.AREGIMPUTED_3, x = T,
## y = T, surv = T)
##
## Model Tests Discrimination
## Indexes
## Obs 3873 LR chi2 269.65 R2 0.080
## Events 460 d.f. 10 R2(10,3873)0.065
## Center 2.9008 Pr(> chi2) 0.0000 R2(10,460)0.431
## Score chi2 340.60 Dxy 0.384
## Pr(> chi2) 0.0000
##
## Coef S.E. Wald Z Pr(>|Z|)
## AGE 0.0013 0.0002 6.47 <0.0001
## BMI -0.0249 0.0141 -1.76 0.0780
## HDL -0.4111 0.1528 -2.69 0.0071
## DIABETES=1 0.1896 0.1097 1.73 0.0840
## SUMSCORE_5LEVELS 0.3452 0.0540 6.39 <0.0001
## CREAT 0.6076 0.1334 4.56 <0.0001
## albumin=2 0.2321 0.1176 1.97 0.0485
## albumin=3 0.5644 0.2060 2.74 0.0061
## STENOSIS=1 0.2291 0.1051 2.18 0.0293
## IMT 0.5059 0.1749 2.89 0.0038
# Concordance of the reduced model: rcorr.cens() receives the negated linear
# predictor and the survival response stored on the fit (y); [1] keeps only
# the first element of the result, which prints below as "C Index".
# NOTE(review): the negation presumably aligns "higher predicted risk" with
# "shorter survival" for rcorr.cens - confirm against the Hmisc documentation.
rcorr.cens(-as.numeric(COXPH.REDUCED.AREGIMPUTED_3$linear.pred),COXPH.REDUCED.AREGIMPUTED_3$y)[1]
## C Index
## 0.6917632
##################################
# Formulating the FULL and REDUCED models
##################################
# Materialise the transformed predictors as data columns so that the models
# below can refer to them by name: squared years of AGE above 50 (0 at or
# below 50) and the natural log of CREAT.
SMART.AREGIMPUTED_3$AGE_SQUAREDAFTER50 <- ifelse(SMART.AREGIMPUTED_3$AGE > 50,
                                                 (SMART.AREGIMPUTED_3$AGE - 50)^2,
                                                 0)
SMART.AREGIMPUTED_3$CREAT_LOG <- log(SMART.AREGIMPUTED_3$CREAT)

# Survival response, passed to finalfit() as a string.
COXPH.AREGIMPUTED_3.SurvivalResponse <- "Surv(TEVENT, EVENT)"

# Predictor set of the FULL model (14 variables).
COXPH.AREGIMPUTED_3.SurvivalPredictorFull <- c("AGE_SQUAREDAFTER50",
                                               "SEX",
                                               "SMOKING",
                                               "alcohol",
                                               "BMI",
                                               "SYSTH",
                                               "HDL",
                                               "DIABETES",
                                               "SUMSCORE_5LEVELS",
                                               "HOMOC",
                                               "CREAT_LOG",
                                               "albumin",
                                               "STENOSIS",
                                               "IMT")

# Predictor set of the REDUCED model (9 variables).
COXPH.AREGIMPUTED_3.SurvivalPredictorReduced <- c("AGE_SQUAREDAFTER50",
                                                  "BMI",
                                                  "HDL",
                                                  "DIABETES",
                                                  "SUMSCORE_5LEVELS",
                                                  "CREAT_LOG",
                                                  "albumin",
                                                  "STENOSIS",
                                                  "IMT")

# Side-by-side summary table of the univariable, full multivariable and
# reduced multivariable fits.
# The `levels` and `all` columns get blank headers; the two headers use a
# different number of spaces because rename() needs unique column names.
# NOTE(review): the exact blank names in the original report could not be
# recovered from the rendered text - confirm against the source notebook.
COXPH.AREGIMPUTED_3.CoxPHModelSummary <- SMART.AREGIMPUTED_3 %>%
  finalfit(COXPH.AREGIMPUTED_3.SurvivalResponse,
           COXPH.AREGIMPUTED_3.SurvivalPredictorFull,
           COXPH.AREGIMPUTED_3.SurvivalPredictorReduced,
           keep_models = TRUE,
           add_dependent_label = FALSE) %>%
  rename("Overall Survival" = label) %>%
  rename(" " = levels) %>%
  rename("  " = all)

kable(COXPH.AREGIMPUTED_3.CoxPHModelSummary)
Overall Survival | HR (univariable) | HR (multivariable full) | HR (multivariable) | |||
---|---|---|---|---|---|---|
1 | AGE_SQUAREDAFTER50 | Mean (SD) | 189.0 (217.0) | 1.00 (1.00-1.00, p<0.001) | 1.00 (1.00-1.00, p<0.001) | 1.00 (1.00-1.00, p<0.001) |
15 | SEX | 1 | 2897 (74.8) | - | - | - |
16 | 2 | 976 (25.2) | 0.67 (0.53-0.85, p=0.001) | 0.95 (0.73-1.24, p=0.725) | - | |
17 | SMOKING | 1 | 695 (17.9) | - | - | - |
18 | 2 | 2731 (70.5) | 1.29 (0.99-1.68, p=0.060) | 1.21 (0.91-1.61, p=0.182) | - | |
19 | 3 | 447 (11.5) | 1.16 (0.74-1.80, p=0.516) | 1.35 (0.86-2.12, p=0.199) | - | |
5 | alcohol | 1 | 759 (19.6) | - | - | - |
6 | 2 | 410 (10.6) | 1.02 (0.74-1.39, p=0.914) | 0.87 (0.63-1.20, p=0.383) | - | |
7 | 3 | 2704 (69.8) | 0.78 (0.62-0.97, p=0.024) | 0.89 (0.70-1.13, p=0.342) | - | |
8 | BMI | Mean (SD) | 26.7 (3.8) | 0.98 (0.95-1.00, p=0.052) | 0.98 (0.95-1.00, p=0.090) | 0.98 (0.95-1.00, p=0.078) |
23 | SYSTH | Mean (SD) | 143.0 (22.2) | 1.01 (1.01-1.02, p<0.001) | 1.00 (1.00-1.01, p=0.194) | - |
12 | HDL | Mean (SD) | 1.2 (0.4) | 0.60 (0.45-0.80, p<0.001) | 0.67 (0.49-0.92, p=0.014) | 0.66 (0.49-0.89, p=0.007) |
10 | DIABETES | 0 | 3024 (78.1) | - | - | - |
11 | 1 | 849 (21.9) | 1.47 (1.20-1.80, p<0.001) | 1.22 (0.98-1.51, p=0.076) | 1.21 (0.97-1.50, p=0.084) | |
22 | SUMSCORE_5LEVELS | Mean (SD) | 1.3 (0.6) | 1.73 (1.57-1.90, p<0.001) | 1.40 (1.25-1.56, p<0.001) | 1.41 (1.27-1.57, p<0.001) |
13 | HOMOC | Mean (SD) | 14.1 (5.4) | 1.05 (1.04-1.06, p<0.001) | 1.00 (0.99-1.02, p=0.620) | - |
9 | CREAT_LOG | Mean (SD) | 4.5 (0.3) | 2.93 (2.44-3.53, p<0.001) | 1.79 (1.34-2.38, p<0.001) | 1.83 (1.41-2.38, p<0.001) |
2 | albumin | 1 | 3040 (78.5) | - | - | - |
3 | 2 | 696 (18.0) | 1.94 (1.56-2.41, p<0.001) | 1.22 (0.96-1.53, p=0.101) | 1.26 (1.00-1.59, p=0.048) | |
4 | 3 | 137 (3.5) | 3.75 (2.70-5.19, p<0.001) | 1.69 (1.12-2.54, p=0.012) | 1.76 (1.17-2.63, p=0.006) | |
20 | STENOSIS | 0 | 3136 (81.0) | - | - | - |
21 | 1 | 737 (19.0) | 1.75 (1.44-2.12, p<0.001) | 1.22 (0.99-1.50, p=0.064) | 1.26 (1.02-1.54, p=0.029) | |
14 | IMT | Mean (SD) | 0.9 (0.3) | 3.58 (2.68-4.76, p<0.001) | 1.58 (1.11-2.24, p=0.010) | 1.66 (1.18-2.34, p=0.004) |
##################################
# Conducting a likelihood ratio test
# to compare the FULL and REDUCED models
##################################
# FULL model (14 predictors) fitted with survival::coxph on the pre-computed
# transformed columns.
COXPH.FULLFINAL.AREGIMPUTED_3 <- coxph(Surv(TEVENT,EVENT) ~ AGE_SQUAREDAFTER50 +
                                         SEX +
                                         SMOKING +
                                         alcohol +
                                         BMI +
                                         SYSTH +
                                         HDL +
                                         DIABETES +
                                         SUMSCORE_5LEVELS +
                                         HOMOC +
                                         CREAT_LOG +
                                         albumin +
                                         STENOSIS +
                                         IMT,
                                       data = SMART.AREGIMPUTED_3)

# REDUCED model (9 predictors), nested within the FULL model above.
COXPH.REDUCEDFINAL.AREGIMPUTED_3 <- coxph(Surv(TEVENT,EVENT) ~ AGE_SQUAREDAFTER50 +
                                            BMI +
                                            HDL +
                                            DIABETES +
                                            SUMSCORE_5LEVELS +
                                            CREAT_LOG +
                                            albumin +
                                            STENOSIS +
                                            IMT,
                                          data = SMART.AREGIMPUTED_3)

# Likelihood ratio test of the nested pair; the reduced model is listed first
# so it appears as Model 1 in the deviance table.
anova(COXPH.REDUCEDFINAL.AREGIMPUTED_3,
      COXPH.FULLFINAL.AREGIMPUTED_3,
      test = "LRT")
## Analysis of Deviance Table
## Cox model: response is Surv(TEVENT, EVENT)
## Model 1: ~ AGE_SQUAREDAFTER50 + BMI + HDL + DIABETES + SUMSCORE_5LEVELS + CREAT_LOG + albumin + STENOSIS + IMT
## Model 2: ~ AGE_SQUAREDAFTER50 + SEX + SMOKING + alcohol + BMI + SYSTH + HDL + DIABETES + SUMSCORE_5LEVELS + HOMOC + CREAT_LOG + albumin + STENOSIS + IMT
## loglik Chisq Df Pr(>|Chi|)
## 1 -3370.0
## 2 -3367.4 5.0507 7 0.6538
##################################
# Summarizing the FINAL REDUCED model
##################################
# Print the coefficient table, hazard ratios (exp(coef)) with 95% confidence
# limits, concordance and the overall likelihood ratio / Wald / score tests
# for the final reduced model.
summary(COXPH.REDUCEDFINAL.AREGIMPUTED_3)
## Call:
## coxph(formula = Surv(TEVENT, EVENT) ~ AGE_SQUAREDAFTER50 + BMI +
## HDL + DIABETES + SUMSCORE_5LEVELS + CREAT_LOG + albumin +
## STENOSIS + IMT, data = SMART.AREGIMPUTED_3)
##
## n= 3873, number of events= 460
##
## coef exp(coef) se(coef) z Pr(>|z|)
## AGE_SQUAREDAFTER50 0.0013327 1.0013336 0.0002059 6.473 9.62e-11 ***
## BMI -0.0249103 0.9753974 0.0141344 -1.762 0.07800 .
## HDL -0.4112635 0.6628123 0.1528392 -2.691 0.00713 **
## DIABETES1 0.1894940 1.2086379 0.1097039 1.727 0.08411 .
## SUMSCORE_5LEVELS 0.3452748 1.4123780 0.0540046 6.393 1.62e-10 ***
## CREAT_LOG 0.6064023 1.8338220 0.1334447 4.544 5.51e-06 ***
## albumin2 0.2321931 1.2613633 0.1176337 1.974 0.04840 *
## albumin3 0.5636867 1.7571386 0.2060667 2.735 0.00623 **
## STENOSIS1 0.2290771 1.2574389 0.1050720 2.180 0.02924 *
## IMT 0.5059860 1.6586202 0.1748953 2.893 0.00381 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## AGE_SQUAREDAFTER50 1.0013 0.9987 1.0009 1.0017
## BMI 0.9754 1.0252 0.9487 1.0028
## HDL 0.6628 1.5087 0.4912 0.8943
## DIABETES1 1.2086 0.8274 0.9748 1.4986
## SUMSCORE_5LEVELS 1.4124 0.7080 1.2705 1.5701
## CREAT_LOG 1.8338 0.5453 1.4118 2.3820
## albumin2 1.2614 0.7928 1.0016 1.5884
## albumin3 1.7571 0.5691 1.1733 2.6315
## STENOSIS1 1.2574 0.7953 1.0234 1.5450
## IMT 1.6586 0.6029 1.1773 2.3368
##
## Concordance= 0.692 (se = 0.014 )
## Likelihood ratio test= 269.6 on 10 df, p=<2e-16
## Wald test = 309.4 on 10 df, p=<2e-16
## Score (logrank) test = 340.6 on 10 df, p=<2e-16
##################################
# Describing the main effects
##################################
# Regression table of the final reduced model; coefficients are exponentiated
# so the table reports hazard ratios.
COXPH.REDUCEDFINAL.AREGIMPUTED_3 %>%
  tbl_regression(exponentiate = TRUE)
Characteristic | HR1 | 95% CI1 | p-value |
---|---|---|---|
AGE_SQUAREDAFTER50 | 1.00 | 1.00, 1.00 | <0.001 |
BMI | 0.98 | 0.95, 1.00 | 0.078 |
HDL | 0.66 | 0.49, 0.89 | 0.007 |
DIABETES | |||
0 | — | — | |
1 | 1.21 | 0.97, 1.50 | 0.084 |
SUMSCORE_5LEVELS | 1.41 | 1.27, 1.57 | <0.001 |
CREAT_LOG | 1.83 | 1.41, 2.38 | <0.001 |
albumin | |||
1 | — | — | |
2 | 1.26 | 1.00, 1.59 | 0.048 |
3 | 1.76 | 1.17, 2.63 | 0.006 |
STENOSIS | |||
0 | — | — | |
1 | 1.26 | 1.02, 1.54 | 0.029 |
IMT | 1.66 | 1.18, 2.34 | 0.004 |
1 HR = Hazard Ratio, CI = Confidence Interval |
##################################
# Formulating the forest plot
##################################
# Forest plot of the final reduced model. The outer parentheses make the
# returned plot object print explicitly.
(ggforest(COXPH.REDUCEDFINAL.AREGIMPUTED_3,
          data = SMART.AREGIMPUTED_3,
          main = "",
          fontsize = 0.90))
##################################
# Plotting the Kaplan-Meier survival curves
##################################
# Survival estimates derived from the fitted final reduced Cox model.
# NOTE(review): survfit() is applied to a coxph fit here, so the curve is
# model-based rather than a raw Kaplan-Meier estimate - confirm that the
# "Kaplan-Meier" wording in the plot title is what is intended.
COXPH.REDUCEDFINAL.AREGIMPUTED_3.KMEstimates <- survfit(
  COXPH.REDUCEDFINAL.AREGIMPUTED_3,
  data = SMART.AREGIMPUTED_3
)

# Survival curve with a confidence ribbon, yearly tick marks (365 days) and an
# at-risk table showing counts and percentages.
COXPH.REDUCEDFINAL.AREGIMPUTED_3.KMPlot <- ggsurvplot(
  COXPH.REDUCEDFINAL.AREGIMPUTED_3.KMEstimates,
  title = "Kaplan-Meier Survival Curve (Final Model)",
  pval = FALSE,
  conf.int = TRUE,
  conf.int.style = "ribbon",
  xlab = "Time (Days)",
  ylab = "Estimated Probability (Free of Cardiovascular Events)",
  break.time.by = 365,
  ggtheme = theme_bw(),
  risk.table = "abs_pct",
  risk.table.title = "Number at Risk (Estimated Probability)",
  risk.table.y.text.col = FALSE,
  risk.table.y.text = TRUE,
  fontsize = 3,
  ncensor.plot = FALSE,
  surv.median.line = "hv",
  palette = c("#000000")
)

# Apply the font settings and display the plot.
ggpar(COXPH.REDUCEDFINAL.AREGIMPUTED_3.KMPlot,
      font.title = c(14,"bold"),
      font.x = c(12,"bold"),
      font.y = c(12,"bold"),
      font.legend = c(12),
      font.xtickslab = c(9,"black"),
      font.ytickslab = c(9,"black"))
##################################
# Testing for the proportional hazards assumption
##################################
# Proportional hazards test for the final reduced model (per-term and GLOBAL
# rows in the output). The outer parentheses print the result while it is
# being stored.
(COXPH.REDUCEDFINAL.AREGIMPUTED_3.PHAssumptionCheck <- cox.zph(COXPH.REDUCEDFINAL.AREGIMPUTED_3))
## chisq df p
## AGE_SQUAREDAFTER50 6.2279 1 0.013
## BMI 0.3371 1 0.561
## HDL 0.0483 1 0.826
## DIABETES 0.0069 1 0.934
## SUMSCORE_5LEVELS 4.0453 1 0.044
## CREAT_LOG 0.1978 1 0.657
## albumin 2.6737 2 0.263
## STENOSIS 1.4148 1 0.234
## IMT 0.8929 1 0.345
## GLOBAL 12.0725 10 0.280
##################################
# Formulating the graphical verification
# of the proportional hazards assumption test results
# using the scaled Schoenfeld residuals against time
##################################
# Graphical check of the proportional hazards test results, one panel per
# model term. The outer parentheses display the plot while it is being stored.
(COXPH.REDUCEDFINAL.AREGIMPUTED_3.PHAssumptionPlot <- ggcoxzph(
  COXPH.REDUCEDFINAL.AREGIMPUTED_3.PHAssumptionCheck,
  point.col = "red",
  point.size = 2,
  point.shape = 19,
  point.alpha = 0.50,
  ggtheme = theme_survminer(),
  font.main = c(12,"bold"),
  font.x = c(12,"bold"),
  font.y = c(12,"bold"),
  font.xtickslab = c(9,"black"),
  font.ytickslab = c(9,"black")
))
##################################
# Implementing internal cross validation
##################################
##################################
# Determining the performance of the
# Model formulated from original data
# And evaluated on original data
##################################
# Working copy of the analysis data and its survival response.
ICV_Data <- SMART.AREGIMPUTED_3
ICV_Survival <- Surv(time = ICV_Data$TEVENT, event = ICV_Data$EVENT)

# FULL model on the original data; x and y are kept on the fit because the
# design matrix and response are reused below.
ICV_Original_CoxPH_Full <- cph(ICV_Survival ~ AGE_SQUAREDAFTER50 +
                                 SEX +
                                 SMOKING +
                                 alcohol +
                                 BMI +
                                 SYSTH +
                                 HDL +
                                 DIABETES +
                                 SUMSCORE_5LEVELS +
                                 HOMOC +
                                 CREAT_LOG +
                                 albumin +
                                 STENOSIS +
                                 IMT,
                               data = ICV_Data,
                               x = TRUE,
                               y = TRUE)

# Backward elimination (AIC rule), then refit on the retained columns only.
ICV_Original_CoxPH_Backward <- fastbw(ICV_Original_CoxPH_Full,
                                      rule = "aic",
                                      type = "individual")
ICV_Original_CoxPH_Reduced <- update(
  ICV_Original_CoxPH_Full,
  ~ICV_Original_CoxPH_Full$x[,ICV_Original_CoxPH_Backward$parms.kept]
)

# Select the full design-matrix columns whose names match the reduced model's
# coefficient names. drop = FALSE keeps a matrix even if a single column is
# selected, so the matrix product below stays valid.
ICV_Original_CoxPH_Reduced_Coef <- as.data.frame(coef(ICV_Original_CoxPH_Reduced))
ICV_Original_CoxPH_Full_CoefAdjusted <- ICV_Original_CoxPH_Full$x[
  ,
  colnames(ICV_Original_CoxPH_Full$x) %in% rownames(ICV_Original_CoxPH_Reduced_Coef),
  drop = FALSE
]

# Linear predictor of the reduced model on the original data and its
# concordance (apparent performance: original model, original data).
ICV_OriginalOnOriginal_CoxPH_Reduced.LP <-
  ICV_Original_CoxPH_Full_CoefAdjusted %*% coef(ICV_Original_CoxPH_Reduced)
ICV_OriginalOnOriginal_CoxPH_Reduced.Concordance <- rcorr.cens(
  -as.numeric(ICV_OriginalOnOriginal_CoxPH_Reduced.LP),
  ICV_Original_CoxPH_Reduced$y
)[1]
ICV_OriginalOnOriginal_CoxPH_Reduced.Concordance[[1]]
## [1] 0.6917632
##################################
# Conducting bootstrap validation
##################################
######################################
# Generating a random number list
######################################
# Draw 200 reproducible seeds, one per bootstrap cycle.
set.seed(123456789)
RandomSeedList <- (sample.int(99999999,200))
length(RandomSeedList)

# Accumulators filled by the bootstrap loop; they start empty because cycles
# in which no reduced model is produced are skipped.
ICV_BootstrapOnBootstrap_CoxPH_Reduced.Concordance.List <- c()
ICV_BootstrapOnOriginal_CoxPH_Reduced.Concordance.List <- c()
ICV_CoxPH_Reduced.Concordance.Optimism.List <- c()
ProcessedSeed.List <- c()
######################################
# Implementing the bootstrap cycles
######################################
# Optimism bootstrap: for every seed, resample the data with replacement,
# repeat the full-model fit and backward elimination on the resample, then
# score the resulting reduced model on (a) the resample and (b) the original
# data. The difference of the two concordances is that cycle's optimism.
#
# The bootstrap model must be fitted against ICV_BootstrapSurvival, i.e. the
# outcome taken from the resampled rows. Fitting against ICV_Survival (the
# outcome in original row order) together with data = ICV_BootstrapData pairs
# each resampled predictor row with another subject's outcome.
# NOTE(review): any optimism table produced with the misaligned response needs
# to be regenerated after this fix.
for (r in seq_along(RandomSeedList)) {

  ##################################
  # Generating the bootstrap data
  ##################################
  SelectedSeed <- as.integer(RandomSeedList[r])
  set.seed(SelectedSeed)
  ICV_BootstrapRow <- sample(nrow(ICV_Data),
                             replace = TRUE)
  ICV_BootstrapData <- ICV_Data[ICV_BootstrapRow,]
  ICV_BootstrapSurvival <- Surv(time = ICV_BootstrapData$TEVENT,
                                event = ICV_BootstrapData$EVENT)

  ##################################
  # Formulating the FULL
  # Cox Proportional Hazards Model
  # Using the bootstrap data
  ##################################
  ICV_Bootstrap_CoxPH_Full <- cph(ICV_BootstrapSurvival ~ AGE_SQUAREDAFTER50 +
                                    SEX +
                                    SMOKING +
                                    alcohol +
                                    BMI +
                                    SYSTH +
                                    HDL +
                                    DIABETES +
                                    SUMSCORE_5LEVELS +
                                    HOMOC +
                                    CREAT_LOG +
                                    albumin +
                                    STENOSIS +
                                    IMT,
                                  data = ICV_BootstrapData,
                                  x = TRUE,
                                  y = TRUE)

  ##################################
  # Conducting variable selection
  # Backward elimination
  ##################################
  ICV_Bootstrap_CoxPH_Backward <- fastbw(ICV_Bootstrap_CoxPH_Full,
                                         rule = "aic",
                                         type = "individual")

  ##################################
  # Ignoring instances when no
  # Reduced model is formulated
  # Due to failure to meet
  # The final model criterion
  ##################################
  if (!is.null(ICV_Bootstrap_CoxPH_Backward$factors.kept) &&
      length(ICV_Bootstrap_CoxPH_Backward$factors.kept) > 0) {

    ProcessedSeed <- as.integer(RandomSeedList[r])

    print(paste0("Implementing bootstrap cycle ",r))

    ##################################
    # Formulating the REDUCED
    # Cox Proportional Hazards Model
    # Using the bootstrap data
    ##################################
    ICV_Bootstrap_CoxPH_Reduced <- update(
      ICV_Bootstrap_CoxPH_Full,
      ~ICV_Bootstrap_CoxPH_Full$x[,ICV_Bootstrap_CoxPH_Backward$parms.kept, drop = FALSE]
    )
    ICV_Bootstrap_CoxPH_Reduced_Coef <- as.data.frame(coef(ICV_Bootstrap_CoxPH_Reduced))

    # Bootstrap design-matrix columns matching the retained coefficients.
    ICV_Bootstrap_CoxPH_Full_CoefAdjusted <- ICV_Bootstrap_CoxPH_Full$x[
      ,
      colnames(ICV_Bootstrap_CoxPH_Full$x) %in% rownames(ICV_Bootstrap_CoxPH_Reduced_Coef),
      drop = FALSE
    ]

    ##################################
    # Determining the performance of the
    # Model formulated from bootstrap data
    # And evaluated on bootstrap data
    ##################################
    ICV_BootstrapOnBootstrap_CoxPH_Reduced.LP <-
      ICV_Bootstrap_CoxPH_Full_CoefAdjusted %*% coef(ICV_Bootstrap_CoxPH_Reduced)
    ICV_BootstrapOnBootstrap_CoxPH_Reduced.Concordance <- rcorr.cens(
      -as.numeric(ICV_BootstrapOnBootstrap_CoxPH_Reduced.LP),
      ICV_Bootstrap_CoxPH_Reduced$y
    )[1]

    # Original-data design-matrix columns matching the same coefficients.
    ICV_Original_CoxPH_Full_CoefAdjusted <- ICV_Original_CoxPH_Full$x[
      ,
      colnames(ICV_Original_CoxPH_Full$x) %in% rownames(ICV_Bootstrap_CoxPH_Reduced_Coef),
      drop = FALSE
    ]

    ##################################
    # Determining the performance of the
    # Model formulated from bootstrap data
    # And evaluated on original data
    ##################################
    ICV_BootstrapOnOriginal_CoxPH_Reduced.LP <-
      ICV_Original_CoxPH_Full_CoefAdjusted %*% coef(ICV_Bootstrap_CoxPH_Reduced)
    ICV_BootstrapOnOriginal_CoxPH_Reduced.Concordance <- rcorr.cens(
      -as.numeric(ICV_BootstrapOnOriginal_CoxPH_Reduced.LP),
      ICV_Original_CoxPH_Reduced$y
    )[1]

    ##################################
    # Computing for the individual
    # Estimated performance optimism
    ##################################
    ICV_CoxPH_Reduced.Concordance.Optimism <-
      ICV_BootstrapOnBootstrap_CoxPH_Reduced.Concordance -
      ICV_BootstrapOnOriginal_CoxPH_Reduced.Concordance

    # Record this cycle's seed and results.
    ProcessedSeed.List <- append(ProcessedSeed.List,ProcessedSeed)
    ICV_BootstrapOnBootstrap_CoxPH_Reduced.Concordance.List <- append(
      ICV_BootstrapOnBootstrap_CoxPH_Reduced.Concordance.List,
      ICV_BootstrapOnBootstrap_CoxPH_Reduced.Concordance[[1]]
    )
    ICV_BootstrapOnOriginal_CoxPH_Reduced.Concordance.List <- append(
      ICV_BootstrapOnOriginal_CoxPH_Reduced.Concordance.List,
      ICV_BootstrapOnOriginal_CoxPH_Reduced.Concordance[[1]]
    )
    ICV_CoxPH_Reduced.Concordance.Optimism.List <- append(
      ICV_CoxPH_Reduced.Concordance.Optimism.List,
      ICV_CoxPH_Reduced.Concordance.Optimism
    )

  }

}
## [1] "Implementing bootstrap cycle 1"
## [1] "Implementing bootstrap cycle 2"
## [1] "Implementing bootstrap cycle 3"
## [1] "Implementing bootstrap cycle 4"
## [1] "Implementing bootstrap cycle 5"
## [1] "Implementing bootstrap cycle 6"
## [1] "Implementing bootstrap cycle 7"
## [1] "Implementing bootstrap cycle 8"
## [1] "Implementing bootstrap cycle 9"
## [1] "Implementing bootstrap cycle 10"
## [1] "Implementing bootstrap cycle 11"
## [1] "Implementing bootstrap cycle 12"
## [1] "Implementing bootstrap cycle 13"
## [1] "Implementing bootstrap cycle 14"
## [1] "Implementing bootstrap cycle 15"
## [1] "Implementing bootstrap cycle 16"
## [1] "Implementing bootstrap cycle 17"
## [1] "Implementing bootstrap cycle 18"
## [1] "Implementing bootstrap cycle 19"
## [1] "Implementing bootstrap cycle 20"
## [1] "Implementing bootstrap cycle 21"
## [1] "Implementing bootstrap cycle 22"
## [1] "Implementing bootstrap cycle 23"
## [1] "Implementing bootstrap cycle 24"
## [1] "Implementing bootstrap cycle 25"
## [1] "Implementing bootstrap cycle 26"
## [1] "Implementing bootstrap cycle 27"
## [1] "Implementing bootstrap cycle 28"
## [1] "Implementing bootstrap cycle 29"
## [1] "Implementing bootstrap cycle 32"
## [1] "Implementing bootstrap cycle 34"
## [1] "Implementing bootstrap cycle 35"
## [1] "Implementing bootstrap cycle 36"
## [1] "Implementing bootstrap cycle 37"
## [1] "Implementing bootstrap cycle 38"
## [1] "Implementing bootstrap cycle 39"
## [1] "Implementing bootstrap cycle 40"
## [1] "Implementing bootstrap cycle 41"
## [1] "Implementing bootstrap cycle 42"
## [1] "Implementing bootstrap cycle 43"
## [1] "Implementing bootstrap cycle 44"
## [1] "Implementing bootstrap cycle 45"
## [1] "Implementing bootstrap cycle 46"
## [1] "Implementing bootstrap cycle 48"
## [1] "Implementing bootstrap cycle 49"
## [1] "Implementing bootstrap cycle 51"
## [1] "Implementing bootstrap cycle 52"
## [1] "Implementing bootstrap cycle 53"
## [1] "Implementing bootstrap cycle 54"
## [1] "Implementing bootstrap cycle 55"
## [1] "Implementing bootstrap cycle 56"
## [1] "Implementing bootstrap cycle 57"
## [1] "Implementing bootstrap cycle 58"
## [1] "Implementing bootstrap cycle 59"
## [1] "Implementing bootstrap cycle 60"
## [1] "Implementing bootstrap cycle 61"
## [1] "Implementing bootstrap cycle 62"
## [1] "Implementing bootstrap cycle 63"
## [1] "Implementing bootstrap cycle 64"
## [1] "Implementing bootstrap cycle 65"
## [1] "Implementing bootstrap cycle 66"
## [1] "Implementing bootstrap cycle 67"
## [1] "Implementing bootstrap cycle 68"
## [1] "Implementing bootstrap cycle 69"
## [1] "Implementing bootstrap cycle 70"
## [1] "Implementing bootstrap cycle 71"
## [1] "Implementing bootstrap cycle 72"
## [1] "Implementing bootstrap cycle 73"
## [1] "Implementing bootstrap cycle 74"
## [1] "Implementing bootstrap cycle 75"
## [1] "Implementing bootstrap cycle 76"
## [1] "Implementing bootstrap cycle 78"
## [1] "Implementing bootstrap cycle 79"
## [1] "Implementing bootstrap cycle 80"
## [1] "Implementing bootstrap cycle 81"
## [1] "Implementing bootstrap cycle 83"
## [1] "Implementing bootstrap cycle 84"
## [1] "Implementing bootstrap cycle 85"
## [1] "Implementing bootstrap cycle 86"
## [1] "Implementing bootstrap cycle 87"
## [1] "Implementing bootstrap cycle 88"
## [1] "Implementing bootstrap cycle 89"
## [1] "Implementing bootstrap cycle 90"
## [1] "Implementing bootstrap cycle 92"
## [1] "Implementing bootstrap cycle 93"
## [1] "Implementing bootstrap cycle 94"
## [1] "Implementing bootstrap cycle 96"
## [1] "Implementing bootstrap cycle 97"
## [1] "Implementing bootstrap cycle 98"
## [1] "Implementing bootstrap cycle 99"
## [1] "Implementing bootstrap cycle 100"
## [1] "Implementing bootstrap cycle 101"
## [1] "Implementing bootstrap cycle 102"
## [1] "Implementing bootstrap cycle 103"
## [1] "Implementing bootstrap cycle 104"
## [1] "Implementing bootstrap cycle 106"
## [1] "Implementing bootstrap cycle 107"
## [1] "Implementing bootstrap cycle 108"
## [1] "Implementing bootstrap cycle 110"
## [1] "Implementing bootstrap cycle 111"
## [1] "Implementing bootstrap cycle 112"
## [1] "Implementing bootstrap cycle 113"
## [1] "Implementing bootstrap cycle 114"
## [1] "Implementing bootstrap cycle 115"
## [1] "Implementing bootstrap cycle 116"
## [1] "Implementing bootstrap cycle 117"
## [1] "Implementing bootstrap cycle 119"
## [1] "Implementing bootstrap cycle 120"
## [1] "Implementing bootstrap cycle 121"
## [1] "Implementing bootstrap cycle 122"
## [1] "Implementing bootstrap cycle 123"
## [1] "Implementing bootstrap cycle 124"
## [1] "Implementing bootstrap cycle 125"
## [1] "Implementing bootstrap cycle 126"
## [1] "Implementing bootstrap cycle 127"
## [1] "Implementing bootstrap cycle 128"
## [1] "Implementing bootstrap cycle 129"
## [1] "Implementing bootstrap cycle 130"
## [1] "Implementing bootstrap cycle 131"
## [1] "Implementing bootstrap cycle 132"
## [1] "Implementing bootstrap cycle 134"
## [1] "Implementing bootstrap cycle 135"
## [1] "Implementing bootstrap cycle 136"
## [1] "Implementing bootstrap cycle 137"
## [1] "Implementing bootstrap cycle 138"
## [1] "Implementing bootstrap cycle 140"
## [1] "Implementing bootstrap cycle 141"
## [1] "Implementing bootstrap cycle 142"
## [1] "Implementing bootstrap cycle 143"
## [1] "Implementing bootstrap cycle 144"
## [1] "Implementing bootstrap cycle 145"
## [1] "Implementing bootstrap cycle 146"
## [1] "Implementing bootstrap cycle 147"
## [1] "Implementing bootstrap cycle 148"
## [1] "Implementing bootstrap cycle 150"
## [1] "Implementing bootstrap cycle 151"
## [1] "Implementing bootstrap cycle 153"
## [1] "Implementing bootstrap cycle 154"
## [1] "Implementing bootstrap cycle 155"
## [1] "Implementing bootstrap cycle 156"
## [1] "Implementing bootstrap cycle 157"
## [1] "Implementing bootstrap cycle 158"
## [1] "Implementing bootstrap cycle 159"
## [1] "Implementing bootstrap cycle 160"
## [1] "Implementing bootstrap cycle 161"
## [1] "Implementing bootstrap cycle 162"
## [1] "Implementing bootstrap cycle 163"
## [1] "Implementing bootstrap cycle 164"
## [1] "Implementing bootstrap cycle 165"
## [1] "Implementing bootstrap cycle 167"
## [1] "Implementing bootstrap cycle 168"
## [1] "Implementing bootstrap cycle 169"
## [1] "Implementing bootstrap cycle 170"
## [1] "Implementing bootstrap cycle 171"
## [1] "Implementing bootstrap cycle 172"
## [1] "Implementing bootstrap cycle 173"
## [1] "Implementing bootstrap cycle 174"
## [1] "Implementing bootstrap cycle 175"
## [1] "Implementing bootstrap cycle 176"
## [1] "Implementing bootstrap cycle 177"
## [1] "Implementing bootstrap cycle 178"
## [1] "Implementing bootstrap cycle 179"
## [1] "Implementing bootstrap cycle 180"
## [1] "Implementing bootstrap cycle 181"
## [1] "Implementing bootstrap cycle 182"
## [1] "Implementing bootstrap cycle 183"
## [1] "Implementing bootstrap cycle 184"
## [1] "Implementing bootstrap cycle 185"
## [1] "Implementing bootstrap cycle 186"
## [1] "Implementing bootstrap cycle 187"
## [1] "Implementing bootstrap cycle 188"
## [1] "Implementing bootstrap cycle 189"
## [1] "Implementing bootstrap cycle 190"
## [1] "Implementing bootstrap cycle 191"
## [1] "Implementing bootstrap cycle 192"
## [1] "Implementing bootstrap cycle 193"
## [1] "Implementing bootstrap cycle 194"
## [1] "Implementing bootstrap cycle 195"
## [1] "Implementing bootstrap cycle 196"
## [1] "Implementing bootstrap cycle 197"
## [1] "Implementing bootstrap cycle 198"
## [1] "Implementing bootstrap cycle 199"
## [1] "Implementing bootstrap cycle 200"
##################################
# Consolidating all the individual
# Estimated performance optimism
##################################
# One row per processed bootstrap cycle: running item number, seed,
# concordance on the bootstrap sample, concordance on the original data and
# their difference (optimism).
ICV_CoxPH_Reduced.Concordance.EstimatedOptimismSummary <- cbind(
  seq_along(ProcessedSeed.List),
  ProcessedSeed.List,
  ICV_BootstrapOnBootstrap_CoxPH_Reduced.Concordance.List,
  ICV_BootstrapOnOriginal_CoxPH_Reduced.Concordance.List,
  ICV_CoxPH_Reduced.Concordance.Optimism.List
)
ICV_CoxPH_Reduced.Concordance.EstimatedOptimismSummary <-
  as.data.frame(ICV_CoxPH_Reduced.Concordance.EstimatedOptimismSummary)

colnames(ICV_CoxPH_Reduced.Concordance.EstimatedOptimismSummary) <- c("Item",
                                                                      "Random Seed",
                                                                      "Bootstrap",
                                                                      "Test",
                                                                      "Optimism")

print(ICV_CoxPH_Reduced.Concordance.EstimatedOptimismSummary,row.names=FALSE)
## Item Random Seed Bootstrap Test Optimism
## 1 24357954 0.5543502 0.4566681 0.0976821298
## 2 65725996 0.5476381 0.5391635 0.0084746534
## 3 70053462 0.5398864 0.5608731 -0.0209866759
## 4 78017548 0.5162009 0.4729145 0.0432863839
## 5 84420140 0.5356534 0.5500449 -0.0143914874
## 6 606720 0.5438464 0.4830998 0.0607466176
## 7 10029333 0.5383122 0.4388762 0.0994360240
## 8 9876518 0.5246633 0.4485447 0.0761185569
## 9 82419514 0.5459457 0.4777053 0.0682403640
## 10 42862636 0.5586620 0.4933852 0.0652767149
## 11 54910456 0.5326734 0.6241630 -0.0914895863
## 12 63139597 0.5352076 0.4459614 0.0892461485
## 13 53332617 0.5595355 0.6232712 -0.0637357448
## 14 8034413 0.5375183 0.5113660 0.0261523609
## 15 52415042 0.5568198 0.5863339 -0.0295141053
## 16 14954495 0.5091375 0.6082396 -0.0991020789
## 17 41054996 0.5345242 0.6044238 -0.0698996254
## 18 70033269 0.5119724 0.5514553 -0.0394828673
## 19 57006029 0.5330833 0.5468488 -0.0137654541
## 20 15249657 0.5327894 0.6456055 -0.1128161218
## 21 15020852 0.5250868 0.3917604 0.1333264482
## 22 58737737 0.5351302 0.4565334 0.0785967572
## 23 47064181 0.5476764 0.4646823 0.0829940045
## 24 29510608 0.5223479 0.4874980 0.0348499476
## 25 2926223 0.5109874 0.3973687 0.1136186820
## 26 15597395 0.5238630 0.5483488 -0.0244858201
## 27 17565909 0.5185945 0.4932924 0.0253020292
## 28 53494443 0.5649787 0.5852961 -0.0203174207
## 29 60222957 0.5369637 0.6057860 -0.0688222657
## 30 70635560 0.5272916 0.6026313 -0.0753396549
## 31 48609906 0.5386576 0.4724031 0.0662544371
## 32 40534381 0.5448233 0.6009747 -0.0561514600
## 33 61196946 0.5227492 0.5196900 0.0030591920
## 34 68587780 0.5586833 0.4889976 0.0696857913
## 35 93490098 0.5155689 0.3973687 0.1182001905
## 36 82482027 0.5508948 0.5883103 -0.0374155014
## 37 81130178 0.5467641 0.5072917 0.0394724031
## 38 15641896 0.5287548 0.5904109 -0.0616560948
## 39 15033507 0.5453697 0.4837054 0.0616642842
## 40 95751689 0.5480139 0.4724695 0.0755443897
## 41 74283531 0.5213188 0.5312816 -0.0099628475
## 42 63712377 0.5579731 0.5223402 0.0356329443
## 43 31672772 0.5186395 0.5270855 -0.0084459905
## 44 88691650 0.5139201 0.4634307 0.0504894070
## 45 79451861 0.5650720 0.4812422 0.0838297772
## 46 90232912 0.5523498 0.4151870 0.1371627223
## 47 94043774 0.5180631 0.5349004 -0.0168373851
## 48 80510746 0.5347594 0.5500968 -0.0153373619
## 49 34102210 0.5871710 0.5835190 0.0036520128
## 50 17883462 0.5533584 0.4601823 0.0931761456
## 51 8772866 0.5452878 0.5443860 0.0009017428
## 52 87703613 0.5435862 0.3638459 0.1797402690
## 53 53348271 0.5564627 0.4044912 0.1519714136
## 54 24011095 0.5215326 0.3926762 0.1288564063
## 55 31341514 0.5379983 0.5029177 0.0350806154
## 56 80187434 0.5188556 0.5321097 -0.0132540722
## 57 43745901 0.5388941 0.5172778 0.0216163489
## 58 93922134 0.5156527 0.4729145 0.0427381497
## 59 66126492 0.5582502 0.4220120 0.1362382312
## 60 37431387 0.5105821 0.6026313 -0.0920491946
## 61 20990637 0.5549485 0.4915636 0.0633849659
## 62 81273149 0.5466236 0.3620047 0.1846188704
## 63 77598731 0.5386171 0.5100611 0.0285559467
## 64 10644229 0.5100029 0.3917604 0.1182425024
## 65 40926246 0.5427359 0.4835425 0.0591933633
## 66 80877223 0.5468970 0.5319482 0.0149488209
## 67 97912308 0.5436062 0.4014721 0.1421341367
## 68 31028144 0.5625228 0.4528942 0.1096286294
## 69 5257245 0.5424447 0.4108594 0.1315852929
## 70 59004516 0.5140184 0.5270855 -0.0130670811
## 71 34088822 0.5222488 0.5029177 0.0193310543
## 72 25666373 0.5553139 0.5673718 -0.0120579663
## 73 68623095 0.5616174 0.5434524 0.0181649762
## 74 63482681 0.5213092 0.5452823 -0.0239730733
## 75 76784074 0.5267115 0.3881025 0.1386090595
## 76 42512786 0.5118164 0.5196900 -0.0078736432
## 77 95549440 0.5655770 0.5394619 0.0261150536
## 78 13442618 0.5359978 0.4574561 0.0785417063
## 79 3819300 0.5238134 0.4803100 0.0435034027
## 80 74985875 0.5379478 0.5489198 -0.0109719623
## 81 94058848 0.5310678 0.5071976 0.0238702510
## 82 3110705 0.5623713 0.5370547 0.0253165882
## 83 34050107 0.5555959 0.5230932 0.0325027776
## 84 49851621 0.5284372 0.5672595 -0.0388222566
## 85 665609 0.5521291 0.6269742 -0.0748451068
## 86 79266495 0.5228111 0.4067401 0.1160709492
## 87 54231681 0.5446294 0.3953778 0.1492516263
## 88 33345182 0.5455184 0.5097709 0.0357475957
## 89 28525383 0.5509790 0.4883096 0.0626693043
## 90 48257725 0.5442941 0.5154579 0.0288362058
## 91 42446822 0.5564012 0.5429529 0.0134483427
## 92 45803378 0.5436713 0.4308251 0.1128461496
## 93 75002096 0.5202301 0.3973687 0.1228613181
## 94 43813903 0.5396526 0.5064173 0.0332352730
## 95 29858393 0.5431358 0.5049937 0.0381420823
## 96 75727172 0.5311697 0.5766541 -0.0454843251
## 97 81424081 0.5450912 0.5197719 0.0253193180
## 98 6960366 0.5302170 0.6243704 -0.0941534127
## 99 6160576 0.5704856 0.5235049 0.0469807085
## 100 19881212 0.5294031 0.4659226 0.0634805088
## 101 38081837 0.5410120 0.5681717 -0.0271596558
## 102 1300489 0.5146258 0.5514553 -0.0368295051
## 103 74339302 0.5436249 0.5724038 -0.0287788801
## 104 63614203 0.5558585 0.5149798 0.0408787033
## 105 9353482 0.5478802 0.5898317 -0.0419515134
## 106 70489628 0.5318149 0.4886864 0.0431285106
## 107 94299495 0.5407540 0.3793616 0.1613923964
## 108 78365082 0.5570113 0.4786430 0.0783683642
## 109 51232997 0.5332589 0.5455266 -0.0122677057
## 110 41328046 0.5176613 0.3973687 0.1202925796
## 111 90269939 0.5302311 0.5355729 -0.0053417569
## 112 76384369 0.5151686 0.4012478 0.1139207795
## 113 46582891 0.5263412 0.3756296 0.1507116125
## 114 20718053 0.5336356 0.6164226 -0.0827869949
## 115 36855289 0.5529581 0.4798063 0.0731517230
## 116 14162817 0.5592630 0.4606368 0.0986261844
## 117 83427810 0.5615706 0.5260432 0.0355273921
## 118 47754877 0.5437141 0.5357717 0.0079423431
## 119 96086577 0.5088295 0.5831400 -0.0743105216
## 120 23368041 0.5326061 0.3771182 0.1554878465
## 121 95652738 0.5375484 0.6082396 -0.0706912664
## 122 68085408 0.5349036 0.6156401 -0.0807364627
## 123 50435170 0.5410930 0.3969029 0.1441901285
## 124 72969291 0.5499562 0.5945811 -0.0446248941
## 125 39789576 0.5385506 0.5497778 -0.0112271982
## 126 70949671 0.5328445 0.5512574 -0.0184129327
## 127 28105689 0.5374746 0.3735121 0.1639624999
## 128 31067917 0.5451062 0.6248859 -0.0797796690
## 129 31401285 0.5551856 0.5191559 0.0360296747
## 130 36989444 0.5328276 0.3928864 0.1399412002
## 131 54475311 0.5655765 0.5235172 0.0420593403
## 132 75412473 0.5603522 0.4886381 0.0717140302
## 133 37666917 0.5403496 0.5066957 0.0336538418
## 134 92034563 0.5247866 0.6073238 -0.0825372185
## 135 64331799 0.5483060 0.5233357 0.0249703590
## 136 23895688 0.5393810 0.4336873 0.1056936277
## 137 72574901 0.5482064 0.5387577 0.0094487358
## 138 18762241 0.5596488 0.3924892 0.1671595466
## 139 76873985 0.5299231 0.5368263 -0.0069032005
## 140 22587402 0.5649705 0.4925085 0.0724619944
## 141 38632436 0.5136449 0.5514553 -0.0378104120
## 142 64784435 0.5732559 0.5167559 0.0564999641
## 143 40113223 0.5479252 0.5213320 0.0265932230
## 144 38821276 0.5143160 0.5029177 0.0113982655
## 145 29759834 0.5372913 0.4708544 0.0664368785
## 146 17904630 0.5506232 0.5496991 0.0009240361
## 147 42121783 0.5675506 0.4567932 0.1107574003
## 148 83221723 0.5283198 0.5995470 -0.0712272164
## 149 92199625 0.5230217 0.4364572 0.0865645783
## 150 11287975 0.5439220 0.5357189 0.0082030386
## 151 59525042 0.5346921 0.6067055 -0.0720133978
## 152 99933470 0.5212920 0.5069287 0.0143632795
## 153 449880 0.5343195 0.3917604 0.1425590751
## 154 843264 0.5432454 0.5305391 0.0127062930
## 155 45102456 0.5225750 0.4731356 0.0494393453
## 156 52544982 0.5290751 0.4924803 0.0365947426
## 157 24716042 0.5169548 0.6243704 -0.1074156743
## 158 11469175 0.5553580 0.5539526 0.0014053903
## 159 32654802 0.5533853 0.6116696 -0.0582843411
## 160 22701179 0.5313403 0.5945106 -0.0631702219
## 161 79657253 0.5346243 0.4433276 0.0912966807
## 162 55157417 0.5202242 0.5369032 -0.0166790569
## 163 80567238 0.5400093 0.5480367 -0.0080274217
## 164 35206940 0.5272666 0.5468515 -0.0195849253
## 165 56067189 0.5339541 0.5822565 -0.0483023850
## 166 61002397 0.5409392 0.5107176 0.0302215776
## 167 7344359 0.5282352 0.3491360 0.1790992217
## 168 36103481 0.5301925 0.3857844 0.1444080573
## 169 38501127 0.5236255 0.4531512 0.0704742476
## 170 9605557 0.5375807 0.3823767 0.1552039476
## 171 96305721 0.5351266 0.5369032 -0.0017766426
## 172 96556339 0.5504812 0.4951191 0.0553620939
## 173 55917499 0.5369041 0.4557687 0.0811354680
## 174 33175807 0.5340506 0.4327883 0.1012622579
## 175 20310397 0.5304154 0.5218889 0.0085265195
## 176 69353345 0.5121749 0.3961544 0.1160204480
## 177 61139070 0.5349114 0.4352515 0.0996598674
## 178 39016431 0.5330901 0.4406019 0.0924882369
## 179 22331385 0.5162823 0.3926762 0.1236060976
## 180 36657309 0.5439242 0.5944177 -0.0504935017
## 181 89633602 0.5124902 0.3973687 0.1151214350
## 182 91643999 0.5157113 0.4255803 0.0901310575
## 183 333980 0.5361666 0.4947092 0.0414574202
##################################
# Computing the mean of the individual
# estimated performance optimism values
##################################
# The outer parentheses make R print the value that was just assigned.
(ICV_CoxPH_Reduced.Concordance.Optimism <- mean(ICV_CoxPH_Reduced.Concordance.Optimism.List))
## [1] 0.03508444
##################################
# Determining the optimism-adjusted
# model performance
##################################
# Adjusted concordance = original-on-original concordance minus the mean
# estimated optimism. The outer parentheses print the assigned value.
(ICV_CoxPH_Reduced.Concordance.OptimismAdjusted <-
  ICV_OriginalOnOriginal_CoxPH_Reduced.Concordance -
  ICV_CoxPH_Reduced.Concordance.Optimism)
## C Index
## 0.6566788
##################################
# Formulating the reduced Cox proportional hazards model
# on the SMART.AREGIMPUTED_3 dataset using rms::cph
##################################
# AGE enters as a one-sided quadratic term: zero up to 50, (AGE - 50)^2 above.
# CREAT enters on the log scale.
# surv = TRUE keeps the survival estimates on the fit so that a survival
# function can be derived from it afterwards with Survival().
COXPH.REDUCED.AREGIMPUTED_3 <- cph(
  Surv(TEVENT, EVENT) ~ ifelse(AGE > 50, (AGE - 50)^2, 0) +
    BMI +
    HDL +
    DIABETES +
    SUMSCORE_5LEVELS +
    log(CREAT) +
    albumin +
    STENOSIS +
    IMT,
  data = SMART.AREGIMPUTED_3,
  surv = TRUE
)
##################################
# Registering the predictor distribution summaries
# used by the rms plotting and nomogram functions
##################################
dd <- datadist(SMART.AREGIMPUTED_3)
# rms looks the summaries up by object name through this option.
options(datadist = "dd")
##################################
# Deriving the survival function of the fitted model
##################################
# The result is a function; below it is called as (time, linear predictor).
COXPH.REDUCED.AREGIMPUTED_3.PreNomogram <- Survival(COXPH.REDUCED.AREGIMPUTED_3)
##################################
# Formulating and plotting the nomogram
# of the reduced Cox proportional hazards model
##################################
# Each entry of `fun` maps the linear predictor to the survival probability
# at a fixed follow-up time (365, 730, ..., 3285 in TEVENT units), and
# `funlabel` names the matching axis (1-Year to 9-Year Survival).
# AGE and CREAT give the tick values for those predictor axes, fun.at gives
# the tick values for the survival axes, lp = FALSE omits the linear
# predictor axis, and maxscale = 10 sets the top of the points scale.
COXPH.REDUCED.AREGIMPUTED_3.Nomogram <- nomogram(
  COXPH.REDUCED.AREGIMPUTED_3,
  fun = list(
    function(x) COXPH.REDUCED.AREGIMPUTED_3.PreNomogram(365, x),
    function(x) COXPH.REDUCED.AREGIMPUTED_3.PreNomogram(730, x),
    function(x) COXPH.REDUCED.AREGIMPUTED_3.PreNomogram(1095, x),
    function(x) COXPH.REDUCED.AREGIMPUTED_3.PreNomogram(1460, x),
    function(x) COXPH.REDUCED.AREGIMPUTED_3.PreNomogram(1825, x),
    function(x) COXPH.REDUCED.AREGIMPUTED_3.PreNomogram(2190, x),
    function(x) COXPH.REDUCED.AREGIMPUTED_3.PreNomogram(2555, x),
    function(x) COXPH.REDUCED.AREGIMPUTED_3.PreNomogram(2920, x),
    function(x) COXPH.REDUCED.AREGIMPUTED_3.PreNomogram(3285, x)
  ),
  funlabel = c(
    "1-Year Survival",
    "2-Year Survival",
    "3-Year Survival",
    "4-Year Survival",
    "5-Year Survival",
    "6-Year Survival",
    "7-Year Survival",
    "8-Year Survival",
    "9-Year Survival"
  ),
  AGE = c(15, seq(50, 85, 5)),
  CREAT = c(60, 80, 100, 150, 200, 400),
  lp.at = c(0, 1, 2, 2.4),
  fun.at = c(0.93, 0.90, 0.85, 0.80, 0.70, 0.60, 0.50),
  lp = FALSE,
  maxscale = 10
)
plot(COXPH.REDUCED.AREGIMPUTED_3.Nomogram)