##################################
# Loading R libraries
##################################
library(AppliedPredictiveModeling)
library(caret)
library(rpart)
library(lattice)
library(dplyr)
library(tidyr)
library(moments)
library(skimr)
library(RANN)
library(pls)
library(corrplot)
library(tidyverse)
library(lares)
library(DMwR2)
library(gridExtra)
library(rattle)
library(RColorBrewer)
library(stats)
library(factoextra)
library(FactoMineR)
library(gplots)
##################################
# Defining file paths
##################################
<- file.path("datasets")
DATASETS_PATH
##################################
# Loading source and
# formulating the analysis set
##################################
<- read.csv(file.path("..", DATASETS_PATH, "Oscars.csv"),
Oscars na.strings=c("NA","NaN"," ",""),
stringsAsFactors = FALSE)
<- as.data.frame(Oscars)
Oscars
##################################
# Performing a general exploration of the data set
##################################
dim(Oscars)
## [1] 126 20
str(Oscars)
## 'data.frame': 126 obs. of 20 variables:
## $ Film : chr "Avatar" "The Blind Side" "District 9" "An Education" ...
## $ Year : int 2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 ...
## $ Tomatometer_Critic : int 82 66 90 93 97 89 92 89 98 90 ...
## $ Tomatometer_Audience : int 82 85 82 80 84 88 74 68 90 79 ...
## $ Tomatometer_Critic_Audience_Gap: int 0 19 -8 -13 -13 -1 -18 -21 -8 -11 ...
## $ IMDB_Critic : int 83 53 81 85 95 69 78 88 88 83 ...
## $ IMDB_Audience : int 79 76 79 73 75 83 73 70 83 74 ...
## $ IMDB_Critic_Audience_Gap : int -4 23 -2 -12 -20 14 -5 -18 -5 -9 ...
## $ Nominations_Total : int 9 2 4 3 9 8 6 2 5 6 ...
## $ Nomination_SuccessRate : num 0.333 0.5 0 0 0.667 ...
## $ Genre : chr "Sci-Fi" "Biography" "Sci-Fi" "Drama" ...
## $ SideGenre : chr "Sci-Fi/Adventure" "Drama/Biography" "Sci-Fi/Mystery/Thriller" "Drama" ...
## $ Picture : chr "NOM" "NOM" "NOM" "NOM" ...
## $ Cinematography : chr "WON" "NNOM" "NNOM" "NNOM" ...
## $ Directing : chr "NOM" "NNOM" "NNOM" "NNOM" ...
## $ Editing : chr "NOM" "NNOM" "NOM" "NNOM" ...
## $ Screenplay : chr "NNOM" "NNOM" "NOM" "NOM" ...
## $ Acting : chr "NNOM" "WON" "NNOM" "NOM" ...
## $ Design : chr "WON" "NNOM" "NNOM" "NNOM" ...
## $ Sound : chr "NOM" "NNOM" "NNOM" "NNOM" ...
summary(Oscars)
## Film Year Tomatometer_Critic Tomatometer_Audience
## Length:126 Min. :2010 Min. :45.00 Min. :60.00
## Class :character 1st Qu.:2013 1st Qu.:87.00 1st Qu.:79.00
## Mode :character Median :2016 Median :92.00 Median :85.00
## Mean :2016 Mean :89.07 Mean :83.39
## 3rd Qu.:2020 3rd Qu.:95.00 3rd Qu.:90.00
## Max. :2023 Max. :99.00 Max. :99.00
## Tomatometer_Critic_Audience_Gap IMDB_Critic IMDB_Audience
## Min. :-25.000 Min. : 46.00 Min. :68.00
## 1st Qu.:-12.750 1st Qu.: 76.00 1st Qu.:74.00
## Median : -8.000 Median : 83.00 Median :77.00
## Mean : -5.683 Mean : 81.71 Mean :76.76
## 3rd Qu.: 0.000 3rd Qu.: 89.00 3rd Qu.:79.75
## Max. : 25.000 Max. :100.00 Max. :88.00
## IMDB_Critic_Audience_Gap Nominations_Total Nomination_SuccessRate
## Min. :-25.000 Min. : 2.000 Min. :0.0000
## 1st Qu.:-14.000 1st Qu.: 5.000 1st Qu.:0.0000
## Median : -7.000 Median : 6.000 Median :0.2000
## Mean : -4.952 Mean : 6.524 Mean :0.2362
## 3rd Qu.: 1.000 3rd Qu.: 8.000 3rd Qu.:0.3561
## Max. : 30.000 Max. :14.000 Max. :1.0000
## Genre SideGenre Picture Cinematography
## Length:126 Length:126 Length:126 Length:126
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Directing Editing Screenplay Acting
## Length:126 Length:126 Length:126 Length:126
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Design Sound
## Length:126 Length:126
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##################################
# Transforming to appropriate data types
##################################
$Year <- factor(Oscars$Year,
Oscarslevels = c("2010",
"2011",
"2012",
"2013",
"2014",
"2015",
"2016",
"2017",
"2018",
"2019",
"2020",
"2021",
"2022",
"2023"))
$Genre <- as.factor(Oscars$Genre)
Oscars$SideGenre <- as.factor(Oscars$SideGenre)
Oscars$Picture <- as.factor(Oscars$Picture)
Oscars$Cinematography <- as.factor(Oscars$Cinematography)
Oscars$Directing <- as.factor(Oscars$Directing)
Oscars$Editing <- as.factor(Oscars$Editing)
Oscars$Screenplay <- as.factor(Oscars$Screenplay)
Oscars$Acting <- as.factor(Oscars$Acting)
Oscars$Design <- as.factor(Oscars$Design)
Oscars$Sound <- as.factor(Oscars$Sound)
Oscars
##################################
# Formulating a data type assessment summary
##################################
<- Oscars
PDA <- data.frame(
(PDA.Summary Column.Index=c(1:length(names(PDA))),
Column.Name= names(PDA),
Column.Type=sapply(PDA, function(x) class(x)),
row.names=NULL)
)
## Column.Index Column.Name Column.Type
## 1 1 Film character
## 2 2 Year factor
## 3 3 Tomatometer_Critic integer
## 4 4 Tomatometer_Audience integer
## 5 5 Tomatometer_Critic_Audience_Gap integer
## 6 6 IMDB_Critic integer
## 7 7 IMDB_Audience integer
## 8 8 IMDB_Critic_Audience_Gap integer
## 9 9 Nominations_Total integer
## 10 10 Nomination_SuccessRate numeric
## 11 11 Genre factor
## 12 12 SideGenre factor
## 13 13 Picture factor
## 14 14 Cinematography factor
## 15 15 Directing factor
## 16 16 Editing factor
## 17 17 Screenplay factor
## 18 18 Acting factor
## 19 19 Design factor
## 20 20 Sound factor
##################################
# Loading dataset
##################################
<- Oscars[,c(3:11,13:19)]
DQA
##################################
# Formulating an overall data quality assessment summary
##################################
<- data.frame(
(DQA.Summary Column.Index=c(1:length(names(DQA))),
Column.Name= names(DQA),
Column.Type=sapply(DQA, function(x) class(x)),
Row.Count=sapply(DQA, function(x) nrow(DQA)),
NA.Count=sapply(DQA,function(x)sum(is.na(x))),
Fill.Rate=sapply(DQA,function(x)format(round((sum(!is.na(x))/nrow(DQA)),3),nsmall=3)),
row.names=NULL)
)
## Column.Index Column.Name Column.Type Row.Count NA.Count
## 1 1 Tomatometer_Critic integer 126 0
## 2 2 Tomatometer_Audience integer 126 0
## 3 3 Tomatometer_Critic_Audience_Gap integer 126 0
## 4 4 IMDB_Critic integer 126 0
## 5 5 IMDB_Audience integer 126 0
## 6 6 IMDB_Critic_Audience_Gap integer 126 0
## 7 7 Nominations_Total integer 126 0
## 8 8 Nomination_SuccessRate numeric 126 0
## 9 9 Genre factor 126 0
## 10 10 Picture factor 126 0
## 11 11 Cinematography factor 126 0
## 12 12 Directing factor 126 0
## 13 13 Editing factor 126 0
## 14 14 Screenplay factor 126 0
## 15 15 Acting factor 126 0
## 16 16 Design factor 126 0
## Fill.Rate
## 1 1.000
## 2 1.000
## 3 1.000
## 4 1.000
## 5 1.000
## 6 1.000
## 7 1.000
## 8 1.000
## 9 1.000
## 10 1.000
## 11 1.000
## 12 1.000
## 13 1.000
## 14 1.000
## 15 1.000
## 16 1.000
##################################
# Listing all descriptors
##################################
<- DQA
DQA.Descriptors
##################################
# Listing all numeric Descriptors
##################################
<- DQA.Descriptors[,sapply(DQA.Descriptors, is.numeric)]
DQA.Descriptors.Numeric
if (length(names(DQA.Descriptors.Numeric))>0) {
print(paste0("There are ",
length(names(DQA.Descriptors.Numeric))),
(" numeric descriptor variable(s)."))
else {
} print("There are no numeric descriptor variables.")
}
## [1] "There are 8 numeric descriptor variable(s)."
##################################
# Listing all factor Descriptors
##################################
<- DQA.Descriptors[,sapply(DQA.Descriptors, is.factor)]
DQA.Descriptors.Factor
if (length(names(DQA.Descriptors.Factor))>0) {
print(paste0("There are ",
length(names(DQA.Descriptors.Factor))),
(" factor descriptor variable(s)."))
else {
} print("There are no factor descriptor variables.")
}
## [1] "There are 8 factor descriptor variable(s)."
##################################
# Formulating a data quality assessment summary for factor Descriptors
##################################
if (length(names(DQA.Descriptors.Factor))>0) {
##################################
# Formulating a function to determine the first mode
##################################
<- function(x) {
FirstModes <- unique(na.omit(x))
ux <- tabulate(match(x, ux))
tab == max(tab)]
ux[tab
}
##################################
# Formulating a function to determine the second mode
##################################
<- function(x) {
SecondModes <- unique(na.omit(x))
ux <- tabulate(match(x, ux))
tab = ux[tab == max(tab)]
fm = x[!(x %in% fm)]
sm <- unique(sm)
usm <- tabulate(match(sm, usm))
tabsm ifelse(is.na(usm[tabsm == max(tabsm)])==TRUE,
return("x"),
return(usm[tabsm == max(tabsm)]))
}
<- data.frame(
(DQA.Descriptors.Factor.Summary Column.Name= names(DQA.Descriptors.Factor),
Column.Type=sapply(DQA.Descriptors.Factor, function(x) class(x)),
Unique.Count=sapply(DQA.Descriptors.Factor, function(x) length(unique(x))),
First.Mode.Value=sapply(DQA.Descriptors.Factor, function(x) as.character(FirstModes(x)[1])),
Second.Mode.Value=sapply(DQA.Descriptors.Factor, function(x) as.character(SecondModes(x)[1])),
First.Mode.Count=sapply(DQA.Descriptors.Factor, function(x) sum(na.omit(x) == FirstModes(x)[1])),
Second.Mode.Count=sapply(DQA.Descriptors.Factor, function(x) sum(na.omit(x) == SecondModes(x)[1])),
Unique.Count.Ratio=sapply(DQA.Descriptors.Factor, function(x) format(round((length(unique(x))/nrow(DQA.Descriptors.Factor)),3), nsmall=3)),
First.Second.Mode.Ratio=sapply(DQA.Descriptors.Factor, function(x) format(round((sum(na.omit(x) == FirstModes(x)[1])/sum(na.omit(x) == SecondModes(x)[1])),3), nsmall=3)),
row.names=NULL)
)
}
## Column.Name Column.Type Unique.Count First.Mode.Value Second.Mode.Value
## 1 Genre factor 16 Drama Comedy
## 2 Picture factor 2 NOM WON
## 3 Cinematography factor 3 NNOM NOM
## 4 Directing factor 3 NNOM NOM
## 5 Editing factor 3 NNOM NOM
## 6 Screenplay factor 3 NOM WON
## 7 Acting factor 3 NOM WON
## 8 Design factor 3 NNOM NOM
## First.Mode.Count Second.Mode.Count Unique.Count.Ratio First.Second.Mode.Ratio
## 1 25 22 0.127 1.136
## 2 112 14 0.016 8.000
## 3 80 33 0.024 2.424
## 4 59 53 0.024 1.113
## 5 61 52 0.024 1.173
## 6 73 28 0.024 2.607
## 7 55 40 0.024 1.375
## 8 82 32 0.024 2.562
##################################
# Formulating a data quality assessment summary for numeric Descriptors
##################################
if (length(names(DQA.Descriptors.Numeric))>0) {
##################################
# Formulating a function to determine the first mode
##################################
<- function(x) {
FirstModes <- unique(na.omit(x))
ux <- tabulate(match(x, ux))
tab == max(tab)]
ux[tab
}
##################################
# Formulating a function to determine the second mode
##################################
<- function(x) {
SecondModes <- unique(na.omit(x))
ux <- tabulate(match(x, ux))
tab = ux[tab == max(tab)]
fm = na.omit(x)[!(na.omit(x) %in% fm)]
sm <- unique(sm)
usm <- tabulate(match(sm, usm))
tabsm ifelse(is.na(usm[tabsm == max(tabsm)])==TRUE,
return(0.00001),
return(usm[tabsm == max(tabsm)]))
}
<- data.frame(
(DQA.Descriptors.Numeric.Summary Column.Name= names(DQA.Descriptors.Numeric),
Column.Type=sapply(DQA.Descriptors.Numeric, function(x) class(x)),
Unique.Count=sapply(DQA.Descriptors.Numeric, function(x) length(unique(x))),
Unique.Count.Ratio=sapply(DQA.Descriptors.Numeric, function(x) format(round((length(unique(x))/nrow(DQA.Descriptors.Numeric)),3), nsmall=3)),
First.Mode.Value=sapply(DQA.Descriptors.Numeric, function(x) format(round((FirstModes(x)[1]),3),nsmall=3)),
Second.Mode.Value=sapply(DQA.Descriptors.Numeric, function(x) format(round((SecondModes(x)[1]),3),nsmall=3)),
First.Mode.Count=sapply(DQA.Descriptors.Numeric, function(x) sum(na.omit(x) == FirstModes(x)[1])),
Second.Mode.Count=sapply(DQA.Descriptors.Numeric, function(x) sum(na.omit(x) == SecondModes(x)[1])),
First.Second.Mode.Ratio=sapply(DQA.Descriptors.Numeric, function(x) format(round((sum(na.omit(x) == FirstModes(x)[1])/sum(na.omit(x) == SecondModes(x)[1])),3), nsmall=3)),
Minimum=sapply(DQA.Descriptors.Numeric, function(x) format(round(min(x,na.rm = TRUE),3), nsmall=3)),
Mean=sapply(DQA.Descriptors.Numeric, function(x) format(round(mean(x,na.rm = TRUE),3), nsmall=3)),
Median=sapply(DQA.Descriptors.Numeric, function(x) format(round(median(x,na.rm = TRUE),3), nsmall=3)),
Maximum=sapply(DQA.Descriptors.Numeric, function(x) format(round(max(x,na.rm = TRUE),3), nsmall=3)),
Skewness=sapply(DQA.Descriptors.Numeric, function(x) format(round(skewness(x,na.rm = TRUE),3), nsmall=3)),
Kurtosis=sapply(DQA.Descriptors.Numeric, function(x) format(round(kurtosis(x,na.rm = TRUE),3), nsmall=3)),
Percentile25th=sapply(DQA.Descriptors.Numeric, function(x) format(round(quantile(x,probs=0.25,na.rm = TRUE),3), nsmall=3)),
Percentile75th=sapply(DQA.Descriptors.Numeric, function(x) format(round(quantile(x,probs=0.75,na.rm = TRUE),3), nsmall=3)),
row.names=NULL)
)
}
## Column.Name Column.Type Unique.Count Unique.Count.Ratio
## 1 Tomatometer_Critic integer 31 0.246
## 2 Tomatometer_Audience integer 32 0.254
## 3 Tomatometer_Critic_Audience_Gap integer 41 0.325
## 4 IMDB_Critic integer 40 0.317
## 5 IMDB_Audience integer 19 0.151
## 6 IMDB_Critic_Audience_Gap integer 42 0.333
## 7 Nominations_Total integer 13 0.103
## 8 Nomination_SuccessRate numeric 26 0.206
## First.Mode.Value Second.Mode.Value First.Mode.Count Second.Mode.Count
## 1 93.000 90.000 11 10
## 2 90.000 91.000 9 8
## 3 -8.000 -10.000 10 8
## 4 88.000 81.000 10 9
## 5 77.000 78.000 13 12
## 6 -4.000 -5.000 7 5
## 7 6.000 4.000 31 17
## 8 0.000 0.333 35 12
## First.Second.Mode.Ratio Minimum Mean Median Maximum Skewness Kurtosis
## 1 1.100 45.000 89.071 92.000 99.000 -2.069 8.210
## 2 1.125 60.000 83.389 85.000 99.000 -0.751 3.359
## 3 1.250 -25.000 -5.683 -8.000 25.000 0.611 3.137
## 4 1.111 46.000 81.714 83.000 100.000 -1.019 4.101
## 5 1.083 68.000 76.762 77.000 88.000 0.019 2.761
## 6 1.400 -25.000 -4.952 -7.000 30.000 0.838 3.320
## 7 1.824 2.000 6.524 6.000 14.000 0.502 2.686
## 8 2.917 0.000 0.236 0.200 1.000 0.790 3.448
## Percentile25th Percentile75th
## 1 87.000 95.000
## 2 79.000 90.000
## 3 -12.750 0.000
## 4 76.000 89.000
## 5 74.000 79.750
## 6 -14.000 1.000
## 7 5.000 8.000
## 8 0.000 0.356
##################################
# Identifying potential data quality issues
##################################
##################################
# Checking for missing observations
##################################
if ((nrow(DQA.Summary[DQA.Summary$NA.Count>0,]))>0){
print(paste0("Missing observations noted for ",
nrow(DQA.Summary[DQA.Summary$NA.Count>0,])),
(" variable(s) with NA.Count>0 and Fill.Rate<1.0."))
$NA.Count>0,]
DQA.Summary[DQA.Summaryelse {
} print("No missing observations noted.")
}
## [1] "No missing observations noted."
##################################
# Checking for zero or near-zero variance Descriptors
##################################
if (length(names(DQA.Descriptors.Factor))==0) {
print("No factor descriptors noted.")
else if (nrow(DQA.Descriptors.Factor.Summary[as.numeric(as.character(DQA.Descriptors.Factor.Summary$First.Second.Mode.Ratio))>5,])>0){
} print(paste0("Low variance observed for ",
nrow(DQA.Descriptors.Factor.Summary[as.numeric(as.character(DQA.Descriptors.Factor.Summary$First.Second.Mode.Ratio))>5,])),
(" factor variable(s) with First.Second.Mode.Ratio>5."))
as.numeric(as.character(DQA.Descriptors.Factor.Summary$First.Second.Mode.Ratio))>5,]
DQA.Descriptors.Factor.Summary[else {
} print("No low variance factor descriptors due to high first-second mode ratio noted.")
}
## [1] "Low variance observed for 1 factor variable(s) with First.Second.Mode.Ratio>5."
## Column.Name Column.Type Unique.Count First.Mode.Value Second.Mode.Value
## 2 Picture factor 2 NOM WON
## First.Mode.Count Second.Mode.Count Unique.Count.Ratio First.Second.Mode.Ratio
## 2 112 14 0.016 8.000
if (length(names(DQA.Descriptors.Numeric))==0) {
print("No numeric descriptors noted.")
else if (nrow(DQA.Descriptors.Numeric.Summary[as.numeric(as.character(DQA.Descriptors.Numeric.Summary$First.Second.Mode.Ratio))>5,])>0){
} print(paste0("Low variance observed for ",
nrow(DQA.Descriptors.Numeric.Summary[as.numeric(as.character(DQA.Descriptors.Numeric.Summary$First.Second.Mode.Ratio))>5,])),
(" numeric variable(s) with First.Second.Mode.Ratio>5."))
as.numeric(as.character(DQA.Descriptors.Numeric.Summary$First.Second.Mode.Ratio))>5,]
DQA.Descriptors.Numeric.Summary[else {
} print("No low variance numeric descriptors due to high first-second mode ratio noted.")
}
## [1] "No low variance numeric descriptors due to high first-second mode ratio noted."
if (length(names(DQA.Descriptors.Numeric))==0) {
print("No numeric descriptors noted.")
else if (nrow(DQA.Descriptors.Numeric.Summary[as.numeric(as.character(DQA.Descriptors.Numeric.Summary$Unique.Count.Ratio))<0.01,])>0){
} print(paste0("Low variance observed for ",
nrow(DQA.Descriptors.Numeric.Summary[as.numeric(as.character(DQA.Descriptors.Numeric.Summary$Unique.Count.Ratio))<0.01,])),
(" numeric variable(s) with Unique.Count.Ratio<0.01."))
as.numeric(as.character(DQA.Descriptors.Numeric.Summary$Unique.Count.Ratio))<0.01,]
DQA.Descriptors.Numeric.Summary[else {
} print("No low variance numeric descriptors due to low unique count ratio noted.")
}
## [1] "No low variance numeric descriptors due to low unique count ratio noted."
##################################
# Checking for skewed Descriptors
##################################
if (length(names(DQA.Descriptors.Numeric))==0) {
print("No numeric descriptors noted.")
else if (nrow(DQA.Descriptors.Numeric.Summary[as.numeric(as.character(DQA.Descriptors.Numeric.Summary$Skewness))>3 |
} as.numeric(as.character(DQA.Descriptors.Numeric.Summary$Skewness))<(-3),])>0){
print(paste0("High skewness observed for ",
nrow(DQA.Descriptors.Numeric.Summary[as.numeric(as.character(DQA.Descriptors.Numeric.Summary$Skewness))>3 |
(as.numeric(as.character(DQA.Descriptors.Numeric.Summary$Skewness))<(-3),])),
" numeric variable(s) with Skewness>3 or Skewness<(-3)."))
as.numeric(as.character(DQA.Descriptors.Numeric.Summary$Skewness))>3 |
DQA.Descriptors.Numeric.Summary[as.numeric(as.character(DQA.Descriptors.Numeric.Summary$Skewness))<(-3),]
else {
} print("No skewed numeric descriptors noted.")
}
## [1] "No skewed numeric descriptors noted."
##################################
# Formulating dataset versions
# for the different principal component algorithms
##################################
##################################
# Formulating dataset for
# Principal Component Analysis
##################################
<- Oscars[,c("Tomatometer_Critic",
Oscars.PCA "Tomatometer_Audience",
"IMDB_Critic",
"IMDB_Audience",
"Nominations_Total",
"Nomination_SuccessRate",
"Year",
"Picture")]
row.names(Oscars.PCA) <- Oscars$Film
dim(Oscars.PCA)
## [1] 126 8
str(Oscars.PCA)
## 'data.frame': 126 obs. of 8 variables:
## $ Tomatometer_Critic : int 82 66 90 93 97 89 92 89 98 90 ...
## $ Tomatometer_Audience : int 82 85 82 80 84 88 74 68 90 79 ...
## $ IMDB_Critic : int 83 53 81 85 95 69 78 88 88 83 ...
## $ IMDB_Audience : int 79 76 79 73 75 83 73 70 83 74 ...
## $ Nominations_Total : int 9 2 4 3 9 8 6 2 5 6 ...
## $ Nomination_SuccessRate: num 0.333 0.5 0 0 0.667 ...
## $ Year : Factor w/ 14 levels "2010","2011",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Picture : Factor w/ 2 levels "NOM","WON": 1 1 1 1 2 1 1 1 1 1 ...
summary(Oscars.PCA)
## Tomatometer_Critic Tomatometer_Audience IMDB_Critic IMDB_Audience
## Min. :45.00 Min. :60.00 Min. : 46.00 Min. :68.00
## 1st Qu.:87.00 1st Qu.:79.00 1st Qu.: 76.00 1st Qu.:74.00
## Median :92.00 Median :85.00 Median : 83.00 Median :77.00
## Mean :89.07 Mean :83.39 Mean : 81.71 Mean :76.76
## 3rd Qu.:95.00 3rd Qu.:90.00 3rd Qu.: 89.00 3rd Qu.:79.75
## Max. :99.00 Max. :99.00 Max. :100.00 Max. :88.00
##
## Nominations_Total Nomination_SuccessRate Year Picture
## Min. : 2.000 Min. :0.0000 2010 :10 NOM:112
## 1st Qu.: 5.000 1st Qu.:0.0000 2011 :10 WON: 14
## Median : 6.000 Median :0.2000 2022 :10
## Mean : 6.524 Mean :0.2362 2023 :10
## 3rd Qu.: 8.000 3rd Qu.:0.3561 2012 : 9
## Max. :14.000 Max. :1.0000 2013 : 9
## (Other):68
##################################
# Formulating dataset for
# Correspondence Analysis
##################################
<- Oscars[,c("Genre",
Oscars.CA.Cinematography "Cinematography")]
<- as.data.frame.matrix(
(Oscars.CA.Cinematography table(Oscars.CA.Cinematography$Genre,Oscars.CA.Cinematography$Cinematography)))
## NNOM NOM WON
## Action 3 1 0
## Adventure 1 0 2
## Biography 8 3 1
## Comedy 17 4 1
## Crime 3 2 0
## Drama 22 2 1
## Family 2 0 0
## History 8 6 0
## LGBTQ+ 2 1 0
## Music 2 1 0
## Musical 1 1 1
## Romance 3 2 0
## Sci-Fi 3 2 4
## Thriller 4 2 0
## War 1 3 2
## Western 0 3 1
<- Oscars[,c("Genre",
Oscars.CA.Directing "Directing")]
<- as.data.frame.matrix(
(Oscars.CA.Directing table(Oscars.CA.Directing$Genre,Oscars.CA.Directing$Directing)))
## NNOM NOM WON
## Action 3 1 0
## Adventure 0 2 1
## Biography 8 4 0
## Comedy 6 14 2
## Crime 2 3 0
## Drama 12 11 2
## Family 2 0 0
## History 10 3 1
## LGBTQ+ 2 1 0
## Music 2 1 0
## Musical 1 1 1
## Romance 2 2 1
## Sci-Fi 5 2 2
## Thriller 2 3 1
## War 1 4 1
## Western 1 1 2
<- Oscars[,c("Genre",
Oscars.CA.Editing "Editing")]
<- as.data.frame.matrix(
(Oscars.CA.Editing table(Oscars.CA.Editing$Genre,Oscars.CA.Editing$Editing)))
## NNOM NOM WON
## Action 1 2 1
## Adventure 1 2 0
## Biography 4 6 2
## Comedy 11 10 1
## Crime 0 5 0
## Drama 19 6 0
## Family 2 0 0
## History 7 5 2
## LGBTQ+ 1 2 0
## Music 0 1 2
## Musical 2 1 0
## Romance 4 1 0
## Sci-Fi 3 4 2
## Thriller 2 4 0
## War 2 1 3
## Western 2 2 0
<- Oscars[,c("Genre",
Oscars.CA.Screenplay "Screenplay")]
<- as.data.frame.matrix(
(Oscars.CA.Screenplay table(Oscars.CA.Screenplay$Genre,Oscars.CA.Screenplay$Screenplay)))
## NNOM NOM WON
## Action 2 2 0
## Adventure 0 3 0
## Biography 4 7 1
## Comedy 0 14 8
## Crime 0 5 0
## Drama 3 15 7
## Family 0 2 0
## History 5 5 4
## LGBTQ+ 0 1 2
## Music 0 3 0
## Musical 2 1 0
## Romance 1 3 1
## Sci-Fi 3 6 0
## Thriller 2 1 3
## War 2 3 1
## Western 1 2 1
##################################
# Formulating dataset for
# Multiple Correspondence Analysis
##################################
<- Oscars[,c("Cinematography",
Oscars.MCA "Directing",
"Editing",
"Screenplay",
"Year",
"Picture")]
row.names(Oscars.MCA) <- Oscars$Film
dim(Oscars.MCA)
## [1] 126 6
str(Oscars.MCA)
## 'data.frame': 126 obs. of 6 variables:
## $ Cinematography: Factor w/ 3 levels "NNOM","NOM","WON": 3 1 1 1 2 2 1 1 1 1 ...
## $ Directing : Factor w/ 3 levels "NNOM","NOM","WON": 2 1 1 1 3 2 2 1 1 2 ...
## $ Editing : Factor w/ 3 levels "NNOM","NOM","WON": 2 1 2 1 3 2 2 1 1 1 ...
## $ Screenplay : Factor w/ 3 levels "NNOM","NOM","WON": 1 1 2 2 3 2 3 2 2 2 ...
## $ Year : Factor w/ 14 levels "2010","2011",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Picture : Factor w/ 2 levels "NOM","WON": 1 1 1 1 2 1 1 1 1 1 ...
summary(Oscars.MCA)
## Cinematography Directing Editing Screenplay Year Picture
## NNOM:80 NNOM:59 NNOM:61 NNOM:25 2010 :10 NOM:112
## NOM :33 NOM :53 NOM :52 NOM :73 2011 :10 WON: 14
## WON :13 WON :14 WON :13 WON :28 2022 :10
## 2023 :10
## 2012 : 9
## 2013 : 9
## (Other):68
##################################
# Formulating dataset for
# Multiple Factor Analysis
##################################
<- Oscars[,c("Nominations_Total",
Oscars.MFA "Nomination_SuccessRate",
"Picture",
"Cinematography",
"Directing",
"Editing",
"Screenplay",
"Acting",
"Design",
"Sound")]
row.names(Oscars.MFA) <- Oscars$Film
dim(Oscars.MFA)
## [1] 126 10
str(Oscars.MFA)
## 'data.frame': 126 obs. of 10 variables:
## $ Nominations_Total : int 9 2 4 3 9 8 6 2 5 6 ...
## $ Nomination_SuccessRate: num 0.333 0.5 0 0 0.667 ...
## $ Picture : Factor w/ 2 levels "NOM","WON": 1 1 1 1 2 1 1 1 1 1 ...
## $ Cinematography : Factor w/ 3 levels "NNOM","NOM","WON": 3 1 1 1 2 2 1 1 1 1 ...
## $ Directing : Factor w/ 3 levels "NNOM","NOM","WON": 2 1 1 1 3 2 2 1 1 2 ...
## $ Editing : Factor w/ 3 levels "NNOM","NOM","WON": 2 1 2 1 3 2 2 1 1 1 ...
## $ Screenplay : Factor w/ 3 levels "NNOM","NOM","WON": 1 1 2 2 3 2 3 2 2 2 ...
## $ Acting : Factor w/ 3 levels "NNOM","NOM","WON": 1 3 1 2 2 3 3 1 1 2 ...
## $ Design : Factor w/ 3 levels "NNOM","NOM","WON": 3 1 1 1 1 1 1 1 1 1 ...
## $ Sound : Factor w/ 3 levels "NNOM","NOM","WON": 2 1 1 1 3 2 1 1 2 1 ...
summary(Oscars.MFA)
## Nominations_Total Nomination_SuccessRate Picture Cinematography Directing
## Min. : 2.000 Min. :0.0000 NOM:112 NNOM:80 NNOM:59
## 1st Qu.: 5.000 1st Qu.:0.0000 WON: 14 NOM :33 NOM :53
## Median : 6.000 Median :0.2000 WON :13 WON :14
## Mean : 6.524 Mean :0.2362
## 3rd Qu.: 8.000 3rd Qu.:0.3561
## Max. :14.000 Max. :1.0000
## Editing Screenplay Acting Design Sound
## NNOM:61 NNOM:25 NNOM:31 NNOM:82 NNOM:76
## NOM :52 NOM :73 NOM :55 NOM :32 NOM :34
## WON :13 WON :28 WON :40 WON :12 WON :16
##
##
##
##################################
# Formulating dataset for
# Factor Analysis of Mixed Data
##################################
<- Oscars[,c("Nominations_Total",
Oscars.FAMD "Nomination_SuccessRate",
"Picture",
"Cinematography",
"Directing",
"Editing",
"Screenplay")]
row.names(Oscars.FAMD) <- Oscars$Film
dim(Oscars.FAMD)
## [1] 126 7
str(Oscars.FAMD)
## 'data.frame': 126 obs. of 7 variables:
## $ Nominations_Total : int 9 2 4 3 9 8 6 2 5 6 ...
## $ Nomination_SuccessRate: num 0.333 0.5 0 0 0.667 ...
## $ Picture : Factor w/ 2 levels "NOM","WON": 1 1 1 1 2 1 1 1 1 1 ...
## $ Cinematography : Factor w/ 3 levels "NNOM","NOM","WON": 3 1 1 1 2 2 1 1 1 1 ...
## $ Directing : Factor w/ 3 levels "NNOM","NOM","WON": 2 1 1 1 3 2 2 1 1 2 ...
## $ Editing : Factor w/ 3 levels "NNOM","NOM","WON": 2 1 2 1 3 2 2 1 1 1 ...
## $ Screenplay : Factor w/ 3 levels "NNOM","NOM","WON": 1 1 2 2 3 2 3 2 2 2 ...
summary(Oscars.FAMD)
## Nominations_Total Nomination_SuccessRate Picture Cinematography Directing
## Min. : 2.000 Min. :0.0000 NOM:112 NNOM:80 NNOM:59
## 1st Qu.: 5.000 1st Qu.:0.0000 WON: 14 NOM :33 NOM :53
## Median : 6.000 Median :0.2000 WON :13 WON :14
## Mean : 6.524 Mean :0.2362
## 3rd Qu.: 8.000 3rd Qu.:0.3561
## Max. :14.000 Max. :1.0000
## Editing Screenplay
## NNOM:61 NNOM:25
## NOM :52 NOM :73
## WON :13 WON :28
##
##
##
##################################
# Loading dataset
##################################
<- Oscars
EDA_Oscars str(EDA_Oscars)
## 'data.frame': 126 obs. of 20 variables:
## $ Film : chr "Avatar" "The Blind Side" "District 9" "An Education" ...
## $ Year : Factor w/ 14 levels "2010","2011",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Tomatometer_Critic : int 82 66 90 93 97 89 92 89 98 90 ...
## $ Tomatometer_Audience : int 82 85 82 80 84 88 74 68 90 79 ...
## $ Tomatometer_Critic_Audience_Gap: int 0 19 -8 -13 -13 -1 -18 -21 -8 -11 ...
## $ IMDB_Critic : int 83 53 81 85 95 69 78 88 88 83 ...
## $ IMDB_Audience : int 79 76 79 73 75 83 73 70 83 74 ...
## $ IMDB_Critic_Audience_Gap : int -4 23 -2 -12 -20 14 -5 -18 -5 -9 ...
## $ Nominations_Total : int 9 2 4 3 9 8 6 2 5 6 ...
## $ Nomination_SuccessRate : num 0.333 0.5 0 0 0.667 ...
## $ Genre : Factor w/ 16 levels "Action","Adventure",..: 13 3 13 6 15 15 6 4 7 4 ...
## $ SideGenre : Factor w/ 32 levels "Action/Adventure",..: 25 13 27 12 30 29 12 9 17 9 ...
## $ Picture : Factor w/ 2 levels "NOM","WON": 1 1 1 1 2 1 1 1 1 1 ...
## $ Cinematography : Factor w/ 3 levels "NNOM","NOM","WON": 3 1 1 1 2 2 1 1 1 1 ...
## $ Directing : Factor w/ 3 levels "NNOM","NOM","WON": 2 1 1 1 3 2 2 1 1 2 ...
## $ Editing : Factor w/ 3 levels "NNOM","NOM","WON": 2 1 2 1 3 2 2 1 1 1 ...
## $ Screenplay : Factor w/ 3 levels "NNOM","NOM","WON": 1 1 2 2 3 2 3 2 2 2 ...
## $ Acting : Factor w/ 3 levels "NNOM","NOM","WON": 1 3 1 2 2 3 3 1 1 2 ...
## $ Design : Factor w/ 3 levels "NNOM","NOM","WON": 3 1 1 1 1 1 1 1 1 1 ...
## $ Sound : Factor w/ 3 levels "NNOM","NOM","WON": 2 1 1 1 3 2 1 1 2 1 ...
##################################
# Exploratory analysis
# among quantitative descriptors
# grouped by Picture
##################################
splom(~EDA_Oscars[,c(3:10)],
groups = EDA_Oscars$Picture,
pch = 16,
cex = 1,
alpha = 0.45,
varnames = c("T_CTC", "T_AUD", "T_GAP",
"I_CTC", "I_AUD", "I_GAP",
"NOM", "WIN%"),
auto.key = list(points = TRUE, space = "right"),
main = "Exploratory Analysis Between Picture and Quantitative Descriptors",
xlab = "Scatterplot Matrix of Quantitative Descriptors")
##################################
# Exploratory analysis
# among quantitative descriptors
# grouped by Year
##################################
splom(~EDA_Oscars[,c(3:10)],
groups = EDA_Oscars$Year,
pch = 16,
cex = 1,
alpha = 0.45,
varnames = c("T_CTC", "T_AUD", "T_GAP",
"I_CTC", "I_AUD", "I_GAP",
"NOM", "WIN%"),
auto.key = list(points = TRUE, space = "right"),
main = "Exploratory Analysis Between Year and Quantitative Descriptors",
xlab = "Scatterplot Matrix of Quantitative Descriptors")
##################################
# Exploratory analysis
# among qualitative descriptors
# grouped by Picture
##################################
##################################
# Creating a function to formulate
# the proportions table
##################################
<- function(FactorVar) {
EDA.PropTable.Function <- EDA_Oscars[,c("Picture",
EDA.Bar.Source.FactorVar
FactorVar)]<- as.data.frame(prop.table(table(EDA.Bar.Source.FactorVar), 2))
EDA.Bar.Source.FactorVar.Prop names(EDA.Bar.Source.FactorVar.Prop)[2] <- "Status"
$Variable <- rep(FactorVar,nrow(EDA.Bar.Source.FactorVar.Prop))
EDA.Bar.Source.FactorVar.Prop
return(EDA.Bar.Source.FactorVar.Prop)
}
<- rbind(EDA.PropTable.Function(names(EDA_Oscars)[14]),
(EDA.Bar.Source.FactorVar.Prop EDA.PropTable.Function(names(EDA_Oscars)[15]),
EDA.PropTable.Function(names(EDA_Oscars)[16]),
EDA.PropTable.Function(names(EDA_Oscars)[17]),
EDA.PropTable.Function(names(EDA_Oscars)[18]),
EDA.PropTable.Function(names(EDA_Oscars)[19]),
EDA.PropTable.Function(names(EDA_Oscars)[20])))
## Picture Status Freq Variable
## 1 NOM NNOM 0.91250000 Cinematography
## 2 WON NNOM 0.08750000 Cinematography
## 3 NOM NOM 0.81818182 Cinematography
## 4 WON NOM 0.18181818 Cinematography
## 5 NOM WON 0.92307692 Cinematography
## 6 WON WON 0.07692308 Cinematography
## 7 NOM NNOM 0.94915254 Directing
## 8 WON NNOM 0.05084746 Directing
## 9 NOM NOM 0.94339623 Directing
## 10 WON NOM 0.05660377 Directing
## 11 NOM WON 0.42857143 Directing
## 12 WON WON 0.57142857 Directing
## 13 NOM NNOM 0.96721311 Editing
## 14 WON NNOM 0.03278689 Editing
## 15 NOM NOM 0.82692308 Editing
## 16 WON NOM 0.17307692 Editing
## 17 NOM WON 0.76923077 Editing
## 18 WON WON 0.23076923 Editing
## 19 NOM NNOM 1.00000000 Screenplay
## 20 WON NNOM 0.00000000 Screenplay
## 21 NOM NOM 0.95890411 Screenplay
## 22 WON NOM 0.04109589 Screenplay
## 23 NOM WON 0.60714286 Screenplay
## 24 WON WON 0.39285714 Screenplay
## 25 NOM NNOM 0.96774194 Acting
## 26 WON NNOM 0.03225806 Acting
## 27 NOM NOM 0.90909091 Acting
## 28 WON NOM 0.09090909 Acting
## 29 NOM WON 0.80000000 Acting
## 30 WON WON 0.20000000 Acting
## 31 NOM NNOM 0.89024390 Design
## 32 WON NNOM 0.10975610 Design
## 33 NOM NOM 0.87500000 Design
## 34 WON NOM 0.12500000 Design
## 35 NOM WON 0.91666667 Design
## 36 WON WON 0.08333333 Design
## 37 NOM NNOM 0.88157895 Sound
## 38 WON NNOM 0.11842105 Sound
## 39 NOM NOM 0.88235294 Sound
## 40 WON NOM 0.11764706 Sound
## 41 NOM WON 0.93750000 Sound
## 42 WON WON 0.06250000 Sound
<- barchart(EDA.Bar.Source.FactorVar.Prop[,3] ~
(EDA.Barchart.FactorVar 2] |
EDA.Bar.Source.FactorVar.Prop[,4],
EDA.Bar.Source.FactorVar.Prop[,data=EDA.Bar.Source.FactorVar.Prop,
groups = EDA.Bar.Source.FactorVar.Prop[,1],
stack=TRUE,
ylab = "Proportion",
layout=(c(3,3)),
auto.key = list(points = TRUE, space = "right"),
main = "Exploratory Analysis Between Picture and Qualitative Descriptors",
xlab = "Bar Chart of Qualitative Descriptors"))
##################################
# Loading dataset
##################################
<- Oscars.PCA
SD_PCA str(SD_PCA)
## 'data.frame': 126 obs. of 8 variables:
## $ Tomatometer_Critic : int 82 66 90 93 97 89 92 89 98 90 ...
## $ Tomatometer_Audience : int 82 85 82 80 84 88 74 68 90 79 ...
## $ IMDB_Critic : int 83 53 81 85 95 69 78 88 88 83 ...
## $ IMDB_Audience : int 79 76 79 73 75 83 73 70 83 74 ...
## $ Nominations_Total : int 9 2 4 3 9 8 6 2 5 6 ...
## $ Nomination_SuccessRate: num 0.333 0.5 0 0 0.667 ...
## $ Year : Factor w/ 14 levels "2010","2011",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Picture : Factor w/ 2 levels "NOM","WON": 1 1 1 1 2 1 1 1 1 1 ...
##################################
# Performing PCA
##################################
<- PCA(SD_PCA[,c(1:6)],
DR_PCA scale.unit = TRUE,
graph = FALSE)
##################################
# Obtaining the PCA eigenvalues
##################################
<- get_eigenvalue(DR_PCA)) (DR_PCA_EV
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 2.21987218 36.997870 36.99787
## Dim.2 1.66513612 27.752269 64.75014
## Dim.3 1.01373817 16.895636 81.64577
## Dim.4 0.67604616 11.267436 92.91321
## Dim.5 0.33051908 5.508651 98.42186
## Dim.6 0.09468828 1.578138 100.00000
##################################
# Formulating the Scree Plot
# for the variances
##################################
<- fviz_eig(DR_PCA,
(DR_PCA_VarianceScreePlot addlabels = TRUE,
ylim = c(0, 100),
choice = c("variance")) +
labs(title = "Principal Component Analysis : Scree Plot of Explained Variances",
subtitle = "Top 2 Principal Components",
y = "Percentage of Explained Variances",
x = "Principal Components") +
theme_classic())
##################################
# Formulating the Scree Plot
# for the eigenvalues
##################################
<- fviz_eig(DR_PCA,
(DR_PCA_EigenvalueScreePlot addlabels = TRUE,
ylim = c(0, 6),
choice = c("eigenvalue")) +
labs(title = "Principal Component Analysis : Scree Plot of Eigenvalues",
subtitle = "Top 2 Principal Components",
y = "Eigenvalues",
x = "Principal Components") +
theme_classic())
##################################
# Extracting the PCA
# descriptor variable loadings
##################################
<- get_pca_var(DR_PCA)
DR_PCA_VAR
##################################
# Extracting the coordinates
# representing the loadings
# for the descriptor variables
##################################
$coord DR_PCA_VAR
## Dim.1 Dim.2 Dim.3 Dim.4
## Tomatometer_Critic 0.7160011 0.61208661 -0.24062432 -0.01481838
## Tomatometer_Audience 0.6589988 -0.51101999 -0.35527700 0.23993434
## IMDB_Critic 0.5746051 0.77898937 -0.03684305 -0.06857649
## IMDB_Audience 0.7133572 -0.52207446 -0.07046626 0.19067464
## Nominations_Total 0.3594166 0.08771218 0.85255162 0.35355795
## Nomination_SuccessRate 0.5520012 -0.37718042 0.31056271 -0.67245470
## Dim.5
## Tomatometer_Critic 0.10911558
## Tomatometer_Audience 0.33343405
## IMDB_Critic -0.12768134
## IMDB_Audience -0.41907508
## Nominations_Total 0.10547110
## Nomination_SuccessRate 0.06621183
##################################
# Extracting the correlations
# between the descriptor variables
# and principal component dimensions
##################################
$cor DR_PCA_VAR
## Dim.1 Dim.2 Dim.3 Dim.4
## Tomatometer_Critic 0.7160011 0.61208661 -0.24062432 -0.01481838
## Tomatometer_Audience 0.6589988 -0.51101999 -0.35527700 0.23993434
## IMDB_Critic 0.5746051 0.77898937 -0.03684305 -0.06857649
## IMDB_Audience 0.7133572 -0.52207446 -0.07046626 0.19067464
## Nominations_Total 0.3594166 0.08771218 0.85255162 0.35355795
## Nomination_SuccessRate 0.5520012 -0.37718042 0.31056271 -0.67245470
## Dim.5
## Tomatometer_Critic 0.10911558
## Tomatometer_Audience 0.33343405
## IMDB_Critic -0.12768134
## IMDB_Audience -0.41907508
## Nominations_Total 0.10547110
## Nomination_SuccessRate 0.06621183
##################################
# Extracting the quality of representation
# for the descriptor variables
# on the factor map
##################################
$cos2 DR_PCA_VAR
## Dim.1 Dim.2 Dim.3 Dim.4
## Tomatometer_Critic 0.5126576 0.374650014 0.057900065 0.0002195845
## Tomatometer_Audience 0.4342794 0.261141435 0.126221744 0.0575684853
## IMDB_Critic 0.3301711 0.606824433 0.001357411 0.0047027343
## IMDB_Audience 0.5088785 0.272561741 0.004965494 0.0363568172
## Nominations_Total 0.1291803 0.007693427 0.726844261 0.1250032235
## Nomination_SuccessRate 0.3047053 0.142265072 0.096449197 0.4521953177
## Dim.5
## Tomatometer_Critic 0.011906210
## Tomatometer_Audience 0.111178264
## IMDB_Critic 0.016302526
## IMDB_Audience 0.175623921
## Nominations_Total 0.011124154
## Nomination_SuccessRate 0.004384007
##################################
# Extracting the contributions
# (in percentage) of the descriptor variables
# to the principal components
##################################
$contrib DR_PCA_VAR
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## Tomatometer_Critic 23.094013 22.4996629 5.7115404 0.03248069 3.602276
## Tomatometer_Audience 19.563262 15.6828881 12.4511188 8.51546661 33.637472
## IMDB_Critic 14.873427 36.4429325 0.1339015 0.69562326 4.932401
## IMDB_Audience 22.923776 16.3687363 0.4898202 5.37786015 53.135789
## Nominations_Total 5.819268 0.4620299 71.6994072 18.49033844 3.365662
## Nomination_SuccessRate 13.726255 8.5437503 9.5142118 66.88823084 1.326400
##################################
# Extracting the quality of representation
# for the descriptor variables
# on the factor map
##################################
<- fviz_pca_var(DR_PCA,
(DR_PCA_VariableSquaredCorrelationCircle col.var = "cos2",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
legend.title = "Squared Coordinates",
repel = TRUE) +
labs(title = "Principal Component Analysis : Squared Coordinate Plot",
subtitle = "Descriptor Variable Representation Quality",
y = "Principal Component 2",
x = "Principal Component 1") +
theme_classic() +
theme(legend.position="top"))
##################################
# Formulating clusters of the
# descriptor variables
##################################
set.seed(123)
<- kmeans(DR_PCA_VAR$coord, centers=3, nstart=25)
DR_PCA_KMEANS <- as.factor(DR_PCA_KMEANS$cluster)
DR_PCA_KMEANS_CLUSTER
##################################
# Extracting the correlation
# between the descriptor variables
# and top principal components
##################################
<- fviz_pca_var(DR_PCA,
(DR_PCA_VariableCorrelationCircle col.var = DR_PCA_KMEANS_CLUSTER,
palette = c("#0073C2FF", "#EFC000FF", "#868686FF"),
legend.title = "Cluster",
repel = TRUE) +
labs(title = "Principal Component Analysis : Correlation Plot of Quantitative Variables",
subtitle = "Principal Components Versus Descriptor Variable Clusters",
y = "Principal Component 2",
x = "Principal Component 1") +
theme_classic() +
theme(legend.position="top"))
##################################
# Extracting the contribution
# of the descriptor variables
# for the the Top 1 and 2 principal components
##################################
<- fviz_contrib(DR_PCA,
(DR_PCA_PCVariableContributors choice = "var",
axes = 1:2) +
labs(title = "Principal Component Analysis : Descriptor Variable Contribution",
subtitle = "Principal Components 1 and 2 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Testing the statistical significance
# of the contribution
# of the descriptor variables
# for the the Top 1 and 2 principal components
##################################
<- dimdesc(DR_PCA,
DR_PCA_VariableContributionTest axes = c(1,2),
proba = 0.05)
##################################
# Extracting the contribution
# of the descriptor variables
# for the Top 1 principal component
##################################
<- fviz_contrib(DR_PCA,
(DR_PCA_PC1VariableContributors choice = "var",
axes = 1) +
labs(title = "Principal Component Analysis : Descriptor Variable Contribution",
subtitle = "Principal Component 1 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Testing the statistical significance
# of the contribution
# of the descriptor variables
# for the Top 1 principal component
##################################
$Dim.1 DR_PCA_VariableContributionTest
##
## Link between the variable and the continuous variables (R-square)
## =================================================================================
## correlation p.value
## Tomatometer_Critic 0.7160011 4.384744e-21
## IMDB_Audience 0.7133572 7.103853e-21
## Tomatometer_Audience 0.6589988 4.925052e-17
## IMDB_Critic 0.5746051 1.986144e-12
## Nomination_SuccessRate 0.5520012 2.086140e-11
## Nominations_Total 0.3594166 3.577176e-05
##################################
# Extracting the contribution
# of the descriptor variables
# for the Top 2 principal component
##################################
<- fviz_contrib(DR_PCA,
(DR_PCA_PC2VariableContributors choice = "var",
axes = 2) +
labs(title = "Principal Component Analysis : Descriptor Variable Contribution",
subtitle = "Principal Component 2 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Testing the statistical significance
# of the contribution
# of the descriptor variables
# for the Top 2 principal component
##################################
$Dim.2 DR_PCA_VariableContributionTest
##
## Link between the variable and the continuous variables (R-square)
## =================================================================================
## correlation p.value
## IMDB_Critic 0.7789894 6.683142e-27
## Tomatometer_Critic 0.6120866 2.639968e-14
## Nomination_SuccessRate -0.3771804 1.339456e-05
## Tomatometer_Audience -0.5110200 9.715208e-10
## IMDB_Audience -0.5220745 3.624252e-10
##################################
# Extracting the PCA individual scores
##################################
<- get_pca_ind(DR_PCA)
DR_PCA_IND
##################################
# Extracting the coordinates
# representing the scores
# for the individuals
##################################
$coord[,c("Dim.1","Dim.2")] DR_PCA_IND
## Dim.1 Dim.2
## Avatar 0.267696540 -0.530442152
## The Blind Side -2.195453708 -3.301208059
## District 9 -0.430797744 0.112896306
## An Education -1.058977961 1.182366388
## The Hurt Locker 1.705555201 0.769878023
## Inglourious Basterds 0.487479202 -1.390603430
## Precious: Based on the Novel 'Push' by Sapphire -0.824912508 0.640551052
## A Serious Man -2.274858904 2.012978851
## Up 1.968871570 -0.415625889
## Up in the Air -0.948551664 0.937195852
## Black Swan -0.088494191 -0.715371246
## The Fighter 0.593337935 -0.511337506
## Inception 2.000141368 -2.400531058
## The Kids Are All Right -1.677629729 1.814279497
## The King's Speech 2.022535144 -0.140319446
## 127 Hours -0.376296841 0.640551980
## The Social Network 1.571785943 0.646156669
## Toy Story 3 2.112408569 -0.190945586
## True Grit 0.143820607 0.632433896
## Winter's Bone -1.197590702 1.942394429
## The Artist 1.829965512 0.030391009
## The Descendants -0.928054041 0.633597837
## Extremely Loud & Incredibly Close -6.607718620 -2.191543916
## The Help -0.780439361 -2.582418858
## Hugo 0.540836116 0.530503658
## Midnight in Paris -0.016386835 0.074821615
## Moneyball 0.032152643 0.821762000
## The Tree of Life -3.183193524 2.259276875
## War Horse -2.651283351 -0.011313828
## Amour 0.676678866 0.800609934
## Argo 1.295486139 -0.004493834
## Beasts of the Southern Wild -1.641594932 1.198443100
## Django Unchained 1.368398632 -1.582284768
## Les Miserables -1.786154693 -1.772901843
## Life of Pi 0.680921689 -0.635955517
## Lincoln -0.026274204 1.034913302
## Silver Linings Playbook 0.237579503 0.153926881
## Zero Dark Thirty -0.146040932 1.308036691
## American Hustle -0.743744716 1.987726848
## Captain Phillips 0.207250822 0.136428053
## Dallas Buyers Club 1.095154638 -1.098634659
## Gravity 1.808662879 0.792462032
## Her 0.707449019 0.525283745
## Nebraska -0.204598544 0.653651613
## Philomena -0.503197773 -0.094529799
## 12 Years a Slave 2.100854560 0.279285345
## The Wolf of Wall Street -0.663039400 -1.076259636
## American Sniper -1.844409883 -0.993999809
## Birdman or (The Unexpected Virtue of Ignorance) 0.621586316 0.408398188
## Boyhood 0.989609154 1.460184615
## The Grand Budapest Hotel 1.634209716 -0.286179436
## The Imitation Game 0.411500335 -1.065031194
## Selma 0.411542825 -0.068773038
## The Theory of Everything -1.003995061 -1.116933111
## Whiplash 2.611370669 -1.249717667
## The Big Short 0.170060595 -0.385300448
## Bridge of Spies 0.009676416 0.045721157
## Brooklyn -0.114157904 1.010762460
## Mad Max: Fury Road 2.337559214 -0.106023934
## The Martian 0.473734478 -0.357771031
## The Revenant 0.161937864 -1.138614186
## Room 1.196463021 -0.545134110
## Spotlight 1.989250915 -0.009168248
## Arrival 0.367309471 0.248612994
## Fences -1.186448927 0.815944510
## Hacksaw Ridge 0.404570037 -1.823823948
## Hell or High Water 0.153324144 0.884752265
## Hidden Figures -0.134558405 -0.586009309
## La La Land 1.829415815 0.497221042
## Lion -0.381467753 -1.365827706
## Manchester by the Sea 0.858671370 1.150335634
## Moonlight 0.896841919 1.777793051
## Call Me by Your Name 0.788578813 0.607403297
## Darkest Hour -0.795263816 -0.439185974
## Dunkirk 0.996564353 0.675675583
## Get Out 0.555076960 0.412865802
## Lady Bird -0.206141307 1.941822125
## Phantom Thread -0.786011564 1.540594077
## The Post -1.990318557 1.229655961
## The Shape of Water -0.018542859 1.459937812
## Three Billboards outside Ebbing, Missouri 1.118764632 -0.268033382
## Black Panther 0.279104946 1.057646892
## BlacKkKlansman 0.004769683 0.717101841
## Bohemian Rhapsody -1.483775371 -4.490215441
## The Favourite -0.331512533 1.843510513
## Green Book 0.468063042 -2.803170545
## Roma 0.714350361 1.696574201
## A Star Is Born -0.066617113 0.839660667
## Vice -3.934949029 -0.587917055
## Ford v Ferrari 1.682435065 -1.473745599
## The Irishman 0.944030071 1.164389790
## Jojo Rabbit -0.648230212 -2.470061984
## Joker -0.430208286 -3.099021343
## Little Women 1.096244053 0.365296701
## Marriage Story 0.942403658 0.774052883
## 1917 1.129721748 -1.028531120
## Once upon a Time...in Hollywood -0.741237528 0.734591165
## Parasite 3.117196230 -0.468032148
## The Father 1.929415735 -0.291670041
## Judas and the Black Messiah 0.960737989 0.162489630
## Mank -2.345763071 1.570456017
## Minari 0.422910493 1.064998675
## Nomadland 0.428470433 0.797058184
## Promising Young Woman -0.483645104 -0.425451081
## Sound of Metal 0.944143148 -0.069194279
## The Trial of the Chicago 7 -0.285898871 -0.355149365
## Belfast -0.511582058 -0.429370787
## CODA 1.758254848 -2.156598920
## Don't Look Up -4.442451275 -2.539922441
## Drive My Car 0.158306636 1.192487916
## Dune 1.122203185 -1.826980124
## King Richard 0.257879939 -0.723422523
## Licorice Pizza -1.872711748 2.147715796
## Nightmare Alley -3.160384027 0.636761057
## The Power of the Dog -0.720311839 2.283065628
## West Side Story 0.044190625 0.446398144
## All Quiet on the Western Front 0.952314310 -0.952191110
## Avatar: The Way of Water -0.923072631 -2.037690509
## The Banshees of Inisherin -0.069099854 1.439223633
## Elvis -1.434462767 -1.387741046
## Everything Everywhere All at Once 1.594787307 -0.536995405
## The Fabelmans -0.253962254 0.721795892
## Tar -0.777644395 1.584983545
## Top Gun: Maverick 1.673321316 -1.169382857
## Triangle of Sadness -3.612891352 -0.558900276
## Women Talking -1.120884714 0.372860732
##################################
# Extracting the quality of representation
# for the individuals
# on the factor map
##################################
$cos2[,c("Dim.1","Dim.2")] DR_PCA_IND
## Dim.1 Dim.2
## Avatar 3.473559e-02 1.363846e-01
## The Blind Side 2.654492e-01 6.001760e-01
## District 9 7.241199e-02 4.973054e-03
## An Education 2.532809e-01 3.157423e-01
## The Hurt Locker 3.809304e-01 7.761718e-02
## Inglourious Basterds 4.920414e-02 4.004019e-01
## Precious: Based on the Novel 'Push' by Sapphire 2.482848e-01 1.497069e-01
## A Serious Man 4.651816e-01 3.642437e-01
## Up 7.139238e-01 3.181429e-02
## Up in the Air 4.213049e-01 4.112778e-01
## Black Swan 5.990845e-03 3.914908e-01
## The Fighter 4.524012e-01 3.359966e-01
## Inception 3.476223e-01 5.007269e-01
## The Kids Are All Right 4.166793e-01 4.873243e-01
## The King's Speech 5.886048e-01 2.833136e-03
## 127 Hours 8.087118e-02 2.343373e-01
## The Social Network 7.817410e-01 1.321149e-01
## Toy Story 3 7.432579e-01 6.072995e-03
## True Grit 5.845162e-03 1.130276e-01
## Winter's Bone 2.367355e-01 6.227610e-01
## The Artist 7.071045e-01 1.950240e-04
## The Descendants 5.169605e-01 2.409563e-01
## Extremely Loud & Incredibly Close 8.695189e-01 9.564809e-02
## The Help 7.634343e-02 8.358854e-01
## Hugo 6.084654e-02 5.854385e-02
## Midnight in Paris 2.416386e-04 5.037676e-03
## Moneyball 5.166937e-04 3.375141e-01
## The Tree of Life 6.076654e-01 3.061099e-01
## War Horse 9.579033e-01 1.744329e-05
## Amour 1.885396e-01 2.639242e-01
## Argo 7.313005e-01 8.799629e-06
## Beasts of the Southern Wild 5.654711e-01 3.013792e-01
## Django Unchained 3.393579e-01 4.537347e-01
## Les Miserables 3.367003e-01 3.317223e-01
## Life of Pi 1.238295e-01 1.080148e-01
## Lincoln 1.224060e-04 1.899116e-01
## Silver Linings Playbook 6.949945e-02 2.917376e-02
## Zero Dark Thirty 8.195024e-03 6.574169e-01
## American Hustle 8.482517e-02 6.058853e-01
## Captain Phillips 2.053377e-02 8.897817e-03
## Dallas Buyers Club 3.810563e-01 3.834818e-01
## Gravity 3.507612e-01 6.733692e-02
## Her 2.375703e-01 1.309754e-01
## Nebraska 2.728446e-02 2.784857e-01
## Philomena 8.576662e-02 3.026762e-03
## 12 Years a Slave 8.635690e-01 1.526164e-02
## The Wolf of Wall Street 9.216683e-02 2.428455e-01
## American Sniper 6.302903e-01 1.830620e-01
## Birdman or (The Unexpected Virtue of Ignorance) 1.480236e-01 6.389924e-02
## Boyhood 2.276680e-01 4.956671e-01
## The Grand Budapest Hotel 7.454044e-01 2.285879e-02
## The Imitation Game 5.353337e-02 3.585988e-01
## Selma 2.783596e-02 7.773426e-04
## The Theory of Everything 4.276720e-01 5.293001e-01
## Whiplash 6.665594e-01 1.526601e-01
## The Big Short 3.656899e-02 1.877174e-01
## Bridge of Spies 2.143375e-04 4.785238e-03
## Brooklyn 2.858225e-03 2.240695e-01
## Mad Max: Fury Road 7.394092e-01 1.521134e-03
## The Martian 7.606632e-02 4.338431e-02
## The Revenant 3.895087e-03 1.925633e-01
## Room 3.736193e-01 7.755994e-02
## Spotlight 8.440420e-01 1.792907e-05
## Arrival 1.082896e-01 4.961011e-02
## Fences 3.887748e-01 1.838744e-01
## Hacksaw Ridge 4.540692e-02 9.227848e-01
## Hell or High Water 6.426268e-03 2.139839e-01
## Hidden Figures 3.432981e-03 6.511166e-02
## La La Land 3.054422e-01 2.256333e-02
## Lion 3.165926e-02 4.058607e-01
## Manchester by the Sea 2.344310e-01 4.207366e-01
## Moonlight 1.580445e-01 6.210264e-01
## Call Me by Your Name 2.288301e-01 1.357617e-01
## Darkest Hour 4.279259e-01 1.305098e-01
## Dunkirk 4.223402e-01 1.941459e-01
## Get Out 1.483425e-01 8.206861e-02
## Lady Bird 9.052534e-03 8.032653e-01
## Phantom Thread 1.701626e-01 6.537063e-01
## The Post 5.387903e-01 2.056562e-01
## The Shape of Water 3.675095e-05 2.278158e-01
## Three Billboards outside Ebbing, Missouri 6.958869e-01 3.994284e-02
## Black Panther 2.586023e-02 3.713459e-01
## BlacKkKlansman 2.405407e-05 5.437147e-01
## Bohemian Rhapsody 8.015624e-02 7.340665e-01
## The Favourite 1.822222e-02 5.634984e-01
## Green Book 2.383626e-02 8.549259e-01
## Roma 8.290181e-02 4.676136e-01
## A Star Is Born 3.819042e-03 6.067229e-01
## Vice 7.328548e-01 1.635957e-02
## Ford v Ferrari 4.016954e-01 3.082231e-01
## The Irishman 1.801827e-01 2.741184e-01
## Jojo Rabbit 5.215222e-02 7.572338e-01
## Joker 1.159945e-02 6.019064e-01
## Little Women 4.704613e-01 5.223972e-02
## Marriage Story 3.958672e-01 2.670649e-01
## 1917 3.152575e-01 2.613108e-01
## Once upon a Time...in Hollywood 1.155997e-01 1.135360e-01
## Parasite 7.896317e-01 1.780113e-02
## The Father 8.341746e-01 1.906290e-02
## Judas and the Black Messiah 2.520046e-01 7.208583e-03
## Mank 4.052969e-01 1.816588e-01
## Minari 7.906691e-02 5.014135e-01
## Nomadland 5.220968e-02 1.806714e-01
## Promising Young Woman 1.636160e-01 1.266110e-01
## Sound of Metal 5.282852e-01 2.837482e-03
## The Trial of the Chicago 7 3.585289e-02 5.532499e-02
## Belfast 9.399917e-02 6.621531e-02
## CODA 1.728471e-01 2.600382e-01
## Don't Look Up 7.434192e-01 2.430129e-01
## Drive My Car 8.639278e-03 4.902157e-01
## Dune 1.778672e-01 4.714335e-01
## King Richard 1.700203e-02 1.337980e-01
## Licorice Pizza 3.582575e-01 4.712019e-01
## Nightmare Alley 9.299245e-01 3.775035e-02
## The Power of the Dog 4.566770e-02 4.587803e-01
## West Side Story 6.014453e-04 6.137346e-02
## All Quiet on the Western Front 3.088681e-01 3.087882e-01
## Avatar: The Way of Water 1.423211e-01 6.935438e-01
## The Banshees of Inisherin 1.175231e-03 5.098300e-01
## Elvis 2.363814e-01 2.212339e-01
## Everything Everywhere All at Once 3.661492e-01 4.151392e-02
## The Fabelmans 4.282869e-02 3.459595e-01
## Tar 1.591355e-01 6.610807e-01
## Top Gun: Maverick 3.950945e-01 1.929549e-01
## Triangle of Sadness 9.230537e-01 2.208950e-02
## Women Talking 1.450523e-01 1.605077e-02
##################################
# Extracting the contributions
# (in percentage) of the individuals
# to the principal components
##################################
$contrib[,c("Dim.1","Dim.2")] DR_PCA_IND
## Dim.1 Dim.2
## Avatar 2.562046e-02 1.341083e-01
## The Blind Side 1.723257e+00 5.194282e+00
## District 9 6.635113e-02 6.074901e-03
## An Education 4.009363e-01 6.663216e-01
## The Hurt Locker 1.039999e+00 2.825033e-01
## Inglourious Basterds 8.495984e-02 9.216930e-01
## Precious: Based on the Novel 'Push' by Sapphire 2.432861e-01 1.955632e-01
## A Serious Man 1.850165e+00 1.931337e+00
## Up 1.385914e+00 8.233508e-02
## Up in the Air 3.216796e-01 4.186397e-01
## Black Swan 2.799826e-03 2.439173e-01
## The Fighter 1.258652e-01 1.246221e-01
## Inception 1.430286e+00 2.746593e+00
## The Kids Are All Right 1.006222e+00 1.568874e+00
## The King's Speech 1.462492e+00 9.384593e-03
## 127 Hours 5.062472e-02 1.955638e-01
## The Social Network 8.832594e-01 1.990011e-01
## Toy Story 3 1.595355e+00 1.737797e-02
## True Grit 7.395094e-03 1.906382e-01
## Winter's Bone 5.127649e-01 1.798269e+00
## The Artist 1.197257e+00 4.402202e-04
## The Descendants 3.079272e-01 1.913406e-01
## Extremely Loud & Incredibly Close 1.561006e+01 2.289181e+00
## The Help 2.177609e-01 3.178580e+00
## Hugo 1.045762e-01 1.341394e-01
## Midnight in Paris 9.600452e-05 2.668295e-03
## Moneyball 3.696024e-04 3.218636e-01
## The Tree of Life 3.622660e+00 2.432868e+00
## War Horse 2.513123e+00 6.100969e-05
## Amour 1.637068e-01 3.055073e-01
## Argo 6.000218e-01 9.625289e-06
## Beasts of the Southern Wild 9.634596e-01 6.845648e-01
## Django Unchained 6.694633e-01 1.193298e+00
## Les Miserables 1.140616e+00 1.498129e+00
## Life of Pi 1.657661e-01 1.927672e-01
## Lincoln 2.468088e-04 5.104905e-01
## Silver Linings Playbook 2.017992e-02 1.129298e-02
## Zero Dark Thirty 7.625190e-03 8.154917e-01
## American Hustle 1.977649e-01 1.883186e+00
## Captain Phillips 1.535656e-02 8.871296e-03
## Dallas Buyers Club 4.287976e-01 5.752893e-01
## Gravity 1.169544e+00 2.993206e-01
## Her 1.789336e-01 1.315127e-01
## Nebraska 1.496603e-02 2.036444e-01
## Philomena 9.052716e-02 4.259094e-03
## 12 Years a Slave 1.577951e+00 3.717714e-02
## The Wolf of Wall Street 1.571738e-01 5.520950e-01
## American Sniper 1.216232e+00 4.709256e-01
## Birdman or (The Unexpected Virtue of Ignorance) 1.381352e-01 7.949638e-02
## Boyhood 3.501297e-01 1.016238e+00
## The Grand Budapest Hotel 9.548102e-01 3.903521e-02
## The Imitation Game 6.053992e-02 5.406353e-01
## Selma 6.055243e-02 2.254323e-03
## The Theory of Everything 3.603833e-01 5.946125e-01
## Whiplash 2.438027e+00 7.443951e-01
## The Big Short 1.033972e-02 7.075852e-02
## Bridge of Spies 3.347577e-05 9.963551e-04
## Brooklyn 4.659223e-03 4.869428e-01
## Mad Max: Fury Road 1.953560e+00 5.357813e-03
## The Martian 8.023641e-02 6.100846e-02
## The Revenant 9.375583e-03 6.179209e-01
## Room 5.117997e-01 1.416402e-01
## Spotlight 1.414753e+00 4.006383e-05
## Arrival 4.823538e-02 2.945963e-02
## Fences 5.032683e-01 3.173226e-01
## Hacksaw Ridge 5.851792e-02 1.585424e+00
## Hell or High Water 8.404707e-03 3.730981e-01
## Hidden Figures 6.473261e-03 1.636774e-01
## La La Land 1.196538e+00 1.178362e-01
## Lion 5.202561e-02 8.891429e-01
## Manchester by the Sea 2.636061e-01 6.307088e-01
## Moonlight 2.875632e-01 1.506406e+00
## Call Me by Your Name 2.223267e-01 1.758466e-01
## Darkest Hour 2.261122e-01 9.193410e-02
## Dunkirk 3.550685e-01 2.175986e-01
## Get Out 1.101559e-01 8.124517e-02
## Lady Bird 1.519258e-02 1.797209e+00
## Phantom Thread 2.208815e-01 1.131244e+00
## The Post 1.416272e+00 7.206874e-01
## The Shape of Water 1.229291e-04 1.015894e+00
## Three Billboards outside Ebbing, Missouri 4.474855e-01 3.424187e-02
## Black Panther 2.785073e-02 5.331644e-01
## BlacKkKlansman 8.133558e-06 2.450989e-01
## Bohemian Rhapsody 7.871143e-01 9.609794e+00
## The Favourite 3.929175e-02 1.619836e+00
## Green Book 7.832676e-02 3.745232e+00
## Roma 1.824417e-01 1.371909e+00
## A Star Is Born 1.586621e-03 3.360372e-01
## Vice 5.535791e+00 1.647448e-01
## Ford v Ferrari 1.011994e+00 1.035201e+00
## The Irishman 3.186201e-01 6.462142e-01
## Jojo Rabbit 1.502312e-01 2.908007e+00
## Joker 6.616968e-02 4.577505e+00
## Little Women 4.296512e-01 6.360206e-02
## Marriage Story 3.175232e-01 2.855755e-01
## 1917 4.562937e-01 5.042136e-01
## Once upon a Time...in Hollywood 1.964338e-01 2.572001e-01
## Parasite 3.474000e+00 1.044074e-01
## The Father 1.330924e+00 4.054743e-02
## Judas and the Black Messiah 3.299981e-01 1.258436e-02
## Mank 1.967296e+00 1.175523e+00
## Minari 6.394379e-02 5.406022e-01
## Nomadland 6.563617e-02 3.028027e-01
## Promising Young Woman 8.362865e-02 8.627381e-02
## Sound of Metal 3.186964e-01 2.282023e-03
## The Trial of the Chicago 7 2.922311e-02 6.011762e-02
## Belfast 9.356902e-02 8.787082e-02
## CODA 1.105262e+00 2.216759e+00
## Don't Look Up 7.055809e+00 3.074827e+00
## Drive My Car 8.959829e-03 6.777783e-01
## Dune 4.502404e-01 1.590916e+00
## King Richard 2.377588e-02 2.494387e-01
## Licorice Pizza 1.253844e+00 2.198535e+00
## Nightmare Alley 3.570929e+00 1.932559e-01
## The Power of the Dog 1.854994e-01 2.484371e+00
## West Side Story 6.981710e-04 9.497832e-02
## All Quiet on the Western Front 3.242367e-01 4.321435e-01
## Avatar: The Way of Water 3.046304e-01 1.979047e+00
## The Banshees of Inisherin 1.707087e-03 9.872708e-01
## Elvis 7.356649e-01 9.179026e-01
## Everything Everywhere All at Once 9.092997e-01 1.374424e-01
## The Fabelmans 2.305897e-02 2.483182e-01
## Tar 2.162039e-01 1.197372e+00
## Top Gun: Maverick 1.001060e+00 6.517682e-01
## Triangle of Sadness 4.666715e+00 1.488841e-01
## Women Talking 4.491831e-01 6.626329e-02
##################################
# Extracting the correlation
# between the individual instances
# grouped by Picture categories
# and top principal components
##################################
<- fviz_pca_ind(DR_PCA,
(DR_PCA_IndividualCorrelationCircleByPicture geom.ind = "text",
col.ind = SD_PCA$Picture,
palette = c("#888888","#5544FF"),
repel = TRUE,
legend.title = "Picture",
addEllipses = FALSE) +
labs(title = "Principal Component Analysis : Factorial Map of Individuals",
subtitle = "Principal Components Versus Individuals Grouped by Picture Categories",
y = "Principal Component 2",
x = "Principal Component 1") +
theme_classic() +
theme(legend.position="top"))
##################################
# Extracting the correlation
# between the individual instances
# grouped by Year categories
# and top principal components
##################################
<- fviz_pca_ind(DR_PCA,
(DR_PCA_IndividualCorrelationCircleByYear geom.ind = "text",
col.ind = SD_PCA$Year,
repel = TRUE,
legend.title = "Year",
addEllipses = FALSE) +
labs(title = "Principal Component Analysis : Factorial Map of Individuals",
subtitle = "Principal Components Versus Individuals Grouped by Picture Categories",
y = "Principal Component 2",
x = "Principal Component 1") +
theme_classic() +
theme(legend.position="top"))
##################################
# Extracting the contribution
# of the descriptor variables
# for the Top 1 and Top 2 principal components
##################################
<- fviz_contrib(DR_PCA,
(DR_PCA_PCIndividualContributors choice = "ind",
axes = 1:2,
top = 10) +
labs(title = "Principal Component Analysis : Individual Contribution",
subtitle = "Principal Components 1 and 2 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Extracting the contribution
# of the descriptor variables
# for the Top 1 principal component
##################################
<- fviz_contrib(DR_PCA,
(DR_PCA_PC1IndividualContributors choice = "ind",
axes = 1,
top = 10) +
labs(title = "Principal Component Analysis : Individual Contribution",
subtitle = "Principal Component 1 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Extracting the contribution
# of the descriptor variables
# for the Top 2 principal component
##################################
<- fviz_contrib(DR_PCA,
(DR_PCA_PC2IndividualContributors choice = "ind",
axes = 2,
top = 10) +
labs(title = "Principal Component Analysis : Individual Contribution",
subtitle = "Principal Component 2 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Formulating the Biplot
# using the individual scores
# and descriptor variable loadings
# for the Top 1 and Top 2 principal components
##################################
<- fviz_pca_biplot(DR_PCA,
(DR_PCA_Biplot label = c("var","ind"),
repel = TRUE,
habillage = SD_PCA$Picture,
palette = c("#888888","#5544FF"),
col.var = "#FF5050",
legend.title = "Picture",
addEllipses = FALSE) +
labs(title = "Principal Component Analysis : Biplot",
subtitle = "Individual Scores versus Descriptor Variable Loadings",
x = "Principal Component 1",
y = "Principal Component 2") +
theme_classic() +
theme(legend.position="top"))
##################################
# Loading dataset for Cinematography
##################################
<- Oscars.CA.Cinematography
SD_CA_Cinematography str(SD_CA_Cinematography)
## 'data.frame': 16 obs. of 3 variables:
## $ NNOM: int 3 1 8 17 3 22 2 8 2 2 ...
## $ NOM : int 1 0 3 4 2 2 0 6 1 1 ...
## $ WON : int 0 2 1 1 0 1 0 0 0 0 ...
balloonplot(t(as.table(as.matrix(SD_CA_Cinematography))),
main="Cinematography by Film Genre",
xlab="Cinematography",
ylab="Genre",
label=FALSE,
show.margins=FALSE)
##################################
# Conducting a Chi-Square Test
##################################
<- chisq.test(SD_CA_Cinematography)) (SD_CA_Cinematography_CHSQ
##
## Pearson's Chi-squared test
##
## data: SD_CA_Cinematography
## X-squared = 53.85, df = 30, p-value = 0.004777
##################################
# Performing CA
##################################
<- CA(SD_CA_Cinematography,
DR_CA_Cinematography graph = FALSE)
##################################
# Obtaining the CA eigenvalues
##################################
<- get_eigenvalue(DR_CA_Cinematography)) (DR_CA_Cinematography_EV
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 0.2992796 70.02615 70.02615
## Dim.2 0.1281030 29.97385 100.00000
##################################
# Formulating the Scree Plot
# for the variances
##################################
<- fviz_eig(DR_CA_Cinematography,
(DR_CA_Cinematography_VarianceScreePlot addlabels = TRUE,
ylim = c(0, 100),
choice = c("variance")) +
labs(title = "Correspondence Analysis : Scree Plot of Explained Variances (Cinematography)",
subtitle = "Top 2 Principal Components",
y = "Percentage of Explained Variances",
x = "Principal Components") +
theme_classic())
##################################
# Formulating the Scree Plot
# for the eigenvalues
##################################
<- fviz_eig(DR_CA_Cinematography,
(DR_CA_Cinematography_EigenvalueScreePlot addlabels = TRUE,
ylim = c(0, 6),
choice = c("eigenvalue")) +
labs(title = "Correspondence Analysis : Scree Plot of Eigenvalues (Cinematography)",
subtitle = "Top 2 Principal Components",
y = "Eigenvalues",
x = "Principal Components") +
theme_classic())
##################################
# Extracting the CA
# results for row descriptor variables
##################################
<- get_ca_row(DR_CA_Cinematography)) (DR_CA_Cinematography_ROW
## Correspondence Analysis - Results for rows
## ===================================================
## Name Description
## 1 "$coord" "Coordinates for the rows"
## 2 "$cos2" "Cos2 for the rows"
## 3 "$contrib" "contributions of the rows"
## 4 "$inertia" "Inertia of the rows"
##################################
# Extracting the coordinates
# representing the loadings
# for the row descriptor variabless
##################################
$coord DR_CA_Cinematography_ROW
## Dim 1 Dim 2
## Action -0.35048211 0.041667147
## Adventure 1.61952041 -0.927278781
## Biography -0.07606662 -0.012566854
## Comedy -0.26225224 -0.133844287
## Crime -0.21528924 0.362711502
## Drama -0.37198127 -0.348215442
## Family -0.57580357 -0.493406778
## History -0.18953822 0.423862808
## LGBTQ+ -0.27537496 0.220025122
## Music -0.27537496 0.220025122
## Musical 0.82228703 0.003089121
## Romance -0.21528924 0.362711502
## Sci-Fi 1.08803149 -0.307033513
## Thriller -0.27537496 0.220025122
## War 0.97250133 0.359805071
## Western 0.92340729 0.949112996
##################################
# Extracting the quality of representation
# for the row descriptor variables
# on the factor map
##################################
$cos2 DR_CA_Cinematography_ROW
## Dim 1 Dim 2
## Action 0.9860633 1.393672e-02
## Adventure 0.7531088 2.468912e-01
## Biography 0.9734313 2.656869e-02
## Comedy 0.7933535 2.066465e-01
## Crime 0.2605233 7.394767e-01
## Drama 0.5329632 4.670368e-01
## Family 0.5766083 4.233917e-01
## History 0.1666388 8.333612e-01
## LGBTQ+ 0.6103501 3.896499e-01
## Music 0.6103501 3.896499e-01
## Musical 0.9999859 1.411291e-05
## Romance 0.2605233 7.394767e-01
## Sci-Fi 0.9262414 7.375862e-02
## Thriller 0.6103501 3.896499e-01
## War 0.8795969 1.204031e-01
## Western 0.4862747 5.137253e-01
##################################
# Extracting the contributions
# (in percentage) of the row descriptor variables
# to the principal components
##################################
$contrib DR_CA_Cinematography_ROW
## Dim 1 Dim 2
## Action 1.3029990 4.302467e-02
## Adventure 20.8663496 1.598129e+01
## Biography 0.1841288 1.174098e-02
## Comedy 4.0124854 2.441701e+00
## Crime 0.6145639 4.075329e+00
## Drama 9.1734884 1.878046e+01
## Family 1.7584543 3.016548e+00
## History 1.3337486 1.558292e+01
## LGBTQ+ 0.6032850 8.997790e-01
## Music 0.6032850 8.997790e-01
## Musical 5.3792349 1.773622e-04
## Romance 0.6145639 4.075329e+00
## Sci-Fi 28.2538615 5.256348e+00
## Thriller 1.2065700 1.799558e+00
## War 15.0481819 4.812335e+00
## Western 9.0447999 2.232369e+01
##################################
# Extracting the contribution
# of the row descriptor variables
# for the the Top 1 and 2 principal components
##################################
<- fviz_contrib(DR_CA_Cinematography,
(DR_CA_Cinematography_ROW_PCVariableContributors choice = "row",
axes = 1:2) +
labs(title = "Correspondence Analysis : Row Descriptor Variable Contribution (Cinematography)",
subtitle = "Principal Components 1 and 2 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Extracting the contribution
# of the row descriptor variables
# for the the Top 1 principal component
##################################
<- fviz_contrib(DR_CA_Cinematography,
(DR_CA_Cinematography_ROW_PC1VariableContributors choice = "row",
axes = 1) +
labs(title = "Correspondence Analysis : Row Descriptor Variable Contribution (Cinematography)",
subtitle = "Principal Component 1 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Extracting the contribution
# of the row descriptor variables
# for the the Top 2 principal component
##################################
<- fviz_contrib(DR_CA_Cinematography,
(DR_CA_Cinematography_ROW_PC2VariableContributors choice = "row",
axes = 2) +
labs(title = "Correspondence Analysis : Row Descriptor Variable Contribution (Cinematography)",
subtitle = "Principal Component 2 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Extracting the CA
# results for column descriptor variables
##################################
<- get_ca_col(DR_CA_Cinematography)) (DR_CA_Cinematography_COL
## Correspondence Analysis - Results for columns
## ===================================================
## Name Description
## 1 "$coord" "Coordinates for the columns"
## 2 "$cos2" "Cos2 for the columns"
## 3 "$contrib" "contributions of the columns"
## 4 "$inertia" "Inertia of the columns"
##################################
# Extracting the coordinates
# representing the loadings
# for the column descriptor variabless
##################################
$coord DR_CA_Cinematography_COL
## Dim 1 Dim 2
## NNOM -0.3150017 -0.1765976
## NOM 0.1780598 0.5894460
## WON 1.4864741 -0.4095314
##################################
# Extracting the quality of representation
# for the column descriptor variables
# on the factor map
##################################
$cos2 DR_CA_Cinematography_COL
## Dim 1 Dim 2
## NNOM 0.76086152 0.23913848
## NOM 0.08362161 0.91637839
## WON 0.92945168 0.07054832
##################################
# Extracting the contributions
# (in percentage) of the column descriptor variables
# to the principal components
##################################
$contrib DR_CA_Cinematography_COL
## Dim 1 Dim 2
## NNOM 21.050778 15.45716
## NOM 2.774585 71.03494
## WON 76.174637 13.50790
##################################
# Extracting the contribution
# of the column descriptor variables
# for the the Top 1 and 2 principal components
##################################
<- fviz_contrib(DR_CA_Cinematography,
(DR_CA_Cinematography_COL_PCVariableContributors choice = "col",
axes = 1:2) +
labs(title = "Correspondence Analysis : Column Descriptor Variable Contribution (Cinematography)",
subtitle = "Principal Components 1 and 2 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Extracting the contribution
# of the column descriptor variables
# for the the Top 1 principal component
##################################
<- fviz_contrib(DR_CA_Cinematography,
(DR_CA_Cinematography_COL_PC1VariableContributors choice = "col",
axes = 1) +
labs(title = "Correspondence Analysis : Column Descriptor Variable Contribution (Cinematography)",
subtitle = "Principal Component 1 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Extracting the contribution
# of the column descriptor variables
# for the the Top 2 principal component
##################################
<- fviz_contrib(DR_CA_Cinematography,
(DR_CA_Cinematography_COL_PC2VariableContributors choice = "col",
axes = 2) +
labs(title = "Correspondence Analysis : Column Descriptor Variable Contribution (Cinematography)",
subtitle = "Principal Component 2 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Formulating the Symmetric Biplot
# using the row and column
# descriptor variable loadings
# for the Top 1 and Top 2 principal components
##################################
<- fviz_ca_biplot(DR_CA_Cinematography,
(DR_CA_SymmetricBiplot_Cinematography label="all",
geom = "text",
repel = TRUE) +
labs(title = "Correspondence Analysis : Symmetric Biplot (Cinematography)",
subtitle = "Row versus Column Descriptor Variable Loadings",
x = "Principal Component 1",
y = "Principal Component 2") +
theme_classic())
##################################
# Formulating the Asymmetric Biplot
# using the row and column
# descriptor variable loadings
# for the Top 1 and Top 2 principal components
##################################
<- fviz_ca_biplot(DR_CA_Cinematography,
(DR_CA_AsymmetricBiplot_Cinematography label="all",
geom = "text",
repel = TRUE,
map = "symbiplot") +
labs(title = "Correspondence Analysis : Asymmetric Biplot (Cinematography)",
subtitle = "Row versus Column Descriptor Variable Loadings",
x = "Principal Component 1",
y = "Principal Component 2") +
theme_classic())
##################################
# Loading dataset for Directing
##################################
<- Oscars.CA.Directing
SD_CA_Directing str(SD_CA_Directing)
## 'data.frame': 16 obs. of 3 variables:
## $ NNOM: int 3 0 8 6 2 12 2 10 2 2 ...
## $ NOM : int 1 2 4 14 3 11 0 3 1 1 ...
## $ WON : int 0 1 0 2 0 2 0 1 0 0 ...
balloonplot(t(as.table(as.matrix(SD_CA_Directing))),
main="Directing by Film Genre",
xlab="Directing",
ylab="Genre",
label=FALSE,
show.margins=FALSE)
##################################
# Conducting a Chi-Square Test
##################################
<- chisq.test(SD_CA_Directing)) (SD_CA_Directing_CHSQ
##
## Pearson's Chi-squared test
##
## data: SD_CA_Directing
## X-squared = 32.369, df = 30, p-value = 0.3506
##################################
# Performing CA
##################################
<- CA(SD_CA_Directing,
DR_CA_Directing graph = FALSE)
##################################
# Obtaining the CA eigenvalues
##################################
<- get_eigenvalue(DR_CA_Directing)) (DR_CA_Directing_EV
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 0.16679328 64.92707 64.92707
## Dim.2 0.09009999 35.07293 100.00000
##################################
# Formulating the Scree Plot
# for the variances
##################################
<- fviz_eig(DR_CA_Directing,
(DR_CA_Directing_VarianceScreePlot addlabels = TRUE,
ylim = c(0, 100),
choice = c("variance")) +
labs(title = "Correspondence Analysis : Scree Plot of Explained Variances (Directing)",
subtitle = "Top 2 Principal Components",
y = "Percentage of Explained Variances",
x = "Principal Components") +
theme_classic())
##################################
# Formulating the Scree Plot
# for the eigenvalues
##################################
<- fviz_eig(DR_CA_Directing,
(DR_CA_Directing_EigenvalueScreePlot addlabels = TRUE,
ylim = c(0, 6),
choice = c("eigenvalue")) +
labs(title = "Correspondence Analysis : Scree Plot of Eigenvalues (Directing)",
subtitle = "Top 2 Principal Components",
y = "Eigenvalues",
x = "Principal Components") +
theme_classic())
##################################
# Extracting the CA
# results for row descriptor variables
##################################
<- get_ca_row(DR_CA_Directing)) (DR_CA_Directing_ROW
## Correspondence Analysis - Results for rows
## ===================================================
## Name Description
## 1 "$coord" "Coordinates for the rows"
## 2 "$cos2" "Cos2 for the rows"
## 3 "$contrib" "contributions of the rows"
## 4 "$inertia" "Inertia of the rows"
##################################
# Extracting the coordinates
# representing the loadings
# for the row descriptor variabless
##################################
$coord DR_CA_Directing_ROW
## Dim 1 Dim 2
## Action -0.59093674 0.025492372
## Adventure 1.02503716 0.076828292
## Biography -0.45370496 -0.086344034
## Comedy 0.29890560 -0.326516979
## Crime -0.01456328 -0.444220534
## Drama -0.05489268 -0.082970326
## Family -1.00263207 0.361001590
## History -0.45050428 0.204247535
## LGBTQ+ -0.45370496 -0.086344034
## Music -0.45370496 -0.086344034
## Musical 0.47611005 0.524173917
## Romance 0.21396947 0.190497611
## Sci-Fi -0.01680399 0.469783141
## Thriller 0.28566610 -0.004757871
## War 0.56012965 -0.228430683
## Western 0.80378579 0.941269298
##################################
# Extracting the quality of representation
# for the row descriptor variables
# on the factor map
##################################
$cos2 DR_CA_Directing_ROW
## Dim 1 Dim 2
## Action 0.998142490 0.001857510
## Adventure 0.994413624 0.005586376
## Biography 0.965048427 0.034951573
## Comedy 0.455937517 0.544062483
## Crime 0.001073629 0.998926371
## Drama 0.304447910 0.695552090
## Family 0.885238705 0.114761295
## History 0.829497458 0.170502542
## LGBTQ+ 0.965048427 0.034951573
## Music 0.965048427 0.034951573
## Musical 0.452060467 0.547939533
## Romance 0.557836791 0.442163209
## Sci-Fi 0.001277834 0.998722166
## Thriller 0.999722676 0.000277324
## War 0.857401211 0.142598789
## Western 0.421701375 0.578298625
##################################
# Extracting the contributions
# (in percentage) of the row descriptor variables
# to the principal components
##################################
$contrib DR_CA_Directing_ROW
## Dim 1 Dim 2
## Action 6.646498031 0.022897349
## Adventure 14.998622769 0.155979791
## Biography 11.753824740 0.788044317
## Comedy 9.352800609 20.660410302
## Crime 0.005045905 8.691044740
## Drama 0.358442786 1.515969009
## Family 9.566742526 2.295899889
## History 13.520003083 5.144538317
## LGBTQ+ 2.938456185 0.197011079
## Music 2.938456185 0.197011079
## Musical 3.235838706 7.260671511
## Romance 1.089242375 1.598283421
## Sci-Fi 0.012092556 17.496133580
## Thriller 2.329804971 0.001196414
## War 8.957344701 2.757813226
## Western 12.296783873 31.217095975
##################################
# Extracting the contribution
# of the row descriptor variables
# for the the Top 1 and 2 principal components
##################################
<- fviz_contrib(DR_CA_Directing,
(DR_CA_Directing_ROW_PCVariableContributors choice = "row",
axes = 1:2) +
labs(title = "Correspondence Analysis : Row Descriptor Variable Contribution (Directing)",
subtitle = "Principal Components 1 and 2 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Extracting the contribution
# of the row descriptor variables
# for the the Top 1 principal component
##################################
<- fviz_contrib(DR_CA_Directing,
(DR_CA_Directing_ROW_PC1VariableContributors choice = "row",
axes = 1) +
labs(title = "Correspondence Analysis : Row Descriptor Variable Contribution (Directing)",
subtitle = "Principal Component 1 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Extracting the contribution
# of the row descriptor variables
# for the the Top 2 principal component
##################################
<- fviz_contrib(DR_CA_Directing,
(DR_CA_Directing_ROW_PC2VariableContributors choice = "row",
axes = 2) +
labs(title = "Correspondence Analysis : Row Descriptor Variable Contribution (Directing)",
subtitle = "Principal Component 2 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Extracting the CA
# results for column descriptor variables
##################################
<- get_ca_col(DR_CA_Directing)) (DR_CA_Directing_COL
## Correspondence Analysis - Results for columns
## ===================================================
## Name Description
## 1 "$coord" "Coordinates for the columns"
## 2 "$cos2" "Cos2 for the columns"
## 3 "$contrib" "contributions of the columns"
## 4 "$inertia" "Inertia of the columns"
##################################
# Extracting the coordinates
# representing the loadings
# for the column descriptor variabless
##################################
$coord DR_CA_Directing_COL
## Dim 1 Dim 2
## NNOM -0.4094783 0.1083606
## NOM 0.2630727 -0.2944740
## WON 0.7297404 0.6581319
##################################
# Extracting the quality of representation
# for the column descriptor variables
# on the factor map
##################################
$cos2 DR_CA_Directing_COL
## Dim 1 Dim 2
## NNOM 0.9345536 0.06544635
## NOM 0.4438576 0.55614244
## WON 0.5514589 0.44854113
##################################
# Extracting the contributions
# (in percentage) of the column descriptor variables
# to the principal components
##################################
$contrib DR_CA_Directing_COL
## Dim 1 Dim 2
## NNOM 47.07222 6.102386
## NOM 17.45333 40.483176
## WON 35.47445 53.414437
##################################
# Extracting the contribution
# of the column descriptor variables
# for the the Top 1 and 2 principal components
##################################
<- fviz_contrib(DR_CA_Directing,
(DR_CA_Directing_COL_PCVariableContributors choice = "col",
axes = 1:2) +
labs(title = "Correspondence Analysis : Column Descriptor Variable Contribution (Directing)",
subtitle = "Principal Components 1 and 2 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Extracting the contribution
# of the column descriptor variables
# for the the Top 1 principal component
##################################
<- fviz_contrib(DR_CA_Directing,
(DR_CA_Directing_COL_PC1VariableContributors choice = "col",
axes = 1) +
labs(title = "Correspondence Analysis : Column Descriptor Variable Contribution (Directing)",
subtitle = "Principal Component 1 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Extracting the contribution
# of the column descriptor variables
# for the the Top 2 principal component
##################################
<- fviz_contrib(DR_CA_Directing,
(DR_CA_Directing_COL_PC2VariableContributors choice = "col",
axes = 2) +
labs(title = "Correspondence Analysis : Column Descriptor Variable Contribution (Directing)",
subtitle = "Principal Component 2 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Formulating the Symmetric Biplot
# using the row and column
# descriptor variable loadings
# for the Top 1 and Top 2 principal components
##################################
<- fviz_ca_biplot(DR_CA_Directing,
(DR_CA_SymmetricBiplot_Directing label="all",
geom = "text",
repel = TRUE) +
labs(title = "Correspondence Analysis : Symmetric Biplot (Directing)",
subtitle = "Row versus Column Descriptor Variable Loadings",
x = "Principal Component 1",
y = "Principal Component 2") +
theme_classic())
##################################
# Formulating the Asymmetric Biplot
# using the row and column
# descriptor variable loadings
# for the Top 1 and Top 2 principal components
##################################
<- fviz_ca_biplot(DR_CA_Directing,
(DR_CA_AsymmetricBiplot_Directing label="all",
geom = "text",
repel = TRUE,
map = "symbiplot") +
labs(title = "Correspondence Analysis : Asymmetric Biplot (Directing)",
subtitle = "Row versus Column Descriptor Variable Loadings",
x = "Principal Component 1",
y = "Principal Component 2") +
theme_classic())
##################################
# Loading dataset for Editing
##################################
<- Oscars.CA.Editing
SD_CA_Editing str(SD_CA_Editing)
## 'data.frame': 16 obs. of 3 variables:
## $ NNOM: int 1 1 4 11 0 19 2 7 1 0 ...
## $ NOM : int 2 2 6 10 5 6 0 5 2 1 ...
## $ WON : int 1 0 2 1 0 0 0 2 0 2 ...
balloonplot(t(as.table(as.matrix(SD_CA_Editing))),
main="Editing by Film Genre",
xlab="Editing",
ylab="Genre",
label=FALSE,
show.margins=FALSE)
##################################
# Conducting a Chi-Square Test
##################################
<- chisq.test(SD_CA_Editing)) (SD_CA_Editing_CHSQ
##
## Pearson's Chi-squared test
##
## data: SD_CA_Editing
## X-squared = 50.873, df = 30, p-value = 0.01005
##################################
# Performing CA
##################################
<- CA(SD_CA_Editing,
DR_CA_Editing graph = FALSE)
##################################
# Obtaining the CA eigenvalues
##################################
<- get_eigenvalue(DR_CA_Editing)) (DR_CA_Editing_EV
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 0.2584184 64.00355 64.00355
## Dim.2 0.1453380 35.99645 100.00000
##################################
# Formulating the Scree Plot
# for the variances
##################################
<- fviz_eig(DR_CA_Editing,
(DR_CA_Editing_VarianceScreePlot addlabels = TRUE,
ylim = c(0, 100),
choice = c("variance")) +
labs(title = "Correspondence Analysis : Scree Plot of Explained Variances (Editing)",
subtitle = "Top 2 Principal Components",
y = "Percentage of Explained Variances",
x = "Principal Components") +
theme_classic())
##################################
# Formulating the Scree Plot
# for the eigenvalues
##################################
<- fviz_eig(DR_CA_Editing,
(DR_CA_Editing_EigenvalueScreePlot addlabels = TRUE,
ylim = c(0, 6),
choice = c("eigenvalue")) +
labs(title = "Correspondence Analysis : Scree Plot of Eigenvalues (Editing)",
subtitle = "Top 2 Principal Components",
y = "Eigenvalues",
x = "Principal Components") +
theme_classic())
##################################
# Extracting the CA
# results for row descriptor variables
##################################
<- get_ca_row(DR_CA_Editing)) (DR_CA_Editing_ROW
## Correspondence Analysis - Results for rows
## ===================================================
## Name Description
## 1 "$coord" "Coordinates for the rows"
## 2 "$cos2" "Cos2 for the rows"
## 3 "$contrib" "contributions of the rows"
## 4 "$inertia" "Inertia of the rows"
##################################
# Extracting the coordinates
# representing the loadings
# for the row descriptor variabless
##################################
$coord DR_CA_Editing_ROW
## Dim 1 Dim 2
## Action 0.56704105 -0.13820733
## Adventure -0.16441566 -0.52858184
## Biography 0.28198554 -0.15808970
## Comedy -0.16637971 -0.09681112
## Crime 0.08300400 -1.19003666
## Drama -0.48111282 0.31808033
## Family -0.65925497 0.79432780
## History 0.09450411 0.11971026
## LGBTQ+ -0.16441566 -0.52858184
## Music 1.86860879 0.29193192
## Musical -0.41183532 0.13287298
## Romance -0.51080318 0.39745491
## Sci-Fi 0.43078594 -0.03459231
## Thriller -0.16441566 -0.52858184
## War 1.17478794 0.58289460
## Western -0.28812549 -0.19785443
##################################
# Extracting the quality of representation
# for the row descriptor variables
# on the factor map
##################################
$cos2 DR_CA_Editing_ROW
## Dim 1 Dim 2
## Action 0.943924837 0.056075163
## Adventure 0.088217200 0.911782800
## Biography 0.760857495 0.239142505
## Comedy 0.747065329 0.252934671
## Crime 0.004841385 0.995158615
## Drama 0.695846312 0.304153688
## Family 0.407871452 0.592128548
## History 0.383939061 0.616060939
## LGBTQ+ 0.088217200 0.911782800
## Music 0.976173866 0.023826134
## Musical 0.905719893 0.094280107
## Romance 0.622883592 0.377116408
## Sci-Fi 0.993593147 0.006406853
## Thriller 0.088217200 0.911782800
## War 0.802449252 0.197550748
## Western 0.679555338 0.320444662
##################################
# Extracting the contributions
# (in percentage) of the row descriptor variables
# to the principal components
##################################
$contrib DR_CA_Editing_ROW
## Dim 1 Dim 2
## Action 3.9499807 0.41722700
## Adventure 0.2490655 4.57715913
## Biography 2.9304944 1.63771622
## Comedy 1.8703779 1.12596125
## Crime 0.1057972 38.66704312
## Drama 17.7721450 13.81223319
## Family 2.6695792 6.89096101
## History 0.3840037 1.09557242
## LGBTQ+ 0.2490655 4.57715913
## Music 32.1709592 1.39615846
## Musical 1.5626957 0.28923102
## Romance 4.0066662 4.31316430
## Sci-Fi 5.1294586 0.05881011
## Thriller 0.4981310 9.15431827
## War 25.4317465 11.13221503
## Western 1.0198336 0.85507033
##################################
# Extracting the contribution
# of the row descriptor variables
# for the the Top 1 and 2 principal components
##################################
<- fviz_contrib(DR_CA_Editing,
(DR_CA_Editing_ROW_PCVariableContributors choice = "row",
axes = 1:2) +
labs(title = "Correspondence Analysis : Row Descriptor Variable Contribution (Editing)",
subtitle = "Principal Components 1 and 2 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Extracting the contribution
# of the row descriptor variables
# for the the Top 1 principal component
##################################
<- fviz_contrib(DR_CA_Editing,
(DR_CA_Editing_ROW_PC1VariableContributors choice = "row",
axes = 1) +
labs(title = "Correspondence Analysis : Row Descriptor Variable Contribution (Editing)",
subtitle = "Principal Component 1 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Extracting the contribution
# of the row descriptor variables
# for the the Top 2 principal component
##################################
<- fviz_contrib(DR_CA_Editing,
(DR_CA_Editing_ROW_PC2VariableContributors choice = "row",
axes = 2) +
labs(title = "Correspondence Analysis : Row Descriptor Variable Contribution (Editing)",
subtitle = "Principal Component 2 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Extracting the CA
# results for column descriptor variables
##################################
<- get_ca_col(DR_CA_Editing)) (DR_CA_Editing_COL
## Correspondence Analysis - Results for columns
## ===================================================
## Name Description
## 1 "$coord" "Coordinates for the columns"
## 2 "$cos2" "Cos2 for the columns"
## 3 "$contrib" "contributions of the columns"
## 4 "$inertia" "Inertia of the columns"
##################################
# Extracting the coordinates
# representing the loadings
# for the column descriptor variabless
##################################
$coord DR_CA_Editing_COL
## Dim 1 Dim 2
## NNOM -0.33513143 0.3028233
## NOM 0.04219498 -0.4536803
## WON 1.40375986 0.3937809
##################################
# Extracting the quality of representation
# for the column descriptor variables
# on the factor map
##################################
$cos2 DR_CA_Editing_COL
## Dim 1 Dim 2
## NNOM 0.550513662 0.44948634
## NOM 0.008575927 0.99142407
## WON 0.927049751 0.07295025
##################################
# Extracting the contributions
# (in percentage) of the column descriptor variables
# to the principal components
##################################
$contrib DR_CA_Editing_COL
## Dim 1 Dim 2
## NNOM 21.0409876 30.54631
## NOM 0.2843353 58.44582
## WON 78.6746771 11.00786
##################################
# Extracting the contribution
# of the column descriptor variables
# for the the Top 1 and 2 principal components
##################################
<- fviz_contrib(DR_CA_Editing,
(DR_CA_Editing_COL_PCVariableContributors choice = "col",
axes = 1:2) +
labs(title = "Correspondence Analysis : Column Descriptor Variable Contribution (Editing)",
subtitle = "Principal Components 1 and 2 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Extracting the contribution
# of the column descriptor variables
# for the the Top 1 principal component
##################################
<- fviz_contrib(DR_CA_Editing,
(DR_CA_Editing_COL_PC1VariableContributors choice = "col",
axes = 1) +
labs(title = "Correspondence Analysis : Column Descriptor Variable Contribution (Editing)",
subtitle = "Principal Component 1 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Extracting the contribution
# of the column descriptor variables
# for the the Top 2 principal component
##################################
<- fviz_contrib(DR_CA_Editing,
(DR_CA_Editing_COL_PC2VariableContributors choice = "col",
axes = 2) +
labs(title = "Correspondence Analysis : Column Descriptor Variable Contribution (Editing)",
subtitle = "Principal Component 2 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Formulating the Symmetric Biplot
# using the row and column
# descriptor variable loadings
# for the Top 1 and Top 2 principal components
##################################
<- fviz_ca_biplot(DR_CA_Editing,
(DR_CA_SymmetricBiplot_Editing label="all",
geom = "text",
repel = TRUE) +
labs(title = "Correspondence Analysis : Symmetric Biplot (Editing)",
subtitle = "Row versus Column Descriptor Variable Loadings",
x = "Principal Component 1",
y = "Principal Component 2") +
theme_classic())
##################################
# Formulating the Asymmetric Biplot
# using the row and column
# descriptor variable loadings
# for the Top 1 and Top 2 principal components
##################################
<- fviz_ca_biplot(DR_CA_Editing,
(DR_CA_AsymmetricBiplot_Editing label="all",
geom = "text",
repel = TRUE,
map = "symbiplot") +
labs(title = "Correspondence Analysis : Asymmetric Biplot (Editing)",
subtitle = "Row versus Column Descriptor Variable Loadings",
x = "Principal Component 1",
y = "Principal Component 2") +
theme_classic())
##################################
# Loading dataset for Screenplay
##################################
<- Oscars.CA.Screenplay
SD_CA_Screenplay str(SD_CA_Screenplay)
## 'data.frame': 16 obs. of 3 variables:
## $ NNOM: int 2 0 4 0 0 3 0 5 0 0 ...
## $ NOM : int 2 3 7 14 5 15 2 5 1 3 ...
## $ WON : int 0 0 1 8 0 7 0 4 2 0 ...
balloonplot(t(as.table(as.matrix(SD_CA_Screenplay))),
main="Screenplay by Film Genre",
xlab="Screenplay",
ylab="Genre",
label=FALSE,
show.margins=FALSE)
##################################
# Conducting a Chi-Square Test
##################################
<- chisq.test(SD_CA_Screenplay)) (SD_CA_Screenplay_CHSQ
##
## Pearson's Chi-squared test
##
## data: SD_CA_Screenplay
## X-squared = 41.247, df = 30, p-value = 0.08289
##################################
# Performing CA
##################################
<- CA(SD_CA_Screenplay,
DR_CA_Screenplay graph = FALSE)
##################################
# Obtaining the CA eigenvalues
##################################
<- get_eigenvalue(DR_CA_Screenplay)) (DR_CA_Screenplay_EV
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 0.1831592 55.95135 55.95135
## Dim.2 0.1441952 44.04865 100.00000
##################################
# Formulating the Scree Plot
# for the variances
##################################
<- fviz_eig(DR_CA_Screenplay,
(DR_CA_Screenplay_VarianceScreePlot addlabels = TRUE,
ylim = c(0, 100),
choice = c("variance")) +
labs(title = "Correspondence Analysis : Scree Plot of Explained Variances (Screenplay)",
subtitle = "Top 2 Principal Components",
y = "Percentage of Explained Variances",
x = "Principal Components") +
theme_classic())
##################################
# Formulating the Scree Plot
# for the eigenvalues
##################################
<- fviz_eig(DR_CA_Screenplay,
(DR_CA_Screenplay_EigenvalueScreePlot addlabels = TRUE,
ylim = c(0, 6),
choice = c("eigenvalue")) +
labs(title = "Correspondence Analysis : Scree Plot of Eigenvalues (Screenplay)",
subtitle = "Top 2 Principal Components",
y = "Eigenvalues",
x = "Principal Components") +
theme_classic())
##################################
# Extracting the CA
# results for row descriptor variables
##################################
<- get_ca_row(DR_CA_Screenplay)) (DR_CA_Screenplay_ROW
## Correspondence Analysis - Results for rows
## ===================================================
## Name Description
## 1 "$coord" "Coordinates for the rows"
## 2 "$cos2" "Cos2 for the rows"
## 3 "$contrib" "contributions of the rows"
## 4 "$inertia" "Inertia of the rows"
##################################
# Extracting the coordinates
# representing the loadings
# for the row descriptor variabless
##################################
$coord DR_CA_Screenplay_ROW
## Dim 1 Dim 2
## Action 0.81234217 -0.17778369
## Adventure -0.33965391 -0.78144905
## Biography 0.38428571 -0.17579324
## Comedy -0.53190601 0.10529527
## Crime -0.33965391 -0.78144905
## Drama -0.21120897 0.04622376
## Family -0.33965391 -0.78144905
## History 0.33214522 0.34646817
## LGBTQ+ -0.69211609 0.84424886
## Music -0.33965391 -0.78144905
## Musical 1.19634087 0.02343809
## Romance 0.01540587 -0.05227353
## Sci-Fi 0.42834348 -0.37900548
## Thriller 0.16399685 0.84026796
## War 0.34022793 0.02741900
## Western 0.10417082 0.13002035
##################################
# Extracting the quality of representation
# for the row descriptor variables
# on the factor map
##################################
$cos2 DR_CA_Screenplay_ROW
## Dim 1 Dim 2
## Action 0.95429251 0.045707489
## Adventure 0.15889866 0.841101339
## Biography 0.82694884 0.173051161
## Comedy 0.96229022 0.037709779
## Crime 0.15889866 0.841101339
## Drama 0.95429251 0.045707489
## Family 0.15889866 0.841101339
## History 0.47890320 0.521096802
## LGBTQ+ 0.40194025 0.598059754
## Music 0.15889866 0.841101339
## Musical 0.99961632 0.000383679
## Romance 0.07991642 0.920083582
## Sci-Fi 0.56088375 0.439116254
## Thriller 0.03669438 0.963305625
## War 0.99354715 0.006452853
## Western 0.39095067 0.609049332
##################################
# Extracting the contributions
# (in percentage) of the row descriptor variables
# to the principal components
##################################
$contrib DR_CA_Screenplay_ROW
## Dim 1 Dim 2
## Action 11.437701279 0.695861002
## Adventure 1.499668464 10.083266568
## Biography 7.678748880 2.041099813
## Comedy 26.970762770 1.342514688
## Crime 2.499447440 16.805444281
## Drama 4.832428790 0.294001273
## Family 0.999778976 6.722177712
## History 6.692444844 9.249822006
## LGBTQ+ 6.227014798 11.769035837
## Music 1.499668464 10.083266568
## Musical 18.605094943 0.009070776
## Romance 0.005142132 0.075199052
## Sci-Fi 7.155295654 7.115610127
## Thriller 0.699234768 23.316615710
## War 3.009483353 0.024827508
## Western 0.188084443 0.372187078
##################################
# Extracting the contribution
# of the row descriptor variables
# for the the Top 1 and 2 principal components
##################################
<- fviz_contrib(DR_CA_Screenplay,
(DR_CA_Screenplay_ROW_PCVariableContributors choice = "row",
axes = 1:2) +
labs(title = "Correspondence Analysis : Row Descriptor Variable Contribution (Screenplay)",
subtitle = "Principal Components 1 and 2 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Extracting the contribution
# of the row descriptor variables
# for the the Top 1 principal component
##################################
<- fviz_contrib(DR_CA_Screenplay,
(DR_CA_Screenplay_ROW_PC1VariableContributors choice = "row",
axes = 1) +
labs(title = "Correspondence Analysis : Row Descriptor Variable Contribution (Screenplay)",
subtitle = "Principal Component 1 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Extracting the contribution
# of the row descriptor variables
# for the the Top 2 principal component
##################################
<- fviz_contrib(DR_CA_Screenplay,
(DR_CA_Screenplay_ROW_PC2VariableContributors choice = "row",
axes = 2) +
labs(title = "Correspondence Analysis : Row Descriptor Variable Contribution (Screenplay)",
subtitle = "Principal Component 2 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Extracting the CA
# results for column descriptor variables
##################################
<- get_ca_col(DR_CA_Screenplay)) (DR_CA_Screenplay_COL
## Correspondence Analysis - Results for columns
## ===================================================
## Name Description
## 1 "$coord" "Coordinates for the columns"
## 2 "$cos2" "Cos2 for the columns"
## 3 "$contrib" "contributions of the columns"
## 4 "$inertia" "Inertia of the columns"
##################################
# Extracting the coordinates
# representing the loadings
# for the column descriptor variabless
##################################
$coord DR_CA_Screenplay_COL
## Dim 1 Dim 2
## NNOM 0.8406798 0.1617202
## NOM -0.1453620 -0.2967400
## WON -0.3716274 0.6292505
##################################
# Extracting the quality of representation
# for the column descriptor variables
# on the factor map
##################################
$cos2 DR_CA_Screenplay_COL
## Dim 1 Dim 2
## NNOM 0.9643149 0.03568505
## NOM 0.1935263 0.80647369
## WON 0.2585967 0.74140333
##################################
# Extracting the contributions
# (in percentage) of the column descriptor variables
# to the principal components
##################################
$contrib DR_CA_Screenplay_COL
## Dim 1 Dim 2
## NNOM 76.560016 3.598714
## NOM 6.683833 35.379659
## WON 16.756151 61.021626
##################################
# Extracting the contribution
# of the column descriptor variables
# for the the Top 1 and 2 principal components
##################################
<- fviz_contrib(DR_CA_Screenplay,
(DR_CA_Screenplay_COL_PCVariableContributors choice = "col",
axes = 1:2) +
labs(title = "Correspondence Analysis : Column Descriptor Variable Contribution (Screenplay)",
subtitle = "Principal Components 1 and 2 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Extracting the contribution
# of the column descriptor variables
# for the the Top 1 principal component
##################################
<- fviz_contrib(DR_CA_Screenplay,
(DR_CA_Screenplay_COL_PC1VariableContributors choice = "col",
axes = 1) +
labs(title = "Correspondence Analysis : Column Descriptor Variable Contribution (Screenplay)",
subtitle = "Principal Component 1 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Extracting the contribution
# of the column descriptor variables
# for the the Top 2 principal component
##################################
<- fviz_contrib(DR_CA_Screenplay,
(DR_CA_Screenplay_COL_PC2VariableContributors choice = "col",
axes = 2) +
labs(title = "Correspondence Analysis : Column Descriptor Variable Contribution (Screenplay)",
subtitle = "Principal Component 2 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Formulating the Symmetric Biplot
# using the row and column
# descriptor variable loadings
# for the Top 1 and Top 2 principal components
##################################
<- fviz_ca_biplot(DR_CA_Screenplay,
(DR_CA_SymmetricBiplot_Screenplay label="all",
geom = "text",
repel = TRUE) +
labs(title = "Correspondence Analysis : Symmetric Biplot (Screenplay)",
subtitle = "Row versus Column Descriptor Variable Loadings",
x = "Principal Component 1",
y = "Principal Component 2") +
theme_classic())
##################################
# Formulating the Asymmetric Biplot
# using the row and column
# descriptor variable loadings
# for the Top 1 and Top 2 principal components
##################################
<- fviz_ca_biplot(DR_CA_Screenplay,
(DR_CA_AsymmetricBiplot_Screenplay label="all",
geom = "text",
repel = TRUE,
map = "symbiplot") +
labs(title = "Correspondence Analysis : Asymmetric Biplot (Screenplay)",
subtitle = "Row versus Column Descriptor Variable Loadings",
x = "Principal Component 1",
y = "Principal Component 2") +
theme_classic())
##################################
# Loading dataset
##################################
<- Oscars.MCA
SD_MCA str(SD_MCA)
## 'data.frame': 126 obs. of 6 variables:
## $ Cinematography: Factor w/ 3 levels "NNOM","NOM","WON": 3 1 1 1 2 2 1 1 1 1 ...
## $ Directing : Factor w/ 3 levels "NNOM","NOM","WON": 2 1 1 1 3 2 2 1 1 2 ...
## $ Editing : Factor w/ 3 levels "NNOM","NOM","WON": 2 1 2 1 3 2 2 1 1 1 ...
## $ Screenplay : Factor w/ 3 levels "NNOM","NOM","WON": 1 1 2 2 3 2 3 2 2 2 ...
## $ Year : Factor w/ 14 levels "2010","2011",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ Picture : Factor w/ 2 levels "NOM","WON": 1 1 1 1 2 1 1 1 1 1 ...
##################################
# Performing MCA
##################################
<- MCA(SD_MCA[,c(1:4)],
DR_MCA graph = FALSE)
##################################
# Obtaining the MCA eigenvalues
##################################
<- get_eigenvalue(DR_MCA)) (DR_MCA_EV
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 0.4014500 20.072501 20.07250
## Dim.2 0.3482608 17.413038 37.48554
## Dim.3 0.2934227 14.671133 52.15667
## Dim.4 0.2814318 14.071589 66.22826
## Dim.5 0.2191032 10.955162 77.18342
## Dim.6 0.1864147 9.320736 86.50416
## Dim.7 0.1647588 8.237939 94.74210
## Dim.8 0.1051580 5.257902 100.00000
##################################
# Formulating the Scree Plot
# for the variances
##################################
<- fviz_eig(DR_MCA,
(DR_MCA_VarianceScreePlot addlabels = TRUE,
ylim = c(0, 100),
choice = c("variance")) +
labs(title = "Multiple Correspondence Analysis : Scree Plot of Explained Variances",
subtitle = "Top 2 Principal Components",
y = "Percentage of Explained Variances",
x = "Principal Components") +
theme_classic())
##################################
# Formulating the Scree Plot
# for the eigenvalues
##################################
<- fviz_eig(DR_MCA,
(DR_MCA_EigenvalueScreePlot addlabels = TRUE,
ylim = c(0, 6),
choice = c("eigenvalue")) +
labs(title = "Multiple Correspondence Analysis : Scree Plot of Eigenvalues",
subtitle = "Top 2 Principal Components",
y = "Eigenvalues",
x = "Principal Components") +
theme_classic())
##################################
# Extracting the MCA
# descriptor variable loadings
##################################
<- get_mca_var(DR_MCA)
DR_MCA_VAR
##################################
# Extracting the coordinates
# representing the loadings
# for the descriptor variables
##################################
$coord DR_MCA_VAR
## Dim 1 Dim 2 Dim 3 Dim 4
## Cinematography_NNOM -0.5220182052 -0.1264760 0.12785667 0.38942121
## Cinematography_NOM 0.6927420866 -0.1325546 0.28326894 -1.19605797
## Cinematography_WON 1.4539205813 1.1147985 -1.50587760 0.63970892
## Directing_NNOM -0.6590730961 0.3732744 -0.19240706 0.23562263
## Directing_NOM 0.1678869817 -0.5334948 0.37577191 -0.49146594
## Directing_WON 2.1419501885 0.4465739 -0.61170677 0.86756855
## Editing_NNOM -0.5971330422 0.3091986 -0.24548152 -0.12141930
## Editing_NOM 0.5111151354 -0.7525933 -0.07146831 0.05150745
## Editing_WON 0.7574714257 1.5595180 1.43774806 0.36370692
## Screenplay_NNOM -0.0009048548 1.4299445 0.50282891 -0.74923832
## Screenplay_NOM -0.1076851566 -0.3010392 -0.60515086 -0.15377497
## Screenplay_WON 0.2815584929 -0.4918838 1.12876035 1.06987609
## Dim 5
## Cinematography_NNOM 0.003994189
## Cinematography_NOM -0.489028350
## Cinematography_WON 1.216800033
## Directing_NNOM -0.554851077
## Directing_NOM 0.712251511
## Directing_WON -0.358079754
## Editing_NNOM 0.459427895
## Editing_NOM -0.341356193
## Editing_WON -0.790352271
## Screenplay_NNOM 0.251177034
## Screenplay_NOM -0.230016943
## Screenplay_WON 0.375421821
##################################
# Extracting the quality of representation
# for the descriptor variables
# on the factor map
##################################
$cos2 DR_MCA_VAR
## Dim 1 Dim 2 Dim 3 Dim 4
## Cinematography_NNOM 4.739183e-01 0.027819444 0.028430136 0.263737183
## Cinematography_NOM 1.702841e-01 0.006234768 0.028472717 0.507616171
## Cinematography_WON 2.431903e-01 0.142974205 0.260882083 0.047079271
## Directing_NNOM 3.825114e-01 0.122696917 0.032600122 0.048889005
## Directing_NOM 2.046384e-02 0.206639535 0.102518359 0.175363763
## Directing_WON 5.734938e-01 0.024928533 0.046773147 0.094084398
## Editing_NNOM 3.346252e-01 0.089720491 0.056552798 0.013835406
## Editing_NOM 1.835731e-01 0.398008472 0.003589208 0.001864282
## Editing_WON 6.600813e-02 0.279798703 0.237810207 0.015218367
## Screenplay_NNOM 2.026639e-07 0.506124052 0.062583394 0.138950015
## Screenplay_NOM 1.597198e-02 0.124822588 0.504399092 0.032570039
## Screenplay_WON 2.265005e-02 0.069128491 0.364028552 0.327038529
## Dim 5
## Cinematography_NNOM 0.0000277453
## Cinematography_NOM 0.0848592258
## Cinematography_WON 0.1703347802
## Directing_NNOM 0.2711003480
## Directing_NOM 0.3683153067
## Directing_WON 0.0160276388
## Editing_NNOM 0.1980848216
## Editing_NOM 0.0818817653
## Editing_WON 0.0718631616
## Screenplay_NNOM 0.0156163125
## Screenplay_NOM 0.0728729994
## Screenplay_WON 0.0402690125
##################################
# Extracting the contributions
# (in percentage) of the descriptor variables
# to the principal components
##################################
$contrib DR_MCA_VAR
## Dim 1 Dim 2 Dim 3 Dim 4 Dim 5
## Cinematography_NNOM 1.077455e+01 0.7290733 0.8843264 8.55313892 0.001155761
## Cinematography_NOM 7.826995e+00 0.3303453 1.7905550 33.28240706 7.146652781
## Cinematography_WON 1.358197e+01 9.2045183 19.9341857 3.75063238 17.430203052
## Directing_NNOM 1.266652e+01 4.6835308 1.4769637 2.30931050 16.448471491
## Directing_NOM 7.383255e-01 8.5941159 5.0605801 9.02524596 24.348000197
## Directing_WON 3.174569e+01 1.5906685 3.5423414 7.42902749 1.625579588
## Editing_NNOM 1.075004e+01 3.3225378 2.4856688 0.63401803 11.659642215
## Editing_NOM 6.713961e+00 16.7798845 0.1795999 0.09726122 5.487058437
## Editing_WON 3.686497e+00 18.0131253 18.1712472 1.21239133 7.353691928
## Screenplay_NNOM 1.011663e-05 29.1234808 4.2742143 9.89410016 1.428303605
## Screenplay_NOM 4.183816e-01 3.7690681 18.0769838 1.21699970 3.497543972
## Screenplay_WON 1.097067e+00 3.8596514 24.1233338 22.59546726 3.573696974
##################################
# Extracting the quality of representation
# for the descriptor variables
# on the factor map
##################################
<- fviz_mca_var(DR_MCA,
(DR_MCA_VariableSquaredCorrelationCircle col.var = "cos2",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
legend.title = "Squared Coordinates",
repel = TRUE) +
labs(title = "Multiple Correspondence Analysis : Squared Coordinate Plot",
subtitle = "Descriptor Variable Representation Quality",
y = "Principal Component 2",
x = "Principal Component 1") +
theme_classic() +
theme(legend.position="top"))
##################################
# Formulating clusters of the
# descriptor variables
##################################
set.seed(123)
<- kmeans(DR_MCA_VAR$coord, centers=3, nstart=25)
DR_MCA_KMEANS <- as.factor(DR_MCA_KMEANS$cluster)
DR_MCA_KMEANS_CLUSTER
##################################
# Extracting the correlation
# between the descriptor variables
# and top principal components
##################################
<- fviz_mca_var(DR_MCA,
(DR_MCA_VariableCorrelationCircle col.var = DR_MCA_KMEANS_CLUSTER,
repel = TRUE,
palette = c("#0073C2FF", "#EFC000FF", "#868686FF"),
legend.title = "Cluster") +
labs(title = "Multiple Correspondence Analysis : Factorial Map of Qualitative Variables",
subtitle = "Principal Components Versus Descriptor Variable Clusters",
y = "Principal Component 2",
x = "Principal Component 1") +
theme_classic() +
theme(legend.position="top"))
##################################
# Extracting the contribution
# of the descriptor variables
# for the the Top 1 and 2 principal components
##################################
<- fviz_contrib(DR_MCA,
(DR_MCA_PCVariableContributors choice = "var",
axes = 1:2) +
labs(title = "Multiple Correspondence Analysis : Descriptor Variable Contribution",
subtitle = "Principal Components 1 and 2 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Testing the statistical significance
# of the contribution
# of the descriptor variables
# for the the Top 1 and 2 principal components
##################################
<- dimdesc(DR_MCA,
DR_MCA_VariableContributionTest axes = c(1,2),
proba = 0.05)
##################################
# Extracting the contribution
# of the descriptor variables
# for the Top 1 principal component
##################################
<- fviz_contrib(DR_MCA,
(DR_MCA_PC1VariableContributors choice = "var",
axes = 1) +
labs(title = "Multiple Correspondence Analysis : Descriptor Variable Contribution",
subtitle = "Principal Component 1 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Testing the statistical significance
# of the contribution
# of the descriptor variables
# for the Top 1 principal component
##################################
$'Dim 1' DR_MCA_VariableContributionTest
##
## Link between the variable and the categorical variable (1-way anova)
## =============================================
## R2 p.value
## Directing 0.7250272 3.283334e-35
## Cinematography 0.5168029 3.746658e-20
## Editing 0.3396347 8.255279e-12
##
## Link between variable and the categories of the categorical variables
## ================================================================
## Estimate p.value
## Directing=Directing_WON 1.0084996 1.066429e-24
## Cinematography=Cinematography_WON 0.5780799 4.450536e-09
## Editing=Editing_NOM 0.1820318 5.586469e-07
## Cinematography=Cinematography_NOM 0.0957966 1.573813e-06
## Editing=Editing_WON 0.3381234 3.683149e-03
## Editing=Editing_NNOM -0.5201552 1.304909e-12
## Directing=Directing_NNOM -0.7662311 1.192963e-14
## Cinematography=Cinematography_NNOM -0.6738765 5.224619e-19
##################################
# Extracting the contribution
# of the descriptor variables
# for the Top 2 principal component
##################################
<- fviz_contrib(DR_MCA,
(DR_MCA_PC2VariableContributors choice = "var",
axes = 2) +
labs(title = "Multiple Correspondence Analysis : Descriptor Variable Contribution",
subtitle = "Principal Component 2 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Testing the statistical significance
# of the contribution
# of the descriptor variables
# for the Top 2 principal component
##################################
$'Dim 2' DR_MCA_VariableContributionTest
##
## Link between the variable and the categorical variable (1-way anova)
## =============================================
## R2 p.value
## Editing 0.5309660 6.012674e-21
## Screenplay 0.5119740 6.906284e-20
## Directing 0.2071220 6.326897e-07
## Cinematography 0.1429811 7.566777e-05
##
## Link between variable and the categories of the categorical variables
## ================================================================
## Estimate p.value
## Screenplay=Screenplay_NNOM 0.71855222 1.007469e-20
## Editing=Editing_WON 0.70077312 1.925781e-10
## Cinematography=Cinematography_WON 0.48954310 1.269630e-05
## Directing=Directing_NNOM 0.16395356 5.798540e-05
## Screenplay=Screenplay_WON -0.41558827 2.935568e-03
## Editing=Editing_NNOM -0.03708562 6.560151e-04
## Screenplay=Screenplay_NOM -0.30296395 4.950520e-05
## Directing=Directing_NOM -0.37116380 8.944524e-08
## Editing=Editing_NOM -0.66368750 2.420712e-15
##################################
# Extracting the MCA individual scores
##################################
<- get_mca_ind(DR_MCA)
DR_MCA_IND
##################################
# Extracting the coordinates
# representing the scores
# for the individuals
##################################
$coord[,c("Dim 1","Dim 2")] DR_MCA_IND
## Dim 1 Dim 2
## Avatar 0.84123067 0.53320524
## The Blind Side -0.70199134 0.84130641
## District 9 -0.30684197 -0.34179995
## An Education -0.74412366 0.10800804
## The Hurt Locker 1.52845528 0.58531128
## Inglourious Basterds 0.49876001 -0.72851058
## Precious: Based on the Novel 'Push' by Sapphire 0.17303576 -0.80678321
## A Serious Man -0.74412366 0.10800804
## Up -0.74412366 0.10800804
## Up in the Air -0.41782987 -0.27612752
## Black Swan 0.54089234 0.00478779
## The Fighter 0.01945182 -0.72593552
## Inception 0.03552287 0.63385043
## The Kids Are All Right -0.74412366 0.10800804
## The King's Speech 1.43125043 -0.39417077
## 127 Hours -0.30684197 -0.34179995
## The Social Network 0.74954880 0.17012377
## Toy Story 3 -0.74412366 0.10800804
## True Grit 0.06147832 -0.27870259
## Winter's Bone -0.74412366 0.10800804
## The Artist 1.27766650 -0.31332307
## The Descendants 0.17303576 -0.80678321
## Extremely Loud & Incredibly Close -0.70199134 0.84130641
## The Help -0.70199134 0.84130641
## Hugo 0.79909835 -0.20009313
## Midnight in Paris -0.26424593 -0.35697522
## Moneyball -0.30684197 -0.34179995
## The Tree of Life 0.10361065 0.45459578
## War Horse -0.22268314 0.83873134
## Amour -0.41782987 -0.27612752
## Argo -0.05605319 0.55683439
## Beasts of the Southern Wild -0.41782987 -0.27612752
## Django Unchained -0.11123153 0.02458528
## Les Miserables -0.70199134 0.84130641
## Life of Pi 1.57800484 0.21509438
## Lincoln 0.49876001 -0.72851058
## Silver Linings Playbook 0.01945182 -0.72593552
## Zero Dark Thirty -0.30684197 -0.34179995
## American Hustle 0.01945182 -0.72593552
## Captain Phillips -0.30684197 -0.34179995
## Dallas Buyers Club -0.30684197 -0.34179995
## Gravity 1.71734201 1.92787479
## Her -0.59053972 0.02716034
## Nebraska 0.06147832 -0.27870259
## Philomena -0.74412366 0.10800804
## 12 Years a Slave 0.17303576 -0.80678321
## The Wolf of Wall Street -0.41782987 -0.27612752
## American Sniper -0.30684197 -0.34179995
## Birdman or (The Unexpected Virtue of Ignorance) 1.29430709 0.58405467
## Boyhood 0.01945182 -0.72593552
## The Grand Budapest Hotel 0.49876001 -0.72851058
## The Imitation Game 0.17303576 -0.80678321
## Selma -0.70199134 0.84130641
## The Theory of Everything -0.74412366 0.10800804
## Whiplash -0.20963712 0.63768209
## The Big Short 0.17303576 -0.80678321
## Bridge of Spies -0.74412366 0.10800804
## Brooklyn -0.74412366 0.10800804
## Mad Max: Fury Road 0.63809718 0.98426984
## The Martian -0.74412366 0.10800804
## The Revenant 1.62013716 0.94839275
## Room -0.41782987 -0.27612752
## Spotlight 0.17303576 -0.80678321
## Arrival 0.49876001 -0.72851058
## Fences -0.74412366 0.10800804
## Hacksaw Ridge 0.15878899 0.98684490
## Hell or High Water -0.30684197 -0.34179995
## Hidden Figures -0.74412366 0.10800804
## La La Land 1.57800484 0.21509438
## Lion -0.26481547 0.10543298
## Manchester by the Sea -0.26424593 -0.35697522
## Moonlight 0.65234395 -0.80935828
## Call Me by Your Name -0.59053972 0.02716034
## Darkest Hour -0.22268314 0.83873134
## Dunkirk 0.63809718 0.98426984
## Get Out -0.26424593 -0.35697522
## Lady Bird -0.41782987 -0.27612752
## Phantom Thread -0.37569755 0.45717084
## The Post -0.70199134 0.84130641
## The Shape of Water 1.27766650 -0.31332307
## Three Billboards outside Ebbing, Missouri -0.30684197 -0.34179995
## Black Panther -0.70199134 0.84130641
## BlacKkKlansman 0.17303576 -0.80678321
## Bohemian Rhapsody -0.16750480 1.37098046
## The Favourite 0.49876001 -0.72851058
## Green Book -0.15325803 -0.42264765
## Roma 1.14072315 0.66490237
## A Star Is Born -0.26481547 0.10543298
## Vice 0.01945182 -0.72593552
## Ford v Ferrari -0.16750480 1.37098046
## The Irishman 0.49876001 -0.72851058
## Jojo Rabbit -0.15325803 -0.42264765
## Joker 0.49876001 -0.72851058
## Little Women -0.74412366 0.10800804
## Marriage Story -0.74412366 0.10800804
## 1917 0.36181666 0.24971486
## Once upon a Time...in Hollywood 0.06147832 -0.27870259
## Parasite 0.95194224 -0.39159571
## The Father -0.15325803 -0.42264765
## Judas and the Black Messiah -0.26481547 0.10543298
## Mank 0.40394899 0.98301323
## Minari -0.41782987 -0.27612752
## Nomadland 1.27766650 -0.31332307
## Promising Young Woman 0.17303576 -0.80678321
## Sound of Metal -0.20963712 0.63768209
## The Trial of the Chicago 7 0.17246622 -0.34437502
## Belfast -0.26424593 -0.35697522
## CODA -0.59053972 0.02716034
## Don't Look Up -0.30684197 -0.34179995
## Drive My Car -0.41782987 -0.27612752
## Dune 0.57000941 1.16352448
## King Richard -0.30684197 -0.34179995
## Licorice Pizza -0.41782987 -0.27612752
## Nightmare Alley -0.22268314 0.83873134
## The Power of the Dog 1.27766650 -0.31332307
## West Side Story 0.10361065 0.45459578
## All Quiet on the Western Front 0.03552287 0.63385043
## Avatar: The Way of Water -0.70199134 0.84130641
## The Banshees of Inisherin 0.01945182 -0.72593552
## Elvis 0.21459854 0.38892335
## Everything Everywhere All at Once 1.04914709 0.58788634
## The Fabelmans -0.41782987 -0.27612752
## Tar 0.49876001 -0.72851058
## Top Gun: Maverick -0.30684197 -0.34179995
## Triangle of Sadness -0.41782987 -0.27612752
## Women Talking -0.59053972 0.02716034
##################################
# Extracting the quality of representation
# for the individuals
# on the factor map
##################################
$cos2[,c("Dim 1","Dim 2")] DR_MCA_IND
## Dim 1 Dim 2
## Avatar 0.1822392975 7.321510e-02
## The Blind Side 0.2891900021 4.153633e-01
## District 9 0.0975744805 1.210740e-01
## An Education 0.6324263704 1.332392e-02
## The Hurt Locker 0.4061061887 5.955359e-02
## Inglourious Basterds 0.1568324557 3.345988e-01
## Precious: Based on the Novel 'Push' by Sapphire 0.0174193322 3.786810e-01
## A Serious Man 0.6324263704 1.332392e-02
## Up 0.6324263704 1.332392e-02
## Up in the Air 0.1865210265 8.146072e-02
## Black Swan 0.1211620717 9.493257e-06
## The Fighter 0.0003690130 5.139458e-01
## Inception 0.0004343988 1.383076e-01
## The Kids Are All Right 0.6324263704 1.332392e-02
## The King's Speech 0.5205372305 3.948111e-02
## 127 Hours 0.0975744805 1.210740e-01
## The Social Network 0.1371317088 7.064282e-03
## Toy Story 3 0.6324263704 1.332392e-02
## True Grit 0.0025251344 5.189464e-02
## Winter's Bone 0.6324263704 1.332392e-02
## The Artist 0.5035538379 3.028277e-02
## The Descendants 0.0174193322 3.786810e-01
## Extremely Loud & Incredibly Close 0.2891900021 4.153633e-01
## The Help 0.2891900021 4.153633e-01
## Hugo 0.2090417116 1.310680e-02
## Midnight in Paris 0.0428515726 7.820352e-02
## Moneyball 0.0975744805 1.210740e-01
## The Tree of Life 0.0046167227 8.887422e-02
## War Horse 0.0218946344 3.106052e-01
## Amour 0.1865210265 8.146072e-02
## Argo 0.0009039724 8.920859e-02
## Beasts of the Southern Wild 0.1865210265 8.146072e-02
## Django Unchained 0.0058091073 2.837944e-04
## Les Miserables 0.2891900021 4.153633e-01
## Life of Pi 0.5286438743 9.822107e-03
## Lincoln 0.1568324557 3.345988e-01
## Silver Linings Playbook 0.0003690130 5.139458e-01
## Zero Dark Thirty 0.0975744805 1.210740e-01
## American Hustle 0.0003690130 5.139458e-01
## Captain Phillips 0.0975744805 1.210740e-01
## Dallas Buyers Club 0.0975744805 1.210740e-01
## Gravity 0.4009246716 5.052506e-01
## Her 0.2222612387 4.701495e-04
## Nebraska 0.0025251344 5.189464e-02
## Philomena 0.6324263704 1.332392e-02
## 12 Years a Slave 0.0174193322 3.786810e-01
## The Wolf of Wall Street 0.1865210265 8.146072e-02
## American Sniper 0.0975744805 1.210740e-01
## Birdman or (The Unexpected Virtue of Ignorance) 0.3152206556 6.418699e-02
## Boyhood 0.0003690130 5.139458e-01
## The Grand Budapest Hotel 0.1568324557 3.345988e-01
## The Imitation Game 0.0174193322 3.786810e-01
## Selma 0.2891900021 4.153633e-01
## The Theory of Everything 0.6324263704 1.332392e-02
## Whiplash 0.0157958513 1.461555e-01
## The Big Short 0.0174193322 3.786810e-01
## Bridge of Spies 0.6324263704 1.332392e-02
## Brooklyn 0.6324263704 1.332392e-02
## Mad Max: Fury Road 0.0962125874 2.289215e-01
## The Martian 0.6324263704 1.332392e-02
## The Revenant 0.4738973318 1.623892e-01
## Room 0.1865210265 8.146072e-02
## Spotlight 0.0174193322 3.786810e-01
## Arrival 0.1568324557 3.345988e-01
## Fences 0.6324263704 1.332392e-02
## Hacksaw Ridge 0.0068681011 2.652734e-01
## Hell or High Water 0.0975744805 1.210740e-01
## Hidden Figures 0.6324263704 1.332392e-02
## La La Land 0.5286438743 9.822107e-03
## Lion 0.0488234219 7.739171e-03
## Manchester by the Sea 0.0428515726 7.820352e-02
## Moonlight 0.1866741937 2.873509e-01
## Call Me by Your Name 0.2222612387 4.701495e-04
## Darkest Hour 0.0218946344 3.106052e-01
## Dunkirk 0.0962125874 2.289215e-01
## Get Out 0.0428515726 7.820352e-02
## Lady Bird 0.1865210265 8.146072e-02
## Phantom Thread 0.0799943330 1.184512e-01
## The Post 0.2891900021 4.153633e-01
## The Shape of Water 0.5035538379 3.028277e-02
## Three Billboards outside Ebbing, Missouri 0.0975744805 1.210740e-01
## Black Panther 0.2891900021 4.153633e-01
## BlacKkKlansman 0.0174193322 3.786810e-01
## Bohemian Rhapsody 0.0077706989 5.205568e-01
## The Favourite 0.1568324557 3.345988e-01
## Green Book 0.0141629138 1.077117e-01
## Roma 0.2815961296 9.567136e-02
## A Star Is Born 0.0488234219 7.739171e-03
## Vice 0.0003690130 5.139458e-01
## Ford v Ferrari 0.0077706989 5.205568e-01
## The Irishman 0.1568324557 3.345988e-01
## Jojo Rabbit 0.0141629138 1.077117e-01
## Joker 0.1568324557 3.345988e-01
## Little Women 0.6324263704 1.332392e-02
## Marriage Story 0.6324263704 1.332392e-02
## 1917 0.0441474907 2.102895e-02
## Once upon a Time...in Hollywood 0.0025251344 5.189464e-02
## Parasite 0.2685401891 4.544268e-02
## The Father 0.0141629138 1.077117e-01
## Judas and the Black Messiah 0.0488234219 7.739171e-03
## Mank 0.0430107950 2.547083e-01
## Minari 0.1865210265 8.146072e-02
## Nomadland 0.5035538379 3.028277e-02
## Promising Young Woman 0.0174193322 3.786810e-01
## Sound of Metal 0.0157958513 1.461555e-01
## The Trial of the Chicago 7 0.0194954517 7.772997e-02
## Belfast 0.0428515726 7.820352e-02
## CODA 0.2222612387 4.701495e-04
## Don't Look Up 0.0975744805 1.210740e-01
## Drive My Car 0.1865210265 8.146072e-02
## Dune 0.0675271205 2.813619e-01
## King Richard 0.0975744805 1.210740e-01
## Licorice Pizza 0.1865210265 8.146072e-02
## Nightmare Alley 0.0218946344 3.106052e-01
## The Power of the Dog 0.5035538379 3.028277e-02
## West Side Story 0.0046167227 8.887422e-02
## All Quiet on the Western Front 0.0004343988 1.383076e-01
## Avatar: The Way of Water 0.2891900021 4.153633e-01
## The Banshees of Inisherin 0.0003690130 5.139458e-01
## Elvis 0.0195617540 6.425135e-02
## Everything Everywhere All at Once 0.2120081487 6.656816e-02
## The Fabelmans 0.1865210265 8.146072e-02
## Tar 0.1568324557 3.345988e-01
## Top Gun: Maverick 0.0975744805 1.210740e-01
## Triangle of Sadness 0.1865210265 8.146072e-02
## Women Talking 0.2222612387 4.701495e-04
##################################
# Extracting the contributions
# (in percentage) of the individuals
# to the principal components
##################################
$contrib[,c("Dim 1","Dim 2")] DR_MCA_IND
## Dim 1 Dim 2
## Avatar 1.399033677 6.479086e-01
## The Blind Side 0.974229942 1.612996e+00
## District 9 0.186134766 2.662373e-01
## An Education 1.094682550 2.658503e-02
## The Hurt Locker 4.618526525 7.807261e-01
## Inglourious Basterds 0.491791730 1.209474e+00
## Precious: Based on the Novel 'Push' by Sapphire 0.059192909 1.483333e+00
## A Serious Man 1.094682550 2.658503e-02
## Up 1.094682550 2.658503e-02
## Up in the Air 0.345141303 1.737578e-01
## Black Swan 0.578388464 5.223903e-05
## The Fighter 0.000748029 1.200939e+00
## Inception 0.002494676 9.155846e-01
## The Kids Are All Right 1.094682550 2.658503e-02
## The King's Speech 4.049759486 3.540738e-01
## 127 Hours 0.186134766 2.662373e-01
## The Social Network 1.110702608 6.595609e-02
## Toy Story 3 1.094682550 2.658503e-02
## True Grit 0.007472088 1.770137e-01
## Winter's Bone 1.094682550 2.658503e-02
## The Artist 3.227252762 2.237225e-01
## The Descendants 0.059192909 1.483333e+00
## Extremely Loud & Incredibly Close 0.974229942 1.612996e+00
## The Help 0.974229942 1.612996e+00
## Hugo 1.262404218 9.124084e-02
## Midnight in Paris 0.138043062 2.904030e-01
## Moneyball 0.186134766 2.662373e-01
## The Tree of Life 0.021222998 4.709510e-01
## War Horse 0.098033082 1.603137e+00
## Amour 0.345141303 1.737578e-01
## Argo 0.006211530 7.066055e-01
## Beasts of the Southern Wild 0.345141303 1.737578e-01
## Django Unchained 0.024459849 1.377448e-03
## Les Miserables 0.974229942 1.612996e+00
## Life of Pi 4.922827606 1.054346e-01
## Lincoln 0.491791730 1.209474e+00
## Silver Linings Playbook 0.000748029 1.200939e+00
## Zero Dark Thirty 0.186134766 2.662373e-01
## American Hustle 0.000748029 1.200939e+00
## Captain Phillips 0.186134766 2.662373e-01
## Dallas Buyers Club 0.186134766 2.662373e-01
## Gravity 5.830577249 8.469984e+00
## Her 0.689439557 1.681107e-03
## Nebraska 0.007472088 1.770137e-01
## Philomena 1.094682550 2.658503e-02
## 12 Years a Slave 0.059192909 1.483333e+00
## The Wolf of Wall Street 0.345141303 1.737578e-01
## American Sniper 0.186134766 2.662373e-01
## Birdman or (The Unexpected Virtue of Ignorance) 3.311865001 7.773774e-01
## Boyhood 0.000748029 1.200939e+00
## The Grand Budapest Hotel 0.491791730 1.209474e+00
## The Imitation Game 0.059192909 1.483333e+00
## Selma 0.974229942 1.612996e+00
## The Theory of Everything 1.094682550 2.658503e-02
## Whiplash 0.086882910 9.266876e-01
## The Big Short 0.059192909 1.483333e+00
## Bridge of Spies 1.094682550 2.658503e-02
## Brooklyn 1.094682550 2.658503e-02
## Mad Max: Fury Road 0.804955036 2.207767e+00
## The Martian 1.094682550 2.658503e-02
## The Revenant 5.189213436 2.049752e+00
## Room 0.345141303 1.737578e-01
## Spotlight 0.059192909 1.483333e+00
## Arrival 0.491791730 1.209474e+00
## Fences 1.094682550 2.658503e-02
## Hacksaw Ridge 0.049846967 2.219334e+00
## Hell or High Water 0.186134766 2.662373e-01
## Hidden Figures 1.094682550 2.658503e-02
## La La Land 4.922827606 1.054346e-01
## Lion 0.138638759 2.533249e-02
## Manchester by the Sea 0.138043062 2.904030e-01
## Moonlight 0.841300693 1.492817e+00
## Call Me by Your Name 0.689439557 1.681107e-03
## Darkest Hour 0.098033082 1.603137e+00
## Dunkirk 0.804955036 2.207767e+00
## Get Out 0.138043062 2.904030e-01
## Lady Bird 0.345141303 1.737578e-01
## Phantom Thread 0.279045282 4.763015e-01
## The Post 0.974229942 1.612996e+00
## The Shape of Water 3.227252762 2.237225e-01
## Three Billboards outside Ebbing, Missouri 0.186134766 2.662373e-01
## Black Panther 0.974229942 1.612996e+00
## BlacKkKlansman 0.059192909 1.483333e+00
## Bohemian Rhapsody 0.055469275 4.283388e+00
## The Favourite 0.491791730 1.209474e+00
## Green Book 0.046434894 4.070819e-01
## Roma 2.572518321 1.007490e+00
## A Star Is Born 0.138638759 2.533249e-02
## Vice 0.000748029 1.200939e+00
## Ford v Ferrari 0.055469275 4.283388e+00
## The Irishman 0.491791730 1.209474e+00
## Jojo Rabbit 0.046434894 4.070819e-01
## Joker 0.491791730 1.209474e+00
## Little Women 1.094682550 2.658503e-02
## Marriage Story 1.094682550 2.658503e-02
## 1917 0.258806450 1.421064e-01
## Once upon a Time...in Hollywood 0.007472088 1.770137e-01
## Parasite 1.791509708 3.494627e-01
## The Father 0.046434894 4.070819e-01
## Judas and the Black Messiah 0.138638759 2.533249e-02
## Mank 0.322590084 2.202134e+00
## Minari 0.345141303 1.737578e-01
## Nomadland 3.227252762 2.237225e-01
## Promising Young Woman 0.059192909 1.483333e+00
## Sound of Metal 0.086882910 9.266876e-01
## The Trial of the Chicago 7 0.058803891 2.702640e-01
## Belfast 0.138043062 2.904030e-01
## CODA 0.689439557 1.681107e-03
## Don't Look Up 0.186134766 2.662373e-01
## Drive My Car 0.345141303 1.737578e-01
## Dune 0.642335633 3.085148e+00
## King Richard 0.186134766 2.662373e-01
## Licorice Pizza 0.345141303 1.737578e-01
## Nightmare Alley 0.098033082 1.603137e+00
## The Power of the Dog 3.227252762 2.237225e-01
## West Side Story 0.021222998 4.709510e-01
## All Quiet on the Western Front 0.002494676 9.155846e-01
## Avatar: The Way of Water 0.974229942 1.612996e+00
## The Banshees of Inisherin 0.000748029 1.200939e+00
## Elvis 0.091044038 3.447093e-01
## Everything Everywhere All at Once 2.176059309 7.876108e-01
## The Fabelmans 0.345141303 1.737578e-01
## Tar 0.491791730 1.209474e+00
## Top Gun: Maverick 0.186134766 2.662373e-01
## Triangle of Sadness 0.345141303 1.737578e-01
## Women Talking 0.689439557 1.681107e-03
##################################
# Extracting the correlation
# between the individual instances
# grouped by Picture categories
# and top principal components
##################################
<- fviz_mca_ind(DR_MCA,
(DR_MCA_IndividualCorrelationCircleByPicture geom.ind = "text",
col.ind = SD_MCA$Picture,
palette = c("#888888","#5544FF"),
repel = TRUE,
legend.title = "Picture",
addEllipses = FALSE) +
labs(title = "Multiple Correspondence Analysis : Factorial Map of Individuals",
subtitle = "Principal Components Versus Individuals Grouped by Picture Categories",
y = "Principal Component 2",
x = "Principal Component 1") +
theme_classic() +
theme(legend.position="top"))
##################################
# Extracting the correlation
# between the individual instances
# grouped by Year categories
# and top principal components
##################################
<- fviz_mca_ind(DR_MCA,
(DR_MCA_IndividualCorrelationCircleByYear geom.ind = "text",
col.ind = SD_MCA$Year,
repel = TRUE,
legend.title = "Year",
addEllipses = FALSE) +
labs(title = "Multiple Correspondence Analysis : Factorial Map of Individuals",
subtitle = "Principal Components Versus Individuals Grouped by Picture Categories",
y = "Principal Component 2",
x = "Principal Component 1") +
theme_classic() +
theme(legend.position="top"))
##################################
# Extracting the contribution
# of the descriptor variables
# for the Top 1 and Top 2 principal components
##################################
<- fviz_contrib(DR_MCA,
(DR_MCA_PCIndividualContributors choice = "ind",
axes = 1:2,
top = 10) +
labs(title = "Multiple Correspondence Analysis : Individual Contribution",
subtitle = "Principal Components 1 and 2 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Extracting the contribution
# of the descriptor variables
# for the Top 1 principal component
##################################
<- fviz_contrib(DR_MCA,
(DR_MCA_PC1IndividualContributors choice = "ind",
axes = 1,
top = 10) +
labs(title = "Multiple Correspondence Analysis : Individual Contribution",
subtitle = "Principal Component 1 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Extracting the contribution
# of the descriptor variables
# for the Top 2 principal component
##################################
<- fviz_contrib(DR_MCA,
(DR_MCA_PC2IndividualContributors choice = "ind",
axes = 2,
top = 10) +
labs(title = "Multiple Correspondence Analysis : Individual Contribution",
subtitle = "Principal Component 2 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Formulating the Biplot
# using the individual scores
# and descriptor variable loadings
# for the Top 1 and Top 2 principal components
##################################
<- fviz_mca_biplot(DR_MCA,
(DR_MCA_Biplot label = c("var","ind"),
repel = TRUE,
habillage = SD_MCA$Picture,
palette = c("#888888","#5544FF"),
col.var = "#FF5050",
legend.title = "Picture",
addEllipses = FALSE) +
labs(title = "Multiple Correspondence Analysis : Biplot",
subtitle = "Individual Scores versus Descriptor Variable Loadings",
x = "Principal Component 1",
y = "Principal Component 2") +
theme_classic() +
theme(legend.position="top"))
##################################
# Loading dataset
##################################
<- Oscars.MFA
SD_MFA str(SD_MFA)
## 'data.frame': 126 obs. of 10 variables:
## $ Nominations_Total : int 9 2 4 3 9 8 6 2 5 6 ...
## $ Nomination_SuccessRate: num 0.333 0.5 0 0 0.667 ...
## $ Picture : Factor w/ 2 levels "NOM","WON": 1 1 1 1 2 1 1 1 1 1 ...
## $ Cinematography : Factor w/ 3 levels "NNOM","NOM","WON": 3 1 1 1 2 2 1 1 1 1 ...
## $ Directing : Factor w/ 3 levels "NNOM","NOM","WON": 2 1 1 1 3 2 2 1 1 2 ...
## $ Editing : Factor w/ 3 levels "NNOM","NOM","WON": 2 1 2 1 3 2 2 1 1 1 ...
## $ Screenplay : Factor w/ 3 levels "NNOM","NOM","WON": 1 1 2 2 3 2 3 2 2 2 ...
## $ Acting : Factor w/ 3 levels "NNOM","NOM","WON": 1 3 1 2 2 3 3 1 1 2 ...
## $ Design : Factor w/ 3 levels "NNOM","NOM","WON": 3 1 1 1 1 1 1 1 1 1 ...
## $ Sound : Factor w/ 3 levels "NNOM","NOM","WON": 2 1 1 1 3 2 1 1 2 1 ...
##################################
# Performing MFA
##################################
<- MFA(SD_MFA[,c(1:10)],
DR_MFA group=c(2,3,3,2),
type =c("s","n","n","n"),
name.group = c("Recognitions","Technicalities","Storytelling","Aesthetics"),
graph = FALSE)
##################################
# Obtaining the MFA eigenvalues
##################################
<- get_eigenvalue(DR_MFA)) (DR_MFA_EV
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 2.39016818 20.6030264 20.60303
## Dim.2 1.52105133 13.1113203 33.71435
## Dim.3 1.42452541 12.2792759 45.99362
## Dim.4 0.90277182 7.7818087 53.77543
## Dim.5 0.81017775 6.9836565 60.75909
## Dim.6 0.77573893 6.6867971 67.44588
## Dim.7 0.67986920 5.8604090 73.30629
## Dim.8 0.66850406 5.7624426 79.06874
## Dim.9 0.48579590 4.1875153 83.25625
## Dim.10 0.44638493 3.8477964 87.10405
## Dim.11 0.41857745 3.6080985 90.71215
## Dim.12 0.31600115 2.7239004 93.43605
## Dim.13 0.23678765 2.0410874 95.47713
## Dim.14 0.19385328 1.6709972 97.14813
## Dim.15 0.14059433 1.2119100 98.36004
## Dim.16 0.12177276 1.0496698 99.40971
## Dim.17 0.06847969 0.5902885 100.00000
##################################
# Formulating the Scree Plot
# for the variances
##################################
<- fviz_eig(DR_MFA,
(DR_MFA_VarianceScreePlot addlabels = TRUE,
ylim = c(0, 100),
choice = c("variance")) +
labs(title = "Multiple Factor Analysis : Scree Plot of Explained Variances",
subtitle = "Top 2 Principal Components",
y = "Percentage of Explained Variances",
x = "Principal Components") +
theme_classic())
##################################
# Formulating the Scree Plot
# for the eigenvalues
##################################
<- fviz_eig(DR_MFA,
(DR_MFA_EigenvalueScreePlot addlabels = TRUE,
ylim = c(0, 6),
choice = c("eigenvalue")) +
labs(title = "Multiple Factor Analysis : Scree Plot of Eigenvalues",
subtitle = "Top 2 Principal Components",
y = "Eigenvalues",
x = "Principal Components") +
theme_classic())
##################################
# Extracting the MFA
# descriptor variable loadings
##################################
<- get_mfa_var(DR_MFA)
DR_MFA_VAR
##################################
# Extracting the coordinates
# representing the loadings
# for the descriptor variables
##################################
$coord DR_MFA_VAR
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## Nominations_Total 0.8315040 -0.436788 0.1298438 -0.15550666 0.02120145
## Nomination_SuccessRate 0.6061531 0.687584 0.1110336 0.09400855 -0.21971319
##################################
# Extracting the quality of representation
# for the descriptor variables
# on the factor map
##################################
$cos2 DR_MFA_VAR
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## Nominations_Total 0.6913990 0.1907837 0.01685940 0.024182322 0.0004495016
## Nomination_SuccessRate 0.3674216 0.4727718 0.01232846 0.008837607 0.0482738848
##################################
# Extracting the contributions
# (in percentage) of the descriptor variables
# to the principal components
##################################
$contrib DR_MFA_VAR
## Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## Nominations_Total 24.11964 10.45847 0.9868303 2.2335234 0.04626168
## Nomination_SuccessRate 12.81760 25.91661 0.7216212 0.8162575 4.96823865
##################################
# Extracting the dimensions
# for the descriptor variables
# on the factor map
##################################
<- fviz_mfa_var(DR_MFA,
(DR_MFA_VariableGroups choice = "group",
repel = TRUE) +
labs(title = "Multiple Factor Analysis : Factorial Map of Variable Groups",
subtitle = "Descriptor Variable Principal Components",
y = "Principal Component 2",
x = "Principal Component 1") +
theme_classic() +
theme(legend.position="top"))
##################################
# Extracting the dimensions
# for the qunatitative descriptor variables
# on the factor map
##################################
<- fviz_mfa_var(DR_MFA,
(DR_MFA_QuantitativeVariableGroups choice = "quanti.var") +
labs(title = "Multiple Factor Analysis : Correlation Plot of Quantitative Variables",
subtitle = "Quantitative Descriptor Variable Principal Components",
y = "Principal Component 2",
x = "Principal Component 1") +
theme_classic() +
theme(legend.position="top"))
##################################
# Graphing the individual instances
# from qualitative descriptor variables
# on the factor map
##################################
<- fviz_mfa_ind(DR_MFA,
(DR_MFA_QualitativeIndividualGroups partial = "all",
geom = "text",
repel = TRUE,
habillage = SD_MFA$Picture,
palette = c("#888888","#5544FF"),
legend.title = "Picture") +
labs(title = "Multiple Factor Analysis : Factorial Map of Individuals",
subtitle = "Qualitative Descriptor Variable Principal Components",
y = "Principal Component 2",
x = "Principal Component 1") +
theme_classic() +
theme(legend.position="top"))
##################################
# Extracting the dimensions
# for the qualitative descriptor variables
# on the factor map
##################################
<- fviz_mfa_var(DR_MFA,
(DR_MFA_QualitativeVariableGroups choice = "quali.var",
geom = "text") +
labs(title = "Multiple Factor Analysis : Factorial Map of Qualitative Variables",
subtitle = "Qualitative Descriptor Variable Principal Components",
y = "Principal Component 2",
x = "Principal Component 1") +
theme_classic() +
theme(legend.position="top"))
##################################
# Formulating the Biplot
# using the individual scores
# and qualitative descriptor variable loadings
# for the Top 1 and Top 2 principal components
##################################
<- as.factor(SD_MFA[,"Picture"])
Picture <- fviz_mfa_quali_biplot(DR_MFA,
(DR_MCA_QualitativeBiplot repel = TRUE,
geom = "text",
habillage = Picture,
palette = c("#888888","#5544FF"),
col.var = "#FF5050",
legend.title = "Picture") +
labs(title = "Multiple Factor Analysis : Biplot",
subtitle = "Individual Scores versus Qualitative Descriptor Variable Loadings",
x = "Principal Component 1",
y = "Principal Component 2") +
theme_classic() +
theme(legend.position="top"))
##################################
# Loading dataset
##################################
<- Oscars.FAMD
SD_FAMD str(SD_FAMD)
## 'data.frame': 126 obs. of 7 variables:
## $ Nominations_Total : int 9 2 4 3 9 8 6 2 5 6 ...
## $ Nomination_SuccessRate: num 0.333 0.5 0 0 0.667 ...
## $ Picture : Factor w/ 2 levels "NOM","WON": 1 1 1 1 2 1 1 1 1 1 ...
## $ Cinematography : Factor w/ 3 levels "NNOM","NOM","WON": 3 1 1 1 2 2 1 1 1 1 ...
## $ Directing : Factor w/ 3 levels "NNOM","NOM","WON": 2 1 1 1 3 2 2 1 1 2 ...
## $ Editing : Factor w/ 3 levels "NNOM","NOM","WON": 2 1 2 1 3 2 2 1 1 1 ...
## $ Screenplay : Factor w/ 3 levels "NNOM","NOM","WON": 1 1 2 2 3 2 3 2 2 2 ...
##################################
# Performing FAMD
##################################
<- FAMD(SD_FAMD[,c(1:3)],
DR_FAMD graph = FALSE)
##################################
# Obtaining the FAMD eigenvalues
##################################
<- get_eigenvalue(DR_FAMD)) (DR_FAMD_EV
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 1.6055601 53.51867 53.51867
## Dim.2 0.8576607 28.58869 82.10736
## Dim.3 0.5367791 17.89264 100.00000
##################################
# Formulating the Scree Plot
# for the variances
##################################
<- fviz_eig(DR_FAMD,
(DR_FAMD_VarianceScreePlot addlabels = TRUE,
ylim = c(0, 100),
choice = c("variance")) +
labs(title = "Factor Analysis of Mixed Data : Scree Plot of Explained Variances",
subtitle = "Top 2 Principal Components",
y = "Percentage of Explained Variances",
x = "Principal Components") +
theme_classic())
##################################
# Formulating the Scree Plot
# for the eigenvalues
##################################
<- fviz_eig(DR_FAMD,
(DR_FAMD_EigenvalueScreePlot addlabels = TRUE,
ylim = c(0, 6),
choice = c("eigenvalue")) +
labs(title = "Factor Analysis of Mixed Data : Scree Plot of Eigenvalues",
subtitle = "Top 2 Principal Components",
y = "Eigenvalues",
x = "Principal Components") +
theme_classic())
##################################
# Extracting the FAMD
# descriptor variable loadings
##################################
<- get_famd_var(DR_FAMD)
DR_FAMD_VAR
##################################
# Extracting the coordinates
# representing the loadings
# for the descriptor variables
##################################
$coord DR_FAMD_VAR
## Dim.1 Dim.2 Dim.3
## Nominations_Total 0.3063051 0.69312071 0.0005742175
## Nomination_SuccessRate 0.6424797 0.09498771 0.2625326094
## Picture 0.6567754 0.06955231 0.2736723010
##################################
# Extracting the quality of representation
# for the descriptor variables
# on the factor map
##################################
$cos2 DR_FAMD_VAR
## Dim.1 Dim.2 Dim.3
## Nominations_Total 0.0938228 0.480416312 3.297257e-07
## Nomination_SuccessRate 0.4127801 0.009022666 6.892337e-02
## Picture 0.4313539 0.004837524 7.489653e-02
##################################
# Extracting the contributions
# (in percentage) of the descriptor variables
# to the principal components
##################################
$contrib DR_FAMD_VAR
## Dim.1 Dim.2 Dim.3
## Nominations_Total 19.07777 80.815255 0.1069746
## Nomination_SuccessRate 40.01592 11.075208 48.9088707
## Picture 40.90631 8.109537 50.9841547
##################################
# Extracting the quality of representation
# for the descriptor variables
# on the factor map
##################################
<- fviz_famd_var(DR_FAMD,
(DR_FAMD_VariableSquaredCorrelationCircle col.var = "cos2",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
legend.title = "Squared Coordinates",
repel = TRUE) +
labs(title = "Factor Analysis of Mixed Data : Squared Coordinate Plot",
subtitle = "Descriptor Variable Representation Quality",
y = "Principal Component 2",
x = "Principal Component 1") +
theme_classic() +
theme(legend.position="top"))
##################################
# Formulating clusters of the
# descriptor variables
##################################
set.seed(123)
<- kmeans(DR_FAMD_VAR$coord, centers=2, nstart=25)
DR_FAMD_KMEANS <- as.factor(DR_FAMD_KMEANS$cluster)
DR_FAMD_KMEANS_CLUSTER
##################################
# Extracting the correlation
# between the descriptor variables
# and top principal components
##################################
<- fviz_famd_var(DR_FAMD,
(DR_FAMD_VariableCorrelationCircle col.var = DR_FAMD_KMEANS_CLUSTER,
palette = c("#0073C2FF",
"#EFC000FF"),
legend.title = "Cluster",
repel = TRUE) +
labs(title = "Factor Analysis of Mixed Data : Factorial Map of Quantitative and Qualitative Variables",
subtitle = "Principal Components Versus Descriptor Variable Clusters",
y = "Principal Component 2",
x = "Principal Component 1") +
theme_classic() +
theme(legend.position="top"))
##################################
# Extracting the contribution
# of the descriptor variables
# for the the Top 1 and 2 principal components
##################################
<- fviz_contrib(DR_FAMD,
(DR_FAMD_PCVariableContributors choice = "var",
axes = 1:2) +
labs(title = "Factor Analysis of Mixed Data : Descriptor Variable Contribution",
subtitle = "Principal Components 1 and 2 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Testing the statistical significance
# of the contribution
# of the descriptor variables
# for the the Top 1 and 2 principal components
##################################
<- dimdesc(DR_FAMD,
DR_FAMD_VariableContributionTest axes = c(1,2),
proba = 0.05)
##################################
# Extracting the contribution
# of the descriptor variables
# for the Top 1 principal component
##################################
<- fviz_contrib(DR_FAMD,
(DR_FAMD_PC1VariableContributors choice = "var",
axes = 1) +
labs(title = "Factor Analysis of Mixed Data : Descriptor Variable Contribution",
subtitle = "Principal Component 1 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Testing the statistical significance
# of the contribution
# of the descriptor variables
# for the Top 1 principal component
##################################
$Dim.1 DR_FAMD_VariableContributionTest
##
## Link between the variable and the continuous variables (R-square)
## =================================================================================
## correlation p.value
## Nomination_SuccessRate 0.8015483 1.791314e-29
## Nominations_Total 0.5534484 1.803999e-11
##
## Link between the variable and the categorical variable (1-way anova)
## =============================================
## R2 p.value
## Picture 0.6567754 1.411641e-30
##
## Link between variable and the categories of the categorical variables
## ================================================================
## Estimate p.value
## Picture=WON 1.633764 1.411641e-30
## Picture=NOM -1.633764 1.411641e-30
##################################
# Extracting the contribution
# of the descriptor variables
# for the Top 2 principal component
##################################
<- fviz_contrib(DR_FAMD,
(DR_FAMD_PC2VariableContributors choice = "var",
axes = 2) +
labs(title = "Factor Analysis of Mixed Data : Descriptor Variable Contribution",
subtitle = "Principal Component 2 Contributors",
x = "Descriptors") +
theme_classic() +
theme(axis.text.x = element_text(angle = 90,hjust = 1)))
##################################
# Testing the statistical significance
# of the contribution
# of the descriptor variables
# for the Top 2 principal component
##################################
$Dim.2 DR_FAMD_VariableContributionTest
##
## Link between the variable and the continuous variables (R-square)
## =================================================================================
## correlation p.value
## Nominations_Total 0.8325387 1.331762e-33
## Nomination_SuccessRate -0.3082008 4.465369e-04
##
## Link between the variable and the categorical variable (1-way anova)
## =============================================
## R2 p.value
## Picture 0.06955231 0.002846506
##
## Link between variable and the categories of the categorical variables
## ================================================================
## Estimate p.value
## Picture=NOM 0.3885806 0.002846506
## Picture=WON -0.3885806 0.002846506
##################################
# Extracting the FAMD individual scores
##################################
<- get_famd_ind(DR_FAMD)
DR_FAMD_IND
##################################
# Extracting the coordinates
# representing the scores
# for the individuals
##################################
$coord[,c("Dim.1","Dim.2")] DR_FAMD_IND
## Dim.1 Dim.2
## Avatar 0.477671939 0.78718319
## The Blind Side -0.172294892 -1.85825949
## District 9 -1.360047435 -0.37985738
## An Education -1.525188667 -0.71974737
## The Hurt Locker 3.524847885 -0.65138268
## Inglourious Basterds -0.319973758 0.78004709
## Precious: Based on the Novel 'Push' by Sapphire -0.017751755 -0.23248680
## A Serious Man -1.690329898 -1.05963737
## Up 0.019521801 -0.67886507
## Up in the Air -1.029764972 0.29992262
## Black Swan -0.587692201 -0.35941622
## The Fighter 0.002811822 0.18346397
## Inception 0.818552497 0.18108050
## The Kids Are All Right -1.360047435 -0.37985738
## The King's Speech 3.008228002 0.90071270
## 127 Hours -1.029764972 0.29992262
## The Social Network 0.439043745 0.38073603
## Toy Story 3 0.019521801 -0.67886507
## True Grit -0.369200046 1.65948261
## Winter's Bone -1.360047435 -0.37985738
## The Artist 3.183967328 -0.04527999
## The Descendants -0.587692201 -0.35941622
## Extremely Loud & Incredibly Close -1.690329898 -1.05963737
## The Help -0.601029932 -0.77916843
## Hugo 1.175986809 1.27334524
## Midnight in Paris -0.601029932 -0.77916843
## Moneyball -1.029764972 0.29992262
## The Tree of Life -1.525188667 -0.71974737
## War Horse -1.029764972 0.29992262
## Amour -0.587692201 -0.35941622
## Argo 2.471677152 -0.95085883
## Beasts of the Southern Wild -1.360047435 -0.37985738
## Django Unchained 0.019521801 -0.67886507
## Les Miserables 0.477671939 0.78718319
## Life of Pi 0.899977684 1.41855072
## Lincoln 0.467104206 2.07304991
## Silver Linings Playbook -0.319973758 0.78004709
## Zero Dark Thirty -0.587692201 -0.35941622
## American Hustle -0.369200046 1.65948261
## Captain Phillips -1.029764972 0.29992262
## Dallas Buyers Club 0.488270034 -0.49869950
## Gravity 1.756048962 0.54141165
## Her -0.587692201 -0.35941622
## Nebraska -1.029764972 0.29992262
## Philomena -1.360047435 -0.37985738
## 12 Years a Slave 2.512804307 -0.11895729
## The Wolf of Wall Street -1.194906204 -0.03996738
## American Sniper -0.523743184 0.03370992
## Birdman or (The Unexpected Virtue of Ignorance) 2.850142046 -0.29642710
## Boyhood -0.523743184 0.03370992
## The Grand Budapest Hotel 0.815009678 0.60971339
## The Imitation Game -0.319973758 0.78004709
## Selma -0.172294892 -1.85825949
## The Theory of Everything -0.587692201 -0.35941622
## Whiplash 0.626735803 -0.99831392
## The Big Short -0.587692201 -0.35941622
## Bridge of Spies -0.523743184 0.03370992
## Brooklyn -1.525188667 -0.71974737
## Mad Max: Fury Road 1.452441961 0.70113607
## The Martian -0.864623741 0.63981262
## The Revenant 0.720099920 1.93995155
## Room -0.601029932 -0.77916843
## Spotlight 2.017380613 -1.13862729
## Arrival -0.319973758 0.78004709
## Fences -0.601029932 -0.77916843
## Hacksaw Ridge -0.017751755 -0.23248680
## Hell or High Water -1.360047435 -0.37985738
## Hidden Figures -1.525188667 -0.71974737
## La La Land 1.592533405 2.33451164
## Lion -1.029764972 0.29992262
## Manchester by the Sea -0.017751755 -0.23248680
## Moonlight 2.474176113 -0.52540446
## Call Me by Your Name -0.601029932 -0.77916843
## Darkest Hour -0.017751755 -0.23248680
## Dunkirk 0.439043745 0.38073603
## Get Out -0.601029932 -0.77916843
## Lady Bird -1.194906204 -0.03996738
## Phantom Thread -0.523743184 0.03370992
## The Post -1.690329898 -1.05963737
## The Shape of Water 3.095524398 1.28155604
## Three Billboards outside Ebbing, Missouri 0.002811822 0.18346397
## Black Panther 0.436544784 -0.04471834
## BlacKkKlansman -0.523743184 0.03370992
## Bohemian Rhapsody 1.233949806 -1.31776276
## The Favourite -0.065593045 1.49975819
## Green Book 2.661868171 -1.90445440
## Roma 0.541620957 1.18030934
## A Star Is Born -0.319973758 0.78004709
## Vice -0.319973758 0.78004709
## Ford v Ferrari 0.157987571 -1.17847949
## The Irishman -0.369200046 1.65948261
## Jojo Rabbit -0.523743184 0.03370992
## Joker 0.347959435 1.70896166
## Little Women -0.523743184 0.03370992
## Marriage Story -0.523743184 0.03370992
## 1917 0.541620957 1.18030934
## Once upon a Time...in Hollywood 0.238013956 1.34003376
## Parasite 3.029424191 -1.67105268
## The Father -0.017751755 -0.23248680
## Judas and the Black Messiah -0.017751755 -0.23248680
## Mank 0.238013956 1.34003376
## Minari -0.523743184 0.03370992
## Nomadland 2.523402402 -1.40483998
## Promising Young Woman -0.587692201 -0.35941622
## Sound of Metal -0.017751755 -0.23248680
## The Trial of the Chicago 7 -1.029764972 0.29992262
## Belfast -0.430890779 0.41163031
## CODA 3.546013713 -3.22313209
## Don't Look Up -1.360047435 -0.37985738
## Drive My Car -0.601029932 -0.77916843
## Dune 1.452441961 0.70113607
## King Richard -0.523743184 0.03370992
## Licorice Pizza -1.525188667 -0.71974737
## Nightmare Alley -1.360047435 -0.37985738
## The Power of the Dog 0.214078131 2.20616424
## West Side Story -0.430890779 0.41163031
## All Quiet on the Western Front 0.815009678 0.60971339
## Avatar: The Way of Water -0.601029932 -0.77916843
## The Banshees of Inisherin -0.534341278 1.31959261
## Elvis -0.699482509 0.97970261
## Everything Everywhere All at Once 3.763107066 0.07680978
## The Fabelmans -0.864623741 0.63981262
## Tar -1.029764972 0.29992262
## Top Gun: Maverick -0.523743184 0.03370992
## Triangle of Sadness -1.525188667 -0.71974737
## Women Talking -0.172294892 -1.85825949
##################################
# Extracting the quality of representation
# for the individuals
# on the factor map
##################################
$cos2[,c("Dim.1","Dim.2")] DR_FAMD_IND
## Dim.1 Dim.2
## Avatar 1.872102e-01 0.5084187848
## The Blind Side 6.379316e-03 0.7420651260
## District 9 7.970443e-01 0.0621749536
## An Education 7.303059e-01 0.1626364356
## The Hurt Locker 9.451994e-01 0.0322786298
## Inglourious Basterds 1.419278e-01 0.8434908068
## Precious: Based on the Novel 'Push' by Sapphire 8.259804e-04 0.1416719048
## A Serious Man 6.590056e-01 0.2589766896
## Up 3.545355e-04 0.4287330112
## Up in the Air 7.316126e-01 0.0620616557
## Black Swan 7.090228e-01 0.2651891351
## The Fighter 3.696853e-05 0.1573830733
## Inception 3.285314e-01 0.0160778190
## The Kids Are All Right 7.970443e-01 0.0621749536
## The King's Speech 7.237137e-01 0.0648810458
## 127 Hours 7.316126e-01 0.0620616557
## The Social Network 2.189816e-01 0.1646796971
## Toy Story 3 3.545355e-04 0.4287330112
## True Grit 4.344373e-02 0.8777059345
## Winter's Bone 7.970443e-01 0.0621749536
## The Artist 8.947340e-01 0.0001809545
## The Descendants 7.090228e-01 0.2651891351
## Extremely Loud & Incredibly Close 6.590056e-01 0.2589766896
## The Help 3.473706e-01 0.5837992755
## Hugo 3.383463e-01 0.3966878352
## Midnight in Paris 3.473706e-01 0.5837992755
## Moneyball 7.316126e-01 0.0620616557
## The Tree of Life 7.303059e-01 0.1626364356
## War Horse 7.316126e-01 0.0620616557
## Amour 7.090228e-01 0.2651891351
## Argo 6.875993e-01 0.1017616166
## Beasts of the Southern Wild 7.970443e-01 0.0621749536
## Django Unchained 3.545355e-04 0.4287330112
## Les Miserables 1.872102e-01 0.5084187848
## Life of Pi 2.408257e-01 0.5983139173
## Lincoln 4.823662e-02 0.9500988669
## Silver Linings Playbook 1.419278e-01 0.8434908068
## Zero Dark Thirty 7.090228e-01 0.2651891351
## American Hustle 4.344373e-02 0.8777059345
## Captain Phillips 7.316126e-01 0.0620616557
## Dallas Buyers Club 1.349085e-01 0.1407333918
## Gravity 4.529955e-01 0.0430601812
## Her 7.090228e-01 0.2651891351
## Nebraska 7.316126e-01 0.0620616557
## Philomena 7.970443e-01 0.0621749536
## 12 Years a Slave 6.943401e-01 0.0015560984
## The Wolf of Wall Street 8.195722e-01 0.0009169189
## American Sniper 9.953032e-01 0.0041231976
## Birdman or (The Unexpected Virtue of Ignorance) 8.225854e-01 0.0088978236
## Boyhood 9.953032e-01 0.0041231976
## The Grand Budapest Hotel 3.320641e-01 0.1858436979
## The Imitation Game 1.419278e-01 0.8434908068
## Selma 6.379316e-03 0.7420651260
## The Theory of Everything 7.090228e-01 0.2651891351
## Whiplash 1.120504e-01 0.2843011476
## The Big Short 7.090228e-01 0.2651891351
## Bridge of Spies 9.953032e-01 0.0041231976
## Brooklyn 7.303059e-01 0.1626364356
## Mad Max: Fury Road 4.304396e-01 0.1003042290
## The Martian 5.182075e-01 0.2837624742
## The Revenant 1.174171e-01 0.8521726675
## Room 3.473706e-01 0.5837992755
## Spotlight 4.929227e-01 0.1570241106
## Arrival 1.419278e-01 0.8434908068
## Fences 3.473706e-01 0.5837992755
## Hacksaw Ridge 8.259804e-04 0.1416719048
## Hell or High Water 7.970443e-01 0.0621749536
## Hidden Figures 7.303059e-01 0.1626364356
## La La Land 2.828220e-01 0.6077545153
## Lion 7.316126e-01 0.0620616557
## Manchester by the Sea 8.259804e-04 0.1416719048
## Moonlight 6.991856e-01 0.0315296223
## Call Me by Your Name 3.473706e-01 0.5837992755
## Darkest Hour 8.259804e-04 0.1416719048
## Dunkirk 2.189816e-01 0.1646796971
## Get Out 3.473706e-01 0.5837992755
## Lady Bird 8.195722e-01 0.0009169189
## Phantom Thread 9.953032e-01 0.0041231976
## The Post 6.590056e-01 0.2589766896
## The Shape of Water 6.789594e-01 0.1163725546
## Three Billboards outside Ebbing, Missouri 3.696853e-05 0.1573830733
## Black Panther 1.887202e-01 0.0019803056
## BlacKkKlansman 9.953032e-01 0.0041231976
## Bohemian Rhapsody 1.957369e-01 0.2232298239
## The Favourite 1.887256e-03 0.9866376195
## Green Book 6.226013e-01 0.3186970691
## Roma 1.507362e-01 0.7158436701
## A Star Is Born 1.419278e-01 0.8434908068
## Vice 1.419278e-01 0.8434908068
## Ford v Ferrari 9.459975e-03 0.5263664507
## The Irishman 4.344373e-02 0.8777059345
## Jojo Rabbit 9.953032e-01 0.0041231976
## Joker 3.960200e-02 0.9552664219
## Little Women 9.953032e-01 0.0041231976
## Marriage Story 9.953032e-01 0.0041231976
## 1917 1.507362e-01 0.7158436701
## Once upon a Time...in Hollywood 3.009193e-02 0.9538424608
## Parasite 7.456688e-01 0.2268851430
## The Father 8.259804e-04 0.1416719048
## Judas and the Black Messiah 8.259804e-04 0.1416719048
## Mank 3.009193e-02 0.9538424608
## Minari 9.953032e-01 0.0041231976
## Nomadland 6.603859e-01 0.2046814563
## Promising Young Woman 7.090228e-01 0.2651891351
## Sound of Metal 8.259804e-04 0.1416719048
## The Trial of the Chicago 7 7.316126e-01 0.0620616557
## Belfast 5.184356e-01 0.4731241247
## CODA 5.416809e-01 0.4475266571
## Don't Look Up 7.970443e-01 0.0621749536
## Drive My Car 3.473706e-01 0.5837992755
## Dune 4.304396e-01 0.1003042290
## King Richard 9.953032e-01 0.0041231976
## Licorice Pizza 7.303059e-01 0.1626364356
## Nightmare Alley 7.970443e-01 0.0621749536
## The Power of the Dog 9.258054e-03 0.9832201697
## West Side Story 5.184356e-01 0.4731241247
## All Quiet on the Western Front 3.320641e-01 0.1858436979
## Avatar: The Way of Water 3.473706e-01 0.5837992755
## The Banshees of Inisherin 1.248614e-01 0.7615011681
## Elvis 2.841805e-01 0.5574796644
## Everything Everywhere All at Once 9.730857e-01 0.0004054068
## The Fabelmans 5.182075e-01 0.2837624742
## Tar 7.316126e-01 0.0620616557
## Top Gun: Maverick 9.953032e-01 0.0041231976
## Triangle of Sadness 7.303059e-01 0.1626364356
## Women Talking 6.379316e-03 0.7420651260
##################################
# Extracting the contributions
# (in percentage) of the individuals
# to the principal components
##################################
$contrib[,c("Dim.1","Dim.2")] DR_FAMD_IND
## Dim.1 Dim.2
## Avatar 1.127879e-01 0.573410388
## The Blind Side 1.467397e-02 3.195410419
## District 9 9.143469e-01 0.133522686
## An Education 1.149873e+00 0.479373595
## The Hurt Locker 6.141630e+00 0.392632593
## Inglourious Basterds 5.060945e-02 0.563061155
## Precious: Based on the Novel 'Push' by Sapphire 1.557706e-04 0.050016181
## A Serious Man 1.412361e+00 1.039030902
## Up 1.883834e-04 0.426462508
## Up in the Air 5.241784e-01 0.083240057
## Black Swan 1.707272e-01 0.119538909
## The Fighter 3.908216e-06 0.031146948
## Inception 3.312043e-01 0.030342915
## The Kids Are All Right 9.143469e-01 0.133522686
## The King's Speech 4.473262e+00 0.750734722
## 127 Hours 5.241784e-01 0.083240057
## The Social Network 9.528367e-02 0.134141105
## Toy Story 3 1.883834e-04 0.426462508
## True Grit 6.737928e-02 2.548351559
## Winter's Bone 9.143469e-01 0.133522686
## The Artist 5.011181e+00 0.001897259
## The Descendants 1.707272e-01 0.119538909
## Extremely Loud & Incredibly Close 1.412361e+00 1.039030902
## The Help 1.785645e-01 0.561793396
## Hugo 6.836090e-01 1.500397289
## Midnight in Paris 1.785645e-01 0.561793396
## Moneyball 5.241784e-01 0.083240057
## The Tree of Life 1.149873e+00 0.479373595
## War Horse 5.241784e-01 0.083240057
## Amour 1.707272e-01 0.119538909
## Argo 3.019857e+00 0.836654240
## Beasts of the Southern Wild 9.143469e-01 0.133522686
## Django Unchained 1.883834e-04 0.426462508
## Les Miserables 1.127879e-01 0.573410388
## Life of Pi 4.003745e-01 1.862102849
## Lincoln 1.078526e-01 3.976797215
## Silver Linings Playbook 5.060945e-02 0.563061155
## Zero Dark Thirty 1.707272e-01 0.119538909
## American Hustle 6.737928e-02 2.548351559
## Captain Phillips 5.241784e-01 0.083240057
## Dallas Buyers Club 1.178482e-01 0.230139830
## Gravity 1.524320e+00 0.271249611
## Her 1.707272e-01 0.119538909
## Nebraska 5.241784e-01 0.083240057
## Philomena 9.143469e-01 0.133522686
## 12 Years a Slave 3.121190e+00 0.013094716
## The Wolf of Wall Street 7.057819e-01 0.001478173
## American Sniper 1.355937e-01 0.001051549
## Birdman or (The Unexpected Virtue of Ignorance) 4.015465e+00 0.081311082
## Boyhood 1.355937e-01 0.001051549
## The Grand Budapest Hotel 3.283435e-01 0.344005504
## The Imitation Game 5.060945e-02 0.563061155
## Selma 1.467397e-02 3.195410419
## The Theory of Everything 1.707272e-01 0.119538909
## Whiplash 1.941654e-01 0.922248972
## The Big Short 1.707272e-01 0.119538909
## Bridge of Spies 1.355937e-01 0.001051549
## Brooklyn 1.149873e+00 0.479373595
## Mad Max: Fury Road 1.042799e+00 0.454902741
## The Martian 3.695364e-01 0.378808337
## The Revenant 2.563235e-01 3.482536655
## Room 1.785645e-01 0.561793396
## Spotlight 2.011771e+00 1.199712279
## Arrival 5.060945e-02 0.563061155
## Fences 1.785645e-01 0.561793396
## Hacksaw Ridge 1.557706e-04 0.050016181
## Hell or High Water 9.143469e-01 0.133522686
## Hidden Figures 1.149873e+00 0.479373595
## La La Land 1.253661e+00 5.043197985
## Lion 5.241784e-01 0.083240057
## Manchester by the Sea 1.557706e-04 0.050016181
## Moonlight 3.025966e+00 0.255447370
## Call Me by Your Name 1.785645e-01 0.561793396
## Darkest Hour 1.557706e-04 0.050016181
## Dunkirk 9.528367e-02 0.134141105
## Get Out 1.785645e-01 0.561793396
## Lady Bird 7.057819e-01 0.001478173
## Phantom Thread 1.355937e-01 0.001051549
## The Post 1.412361e+00 1.039030902
## The Shape of Water 4.736650e+00 1.519809425
## Three Billboards outside Ebbing, Missouri 3.908216e-06 0.031146948
## Black Panther 9.420208e-02 0.001850484
## BlacKkKlansman 1.355937e-01 0.001051549
## Bohemian Rhapsody 7.526583e-01 1.606898299
## The Favourite 2.126760e-03 2.081404132
## Green Book 3.502482e+00 3.356256064
## Roma 1.450086e-01 1.289156428
## A Star Is Born 5.060945e-02 0.563061155
## Vice 5.060945e-02 0.563061155
## Ford v Ferrari 1.233811e-02 1.285162331
## The Irishman 6.737928e-02 2.548351559
## Jojo Rabbit 1.355937e-01 0.001051549
## Joker 5.984944e-02 2.702580071
## Little Women 1.355937e-01 0.001051549
## Marriage Story 1.355937e-01 0.001051549
## 1917 1.450086e-01 1.289156428
## Once upon a Time...in Hollywood 2.800320e-02 1.661672422
## Parasite 4.536522e+00 2.584010124
## The Father 1.557706e-04 0.050016181
## Judas and the Black Messiah 1.557706e-04 0.050016181
## Mank 2.800320e-02 1.661672422
## Minari 1.355937e-01 0.001051549
## Nomadland 3.147574e+00 1.826281195
## Promising Young Woman 1.707272e-01 0.119538909
## Sound of Metal 1.557706e-04 0.050016181
## The Trial of the Chicago 7 5.241784e-01 0.083240057
## Belfast 9.177772e-02 0.156793702
## CODA 6.215609e+00 9.613247808
## Don't Look Up 9.143469e-01 0.133522686
## Drive My Car 1.785645e-01 0.561793396
## Dune 1.042799e+00 0.454902741
## King Richard 1.355937e-01 0.001051549
## Licorice Pizza 1.149873e+00 0.479373595
## Nightmare Alley 9.143469e-01 0.133522686
## The Power of the Dog 2.265414e-02 4.503909058
## West Side Story 9.177772e-02 0.156793702
## All Quiet on the Western Front 3.283435e-01 0.344005504
## Avatar: The Way of Water 1.785645e-01 0.561793396
## The Banshees of Inisherin 1.411368e-01 1.611364088
## Elvis 2.418558e-01 0.888183014
## Everything Everywhere All at Once 6.999968e+00 0.005459426
## The Fabelmans 3.695364e-01 0.378808337
## Tar 5.241784e-01 0.083240057
## Top Gun: Maverick 1.355937e-01 0.001051549
## Triangle of Sadness 1.149873e+00 0.479373595
## Women Talking 1.467397e-02 3.195410419
##################################
# Extracting the correlation
# between the individual instances
# and top principal components
# for the qualitative descriptor variable
##################################
<- fviz_famd_ind(DR_FAMD,
(DR_FAMD_IndividualFAMDGraphQualitativeVariable geom.ind = "text",
habillage = SD_FAMD$Cinematography,
palette = c("#888888","#5544FF","#F18EDE"),
repel = TRUE,
col.quali.var = "#FF0000",
legend.title = "Cinematography",
addEllipses = FALSE) +
labs(title = "Factor Analysis of Mixed Data : Factorial Map of Individuals (Cinematography)",
subtitle = "Individuals by Qualitative Descriptor Variable Principal Components",
y = "Principal Component 2",
x = "Principal Component 1") +
theme_classic() +
theme(legend.position="top"))
<- fviz_famd_ind(DR_FAMD,
(DR_FAMD_IndividualFAMDGraphQualitativeVariable geom.ind = "text",
habillage = SD_FAMD$Directing,
palette = c("#888888","#5544FF","#F18EDE"),
repel = TRUE,
col.quali.var = "#FF0000",
legend.title = "Directing",
addEllipses = FALSE) +
labs(title = "Factor Analysis of Mixed Data : Factorial Map of Individuals (Directing)",
subtitle = "Individuals by Qualitative Descriptor Variable Principal Components",
y = "Principal Component 2",
x = "Principal Component 1") +
theme_classic() +
theme(legend.position="top"))
<- fviz_famd_ind(DR_FAMD,
(DR_FAMD_IndividualFAMDGraphQualitativeVariable geom.ind = "text",
habillage = SD_FAMD$Editing,
palette = c("#888888","#5544FF","#F18EDE"),
repel = TRUE,
col.quali.var = "#FF0000",
legend.title = "Editing",
addEllipses = FALSE) +
labs(title = "Factor Analysis of Mixed Data : Factorial Map of Individuals (Editing)",
subtitle = "Individuals by Qualitative Descriptor Variable Principal Components",
y = "Principal Component 2",
x = "Principal Component 1") +
theme_classic() +
theme(legend.position="top"))
<- fviz_famd_ind(DR_FAMD,
(DR_FAMD_IndividualFAMDGraphQualitativeVariable geom.ind = "text",
habillage = SD_FAMD$Screenplay,
palette = c("#888888","#5544FF","#F18EDE"),
repel = TRUE,
col.quali.var = "#FF0000",
legend.title = "Screenplay",
addEllipses = FALSE) +
labs(title = "Factor Analysis of Mixed Data : Factorial Map of Individuals (Screenplay)",
subtitle = "Individuals by Qualitative Descriptor Variable Principal Components",
y = "Principal Component 2",
x = "Principal Component 1") +
theme_classic() +
theme(legend.position="top"))
##################################
# Gathering all factorial maps
# and biplots for each individual method
# used in the analysis
##################################
grid.arrange(DR_PCA_VariableCorrelationCircle,
DR_MCA_VariableCorrelationCircle,
DR_MFA_VariableGroups,
DR_FAMD_VariableCorrelationCircle,ncol = 2)
grid.arrange(DR_CA_SymmetricBiplot_Cinematography,
DR_CA_SymmetricBiplot_Directing,
DR_CA_SymmetricBiplot_Editing,
DR_CA_SymmetricBiplot_Screenplay,ncol = 2)