This section will run the commands necessary to initialize R and load up our packages and data.
#Set working directory
#setwd("C:/Users/Laurens/Dropbox/University/Year 4/Period 2/Applied Economics Research Course/Thesis/data")
#Load required packages
library(foreign)
library(stargazer)
library(ggplot2)
library(aod)
library(gridExtra)
library(ggthemes)
library(dplyr)
library(mfx)
library(corrplot)
#Enable anti-aliasing for rendered graphics
library(knitr)
#opts_chunk$set(out.width = '\\maxwidth')
#dev = "CairoPNG",
#Load dataset
data.dropout <- read.dta("DatasetTrimmed.dta")
#Read name vector of dataset
names(data.dropout)
Here, the two dichotomous control variables are factorized.
#Factorize binaries
data.dropout$geslachtBin <- factor(data.dropout$geslachtBin, labels = c("Female", "Male"))
data.dropout$allochtoonBin <- factor(data.dropout$allochtoonBin, labels = c("No", "Yes"))
The following section will create boxplots for the Big Five and Gender with numerical summary statistics below each plot to clarify the visualisation.
#Determine amounts of males and females in the sample
length(subset(data.dropout, geslachtBin == "Male")$geslachtBin)
## [1] 207
length(subset(data.dropout, geslachtBin == "Female")$geslachtBin)
## [1] 292
#Boxplots for Big Five and Gender
#Conscientiousness
zorg <- qplot(
x = geslachtBin,
y = zorgvuldig,
main = "Boxplot for Conscientiousness and Gender",
xlab = "Gender",
ylab = "Level of Conscientiousness",
data = data.dropout,
geom = "boxplot") +
theme_bw(base_family = "serif") +
theme(axis.title.y = element_text(vjust = 1.0)) +
theme(axis.title.x = element_text(vjust = -0.5))
zorg
by(data.dropout$zorgvuldig, data.dropout$geslachtBin, summary)
## data.dropout$geslachtBin: Female
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 3.000 4.000 3.606 5.000 5.000
## --------------------------------------------------------
## data.dropout$geslachtBin: Male
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 3.000 3.000 3.227 4.000 5.000
#Emotional Stability
stab <- qplot(
x = geslachtBin,
y = stabiel,
main = "Boxplot for Emotional Stability and Gender",
xlab = "Gender",
ylab = "Level of Emotional Stability",
data = data.dropout,
geom = "boxplot") +
theme_bw(base_family = "serif") +
theme(axis.title.y = element_text(vjust = 1.0)) +
theme(axis.title.x = element_text(vjust = -0.5))
stab
by(data.dropout$stabiel, data.dropout$geslachtBin, summary)
## data.dropout$geslachtBin: Female
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 3.000 3.147 4.000 5.000
## --------------------------------------------------------
## data.dropout$geslachtBin: Male
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 3.000 4.000 3.551 4.000 5.000
#Openness
open <- qplot(
x = geslachtBin,
y = open,
main = "Boxplot for Openness and Gender",
xlab = "Gender",
ylab = "Level of Openness",
data = data.dropout,
geom = "boxplot") +
theme_bw(base_family = "serif") +
theme(axis.title.y = element_text(vjust = 1.0)) +
theme(axis.title.x = element_text(vjust = -0.5))
open
by(data.dropout$open, data.dropout$geslachtBin, summary)
## data.dropout$geslachtBin: Female
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 3.000 3.000 3.209 4.000 5.000
## --------------------------------------------------------
## data.dropout$geslachtBin: Male
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 3.000 3.000 3.382 4.000 5.000
#Extraversion
extr <- qplot(
x = geslachtBin,
y = extravert,
main = "Boxplot for Extraversion and Gender",
xlab = "Gender",
ylab = "Level of Extraversion",
data = data.dropout,
geom = "boxplot") +
theme_bw(base_family = "serif") +
theme(axis.title.y = element_text(vjust = 1.0)) +
theme(axis.title.x = element_text(vjust = -0.5))
extr
by(data.dropout$extravert, data.dropout$geslachtBin, summary)
## data.dropout$geslachtBin: Female
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 3.000 3.127 4.000 5.000
## --------------------------------------------------------
## data.dropout$geslachtBin: Male
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 3.000 3.111 4.000 5.000
#Agreeableness
altr <- qplot(
x = geslachtBin,
y = altrusme,
main = "Boxplot for Agreeableness and Gender",
xlab = "Gender",
ylab = "Level of Agreeableness",
data = data.dropout,
geom = "boxplot") +
theme_bw(base_family = "serif") +
theme(axis.title.y = element_text(vjust = 1.0)) +
theme(axis.title.x = element_text(vjust = -0.5))
altr
by(data.dropout$altrusme, data.dropout$geslachtBin, summary)
## data.dropout$geslachtBin: Female
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 3.000 2.938 3.000 5.000
## --------------------------------------------------------
## data.dropout$geslachtBin: Male
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 3.000 2.879 3.000 5.000
#Uncomment to also group the plots in one window
#grid.arrange(altr, extr, open, stab, zorg, ncol= 3)
Next: Boxplots for the Big Five and Foreign Origin with numerical summaries below each plot.
#Boxplots for Big Five and foreign origin
#Conscientiousness
zorg.1 <- qplot(
x = allochtoonBin,
y = zorgvuldig,
main = "Boxplot for Conscientiousness and Foreign Origin",
xlab = "Foreign Origin",
ylab = "Level of Conscientiousness",
data = data.dropout,
geom = "boxplot") +
theme_bw(base_family = "serif") +
theme(axis.title.y = element_text(vjust = 1.0)) +
theme(axis.title.x = element_text(vjust = -0.5))
zorg.1
by(data.dropout$zorgvuldig, data.dropout$allochtoonBin, summary)
## data.dropout$allochtoonBin: No
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 3.000 3.000 3.367 4.000 5.000
## --------------------------------------------------------
## data.dropout$allochtoonBin: Yes
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 3.000 4.000 3.699 5.000 5.000
#Emotional Stability
stab.1 <- qplot(
x = allochtoonBin,
y = stabiel,
main = "Boxplot for Emotional Stability and Foreign Origin",
xlab = "Foreign Origin",
ylab = "Level of Emotional Stability",
data = data.dropout,
geom = "boxplot") +
theme_bw(base_family = "serif") +
theme(axis.title.y = element_text(vjust = 1.0)) +
theme(axis.title.x = element_text(vjust = -0.5))
stab.1
by(data.dropout$stabiel, data.dropout$allochtoonBin, summary)
## data.dropout$allochtoonBin: No
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 3.000 3.303 4.000 5.000
## --------------------------------------------------------
## data.dropout$allochtoonBin: Yes
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 3.00 3.00 3.35 4.00 5.00
#Openness
open.1 <- qplot(
x = allochtoonBin,
y = open,
main = "Boxplot for Openness and Foreign Origin",
xlab = "Foreign Origin",
ylab = "Level of Openness",
data = data.dropout,
geom = "boxplot") +
theme_bw(base_family = "serif") +
theme(axis.title.y = element_text(vjust = 1.0)) +
theme(axis.title.x = element_text(vjust = -0.5))
open.1
by(data.dropout$open, data.dropout$allochtoonBin, summary)
## data.dropout$allochtoonBin: No
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 3.000 3.000 3.186 4.000 5.000
## --------------------------------------------------------
## data.dropout$allochtoonBin: Yes
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 3.000 4.000 3.569 4.000 5.000
#Extraversion
extr.1 <- qplot(
x = allochtoonBin,
y = extravert,
main = "Boxplot for Extraversion and Foreign Origin",
xlab = "Foreign Origin",
ylab = "Level of Extraversion",
data = data.dropout,
geom = "boxplot") +
theme_bw(base_family = "serif") +
theme(axis.title.y = element_text(vjust = 1.0)) +
theme(axis.title.x = element_text(vjust = -0.5))
extr.1
by(data.dropout$extravert, data.dropout$allochtoonBin, summary)
## data.dropout$allochtoonBin: No
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 3.000 3.104 4.000 5.000
## --------------------------------------------------------
## data.dropout$allochtoonBin: Yes
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 3.000 3.171 4.000 5.000
#Agreeableness
altr.1 <- qplot(
x = allochtoonBin,
y = altrusme,
main = "Boxplot for Agreeableness and Foreign Origin",
xlab = "Foreign Origin",
ylab = "Level of Agreeableness",
data = data.dropout,
geom = "boxplot") +
theme_bw(base_family = "serif") +
theme(axis.title.y = element_text(vjust = 1.0)) +
theme(axis.title.x = element_text(vjust = -0.5))
altr.1
by(data.dropout$altrusme, data.dropout$allochtoonBin, summary)
## data.dropout$allochtoonBin: No
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 3.000 2.891 3.000 5.000
## --------------------------------------------------------
## data.dropout$allochtoonBin: Yes
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 3.000 2.984 4.000 5.000
#Uncomment to also group all plots in one window
#grid.arrange(arrangeGrob(zorg.1, stab.1, open.1, extr.1, altr.1, ncol = 3))
This section will display scatterplot for each of the Big Five variables on Dropout.
In this section the distribution of dropouts among Gender and Foreign Origin will be investigated.
#Scatterplot distribution gender and foreign origin among dropout
dropout.control <- ggplot(aes(x = geslachtBin, y = vrv_1),
data = data.dropout) +
geom_jitter(aes(colour = allochtoonBin,
size = 0.75),
position = position_jitter(width = 0.2,
height = 0.2),
alpha = 1/3) +
scale_y_continuous(breaks = c(0, 1)) +
labs(
x = "Gender",
y = "Dropout (Dichotomous)",
title = "Distribution of Gender and Foreign Origin among Dropout") +
theme_bw(base_family = "serif") +
guides(colour = guide_legend("Foreign Origin"),
size = FALSE) +
geom_hline(yintercept = 0.5, alpha = 0.3)
dropout.control
#Dropouts by male and female
by(data.dropout$vrv_1, data.dropout$geslachtBin, length)
## data.dropout$geslachtBin: Female
## [1] 292
## --------------------------------------------------------
## data.dropout$geslachtBin: Male
## [1] 207
#Dropouts by foreign origin and native origin
by(data.dropout$vrv_1, data.dropout$allochtoonBin, length)
## data.dropout$allochtoonBin: No
## [1] 376
## --------------------------------------------------------
## data.dropout$allochtoonBin: Yes
## [1] 123
#Amount of people of foreign origin in data
length(subset(data.dropout, allochtoonBin == "Yes")$allochtoonBin)
## [1] 123
#Amount of people of native origin in data
length(subset(data.dropout, allochtoonBin == "No")$allochtoonBin)
## [1] 376
#Create variables for either foreign origin is true or not
f1 <- subset(data.dropout, allochtoonBin == "Yes")
f0 <- subset(data.dropout, allochtoonBin == "No")
#Scatterplot dropout among foreign origin
dropout.f1 <- ggplot(aes(x = geslachtBin, y = vrv_1),
data = f1) +
geom_jitter(position = position_jitter(width = 0.095,
height = 0.095),
alpha = 0.4) +
scale_y_continuous(breaks = c(0, 1)) +
labs(
x = "Gender",
y = "Dropout (Dichotomous)",
title = "Gender and Dropout among Foreign Origin") +
theme_bw(base_family = "serif")
dropout.f1
#Scatterplot dropout among native origin
dropout.f0 <- ggplot(aes(x = geslachtBin, y = vrv_1),
data = f0) +
geom_jitter(position = position_jitter(width = 0.095,
height = 0.095),
alpha = 0.4) +
scale_y_continuous(breaks = c(0, 1)) +
labs(
x = "Gender",
y = "Dropout (Dichotomous)",
title = "Gender and Dropout among Native Origin") +
theme_bw(base_family = "serif")
dropout.f0
#Dropouts by male and female among foreign origin
by(f1$vrv_1, f1$geslachtBin, length)
## f1$geslachtBin: Female
## [1] 82
## --------------------------------------------------------
## f1$geslachtBin: Male
## [1] 41
#Dropouts by male and female among native origin
by(f0$vrv_1, f0$geslachtBin, length)
## f0$geslachtBin: Female
## [1] 210
## --------------------------------------------------------
## f0$geslachtBin: Male
## [1] 166
#DROPOUT FOREIGN ORIGIN
#Dropout YES foreign origin and male
length(subset(f1, vrv_1 == 1 & geslachtBin == "Male")$allochtoonBin)
## [1] 26
#Dropout YES foreign origin and female
length(subset(f1, vrv_1 == 1 & geslachtBin == "Female")$allochtoonBin)
## [1] 50
#Dropout NO foreign origin and male
length(subset(f1, vrv_1 == 0 & geslachtBin == "Male")$allochtoonBin)
## [1] 15
#Dropout NO foreign origin and female
length(subset(f1, vrv_1 == 0 & geslachtBin == "Female")$allochtoonBin)
## [1] 32
#DROPOUT NATIVE ORIGIN
#Generate native YES
n1 <- subset(data.dropout, allochtoonBin == "No")
#Generate native NO
n0 <- subset(data.dropout, allochtoonBin == "Yes")
#Dropout YES native origin and male
length(subset(n1, vrv_1 == 1 & geslachtBin == "Male")$allochtoonBin)
## [1] 108
#Dropout YES native origin and female
length(subset(n1, vrv_1 == 1 & geslachtBin == "Female")$allochtoonBin)
## [1] 129
#Dropout NO native origin and male
length(subset(n1, vrv_1 == 0 & geslachtBin == "Male")$allochtoonBin)
## [1] 58
#Dropout NO native origin and female
length(subset(n1, vrv_1 == 0 & geslachtBin == "Female")$allochtoonBin)
## [1] 81
CorMatrix <- cor(data.dropout[,1:25])
corrplot(CorMatrix, method = "circle", type = "lower", order ="AOE")