In this document we will investigate the causal relationships in our dataset using regression analysis.
#Set working directory
#setwd("C:/Users/Laurens/Dropbox/University/Year 4/Period 2/Applied Economics Research Course/Thesis/data")
#Load required packages
library(foreign)
library(stargazer)
library(ggplot2)
library(aod)
library(gridExtra)
library(ggthemes)
library(dplyr)
library(mfx)
library(corrplot)
library(car)
library(LogisticDx)
library(rms)
#Enable anti-aliasing for rendered graphics
library(knitr)
#opts_chunk$set(out.width = '\\maxwidth')
#dev = "CairoPNG",
#Load dataset
data.dropout <- read.dta("DatasetTrimmed.dta")
#Read name vector of dataset
names(data.dropout)
#Factorize binaries
data.dropout$geslachtBin <- factor(data.dropout$geslachtBin, labels = c("Female", "Male"))
data.dropout$allochtoonBin <- factor(data.dropout$allochtoonBin, labels = c("No", "Yes"))
In this section all the investigated models, as well as the obsolete ones, are listed.
For testing purposes.
#TRIVIAL: LPM model of Big Five on dropout
lpm.1 <- lm(vrv_1 ~ zorgvuldig + stabiel + open + extravert + altrusme + allochtoonBin + geslachtBin,
data = data.dropout)
#Display results
summary(lpm.1)
Neglects the theory in the sense that it disregards gender and foreign origin as control variables.
logit.1 <- glm(vrv_1 ~ stabiel + open + zorgvuldig + extravert + altrusme + age_opleiding,
family = binomial(logit),
data = data.dropout)
#Display results
summary(logit.1)
##
## Call:
## glm(formula = vrv_1 ~ stabiel + open + zorgvuldig + extravert +
## altrusme + age_opleiding, family = binomial(logit), data = data.dropout)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8954 -1.2830 0.8695 0.9972 1.2225
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.02021 0.53107 0.038 0.969644
## stabiel -0.03668 0.08695 -0.422 0.673153
## open -0.01039 0.10280 -0.101 0.919523
## zorgvuldig -0.20678 0.09600 -2.154 0.031244 *
## extravert 0.07932 0.10499 0.755 0.449957
## altrusme 0.01165 0.12152 0.096 0.923644
## age_opleiding 0.05571 0.01645 3.386 0.000708 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 659.08 on 498 degrees of freedom
## Residual deviance: 639.80 on 492 degrees of freedom
## AIC: 653.8
##
## Number of Fisher Scoring iterations: 4
#Calculate odds ratio vector
coef.vector.1 <- exp(logit.1$coef)
#R squared and Wald test significance
require(rms)
lrm(logit.1)
## Logistic Regression Model
##
## lrm(formula = logit.1)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 499 LR chi2 19.28 R2 0.052 C 0.599
## 0 186 d.f. 6 g 0.467 Dxy 0.199
## 1 313 Pr(> chi2) 0.0037 gr 1.595 gamma 0.200
## max |deriv| 7e-06 gp 0.098 tau-a 0.093
## Brier 0.226
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept 0.0202 0.5311 0.04 0.9696
## stabiel -0.0367 0.0870 -0.42 0.6732
## open -0.0104 0.1028 -0.10 0.9195
## zorgvuldig -0.2068 0.0960 -2.15 0.0312
## extravert 0.0793 0.1050 0.76 0.4500
## altrusme 0.0116 0.1215 0.10 0.9236
## age_opleiding 0.0557 0.0165 3.39 0.0007
##
#Marginal effects
require(mfx)
mfx.1 <- logitmfx(logit.1, data = data.dropout)
#Variance inflation factor: multicollinearity test
require(car)
vif(logit.1)
## stabiel open zorgvuldig extravert altrusme
## 1.183144 1.257312 1.223593 1.428105 1.182988
## age_opleiding
## 1.036169
sqrt(vif(logit.1)) > 2 #Bigger than 2 (or sometimes 2.5) signals relatively high multicollinearity
## stabiel open zorgvuldig extravert altrusme
## FALSE FALSE FALSE FALSE FALSE
## age_opleiding
## FALSE
#Test for outliers
outlierTest(logit.1)
##
## No Studentized residuals with Bonferonni p < 0.05
## Largest |rstudent|:
## rstudent unadjusted p-value Bonferonni p
## 406 -1.929694 0.053645 NA
#Cook's distance
#plot(logit.1, which = 4, main = "Model One")
ggplot(aes(x = seq_along(.cooksd), y = .cooksd), data = logit.1) +
geom_bar(stat = "identity") +
theme_bw(base_family = "serif") +
labs(title = "Model One: Cook's Distance",
x = "Observation Number",
y = "Cook's Distance") +
geom_hline(yintercept = 0.0081, colour = 2, alpha = 0.75, linetype = 5)
#Resid vs fitted
#plot(logit.1, which = 1, main = "Model One")
ggplot(aes(x = .fitted, y = .resid), data = logit.1) +
geom_point(alpha = 1/4) +
geom_hline(yintercept = 0, linetype = 2, alpha = 0.75) +
geom_smooth(alpha = 0.15, method = "loess") +
theme_bw(base_family = "serif") +
labs(title = "Model One: Residuals vs. Fitted",
x = "Predicted Values",
y = "Residuals")
#Model fitted on covariates
ggplot(aes(x = data.dropout$zorgvuldig, y = .resid), data = logit.1) +
theme_bw(base_family = "serif") +
geom_smooth(aes(x = .fitted, y = .resid)) +
geom_point(alpha = 1/6) +
geom_point(aes(x = data.dropout$open+0.1, y = .resid), colour = 2, alpha = 1/6) +
geom_point(aes(x = data.dropout$stabiel+0.2, y = .resid), colour = 3, alpha = 1/6) +
geom_point(aes(x = data.dropout$extravert+0.3, y = .resid), colour = 4, alpha = 1/6) +
geom_point(aes(x = data.dropout$altrusme+0.4, y = .resid), colour = 5, alpha = 1/6)
## `geom_smooth()` using method = 'loess'
Added gender and foreign origin as control variables to the first logit model.
logit.2 <- glm(vrv_1 ~ stabiel + open + zorgvuldig + extravert + altrusme + age_opleiding + geslachtBin +
allochtoonBin,
family = binomial(logit),
data = data.dropout)
#Display results
summary(logit.2)
##
## Call:
## glm(formula = vrv_1 ~ stabiel + open + zorgvuldig + extravert +
## altrusme + age_opleiding + geslachtBin + allochtoonBin, family = binomial(logit),
## data = data.dropout)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8665 -1.2884 0.8566 0.9997 1.2473
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.04660 0.53913 -0.086 0.931121
## stabiel -0.05222 0.08969 -0.582 0.560368
## open -0.02572 0.10521 -0.244 0.806885
## zorgvuldig -0.19280 0.09915 -1.944 0.051836 .
## extravert 0.08508 0.10532 0.808 0.419199
## altrusme 0.01471 0.12168 0.121 0.903769
## age_opleiding 0.05646 0.01652 3.418 0.000631 ***
## geslachtBinMale 0.15153 0.20361 0.744 0.456721
## allochtoonBinYes 0.06659 0.22302 0.299 0.765271
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 659.08 on 498 degrees of freedom
## Residual deviance: 639.19 on 490 degrees of freedom
## AIC: 657.19
##
## Number of Fisher Scoring iterations: 4
#R squared and Wald test significance
require(rms)
lrm(logit.2)
## Logistic Regression Model
##
## lrm(formula = logit.2)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 499 LR chi2 19.89 R2 0.053 C 0.601
## 0 186 d.f. 8 g 0.476 Dxy 0.201
## 1 313 Pr(> chi2) 0.0108 gr 1.610 gamma 0.203
## max |deriv| 8e-06 gp 0.100 tau-a 0.094
## Brier 0.226
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -0.0466 0.5391 -0.09 0.9311
## stabiel -0.0522 0.0897 -0.58 0.5604
## open -0.0257 0.1052 -0.24 0.8069
## zorgvuldig -0.1928 0.0992 -1.94 0.0518
## extravert 0.0851 0.1053 0.81 0.4192
## altrusme 0.0147 0.1217 0.12 0.9038
## age_opleiding 0.0565 0.0165 3.42 0.0006
## geslachtBin=Male 0.1515 0.2036 0.74 0.4567
## allochtoonBin=Yes 0.0666 0.2230 0.30 0.7653
##
#Marginal effects
require(mfx)
logitmfx(logit.2, data = data.dropout)
## Call:
## logitmfx(formula = logit.2, data = data.dropout)
##
## Marginal Effects:
## dF/dx Std. Err. z P>|z|
## stabiel -0.0120952 0.0207698 -0.5823 0.5603351
## open -0.0059565 0.0243658 -0.2445 0.8068734
## zorgvuldig -0.0446532 0.0229390 -1.9466 0.0515822 .
## extravert 0.0197039 0.0243864 0.8080 0.4190991
## altrusme 0.0034071 0.0281810 0.1209 0.9037686
## age_opleiding 0.0130772 0.0037752 3.4640 0.0005322 ***
## geslachtBinMale 0.0349592 0.0467581 0.7477 0.4546648
## allochtoonBinYes 0.0153493 0.0511618 0.3000 0.7641653
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## dF/dx is for discrete change for the following variables:
##
## [1] "geslachtBinMale" "allochtoonBinYes"
#Variance inflation factor: multicollinearity test
require(car)
vif(logit.2)
## stabiel open zorgvuldig extravert
## 1.257415 1.315879 1.303901 1.436032
## altrusme age_opleiding geslachtBinMale allochtoonBinYes
## 1.183412 1.040891 1.124696 1.054038
sqrt(vif(logit.2)) > 2
## stabiel open zorgvuldig extravert
## FALSE FALSE FALSE FALSE
## altrusme age_opleiding geslachtBinMale allochtoonBinYes
## FALSE FALSE FALSE FALSE
#Test for outliers
outlierTest(logit.2)
##
## No Studentized residuals with Bonferonni p < 0.05
## Largest |rstudent|:
## rstudent unadjusted p-value Bonferonni p
## 406 -1.902455 0.057112 NA
Added interaction effect between emotional stability and conscientiousness, as suggested by the psychologist referred to by Yolanda.
logit.3 <- glm(vrv_1 ~ stabiel + open + zorgvuldig + extravert + altrusme + age_opleiding + stabiel:zorgvuldig,
family = binomial(logit),
data = data.dropout)
#Display results
summary(logit.3)
##
## Call:
## glm(formula = vrv_1 ~ stabiel + open + zorgvuldig + extravert +
## altrusme + age_opleiding + stabiel:zorgvuldig, family = binomial(logit),
## data = data.dropout)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9140 -1.2881 0.8516 1.0022 1.2020
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.859716 0.989461 0.869 0.384917
## stabiel -0.290425 0.265614 -1.093 0.274213
## open -0.020262 0.103352 -0.196 0.844575
## zorgvuldig -0.441306 0.251862 -1.752 0.079744 .
## extravert 0.076597 0.105109 0.729 0.466160
## altrusme 0.006072 0.121729 0.050 0.960217
## age_opleiding 0.055556 0.016508 3.365 0.000764 ***
## stabiel:zorgvuldig 0.073843 0.072905 1.013 0.311126
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 659.08 on 498 degrees of freedom
## Residual deviance: 638.77 on 491 degrees of freedom
## AIC: 654.77
##
## Number of Fisher Scoring iterations: 4
#R squared and Wald test significance
require(rms)
lrm(logit.3)
## Logistic Regression Model
##
## lrm(formula = logit.3)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 499 LR chi2 20.31 R2 0.054 C 0.605
## 0 186 d.f. 7 g 0.481 Dxy 0.210
## 1 313 Pr(> chi2) 0.0049 gr 1.618 gamma 0.211
## max |deriv| 8e-06 gp 0.100 tau-a 0.098
## Brier 0.226
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept 0.8597 0.9895 0.87 0.3849
## stabiel -0.2904 0.2656 -1.09 0.2742
## open -0.0203 0.1034 -0.20 0.8446
## zorgvuldig -0.4413 0.2519 -1.75 0.0797
## extravert 0.0766 0.1051 0.73 0.4662
## altrusme 0.0061 0.1217 0.05 0.9602
## age_opleiding 0.0556 0.0165 3.37 0.0008
## stabiel * zorgvuldig 0.0738 0.0729 1.01 0.3111
##
#Interpret marginal effects
require(mfx)
logitmfx(logit.3, data = data.dropout)
## Call:
## logitmfx(formula = logit.3, data = data.dropout)
##
## Marginal Effects:
## dF/dx Std. Err. z P>|z|
## stabiel -0.0672418 0.0614541 -1.0942 0.273876
## open -0.0046911 0.0239275 -0.1961 0.844566
## zorgvuldig -0.1021752 0.0582265 -1.7548 0.079296 .
## extravert 0.0177345 0.0243313 0.7289 0.466077
## altrusme 0.0014058 0.0281838 0.0499 0.960217
## age_opleiding 0.0128629 0.0037718 3.4103 0.000649 ***
## stabiel:zorgvuldig 0.0170968 0.0168691 1.0135 0.310823
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Variance inflation factor: multicollinearity test
require(car)
vif(logit.3)
## stabiel open zorgvuldig
## 10.899160 1.270075 8.289497
## extravert altrusme age_opleiding
## 1.424272 1.184704 1.035412
## stabiel:zorgvuldig
## 22.659327
sqrt(vif(logit.3)) > 2
## stabiel open zorgvuldig
## TRUE FALSE TRUE
## extravert altrusme age_opleiding
## FALSE FALSE FALSE
## stabiel:zorgvuldig
## TRUE
#Test for outliers
outlierTest(logit.3)
##
## No Studentized residuals with Bonferonni p < 0.05
## Largest |rstudent|:
## rstudent unadjusted p-value Bonferonni p
## 406 -1.949069 0.051287 NA
Dropped all the outliers for age: bigger than 35. Resulting in a sample size of 466 (before: 499).
logit.4 <- glm(vrv_1 ~ stabiel + open + zorgvuldig + extravert + altrusme + age_opleiding,
family = binomial(logit),
data = subset(data.dropout, !age_opleiding > 35))
#Display results
summary(logit.4)
##
## Call:
## glm(formula = vrv_1 ~ stabiel + open + zorgvuldig + extravert +
## altrusme + age_opleiding, family = binomial(logit), data = subset(data.dropout,
## !age_opleiding > 35))
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5952 -1.3122 0.9136 1.0067 1.1694
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.457961 0.638476 0.717 0.4732
## stabiel -0.045877 0.087652 -0.523 0.6007
## open 0.023058 0.103870 0.222 0.8243
## zorgvuldig -0.171712 0.096692 -1.776 0.0758 .
## extravert 0.069962 0.106084 0.659 0.5096
## altrusme -0.003223 0.123037 -0.026 0.9791
## age_opleiding 0.023985 0.026921 0.891 0.3730
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 624.39 on 465 degrees of freedom
## Residual deviance: 619.72 on 459 degrees of freedom
## AIC: 633.72
##
## Number of Fisher Scoring iterations: 4
#R squared and Wald test significance
require(rms)
lrm(logit.4)
## Logistic Regression Model
##
## lrm(formula = logit.4)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 466 LR chi2 4.67 R2 0.014 C 0.559
## 0 183 d.f. 6 g 0.236 Dxy 0.117
## 1 283 Pr(> chi2) 0.5869 gr 1.267 gamma 0.118
## max |deriv| 2e-08 gp 0.056 tau-a 0.056
## Brier 0.236
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept 0.4580 0.6385 0.72 0.4732
## stabiel -0.0459 0.0877 -0.52 0.6007
## open 0.0231 0.1039 0.22 0.8243
## zorgvuldig -0.1717 0.0967 -1.78 0.0758
## extravert 0.0700 0.1061 0.66 0.5096
## altrusme -0.0032 0.1230 -0.03 0.9791
## age_opleiding 0.0240 0.0269 0.89 0.3730
##
#Marginal effects
require(mfx)
logitmfx(logit.4, data = data.dropout)
## Call:
## logitmfx(formula = logit.4, data = data.dropout)
##
## Marginal Effects:
## dF/dx Std. Err. z P>|z|
## stabiel -0.0084973 0.0201438 -0.4218 0.6731477
## open -0.0024060 0.0238133 -0.1010 0.9195210
## zorgvuldig -0.0479037 0.0222079 -2.1571 0.0310010 *
## extravert 0.0183749 0.0243173 0.7556 0.4498710
## altrusme 0.0026981 0.0281509 0.0958 0.9236436
## age_opleiding 0.0129064 0.0037613 3.4314 0.0006005 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Variance inflation factor: multicollinearity test
require(car)
vif(logit.4)
## stabiel open zorgvuldig extravert altrusme
## 1.175143 1.249847 1.218655 1.411745 1.185207
## age_opleiding
## 1.037346
sqrt(vif(logit.4)) > 2
## stabiel open zorgvuldig extravert altrusme
## FALSE FALSE FALSE FALSE FALSE
## age_opleiding
## FALSE
#Test for outliers
outlierTest(logit.4)
##
## No Studentized residuals with Bonferonni p < 0.05
## Largest |rstudent|:
## rstudent unadjusted p-value Bonferonni p
## 483 -1.617687 0.10573 NA
Our definitive, kick-ass model.
logit.5 <- glm(vrv_1 ~ zorgvuldig + open + stabiel + extravert + altrusme + zorgvuldig:allochtoonBin + open:allochtoonBin + stabiel:allochtoonBin + allochtoonBin + age_opleiding + leerintelligentie,
family = binomial(logit),
data = data.dropout)
#Display results
summary(logit.5)
##
## Call:
## glm(formula = vrv_1 ~ zorgvuldig + open + stabiel + extravert +
## altrusme + zorgvuldig:allochtoonBin + open:allochtoonBin +
## stabiel:allochtoonBin + allochtoonBin + age_opleiding + leerintelligentie,
## family = binomial(logit), data = data.dropout)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1111 -1.2039 0.7125 0.9641 1.6623
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.22638 0.68232 -0.332 0.740052
## zorgvuldig -0.35826 0.11199 -3.199 0.001379 **
## open 0.09935 0.12336 0.805 0.420595
## stabiel -0.15678 0.10259 -1.528 0.126463
## extravert 0.05850 0.10827 0.540 0.588996
## altrusme 0.05489 0.12679 0.433 0.665057
## allochtoonBinYes -1.56605 1.04383 -1.500 0.133540
## age_opleiding 0.05777 0.01711 3.377 0.000732 ***
## leerintelligentie 0.20454 0.11260 1.817 0.069283 .
## zorgvuldig:allochtoonBinYes 0.73863 0.25009 2.953 0.003143 **
## open:allochtoonBinYes -0.63185 0.24154 -2.616 0.008898 **
## stabiel:allochtoonBinYes 0.40362 0.21439 1.883 0.059748 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 659.08 on 498 degrees of freedom
## Residual deviance: 613.36 on 487 degrees of freedom
## AIC: 637.36
##
## Number of Fisher Scoring iterations: 4
#Calculate odds ratio vector
coef.vector.5 <- exp(logit.5$coef)
#R squared and Wald test significance
require(rms)
lrm(logit.5)
## Logistic Regression Model
##
## lrm(formula = logit.5)
##
## Model Likelihood Discrimination Rank Discrim.
## Ratio Test Indexes Indexes
## Obs 499 LR chi2 45.72 R2 0.119 C 0.669
## 0 186 d.f. 11 g 0.774 Dxy 0.337
## 1 313 Pr(> chi2) <0.0001 gr 2.169 gamma 0.339
## max |deriv| 3e-05 gp 0.162 tau-a 0.158
## Brier 0.214
##
## Coef S.E. Wald Z Pr(>|Z|)
## Intercept -0.2264 0.6823 -0.33 0.7401
## zorgvuldig -0.3583 0.1120 -3.20 0.0014
## open 0.0994 0.1234 0.81 0.4206
## stabiel -0.1568 0.1026 -1.53 0.1265
## extravert 0.0585 0.1083 0.54 0.5890
## altrusme 0.0549 0.1268 0.43 0.6651
## allochtoonBin=Yes -1.5660 1.0438 -1.50 0.1335
## age_opleiding 0.0578 0.0171 3.38 0.0007
## leerintelligentie 0.2045 0.1126 1.82 0.0693
## zorgvuldig * allochtoonBin=Yes 0.7386 0.2501 2.95 0.0031
## open * allochtoonBin=Yes -0.6319 0.2415 -2.62 0.0089
## stabiel * allochtoonBin=Yes 0.4036 0.2144 1.88 0.0597
##
#Marginal effects
require(mfx)
mfx.5 <- logitmfx(logit.5, data = data.dropout)
#Variance inflation factor: multicollinearity test
require(car)
vif(logit.5)
## zorgvuldig open
## 1.517166 1.703091
## stabiel extravert
## 1.518127 1.454943
## altrusme allochtoonBinYes
## 1.214333 21.299938
## age_opleiding leerintelligentie
## 1.055990 1.260560
## zorgvuldig:allochtoonBinYes open:allochtoonBinYes
## 18.119427 16.813727
## stabiel:allochtoonBinYes
## 11.301315
sqrt(vif(logit.5)) > 2
## zorgvuldig open
## FALSE FALSE
## stabiel extravert
## FALSE FALSE
## altrusme allochtoonBinYes
## FALSE TRUE
## age_opleiding leerintelligentie
## FALSE FALSE
## zorgvuldig:allochtoonBinYes open:allochtoonBinYes
## TRUE TRUE
## stabiel:allochtoonBinYes
## TRUE
#Test for outliers
outlierTest(logit.5)
##
## No Studentized residuals with Bonferonni p < 0.05
## Largest |rstudent|:
## rstudent unadjusted p-value Bonferonni p
## 274 -2.167205 0.030219 NA
#Cook's distance
#plot(logit.5, which = 4, main = "Model Two")
ggplot(aes(x = seq_along(.cooksd), y = .cooksd), data = logit.5) +
geom_bar(stat = "identity") +
theme_bw(base_family = "serif") +
labs(title = "Model Two: Cook's Distance",
x = "Observation Number",
y = "Cook's Distance") +
geom_hline(yintercept = 0.0082, colour = 2, alpha = 0.75, linetype = 5)
#Resid vs fitted
#plot(logit.5, which = 1, main = "Model Two")
ggplot(aes(x = .fitted, y = .resid), data = logit.5) +
geom_point(alpha = 1/4) +
geom_hline(yintercept = 0, linetype = 2, alpha = 0.75) +
geom_smooth(alpha = 0.15, method = "loess") +
theme_bw(base_family = "serif") +
labs(title = "Model Two: Residuals vs. Fitted",
x = "Predicted Values",
y = "Residuals")