# This empirical example uses the data from Angrist and Krueger (1991) to
# illustrate the construction of confidence intervals robust to weak
# instruments using the method of Chernozhukov and Hansen (2008)
# Authors: V. Chernozhukov and I. Fernandez-Val
# Data source: Josh Angrist and Alan Krueger, "Does compulsory school
# attendance affect schooling and earnings?",
# Quarterly Journal of Economics, Vol. 106, No. 4, 1991, pp. 979-1014
# URL for data: http://economics.mit.edu/faculty/angrist/data1/data/angkru1991
# Updated on 02/16/2015

############
# Part I: Functions

# Function to compute White standard errors.
# Returns the heteroskedasticity-consistent covariance matrix of a fitted
# model `x`; passed as the `vcov` argument of coeftest() below.
hc <- function(x) {
  vcovHC(x, type = "HC")
}

####################
# Part II: Main program

library(sandwich)
library(foreign)
library(AER)
# library(ivpack)

# Input data and output figure locations (edit these for your machine).
data.file <- "/Users/ivan/Dropbox/Shared/14.382/Data/NEW7080/QOB-CENSUS80-COHORT30-39.dta"
plot.file <- "/Users/ivan/Dropbox/Shared/14.382/Results/QOB.pdf"

# Sample: Census 1980, Cohort 1930-1939
data <- read.dta(data.file)
alpha <- .05 # Significance level

# Partialing-out controls: wage, schooling and the instrument (born in the
# 4th quarter) are each replaced by their residuals from a regression on the
# same set of controls. The data frame is passed explicitly instead of being
# attach()ed, so column lookups cannot be masked by global variables.
controls <- ~ factor(YOB) + factor(MARRIED) + factor(RACE) + factor(SMSA) +
  factor(REGION)
rlwage <- lm(update(controls, LWKLYWGE ~ .), data = data)$residuals
reduc <- lm(update(controls, EDUC ~ .), data = data)$residuals
rqob4 <- lm(update(controls, I(QOB == 4) ~ .), data = data)$residuals

# ols results
ols.fit <- lm(rlwage ~ reduc)
ols.coef <- coef(ols.fit)[2]
ols.se <- coeftest(ols.fit, vcov = hc)[2, 2]
ols.lci <- ols.coef + qnorm(alpha / 2) * ols.se
ols.uci <- ols.coef + qnorm(1 - alpha / 2) * ols.se

# First stage
fs.fit <- lm(reduc ~ rqob4)
fs.coef <- coef(fs.fit)[2]
fs.se <- coeftest(fs.fit, vcov = hc)[2, 2]
fs.Fstat <- (fs.coef / fs.se)^2 # squared robust t-stat of the single instrument
print(paste('F-stat: ', fs.Fstat))

# tsls results
tsls.fit <- ivreg(rlwage ~ reduc | rqob4)
tsls.coef <- coef(tsls.fit)[2]
tsls.se <- coeftest(tsls.fit, vcov = hc)[2, 2]
tsls.lci <- tsls.coef + qnorm(alpha / 2) * tsls.se
tsls.uci <- tsls.coef + qnorm(1 - alpha / 2) * tsls.se

# Confidence interval robust to weak instruments (Chernozhukov and Hansen, 2008)
# For each candidate value b on a grid centred at the TSLS estimate, regress
# (rlwage - b * reduc) on the instrument and record the robust t-statistic of
# the instrument; the confidence set collects the b's whose squared statistic
# falls below the chi-square(1) critical value.
gridbeta <- tsls.coef + c(-100:100) / 1500

# Robust t-statistic of rqob4 in the regression of rlwage - b * reduc on rqob4.
rtstat <- function(b) {
  fit <- lm(I(rlwage - b * reduc) ~ rqob4)
  coef(fit)[[2]] / coeftest(fit, vcov = hc)[2, 2]
}
gridrtstat <- vapply(gridbeta, rtstat, numeric(1))

crit.value <- qchisq(1 - alpha, 1)
accepted <- which(gridrtstat^2 < crit.value)
if (length(accepted) == 0) {
  # min()/max() of an empty set would silently give Inf/-Inf.
  warning("No grid point is accepted; the robust interval is reported as NA.",
          call. = FALSE)
  tsls.lrci <- NA_real_
  tsls.urci <- NA_real_
} else {
  if (min(accepted) == 1 || max(accepted) == length(gridbeta)) {
    # With weak instruments the robust set can be very wide or unbounded.
    warning("The robust confidence set reaches the edge of the grid; ",
            "widen 'gridbeta' to avoid a truncated interval.", call. = FALSE)
  }
  # Reported bounds are the smallest and largest accepted grid values.
  tsls.lrci <- min(gridbeta[accepted])
  tsls.urci <- max(gridbeta[accepted])
}

# NOTE: changes the number of printed digits for the rest of the session.
options(digits = 3)

table <- matrix(0, ncol = 4, nrow = 3,
                dimnames = list(c('OLS', 'TSLS - Wald', 'WI Robust'),
                                c('Est.', 'Std. Error', '95% LCI', '95% UCI')))
table[1, ] <- c(ols.coef, ols.se, ols.lci, ols.uci)
table[2, ] <- c(tsls.coef, tsls.se, tsls.lci, tsls.uci)
table[3, ] <- c(NA, NA, tsls.lrci, tsls.urci)
print(table)

# Graphical illustration of the construction of the confidence intervals
tstat.tsls <- ((tsls.coef - gridbeta) / tsls.se)^2
tstat.wi <- gridrtstat^2

pdf(plot.file, pointsize = 15, width = 8.0, height = 8.0)
par(mfrow = c(1, 1))
plot(range(gridbeta), range(c(tstat.tsls, tstat.wi)), type = "n",
     xlab = "Returns to Schooling", ylab = "Statistic", main = " ")
lines(gridbeta, tstat.tsls, lty = 2, col = 2)
lines(gridbeta, tstat.wi, lty = 1, col = 1)
abline(h = crit.value, lty = 3, col = 4)
legend(min(gridbeta), max(tstat.tsls),
       c('Wald Statistic', 'WI-Robust Statistic', '95% Critical Value'),
       col = c(2, 1, 4), lty = c(2, 1, 3), horiz = FALSE, bty = 'n')
dev.off()