#################################################################
### ###
### `rdoptband' ###
### Author: Devin Caughey ###
### Created: 22 July 2009 ###
### Last Modified: 19 June 2012 ###
### Based on the algorithm developed by Imbens & Kalyanaraman ###
### ###
#################################################################
rdoptband_catch <- function(forcingVar,
outcomeVar,
cutpoint = 0,
C_k = 3.4375,
catch = NULL,
median_subset = TRUE) {
## This function calculates the optimal bandwidth for RD estimation
## using local linear regression. The default value for C_k is 3.4375,
## which is appropriate for the 'edge' or 'triangular' kernel.
## It returns the optimal bandwidth, the estimated discontinuity at the
## cutpoint, and the conventional estimated standard error of the
## estimate.
## The `catch' argument is a recent addition. It allows the user to
## decide whether to implement the catch in Equation 4.11 of Imbens &
## Kalyanaraman (February 2009). The default setting is NULL, in
## which case there is no catch. A positive value sets a floor on
## the estimated third derivative used to calculate the pilot
## bandwidths. The value used Imbens & Kalyanaraman (February 2009)
## is 0.01. Later iterations of this paper (e.g., January 2010)
## eliminate this catch, but as of June 2011 it was still used by
## the version of the `rdob' Stata program available on Imbens's
## website.
## The option `median_subset' is more recent still (19 June 2012)
## and was prompted by Ian Gow, who noticed that I&K's software
## implementations of this algorithm estimate the
## third derivative (m3hat.c) from a cubic regression on a _subset_
## of the data (trimming data beyond the medians on each side of the
## cutpoint). I could find no mention of this in I&K's paper, so I
## give the user the option whether to implement it.
### Error check
if (any(is.na(forcingVar)) | any(is.na(outcomeVar))) {
stop("Input variables contain missing values.\n")
}
### (0) Recenter forcing variable so that the cut-point is 0
X0 <- forcingVar - cutpoint
Y <- outcomeVar
### (1) Estimation of density (f.hat.c) and
### conditional variance (S.hat_c ^ 2)
Sx <- sd(X0)
N <- length(X0)
N.pos <- sum(X0 >= 0)
N.neg <- sum(X0 < 0)
h1 <- 1.84 * Sx * (N ^ -0.2)
## Calculate the number of units on either side of the threshold,
## and the average outcomes on either side.
N_h1.neg <- sum(X0 >= -h1 & X0 < 0)
Ybar_h1.neg <- mean(Y[X0 >= -h1 & X0 < 0])
N_h1.pos <- sum(X0 >= 0 & X0 <= h1)
Ybar_h1.pos <- mean(Y[X0 >= 0 & X0 <= h1])
## Estimate the density of the forcing variable at the cutpoint
f.hat.c <- (N_h1.neg + N_h1.pos) / (2 * N * h1)
## Estimate the limit of the conditional variances of Yi given
## Xi = x, at x = c, from the left and the right
var.hat.neg.c <- var(Y[X0 >= -h1 & X0 < 0])
var.hat.pos.c <- var(Y[X0 >= 0 & X0 <= h1])
## For sample variance, use the following formula:
# mean((Y[(X0 >= -h1) & (X0 < 0)] -
# mean(Y[(X0 >= -h1) & (X0 < 0)]))^2)
### (2) Estimation of second derivatives
## Fit a third order polynomial to the data and estimate the
## third derivative at the cut-point
right <- X0 >= 0
if (median_subset) {
ss <- X0 >= median(X0[!right]) & X0 <= median(X0[right])
} else {
ss <- rep(TRUE, length(X0))
}
lm3 <- lm(Y ~ right + X0 + I(X0 ^ 2) + I(X0 ^ 3), subset = ss)
m3hat.c <- 6 * coef(lm3)[["I(X0^3)"]]
## N.B. The February 2009 version of Imbens and Kalyanaraman's
## working paper contains a "catch" (p. 10, equation 4.11) that
## multiplied f.hat.c by the max of m3hat.c ^ 2 and 0.01, in case
## m3hat.c ^ 2 was too close to 0. This catch was dropped from a
## later version of the working paper (January 2010).
if (is.null(catch)) {
catch_term <- m3hat.c ^ 2
} else {
catch_term <- max(c(m3hat.c ^ 2), catch)
}
h2.pos <- ((var.hat.pos.c / (f.hat.c * catch_term)) ^ (1 / 7) *
3.56 * (N.pos ^ (-1 / 7)))
h2.neg <- ((var.hat.neg.c / (f.hat.c * catch_term)) ^ (1 / 7) *
3.56 * (N.neg ^ (-1 / 7)))
## Given the pilot bandwidths h2.pos and h2.neg, we estimate the
## curvature m(2)(c) by a local quadratic
h2.pos.obs <- X0 >= 0 & X0 <= h2.pos
N2.pos <- sum(h2.pos.obs)
lm.h2.pos <- lm(Y[h2.pos.obs] ~ X0[h2.pos.obs] +
I(X0[h2.pos.obs] ^ 2))
m2hat.pos.c <- 2 * coef(lm.h2.pos)[["I(X0[h2.pos.obs]^2)"]]
h2.neg.obs <- X0 >= -h2.neg & X0 < 0
N2.neg <- sum(h2.neg.obs)
lm.h2.neg <- lm(Y[h2.neg.obs] ~ X0[h2.neg.obs] +
I(X0[h2.neg.obs] ^ 2))
m2hat.neg.c <- 2 * coef(lm.h2.neg)[["I(X0[h2.neg.obs]^2)"]]
### (3) Calculation of regulation terms and optimal h.
r.hat.pos <- (720 * var.hat.pos.c) / (N2.pos * h2.pos ^ 4)
r.hat.neg <- (720 * var.hat.neg.c) / (N2.neg * h2.neg ^ 4)
h.opt <- C_k * (N ^ (-1 / 5)) *
((var.hat.neg.c + var.hat.pos.c) /
(((m2hat.pos.c - m2hat.neg.c) ^ 2 + r.hat.pos + r.hat.neg)
* f.hat.c)) ^ (1 / 5)
### (4) Calculate the treatment effect and standard errors.
## Weights based on triangular kernel
wgts <- (1 - abs(X0 / h.opt)) * (abs(X0) <= h.opt)
local.lm <- lm(Y ~ right + X0 + I(X0 * right), weights = wgts)
rd.est <- coef(local.lm)[[2]]
sd.est <- sqrt(vcov(local.lm)[2, 2])
## Output
out <- c(h.opt, rd.est, sd.est)
names(out) <- c("Optimal Bandwidth", "RD Estimate", "Standard Error")
return(out)
}
### TESTING ###
if (FALSE) {
url <- "http://www.economics.harvard.edu/faculty/imbens"
url <- paste(url, "/files/art_sharp_rd.txt", sep="/")
rd_sharp_data <- read.fwf(url, header=FALSE, widths=rep(10,6),
col.names=c("y", "x", paste("z", 1:4,
sep="")))
rd_sharp_data <- rd_sharp_data[1:1000,]
c <- 0.2990
rdoptband_catch(forcingVar = rd_sharp_data$x,
outcomeVar = rd_sharp_data$y,
cutpoint = c,
C_k = 3.4375,
catch = NULL,
median_subset = TRUE)
## > Optimal Bandwidth RD Estimate Standard Error
## 0.6759120 1.0355491 0.3013769
rdoptband_catch(forcingVar = rd_sharp_data$x,
outcomeVar = rd_sharp_data$y,
cutpoint = c,
C_k = 3.4375,
catch = NULL,
median_subset = FALSE)
## > Optimal Bandwidth RD Estimate Standard Error
## 2.0459942 1.2458215 0.2090354
}