################################################################# ### ### ### `rdoptband' ### ### Author: Devin Caughey ### ### Created: 22 July 2009 ### ### Last Modified: 19 June 2012 ### ### Based on the algorithm developed by Imbens & Kalyanaraman ### ### ### ################################################################# rdoptband_catch <- function(forcingVar, outcomeVar, cutpoint = 0, C_k = 3.4375, catch = NULL, median_subset = TRUE) { ## This function calculates the optimal bandwidth for RD estimation ## using local linear regression. The default value for C_k is 3.4375, ## which is appropriate for the 'edge' or 'triangular' kernel. ## It returns the optimal bandwidth, the estimated discontinuity at the ## cutpoint, and the conventional estimated standard error of the ## estimate. ## The `catch' argument is a recent addition. It allows the user to ## decide whether to implement the catch in Equation 4.11 of Imbens & ## Kalyanaraman (February 2009). The default setting is NULL, in ## which case there is no catch. A positive value sets a floor on ## the estimated third derivative used to calculate the pilot ## bandwidths. The value used Imbens & Kalyanaraman (February 2009) ## is 0.01. Later iterations of this paper (e.g., January 2010) ## eliminate this catch, but as of June 2011 it was still used by ## the version of the `rdob' Stata program available on Imbens's ## website. ## The option `median_subset' is more recent still (19 June 2012) ## and was prompted by Ian Gow, who noticed that I&K's software ## implementations of this algorithm estimate the ## third derivative (m3hat.c) from a cubic regression on a _subset_ ## of the data (trimming data beyond the medians on each side of the ## cutpoint). I could find no mention of this in I&K's paper, so I ## give the user the option whether to implement it. ### Error check if (any(is.na(forcingVar)) | any(is.na(outcomeVar))) { stop("Input variables contain missing values.\n") } ### (0) Recenter forcing variable so that the cut-point is 0 X0 <- forcingVar - cutpoint Y <- outcomeVar ### (1) Estimation of density (f.hat.c) and ### conditional variance (S.hat_c ^ 2) Sx <- sd(X0) N <- length(X0) N.pos <- sum(X0 >= 0) N.neg <- sum(X0 < 0) h1 <- 1.84 * Sx * (N ^ -0.2) ## Calculate the number of units on either side of the threshold, ## and the average outcomes on either side. N_h1.neg <- sum(X0 >= -h1 & X0 < 0) Ybar_h1.neg <- mean(Y[X0 >= -h1 & X0 < 0]) N_h1.pos <- sum(X0 >= 0 & X0 <= h1) Ybar_h1.pos <- mean(Y[X0 >= 0 & X0 <= h1]) ## Estimate the density of the forcing variable at the cutpoint f.hat.c <- (N_h1.neg + N_h1.pos) / (2 * N * h1) ## Estimate the limit of the conditional variances of Yi given ## Xi = x, at x = c, from the left and the right var.hat.neg.c <- var(Y[X0 >= -h1 & X0 < 0]) var.hat.pos.c <- var(Y[X0 >= 0 & X0 <= h1]) ## For sample variance, use the following formula: # mean((Y[(X0 >= -h1) & (X0 < 0)] - # mean(Y[(X0 >= -h1) & (X0 < 0)]))^2) ### (2) Estimation of second derivatives ## Fit a third order polynomial to the data and estimate the ## third derivative at the cut-point right <- X0 >= 0 if (median_subset) { ss <- X0 >= median(X0[!right]) & X0 <= median(X0[right]) } else { ss <- rep(TRUE, length(X0)) } lm3 <- lm(Y ~ right + X0 + I(X0 ^ 2) + I(X0 ^ 3), subset = ss) m3hat.c <- 6 * coef(lm3)[["I(X0^3)"]] ## N.B. The February 2009 version of Imbens and Kalyanaraman's ## working paper contains a "catch" (p. 10, equation 4.11) that ## multiplied f.hat.c by the max of m3hat.c ^ 2 and 0.01, in case ## m3hat.c ^ 2 was too close to 0. This catch was dropped from a ## later version of the working paper (January 2010). if (is.null(catch)) { catch_term <- m3hat.c ^ 2 } else { catch_term <- max(c(m3hat.c ^ 2), catch) } h2.pos <- ((var.hat.pos.c / (f.hat.c * catch_term)) ^ (1 / 7) * 3.56 * (N.pos ^ (-1 / 7))) h2.neg <- ((var.hat.neg.c / (f.hat.c * catch_term)) ^ (1 / 7) * 3.56 * (N.neg ^ (-1 / 7))) ## Given the pilot bandwidths h2.pos and h2.neg, we estimate the ## curvature m(2)(c) by a local quadratic h2.pos.obs <- X0 >= 0 & X0 <= h2.pos N2.pos <- sum(h2.pos.obs) lm.h2.pos <- lm(Y[h2.pos.obs] ~ X0[h2.pos.obs] + I(X0[h2.pos.obs] ^ 2)) m2hat.pos.c <- 2 * coef(lm.h2.pos)[["I(X0[h2.pos.obs]^2)"]] h2.neg.obs <- X0 >= -h2.neg & X0 < 0 N2.neg <- sum(h2.neg.obs) lm.h2.neg <- lm(Y[h2.neg.obs] ~ X0[h2.neg.obs] + I(X0[h2.neg.obs] ^ 2)) m2hat.neg.c <- 2 * coef(lm.h2.neg)[["I(X0[h2.neg.obs]^2)"]] ### (3) Calculation of regulation terms and optimal h. r.hat.pos <- (720 * var.hat.pos.c) / (N2.pos * h2.pos ^ 4) r.hat.neg <- (720 * var.hat.neg.c) / (N2.neg * h2.neg ^ 4) h.opt <- C_k * (N ^ (-1 / 5)) * ((var.hat.neg.c + var.hat.pos.c) / (((m2hat.pos.c - m2hat.neg.c) ^ 2 + r.hat.pos + r.hat.neg) * f.hat.c)) ^ (1 / 5) ### (4) Calculate the treatment effect and standard errors. ## Weights based on triangular kernel wgts <- (1 - abs(X0 / h.opt)) * (abs(X0) <= h.opt) local.lm <- lm(Y ~ right + X0 + I(X0 * right), weights = wgts) rd.est <- coef(local.lm)[[2]] sd.est <- sqrt(vcov(local.lm)[2, 2]) ## Output out <- c(h.opt, rd.est, sd.est) names(out) <- c("Optimal Bandwidth", "RD Estimate", "Standard Error") return(out) } ### TESTING ### if (FALSE) { url <- "http://www.economics.harvard.edu/faculty/imbens" url <- paste(url, "/files/art_sharp_rd.txt", sep="/") rd_sharp_data <- read.fwf(url, header=FALSE, widths=rep(10,6), col.names=c("y", "x", paste("z", 1:4, sep=""))) rd_sharp_data <- rd_sharp_data[1:1000,] c <- 0.2990 rdoptband_catch(forcingVar = rd_sharp_data$x, outcomeVar = rd_sharp_data$y, cutpoint = c, C_k = 3.4375, catch = NULL, median_subset = TRUE) ## > Optimal Bandwidth RD Estimate Standard Error ## 0.6759120 1.0355491 0.3013769 rdoptband_catch(forcingVar = rd_sharp_data$x, outcomeVar = rd_sharp_data$y, cutpoint = c, C_k = 3.4375, catch = NULL, median_subset = FALSE) ## > Optimal Bandwidth RD Estimate Standard Error ## 2.0459942 1.2458215 0.2090354 }