### Example of matching locations with a subset of locations library(Matching) library(rgenoud) library(data.table) ############### ## Initial Setup ## Load data and define the sample and tx variable data_dir <- "data-lfs/final/" dat.csv <-read.csv(paste0(data_dir, "sample_data.csv")) dat <- data.table(dat.csv) ## Drop records with no fips dat <- dat[!is.na(fips)] ## Drop historical records with no rent dat <- dat[!(year == 2011 & is.na(rent_irr0))] # add tx indicator dat[, tx := as.integer((year == 2040))] ctrl_n <- sum(dat$tx == 0) tx_n <- sum(dat$tx == 1) # We don't know the rent of the tx locations... dat[tx == 1, rent_irr0 := NA] ###################################################### ## Step 1: Determine which climate variables matter... ## TO DO LATER ###################################################### ## Step 2: Using the variables from step 1, match future to present, then ## calculate the ATT. # Set up the data. z_vars <- c("pop2015", "income2010", "cropacres", "pastureacres", "irrigatedacres") x_vars <- setdiff(names(dat), c("fips", "lat", "lon", "year", "days", "state", "county", "pop2015", "income2010", "rent", "rent_irr1", "rent_irr0", "tx", z_vars)) X <- as.matrix(dat[, ..x_vars]) Z <- as.matrix(dat[, ..z_vars]) tx <- dat$tx # First a GLM model for propensity score glm1 <- glm(formula(paste("tx ~", paste(x_vars, collapse = " + "))), family = binomial, data = dat) # Match locations based on propensity score and mahalanobis distance # Generally the matching terms should include higher order terms than # those in the propensity score formula. m_glm <- Match(Tr = tx, X = glm1$fitted, M=1) #match with replacement m_maha <- Match(Tr = tx, X = dat[, ..x_vars], M=1) #match with replacement # Check the balance of the match using GLM # nboots should be >= 500 for publication-quality p-values # We are particularly interested in gdd and cumulative precip MatchBalance(tx ~ sum_gdd, data = dat, match.out = m_glm, nboots = 100) MatchBalance(tx ~ sum_rain, data = dat, match.out = m_glm, nboots = 100) # Check the balance of the match using mahalanobis MatchBalance(tx ~ sum_gdd, data = dat, match.out = m_maha, nboots = 100) MatchBalance(tx ~ sum_rain, data = dat, match.out = m_maha, nboots = 100) # As an alternative, we can use GenMatch # The BalanceMatrix should usually involve second order terms # but here we leave it since our model accounts for nonlinearity differently. BalanceMatrix <- X gen <- GenMatch(tx, X, BalanceMatrix = BalanceMatrix, pop.size = 100) #Check the balance of the GenMatch algorithm m_gen <- Match(Tr=tx, X=X, Weight.matrix=gen) MatchBalance(tx ~ sum_gdd, data = dat, match.out = m_gen, nboots = 100) MatchBalance(tx ~ sum_rain, data = dat, match.out = m_gen, nboots = 100) qqout <- qqstats(dat$rent_irr0[m_gen$index.treated], dat$rent_irr0[m_gen$index.control]) # Now actually Match and calculate the ATT. We want to recover the actual change in rents Y <- dat$rent_irr0 m_gen1 <- Match(Y = Y, Tr=tx, X=X, Weight.matrix=gen) res <- summary(m_gen1)