Merge pull request #178 from ngreifer/master

Version 4.5.5 updates
kosukeimai · Oct 13, 2023 · c0adaf0 · c0adaf0
2 parents 7b7b48a + 4cf6854
commit c0adaf0
Show file tree

Hide file tree

Showing 40 changed files with 1,033 additions and 207 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -15,3 +15,4 @@
 ^.*\.o$
 ^.*\.so$
 ^CRAN-SUBMISSION$
+^Makefile$
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: MatchIt
-Version: 4.5.3
+Version: 4.5.5
 Title: Nonparametric Preprocessing for Parametric Causal Inference
 Description: Selects matched samples of the original treated and
     control groups with similar covariate distributions -- can be
@@ -28,9 +28,12 @@ Depends:
     R (>= 3.5.0)
 Imports:
     backports (>= 1.1.9),
-    chk (>= 0.8.1), rlang (>= 1.1.0),
-    Rcpp (>= 1.0.10),
-    utils, stats, graphics
+    chk (>= 0.8.1),
+    rlang (>= 1.1.0),
+    Rcpp,
+    utils,
+    stats,
+    graphics
 Suggests:
     optmatch (>= 0.10.6),
     Matching,
@@ -50,11 +53,13 @@ Suggests:
     sandwich (>= 2.5-1),
     survival,
     RcppProgress (>= 0.4.2),
+    highs,
     Rglpk,
     Rsymphony,
     gurobi,
     knitr,
-    rmarkdown
+    rmarkdown,
+    testthat (>= 3.0.0)
 LinkingTo: 
     Rcpp,
     RcppProgress
@@ -67,3 +72,4 @@ BugReports: https://github.com/kosukeimai/MatchIt/issues
 VignetteBuilder: knitr
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.2.3
+Config/testthat/edition: 3
diff --git a/NEWS.md b/NEWS.md
@@ -6,6 +6,26 @@ output:
 `MatchIt` News and Updates
 ======
 
+# MatchIt 4.5.5
+
+* When using `method = "cardinality"`, a new solver, HiGHS, can be requested by setting `solver = "highs"`, which relies on the `highs` package. This is much faster and more reliable than GLPK and is free and easy to install as a regular R package with no additional requirements.
+
+* Fixed a bug when using `method = "optimal"` with `discard` and `exact` specified. Thanks to @NikNakk for the issue and fix. (#171)
+
+# MatchIt 4.5.4
+
+* With `method = "nearest"`, `m.order` can now be set to `"closest"` to request that the closest potential pairs are matched first. This can be used whether a propensity score is used or not.
+
+* Fixed bugs when `distance = NULL` and no covariates are specified in `matchit()`.
+
+* Changed "empirical cumulative density function" to "empirical cumulative distribution function" in documentation. (#166)
+
+* Fixed a bug where calipers would not work properly on some systems. Thanks to Bill Dunlap for the solution. (#163)
+
+* Fixed a bug when `.` was present in formulas. Thanks to @dmolitor. (#167)
+
+* Fixed a bug when nearest neighbor matching for the ATC with `distance` supplied as a numeric distance matrix.
+
 # MatchIt 4.5.3
 
 * Error messages have been improved using `chk` and `rlang`, which are now dependencies.

diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -9,6 +9,10 @@ nn_matchC <- function(treat_, ord_, ratio, discarded, reuse_max, distance_ = NUL
     .Call(`_MatchIt_nn_matchC`, treat_, ord_, ratio, discarded, reuse_max, distance_, distance_mat_, exact_, caliper_dist_, caliper_covs_, caliper_covs_mat_, mah_covs_, antiexact_covs_, unit_id_, disl_prog)
 }
 
+nn_matchC_closest <- function(distance_mat, treat, ratio, discarded, reuse_max, exact_ = NULL, caliper_dist_ = NULL, caliper_covs_ = NULL, caliper_covs_mat_ = NULL, antiexact_covs_ = NULL, unit_id_ = NULL, disl_prog = FALSE) {
+    .Call(`_MatchIt_nn_matchC_closest`, distance_mat, treat, ratio, discarded, reuse_max, exact_, caliper_dist_, caliper_covs_, caliper_covs_mat_, antiexact_covs_, unit_id_, disl_prog)
+}
+
 nn_matchC_vec <- function(treat_, ord_, ratio_, discarded_, reuse_max, distance_, exact_ = NULL, caliper_dist_ = NULL, caliper_covs_ = NULL, caliper_covs_mat_ = NULL, antiexact_covs_ = NULL, unit_id_ = NULL, disl_prog = FALSE) {
     .Call(`_MatchIt_nn_matchC_vec`, treat_, ord_, ratio_, discarded_, reuse_max, distance_, exact_, caliper_dist_, caliper_covs_, caliper_covs_mat_, antiexact_covs_, unit_id_, disl_prog)
 }
@@ -31,5 +35,5 @@ weights_matrixC <- function(mm, treat) {
 
 # Register entry points for exported C++ functions
 methods::setLoadAction(function(ns) {
-    .Call('_MatchIt_RcppExport_registerCCallable', PACKAGE = 'MatchIt')
+    .Call(`_MatchIt_RcppExport_registerCCallable`)
 })
diff --git a/R/add_s.weights.R b/R/add_s.weights.R
@@ -96,8 +96,8 @@ add_s.weights <- function(m,
         if (ncol(s.weights) != 1) .err("`s.weights` can only contain one named variable")
         s.weights <- s.weights[[1]]
       }
-      else if (inherits(s.weights, "formula")) {
-        s.weights.form <- update(s.weights, NULL ~ .)
+      else if (rlang::is_formula(s.weights)) {
+        s.weights.form <- update(terms(s.weights, data = data), NULL ~ .)
         s.weights <- model.frame(s.weights.form, data, na.action = "na.pass")
         if (ncol(s.weights) != 1) .err("`s.weights` can only contain one named variable")
         s.weights <- s.weights[[1]]

diff --git a/R/aux_functions.R b/R/aux_functions.R
@@ -181,7 +181,7 @@ info.to.distance <- function(info) {
 #controls whether words are separated by "and" or "or"; 'is.are' controls whether the list is
 #followed by "is" or "are" (to avoid manually figuring out if plural); quotes controls whether
 #quotes should be placed around words in string. From WeightIt.
-word_list <- function(word.list = NULL, and.or = c("and", "or"), is.are = FALSE, quotes = FALSE) {
+word_list <- function(word.list = NULL, and.or = "and", is.are = FALSE, quotes = FALSE) {
   #When given a vector of strings, creates a string of the form "a and b"
   #or "a, b, and c"
   #If is.are, adds "is" or "are" appropriately
@@ -384,8 +384,9 @@ round_df_char <- function(df, digits, pad = "0", na_vals = "") {
   #lines up. Should be "0" or " "; "" (the empty string) un-aligns decimals.
   #na_vals is what NA should print as.
 
+  if (NROW(df) == 0 || NCOL(df) == 0) return(as.matrix(df))
   if (!is.data.frame(df)) df <- as.data.frame.matrix(df, stringsAsFactors = FALSE)
-  if (NROW(df) == 0 || NCOL(df) == 0) return(df)
+
   rn <- rownames(df)
   cn <- colnames(df)
 
@@ -631,7 +632,7 @@ pooled_sd <- function(X, t, w = NULL, bin.var = NULL, contribution = "proportion
     }, numeric(1L))
   }
 
-  sqrt(pooled_var)
+  setNames(sqrt(pooled_var), colnames(X))
 }
 
 #Effective sample size

diff --git a/R/dist_functions.R b/R/dist_functions.R
@@ -345,7 +345,8 @@ eucdist_internal <- function(X, treat = NULL) {
       d <- abs(outer(X[treat_l,], X[!treat_l,], "-"))
     }
     else {
-      d <- dist_to_matrixC(dist(X))[treat_l, !treat_l, drop = FALSE]
+      d <- dist(X)
+      d <- dist_to_matrixC(d)[treat_l, !treat_l, drop = FALSE]
     }
     dimnames(d) <- list(rownames(X)[treat_l], rownames(X)[!treat_l])
   }
@@ -368,14 +369,15 @@ get.covs.matrix.for.dist <- function(formula = NULL, data = NULL) {
     data <- as.data.frame(data)
   }
 
+  formula <- terms(formula, data = data)
+
   if (rlang::is_formula(formula, lhs = FALSE)) {
     formula <- update(formula, ~ . + 1)
   }
   else {
     formula <- update(formula, . ~ . + 1)
   }
 
-  formula <- terms(formula, data = data)
   mf <- model.frame(formula, data, na.action = na.pass)
 
   chars.in.mf <- vapply(mf, is.character, logical(1L))
@@ -395,6 +397,7 @@ get.covs.matrix.for.dist <- function(formula = NULL, data = NULL) {
 
   X
 }
+
 .check_X <- function(X) {
   if (isTRUE(attr(X, "checked"))) return(X)
 

diff --git a/R/input_processing.R b/R/input_processing.R
@@ -156,9 +156,12 @@ check_treat <- function(treat = NULL, X = NULL) {
 
 #Function to process distance and give warnings about new syntax
 process.distance <- function(distance, method = NULL, treat) {
-  if (is.null(distance) && !is.null(method) %% !method %in% c("cem", "exact", "cardinality")) {
-    .err(sprintf("`distance` cannot be `NULL` with `method = \"%s\"`",
-                 method))
+  if (is.null(distance)) {
+    if (!is.null(method) && !method %in% c("cem", "exact", "cardinality")) {
+      .err(sprintf("`distance` cannot be `NULL` with `method = \"%s\"`",
+                   method))
+    }
+    return(distance)
   }
 
   if (is.character(distance) && length(distance) == 1) {
@@ -247,7 +250,7 @@ process.ratio <- function(ratio, method = NULL, ..., min.controls = NULL, max.co
   ratio.na <- !ratio.null && anyNA(ratio)
 
   if (is.null(method)) return(1)
-  else if (method %in% c("nearest", "optimal")) {
+  if (method %in% c("nearest", "optimal")) {
     if (ratio.null) ratio <- 1
     else if (ratio.na) .err("`ratio` cannot be `NA`")
     else if (!is.atomic(ratio) || !is.numeric(ratio) || length(ratio) > 1 || ratio < 1) {
@@ -485,7 +488,7 @@ process.variable.input <- function(x, data = NULL) {
     x <- reformulate(x)
   }
   else if (rlang::is_formula(x)) {
-    x <- update(x, NULL ~ .)
+    x <- update(terms(x, data = data), NULL ~ .)
   }
   else {
     .err(sprintf("`%s` must be supplied as a character vector of names or a one-sided formula.", n))

diff --git a/R/matchit.R b/R/matchit.R
@@ -109,15 +109,7 @@
 #' individual methods pages for information on whether and how this argument is
 #' used. Default is `FALSE` for matching without replacement.
 #' @param m.order for methods that allow it, the order that the matching takes
-#' place. Allowable options depend on the matching method but include
-#' `"largest"`, where matching takes place in descending order of distance
-#' measures; `"smallest"`, where matching takes place in ascending order
-#' of distance measures; `"random"`, where matching takes place in a
-#' random order; and `"data"` where matching takes place based on the
-#' order of units in the data. When `m.order = "random"`, results may
-#' differ across different runs of the same code unless a seed is set and
-#' specified with [set.seed()]. See the individual methods pages for
-#' information on whether and how this argument is used. The default of
+#' place. Allowable options depend on the matching method. The default of
 #' `NULL` corresponds to `"largest"` when a propensity score is
 #' estimated or supplied as a vector and `"data"` otherwise.
 #' @param caliper for methods that allow it, the width(s) of the caliper(s) to
@@ -230,14 +222,14 @@
 #' (in the case of k:1 matching) or the stratum they belong to (in the case of
 #' exact matching, coarsened exact matching, full matching, or
 #' subclassification). The formula for computing the weights depends on the
-#' argument supplied to `estimand`. A new stratum "propensity score"
-#' (`p`) is computed as the proportion of units in each stratum that are
+#' argument supplied to `estimand`. A new "stratum propensity score"
+#' (`sp`) is computed as the proportion of units in each stratum that are
 #' in the treated group, and all units in that stratum are assigned that
-#' propensity score. Weights are then computed using the standard formulas for
-#' inverse probability weights: for the ATT, weights are 1 for the treated
-#' units and `p/(1-p)` for the control units; for the ATC, weights are
-#' `(1-p)/p` for the treated units and 1 for the control units; for the
-#' ATE, weights are `1/p` for the treated units and `1/(1-p)` for the
+#' stratum propensity score. This is distinct from the propensity score used for matching, if any. Weights are then computed using the standard formulas for
+#' inverse probability weights with the stratum propensity score inserted: for the ATT, weights are 1 for the treated
+#' units and `sp/(1-sp)` for the control units; for the ATC, weights are
+#' `(1-sp)/sp` for the treated units and 1 for the control units; for the
+#' ATE, weights are `1/sp` for the treated units and `1/(1-sp)` for the
 #' control units. For cardinality matching, all matched units receive a weight
 #' of 1.
 #'
@@ -434,8 +426,7 @@ matchit <- function(formula,
   #Process formula and data inputs
   .chk_formula(formula, sides = 2)
 
-  tt <- terms(formula, data = data)
-  treat.form <- update(tt, . ~ 0)
+  treat.form <- update(terms(formula, data = data), . ~ 0)
   treat.mf <- model.frame(treat.form, data = data, na.action = "na.pass")
   treat <- model.response(treat.mf)
 
@@ -478,7 +469,7 @@ matchit <- function(formula,
       s.weights <- s.weights[[1]]
     }
     else if (inherits(s.weights, "formula")) {
-      s.weights.form <- update(s.weights, NULL ~ .)
+      s.weights.form <- update(terms(s.weights, data = data), NULL ~ .)
       s.weights <- model.frame(s.weights.form, data, na.action = "na.pass")
       if (ncol(s.weights) != 1) .err("`s.weights` can only contain one named variable")
       s.weights <- s.weights[[1]]
@@ -503,13 +494,15 @@ matchit <- function(formula,
     if (is.numeric(distance)) {
       fn1 <- "distance2user"
     }
-    else if (distance %in% matchit_distances()) {
-      fn1 <- "distance2mahalanobis"
-      is.full.mahalanobis <- TRUE
-      attr(is.full.mahalanobis, "transform") <- distance
-    }
-    else {
-      fn1 <- paste0("distance2", distance)
+    else if (is.character(distance)) {
+      if (distance %in% matchit_distances()) {
+        fn1 <- "distance2mahalanobis"
+        is.full.mahalanobis <- TRUE
+        attr(is.full.mahalanobis, "transform") <- distance
+      }
+      else {
+        fn1 <- paste0("distance2", distance)
+      }
     }
   }
 
@@ -524,6 +517,8 @@ matchit <- function(formula,
   else {
     covs.formula <- delete.response(terms(formula, data = data))
   }
+
+  covs.formula <- update(covs.formula, ~ .)
   covs <- model.frame(covs.formula, data = data, na.action = "na.pass")
   k <- ncol(covs)
   for (i in seq_len(k)) {
@@ -645,7 +640,7 @@ matchit <- function(formula,
   X.list <- list(covs, exactcovs, mahcovs, calcovs, antiexactcovs)
   all.covs <- lapply(X.list, names)
   for (i in seq_along(X.list)[-1]) if (!is.null(X.list[[i]])) X.list[[i]][names(X.list[[i]]) %in% unlist(all.covs[1:(i-1)])] <- NULL
-  X.list[lengths(X.list) == 0] <- NULL
+  X.list[vapply(X.list, is.null, logical(1L))] <- NULL
 
   ## putting all the results together
   out <- list(
@@ -669,7 +664,7 @@ matchit <- function(formula,
     obj = if (include.obj) match.out[["obj"]]
   )
 
-  out[lengths(out) == 0] <- NULL
+  out[vapply(out, is.null, logical(1L))] <- NULL
 
   class(out) <- class(match.out)
   out