From e1cbd714a5655f3071792f306fb9fb0b4fa46ebf Mon Sep 17 00:00:00 2001
From: Alexander Rossell Hayes <alexander@rossellhayes.com>
Date: Fri, 8 Dec 2023 11:11:11 -0800
Subject: [PATCH 1/5] fix(census_helper_new): fix bug when checking if `geo` is
 "precinct"

---
 R/census_helper_v2.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/census_helper_v2.R b/R/census_helper_v2.R
index 8e4ddd5..8b310de 100644
--- a/R/census_helper_v2.R
+++ b/R/census_helper_v2.R
@@ -72,7 +72,7 @@ census_helper_new <- function(
     skip_bad_geos = FALSE
 ) {
   
-  if (geo == "precinct") {
+  if ("precinct" %in% geo) {
     stop("Error: census_helper_new function does not currently support precinct-level data.")
   }
   

From 59a340dac1e233e98a1e96468f89725b0150a719 Mon Sep 17 00:00:00 2001
From: Alexander Rossell Hayes <alexander@rossellhayes.com>
Date: Fri, 8 Dec 2023 11:12:23 -0800
Subject: [PATCH 2/5] fix(.predict_race_old): use `all(is.na())` in `if`
 statements

---
 R/race_prediction_funs.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/race_prediction_funs.R b/R/race_prediction_funs.R
index ce96e82..23b4564 100644
--- a/R/race_prediction_funs.R
+++ b/R/race_prediction_funs.R
@@ -82,12 +82,12 @@ NULL
       stop("Voter data frame needs to have a column named surname")
     }
   } else {
-    if (missing(census.geo) || is.null(census.geo) || is.na(census.geo) || census.geo %in% c("county", "tract", "block", "place") == FALSE) {
+    if (missing(census.geo) || is.null(census.geo) || all(is.na(census.geo)) || census.geo %in% c("county", "tract", "block", "place") == FALSE) {
       stop("census.geo must be either 'county', 'tract', 'block', or 'place'")
     } else {
       message(paste("Proceeding with Census geographic data at", census.geo, "level..."))
     }
-    if (missing(census.data) || is.null(census.data) || is.na(census.data)) {
+    if (missing(census.data) || is.null(census.data) || all(is.na(census.data))) {
       census.key <- validate_key(census.key)
       message("Downloading Census geographic data using provided API key...")
     } else {

From 4ff2f0f42e1748c52b74ac45d9407c997600ae0b Mon Sep 17 00:00:00 2001
From: Alexander Rossell Hayes <alexander@rossellhayes.com>
Date: Fri, 8 Dec 2023 11:12:57 -0800
Subject: [PATCH 3/5] test(census_geo_api): add comment explaining snapshots

---
 tests/testthat/test-census_geo_api.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/testthat/test-census_geo_api.R b/tests/testthat/test-census_geo_api.R
index 4fd02c8..9d18679 100644
--- a/tests/testthat/test-census_geo_api.R
+++ b/tests/testthat/test-census_geo_api.R
@@ -1,6 +1,8 @@
 skip_if_not(nzchar(Sys.getenv("CENSUS_API_KEY")))
 
 test_that("snapshot", {
+  # These snapshots were generated using the calculations in v2.0.0
+  # and verified that the calculations resulted in the same numbers for PR #120.
   expect_snapshot_value(
     census_geo_api(state = "DE", geo = "county", year = "2020"),
     style = "deparse"

From 7be05c092c2a309d2654e780b377581de12870cf Mon Sep 17 00:00:00 2001
From: Alexander Rossell Hayes <alexander@rossellhayes.com>
Date: Fri, 8 Dec 2023 11:18:03 -0800
Subject: [PATCH 4/5] todo: add TODO comments

---
 R/census_helper_v2.R                       | 5 +++++
 README.Rmd                                 | 4 ++++
 tests/testthat/test-census_geo_api.R       | 3 +++
 tests/testthat/test-census_geo_api_names.R | 2 ++
 4 files changed, 14 insertions(+)

diff --git a/R/census_helper_v2.R b/R/census_helper_v2.R
index 8b310de..8744389 100644
--- a/R/census_helper_v2.R
+++ b/R/census_helper_v2.R
@@ -160,6 +160,7 @@ census_helper_new <- function(
       state_must_be_downloaded <- toDownload ||
         is.null(census.data[[state]]) ||
         census.data[[state]]$year != year ||
+        # TODO: Why do we always redownload if sex or age == TRUE?
         census.data[[state]]$age != FALSE ||
         census.data[[state]]$sex != FALSE
       
@@ -174,6 +175,10 @@ census_helper_new <- function(
     
     ## Calculate Pr(Geolocation | Race)
     if (any(c("P2_005N", "P005003") %in% names(census))) {
+      # TODO: Add message that they're using a legacy data source
+      # TODO: Add test that we get the same ratios with legacy and new tables for 2020
+      # Old table: Redistricting (Pl-some numbers) (does not have age, sex, or ZCTAs)
+      # New table: DHC (does have age, sex, and ZCTA)
       vars_ <- census_geo_api_names_legacy(year = year)
     } else {
       vars_ <- census_geo_api_names(year)
diff --git a/README.Rmd b/README.Rmd
index b900a04..ccf678a 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -210,6 +210,10 @@ future::plan(future.callr::callr)
 # ...
 ```
 
+```{comment}
+Add guidance for starting a new analysis vs. replicating a previous analysis
+```
+
 ## Census Data
 
 This package uses the Census Bureau Data API but is not endorsed or certified by the Census Bureau.
diff --git a/tests/testthat/test-census_geo_api.R b/tests/testthat/test-census_geo_api.R
index 9d18679..d4d3534 100644
--- a/tests/testthat/test-census_geo_api.R
+++ b/tests/testthat/test-census_geo_api.R
@@ -1,6 +1,9 @@
 skip_if_not(nzchar(Sys.getenv("CENSUS_API_KEY")))
 
 test_that("snapshot", {
+  # TODO: Test that sub-geographies sum to match pooled geographies (e.g. blocks sum to block groups, sum to tracts, sum to counties)
+  # TODO: Test that age/sex subsets sum to race totals
+  
   # These snapshots were generated using the calculations in v2.0.0
   # and verified that the calculations resulted in the same numbers for PR #120.
   expect_snapshot_value(
diff --git a/tests/testthat/test-census_geo_api_names.R b/tests/testthat/test-census_geo_api_names.R
index 2ca9d84..2d7bd79 100644
--- a/tests/testthat/test-census_geo_api_names.R
+++ b/tests/testthat/test-census_geo_api_names.R
@@ -1,4 +1,6 @@
 test_that("census_geo_api_names() for 2020", {
+  # TODO: Verify against table names here: https://api.census.gov/data/2020/dec/dhc/variables.html
+  
   expect_equal(
     census_geo_api_names("2020"),
     list(

From 4943cff1b069aad59a679651cb04a28bcd0f487e Mon Sep 17 00:00:00 2001
From: Alexander Rossell Hayes <alexander@rossellhayes.com>
Date: Fri, 8 Dec 2023 11:32:51 -0800
Subject: [PATCH 5/5] refactor: replace loops in `census_helper_new()` and
 `predict_race_me()` with explicit calculations

---
 R/census_helper_v2.R     | 11 ++++++-----
 R/race_prediction_funs.R | 23 +++++++++++------------
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/R/census_helper_v2.R b/R/census_helper_v2.R
index 8744389..358260b 100644
--- a/R/census_helper_v2.R
+++ b/R/census_helper_v2.R
@@ -186,11 +186,12 @@ census_helper_new <- function(
     drop <- match(c("state", unlist(vars_)), names(census))
     
     geoPopulations <- rowSums(census[,names(census) %in% vars_])
-      
-    for (i in seq_along(vars_)) {
-      census[[names(vars_)[[i]]]] <- 
-        rowSums(census[, vars_[[i]], drop = FALSE]) / geoPopulations
-    }
+    
+    census$r_whi <- rowSums(census[, vars_[["r_whi"]], drop = FALSE]) / (geoPopulations) #Pr(White | Geo)
+    census$r_bla <- rowSums(census[, vars_[["r_bla"]], drop = FALSE]) / (geoPopulations) #Pr(Black | Geo)
+    census$r_his <- rowSums(census[, vars_[["r_his"]], drop = FALSE]) / (geoPopulations) #Pr(Latino | Geo)
+    census$r_asi <- rowSums(census[, vars_[["r_asi"]], drop = FALSE]) / (geoPopulations) #Pr(Asian or NH/PI | Geo)
+    census$r_oth <- rowSums(census[, vars_[["r_oth"]], drop = FALSE]) / (geoPopulations) #Pr(AI/AN, Other, or Mixed | Geo)
     
     # check locations with zero people
     # get average without places with zero people, and assign that to zero locs.
diff --git a/R/race_prediction_funs.R b/R/race_prediction_funs.R
index 23b4564..f883336 100644
--- a/R/race_prediction_funs.R
+++ b/R/race_prediction_funs.R
@@ -545,7 +545,7 @@ predict_race_me <- function(
   
   vars_ <- census_geo_api_names(year = year)
   
-  tmp_tabs <- lapply(
+  N_rg <- purrr::map(
     census.data,
     function(x) {
       all_names <- names(x[[census.geo]])
@@ -554,20 +554,19 @@ predict_race_me <- function(
         vars_ <- census_geo_api_names_legacy(year = year)
       }
       
-      tmp <- x[[census.geo]][, c(geo_id_names, grep("^P[0-2]", all_names, value = TRUE))]
+      totals <- x[[census.geo]][, match(c(geo_id_names, unlist(vars_)), all_names)]
       
-      for (i in seq_along(vars_)) {
-        tmp[[names(vars_)[[i]]]] <- 
-          rowSums(tmp[, vars_[[i]], drop = FALSE])
-      }
-      
-      all_names <- names(tmp)
-      ## Totals
-      tmp_la <- tmp[, c(geo_id_names, grep("^r_", all_names, value = TRUE))]
-      return(list(tots = tmp_la))
+      totals$r_whi <- rowSums(totals[, vars_[["r_whi"]], drop = FALSE]) # White population
+      totals$r_bla <- rowSums(totals[, vars_[["r_bla"]], drop = FALSE]) # Black population
+      totals$r_his <- rowSums(totals[, vars_[["r_his"]], drop = FALSE]) # Latino population
+      totals$r_asi <- rowSums(totals[, vars_[["r_asi"]], drop = FALSE]) # Asian + NH/PI population
+      totals$r_oth <- rowSums(totals[, vars_[["r_oth"]], drop = FALSE]) # AI/AN + Other + Mixed population
+
+      totals <- totals[, -match(unlist(vars_), names(totals))]
+      totals
     }
   )
-  N_rg <- do.call(rbind, lapply(tmp_tabs, function(x) x$tots))
+  N_rg <- dplyr::bind_rows(N_rg)
   N_rg_geo <- do.call(paste, N_rg[, geo_id_names])
   ## Subset to geo's in vf
   N_rg <- N_rg[N_rg_geo %in% geo_id, ]