Skip to content

Commit

Permalink
Merge pull request #128 from rossellhayes/fixes
Browse files Browse the repository at this point in the history
Updates to dev
  • Loading branch information
1beb authored Dec 8, 2023
2 parents cdf548c + 4943cff commit 1ebbdcc
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 20 deletions.
18 changes: 12 additions & 6 deletions R/census_helper_v2.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ census_helper_new <- function(
skip_bad_geos = FALSE
) {

if (geo == "precinct") {
if ("precinct" %in% geo) {
stop("Error: census_helper_new function does not currently support precinct-level data.")
}

Expand Down Expand Up @@ -160,6 +160,7 @@ census_helper_new <- function(
state_must_be_downloaded <- toDownload ||
is.null(census.data[[state]]) ||
census.data[[state]]$year != year ||
# TODO: Why do we always redownload if sex or age == TRUE?
census.data[[state]]$age != FALSE ||
census.data[[state]]$sex != FALSE

Expand All @@ -174,18 +175,23 @@ census_helper_new <- function(

## Calculate Pr(Geolocation | Race)
if (any(c("P2_005N", "P005003") %in% names(census))) {
# TODO: Add message that they're using a legacy data source
# TODO: Add test that we get the same ratios with legacy and new tables for 2020
# Old table: Redistricting (Pl-some numbers) (does not have age, sex, or ZCTAs)
# New table: DHC (does have age, sex, and ZCTA)
vars_ <- census_geo_api_names_legacy(year = year)
} else {
vars_ <- census_geo_api_names(year)
}
drop <- match(c("state", unlist(vars_)), names(census))

geoPopulations <- rowSums(census[,names(census) %in% vars_])

for (i in seq_along(vars_)) {
census[[names(vars_)[[i]]]] <-
rowSums(census[, vars_[[i]], drop = FALSE]) / geoPopulations
}

census$r_whi <- rowSums(census[, vars_[["r_whi"]], drop = FALSE]) / (geoPopulations) #Pr(White | Geo)
census$r_bla <- rowSums(census[, vars_[["r_bla"]], drop = FALSE]) / (geoPopulations) #Pr(Black | Geo)
census$r_his <- rowSums(census[, vars_[["r_his"]], drop = FALSE]) / (geoPopulations) #Pr(Latino | Geo)
census$r_asi <- rowSums(census[, vars_[["r_asi"]], drop = FALSE]) / (geoPopulations) #Pr(Asian or NH/PI | Geo)
census$r_oth <- rowSums(census[, vars_[["r_oth"]], drop = FALSE]) / (geoPopulations) #Pr(AI/AN, Other, or Mixed | Geo)

# check locations with zero people
# get average without places with zero people, and assign that to zero locs.
Expand Down
27 changes: 13 additions & 14 deletions R/race_prediction_funs.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,12 @@ NULL
stop("Voter data frame needs to have a column named surname")
}
} else {
if (missing(census.geo) || is.null(census.geo) || is.na(census.geo) || census.geo %in% c("county", "tract", "block", "place") == FALSE) {
if (missing(census.geo) || is.null(census.geo) || all(is.na(census.geo)) || census.geo %in% c("county", "tract", "block", "place") == FALSE) {
stop("census.geo must be either 'county', 'tract', 'block', or 'place'")
} else {
message(paste("Proceeding with Census geographic data at", census.geo, "level..."))
}
if (missing(census.data) || is.null(census.data) || is.na(census.data)) {
if (missing(census.data) || is.null(census.data) || all(is.na(census.data))) {
census.key <- validate_key(census.key)
message("Downloading Census geographic data using provided API key...")
} else {
Expand Down Expand Up @@ -545,7 +545,7 @@ predict_race_me <- function(

vars_ <- census_geo_api_names(year = year)

tmp_tabs <- lapply(
N_rg <- purrr::map(
census.data,
function(x) {
all_names <- names(x[[census.geo]])
Expand All @@ -554,20 +554,19 @@ predict_race_me <- function(
vars_ <- census_geo_api_names_legacy(year = year)
}

tmp <- x[[census.geo]][, c(geo_id_names, grep("^P[0-2]", all_names, value = TRUE))]
totals <- x[[census.geo]][, match(c(geo_id_names, unlist(vars_)), all_names)]

for (i in seq_along(vars_)) {
tmp[[names(vars_)[[i]]]] <-
rowSums(tmp[, vars_[[i]], drop = FALSE])
}

all_names <- names(tmp)
## Totals
tmp_la <- tmp[, c(geo_id_names, grep("^r_", all_names, value = TRUE))]
return(list(tots = tmp_la))
totals$r_whi <- rowSums(totals[, vars_[["r_whi"]], drop = FALSE]) # White population
totals$r_bla <- rowSums(totals[, vars_[["r_bla"]], drop = FALSE]) # Black population
totals$r_his <- rowSums(totals[, vars_[["r_his"]], drop = FALSE]) # Latino population
totals$r_asi <- rowSums(totals[, vars_[["r_asi"]], drop = FALSE]) # Asian + NH/PI population
totals$r_oth <- rowSums(totals[, vars_[["r_oth"]], drop = FALSE]) # AI/AN + Other + Mixed population

totals <- totals[, -match(unlist(vars_), names(totals))]
totals
}
)
N_rg <- do.call(rbind, lapply(tmp_tabs, function(x) x$tots))
N_rg <- dplyr::bind_rows(N_rg)
N_rg_geo <- do.call(paste, N_rg[, geo_id_names])
## Subset to geo's in vf
N_rg <- N_rg[N_rg_geo %in% geo_id, ]
Expand Down
4 changes: 4 additions & 0 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,10 @@ future::plan(future.callr::callr)
# ...
```

```{comment}
Add guidance for starting a new analysis vs. replicating a previous analysis
```

## Census Data

This package uses the Census Bureau Data API but is not endorsed or certified by the Census Bureau.
Expand Down
5 changes: 5 additions & 0 deletions tests/testthat/test-census_geo_api.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
skip_if_not(nzchar(Sys.getenv("CENSUS_API_KEY")))

test_that("snapshot", {
# TODO: Test that sub-geographies sum to match pooled geographies (e.g. blocks sum to block groups, sum to tracts, sum to counties)
# TODO: Test that age/sex subsets sum to race totals

# These snapshots were generated using the calculations in v2.0.0
# and verified that the calculations resulted in the same numbers for PR #120.
expect_snapshot_value(
census_geo_api(state = "DE", geo = "county", year = "2020"),
style = "deparse"
Expand Down
2 changes: 2 additions & 0 deletions tests/testthat/test-census_geo_api_names.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
test_that("census_geo_api_names() for 2020", {
# TODO: Verify against table names here: https://api.census.gov/data/2020/dec/dhc/variables.html

expect_equal(
census_geo_api_names("2020"),
list(
Expand Down

0 comments on commit 1ebbdcc

Please sign in to comment.