Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates to dev #128

Merged
merged 5 commits into from
Dec 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions R/census_helper_v2.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ census_helper_new <- function(
skip_bad_geos = FALSE
) {

if (geo == "precinct") {
if ("precinct" %in% geo) {
stop("Error: census_helper_new function does not currently support precinct-level data.")
}

Expand Down Expand Up @@ -160,6 +160,7 @@ census_helper_new <- function(
state_must_be_downloaded <- toDownload ||
is.null(census.data[[state]]) ||
census.data[[state]]$year != year ||
# TODO: Why do we always redownload if sex or age == TRUE?
census.data[[state]]$age != FALSE ||
census.data[[state]]$sex != FALSE

Expand All @@ -174,18 +175,23 @@ census_helper_new <- function(

## Calculate Pr(Geolocation | Race)
if (any(c("P2_005N", "P005003") %in% names(census))) {
# TODO: Add message that they're using a legacy data source
# TODO: Add test that we get the same ratios with legacy and new tables for 2020
# Old table: Redistricting (Pl-some numbers) (does not have age, sex, or ZCTAs)
# New table: DHC (does have age, sex, and ZCTA)
vars_ <- census_geo_api_names_legacy(year = year)
} else {
vars_ <- census_geo_api_names(year)
}
drop <- match(c("state", unlist(vars_)), names(census))

geoPopulations <- rowSums(census[,names(census) %in% vars_])

for (i in seq_along(vars_)) {
census[[names(vars_)[[i]]]] <-
rowSums(census[, vars_[[i]], drop = FALSE]) / geoPopulations
}

census$r_whi <- rowSums(census[, vars_[["r_whi"]], drop = FALSE]) / (geoPopulations) #Pr(White | Geo)
census$r_bla <- rowSums(census[, vars_[["r_bla"]], drop = FALSE]) / (geoPopulations) #Pr(Black | Geo)
census$r_his <- rowSums(census[, vars_[["r_his"]], drop = FALSE]) / (geoPopulations) #Pr(Latino | Geo)
census$r_asi <- rowSums(census[, vars_[["r_asi"]], drop = FALSE]) / (geoPopulations) #Pr(Asian or NH/PI | Geo)
census$r_oth <- rowSums(census[, vars_[["r_oth"]], drop = FALSE]) / (geoPopulations) #Pr(AI/AN, Other, or Mixed | Geo)

# check locations with zero people
# get average without places with zero people, and assign that to zero locs.
Expand Down
27 changes: 13 additions & 14 deletions R/race_prediction_funs.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,12 @@ NULL
stop("Voter data frame needs to have a column named surname")
}
} else {
if (missing(census.geo) || is.null(census.geo) || is.na(census.geo) || census.geo %in% c("county", "tract", "block", "place") == FALSE) {
if (missing(census.geo) || is.null(census.geo) || all(is.na(census.geo)) || census.geo %in% c("county", "tract", "block", "place") == FALSE) {
stop("census.geo must be either 'county', 'tract', 'block', or 'place'")
} else {
message(paste("Proceeding with Census geographic data at", census.geo, "level..."))
}
if (missing(census.data) || is.null(census.data) || is.na(census.data)) {
if (missing(census.data) || is.null(census.data) || all(is.na(census.data))) {
census.key <- validate_key(census.key)
message("Downloading Census geographic data using provided API key...")
} else {
Expand Down Expand Up @@ -545,7 +545,7 @@ predict_race_me <- function(

vars_ <- census_geo_api_names(year = year)

tmp_tabs <- lapply(
N_rg <- purrr::map(
census.data,
function(x) {
all_names <- names(x[[census.geo]])
Expand All @@ -554,20 +554,19 @@ predict_race_me <- function(
vars_ <- census_geo_api_names_legacy(year = year)
}

tmp <- x[[census.geo]][, c(geo_id_names, grep("^P[0-2]", all_names, value = TRUE))]
totals <- x[[census.geo]][, match(c(geo_id_names, unlist(vars_)), all_names)]

for (i in seq_along(vars_)) {
tmp[[names(vars_)[[i]]]] <-
rowSums(tmp[, vars_[[i]], drop = FALSE])
}

all_names <- names(tmp)
## Totals
tmp_la <- tmp[, c(geo_id_names, grep("^r_", all_names, value = TRUE))]
return(list(tots = tmp_la))
totals$r_whi <- rowSums(totals[, vars_[["r_whi"]], drop = FALSE]) # White population
totals$r_bla <- rowSums(totals[, vars_[["r_bla"]], drop = FALSE]) # Black population
totals$r_his <- rowSums(totals[, vars_[["r_his"]], drop = FALSE]) # Latino population
totals$r_asi <- rowSums(totals[, vars_[["r_asi"]], drop = FALSE]) # Asian + NH/PI population
totals$r_oth <- rowSums(totals[, vars_[["r_oth"]], drop = FALSE]) # AI/AN + Other + Mixed population

totals <- totals[, -match(unlist(vars_), names(totals))]
totals
}
)
N_rg <- do.call(rbind, lapply(tmp_tabs, function(x) x$tots))
N_rg <- dplyr::bind_rows(N_rg)
N_rg_geo <- do.call(paste, N_rg[, geo_id_names])
## Subset to geo's in vf
N_rg <- N_rg[N_rg_geo %in% geo_id, ]
Expand Down
4 changes: 4 additions & 0 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,10 @@ future::plan(future.callr::callr)
# ...
```

```{comment}
Add guidance for starting a new analysis vs. replicating a previous analysis
```

## Census Data

This package uses the Census Bureau Data API but is not endorsed or certified by the Census Bureau.
Expand Down
5 changes: 5 additions & 0 deletions tests/testthat/test-census_geo_api.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
skip_if_not(nzchar(Sys.getenv("CENSUS_API_KEY")))

test_that("snapshot", {
# TODO: Test that sub-geographies sum to match pooled geographies (e.g. blocks sum to block groups, sum to tracts, sum to counties)
# TODO: Test that age/sex subsets sum to race totals

# These snapshots were generated using the calculations in v2.0.0
# and verified that the calculations resulted in the same numbers for PR #120.
expect_snapshot_value(
census_geo_api(state = "DE", geo = "county", year = "2020"),
style = "deparse"
Expand Down
2 changes: 2 additions & 0 deletions tests/testthat/test-census_geo_api_names.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
test_that("census_geo_api_names() for 2020", {
# TODO: Verify against table names here: https://api.census.gov/data/2020/dec/dhc/variables.html

expect_equal(
census_geo_api_names("2020"),
list(
Expand Down
Loading