From a8db9b41fe791f817b14406c3aca983070a8e8ce Mon Sep 17 00:00:00 2001 From: Seo-young Silvia Kim Date: Tue, 12 Jan 2021 13:41:29 -0500 Subject: [PATCH 1/4] restore pull request (accidentally deleted repo) --- R/census_geo_api.R | 38 +++++++++++++++++++++++++++++++++++--- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/R/census_geo_api.R b/R/census_geo_api.R index 94f6933..a96bc3d 100644 --- a/R/census_geo_api.R +++ b/R/census_geo_api.R @@ -21,6 +21,9 @@ #' If \code{TRUE}, function will return Pr(Geolocation, Sex | Race). #' If \code{\var{age}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race). #' @param retry The number of retries at the census website if network interruption occurs. +#' @param save_temp File indicating where to save the temporary outputs. +#' Defaults to NULL. If specified, the function will look for an .RData file +#' with the same format as the expected output. #' @return Output will be an object of class \code{list}, indexed by state names. It will #' consist of the original user-input data with additional columns of Census geographic data. #' @@ -34,7 +37,7 @@ #' available \href{https://rstudio-pubs-static.s3.amazonaws.com/19337_2e7f827190514c569ea136db788ce850.html}{here}. #' #' @export -census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE, retry = 0) { +census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE, retry = 0, save_temp = NULL) { if (missing(key)) { stop('Must enter U.S. Census API key, which can be requested at https://api.census.gov/data/key_signup.html.') @@ -99,13 +102,19 @@ census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE, region_county <- paste("for=county:*&in=state:", state.fips, sep = "") county_df <- get_census_api("https://api.census.gov/data/2010/dec/sf1?", key = key, vars = vars, region = region_county, retry) county_list <- county_df$county - census <- NULL + temp <- check_temp_save(county_list, save_temp, census) + county_list <- temp$county_list + census <- temp$census + for (c in 1:length(county_list)) { print(paste("County ", c, " of ", length(county_list), ": ", county_list[c], sep = "")) region_county <- paste("for=tract:*&in=state:", state.fips, "+county:", county_list[c], sep = "") census.temp <- get_census_api("https://api.census.gov/data/2010/dec/sf1?", key = key, vars = vars, region = region_county, retry) census <- rbind(census, census.temp) + if (!is.null(save_temp)) { + save(census, file = save_temp) + } } rm(census.temp) } @@ -117,8 +126,10 @@ census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE, region_county <- paste("for=county:*&in=state:", state.fips, sep = "") county_df <- get_census_api("https://api.census.gov/data/2010/dec/sf1?", key = key, vars = vars, region = region_county, retry) county_list <- county_df$county - census <- NULL + temp <- check_temp_save(county_list, save_temp, census) + county_list <- temp$county_list + census <- temp$census for (c in 1:length(county_list)) { print(paste("County ", c, " of ", length(county_list), ": ", county_list[c], sep = "")) @@ -135,6 +146,9 @@ census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE, census.temp <- get_census_api("https://api.census.gov/data/2010/dec/sf1?", key = key, vars = vars, region = region_block, retry) census <- rbind(census, census.temp) } + if (!is.null(save_temp)) { + save(census, file = save_temp) + } } rm(census.temp) @@ -236,3 +250,21 @@ census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE, return(census) } + +check_temp_save <- function(county_list, save_temp, census) { + if (!is.null(save_temp)) { + if (file.exists(save_temp)) { + message("Temporary save file will be used as requested.") + load(save_temp) + ## Expecting a dataframe named census with the same format + county_list <- setdiff(county_list, unique(census$county)) + message(paste0( + length(unique(census$county)), " counties in the temporary file." + )) + message(paste0(length(county_list), " counties to be processed.")) + } else { + message("Results will be saved in the specified temporary file.") + } + } + return(list(county_list = county_list, census = census)) +} From 63adb12f0e3b95b263cb1c93dac8517335637e74 Mon Sep 17 00:00:00 2001 From: Seo-young Silvia Kim Date: Tue, 12 Jan 2021 13:51:11 -0500 Subject: [PATCH 2/4] travis failure gert package remedy --- .travis.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.travis.yml b/.travis.yml index bd10f82..8db4aca 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,3 +10,8 @@ notifications: email: on_success: change on_failure: change + +addons: + apt: + packages: + - libgit2-dev \ No newline at end of file From d92866c57ab77ae4a2112cff2466acaa09671899 Mon Sep 17 00:00:00 2001 From: Seo-young Silvia Kim Date: Tue, 12 Jan 2021 14:14:25 -0500 Subject: [PATCH 3/4] devtoolss::document --- DESCRIPTION | 2 +- R/census_geo_api_old.R | 238 ++++++++++++++++++++++++++++++++++++++++ man/census_geo_api.Rd | 26 ++++- man/census_helper.Rd | 12 +- man/get_census_api.Rd | 8 +- man/get_census_api_2.Rd | 8 +- man/get_census_data.Rd | 10 +- man/merge_surnames.Rd | 8 +- man/predict_race.Rd | 16 ++- man/surnames2000.Rd | 6 +- man/surnames2010.Rd | 6 +- man/voters.Rd | 6 +- 12 files changed, 319 insertions(+), 27 deletions(-) create mode 100644 R/census_geo_api_old.R diff --git a/DESCRIPTION b/DESCRIPTION index b009c6b..7691199 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,4 +25,4 @@ Suggests: LazyLoad: yes LazyData: yes License: GPL (>= 3) -RoxygenNote: 6.0.1 +RoxygenNote: 7.1.1 diff --git a/R/census_geo_api_old.R b/R/census_geo_api_old.R new file mode 100644 index 0000000..94f6933 --- /dev/null +++ b/R/census_geo_api_old.R @@ -0,0 +1,238 @@ +#' Census Data download function. +#' +#' \code{census_geo_api} retrieves U.S. Census geographic data for a given state. +#' +#' This function allows users to download U.S. Census 2010 geographic data, +#' at either the county, tract, block, or place level, for a particular state. +#' +#' @param key A required character object. Must contain user's Census API +#' key, which can be requested \href{https://api.census.gov/data/key_signup.html}{here}. +#' @param state A required character object specifying which state to extract Census data for, +#' e.g., \code{"NJ"}. +#' @param geo A character object specifying what aggregation level to use. +#' Use \code{"county"}, \code{"tract"}, \code{"block"}, or \code{"place"}. +#' Default is \code{"tract"}. Warning: extracting block-level data takes very long. +#' @param age A \code{TRUE}/\code{FALSE} object indicating whether to condition on +#' age or not. If \code{FALSE} (default), function will return Pr(Geolocation | Race). +#' If \code{TRUE}, function will return Pr(Geolocation, Age | Race). +#' If \code{\var{sex}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race). +#' @param sex A \code{TRUE}/\code{FALSE} object indicating whether to condition on +#' sex or not. If \code{FALSE} (default), function will return Pr(Geolocation | Race). +#' If \code{TRUE}, function will return Pr(Geolocation, Sex | Race). +#' If \code{\var{age}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race). +#' @param retry The number of retries at the census website if network interruption occurs. +#' @return Output will be an object of class \code{list}, indexed by state names. It will +#' consist of the original user-input data with additional columns of Census geographic data. +#' +#' @examples +#' \dontshow{data(voters)} +#' \dontrun{census_geo_api(key = "...", states = c("NJ", "DE"), geo = "block")} +#' \dontrun{census_geo_api(key = "...", states = "FL", geo = "tract", age = TRUE, sex = TRUE)} +#' +#' @references +#' Relies on get_census_api, get_census_api_2, and vec_to_chunk functions authored by Nicholas Nagle, +#' available \href{https://rstudio-pubs-static.s3.amazonaws.com/19337_2e7f827190514c569ea136db788ce850.html}{here}. +#' +#' @export +census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE, retry = 0) { + + if (missing(key)) { + stop('Must enter U.S. Census API key, which can be requested at https://api.census.gov/data/key_signup.html.') + } + + state <- toupper(state) + + df.out <- NULL + + fips.codes <- get("State.FIPS") + state.fips <- fips.codes[fips.codes$State == state, "FIPS"] + state.fips <- ifelse(nchar(state.fips) == 1, paste0("0", state.fips), state.fips) + + if (age == F & sex == F) { + num <- ifelse(3:10 != 10, paste("0", 3:10, sep = ""), "10") + vars <- paste("P0050", num, sep = "") + } + + if (age == F & sex == T) { + eth.let <- c("I", "B", "H", "D", "E", "F", "C") + num <- as.character(c("01", "02", "26")) + vars <- NULL + for (e in 1:length(eth.let)) { + vars <- c(vars, paste("P012", eth.let[e], "0", num, sep = "")) + } + } + + if (age == T & sex == F) { + eth.let <- c("I", "B", "H", "D", "E", "F", "C") + num <- as.character(c(c("01", "03", "04", "05", "06", "07", "08", "09"), seq(10, 25), seq(27, 49))) + vars <- NULL + for (e in 1:length(eth.let)) { + vars <- c(vars, paste("P012", eth.let[e], "0", num, sep = "")) + } + } + + if (age == T & sex == T) { + eth.let <- c("I", "B", "H", "D", "E", "F", "C") + num <- as.character(c(c("01", "03", "04", "05", "06", "07", "08", "09"), seq(10, 25), seq(27, 49))) + vars <- NULL + for (e in 1:length(eth.let)) { + vars <- c(vars, paste("P012", eth.let[e], "0", num, sep = "")) + } + } + + if (geo == "place") { + geo.merge <- c("state", "place") + region <- paste("for=place:*&in=state:", state.fips, sep = "") + census <- get_census_api("https://api.census.gov/data/2010/dec/sf1?", key = key, vars = vars, region = region, retry) + } + + if (geo == "county") { + geo.merge <- c("state", "county") + region <- paste("for=county:*&in=state:", state.fips, sep = "") + census <- get_census_api("https://api.census.gov/data/2010/dec/sf1?", key = key, vars = vars, region = region, retry) + } + + if (geo == "tract") { + + geo.merge <- c("state", "county", "tract") + + region_county <- paste("for=county:*&in=state:", state.fips, sep = "") + county_df <- get_census_api("https://api.census.gov/data/2010/dec/sf1?", key = key, vars = vars, region = region_county, retry) + county_list <- county_df$county + + census <- NULL + for (c in 1:length(county_list)) { + print(paste("County ", c, " of ", length(county_list), ": ", county_list[c], sep = "")) + region_county <- paste("for=tract:*&in=state:", state.fips, "+county:", county_list[c], sep = "") + census.temp <- get_census_api("https://api.census.gov/data/2010/dec/sf1?", key = key, vars = vars, region = region_county, retry) + census <- rbind(census, census.temp) + } + rm(census.temp) + } + + if (geo == "block") { + + geo.merge <- c("state", "county", "tract", "block") + + region_county <- paste("for=county:*&in=state:", state.fips, sep = "") + county_df <- get_census_api("https://api.census.gov/data/2010/dec/sf1?", key = key, vars = vars, region = region_county, retry) + county_list <- county_df$county + + census <- NULL + + for (c in 1:length(county_list)) { + print(paste("County ", c, " of ", length(county_list), ": ", county_list[c], sep = "")) + + region_tract <- paste("for=tract:*&in=state:", state.fips, "+county:", county_list[c], sep = "") + print(region_tract) + tract_df <- get_census_api("https://api.census.gov/data/2010/dec/sf1?", key = key, vars = vars, region = region_tract, retry) + tract_list <- tract_df$tract + + for (t in 1:length(tract_list)) { + print(paste("Tract ", t, " of ", length(tract_list), ": ", tract_list[t], sep = "")) + + region_block <- paste("for=block:*&in=state:", state.fips, "+county:", county_list[c], "+tract:", tract_list[t], sep = "") + census.temp <- get_census_api("https://api.census.gov/data/2010/dec/sf1?", key = key, vars = vars, region = region_block, retry) + census <- rbind(census, census.temp) + } + } + + rm(census.temp) + + } + + census$state <- state + + if (age == F & sex == F) { + + ## Calculate Pr(Geolocation | Race) + census$r_whi <- census$P005003 / sum(census$P005003) #Pr(Tract|White) + census$r_bla <- census$P005004 / sum(census$P005004) #Pr(Tract|Black) + census$r_his <- census$P005010 / sum(census$P005010) #Pr(Tract|Latino) + census$r_asi <- (census$P005006 + census$P005007) / (sum(census$P005006) + sum(census$P005007)) #Pr(Tract | Asian or NH/PI) + census$r_oth <- (census$P005005 + census$P005008 + census$P005009) / (sum(census$P005005) + sum(census$P005008) + sum(census$P005009)) #Pr(Tract | AI/AN, Other, or Mixed) + + } + + if (age == F & sex == T) { + + ## Calculate Pr(Geolocation, Sex | Race) + eth.cen <- c("whi", "bla", "his", "asi", "oth") + eth.let <- c("I", "B", "H", "D", "F") + + for (i in 1:length(eth.cen)) { + if (i != 4 & i != 5) { + census[paste("r_mal", eth.cen[i], sep = "_")] <- census[paste("P012", eth.let[i], "002", sep = "")] / sum(census[paste("P012", eth.let[i], "001", sep = "")]) + census[paste("r_fem", eth.cen[i], sep = "_")] <- census[paste("P012", eth.let[i], "026", sep = "")] / sum(census[paste("P012", eth.let[i], "001", sep = "")]) + } + if (i == 4) { + ## Combine Asian and Native Hawaiian/Pacific Islander + census[paste("r_mal", eth.cen[i], sep = "_")] <- (census$P012D002 + census$P012E002) / sum(census$P012D001 + census$P012E001) + census[paste("r_fem", eth.cen[i], sep = "_")] <- (census$P012D026 + census$P012E026) / sum(census$P012D001 + census$P012E001) + } + if (i == 5) { + ## Combine American India/Alaska Native and Other + census[paste("r_mal", eth.cen[i], sep = "_")] <- (census$P012C002 + census$P012F002) / sum(census$P012C001 + census$P012F001) + census[paste("r_fem", eth.cen[i], sep = "_")] <- (census$P012C026 + census$P012F026) / sum(census$P012C001 + census$P012F001) + } + } + } + + if (age == T & sex == F) { + + ## Calculate Pr(Geolocation, Age Category | Race) + eth.cen <- c("whi", "bla", "his", "asi", "oth") + eth.let <- c("I", "B", "H", "D", "F") + age.cat <- c(seq(1, 23), seq(1, 23)) + age.cen <- as.character(c(c("03", "04", "05", "06", "07", "08", "09"), seq(10, 25), seq(27, 49))) + + for (i in 1:length(eth.cen)) { + for (j in 1:23) { + if (i != 4 & i != 5) { + census[paste("r", age.cat[j], eth.cen[i], sep = "_")] <- (census[paste("P012", eth.let[i], "0", age.cen[j], sep = "")] + census[paste("P012", eth.let[i], "0", age.cen[j + 23], sep = "")]) / sum(census[paste("P012", eth.let[i], "001", sep = "")]) + } + if (i == 4) { + ## Combine Asian and Native Hawaiian/Pacific Islander + census[paste("r", age.cat[j], eth.cen[i], sep = "_")] <- (census[paste("P012D0", age.cen[j], sep = "")] + census[paste("P012D0", age.cen[j + 23], sep = "")] + census[paste("P012E0", age.cen[j], sep = "")] + census[paste("P012E0", age.cen[j + 23], sep = "")]) / sum(census$P012D001 + census$P012E001) + } + if (i == 5) { + ## Combine American India/Alaska Native and Other + census[paste("r", age.cat[j], eth.cen[i], sep = "_")] <- (census[paste("P012C0", age.cen[j], sep = "")] + census[paste("P012C0", age.cen[j + 23], sep = "")] + census[paste("P012F0", age.cen[j], sep = "")] + census[paste("P012F0", age.cen[j + 23], sep = "")]) / sum(census$P012C001 + census$P012F001) + } + } + } + } + + if (age == T & sex == T) { + + ## Calculate Pr(Geolocation, Sex, Age Category | Race) + eth.cen <- c("whi", "bla", "his", "asi", "oth") + eth.let <- c("I", "B", "H", "D", "F") + sex.let <- c("mal", "fem") + age.cat <- c(seq(1, 23), seq(1, 23)) + age.cen <- as.character(c(c("03", "04", "05", "06", "07", "08", "09"), seq(10, 25), seq(27, 49))) + + for (i in 1:length(eth.cen)) { + for (k in 1:length(sex.let)) { + for (j in 1:23) { + if (k == 2) { + j <- j + 23 + } + if (i != 4 & i != 5) { + census[paste("r", sex.let[k], age.cat[j], eth.cen[i], sep = "_")] <- census[paste("P012", eth.let[i], "0", age.cen[j], sep = "")] / sum(census[paste("P012", eth.let[i], "001", sep = "")]) + } + if (i == 4) { + ## Combine Asian and Native Hawaiian/Pacific Islander + census[paste("r", sex.let[k], age.cat[j], eth.cen[i], sep = "_")] <- (census[paste("P012D0", age.cen[j], sep = "")] + census[paste("P012E0", age.cen[j], sep = "")]) / sum(census$P012D001 + census$P012E001) + } + if (i == 5) { + ## Combine American India/Alaska Native and Other + census[paste("r", sex.let[k], age.cat[j], eth.cen[i], sep = "_")] <- (census[paste("P012C0", age.cen[j], sep = "")] + census[paste("P012F0", age.cen[j], sep = "")]) / sum(census$P012C001 + census$P012F001) + } + } + } + } + } + + return(census) +} diff --git a/man/census_geo_api.Rd b/man/census_geo_api.Rd index cc97ca6..944b81a 100644 --- a/man/census_geo_api.Rd +++ b/man/census_geo_api.Rd @@ -1,11 +1,12 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/census_geo_api.R +% Please edit documentation in R/census_geo_api.R, R/census_geo_api_old.R \name{census_geo_api} \alias{census_geo_api} \title{Census Data download function.} \usage{ -census_geo_api(key, state, geo = "tract", age = FALSE, sex = FALSE, - retry = 0) +census_geo_api(key, state, geo = "tract", age = FALSE, sex = FALSE, retry = 0) + +census_geo_api(key, state, geo = "tract", age = FALSE, sex = FALSE, retry = 0) } \arguments{ \item{key}{A required character object. Must contain user's Census API @@ -29,15 +30,27 @@ If \code{TRUE}, function will return Pr(Geolocation, Sex | Race). If \code{\var{age}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race).} \item{retry}{The number of retries at the census website if network interruption occurs.} + +\item{save_temp}{File indicating where to save the temporary outputs. +Defaults to NULL. If specified, the function will look for an .RData file +with the same format as the expected output.} } \value{ +Output will be an object of class \code{list}, indexed by state names. It will + consist of the original user-input data with additional columns of Census geographic data. + Output will be an object of class \code{list}, indexed by state names. It will consist of the original user-input data with additional columns of Census geographic data. } \description{ +\code{census_geo_api} retrieves U.S. Census geographic data for a given state. + \code{census_geo_api} retrieves U.S. Census geographic data for a given state. } \details{ +This function allows users to download U.S. Census 2010 geographic data, +at either the county, tract, block, or place level, for a particular state. + This function allows users to download U.S. Census 2010 geographic data, at either the county, tract, block, or place level, for a particular state. } @@ -46,8 +59,15 @@ at either the county, tract, block, or place level, for a particular state. \dontrun{census_geo_api(key = "...", states = c("NJ", "DE"), geo = "block")} \dontrun{census_geo_api(key = "...", states = "FL", geo = "tract", age = TRUE, sex = TRUE)} +\dontshow{data(voters)} +\dontrun{census_geo_api(key = "...", states = c("NJ", "DE"), geo = "block")} +\dontrun{census_geo_api(key = "...", states = "FL", geo = "tract", age = TRUE, sex = TRUE)} + } \references{ +Relies on get_census_api, get_census_api_2, and vec_to_chunk functions authored by Nicholas Nagle, +available \href{https://rstudio-pubs-static.s3.amazonaws.com/19337_2e7f827190514c569ea136db788ce850.html}{here}. + Relies on get_census_api, get_census_api_2, and vec_to_chunk functions authored by Nicholas Nagle, available \href{https://rstudio-pubs-static.s3.amazonaws.com/19337_2e7f827190514c569ea136db788ce850.html}{here}. } diff --git a/man/census_helper.Rd b/man/census_helper.Rd index 322a734..911f44c 100644 --- a/man/census_helper.Rd +++ b/man/census_helper.Rd @@ -4,8 +4,16 @@ \alias{census_helper} \title{Census helper function.} \usage{ -census_helper(key, voter.file, states = "all", geo = "tract", age = FALSE, - sex = FALSE, census.data = NA, retry = 0) +census_helper( + key, + voter.file, + states = "all", + geo = "tract", + age = FALSE, + sex = FALSE, + census.data = NA, + retry = 0 +) } \arguments{ \item{key}{A required character object. Must contain user's Census API diff --git a/man/get_census_api.Rd b/man/get_census_api.Rd index d75fc48..a3b47cc 100644 --- a/man/get_census_api.Rd +++ b/man/get_census_api.Rd @@ -8,13 +8,13 @@ get_census_api(data_url, key, vars, region, retry = 0) } \arguments{ \item{data_url}{URL root of the API, including the question mark, -e.g., \code{"https://api.census.gov/data/2010/sf1?"}.} +e.g., \code{"https://api.census.gov/data/2010/dec/dec/sf1?"}.} \item{key}{A required character object containing user's Census API key, which can be requested \href{https://api.census.gov/data/key_signup.html}{here}.} \item{vars}{A character vector of variables to get, -e.g., \code{c("P0050003","P0050004","P0050005", "P0050006")}. +e.g., \code{c("P005003","P005004","P005005", "P005006")}. If there are more than 50 variables, then function will automatically split variables into separate queries.} @@ -36,8 +36,8 @@ This function obtains U.S. Census data via the public API. User can specify the variables and region(s) for which to obtain data. } \examples{ -\dontrun{get_census_api(data_url = "https://api.census.gov/data/2010/sf1?", key = "...", -vars = c("P0050003","P0050004","P0050005", "P0050006"), region = "for=county:*&in=state:34")} +\dontrun{get_census_api(data_url = "https://api.census.gov/data/2010/dec/sf1?", key = "...", +vars = c("P005003","P005004","P005005", "P005006"), region = "for=county:*&in=state:34")} } \references{ diff --git a/man/get_census_api_2.Rd b/man/get_census_api_2.Rd index 36be885..21a8b60 100644 --- a/man/get_census_api_2.Rd +++ b/man/get_census_api_2.Rd @@ -8,13 +8,13 @@ get_census_api_2(data_url, key, get, region, retry = 0) } \arguments{ \item{data_url}{URL root of the API, including the question mark, -e.g., \code{"https://api.census.gov/data/2010/sf1?"}.} +e.g., \code{"https://api.census.gov/data/2010/dec/sf1?"}.} \item{key}{A required character object containing user's Census API key, which can be requested \href{https://api.census.gov/data/key_signup.html}{here}.} \item{get}{A character vector of variables to get, -e.g., \code{c("P0050003","P0050004","P0050005", "P0050006")}. +e.g., \code{c("P005003","P005004","P005005", "P005006")}. If there are more than 50 variables, then function will automatically split variables into separate queries.} @@ -37,8 +37,8 @@ It is used by the \code{get_census_api} function. The user should not need to ca function directly. } \examples{ -\dontrun{get_census_api_2(data_url = "https://api.census.gov/data/2010/sf1?", key = "...", -get = c("P0050003","P0050004","P0050005", "P0050006"), region = "for=county:*&in=state:34")} +\dontrun{get_census_api_2(data_url = "https://api.census.gov/data/2010/dec/sf1?", key = "...", +get = c("P005003","P005004","P005005", "P005006"), region = "for=county:*&in=state:34")} } \references{ diff --git a/man/get_census_data.Rd b/man/get_census_data.Rd index ef94f84..ef04204 100644 --- a/man/get_census_data.Rd +++ b/man/get_census_data.Rd @@ -4,8 +4,14 @@ \alias{get_census_data} \title{Multilevel Census data download function.} \usage{ -get_census_data(key, states, age = FALSE, sex = FALSE, - census.geo = "block", retry = 0) +get_census_data( + key, + states, + age = FALSE, + sex = FALSE, + census.geo = "block", + retry = 0 +) } \arguments{ \item{key}{A required character object containing a valid Census API key, diff --git a/man/merge_surnames.Rd b/man/merge_surnames.Rd index ad70ba1..cb8f4b0 100644 --- a/man/merge_surnames.Rd +++ b/man/merge_surnames.Rd @@ -4,8 +4,12 @@ \alias{merge_surnames} \title{Surname probability merging function.} \usage{ -merge_surnames(voter.file, surname.year = 2010, clean.surname = T, - impute.missing = T) +merge_surnames( + voter.file, + surname.year = 2010, + clean.surname = T, + impute.missing = T +) } \arguments{ \item{voter.file}{An object of class \code{data.frame}. Must contain a field diff --git a/man/predict_race.Rd b/man/predict_race.Rd index feb5f43..67b1b6d 100644 --- a/man/predict_race.Rd +++ b/man/predict_race.Rd @@ -4,9 +4,19 @@ \alias{predict_race} \title{Race prediction function.} \usage{ -predict_race(voter.file, census.surname = TRUE, surname.only = FALSE, - surname.year = 2010, census.geo, census.key, census.data = NA, - age = FALSE, sex = FALSE, party, retry = 0) +predict_race( + voter.file, + census.surname = TRUE, + surname.only = FALSE, + surname.year = 2010, + census.geo, + census.key, + census.data = NA, + age = FALSE, + sex = FALSE, + party, + retry = 0 +) } \arguments{ \item{voter.file}{An object of class \code{data.frame}. diff --git a/man/surnames2000.Rd b/man/surnames2000.Rd index e5c1b55..00a2c30 100644 --- a/man/surnames2000.Rd +++ b/man/surnames2000.Rd @@ -4,7 +4,8 @@ \name{surnames2000} \alias{surnames2000} \title{Census Surname List (2000).} -\format{A data frame with 157,728 rows and 6 variables: +\format{ +A data frame with 157,728 rows and 6 variables: \describe{ \item{surname}{Surname} \item{p_whi}{Pr(White | Surname)} @@ -12,7 +13,8 @@ \item{p_his}{Pr(Hispanic/Latino | Surname)} \item{p_asi}{Pr(Asian/Pacific Islander | Surname)} \item{p_oth}{Pr(Other | Surname)} - #' }} + #' } +} \usage{ surnames2000 } diff --git a/man/surnames2010.Rd b/man/surnames2010.Rd index e598227..7341985 100644 --- a/man/surnames2010.Rd +++ b/man/surnames2010.Rd @@ -4,7 +4,8 @@ \name{surnames2010} \alias{surnames2010} \title{Census Surname List (2010).} -\format{A data frame with 167,613 rows and 6 variables: +\format{ +A data frame with 167,613 rows and 6 variables: \describe{ \item{surname}{Surname} \item{p_whi}{Pr(White | Surname)} @@ -12,7 +13,8 @@ \item{p_his}{Pr(Hispanic/Latino | Surname)} \item{p_asi}{Pr(Asian/Pacific Islander | Surname)} \item{p_oth}{Pr(Other | Surname)} - #' }} + #' } +} \usage{ surnames2010 } diff --git a/man/voters.Rd b/man/voters.Rd index fcfe889..7a58eff 100644 --- a/man/voters.Rd +++ b/man/voters.Rd @@ -4,7 +4,8 @@ \name{voters} \alias{voters} \title{Example voter file.} -\format{A data frame with 10 rows and 12 variables: +\format{ +A data frame with 10 rows and 12 variables: \describe{ \item{VoterID}{Voter identifier (numeric)} \item{surname}{Surname} @@ -19,7 +20,8 @@ \item{sex}{0=male, 1=female} \item{party}{Party registration (character)} \item{PID}{Party registration (numeric)} - #' }} + #' } +} \usage{ voters } From e2b1f032f805a515864dff6a90b518abca34cd4c Mon Sep 17 00:00:00 2001 From: Seo-young Silvia Kim Date: Tue, 12 Jan 2021 14:41:40 -0500 Subject: [PATCH 4/4] delete old code; update document/test --- R/census_geo_api_old.R | 238 -------------------------------------- man/census_geo_api.Rd | 29 ++--- tests/testthat/test-all.R | 8 +- 3 files changed, 14 insertions(+), 261 deletions(-) delete mode 100644 R/census_geo_api_old.R diff --git a/R/census_geo_api_old.R b/R/census_geo_api_old.R deleted file mode 100644 index 94f6933..0000000 --- a/R/census_geo_api_old.R +++ /dev/null @@ -1,238 +0,0 @@ -#' Census Data download function. -#' -#' \code{census_geo_api} retrieves U.S. Census geographic data for a given state. -#' -#' This function allows users to download U.S. Census 2010 geographic data, -#' at either the county, tract, block, or place level, for a particular state. -#' -#' @param key A required character object. Must contain user's Census API -#' key, which can be requested \href{https://api.census.gov/data/key_signup.html}{here}. -#' @param state A required character object specifying which state to extract Census data for, -#' e.g., \code{"NJ"}. -#' @param geo A character object specifying what aggregation level to use. -#' Use \code{"county"}, \code{"tract"}, \code{"block"}, or \code{"place"}. -#' Default is \code{"tract"}. Warning: extracting block-level data takes very long. -#' @param age A \code{TRUE}/\code{FALSE} object indicating whether to condition on -#' age or not. If \code{FALSE} (default), function will return Pr(Geolocation | Race). -#' If \code{TRUE}, function will return Pr(Geolocation, Age | Race). -#' If \code{\var{sex}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race). -#' @param sex A \code{TRUE}/\code{FALSE} object indicating whether to condition on -#' sex or not. If \code{FALSE} (default), function will return Pr(Geolocation | Race). -#' If \code{TRUE}, function will return Pr(Geolocation, Sex | Race). -#' If \code{\var{age}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race). -#' @param retry The number of retries at the census website if network interruption occurs. -#' @return Output will be an object of class \code{list}, indexed by state names. It will -#' consist of the original user-input data with additional columns of Census geographic data. -#' -#' @examples -#' \dontshow{data(voters)} -#' \dontrun{census_geo_api(key = "...", states = c("NJ", "DE"), geo = "block")} -#' \dontrun{census_geo_api(key = "...", states = "FL", geo = "tract", age = TRUE, sex = TRUE)} -#' -#' @references -#' Relies on get_census_api, get_census_api_2, and vec_to_chunk functions authored by Nicholas Nagle, -#' available \href{https://rstudio-pubs-static.s3.amazonaws.com/19337_2e7f827190514c569ea136db788ce850.html}{here}. -#' -#' @export -census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE, retry = 0) { - - if (missing(key)) { - stop('Must enter U.S. Census API key, which can be requested at https://api.census.gov/data/key_signup.html.') - } - - state <- toupper(state) - - df.out <- NULL - - fips.codes <- get("State.FIPS") - state.fips <- fips.codes[fips.codes$State == state, "FIPS"] - state.fips <- ifelse(nchar(state.fips) == 1, paste0("0", state.fips), state.fips) - - if (age == F & sex == F) { - num <- ifelse(3:10 != 10, paste("0", 3:10, sep = ""), "10") - vars <- paste("P0050", num, sep = "") - } - - if (age == F & sex == T) { - eth.let <- c("I", "B", "H", "D", "E", "F", "C") - num <- as.character(c("01", "02", "26")) - vars <- NULL - for (e in 1:length(eth.let)) { - vars <- c(vars, paste("P012", eth.let[e], "0", num, sep = "")) - } - } - - if (age == T & sex == F) { - eth.let <- c("I", "B", "H", "D", "E", "F", "C") - num <- as.character(c(c("01", "03", "04", "05", "06", "07", "08", "09"), seq(10, 25), seq(27, 49))) - vars <- NULL - for (e in 1:length(eth.let)) { - vars <- c(vars, paste("P012", eth.let[e], "0", num, sep = "")) - } - } - - if (age == T & sex == T) { - eth.let <- c("I", "B", "H", "D", "E", "F", "C") - num <- as.character(c(c("01", "03", "04", "05", "06", "07", "08", "09"), seq(10, 25), seq(27, 49))) - vars <- NULL - for (e in 1:length(eth.let)) { - vars <- c(vars, paste("P012", eth.let[e], "0", num, sep = "")) - } - } - - if (geo == "place") { - geo.merge <- c("state", "place") - region <- paste("for=place:*&in=state:", state.fips, sep = "") - census <- get_census_api("https://api.census.gov/data/2010/dec/sf1?", key = key, vars = vars, region = region, retry) - } - - if (geo == "county") { - geo.merge <- c("state", "county") - region <- paste("for=county:*&in=state:", state.fips, sep = "") - census <- get_census_api("https://api.census.gov/data/2010/dec/sf1?", key = key, vars = vars, region = region, retry) - } - - if (geo == "tract") { - - geo.merge <- c("state", "county", "tract") - - region_county <- paste("for=county:*&in=state:", state.fips, sep = "") - county_df <- get_census_api("https://api.census.gov/data/2010/dec/sf1?", key = key, vars = vars, region = region_county, retry) - county_list <- county_df$county - - census <- NULL - for (c in 1:length(county_list)) { - print(paste("County ", c, " of ", length(county_list), ": ", county_list[c], sep = "")) - region_county <- paste("for=tract:*&in=state:", state.fips, "+county:", county_list[c], sep = "") - census.temp <- get_census_api("https://api.census.gov/data/2010/dec/sf1?", key = key, vars = vars, region = region_county, retry) - census <- rbind(census, census.temp) - } - rm(census.temp) - } - - if (geo == "block") { - - geo.merge <- c("state", "county", "tract", "block") - - region_county <- paste("for=county:*&in=state:", state.fips, sep = "") - county_df <- get_census_api("https://api.census.gov/data/2010/dec/sf1?", key = key, vars = vars, region = region_county, retry) - county_list <- county_df$county - - census <- NULL - - for (c in 1:length(county_list)) { - print(paste("County ", c, " of ", length(county_list), ": ", county_list[c], sep = "")) - - region_tract <- paste("for=tract:*&in=state:", state.fips, "+county:", county_list[c], sep = "") - print(region_tract) - tract_df <- get_census_api("https://api.census.gov/data/2010/dec/sf1?", key = key, vars = vars, region = region_tract, retry) - tract_list <- tract_df$tract - - for (t in 1:length(tract_list)) { - print(paste("Tract ", t, " of ", length(tract_list), ": ", tract_list[t], sep = "")) - - region_block <- paste("for=block:*&in=state:", state.fips, "+county:", county_list[c], "+tract:", tract_list[t], sep = "") - census.temp <- get_census_api("https://api.census.gov/data/2010/dec/sf1?", key = key, vars = vars, region = region_block, retry) - census <- rbind(census, census.temp) - } - } - - rm(census.temp) - - } - - census$state <- state - - if (age == F & sex == F) { - - ## Calculate Pr(Geolocation | Race) - census$r_whi <- census$P005003 / sum(census$P005003) #Pr(Tract|White) - census$r_bla <- census$P005004 / sum(census$P005004) #Pr(Tract|Black) - census$r_his <- census$P005010 / sum(census$P005010) #Pr(Tract|Latino) - census$r_asi <- (census$P005006 + census$P005007) / (sum(census$P005006) + sum(census$P005007)) #Pr(Tract | Asian or NH/PI) - census$r_oth <- (census$P005005 + census$P005008 + census$P005009) / (sum(census$P005005) + sum(census$P005008) + sum(census$P005009)) #Pr(Tract | AI/AN, Other, or Mixed) - - } - - if (age == F & sex == T) { - - ## Calculate Pr(Geolocation, Sex | Race) - eth.cen <- c("whi", "bla", "his", "asi", "oth") - eth.let <- c("I", "B", "H", "D", "F") - - for (i in 1:length(eth.cen)) { - if (i != 4 & i != 5) { - census[paste("r_mal", eth.cen[i], sep = "_")] <- census[paste("P012", eth.let[i], "002", sep = "")] / sum(census[paste("P012", eth.let[i], "001", sep = "")]) - census[paste("r_fem", eth.cen[i], sep = "_")] <- census[paste("P012", eth.let[i], "026", sep = "")] / sum(census[paste("P012", eth.let[i], "001", sep = "")]) - } - if (i == 4) { - ## Combine Asian and Native Hawaiian/Pacific Islander - census[paste("r_mal", eth.cen[i], sep = "_")] <- (census$P012D002 + census$P012E002) / sum(census$P012D001 + census$P012E001) - census[paste("r_fem", eth.cen[i], sep = "_")] <- (census$P012D026 + census$P012E026) / sum(census$P012D001 + census$P012E001) - } - if (i == 5) { - ## Combine American India/Alaska Native and Other - census[paste("r_mal", eth.cen[i], sep = "_")] <- (census$P012C002 + census$P012F002) / sum(census$P012C001 + census$P012F001) - census[paste("r_fem", eth.cen[i], sep = "_")] <- (census$P012C026 + census$P012F026) / sum(census$P012C001 + census$P012F001) - } - } - } - - if (age == T & sex == F) { - - ## Calculate Pr(Geolocation, Age Category | Race) - eth.cen <- c("whi", "bla", "his", "asi", "oth") - eth.let <- c("I", "B", "H", "D", "F") - age.cat <- c(seq(1, 23), seq(1, 23)) - age.cen <- as.character(c(c("03", "04", "05", "06", "07", "08", "09"), seq(10, 25), seq(27, 49))) - - for (i in 1:length(eth.cen)) { - for (j in 1:23) { - if (i != 4 & i != 5) { - census[paste("r", age.cat[j], eth.cen[i], sep = "_")] <- (census[paste("P012", eth.let[i], "0", age.cen[j], sep = "")] + census[paste("P012", eth.let[i], "0", age.cen[j + 23], sep = "")]) / sum(census[paste("P012", eth.let[i], "001", sep = "")]) - } - if (i == 4) { - ## Combine Asian and Native Hawaiian/Pacific Islander - census[paste("r", age.cat[j], eth.cen[i], sep = "_")] <- (census[paste("P012D0", age.cen[j], sep = "")] + census[paste("P012D0", age.cen[j + 23], sep = "")] + census[paste("P012E0", age.cen[j], sep = "")] + census[paste("P012E0", age.cen[j + 23], sep = "")]) / sum(census$P012D001 + census$P012E001) - } - if (i == 5) { - ## Combine American India/Alaska Native and Other - census[paste("r", age.cat[j], eth.cen[i], sep = "_")] <- (census[paste("P012C0", age.cen[j], sep = "")] + census[paste("P012C0", age.cen[j + 23], sep = "")] + census[paste("P012F0", age.cen[j], sep = "")] + census[paste("P012F0", age.cen[j + 23], sep = "")]) / sum(census$P012C001 + census$P012F001) - } - } - } - } - - if (age == T & sex == T) { - - ## Calculate Pr(Geolocation, Sex, Age Category | Race) - eth.cen <- c("whi", "bla", "his", "asi", "oth") - eth.let <- c("I", "B", "H", "D", "F") - sex.let <- c("mal", "fem") - age.cat <- c(seq(1, 23), seq(1, 23)) - age.cen <- as.character(c(c("03", "04", "05", "06", "07", "08", "09"), seq(10, 25), seq(27, 49))) - - for (i in 1:length(eth.cen)) { - for (k in 1:length(sex.let)) { - for (j in 1:23) { - if (k == 2) { - j <- j + 23 - } - if (i != 4 & i != 5) { - census[paste("r", sex.let[k], age.cat[j], eth.cen[i], sep = "_")] <- census[paste("P012", eth.let[i], "0", age.cen[j], sep = "")] / sum(census[paste("P012", eth.let[i], "001", sep = "")]) - } - if (i == 4) { - ## Combine Asian and Native Hawaiian/Pacific Islander - census[paste("r", sex.let[k], age.cat[j], eth.cen[i], sep = "_")] <- (census[paste("P012D0", age.cen[j], sep = "")] + census[paste("P012E0", age.cen[j], sep = "")]) / sum(census$P012D001 + census$P012E001) - } - if (i == 5) { - ## Combine American India/Alaska Native and Other - census[paste("r", sex.let[k], age.cat[j], eth.cen[i], sep = "_")] <- (census[paste("P012C0", age.cen[j], sep = "")] + census[paste("P012F0", age.cen[j], sep = "")]) / sum(census$P012C001 + census$P012F001) - } - } - } - } - } - - return(census) -} diff --git a/man/census_geo_api.Rd b/man/census_geo_api.Rd index 944b81a..8a1cb17 100644 --- a/man/census_geo_api.Rd +++ b/man/census_geo_api.Rd @@ -1,12 +1,18 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/census_geo_api.R, R/census_geo_api_old.R +% Please edit documentation in R/census_geo_api.R \name{census_geo_api} \alias{census_geo_api} \title{Census Data download function.} \usage{ -census_geo_api(key, state, geo = "tract", age = FALSE, sex = FALSE, retry = 0) - -census_geo_api(key, state, geo = "tract", age = FALSE, sex = FALSE, retry = 0) +census_geo_api( + key, + state, + geo = "tract", + age = FALSE, + sex = FALSE, + retry = 0, + save_temp = NULL +) } \arguments{ \item{key}{A required character object. Must contain user's Census API @@ -36,21 +42,13 @@ Defaults to NULL. If specified, the function will look for an .RData file with the same format as the expected output.} } \value{ -Output will be an object of class \code{list}, indexed by state names. It will - consist of the original user-input data with additional columns of Census geographic data. - Output will be an object of class \code{list}, indexed by state names. It will consist of the original user-input data with additional columns of Census geographic data. } \description{ -\code{census_geo_api} retrieves U.S. Census geographic data for a given state. - \code{census_geo_api} retrieves U.S. Census geographic data for a given state. } \details{ -This function allows users to download U.S. Census 2010 geographic data, -at either the county, tract, block, or place level, for a particular state. - This function allows users to download U.S. Census 2010 geographic data, at either the county, tract, block, or place level, for a particular state. } @@ -59,15 +57,8 @@ at either the county, tract, block, or place level, for a particular state. \dontrun{census_geo_api(key = "...", states = c("NJ", "DE"), geo = "block")} \dontrun{census_geo_api(key = "...", states = "FL", geo = "tract", age = TRUE, sex = TRUE)} -\dontshow{data(voters)} -\dontrun{census_geo_api(key = "...", states = c("NJ", "DE"), geo = "block")} -\dontrun{census_geo_api(key = "...", states = "FL", geo = "tract", age = TRUE, sex = TRUE)} - } \references{ -Relies on get_census_api, get_census_api_2, and vec_to_chunk functions authored by Nicholas Nagle, -available \href{https://rstudio-pubs-static.s3.amazonaws.com/19337_2e7f827190514c569ea136db788ce850.html}{here}. - Relies on get_census_api, get_census_api_2, and vec_to_chunk functions authored by Nicholas Nagle, available \href{https://rstudio-pubs-static.s3.amazonaws.com/19337_2e7f827190514c569ea136db788ce850.html}{here}. } diff --git a/tests/testthat/test-all.R b/tests/testthat/test-all.R index d8d178e..9dd8e77 100644 --- a/tests/testthat/test-all.R +++ b/tests/testthat/test-all.R @@ -46,11 +46,11 @@ test_that("Tests predictions using the Census object", { # Build a Census object by parts; both county-level and tract-level statistics needed for tract-level predictions censusObj2 <- list() - county.dc <- census_geo_api(key = k, state = "DC", geo = "county", age = TRUE, sex = FALSE) - tract.dc <- census_geo_api(key = k, state = "DC", geo = "tract", age = TRUE, sex = FALSE) + county.dc <- census_geo_api(key = k, state = "DC", geo = "county", age = TRUE, sex = FALSE, save_temp = NULL) + tract.dc <- census_geo_api(key = k, state = "DC", geo = "tract", age = TRUE, sex = FALSE, save_temp = NULL) censusObj2[["DC"]] <- list(state = "DC", county = county.dc, tract = tract.dc, age = TRUE, sex = FALSE) - tract.nj <- census_geo_api(key = k, state = "NJ", geo = "tract", age = TRUE, sex = FALSE) - county.nj <- census_geo_api(key = k, state = "NJ", geo = "county", age = TRUE, sex = FALSE) + tract.nj <- census_geo_api(key = k, state = "NJ", geo = "tract", age = TRUE, sex = FALSE, save_temp = NULL) + county.nj <- census_geo_api(key = k, state = "NJ", geo = "county", age = TRUE, sex = FALSE, save_temp = NULL) censusObj2[["NJ"]] <- list(state = "NJ", county = county.nj, tract = tract.nj, age = TRUE, sex = FALSE) # Prediction using the Census object built in the previous step; county-level statistics used in prediction