From c4c47698fc19ccc41c552cbf4f2c613f553f200c Mon Sep 17 00:00:00 2001 From: Xiyu Yang <90369889+xiyuy@users.noreply.github.com> Date: Fri, 7 Jan 2022 11:05:12 -0500 Subject: [PATCH 01/19] Update voters.RData Copy and paste the voters.RData from master branch. Delete the original voters.RData file in hwru branch. --- data/voters.RData | Bin 616 -> 687 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/data/voters.RData b/data/voters.RData index e7cfd64219516497d75e31c247bf9c24cbe61d2a..ffe9973ec404c66ce7006c039936b3c3b49ee678 100644 GIT binary patch literal 687 zcmV;g0#N-QiwFP!000001I<=jZ`v>vc5W>oZLPH5(JFyLLEAJo5EHbWCbUbtHa_v|?7)YvGJpzz6982JH2|jo>Hr$K#9kQV1oCP%LmZ}z3v`!Lw0Abblw!r)?gU3L z(vRkZ!^nOo8b&0(2FGtBHdDCJ$LW~OqYzwsY(x`ALC`syl3$9HzG9k&BQnm0FBngV zvD>yX$G~}Rtrl%B)M^~q)^-cfhn_}PD{lw9AKaCD`SZ3-e#_VQx175{J7#~%>K)$? zG%RPZ7VInm%H1(idj`)-@F1d)P|+#`JMn$rVAZv;x-k}11HpFHX(?TnVXlFlM|8S@ zUxw4=RuvwWB2u94VCw#M@%;=*II>SxvI$+M$L&2MDnbXmmPRM8>&58n!Fs=PBih`5%> zlG&I5YhfO<1j^tYfk4G8OmB|l1vFk@4w}3U$8-z=-{S!nivfC07{wfx=JjTAg!AuO ziYEl#wFfZqi~Nr0wt*%yF5O^ClHwGI-2$(@kj-fN6P!{m?hKGY%t8=Zm~s+ER1Z4% z27);W`2ka=nCoE9N3F%1#yA9jS9~Yv9K(nUvc9?^XWx%ug|cStTX9(|7JnY8r_wb} V@$rz$)vjvl?k@-Qr>=kr007V#Qa1nq literal 616 zcmV-u0+;>$H+ooF0004LBHlIv03iV!0000G&sfah2$}*aT>vQ&2UKVgRpfklI z#ix#GmMP!ctZhp*SntSRl*d1=kl*bZT5%Ebl$v?2ZIdyf%anbe+?%<*&Os7-#)Ryz z0b?sZ4;CSM)5B8nY@;!ALn>gVW5~=Ma!qSJCXz7>g#`0QmTGr03;MKF`9T)!7P%P! 
z6LXesr@rNPnl3MBYLq|!3Kr_02y*@$>`NfGnFb~hXnVKl13a{O+!jxJw5$sSgJk?dMv{kFH0RWExcw#L$6cpKdRi*;7F zCc}TO?aH}LItSI+o6sQ$`q-yc{k6L?cjZ7UGzQLgj{P zrxmh+c(K~^iV`z6w}DbJi$HC*C#ogu{_yET<>X+YGCrNyI$W0_7pwa*=EDbwJN2ho zQmIdo?~Jqo91~#0`d{yv=fuVN@)ngqJsCBE#Tl(IDiZwSB;%iQ;nb(bgH&zF6c=38 z(!mOUg=-(~26ga5PCi&w`$pq$G@bIuleHul5_h5Y(EPN0BCRL*_xU*mzF(r_2$UoD z`GNs(i_VW~`wa1lmj{nqFFQm7?{kF@#188~4{LSnsI(wNFHKf;yj3%mATW0eEn5!+ zoHSN9+y`5-b?0m1~E5dZ){Vo)7EFb#_W000000a;pc CC>}Zh From 16e1dc247f5d3a2a8f0f32c672b8ea2086a23b2d Mon Sep 17 00:00:00 2001 From: Xiyu Yang <90369889+xiyuy@users.noreply.github.com> Date: Fri, 7 Jan 2022 11:07:38 -0500 Subject: [PATCH 02/19] Import Packages Add stringr, dplyr, Rcpp to the Imports field of DESCRIPTION file. Add import(dplyr) in the NAMESPACE file. --- DESCRIPTION | 7 +++++-- NAMESPACE | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index e936347..2ce39bd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,11 +22,14 @@ Depends: utils Imports: foreach (>= 1.5.1), - devtools (>= 1.10.0) + devtools (>= 1.10.0), + stringr, + dplyr, + Rcpp Suggests: testthat, wruData (>= 0.0.1), -LinkingTo: +LinkingTo: Rcpp, RcppArmadillo, RcppProgress diff --git a/NAMESPACE b/NAMESPACE index 29cd61a..433a6c3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,6 +14,7 @@ export(predict_race_new) export(vec_to_chunk) import(devtools) import(stringr) +import(dplyr) importFrom(Rcpp,evalCpp) importFrom(foreach,"%dopar%") importFrom(foreach,foreach) From ed3a6b070c973b8b948933cae6161edab4ebac8c Mon Sep 17 00:00:00 2001 From: Xiyu Yang <90369889+xiyuy@users.noreply.github.com> Date: Fri, 7 Jan 2022 11:09:06 -0500 Subject: [PATCH 03/19] Change function merge_names Add default values to the arguments of merge_names function. Comment out the line of code require(dplyr). 
--- R/merge_names.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/merge_names.R b/R/merge_names.R index 049da15..c930788 100644 --- a/R/merge_names.R +++ b/R/merge_names.R @@ -54,7 +54,7 @@ #' merge_names(voters) #' #' @export -merge_names <- function(voter.file, namesToUse, use.census.surnames, census.surnames=NULL, clean.names = T) { +merge_names <- function(voter.file, namesToUse='last', use.census.surnames=F, census.surnames=NULL, clean.names = T) { # check the names if(namesToUse == 'last') { @@ -221,7 +221,7 @@ merge_names <- function(voter.file, namesToUse, use.census.surnames, census.surn ## For unmatched names, just fill with a NA - require(dplyr) + # require(dplyr) warning(paste(paste(sum(is.na(df$p_whi_last)), " (", round(100*mean(is.na(df$p_whi_last)), 1), "%) indivduals' last names were not matched.", sep = ""))) if(grepl('first', namesToUse)) { warning(paste(paste(sum(is.na(df$p_whi_first)), " (", round(100*mean(is.na(df$p_whi_first)), 1), "%) indivduals' first names were not matched.", sep = ""))) From cb00b91b2e88004aef88aa4ab44127c5571040f4 Mon Sep 17 00:00:00 2001 From: Xiyu Yang <90369889+xiyuy@users.noreply.github.com> Date: Tue, 11 Jan 2022 16:39:51 -0500 Subject: [PATCH 04/19] Update census_geo_api.R Add a new argument year; Modify the parts of code with arguments age and sex equal to False; Substitute all 2010 census data links with variable census_data_url. --- R/census_geo_api.R | 72 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 57 insertions(+), 15 deletions(-) diff --git a/R/census_geo_api.R b/R/census_geo_api.R index a96bc3d..1901a09 100644 --- a/R/census_geo_api.R +++ b/R/census_geo_api.R @@ -37,7 +37,7 @@ #' available \href{https://rstudio-pubs-static.s3.amazonaws.com/19337_2e7f827190514c569ea136db788ce850.html}{here}. 
#' #' @export -census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE, retry = 0, save_temp = NULL) { +census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE, year = "2010", retry = 0, save_temp = NULL) { if (missing(key)) { stop('Must enter U.S. Census API key, which can be requested at https://api.census.gov/data/key_signup.html.') @@ -51,9 +51,32 @@ census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE, state.fips <- fips.codes[fips.codes$State == state, "FIPS"] state.fips <- ifelse(nchar(state.fips) == 1, paste0("0", state.fips), state.fips) + # if (age == F & sex == F) { + # num <- ifelse(3:10 != 10, paste("0", 3:10, sep = ""), "10") + # vars <- paste("P0050", num, sep = "") + # } + + # assign variable values based on the year of the census data + if (year == "2010"){ + vars <- c( + pop_white = 'P005003', pop_black = 'P005004', + pop_aian = 'P005005', pop_asian = 'P005006', + pop_nhpi = 'P005007', pop_other = 'P005008', + pop_two = 'P005009', pop_hisp = 'P005010' + ) + } + else if (year == "2020") { + vars <- c( + pop_white = 'P2_005N', pop_black = 'P2_006N', + pop_aian = 'P2_007N', pop_asian = 'P2_008N', + pop_nhpi = 'P2_009N', pop_other = 'P2_010N', + pop_two = 'P2_011N', pop_hisp = 'P2_002N' + ) + } + if (age == F & sex == F) { - num <- ifelse(3:10 != 10, paste("0", 3:10, sep = ""), "10") - vars <- paste("P0050", num, sep = "") + vars <- vars[c("pop_white", "pop_black", "pop_aian", "pop_asian", + "pop_nhpi", "pop_other", "pop_two", "pop_hisp")] } if (age == F & sex == T) { @@ -83,16 +106,24 @@ census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE, } } + # set the census data url links + if (year == "2010") { + census_data_url = "https://api.census.gov/data/2010/dec/sf1?" + } + else if (year == "2020") { + census_data_url = "https://api.census.gov/data/2020/dec/pl?" 
+ } + if (geo == "place") { geo.merge <- c("state", "place") region <- paste("for=place:*&in=state:", state.fips, sep = "") - census <- get_census_api("https://api.census.gov/data/2010/dec/sf1?", key = key, vars = vars, region = region, retry) + census <- get_census_api(census_data_url, key = key, vars = vars, region = region, retry) } if (geo == "county") { geo.merge <- c("state", "county") region <- paste("for=county:*&in=state:", state.fips, sep = "") - census <- get_census_api("https://api.census.gov/data/2010/dec/sf1?", key = key, vars = vars, region = region, retry) + census <- get_census_api(census_data_url, key = key, vars = vars, region = region, retry) } if (geo == "tract") { @@ -100,7 +131,7 @@ census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE, geo.merge <- c("state", "county", "tract") region_county <- paste("for=county:*&in=state:", state.fips, sep = "") - county_df <- get_census_api("https://api.census.gov/data/2010/dec/sf1?", key = key, vars = vars, region = region_county, retry) + county_df <- get_census_api(census_data_url, key = key, vars = vars, region = region_county, retry) county_list <- county_df$county census <- NULL temp <- check_temp_save(county_list, save_temp, census) @@ -110,7 +141,7 @@ census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE, for (c in 1:length(county_list)) { print(paste("County ", c, " of ", length(county_list), ": ", county_list[c], sep = "")) region_county <- paste("for=tract:*&in=state:", state.fips, "+county:", county_list[c], sep = "") - census.temp <- get_census_api("https://api.census.gov/data/2010/dec/sf1?", key = key, vars = vars, region = region_county, retry) + census.temp <- get_census_api(census_data_url, key = key, vars = vars, region = region_county, retry) census <- rbind(census, census.temp) if (!is.null(save_temp)) { save(census, file = save_temp) @@ -124,7 +155,7 @@ census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE, 
geo.merge <- c("state", "county", "tract", "block") region_county <- paste("for=county:*&in=state:", state.fips, sep = "") - county_df <- get_census_api("https://api.census.gov/data/2010/dec/sf1?", key = key, vars = vars, region = region_county, retry) + county_df <- get_census_api(census_data_url, key = key, vars = vars, region = region_county, retry) county_list <- county_df$county census <- NULL temp <- check_temp_save(county_list, save_temp, census) @@ -136,14 +167,14 @@ census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE, region_tract <- paste("for=tract:*&in=state:", state.fips, "+county:", county_list[c], sep = "") print(region_tract) - tract_df <- get_census_api("https://api.census.gov/data/2010/dec/sf1?", key = key, vars = vars, region = region_tract, retry) + tract_df <- get_census_api(census_data_url, key = key, vars = vars, region = region_tract, retry) tract_list <- tract_df$tract for (t in 1:length(tract_list)) { print(paste("Tract ", t, " of ", length(tract_list), ": ", tract_list[t], sep = "")) region_block <- paste("for=block:*&in=state:", state.fips, "+county:", county_list[c], "+tract:", tract_list[t], sep = "") - census.temp <- get_census_api("https://api.census.gov/data/2010/dec/sf1?", key = key, vars = vars, region = region_block, retry) + census.temp <- get_census_api(census_data_url, key = key, vars = vars, region = region_block, retry) census <- rbind(census, census.temp) } if (!is.null(save_temp)) { @@ -157,14 +188,25 @@ census_geo_api <- function(key, state, geo = "tract", age = FALSE, sex = FALSE, census$state <- state + # if (age == F & sex == F) { + # + # ## Calculate Pr(Geolocation | Race) + # census$r_whi <- census$P005003 / sum(census$P005003) #Pr(Tract|White) + # census$r_bla <- census$P005004 / sum(census$P005004) #Pr(Tract|Black) + # census$r_his <- census$P005010 / sum(census$P005010) #Pr(Tract|Latino) + # census$r_asi <- (census$P005006 + census$P005007) / (sum(census$P005006) + sum(census$P005007)) 
#Pr(Tract | Asian or NH/PI) + # census$r_oth <- (census$P005005 + census$P005008 + census$P005009) / (sum(census$P005005) + sum(census$P005008) + sum(census$P005009)) #Pr(Tract | AI/AN, Other, or Mixed) + # + # } + if (age == F & sex == F) { ## Calculate Pr(Geolocation | Race) - census$r_whi <- census$P005003 / sum(census$P005003) #Pr(Tract|White) - census$r_bla <- census$P005004 / sum(census$P005004) #Pr(Tract|Black) - census$r_his <- census$P005010 / sum(census$P005010) #Pr(Tract|Latino) - census$r_asi <- (census$P005006 + census$P005007) / (sum(census$P005006) + sum(census$P005007)) #Pr(Tract | Asian or NH/PI) - census$r_oth <- (census$P005005 + census$P005008 + census$P005009) / (sum(census$P005005) + sum(census$P005008) + sum(census$P005009)) #Pr(Tract | AI/AN, Other, or Mixed) + census$r_whi <- census[, vars["pop_white"]] / sum(census[, vars["pop_white"]]) #Pr(Geo|White) + census$r_bla <- census[, vars["pop_black"]] / sum(census[, vars["pop_black"]]) #Pr(Geo|Black) + census$r_his <- census[, vars["pop_hisp"]] / sum(census[, vars["pop_hisp"]]) #Pr(Geo|Latino) + census$r_asi <- (census[, vars["pop_asian"]] + census[, vars["pop_nhpi"]]) / (sum(census[, vars["pop_asian"]]) + sum(census[, vars["pop_nhpi"]])) #Pr(Geo | Asian or NH/PI) + census$r_oth <- (census[, vars["pop_aian"]] + census[, vars["pop_other"]] + census[, vars["pop_two"]]) / (sum(census[, vars["pop_aian"]]) + sum(census[, vars["pop_other"]]) + sum(census[, vars["pop_two"]])) #Pr(Geo | AI/AN, Other, or Mixed) } From d54ea01669792ff54c79d4d08ec8e43849c7323f Mon Sep 17 00:00:00 2001 From: Xiyu Yang <90369889+xiyuy@users.noreply.github.com> Date: Wed, 12 Jan 2022 18:19:34 -0500 Subject: [PATCH 05/19] Update get_census_data.R Add a new argument, year = "2010", to the get_census_data function. 
--- R/get_census_data.R | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/R/get_census_data.R b/R/get_census_data.R index 2c6d49f..281f629 100644 --- a/R/get_census_data.R +++ b/R/get_census_data.R @@ -27,7 +27,7 @@ #' @export #' #' @examples \dontrun{get_census_data(key = "...", states = c("NJ", "NY"), age = TRUE, sex = FALSE)} -get_census_data <- function(key, states, age = FALSE, sex = FALSE, census.geo = "block", retry = 0) { +get_census_data <- function(key, states, age = FALSE, sex = FALSE, year = "2010", census.geo = "block", retry = 0) { if (missing(key)) { stop('Must enter valid Census API key, which can be requested at https://api.census.gov/data/key_signup.html.') @@ -37,21 +37,21 @@ get_census_data <- function(key, states, age = FALSE, sex = FALSE, census.geo = CensusObj <- NULL for (s in states) { - CensusObj[[s]] <- list(state = s, age = age, sex = sex) + CensusObj[[s]] <- list(state = s, age = age, sex = sex, year = year) if (census.geo == "place") { - place <- census_geo_api(key, s, geo = "place", age, sex, retry) + place <- census_geo_api(key, s, geo = "place", age, sex, year, retry) CensusObj[[s]]$place <- place } if (census.geo == "block") { - block <- census_geo_api(key, s, geo = "block", age, sex, retry) + block <- census_geo_api(key, s, geo = "block", age, sex, year, retry) CensusObj[[s]]$block <- block } if ((census.geo == "block") || (census.geo == "tract")) { - tract <- census_geo_api(key, s, geo = "tract", age, sex, retry) + tract <- census_geo_api(key, s, geo = "tract", age, sex, year, retry) CensusObj[[s]]$tract <- tract } if ((census.geo == "block") || (census.geo == "tract") || (census.geo == "county")) { - county <- census_geo_api(key, s, geo = "county", age, sex, retry) + county <- census_geo_api(key, s, geo = "county", age, sex, year, retry) CensusObj[[s]]$county <- county } } From 99cae1d03ed62d8b9b6c47828eef4fcca2a0720b Mon Sep 17 00:00:00 2001 From: Xiyu Yang <90369889+xiyuy@users.noreply.github.com> 
Date: Wed, 12 Jan 2022 20:44:16 -0500 Subject: [PATCH 06/19] Update census_helper_v2.R Add argument year = "2010"; Update the part calculating probability Pr(Race|Geo). --- R/census_helper_v2.R | 66 +++++++++++++++++++++++++++++++++----------- 1 file changed, 50 insertions(+), 16 deletions(-) diff --git a/R/census_helper_v2.R b/R/census_helper_v2.R index d160d25..db88a8e 100644 --- a/R/census_helper_v2.R +++ b/R/census_helper_v2.R @@ -48,7 +48,7 @@ #' age = TRUE, sex = TRUE)} #' #' @export -census_helper_new <- function(key, voter.file, states = "all", geo = "tract", age = FALSE, sex = FALSE, census.data = NA, retry = 0) { +census_helper_new <- function(key, voter.file, states = "all", geo = "tract", age = FALSE, sex = FALSE, year = "2010", census.data = NA, retry = 0) { if (is.na(census.data) || (typeof(census.data) != "list")) { toDownload = TRUE @@ -76,8 +76,8 @@ census_helper_new <- function(key, voter.file, states = "all", geo = "tract", ag if (geo == "place") { geo.merge <- c("place") - if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) { - census <- census_geo_api(key, state, geo = "place", age, sex, retry) + if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex) || (census.data[[state]]$year != year)) { + census <- census_geo_api(key, state, geo = "place", age, sex, year, retry) } else { census <- census.data[[toupper(state)]]$place } @@ -85,8 +85,8 @@ census_helper_new <- function(key, voter.file, states = "all", geo = "tract", ag if (geo == "county") { geo.merge <- c("county") - if ((toDownload) || (is.null(census.data[[state]]))) {#} || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) { - census <- census_geo_api(key, state, geo = "county", age, sex, retry) + if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$year != year)) {#} || (census.data[[state]]$age != 
age) || (census.data[[state]]$sex != sex)) { + census <- census_geo_api(key, state, geo = "county", age, sex, year, retry) } else { census <- census.data[[toupper(state)]]$county } @@ -94,8 +94,8 @@ census_helper_new <- function(key, voter.file, states = "all", geo = "tract", ag if (geo == "tract") { geo.merge <- c("county", "tract") - if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) { - census <- census_geo_api(key, state, geo = "tract", age, sex, retry) + if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex) || (census.data[[state]]$year != year)) { + census <- census_geo_api(key, state, geo = "tract", age, sex, year, retry) } else { census <- census.data[[toupper(state)]]$tract } @@ -103,8 +103,8 @@ census_helper_new <- function(key, voter.file, states = "all", geo = "tract", ag if (geo == "block") { geo.merge <- c("county", "tract", "block") - if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) { - census <- census_geo_api(key, state, geo = "block", age, sex, retry) + if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex) || (census.data[[state]]$year != year)) { + census <- census_geo_api(key, state, geo = "block", age, sex, year, retry) } else { census <- census.data[[toupper(state)]]$block } @@ -112,17 +112,51 @@ census_helper_new <- function(key, voter.file, states = "all", geo = "tract", ag census$state <- state + # if (age == F & sex == F) { + # + # ## Calculate Pr(Geolocation | Race) + # geoPopulations <- rowSums(census[,grepl("P00", names(census))]) + # census$r_whi <- (0.5 + census$P005003) / (geoPopulations + 2.5)#Pr(White | Geo) + # census$r_bla <- (0.5 + census$P005004) / (geoPopulations + 2.5)#Pr(Black | Geo) + # census$r_his <- (0.5 + census$P005010) / 
(geoPopulations + 2.5)#Pr(Latino | Geo) + # census$r_asi <- (0.5 + census$P005006 + census$P005007) / (geoPopulations + 2.5) #Pr(Asian or NH/PI | Geo) + # census$r_oth <- (0.5 + census$P005005 + census$P005008 + census$P005009) / (geoPopulations + 2.5) #Pr(AI/AN, Other, or Mixed | Geo) + # + # drop <- c(grep("state", names(census)), grep("P005", names(census))) + # voters.census <- merge(voter.file[toupper(voter.file$state) == toupper(states[s]), ], census[, -drop], by = geo.merge, all.x = T) + # + # } + if (age == F & sex == F) { ## Calculate Pr(Geolocation | Race) - geoPopulations <- rowSums(census[,grepl("P00", names(census))]) - census$r_whi <- (0.5 + census$P005003) / (geoPopulations + 2.5)#Pr(White | Geo) - census$r_bla <- (0.5 + census$P005004) / (geoPopulations + 2.5)#Pr(Black | Geo) - census$r_his <- (0.5 + census$P005010) / (geoPopulations + 2.5)#Pr(Latino | Geo) - census$r_asi <- (0.5 + census$P005006 + census$P005007) / (geoPopulations + 2.5) #Pr(Asian or NH/PI | Geo) - census$r_oth <- (0.5 + census$P005005 + census$P005008 + census$P005009) / (geoPopulations + 2.5) #Pr(AI/AN, Other, or Mixed | Geo) + if (year == "2010") { + geoPopulations <- rowSums(census[,grepl("P00", names(census))]) + vars <- c( + pop_white = 'P005003', pop_black = 'P005004', + pop_aian = 'P005005', pop_asian = 'P005006', + pop_nhpi = 'P005007', pop_other = 'P005008', + pop_two = 'P005009', pop_hisp = 'P005010' + ) + drop <- c(grep("state", names(census)), grep("P005", names(census))) + } + else if (year == "2020") { + geoPopulations <- rowSums(census[,grepl("P2_", names(census))]) + vars <- c( + pop_white = 'P2_005N', pop_black = 'P2_006N', + pop_aian = 'P2_007N', pop_asian = 'P2_008N', + pop_nhpi = 'P2_009N', pop_other = 'P2_010N', + pop_two = 'P2_011N', pop_hisp = 'P2_002N' + ) + drop <- c(grep("state", names(census)), grep("P2_", names(census))) + } + + census$r_whi <- (0.5 + census[, vars["pop_white"]]) / (geoPopulations + 2.5) #Pr(White | Geo) + census$r_bla <- (0.5 + 
census[, vars["pop_black"]]) / (geoPopulations + 2.5) #Pr(Black | Geo) + census$r_his <- (0.5 + census[, vars["pop_hisp"]]) / (geoPopulations + 2.5) #Pr(Latino | Geo) + census$r_asi <- (0.5 + census[, vars["pop_asian"]] + census[, vars["pop_nhpi"]]) / (geoPopulations + 2.5) #Pr(Asian or NH/PI | Geo) + census$r_oth <- (0.5 + census[, vars["pop_aian"]] + census[, vars["pop_other"]] + census[, vars["pop_two"]]) / (geoPopulations + 2.5) #Pr(AI/AN, Other, or Mixed | Geo) - drop <- c(grep("state", names(census)), grep("P005", names(census))) voters.census <- merge(voter.file[toupper(voter.file$state) == toupper(states[s]), ], census[, -drop], by = geo.merge, all.x = T) } From a33e5b109b4a5fe574183af2db75d45454c54ffa Mon Sep 17 00:00:00 2001 From: Xiyu Yang <90369889+xiyuy@users.noreply.github.com> Date: Fri, 14 Jan 2022 17:44:41 -0500 Subject: [PATCH 07/19] Update census_helper_v2.R Keep year argument; Drop two arguments age and sex. --- R/census_helper_v2.R | 78 ++++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/R/census_helper_v2.R b/R/census_helper_v2.R index db88a8e..f589e3d 100644 --- a/R/census_helper_v2.R +++ b/R/census_helper_v2.R @@ -48,7 +48,7 @@ #' age = TRUE, sex = TRUE)} #' #' @export -census_helper_new <- function(key, voter.file, states = "all", geo = "tract", age = FALSE, sex = FALSE, year = "2010", census.data = NA, retry = 0) { +census_helper_new <- function(key, voter.file, states = "all", geo = "tract", year = "2010", census.data = NA, retry = 0) { if (is.na(census.data) || (typeof(census.data) != "list")) { toDownload = TRUE @@ -76,8 +76,8 @@ census_helper_new <- function(key, voter.file, states = "all", geo = "tract", ag if (geo == "place") { geo.merge <- c("place") - if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex) || (census.data[[state]]$year != year)) { - census <- census_geo_api(key, state, geo = "place", age, 
sex, year, retry) + if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$year != year)) {#} || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) { + census <- census_geo_api(key, state, geo = "place", age = FALSE, sex = FALSE, year, retry) } else { census <- census.data[[toupper(state)]]$place } @@ -86,7 +86,7 @@ census_helper_new <- function(key, voter.file, states = "all", geo = "tract", ag if (geo == "county") { geo.merge <- c("county") if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$year != year)) {#} || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) { - census <- census_geo_api(key, state, geo = "county", age, sex, year, retry) + census <- census_geo_api(key, state, geo = "county", age = FALSE, sex = FALSE, year, retry) } else { census <- census.data[[toupper(state)]]$county } @@ -94,8 +94,8 @@ census_helper_new <- function(key, voter.file, states = "all", geo = "tract", ag if (geo == "tract") { geo.merge <- c("county", "tract") - if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex) || (census.data[[state]]$year != year)) { - census <- census_geo_api(key, state, geo = "tract", age, sex, year, retry) + if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$year != year)) {#} || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) { + census <- census_geo_api(key, state, geo = "tract", age = FALSE, sex = FALSE, year, retry) } else { census <- census.data[[toupper(state)]]$tract } @@ -103,8 +103,8 @@ census_helper_new <- function(key, voter.file, states = "all", geo = "tract", ag if (geo == "block") { geo.merge <- c("county", "tract", "block") - if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex) || (census.data[[state]]$year != year)) { - census <- census_geo_api(key, state, geo 
= "block", age, sex, year, retry) + if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$year != year)) {#} || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) { + census <- census_geo_api(key, state, geo = "block", age = FALSE, sex = FALSE, year, retry) } else { census <- census.data[[toupper(state)]]$block } @@ -127,39 +127,39 @@ census_helper_new <- function(key, voter.file, states = "all", geo = "tract", ag # # } - if (age == F & sex == F) { - - ## Calculate Pr(Geolocation | Race) - if (year == "2010") { - geoPopulations <- rowSums(census[,grepl("P00", names(census))]) - vars <- c( - pop_white = 'P005003', pop_black = 'P005004', - pop_aian = 'P005005', pop_asian = 'P005006', - pop_nhpi = 'P005007', pop_other = 'P005008', - pop_two = 'P005009', pop_hisp = 'P005010' - ) - drop <- c(grep("state", names(census)), grep("P005", names(census))) - } - else if (year == "2020") { - geoPopulations <- rowSums(census[,grepl("P2_", names(census))]) - vars <- c( - pop_white = 'P2_005N', pop_black = 'P2_006N', - pop_aian = 'P2_007N', pop_asian = 'P2_008N', - pop_nhpi = 'P2_009N', pop_other = 'P2_010N', - pop_two = 'P2_011N', pop_hisp = 'P2_002N' - ) - drop <- c(grep("state", names(census)), grep("P2_", names(census))) - } - - census$r_whi <- (0.5 + census[, vars["pop_white"]]) / (geoPopulations + 2.5) #Pr(White | Geo) - census$r_bla <- (0.5 + census[, vars["pop_black"]]) / (geoPopulations + 2.5) #Pr(Black | Geo) - census$r_his <- (0.5 + census[, vars["pop_hisp"]]) / (geoPopulations + 2.5) #Pr(Latino | Geo) - census$r_asi <- (0.5 + census[, vars["pop_asian"]] + census[, vars["pop_nhpi"]]) / (geoPopulations + 2.5) #Pr(Asian or NH/PI | Geo) - census$r_oth <- (0.5 + census[, vars["pop_aian"]] + census[, vars["pop_other"]] + census[, vars["pop_two"]]) / (geoPopulations + 2.5) #Pr(AI/AN, Other, or Mixed | Geo) - - voters.census <- merge(voter.file[toupper(voter.file$state) == toupper(states[s]), ], census[, -drop], by = geo.merge, all.x 
= T) + # if (age == F & sex == F) { + ## Calculate Pr(Geolocation | Race) + if (year == "2010") { + geoPopulations <- rowSums(census[,grepl("P00", names(census))]) + vars <- c( + pop_white = 'P005003', pop_black = 'P005004', + pop_aian = 'P005005', pop_asian = 'P005006', + pop_nhpi = 'P005007', pop_other = 'P005008', + pop_two = 'P005009', pop_hisp = 'P005010' + ) + drop <- c(grep("state", names(census)), grep("P005", names(census))) } + else if (year == "2020") { + geoPopulations <- rowSums(census[,grepl("P2_", names(census))]) + vars <- c( + pop_white = 'P2_005N', pop_black = 'P2_006N', + pop_aian = 'P2_007N', pop_asian = 'P2_008N', + pop_nhpi = 'P2_009N', pop_other = 'P2_010N', + pop_two = 'P2_011N', pop_hisp = 'P2_002N' + ) + drop <- c(grep("state", names(census)), grep("P2_", names(census))) + } + + census$r_whi <- (0.5 + census[, vars["pop_white"]]) / (geoPopulations + 2.5) #Pr(White | Geo) + census$r_bla <- (0.5 + census[, vars["pop_black"]]) / (geoPopulations + 2.5) #Pr(Black | Geo) + census$r_his <- (0.5 + census[, vars["pop_hisp"]]) / (geoPopulations + 2.5) #Pr(Latino | Geo) + census$r_asi <- (0.5 + census[, vars["pop_asian"]] + census[, vars["pop_nhpi"]]) / (geoPopulations + 2.5) #Pr(Asian or NH/PI | Geo) + census$r_oth <- (0.5 + census[, vars["pop_aian"]] + census[, vars["pop_other"]] + census[, vars["pop_two"]]) / (geoPopulations + 2.5) #Pr(AI/AN, Other, or Mixed | Geo) + + voters.census <- merge(voter.file[toupper(voter.file$state) == toupper(states[s]), ], census[, -drop], by = geo.merge, all.x = T) + + # } keep.vars <- c(names(voter.file)[names(voter.file) != "agecat"], paste("r", c("whi", "bla", "his", "asi", "oth"), sep = "_")) From 724036f872be747bdaab5b721c5666eb16a38014 Mon Sep 17 00:00:00 2001 From: Xiyu Yang <90369889+xiyuy@users.noreply.github.com> Date: Fri, 14 Jan 2022 18:03:27 -0500 Subject: [PATCH 08/19] Update census_helper.R Add year argument to census_helper.R. 
--- R/census_helper.R | 66 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 50 insertions(+), 16 deletions(-) diff --git a/R/census_helper.R b/R/census_helper.R index 0cc394a..1f2b46a 100644 --- a/R/census_helper.R +++ b/R/census_helper.R @@ -48,7 +48,7 @@ #' age = TRUE, sex = TRUE)} #' #' @export -census_helper <- function(key, voter.file, states = "all", geo = "tract", age = FALSE, sex = FALSE, census.data = NA, retry = 0) { +census_helper <- function(key, voter.file, states = "all", geo = "tract", age = FALSE, sex = FALSE, year = "2010", census.data = NA, retry = 0) { if (is.na(census.data) || (typeof(census.data) != "list")) { toDownload = TRUE @@ -76,8 +76,8 @@ census_helper <- function(key, voter.file, states = "all", geo = "tract", age = if (geo == "place") { geo.merge <- c("place") - if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) { - census <- census_geo_api(key, state, geo = "place", age, sex, retry) + if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex) || (census.data[[state]]$year != year)) { + census <- census_geo_api(key, state, geo = "place", age, sex, year, retry) } else { census <- census.data[[toupper(state)]]$place } @@ -85,8 +85,8 @@ census_helper <- function(key, voter.file, states = "all", geo = "tract", age = if (geo == "county") { geo.merge <- c("county") - if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) { - census <- census_geo_api(key, state, geo = "county", age, sex, retry) + if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex) || (census.data[[state]]$year != year)) { + census <- census_geo_api(key, state, geo = "county", age, sex, year, retry) } else { census <- census.data[[toupper(state)]]$county } @@ -94,8 +94,8 @@ 
census_helper <- function(key, voter.file, states = "all", geo = "tract", age = if (geo == "tract") { geo.merge <- c("county", "tract") - if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) { - census <- census_geo_api(key, state, geo = "tract", age, sex, retry) + if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex) || (census.data[[state]]$year != year)) { + census <- census_geo_api(key, state, geo = "tract", age, sex, year, retry) } else { census <- census.data[[toupper(state)]]$tract } @@ -103,8 +103,8 @@ census_helper <- function(key, voter.file, states = "all", geo = "tract", age = if (geo == "block") { geo.merge <- c("county", "tract", "block") - if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) { - census <- census_geo_api(key, state, geo = "block", age, sex, retry) + if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex) || (census.data[[state]]$year != year)) { + census <- census_geo_api(key, state, geo = "block", age, sex, year, retry) } else { census <- census.data[[toupper(state)]]$block } @@ -140,16 +140,50 @@ census_helper <- function(key, voter.file, states = "all", geo = "tract", age = voter.file$agecat <- ifelse(voter.file$age >= 85, 23, voter.file$agecat) } + # if (age == F & sex == F) { + # + # ## Calculate Pr(Geolocation | Race) + # census$r_whi <- census$P005003 / sum(census$P005003) #Pr(Tract|White) + # census$r_bla <- census$P005004 / sum(census$P005004) #Pr(Tract|Black) + # census$r_his <- census$P005010 / sum(census$P005010) #Pr(Tract|Latino) + # census$r_asi <- (census$P005006 + census$P005007) / (sum(census$P005006) + sum(census$P005007)) #Pr(Tract | Asian or NH/PI) + # census$r_oth <- (census$P005005 + census$P005008 + census$P005009) / 
(sum(census$P005005) + sum(census$P005008) + sum(census$P005009)) #Pr(Tract | AI/AN, Other, or Mixed) + # + # drop <- c(grep("state", names(census)), grep("P005", names(census))) + # voters.census <- merge(voter.file[toupper(voter.file$state) == toupper(states[s]), ], census[, -drop], by = geo.merge, all.x = T) + # + # } + if (age == F & sex == F) { - + ## Calculate Pr(Geolocation | Race) - census$r_whi <- census$P005003 / sum(census$P005003) #Pr(Tract|White) - census$r_bla <- census$P005004 / sum(census$P005004) #Pr(Tract|Black) - census$r_his <- census$P005010 / sum(census$P005010) #Pr(Tract|Latino) - census$r_asi <- (census$P005006 + census$P005007) / (sum(census$P005006) + sum(census$P005007)) #Pr(Tract | Asian or NH/PI) - census$r_oth <- (census$P005005 + census$P005008 + census$P005009) / (sum(census$P005005) + sum(census$P005008) + sum(census$P005009)) #Pr(Tract | AI/AN, Other, or Mixed) + if (year == "2010") { + geoPopulations <- rowSums(census[,grepl("P00", names(census))]) + vars <- c( + pop_white = 'P005003', pop_black = 'P005004', + pop_aian = 'P005005', pop_asian = 'P005006', + pop_nhpi = 'P005007', pop_other = 'P005008', + pop_two = 'P005009', pop_hisp = 'P005010' + ) + drop <- c(grep("state", names(census)), grep("P005", names(census))) + } + else if (year == "2020") { + geoPopulations <- rowSums(census[,grepl("P2_", names(census))]) + vars <- c( + pop_white = 'P2_005N', pop_black = 'P2_006N', + pop_aian = 'P2_007N', pop_asian = 'P2_008N', + pop_nhpi = 'P2_009N', pop_other = 'P2_010N', + pop_two = 'P2_011N', pop_hisp = 'P2_002N' + ) + drop <- c(grep("state", names(census)), grep("P2_", names(census))) + } + + census$r_whi <- census[, vars["pop_white"]] / sum(census[, vars["pop_white"]]) #Pr(Geo | White) + census$r_bla <- census[, vars["pop_black"]] / sum(census[, vars["pop_black"]]) #Pr(Geo | Black) + census$r_his <- census[, vars["pop_hisp"]] / sum(census[, vars["pop_hisp"]]) #Pr(Geo | Latino) + census$r_asi <- (census[, vars["pop_asian"]] + 
census[, vars["pop_nhpi"]]) / (sum(census[, vars["pop_asian"]]) + sum(census[, vars["pop_nhpi"]])) #Pr(Geo | Asian or NH/PI) + census$r_oth <- (census[, vars["pop_aian"]] + census[, vars["pop_other"]] + census[, vars["pop_two"]]) / (sum(census[, vars["pop_aian"]]) + sum(census[, vars["pop_other"]]) + sum(census[, vars["pop_two"]])) #Pr(Geo | AI/AN, Other, or Mixed) - drop <- c(grep("state", names(census)), grep("P005", names(census))) voters.census <- merge(voter.file[toupper(voter.file$state) == toupper(states[s]), ], census[, -drop], by = geo.merge, all.x = T) } From 570fa10261f4e2ed7a5c0df027679ed4c8d062d9 Mon Sep 17 00:00:00 2001 From: Xiyu Yang <90369889+xiyuy@users.noreply.github.com> Date: Sat, 15 Jan 2022 17:23:04 -0500 Subject: [PATCH 09/19] Update predict_race.R Add a year argument; Add a warning saying that predictions other than having both age and sex equal to FALSE are not supported with 2020 census data. --- R/predict_race.R | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/R/predict_race.R b/R/predict_race.R index e10eb6f..bc427ef 100644 --- a/R/predict_race.R +++ b/R/predict_race.R @@ -97,7 +97,13 @@ ## Race Prediction Function predict_race <- function(voter.file, census.surname = TRUE, surname.only = FALSE, surname.year = 2010, name.data = NULL, - census.geo, census.key, census.data = NA, age = FALSE, sex = FALSE, party, retry = 0, impute.missing = FALSE) { + census.geo, census.key, census.data = NA, age = FALSE, sex = FALSE, year = "2010", + party, retry = 0, impute.missing = FALSE) { + + # warning: 2020 census data only support prediction when both age and sex are equal to FALSE + if ((sex == TRUE || age == TRUE) && (year == "2020")) { + stop('Warning: only predictions with both age and sex equal to FALSE are supported when using 2020 census data.') + } if (!missing(census.geo) && (census.geo == "precinct")) { # geo <- "precinct" @@ -144,7 +150,7 @@ predict_race <- function(voter.file, ## Merge in Pr(Race | 
Surname) if necessary if (census.surname) { - if (!(surname.year %in% c(2000,2010, 2021))) { + if (!(surname.year %in% c(2000,2010,2021))) { stop(paste(surname.year, "is not a valid surname.year. It should be 2000, 2010 (default) or 2021.")) } voter.file <- merge_surnames(voter.file, surname.year = surname.year, name.data = name.data, impute.missing = impute.missing) @@ -182,6 +188,7 @@ predict_race <- function(voter.file, geo = "place", age = age, sex = sex, + year = year, census.data = census.data, retry = retry) } @@ -195,6 +202,7 @@ predict_race <- function(voter.file, geo = "block", age = age, sex = sex, + year = year, census.data = census.data, retry = retry) } @@ -213,6 +221,7 @@ predict_race <- function(voter.file, geo = "tract", age = age, sex = sex, + year = year, census.data = census.data, retry = retry) } @@ -226,6 +235,7 @@ predict_race <- function(voter.file, geo = "county", age = age, sex = sex, + year = year, census.data = census.data, retry = retry) } From cea34f904273acf79d0660ade7d42bb293199e97 Mon Sep 17 00:00:00 2001 From: Xiyu Yang <90369889+xiyuy@users.noreply.github.com> Date: Tue, 18 Jan 2022 16:54:08 -0500 Subject: [PATCH 10/19] Update documentations of census_geo_api.R Update the function description, parameter description of year, and add one more example. --- R/census_geo_api.R | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/R/census_geo_api.R b/R/census_geo_api.R index 1901a09..e0c3f5d 100644 --- a/R/census_geo_api.R +++ b/R/census_geo_api.R @@ -2,13 +2,13 @@ #' #' \code{census_geo_api} retrieves U.S. Census geographic data for a given state. #' -#' This function allows users to download U.S. Census 2010 geographic data, +#' This function allows users to download U.S. Census 2010 or 2020 geographic data, #' at either the county, tract, block, or place level, for a particular state. #' #' @param key A required character object. 
Must contain user's Census API #' key, which can be requested \href{https://api.census.gov/data/key_signup.html}{here}. #' @param state A required character object specifying which state to extract Census data for, -#' e.g., \code{"NJ"}. +#' e.g., \code{"NJ"}. #' @param geo A character object specifying what aggregation level to use. #' Use \code{"county"}, \code{"tract"}, \code{"block"}, or \code{"place"}. #' Default is \code{"tract"}. Warning: extracting block-level data takes very long. @@ -20,10 +20,14 @@ #' sex or not. If \code{FALSE} (default), function will return Pr(Geolocation | Race). #' If \code{TRUE}, function will return Pr(Geolocation, Sex | Race). #' If \code{\var{age}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race). +#' @param year A character object specifying the year of U.S. Census data to be downloaded. +#' Use \code{"2010"}, or \code{"2020"}. Default is \code{"2010"}. +#' Warning: 2020 U.S. Census data is downloaded only when \code{\var{age}} and +#' \code{\var{sex}} are both \code{FALSE}. #' @param retry The number of retries at the census website if network interruption occurs. #' @param save_temp File indicating where to save the temporary outputs. -#' Defaults to NULL. If specified, the function will look for an .RData file -#' with the same format as the expected output. +#' Defaults to NULL. If specified, the function will look for an .RData file +#' with the same format as the expected output. #' @return Output will be an object of class \code{list}, indexed by state names. It will #' consist of the original user-input data with additional columns of Census geographic data. 
#' @@ -31,6 +35,7 @@ #' \dontshow{data(voters)} #' \dontrun{census_geo_api(key = "...", states = c("NJ", "DE"), geo = "block")} #' \dontrun{census_geo_api(key = "...", states = "FL", geo = "tract", age = TRUE, sex = TRUE)} +#' \dontrun{census_geo_api(key = "...", states = "MA", geo = "place", age = FALSE, sex = FALSE, year = "2020")} #' #' @references #' Relies on get_census_api, get_census_api_2, and vec_to_chunk functions authored by Nicholas Nagle, From ce78990b1986b9d69b87bd35da876a1dd6ae27d8 Mon Sep 17 00:00:00 2001 From: Xiyu Yang <90369889+xiyuy@users.noreply.github.com> Date: Tue, 18 Jan 2022 17:17:42 -0500 Subject: [PATCH 11/19] Update documentations of get_census_data.R Add description of parameter year and one more example with year equal to 2020. --- R/get_census_data.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/R/get_census_data.R b/R/get_census_data.R index 281f629..bb8d2e8 100644 --- a/R/get_census_data.R +++ b/R/get_census_data.R @@ -15,6 +15,10 @@ #' sex or not. If \code{FALSE} (default), function will return Pr(Geolocation | Race). #' If \code{TRUE}, function will return Pr(Geolocation, Sex | Race). #' If \code{\var{age}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race). +#' @param year A character object specifying the year of U.S. Census data to be downloaded. +#' Use \code{"2010"}, or \code{"2020"}. Default is \code{"2010"}. +#' Warning: 2020 U.S. Census data is downloaded only when \code{\var{age}} and +#' \code{\var{sex}} are both \code{FALSE}. #' @param census.geo An optional character vector specifying what level of #' geography to use to merge in U.S. Census 2010 geographic data. Currently #' \code{"county"}, \code{"tract"}, \code{"block"}, and \code{"place"} are supported. 
@@ -26,7 +30,9 @@ #' #' @export #' -#' @examples \dontrun{get_census_data(key = "...", states = c("NJ", "NY"), age = TRUE, sex = FALSE)} +#' @examples +#' \dontrun{get_census_data(key = "...", states = c("NJ", "NY"), age = TRUE, sex = FALSE)} +#' \dontrun{get_census_data(key = "...", states = "MN", age = FALSE, sex = FALSE, year = "2020")} get_census_data <- function(key, states, age = FALSE, sex = FALSE, year = "2010", census.geo = "block", retry = 0) { if (missing(key)) { From 57fe9fde9d9f825bf18695ad5c26c5b03f50e904 Mon Sep 17 00:00:00 2001 From: Xiyu Yang <90369889+xiyuy@users.noreply.github.com> Date: Thu, 20 Jan 2022 12:11:40 -0500 Subject: [PATCH 12/19] Update the code and documentation of census_helper_new. Code 1. If census.data is provided, ensure that its elements sex and age are FALSE. Documentation 1. Add place as a geolocation level to function description and parameter description of geo. 2. Revise parameter description for census.data (both elements sex and age must be FALSE; year element of census.data must correspond to the value of the year argument in the function). 3. Revise all examples from census_helper to census_helper_new. 4. Add one more example with year equal to "2020". --- R/census_helper_v2.R | 45 ++++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 25 deletions(-) diff --git a/R/census_helper_v2.R b/R/census_helper_v2.R index f589e3d..636f0dc 100644 --- a/R/census_helper_v2.R +++ b/R/census_helper_v2.R @@ -1,11 +1,11 @@ #' Census helper function. #' -#' \code{census_helper_v2} links user-input dataset with Census geographic data. +#' \code{census_helper_new} links user-input dataset with Census geographic data. #' #' This function allows users to link their geocoded dataset (e.g., voter file) -#' with U.S. Census 2010 data. The function extracts Census Summary File data -#' at the county, tract, or block level using the 'UScensus2010' package.
Census data -#' calculated are Pr(Geolocation | Race) where geolocation is county, tract, or block. +#' with U.S. Census data (2010 or 2020). The function extracts Census Summary File data +#' at the county, tract, block, or place level using the 'UScensus2010' package. Census data +#' calculated are Pr(Geolocation | Race) where geolocation is county, tract, block, or place. #' #' @param key A required character object. Must contain user's Census API #' key, which can be requested \href{https://api.census.gov/data/key_signup.html}{here}. @@ -19,22 +19,17 @@ #' Census data for, e.g. \code{c("NJ", "NY")}. Default is \code{"all"}, which extracts #' Census data for all states contained in user-input data. #' @param geo A character object specifying what aggregation level to use. -#' Use \code{"county"}, \code{"tract"}, or \code{"block"}. Default is \code{"tract"}. -#' Warning: extracting block-level data takes very long. -#' @param age A \code{TRUE}/\code{FALSE} object indicating whether to condition on -#' age or not. If \code{FALSE} (default), function will return Pr(Geolocation | Race). -#' If \code{TRUE}, function will return Pr(Geolocation, Age | Race). -#' If \code{\var{sex}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race). -#' @param sex A \code{TRUE}/\code{FALSE} object indicating whether to condition on -#' sex or not. If \code{FALSE} (default), function will return Pr(Geolocation | Race). -#' If \code{TRUE}, function will return Pr(Geolocation, Sex | Race). -#' If \code{\var{age}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race). +#' Use \code{"county"}, \code{"tract"}, \code{"block"}, or \code{"place"}. +#' Default is \code{"tract"}. Warning: extracting block-level data takes very long. +#' @param year A character object specifying the year of U.S. Census data to be downloaded. +#' Use \code{"2010"}, or \code{"2020"}. Default is \code{"2010"}. 
#' @param census.data A optional census object of class \code{list} containing #' pre-saved Census geographic data. Can be created using \code{get_census_data} function. -#' If \code{\var{census.data}} is provided, the \code{\var{age}} element must have the same value -#' as the \code{\var{age}} option specified in this function (i.e., \code{TRUE} in both or -#' \code{FALSE} in both). Similarly, the \code{\var{sex}} element in the object provided in -#' \code{\var{census.data}} must have the same value as the \code{\var{sex}} option here. +#' If \code{\var{census.data}} is provided, the \code{\var{year}} element must +#' have the same value as the \code{\var{year}} option specified in this function +#' (i.e., \code{"2010"} in both or \code{"2020"} in both). +#' If \code{\var{census.data}} is provided, the \code{\var{age}} and the \code{\var{sex}} +#' elements must be \code{FALSE}. This corresponds to the defaults of \code{census_geo_api}. #' If \code{\var{census.data}} is missing, Census geographic data will be obtained via Census API. #' @param retry The number of retries at the census website if network interruption occurs. #' @return Output will be an object of class \code{data.frame}. 
It will @@ -43,9 +38,9 @@ #' #' @examples #' \dontshow{data(voters)} -#' \dontrun{census_helper(key = "...", voter.file = voters, states = "nj", geo = "block")} -#' \dontrun{census_helper(key = "...", voter.file = voters, states = "all", geo = "tract", -#' age = TRUE, sex = TRUE)} +#' \dontrun{census_helper_new(key = "...", voter.file = voters, states = "nj", geo = "block")} +#' \dontrun{census_helper_new(key = "...", voter.file = voters, states = "all", geo = "tract")} +#' \dontrun{census_helper_new(key = "...", voter.file = voters, states = "all", geo = "place", year = "2020")} #' #' @export census_helper_new <- function(key, voter.file, states = "all", geo = "tract", year = "2010", census.data = NA, retry = 0) { @@ -76,7 +71,7 @@ census_helper_new <- function(key, voter.file, states = "all", geo = "tract", ye if (geo == "place") { geo.merge <- c("place") - if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$year != year)) {#} || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) { + if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$year != year) || (census.data[[state]]$age != FALSE) || (census.data[[state]]$sex != FALSE)) {#} || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) { census <- census_geo_api(key, state, geo = "place", age = FALSE, sex = FALSE, year, retry) } else { census <- census.data[[toupper(state)]]$place @@ -85,7 +80,7 @@ census_helper_new <- function(key, voter.file, states = "all", geo = "tract", ye if (geo == "county") { geo.merge <- c("county") - if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$year != year)) {#} || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) { + if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$year != year) || (census.data[[state]]$age != FALSE) || (census.data[[state]]$sex != FALSE)) {#} || (census.data[[state]]$age != age) || 
(census.data[[state]]$sex != sex)) { census <- census_geo_api(key, state, geo = "county", age = FALSE, sex = FALSE, year, retry) } else { census <- census.data[[toupper(state)]]$county @@ -94,7 +89,7 @@ census_helper_new <- function(key, voter.file, states = "all", geo = "tract", ye if (geo == "tract") { geo.merge <- c("county", "tract") - if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$year != year)) {#} || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) { + if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$year != year) || (census.data[[state]]$age != FALSE) || (census.data[[state]]$sex != FALSE)) {#} || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) { census <- census_geo_api(key, state, geo = "tract", age = FALSE, sex = FALSE, year, retry) } else { census <- census.data[[toupper(state)]]$tract @@ -103,7 +98,7 @@ census_helper_new <- function(key, voter.file, states = "all", geo = "tract", ye if (geo == "block") { geo.merge <- c("county", "tract", "block") - if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$year != year)) {#} || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) { + if ((toDownload) || (is.null(census.data[[state]])) || (census.data[[state]]$year != year) || (census.data[[state]]$age != FALSE) || (census.data[[state]]$sex != FALSE)) {#} || (census.data[[state]]$age != age) || (census.data[[state]]$sex != sex)) { census <- census_geo_api(key, state, geo = "block", age = FALSE, sex = FALSE, year, retry) } else { census <- census.data[[toupper(state)]]$block From 87642efaad79a47a42de8873729387ea4d2bc26d Mon Sep 17 00:00:00 2001 From: Xiyu Yang <90369889+xiyuy@users.noreply.github.com> Date: Thu, 20 Jan 2022 12:20:16 -0500 Subject: [PATCH 13/19] Update the documentation of census_helper.R 1. Add place as a geolocation level to function description and parameter description of geo. 2. 
Add parameter description for year. 3. Revise parameter description for census.data (year element of census.data must correspond to the value of the year argument). 4. Add one more example with year equal to "2020". --- R/census_helper.R | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/R/census_helper.R b/R/census_helper.R index 1f2b46a..8e20e42 100644 --- a/R/census_helper.R +++ b/R/census_helper.R @@ -3,9 +3,9 @@ #' \code{census_helper} links user-input dataset with Census geographic data. #' #' This function allows users to link their geocoded dataset (e.g., voter file) -#' with U.S. Census 2010 data. The function extracts Census Summary File data -#' at the county, tract, or block level using the 'UScensus2010' package. Census data -#' calculated are Pr(Geolocation | Race) where geolocation is county, tract, or block. +#' with U.S. Census data (2010 or 2020). The function extracts Census Summary File data +#' at the county, tract, block, or place level using the 'UScensus2010' package. Census data +#' calculated are Pr(Geolocation | Race) where geolocation is county, tract, block, or place. #' #' @param key A required character object. Must contain user's Census API #' key, which can be requested \href{https://api.census.gov/data/key_signup.html}{here}. @@ -19,7 +19,7 @@ #' Census data for, e.g. \code{c("NJ", "NY")}. Default is \code{"all"}, which extracts #' Census data for all states contained in user-input data. #' @param geo A character object specifying what aggregation level to use. -#' Use \code{"county"}, \code{"tract"}, or \code{"block"}. Default is \code{"tract"}. +#' Use \code{"county"}, \code{"tract"}, \code{"block"} or \code{"place"}. Default is \code{"tract"}. #' Warning: extracting block-level data takes very long. #' @param age A \code{TRUE}/\code{FALSE} object indicating whether to condition on #' age or not. If \code{FALSE} (default), function will return Pr(Geolocation | Race).
@@ -29,12 +29,19 @@ #' sex or not. If \code{FALSE} (default), function will return Pr(Geolocation | Race). #' If \code{TRUE}, function will return Pr(Geolocation, Sex | Race). #' If \code{\var{age}} is also \code{TRUE}, function will return Pr(Geolocation, Age, Sex | Race). +#' @param year A character object specifying the year of U.S. Census data to be downloaded. +#' Use \code{"2010"}, or \code{"2020"}. Default is \code{"2010"}. +#' Warning: 2020 U.S. Census data is downloaded only when \code{\var{age}} and +#' \code{\var{sex}} are both \code{FALSE}. #' @param census.data A optional census object of class \code{list} containing #' pre-saved Census geographic data. Can be created using \code{get_census_data} function. #' If \code{\var{census.data}} is provided, the \code{\var{age}} element must have the same value #' as the \code{\var{age}} option specified in this function (i.e., \code{TRUE} in both or #' \code{FALSE} in both). Similarly, the \code{\var{sex}} element in the object provided in #' \code{\var{census.data}} must have the same value as the \code{\var{sex}} option here. +#' Moreover, the \code{\var{year}} element in the object provided in \code{\var{census.data}} +#' must have the same value as the \code{\var{year}} option in the function (i.e., \code{"2010"} +#' in both or \code{"2020"} in both). #' If \code{\var{census.data}} is missing, Census geographic data will be obtained via Census API. #' @param retry The number of retries at the census website if network interruption occurs. #' @return Output will be an object of class \code{data.frame}. 
It will @@ -46,6 +53,8 @@ #' \dontrun{census_helper(key = "...", voter.file = voters, states = "nj", geo = "block")} #' \dontrun{census_helper(key = "...", voter.file = voters, states = "all", geo = "tract", #' age = TRUE, sex = TRUE)} +#' \dontrun{census_helper(key = "...", voter.file = voters, states = "all", geo = "county", +#' age = FALSE, sex = FALSE, year = "2020")} #' #' @export census_helper <- function(key, voter.file, states = "all", geo = "tract", age = FALSE, sex = FALSE, year = "2010", census.data = NA, retry = 0) { From f8c43fdf6c26bdde0121a842449212dc7e237ce8 Mon Sep 17 00:00:00 2001 From: Xiyu Yang <90369889+xiyuy@users.noreply.github.com> Date: Thu, 20 Jan 2022 17:55:11 -0500 Subject: [PATCH 14/19] Update predict_race_v2.R Add a year argument to function predict_race_new. --- R/predict_race_v2.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/predict_race_v2.R b/R/predict_race_v2.R index ca2e265..696e471 100644 --- a/R/predict_race_v2.R +++ b/R/predict_race_v2.R @@ -73,7 +73,7 @@ ## Race Prediction Function predict_race_new <- function(voter.file, namesToUse = 'last', census.geo, census.surnames = NULL, census.key, - census.data = NA, retry = 0) { + census.data = NA, year = "2010", retry = 0) { # check the geography if (!missing(census.geo) && (census.geo == "precinct")) { @@ -147,6 +147,7 @@ predict_race_new <- function(voter.file, namesToUse = 'last', census.geo, census voter.file = voter.file, states = "all", geo = "place", + year = year, census.data = census.data, retry = retry) } @@ -158,6 +159,7 @@ predict_race_new <- function(voter.file, namesToUse = 'last', census.geo, census voter.file = voter.file, states = "all", geo = "block", + year = year, census.data = census.data, retry = retry) } @@ -174,6 +176,7 @@ predict_race_new <- function(voter.file, namesToUse = 'last', census.geo, census voter.file = voter.file, states = "all", geo = "tract", + year = year, census.data = census.data, retry = retry) } @@ -185,6 
+188,7 @@ predict_race_new <- function(voter.file, namesToUse = 'last', census.geo, census voter.file = voter.file, states = "all", geo = "county", + year = year, census.data = census.data, retry = retry) } From 10f9b2f0f2b32af312da21f34dad6ffc27238ca6 Mon Sep 17 00:00:00 2001 From: Xiyu Yang <90369889+xiyuy@users.noreply.github.com> Date: Fri, 21 Jan 2022 12:18:10 -0500 Subject: [PATCH 15/19] Update the documentation of predict_race.R Based on line 156 (surname.year %in% c(2000, 2010, 2021)), there are three available values for surname.year. The documentation of parameters census.surname, surname.year, and census.geo need to be revised to reflect the new value option 2021. 1. Revise the description for census.surname to include the 2021 Surname List. 2. Revise the description for surname.year to include the 2021 Surname List. 3. Revise the description for census.geo to not specify the year of the Census geographic data. 4. Add parameter description for year. 5. Add one more example for the function. --- R/predict_race.R | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/R/predict_race.R b/R/predict_race.R index bc427ef..c12d119 100644 --- a/R/predict_race.R +++ b/R/predict_race.R @@ -20,7 +20,7 @@ #' See below for other optional fields. #' @param census.surname A \code{TRUE}/\code{FALSE} object. If \code{TRUE}, #' function will call \code{merge_surnames} to merge in Pr(Race | Surname) -#' from U.S. Census Surname List (2000 or 2010) and Spanish Surname List. +#' from U.S. Census Surname List (2000, 2010, or 2021) and Spanish Surname List. #' If \code{FALSE}, \code{voter.file} object must contain additional fields specifying #' Pr(Race | Surname), named as follows: \code{\var{p_whi}} for Whites, #' \code{\var{p_bla}} for Blacks, \code{\var{p_his}} for Hispanics/Latinos, @@ -31,9 +31,9 @@ #' @param surname.year A number to specify the year of the census surname statistics.
#' These surname statistics is stored in the data, and will be automatically loaded. #' The default value is \code{2010}, which means the surname statistics from the -#' 2010 census will be used. Currently, the other available choice is \code{2000}. +#' 2010 census will be used. Currently, the other available choices are \code{2000} and \code{2021}. #' @param census.geo An optional character vector specifying what level of -#' geography to use to merge in U.S. Census 2010 geographic data. Currently +#' geography to use to merge in U.S. Census geographic data. Currently #' \code{"county"}, \code{"tract"}, \code{"block"}, and \code{"place"} are supported. #' Note: sufficient information must be in user-defined \code{\var{voter.file}} object. #' If \code{\var{census.geo} = "county"}, then \code{\var{voter.file}} @@ -63,6 +63,8 @@ #' May only be set to \code{TRUE} if \code{census.geo} option is specified. #' If \code{TRUE}, \code{\var{voter.file}} should include a numerical variable \code{\var{sex}}, #' where \code{\var{sex}} is coded as 0 for males and 1 for females. +#' @param year An optional character vector specifying the year of U.S. Census geographic +#' data to be downloaded. Use \code{"2010"}, or \code{"2020"}. Default is \code{"2010"}. #' @param party An optional character object specifying party registration field #' in \code{\var{voter.file}}, e.g., \code{\var{party} = "PartyReg"}. 
#' If specified, race/ethnicity predictions will be conditioned @@ -86,6 +88,7 @@ #' \dontrun{predict_race(voter.file = voters, census.geo = "tract", census.key = "...")} #' \dontrun{predict_race(voter.file = voters, census.geo = "tract", census.key = "...", age = T)} #' \dontrun{predict_race(voter.file = voters, census.geo = "place", census.key = "...", sex = T)} +#' \dontrun{predict_race(voter.file = voters, census.geo = "place", census.key = "...", year = "2020")} #' \dontrun{CensusObj <- get_census_data("...", state = c("NY", "DC", "NJ")); #' predict_race(voter.file = voters, census.geo = "tract", census.data = CensusObj, party = "PID")} #' \dontrun{CensusObj2 <- get_census_data(key = "...", state = c("NY", "DC", "NJ"), age = T, sex = T); From 4d4f8a382d5a2c31e1807908fa9a5cdc93ffc802 Mon Sep 17 00:00:00 2001 From: Xiyu Yang <90369889+xiyuy@users.noreply.github.com> Date: Fri, 21 Jan 2022 19:07:36 -0500 Subject: [PATCH 16/19] Update the documentation of census_helper.R and census_helper_v2.R Take out the references to "USCensus2010" in the function description. --- R/census_helper.R | 4 ++-- R/census_helper_v2.R | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/census_helper.R b/R/census_helper.R index 8e20e42..bf409fd 100644 --- a/R/census_helper.R +++ b/R/census_helper.R @@ -4,8 +4,8 @@ #' #' This function allows users to link their geocoded dataset (e.g., voter file) #' with U.S. Census data (2010 or 2020). The function extracts Census Summary File data -#' at the county, tract, block, or place level using the 'UScensus2010' package. Census data -#' calculated are Pr(Geolocation | Race) where geolocation is county, tract, block, or place. +#' at the county, tract, block, or place level. Census data calculated are +#' Pr(Geolocation | Race) where geolocation is county, tract, block, or place. #' #' @param key A required character object. 
Must contain user's Census API #' key, which can be requested \href{https://api.census.gov/data/key_signup.html}{here}. diff --git a/R/census_helper_v2.R b/R/census_helper_v2.R index 636f0dc..32485a8 100644 --- a/R/census_helper_v2.R +++ b/R/census_helper_v2.R @@ -4,8 +4,8 @@ #' #' This function allows users to link their geocoded dataset (e.g., voter file) #' with U.S. Census data (2010 or 2020). The function extracts Census Summary File data -#' at the county, tract, block, or place level using the 'UScensus2010' package. Census data -#' calculated are Pr(Geolocation | Race) where geolocation is county, tract, block, or place. +#' at the county, tract, block, or place level. Census data calculated are +#' Pr(Geolocation | Race) where geolocation is county, tract, block, or place. #' #' @param key A required character object. Must contain user's Census API #' key, which can be requested \href{https://api.census.gov/data/key_signup.html}{here}. From b600ddee5402eda84ceea9d9992831fba9692c2f Mon Sep 17 00:00:00 2001 From: Xiyu Yang <90369889+xiyuy@users.noreply.github.com> Date: Fri, 21 Jan 2022 19:59:28 -0500 Subject: [PATCH 17/19] Update documentation of merge_names.R 1. Add the description of use.census.surnames. 2. Add the description of census.surnames. --- R/merge_names.R | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/R/merge_names.R b/R/merge_names.R index c930788..cb634fa 100644 --- a/R/merge_names.R +++ b/R/merge_names.R @@ -33,6 +33,16 @@ #' Other options are \code{"last, first"}, indicating that both last and first names will be #' used, and \code{"last, first, middle"}, indicating that last, first, and middle names will all #' be used. +#' @param use.census.surnames A \code{TRUE}/\code{FALSE} object indicating whether +#' to use an alternative surname dictionary. If \code{TRUE}, the surname dictionary +#' should be passed to \code{\var{census.surnames}}. Default is \code{FALSE}. 
+#' @param census.surnames An object of class \code{data.frame} provided by the +#' users as an alternative surname dictionary. It will consist of a list of +#' U.S. surnames, along with the associated probabilities P(name | ethnicity) +#' for ethnicities: white, Black, Hispanic, Asian, and other. Default is \code{NULL}. +#' (\code{\var{last_name}} for U.S. surnames, \code{\var{p_whi_last}} for White, +#' \code{\var{p_bla_last}} for Black, \code{\var{p_his_last}} for Hispanic, +#' \code{\var{p_asi_last}} for Asian, \code{\var{p_oth_last}} for other). #' @param clean.names A \code{TRUE}/\code{FALSE} object. If \code{TRUE}, #' any surnames in \code{\var{voter.file}} that cannot initially be matched #' to the database will be cleaned, according to U.S. Census specifications, From 2787837920ef038477e60f6bb23575a371fa1888 Mon Sep 17 00:00:00 2001 From: Xiyu Yang <90369889+xiyuy@users.noreply.github.com> Date: Fri, 21 Jan 2022 20:04:50 -0500 Subject: [PATCH 18/19] Update the documentation of predict_race_v2.R 1. Revise the description of census.geo so as to not specify the year of the Census geographic data. 2. Add the description of census.surnames. 3. Add the description of year. 4. Change the function name of all examples from predict_race to predict_race_new. 5. Delete, modify and add examples to reflect the functionality of predict_race_new. --- R/predict_race_v2.R | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/R/predict_race_v2.R b/R/predict_race_v2.R index 696e471..986c069 100644 --- a/R/predict_race_v2.R +++ b/R/predict_race_v2.R @@ -21,13 +21,13 @@ #' County is three characters (e.g., \code{"031"} not \code{"31"}), #' tract is six characters, and block is four characters. Place is five characters. #' See below for other optional fields. -#' #' @param namesToUse A character vector identifying which names to use for the prediction. 
+#' @param namesToUse A character vector identifying which names to use for the prediction. #' The default value is \code{"last"}, indicating that only the last name will be used. #' Other options are \code{"last, first"}, indicating that both last and first names will be #' used, and \code{"last, first, middle"}, indicating that last, first, and middle names will all #' be used. #' @param census.geo An optional character vector specifying what level of -#' geography to use to merge in U.S. Census 2010 geographic data. Currently +#' geography to use to merge in U.S. Census geographic data. Currently #' \code{"county"}, \code{"tract"}, \code{"block"}, and \code{"place"} are supported. #' Note: sufficient information must be in user-defined \code{\var{voter.file}} object. #' If \code{\var{census.geo} = "county"}, then \code{\var{voter.file}} @@ -40,12 +40,21 @@ #' must have column named \code{place}. #' Specifying \code{\var{census.geo}} will call \code{census_helper} function #' to merge Census geographic data at specified level of geography. +#' @param census.surnames An object of class \code{data.frame} provided by the +#' users as an alternative surname dictionary. It will consist of a list of +#' U.S. surnames, along with the associated probabilities P(name | ethnicity) +#' for ethnicities: white, Black, Hispanic, Asian, and other. Default is \code{NULL}. +#' (\code{\var{last_name}} for U.S. surnames, \code{\var{p_whi_last}} for White, +#' \code{\var{p_bla_last}} for Black, \code{\var{p_his_last}} for Hispanic, +#' \code{\var{p_asi_last}} for Asian, \code{\var{p_oth_last}} for other). #' @param census.key A character object specifying user's Census API #' key. Required if \code{\var{census.geo}} is specified, because #' a valid Census API key is required to download Census geographic data. #' @param census.data A list indexed by two-letter state abbreviations, #' which contains pre-saved Census geographic data. 
#' Can be generated using \code{get_census_data} function. +#' @param year An optional character vector specifying the year of U.S. Census geographic +#' data to be downloaded. Use \code{"2010"} or \code{"2020"}. Default is \code{"2010"}. #' @param retry The number of retries at the census website if network interruption occurs. #' @return Output will be an object of class \code{data.frame}. It will #' consist of the original user-input data with additional columns with @@ -58,17 +67,16 @@ #' #' @examples #' data(voters) -#' predict_race(voters, surname.only = TRUE) -#' predict_race(voter.file = voters, surname.only = TRUE) -#' \dontrun{predict_race(voter.file = voters, census.geo = "tract", census.key = "...")} -#' \dontrun{predict_race(voter.file = voters, census.geo = "tract", census.key = "...", age = T)} -#' \dontrun{predict_race(voter.file = voters, census.geo = "place", census.key = "...", sex = T)} +#' predict_race_new(voters, namesToUse = 'last') +#' predict_race_new(voter.file = voters, namesToUse = 'last') +#' \dontrun{predict_race_new(voter.file = voters, census.geo = "tract", census.key = "...")} +#' \dontrun{predict_race_new(voter.file = voters, census.geo = "place", census.key = "...", year = "2020")} #' \dontrun{CensusObj <- get_census_data("...", state = c("NY", "DC", "NJ")); -#' predict_race(voter.file = voters, census.geo = "tract", census.data = CensusObj, party = "PID")} -#' \dontrun{CensusObj2 <- get_census_data(key = "...", state = c("NY", "DC", "NJ"), age = T, sex = T); -#' predict_race(voter.file = voters, census.geo = "tract", census.data = CensusObj2, age = T, sex = T)} +#' predict_race_new(voter.file = voters, census.geo = "tract", census.data = CensusObj)} +#' \dontrun{CensusObj2 <- get_census_data(key = "...", state = c("NY", "DC", "NJ"), year = "2020"); +#' predict_race_new(voter.file = voters, census.geo = "tract", census.data = CensusObj2, year = "2020")} #' \dontrun{CensusObj3 <- get_census_data(key = "...", state = c("NY", "DC",
"NJ"), census.geo = "place"); -#' predict_race(voter.file = voters, census.geo = "place", census.data = CensusObj3)} +#' predict_race_new(voter.file = voters, census.geo = "place", census.data = CensusObj3)} #' @export ## Race Prediction Function From 4d2e58c1a502874633ac702aba67bd8c86d65769 Mon Sep 17 00:00:00 2001 From: Xiyu Yang <90369889+xiyuy@users.noreply.github.com> Date: Sat, 22 Jan 2022 17:04:46 -0500 Subject: [PATCH 19/19] Read through all the modified documentation and made final revisions 1. Modify the census_geo_api function description. 2. Modify lines 23 & 34 of predict_race function so that the year of the Surname list should be 2020 instead of 2021. 3. Modify lines 156-157 of predict_race function to change the option 2021 to 2020 for surname.year. 4. Small revision for the census.surnames description in function predict_race_new. --- R/census_geo_api.R | 2 +- R/predict_race.R | 8 ++++---- R/predict_race_v2.R | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R/census_geo_api.R b/R/census_geo_api.R index e0c3f5d..830b53d 100644 --- a/R/census_geo_api.R +++ b/R/census_geo_api.R @@ -2,7 +2,7 @@ #' #' \code{census_geo_api} retrieves U.S. Census geographic data for a given state. #' -#' This function allows users to download U.S. Census 2010 or 2020 geographic data, +#' This function allows users to download U.S. Census geographic data (2010 or 2020), #' at either the county, tract, block, or place level, for a particular state. #' #' @param key A required character object. Must contain user's Census API diff --git a/R/predict_race.R b/R/predict_race.R index c12d119..f88ce42 100644 --- a/R/predict_race.R +++ b/R/predict_race.R @@ -20,7 +20,7 @@ #' See below for other optional fields. #' @param census.surname A \code{TRUE}/\code{FALSE} object. If \code{TRUE}, #' function will call \code{merge_surnames} to merge in Pr(Race | Surname) -#' from U.S. Census Surname List (2000, 2010, or 2021) and Spanish Surname List. +#' from U.S.
Census Surname List (2000, 2010, or 2020) and Spanish Surname List. #' If \code{FALSE}, \code{voter.file} object must contain additional fields specifying #' Pr(Race | Surname), named as follows: \code{\var{p_whi}} for Whites, #' \code{\var{p_bla}} for Blacks, \code{\var{p_his}} for Hispanics/Latinos, @@ -31,7 +31,7 @@ #' @param surname.year A number to specify the year of the census surname statistics. #' These surname statistics is stored in the data, and will be automatically loaded. #' The default value is \code{2010}, which means the surname statistics from the -#' 2010 census will be used. Currently, the other available choices are \code{2000} and \code{2021}. +#' 2010 census will be used. Currently, the other available choices are \code{2000} and \code{2020}. #' @param census.geo An optional character vector specifying what level of #' geography to use to merge in U.S. Census geographic data. Currently #' \code{"county"}, \code{"tract"}, \code{"block"}, and \code{"place"} are supported. @@ -153,8 +153,8 @@ predict_race <- function(voter.file, ## Merge in Pr(Race | Surname) if necessary if (census.surname) { - if (!(surname.year %in% c(2000,2010,2021))) { - stop(paste(surname.year, "is not a valid surname.year. It should be 2000, 2010 (default) or 2021.")) + if (!(surname.year %in% c(2000,2010,2020))) { + stop(paste(surname.year, "is not a valid surname.year. It should be 2000, 2010 (default) or 2020.")) } voter.file <- merge_surnames(voter.file, surname.year = surname.year, name.data = name.data, impute.missing = impute.missing) } else { diff --git a/R/predict_race_v2.R b/R/predict_race_v2.R index 986c069..df7c4c6 100644 --- a/R/predict_race_v2.R +++ b/R/predict_race_v2.R @@ -43,7 +43,7 @@ #' @param census.surnames An object of class \code{data.frame} provided by the #' users as an alternative surname dictionary. It will consist of a list of #' U.S. 
surnames, along with the associated probabilities P(name | ethnicity) -#' for ethnicities: white, Black, Hispanic, Asian, and other. Default is \code{NULL}. +#' for ethnicities: White, Black, Hispanic, Asian, and other. Default is \code{NULL}. #' (\code{\var{last_name}} for U.S. surnames, \code{\var{p_whi_last}} for White, #' \code{\var{p_bla_last}} for Black, \code{\var{p_his_last}} for Hispanic, #' \code{\var{p_asi_last}} for Asian, \code{\var{p_oth_last}} for other).