diff --git a/DESCRIPTION b/DESCRIPTION index 8db7881e..f8ce77ab 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: nflfastR Title: Functions to Efficiently Access NFL Play by Play Data -Version: 4.5.1.9005 +Version: 4.5.1.9006 Authors@R: c(person(given = "Sebastian", family = "Carl", diff --git a/NEWS.md b/NEWS.md index ff77c5d8..bcb8aa97 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,8 @@ - The function `calculate_player_stats_def()` no longer errors when small subsets of pbp data are missing stats. (#415) - The function `calculate_series_conversion_rates()` no longer returns `NA` values if a small subset of pbp data is missing series on offense or defense. (#417) - `fixed_drive` now correctly increments on plays where posteam lost a fumble but remains posteam because defteam also lost a fumble during the same play. (#419) +- nflfastR now fixes missing drive number counts in raw pbp data in order to provide accurate drive information. (#420) + # nflfastR 4.5.1 diff --git a/R/helper_additional_functions.R b/R/helper_additional_functions.R index 1d572c16..374c9636 100644 --- a/R/helper_additional_functions.R +++ b/R/helper_additional_functions.R @@ -55,57 +55,6 @@ clean_pbp <- function(pbp, ...) 
{ } else { user_message("Cleaning up play-by-play...", "todo") - if(any(pbp$season >= 2022)){ - - # user_message("Loading pbp player ID patch files", "info") - - patch_seasons <- unique(pbp$season[pbp$season >= 2022]) - - patch_ids <- nflreadr::load_from_url( - glue::glue("https://github.com/nflverse/nflverse-data/releases/download/misc/pbp_patch_ids_{patch_seasons}.rds") - ) %>% suppressMessages() - - patchable_ids <- pbp %>% - dplyr::select( - dplyr::any_of(c( - "game_id", "play_id", - "passer_id", "passer_name" = "passer", - "receiver_id", "receiver_name" = "receiver", - "rusher_id", "rusher_name" = "rusher", - "fantasy_id", "fantasy_name" = "fantasy", - "fantasy_player_name" - )), - dplyr::matches("player_id|player_name") - ) %>% - tidyr::pivot_longer( - cols = -c("game_id","play_id"), - names_to = c("stat",".value"), - names_pattern = c("(.+)_(id|name)"), - values_drop_na = TRUE - ) %>% - dplyr::filter(is.na(.data$id)) %>% - dplyr::left_join(patch_ids, by = c("game_id","play_id","name")) %>% - dplyr::mutate( - id = dplyr::coalesce(.data$id,.data$gsis_id), - gsis_id = NULL, - club_code = NULL, - name = NULL - ) %>% - tidyr::pivot_wider( - names_from = "stat", - values_from = "id", - names_glue = "{stat}_id" - ) - - if(nrow(patchable_ids) > 0){ - pbp <- tibble::tibble(pbp) %>% - dplyr::rows_patch(patchable_ids, by = c("game_id","play_id")) - } - - # cli::cli_alert_success("{my_time()} | Patched {nrow(patchable_ids)} missing gsis_id field{?s}") - - } - # drop existing values of clean_pbp pbp <- pbp %>% dplyr::select(-tidyselect::any_of(drop.cols)) diff --git a/R/helper_scrape_nfl.R b/R/helper_scrape_nfl.R index 1887f970..67b434a9 100644 --- a/R/helper_scrape_nfl.R +++ b/R/helper_scrape_nfl.R @@ -108,6 +108,32 @@ get_pbp_nfl <- function(id, dir = NULL, qs = FALSE, ...) 
{ plays <- raw_data$data$viewer$gameDetail$plays %>% dplyr::mutate(game_id = as.character(game_id)) + # We have this issue https://github.com/nflverse/nflfastR/issues/309 with 2013 postseason games + # where the driveSequenceNumber in the plays df is NA for all plays. That prevents drive information + # from being joined. + # In this case, we compute our own driveSequenceNumber by incrementing a counter depending on the + # value of driveTimeOfPossession. + # driveTimeOfPossession will be a constant value during a drive so this should actually be accurate + if (all(is.na(plays$driveSequenceNumber))){ + plays <- plays %>% + dplyr::mutate( + # First, create a trigger for cumsum + drive_trigger = dplyr::case_when( + # this is the first play of the first drive + is.na(dplyr::lag(driveTimeOfPossession)) & !is.na(driveTimeOfPossession) ~ 1, + # if driveTimeOfPossession changes, there is a new drive + dplyr::lag(driveTimeOfPossession) != driveTimeOfPossession ~ 1, + TRUE ~ 0 + ), + # Now create the drive number by accumulating triggers + driveSequenceNumber = cumsum(drive_trigger), + # driveSequenceNumber should be NA on plays where driveTimeOfPossession is NA + driveSequenceNumber = ifelse(is.na(driveTimeOfPossession), NA_real_, driveSequenceNumber), + # drop the helper + drive_trigger = NULL + ) + } + #fill missing posteam info for this if ( ((home_team %in% c("JAC", "JAX") | away_team %in% c("JAC", "JAX")) & season <= 2015) |