diff --git a/DESCRIPTION b/DESCRIPTION index e6a08890..da3dbaa1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: nflfastR Title: Functions to Efficiently Scrape NFL Play by Play Data -Version: 2.0.1 +Version: 2.0.2 Authors@R: c(person(given = "Sebastian", family = "Carl", diff --git a/NEWS.md b/NEWS.md index 5f76bed2..af36e36a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,7 +1,13 @@ +# nflfastR 2.0.2 + +* Fixed a bug in the series and series success calculations caused by timeouts +following a possession change +* Fixed win probability on PATs + # nflfastR 2.0.1 * Added minimum version requirement on `xgboost` (>= 1.1) as the recent `xgboost` update -caused a breaking change leading to failure in adding model results to data. +caused a breaking change leading to failure in adding model results to data # nflfastR 2.0.0 diff --git a/R/helper_add_ep_wp.R b/R/helper_add_ep_wp.R index 102a2dbc..c7977756 100644 --- a/R/helper_add_ep_wp.R +++ b/R/helper_add_ep_wp.R @@ -700,6 +700,15 @@ add_wp_variables <- function(pbp_data) { OffWinProb[regular_i] <- get_preds_wp(regular_df) OffWinProb_spread[regular_i] <- get_preds_wp_spread(regular_df) + ## PATs are messed up, set to NA WP for plays down is missing + # for kickoffs, this will get overwritten by the fix after this + + down_na <- which(is.na(pbp_data$down)) + OffWinProb[down_na] <- NA_real_ + OffWinProb_spread[down_na] <- NA_real_ + + ## end PAT fix + ## now we need to fix WP on kickoffs kickoff_data <- pbp_data @@ -733,6 +742,16 @@ add_wp_variables <- function(pbp_data) { pbp_data <- pbp_data %>% dplyr::mutate( wp = OffWinProb, + vegas_wp = OffWinProb_spread) %>% + tidyr::fill( + wp, .direction = "up" + ) %>% + tidyr::fill( + vegas_wp, .direction = "up" + ) %>% + dplyr::mutate( + wp = dplyr::if_else(stringr::str_detect(desc, 'extra point') | !is.na(two_point_conv_result) | !is.na(extra_point_result), 1 - wp, wp), + vegas_wp = dplyr::if_else(stringr::str_detect(desc, 'extra point') | !is.na(two_point_conv_result) | !is.na(extra_point_result), 1 - vegas_wp, vegas_wp), wp = dplyr::if_else(is.na(posteam), NA_real_, wp), def_wp = 1 - wp, home_wp = dplyr::if_else(posteam == home_team, @@ -740,7 +759,6 @@ add_wp_variables <- function(pbp_data) { away_wp = dplyr::if_else(posteam == away_team, wp, def_wp), #add columns for WP taking into account spread - vegas_wp = OffWinProb_spread, vegas_wp = dplyr::if_else(is.na(posteam), NA_real_, vegas_wp), vegas_home_wp = dplyr::if_else(posteam == home_team, vegas_wp, 1 - vegas_wp), @@ -787,21 +805,18 @@ add_wp_variables <- function(pbp_data) { pbp_data$WPA_base_nxt_ind <- with(pbp_data, ifelse(posteam == dplyr::lead(posteam, 2) & - #drive == dplyr::lead(drive, 2) & (is.na(dplyr::lead(play_type)) | (dplyr::lead(timeout) == 1 & dplyr::lead(play_type) == "no_play")), 1, 0)) pbp_data$WPA_change_nxt_ind <- with(pbp_data, ifelse(posteam != dplyr::lead(posteam, 2) & - #drive != dplyr::lead(drive, 2) & (is.na(dplyr::lead(play_type)) | (dplyr::lead(timeout) == 1 & dplyr::lead(play_type) == "no_play")), 1, 0)) pbp_data$WPA_change_ind <- with(pbp_data, ifelse(posteam != dplyr::lead(posteam) & - #drive != dplyr::lead(drive) & !is.na(dplyr::lead(play_type)) & (dplyr::lead(timeout) == 0 | (dplyr::lead(timeout) == 1 & @@ -828,6 +843,8 @@ add_wp_variables <- function(pbp_data) { ifelse(WPA_base_nxt_ind == 1, WPA_base_nxt, ifelse(WPA_change_ind == 1, WPA_change, WPA_base)))))) + + # Home and Away post: pbp_data$home_wp_post <- ifelse(pbp_data$posteam == pbp_data$home_team, @@ -858,7 +875,7 @@ add_wp_variables <- function(pbp_data) { ifelse(stringr::str_detect(tolower(desc), "(end of game)|(end game)"), dplyr::lag(home_wp_post), - ifelse(dplyr::lag(play_type) == "no_play" & play_type == "no_play", dplyr::lag(home_wp),home_wp))) + home_wp)) pbp_data$home_wp_post <- with(pbp_data, ifelse(stringr::str_detect(tolower(desc), @@ -868,7 +885,7 @@ add_wp_variables <- function(pbp_data) { ifelse(stringr::str_detect(tolower(desc), "(end of game)|(end game)"), dplyr::lag(away_wp_post), - ifelse(dplyr::lag(play_type) == "no_play" & play_type == "no_play", dplyr::lag(away_wp),away_wp))) + away_wp)) pbp_data$away_wp_post <- with(pbp_data, ifelse(stringr::str_detect(tolower(desc), diff --git a/R/helper_add_series_data.R b/R/helper_add_series_data.R index e293d147..720f5b4e 100644 --- a/R/helper_add_series_data.R +++ b/R/helper_add_series_data.R @@ -24,11 +24,13 @@ add_series_data <- function(pbp) { # we don't want a first down being indicated for XP, 2P, KO first_down = dplyr::if_else( (first_down_rush == 1 | first_down_pass == 1 | - first_down_penalty == 1 | drive < dplyr::lead(drive)) & + first_down_penalty == 1 | + (drive < dplyr::lead(drive) | (drive < dplyr::lead(drive, 2) & is.na(dplyr::lead(drive)))) + ) & (extra_point_attempt == 0 & two_point_attempt == 0 & kickoff_attempt == 0), 1, 0 ), - # after setting the first down indicator we modificate it for the end of a half + # after setting the first down indicator we modify it for the end of a half first_down = dplyr::if_else(game_half != dplyr::lead(game_half), 1, first_down), # the 'trigger' is being used for calculatung cumsum because we don't want the # series number to increase in the play the first down occured but in the next play