diff --git a/R/build_projection_model_opt_plot.r b/R/build_projection_model_opt_plot.r
deleted file mode 100644
index 2fe3521..0000000
--- a/R/build_projection_model_opt_plot.r
+++ /dev/null
@@ -1,60 +0,0 @@
-#' Model future uptake from current uptake
-#'
-#' BEWARE: THIS DOES NOT YET ACCOUNT FOR MULTI-YEAR OR MULTI-STATE DATA SETS
-#'
-#' Future incident uptake can be predicted from current incident uptake.
-#' A model can be built and fit to project incident uptake into the future.
-#'
-#' The default model says that current incident uptake is a linear function
-#' of the previous incident uptake, time elapsed since rollout, and their
-#' interaction.
-#'
-#' All modeling is performed on the daily-average incident uptake to
-#' remove accumulation dependence among data points and to
-#' account for slight variations in incident reporting intervals.
-#' The first data point may be dropped before fitting the model
-#' if there is a long lag between the vaccine rollout date and
-#' the date that data collection began.
-#' All variables are standardized prior to fitting the model.
-#'
-#' UPDATES FOR THE FUTURE:
-#' - Model must incorporate year-to-year variability
-#' - Model must incorporate state-to-state variability
-#' - A variety of models should be tried and competed, or a formula provided
-#'
-#' @description Fit a model to project uptake into the future
-#'
-#' @param data uptake data to fit the projection model
-#' @param drop_first whether to drop the first data point
-#' @param output whether to print the model summary and diagnostic plots
-#'
-#' @return a brms model object
-#'
-#' @export
-#'
-build_projection_model2 <- function(data, drop_first = NULL, output = FALSE) {
-  if (is.null(drop_first)) {
-    intervals <- diff(data$elapsed)
-    if (abs(data$elapsed[1] - mean(intervals)) > (sd(intervals) + 1)) {
-      drop_first <- TRUE
-    } else {
-      drop_first <- FALSE
-      intervals <- c(data$elapsed[1], intervals)
-    }
-  }
-
-  if (drop_first) {
-    data <- data[-1, ]
-  }
-
-  model <- brms::brm(daily_std ~ previous_std * elapsed_std,
-    data = data,
-    family = gaussian()
-  )
-
-  if (output) {
-    print(summary(model))
-    plot(model)
-  }
-
-  return(model)
-}
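For orientation, a minimal usage sketch with invented toy data: the function only requires the three standardized columns named in its formula (the real pipeline builds them via data_prepare() in R/generate_projections.R).

toy <- data.frame(elapsed = c(7, 14, 21, 28, 35, 42))      # toy reporting days
toy$daily <- exp(-toy$elapsed / 20)                        # a declining daily uptake rate
toy$previous <- c(NA, toy$daily[-nrow(toy)])               # lag-1 predictor
toy <- toy[-1, ]
toy$daily_std <- as.numeric(scale(toy$daily))
toy$previous_std <- as.numeric(scale(toy$previous))
toy$elapsed_std <- as.numeric(scale(toy$elapsed))
fit <- build_projection_model2(toy, output = TRUE)         # prints summary and plots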
diff --git a/R/build_scaling_model.r b/R/build_scaling_model.r
deleted file mode 100644
index 8e0f1f4..0000000
--- a/R/build_scaling_model.r
+++ /dev/null
@@ -1,89 +0,0 @@
-#' Model one source of uptake data from another
-#'
-#' Different sources of uptake data may systematically disagree.
-#' A model can be built and fit to scale one source to the other.
-#'
-#' The default model says that the uptake data from the target source
-#' is a linear function of the input data, time elapsed since rollout,
-#' and their interaction. If multiple states are represented, each state
-#' is modeled separately via group-level variation of the intercept and
-#' the input data coefficients.
-#'
-#' All modeling is performed on the daily-average incident uptake to
-#' remove accumulation dependence among data points and to
-#' account for slight variations in incident reporting intervals.
-#' The first data point may be dropped before fitting the model
-#' if there is a long lag between the vaccine rollout date and
-#' the date that data collection began.
-#' All variables are standardized prior to fitting the model.
-#'
-#' UPDATES FOR THE FUTURE:
-#' - Model must incorporate year-to-year variability
-#' - A variety of models should be tried and competed, or a formula provided
-#'
-#' @description Fit a model to scale uptake data from one source to another
-#'
-#' @param input uptake data to use as a predictor
-#' @param target uptake data to use as the outcome
-#' @param drop_first whether to drop the first data point
-#'
-#' @return a brms model object
-#'
-#' @export
-#'
-build_scaling_model <- function(input, target, drop_first = NULL) {
-
-  if (is.null(drop_first)) {
-    input_intervals <- c(input$elapsed[1], diff(input$elapsed))
-    target_intervals <- c(target$elapsed[1], diff(target$elapsed))
-
-    common_dates <- intersect(input$elapsed, target$elapsed)
-
-    input_intervals <- input_intervals[input$elapsed %in% common_dates]
-    target_intervals <- target_intervals[target$elapsed %in% common_dates]
-
-    if ((input_intervals[1] - mean(input_intervals[-1])) >
-          (sd(input_intervals[-1]) + 1) ||
-          (target_intervals[1] - mean(target_intervals[-1])) >
-          (sd(target_intervals[-1]) + 1)) {
-      drop_first <- TRUE
-    } else {
-      drop_first <- FALSE
-    }
-  }
-
-  input <- dplyr::semi_join(input, target, by = c("state", "date"))
-  target <- dplyr::semi_join(target, input, by = c("state", "date"))
-
-  if (drop_first) {
-    input <- input[-1, ]
-    target <- target[-1, ]
-  }
-
-  if (nrow(input) != nrow(target)) {
-    stop("There are state or date mismatches between the input and target data")
-  }
-
-  input$daily_std <- (input$daily - mean(input$daily)) /
-    sd(input$daily)
-  input$elapsed_std <- (input$elapsed - mean(input$elapsed)) /
-    sd(input$elapsed)
-  input$target_daily_std <- (target$daily - mean(target$daily)) /
-    sd(target$daily)
-
-  if (length(unique(input$state)) == 1) {
-    model <- brms::brm(target_daily_std ~ daily_std * elapsed_std,
-      data = input,
-      family = gaussian())
-  } else {
-    model <- brms::brm(target_daily_std ~ daily_std * elapsed_std +
-        (1 + daily_std | state),
-      data = input,
-      family = gaussian())
-  }
-
-  print(summary(model))
-  plot(model)
-
-  return(model)
-}
\ No newline at end of file
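The intended call pattern, as used in scripts/projection_script.r: align the input source to the target's reporting dates first, then fit the scaling model on the matched series.

iis_usa_2022_interp <- interpolate_dates(iis_usa_2022, nis_usa_2022)
iis_to_nis_model <- build_scaling_model(iis_usa_2022_interp, nis_usa_2022)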
diff --git a/R/evaluate_projection.r b/R/evaluate_projection.r
deleted file mode 100644
index 796c52f..0000000
--- a/R/evaluate_projection.r
+++ /dev/null
@@ -1,215 +0,0 @@
-#' Evaluate a projection model
-#'
-#' To evaluate how successfully a projection model predicts future uptake,
-#' plots of predicted incident and cumulative uptake are generated, along with
-#' the data against which the model is being compared. The model ought to
-#' match the data closely, although uncertainty may propagate through time.
-#' Each plot shows the data and model predictions with credible intervals
-#' (95% by default).
-#'
-#' If only old data, i.e. the data on which the model was fit, is provided,
-#' the projections will be compared against that. If new data, i.e. data
-#' which the model has not seen, is also provided, projections will be compared
-#' against that instead. In the latter case, the fit may not be as good, and
-#' credible intervals will be wider due to extra uncertainty over the values
-#' of year-specific parameters.
-#'
-#' If no start date is provided, it is assumed the exact incident and
-#' cumulative uptake is known for each time t-1 when incident uptake for
-#' time t is being predicted. This evaluates how well the model fits
-#' each individual data point through the entire time series.
-#'
-#' If a start date is provided, it is assumed that the exact incident and
-#' cumulative uptake is known only until that date. After that date,
-#' uncertainty propagates through time, as if actually projecting the future.
-#'
-#' If an end date is not provided, projections will be provided through
-#' the end date of the provided data set.
-#'
-#' If an end date is provided, projections will be provided through the
-#' closest expected reporting date to the end date, even if this extrapolates
-#' beyond the end of the provided data set.
-#'
-#' UPDATES FOR THE FUTURE:
-#' - Model must incorporate year-to-year variability
-#' - Model must incorporate state-to-state variability
-#' - Multiple models can be provided, with scaling, to pool projections
-#'
-#' @description Evaluate a model that projects uptake into the future
-#'
-#' @param old_data uptake data used to fit the projection model
-#' @param model model fit to project uptake into the future
-#' @param new_data uptake data to compare the projection model against
-#' @param start_date which date to perform projection from, in %Y-%m-%d format
-#' @param end_date which date to perform projections until, in %Y-%m-%d format
-#' @param conf width of credible interval to plot
-#'
-#' @return data frame of the data underlying diagnostic plots
-#'
-#' @export
-#'
-evaluate_projection <- function(old_data, model, new_data = NULL,
-                                start_date = NULL, end_date = NULL,
-                                conf = 0.95) {
-
-  model_rows <- (nrow(old_data) - nrow(model$data) + 1):(nrow(old_data))
-
-  daily_mean <- mean(old_data$daily[model_rows])
-  daily_sd <- sd(old_data$daily[model_rows])
-  previous_mean <- mean(old_data$daily[model_rows - 1])
-  previous_sd <- sd(old_data$daily[model_rows - 1])
-  elapsed_mean <- mean(old_data$elapsed[model_rows])
-  elapsed_sd <- sd(old_data$elapsed[model_rows])
-
-  if (is.null(new_data)) {
-    output <- old_data
-  } else {
-    output <- new_data
-  }
-
-  output$previous <- c(NA, output$daily[1:(nrow(output) - 1)])
-  output$interval <- c(output$elapsed[1], diff(output$elapsed))
-
-  if (!is.null(end_date)) {
-    end_date <- as.Date(end_date)
-    if (end_date <= output$date[nrow(output)]) {
-      output <- output[output$date <= end_date, ]
-    } else {
-      extra_dates <- seq(output$date[nrow(output)], end_date,
-                         by = round(mean(output$interval)))[-1]
-      extra_rows <- nrow(output) + seq_along(extra_dates)
-      output[extra_rows, ] <- NA
-      output$date[extra_rows] <- extra_dates
-      output$elapsed <- as.numeric(output$date -
-                                     (output$date[1] - output$elapsed[1]))
-      output$interval <- c(output$elapsed[1], diff(output$elapsed))
-      output$previous[extra_rows[1]] <- output$daily[extra_rows[1] - 1]
-    }
-  }
-
-  output$incident_mean <- rep(NA, nrow(output))
-  output$incident_upper <- rep(NA, nrow(output))
-  output$incident_lower <- rep(NA, nrow(output))
-  output$cumulative_mean <- rep(NA, nrow(output))
-  output$cumulative_upper <- rep(NA, nrow(output))
-  output$cumulative_lower <- rep(NA, nrow(output))
-
-  if (!is.null(start_date)) {
-    start_date <- as.Date(start_date)
-    early_dates <- which(output$date <= start_date | is.na(output$previous))
-  } else {
-    early_dates <- which(is.na(output$previous))
-  }
-  output$incident_mean[early_dates] <- output$incident[early_dates]
-  output$incident_upper[early_dates] <- output$incident[early_dates]
-  output$incident_lower[early_dates] <- output$incident[early_dates]
-  output$cumulative_mean[early_dates] <- output$cumulative[early_dates]
-  output$cumulative_upper[early_dates] <- output$cumulative[early_dates]
-  output$cumulative_lower[early_dates] <- output$cumulative[early_dates]
-
-  output_copy <- output[is.na(output$incident_mean), ]
-
-  if (is.null(start_date)) {
-    input <- data.frame(elapsed_std = (output_copy$elapsed - elapsed_mean) /
-                          elapsed_sd,
-                        previous_std = (output_copy$previous - previous_mean) /
-                          previous_sd)
-
-    post_check <- brms::posterior_predict(model, newdata = input) *
-      daily_sd + daily_mean
-  } else {
-    post_check <- matrix(NA, nrow = brms::ndraws(model),
-                         ncol = nrow(output_copy))
-
-    for (i in seq_len(nrow(output_copy))) {
-      if (i == 1) {
-        input <- data.frame(elapsed_std = (output_copy$elapsed[i] -
-                                             elapsed_mean) / elapsed_sd,
-                            previous_std = (output_copy$previous[i] -
-                                              previous_mean) / previous_sd)
-        post_check[, i] <- as.vector(brms::posterior_predict(model,
-                                                             newdata = input))
-      } else {
-        input <- data.frame(elapsed_std = (output_copy$elapsed[i] -
-                                             elapsed_mean) / elapsed_sd,
-                            previous_std = (post_check[, i - 1] -
-                                              previous_mean) / previous_sd)
-        post_check[, i] <- as.vector(brms::posterior_predict(model,
                                                             newdata = input,
-                                                             ndraws = 1))
-      }
-      post_check[, i] <- post_check[, i] * daily_sd + daily_mean
-    }
-  }
-
-  post_check <- sweep(post_check, 2, output_copy$interval, FUN = `*`)
-
-  output_copy$incident_mean <- colMeans(post_check)
-  output_copy$incident_upper <- apply(post_check, 2, quantile,
-                                      1 - (1 - conf) / 2)
-  output_copy$incident_lower <- apply(post_check, 2, quantile,
-                                      (1 - conf) / 2)
-
-  if (is.null(start_date)) {
-    post_check <- sweep(post_check, 2,
-                        (output_copy$cumulative - output_copy$incident),
-                        FUN = `+`)
-  } else {
-    post_check <- t(apply(post_check, 1, cumsum))
-    post_check <- sweep(post_check, 2, (output_copy$cumulative[1] -
-                                          output_copy$incident[1]),
-                        FUN = `+`)
-  }
-
-  output_copy$cumulative_mean <- colMeans(post_check)
-  output_copy$cumulative_upper <- apply(post_check, 2, quantile,
-                                        1 - (1 - conf) / 2)
-  output_copy$cumulative_lower <- apply(post_check, 2, quantile,
-                                        (1 - conf) / 2)
-
-  output <- rbind(output[!is.na(output$incident_mean), ], output_copy)
-
-  output$interval <- NULL
-  output$previous <- NULL
-
-  incident_plot <- ggplot2::ggplot(output) +
-    ggplot2::geom_ribbon(ggplot2::aes(x = date, ymax = incident_upper,
-                                      ymin = incident_lower),
-                         fill = "black", alpha = 0.25) +
-    ggplot2::geom_line(ggplot2::aes(x = date, y = incident),
-                       color = "dodgerblue", linewidth = 1.5) +
-    ggplot2::geom_line(ggplot2::aes(x = date, y = incident_mean),
-                       color = "black", linewidth = 1.5) +
-    ggplot2::theme_bw() +
-    ggplot2::xlab("Time") + ggplot2::ylab("% of Population") +
-    ggplot2::theme(text = ggplot2::element_text(size = 20)) +
-    ggplot2::annotate("text", x = output$date[round(0.9 * nrow(output))],
-                      y = 0.9 * diff(range(output$incident_upper)),
-                      label = "Input", col = "dodgerblue", size = 10) +
-    ggplot2::annotate("text", x = output$date[round(0.9 * nrow(output))],
-                      y = 0.75 * diff(range(output$incident_upper)),
-                      label = "Model", col = "black", size = 10)
-  print(incident_plot)
-
-  cumulative_plot <- ggplot2::ggplot(output) +
-    ggplot2::geom_ribbon(ggplot2::aes(x = date, ymax = cumulative_upper,
-                                      ymin = cumulative_lower),
-                         fill = "black", alpha = 0.25) +
-    ggplot2::geom_line(ggplot2::aes(x = date, y = cumulative),
-                       color = "dodgerblue", linewidth = 1.5) +
-    ggplot2::geom_line(ggplot2::aes(x = date, y = cumulative_mean),
-                       color = "black", linewidth = 1.5) +
-    ggplot2::theme_bw() +
-    ggplot2::xlab("Time") + ggplot2::ylab("% of Population") +
-    ggplot2::theme(text = ggplot2::element_text(size = 20)) +
-    ggplot2::annotate("text", x = output$date[round(0.9 * nrow(output))],
-                      y = 0.25 * diff(range(output$cumulative_upper)),
-                      label = "Input", col = "dodgerblue", size = 10) +
-    ggplot2::annotate("text", x = output$date[round(0.9 * nrow(output))],
-                      y = 0.1 * diff(range(output$cumulative_upper)),
-                      label = "Model", col = "black", size = 10)
-  print(cumulative_plot)
-
-  return(output)
-}
\ No newline at end of file
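The evaluation modes described above map onto distinct call signatures, taken from scripts/projection_script.r (the start date here is one of the 2022 reporting dates):

evaluate_projection(nis_usa_2022, nis_forward_model)                # one-step-ahead fit
evaluate_projection(nis_usa_2022, nis_forward_model,
                    start_date = "2022-09-10")                      # true forward projection
evaluate_projection(nis_usa_2022, nis_forward_model, nis_usa_2023)  # against unseen data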
diff --git a/R/evaluate_scaling.r b/R/evaluate_scaling.r
deleted file mode 100644
index d29b003..0000000
--- a/R/evaluate_scaling.r
+++ /dev/null
@@ -1,107 +0,0 @@
-#' Evaluate a scaling model
-#'
-#' To evaluate how successfully a scaling model scales one uptake data set
-#' onto another, plots of the incident and cumulative uptake are generated.
-#' Each plot shows the input data, the target data, and the model predictions
-#' and credible interval (95% by default).
-#'
-#' UPDATES FOR THE FUTURE:
-#' - Model must incorporate year-to-year variability
-#'
-#' @description Evaluate a model that scales one uptake data set to another
-#'
-#' @param input uptake data to use as a predictor
-#' @param target uptake data to use as the outcome
-#' @param model model fit to scale input to target
-#' @param conf width of credible interval to plot
-#'
-#' @return data frame of the data underlying diagnostic plots
-#'
-#' @export
-#'
-evaluate_scaling <- function(input, target, model, conf = 0.95) {
-
-  output <- dplyr::inner_join(input, target,
-                              by = c("state", "date", "elapsed", "year"))
-  output <- output[order(output$date), ]
-  colnames(output) <- gsub("\\.x", "_input", colnames(output))
-  colnames(output) <- gsub("\\.y", "_target", colnames(output))
-
-  if (nrow(output) == (stats::nobs(model) + 1)) {
-    intervals <- diff(output$elapsed)
-    output <- output[-1, ]
-  } else {
-    intervals <- c(output$elapsed[1], diff(output$elapsed))
-  }
-
-  known <- output$cumulative_target - output$incident_target
-
-  post_check <- brms::posterior_predict(model) *
-    sd(output$daily_target) + mean(output$daily_target)
-
-  post_check <- sweep(post_check, 2, intervals, FUN = `*`)
-
-  output$incident_pred_mean <- colMeans(post_check)
-  output$incident_pred_upper <- apply(post_check, 2, quantile,
-                                      1 - (1 - conf) / 2)
-  output$incident_pred_lower <- apply(post_check, 2, quantile,
-                                      (1 - conf) / 2)
-
-  post_check <- sweep(post_check, 2, known, FUN = `+`)
-
-  output$cumulative_pred_mean <- colMeans(post_check)
-  output$cumulative_pred_upper <- apply(post_check, 2, quantile,
-                                        1 - (1 - conf) / 2)
-  output$cumulative_pred_lower <- apply(post_check, 2, quantile,
-                                        (1 - conf) / 2)
-
-  incident_plot <- ggplot2::ggplot(output) +
-    ggplot2::geom_ribbon(ggplot2::aes(x = date, ymax = incident_pred_upper,
-                                      ymin = incident_pred_lower),
-                         fill = "black", alpha = 0.25) +
-    ggplot2::geom_line(ggplot2::aes(x = date, y = incident_input),
-                       color = "dodgerblue", linewidth = 1.5) +
-    ggplot2::geom_line(ggplot2::aes(x = date, y = incident_target),
-                       color = "darkred", linewidth = 1.5) +
-    ggplot2::geom_line(ggplot2::aes(x = date, y = incident_pred_mean),
-                       color = "black", linewidth = 1.5) +
-    ggplot2::theme_bw() +
-    ggplot2::xlab("Time") + ggplot2::ylab("% of Population") +
-    ggplot2::theme(text = ggplot2::element_text(size = 20)) +
-    ggplot2::annotate("text", x = output$date[round(0.9 * nrow(output))],
-                      y = 0.9 * diff(range(output$incident_pred_upper)),
-                      label = "Input", col = "dodgerblue", size = 10) +
-    ggplot2::annotate("text", x = output$date[round(0.9 * nrow(output))],
-                      y = 0.75 * diff(range(output$incident_pred_upper)),
-                      label = "Target", col = "darkred", size = 10) +
-    ggplot2::annotate("text", x = output$date[round(0.9 * nrow(output))],
-                      y = 0.6 * diff(range(output$incident_pred_upper)),
-                      label = "Model", col = "black", size = 10)
-  print(incident_plot)
-
-  cumulative_plot <- ggplot2::ggplot(output) +
-    ggplot2::geom_ribbon(ggplot2::aes(x = date, ymax = cumulative_pred_upper,
-                                      ymin = cumulative_pred_lower),
-                         fill = "black", alpha = 0.25) +
-    ggplot2::geom_line(ggplot2::aes(x = date, y = cumulative_input),
-                       color = "dodgerblue", linewidth = 1.5) +
-    ggplot2::geom_line(ggplot2::aes(x = date, y = cumulative_target),
-                       color = "darkred", linewidth = 1.5) +
-    ggplot2::geom_line(ggplot2::aes(x = date, y = cumulative_pred_mean),
-                       color = "black", linewidth = 1.5) +
-    ggplot2::theme_bw() +
-    ggplot2::xlab("Time") + ggplot2::ylab("% of Population") +
-    ggplot2::theme(text = ggplot2::element_text(size = 20)) +
-    ggplot2::annotate("text", x = output$date[round(0.9 * nrow(output))],
-                      y = 0.4 * diff(range(output$cumulative_pred_upper)),
-                      label = "Input", col = "dodgerblue", size = 10) +
-    ggplot2::annotate("text", x = output$date[round(0.9 * nrow(output))],
-                      y = 0.25 * diff(range(output$cumulative_pred_upper)),
-                      label = "Target", col = "darkred", size = 10) +
-    ggplot2::annotate("text", x = output$date[round(0.9 * nrow(output))],
-                      y = 0.1 * diff(range(output$cumulative_pred_upper)),
-                      label = "Model", col = "black", size = 10)
-  print(cumulative_plot)
-
-  return(output)
-}
\ No newline at end of file
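The credible bands in both evaluation functions come from the same two-step pattern: rescale posterior draws of the daily rate to incident uptake with sweep(), then take column quantiles. In miniature, with made-up draws and intervals:

set.seed(1)
post <- matrix(rnorm(4000, mean = 0.5, sd = 0.1), nrow = 1000)  # draws x dates
intervals <- c(14, 7, 7, 7)                   # days between reports
post <- sweep(post, 2, intervals, FUN = `*`)  # daily rate -> incident uptake
upper <- apply(post, 2, quantile, 0.975)      # 95% band, i.e. conf = 0.95
lower <- apply(post, 2, quantile, 0.025)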
diff --git a/R/generate_projections.R b/R/generate_projections.R
deleted file mode 100644
index b84cb27..0000000
--- a/R/generate_projections.R
+++ /dev/null
@@ -1,207 +0,0 @@
-library(dplyr)
-library(ggplot2)
-
-### function nesting, from innermost to outermost:
-# inv_z_scale()
-# data_prepare()
-# build_projection_model2()
-#
-# ARE USED IN
-#
-# real_time_projection() -> plot_projection()
-#
-# WHICH IS USED IN
-#
-# plot_real_time_projection()
-
-# real_time_projection() -> get_mspe()
-#
-# WHICH ARE USED IN
-#
-# evaluate_real_time_projection()
-
-
-#### inverse Z score #####
-#' @description Inversely transform z-score standardized data to raw data
-#' using: raw = z * sd + mean
-#'
-#' @param old_data the original data that was used for z-score standardization
-#' @param scaled_data the currently scaled data
-#' @return the inverse z-score transformed data
-inv_z_scale <- function(old_data, scaled_data) {
-  sd <- sd(old_data)
-  m <- mean(old_data)
-
-  scaled_data * sd + m
-}
-
-
-#' @description Prepare the data format for model building.
-#' It includes:
-#' 1. Adding the `previous` predictor.
-#' 2. Z-score standardization of `previous`, `elapsed`, and `daily`
-#' @param data the data frame to be prepared.
-#' Must include the columns `elapsed` and `daily`
-#' @return a data frame with scaled `previous`, `elapsed`, and `daily`, ready for model building
-data_prepare <- function(data) {
-  data$previous <- c(NA, data$daily[-nrow(data)]) # nolint
-  data <- data[-1, ]
-  data$previous_std <- scale(data$previous, center = TRUE, scale = TRUE)
-  data$elapsed_std <- scale(data$elapsed, center = TRUE, scale = TRUE)
-  data$daily_std <- scale(data$daily, center = TRUE, scale = TRUE)
-
-  return(data)
-}
-
-#' @description This function uses `train_data` to train the linear regression
-#' and projects to the last date in `test_data`.
-#' It is embedded in `plot_real_time_projection` and `evaluate_real_time_projection`
-#' @param train_data data frame used to train the linear regression
-#' @param test_data data frame compared against the predictions from the linear regression
-#' @return a list containing the predicted uptake and the corresponding
-#' observed values over the `test_data` time frame,
-#' as both uptake rate `rate` and cumulative uptake `cumulative`
-
-real_time_projection <- function(
-    train_data = nis_usa_2022,
-    test_data = nis_usa_2023) {
-  # transform the train and test data into the format for model building and prediction #
-  scaled_train <- data_prepare(train_data)
-  scaled_test <- data_prepare(test_data)
-
-  # The first row is also removed from the raw test data, to be comparable with the predicted values #
-  test_data <- test_data[-1, ]
-
-  # model building #
-  model <- build_projection_model2(data = scaled_train)
-
-  ## get out-of-sample prediction ##
-  scaled_pred <- brms::posterior_predict(model, newdata = scaled_test)
-
-  ## recover the prediction to the original scale for comparison ##
-  pred <- inv_z_scale(test_data$daily, scaled_pred)
-
-  # predicted daily vaccine uptake #
-  pred_summary <- data.frame(
-    date = test_data$date,
-    obs = test_data$daily,
-    lower = apply(pred, 2, quantile, 0.025),
-    upper = apply(pred, 2, quantile, 0.975),
-    mean = colMeans(pred)
-  )
-
-  # predicted cumulative vaccine uptake #
-  cumu <- t(apply(pred, 1, cumsum))
-  cumu_summary <- data.frame(
-    mean = colMeans(cumu),
-    obs = test_data$cumulative,
-    lower = apply(cumu, 2, quantile, 0.025),
-    upper = apply(cumu, 2, quantile, 0.975),
-    date = test_data$date
-  )
-
-  return(list(rate = pred_summary, cumulative = cumu_summary))
-}
-
-#' @description This function plots the prediction against the observed data.
-#' This function is embedded in `plot_real_time_projection`
-#' @param option which uptake to plot: "rate" or "cumulative"
-#' @param predicted the out-of-sample prediction from the linear regression
-#' @param test the observed data corresponding to the prediction
-#' @return a ggplot object

-plot_projection <- function(option, predicted, test) {
-  if (option == "rate") {
-    data_y_name <- "daily"
-  } else if (option == "cumulative") {
-    data_y_name <- "cumulative"
-  } else {
-    stop("Response variable in test is not found.")
-  }
-
-  ggplot(predicted) +
-    geom_ribbon(aes(x = date, ymin = lower, ymax = upper),
-      fill = "black", alpha = 0.25
-    ) +
-    geom_line(aes(x = date, y = mean), # Bug warning! colnames may change later #
-      linewidth = 1.5
-    ) +
-    geom_line(aes(x = date, y = .data[[data_y_name]]),
-      data = test,
-      linewidth = 1.5, color = "dodgerblue"
-    ) +
-    xlab("Time") +
-    ylab("% of Population") +
-    ggtitle(paste("Forecast starts at", min(test$date))) +
-    theme_bw() +
-    theme(text = ggplot2::element_text(size = 20)) +
-    annotate("text",
-      x = test$date[round(0.9 * nrow(test))],
-      y = 1.7 * diff(range(predicted$upper)),
-      label = "Input", col = "dodgerblue", size = 5
-    ) +
-    annotate("text",
-      x = test$date[round(0.9 * nrow(test))],
-      y = 1.6 * diff(range(predicted$upper)),
-      label = "Model", col = "black", size = 5
-    )
-}
-
-
-#' @description This function fits the linear regression and plots the prediction
-#' against the observed data in a time-varying manner
-#' @param data_option which uptake to plot: "rate" or "cumulative"
-#' @param all_data data used to iteratively train and test,
-#' with each iteration moving forward one data point
-#' @param min_data_size the minimum data size required for model training;
-#' so far, it is the same for model testing.
-#' @return a grid of ggplots, one panel per forecast date
-
-plot_real_time_projection <- function(
-    data_option = "rate",
-    all_data, min_data_size = 8) {
-  start_date <- sort(all_data$date)[1 + min_data_size - 1]
-  end_date <- sort(all_data$date)[nrow(all_data) - min_data_size + 1]
-
-  date_series <- all_data %>%
-    filter(date >= start_date, date <= end_date) %>%
-    select(date)
-
-  # convert single-column data.frame to vector #
-  date_series <- as.vector(date_series$date) + as.Date("1970-01-01")
-  ps <- list()
-  i <- 1
-
-  for (split_date in date_series) {
-    train_data <- all_data %>%
-      filter(date < split_date)
-
-    test_data <- all_data %>%
-      filter(date >= split_date)
-
-    output <- real_time_projection(
-      train_data = train_data,
-      test_data = test_data
-    )
-
-    if (data_option == "rate") {
-      data_to_plot <- output$rate
-    } else if (data_option == "cumulative") {
-      data_to_plot <- output$cumulative
-    } else {
-      stop("data_option is not valid.")
-    }
-
-    ## plot ##
-    p <- plot_projection(
-      option = data_option,
-      predicted = data_to_plot,
-      test = test_data
-    )
-
-    ps[[i]] <- p
-    i <- i + 1
-  }
-
-  cowplot::plot_grid(plotlist = ps, nrow = ceiling(sqrt(length(date_series))))
-}
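inv_z_scale() is just the inverse of the z-score identity; a short check with invented numbers makes it concrete:

x <- c(2, 4, 6, 8)
z <- as.numeric(scale(x))  # (x - mean(x)) / sd(x)
inv_z_scale(x, z)          # z * sd(x) + mean(x) recovers 2 4 6 8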
diff --git a/R/get_uptake_data.r b/R/get_uptake_data.r
deleted file mode 100644
index 9f01d25..0000000
--- a/R/get_uptake_data.r
+++ /dev/null
@@ -1,153 +0,0 @@
-#' Format uptake data
-#'
-#' Download a local or web .csv file of cumulative uptake percentages.
-#' You must name the columns that contain the state, date, and uptake.
-#'
-#' Provide a .csv with state abbreviations in the first column and full
-#' names in the second. States will be renamed to their abbreviations,
-#' and states missing from the .csv will be discarded.
-#'
-#' Also provide the format of the dates in the date column. Use the default
-#' if there are multiple columns and/or the dates are specified non-numerically.
-#' Provide the first date vaccines were available to calculate elapsed time.
-#'
-#' Optionally, provide filters to remove certain rows from the data.
-#' Filters are a list of vectors. In each vector, the first entry gives the name
-#' of the column to filter on. Subsequent entries give acceptable values in that
-#' column. Rows with unmentioned values will be removed.
-#'
-#' Incident percentage uptake is calculated from the cumulative uptake.
-#' Therefore, if the time intervals are not equivalent between all cumulative
-#' reports, a warning will be generated.
-#'
-#' The uptake data is returned in a data frame with seven columns:
-#'
-#' Column 1 is the state for which percentage cumulative uptake is reported.
-#' Column 2 is the date at which percentage cumulative uptake is reported.
-#' Column 3 is the days elapsed since the vaccine became available.
-#' Column 4 is the year of reporting during the fall portion of the season.
-#' Column 5 is the percentage cumulative uptake.
-#' Column 6 is the incident uptake since the previous report.
-#' Column 7 is the incident uptake averaged daily since the previous report.
-#'
-#' @description Download uptake data and put it into a common format
-#'
-#' @param file_name file name or url address where the data is located
-#' @param state name of column containing the state, or the value to fill in
-#' @param date name of column(s) containing the date, or the value to fill in
-#' @param cumulative name of column containing cumulative uptake
-#' @param state_key file name of .csv with abbr and full name of desired states
-#' @param date_format format of the date in its column
-#' @param start_date date when vaccines were first available
-#' @param filters list of vectors giving column names and values to filter on
-#'
-#' @return formatted uptake data
-#'
-#' @export
-#'
-get_uptake_data <- function(file_name, state, date, cumulative,
-                            state_key = NULL, date_format = "%m/%d/%Y",
-                            start_date = NULL, filters = NULL) {
-  data <- read.csv(file_name)
-
-  if (!is.null(filters)) {
-    for (i in seq_along(filters)) {
-      if (filters[[i]][1] %in% colnames(data)) {
-        data <- data[data[, filters[[i]][1]] %in% filters[[i]][-1], ]
-      } else {
-        stop(paste("There is no column called ", filters[[i]][1], sep = ""))
-      }
-    }
-  }
-
-  if (cumulative %in% colnames(data)) {
-    data <- data[!is.na(data[, cumulative]), ]
-    cuml_col <- data[, cumulative]
-  } else {
-    stop(paste("There is no column called ", cumulative, sep = ""))
-  }
-
-  if (state %in% colnames(data)) {
-    state_col <- data[, state]
-  } else {
-    state_col <- rep_len(state, nrow(data))
-  }
-
-  if (!is.null(state_key)) {
-    state_key <- read.csv(state_key)
-    state_idx <- match(state_col, state_key[, 2], nomatch = 0) +
-      match(state_col, state_key[, 1], nomatch = 0)
-    state_idx[state_idx == 0] <- NA
-    state_col <- state_key[, 1][state_idx]
-    state_col <- state_col[!is.na(state_col)]
-    data <- data[!is.na(state_idx), ]
-    cuml_col <- cuml_col[!is.na(state_idx)]
-  }
-
-  if (all(date %in% colnames(data))) {
-    date_col <- data[, date]
-    if (length(date) > 1) {
-      for (i in seq_along(date)) {
-        if (any(grepl("^[A-Za-z]", date_col[, i]))) {
-          date_col[, i] <- sub(".*-", "", date_col[, i])
-          date_col[, i] <- trimws(date_col[, i], "both")
-          date_col[, i] <- sub(" ", "/", date_col[, i])
-          date_col[, i] <- stringr::str_replace_all(date_col[, i],
-                                                    "January", "1")
-          date_col[, i] <- stringr::str_replace_all(date_col[, i],
-                                                    "February", "2")
-          date_col[, i] <- stringr::str_replace_all(date_col[, i],
-                                                    "March", "3")
-          date_col[, i] <- stringr::str_replace_all(date_col[, i],
-                                                    "April", "4")
-          date_col[, i] <- stringr::str_replace_all(date_col[, i],
-                                                    "May", "5")
-          date_col[, i] <- stringr::str_replace_all(date_col[, i],
-                                                    "June", "6")
-          date_col[, i] <- stringr::str_replace_all(date_col[, i],
-                                                    "July", "7")
-          date_col[, i] <- stringr::str_replace_all(date_col[, i],
-                                                    "August", "8")
-          date_col[, i] <- stringr::str_replace_all(date_col[, i],
-                                                    "September", "9")
-          date_col[, i] <- stringr::str_replace_all(date_col[, i],
-                                                    "October", "10")
-          date_col[, i] <- stringr::str_replace_all(date_col[, i],
-                                                    "November", "11")
-          date_col[, i] <- stringr::str_replace_all(date_col[, i],
-                                                    "December", "12")
-        }
-      }
-      date_col <- do.call(paste, c(date_col, sep = "/"))
-    }
-  } else {
-    date_col <- rep_len(date, nrow(data))
-  }
-  date_col <- as.Date(date_col, format = date_format)
-
-  elapsed_col <- as.numeric(date_col - as.Date(start_date,
-                                               format = date_format))
-
-  year_col <- as.numeric(format(date_col, "%Y")) -
-    1 * (as.numeric(format(date_col, "%m")) < 7)
-
-  data <- data.frame(state = state_col,
-                     date = date_col,
-                     elapsed = elapsed_col,
-                     year = year_col,
-                     cumulative = cuml_col)
-  data <- data[order(data$date), ]
-
-  data$incident <- ave(data$cumulative, data$state,
-                       FUN = function(x) {c(x[1], diff(x))})
-
-  data$daily <- data$incident / ave(data$elapsed, data$state,
-                                    FUN = function(x) {c(x[1], diff(x))})
-
-  check_intervals <- ave(data$elapsed, data$state,
-                         FUN = function(x) {c(x[1], diff(x))})
-  if (length(unique(check_intervals)) > 1) {
-    warning("Not all time intervals are the same for incident uptake!")
-  }
-
-  return(data)
-}
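The `filters` argument format described above — first element names the column, the rest list the values to keep — looks like this in practice (call copied from scripts/evaluation_script.r):

nis_usa_2023 <- get_uptake_data(
  "data/NIS_2023-24.csv",
  state = "geography",
  date = "date",
  cumulative = "estimate",
  state_key = "data/USA_Name_Key.csv",
  date_format = "%m/%d/%Y",
  start_date = "9/12/2023",
  filters = list(c("time_type", "Weekly"), c("group_name", "Overall"))
)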
diff --git a/R/interpolate_dates.r b/R/interpolate_dates.r
deleted file mode 100644
index b5b7e28..0000000
--- a/R/interpolate_dates.r
+++ /dev/null
@@ -1,44 +0,0 @@
-#' Interpolate to match dates across data sets
-#'
-#' BEWARE: THIS DOES NOT YET ACCOUNT FOR MULTI-YEAR DATA SETS
-#'
-#' Dates may be offset between two data sets. To make the dates match,
-#' linear interpolation is performed on the input data set to align its
-#' dates with the target data set. Only target dates occurring between
-#' two input dates are included. Target dates outside the input range are
-#' discarded.
-#'
-#' @description Interpolate one uptake data set to match dates with another
-#'
-#' @param input uptake data on which to perform interpolation
-#' @param target uptake data with the dates to match
-#'
-#' @return interpolated uptake data
-#'
-#' @export
-#'
-interpolate_dates <- function(input, target) {
-  output <- target
-  output$cumulative <- rep(NA, nrow(output))
-
-  for (i in seq_len(nrow(target))) {
-    input_sub <- input[input$state == target$state[i], ]
-    time_diff <- target$date[i] - input_sub$date
-    time_idx <- order(abs(time_diff))[1:2]
-    time_diff <- time_diff[time_idx]
-    if (sum(time_diff < 0) == 1) {
-      input_sub <- input_sub[time_idx, ]
-      weights <- as.numeric(abs(time_diff)) / as.numeric(sum(abs(time_diff)))
-      output$cumulative[i] <- sum(input_sub$cumulative * rev(weights))
-    }
-  }
-
-  output <- output[!is.na(output$cumulative), ]
-
-  output$incident <- ave(output$cumulative, output$state,
-                         FUN = function(x) {c(x[1], diff(x))})
-
-  output$daily <- output$incident / ave(output$elapsed, output$state,
-                                        FUN = function(x) {c(x[1], diff(x))})
-
-  return(output)
-}
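The weighting logic in miniature, with invented numbers: a target date 3 days after one input report (cumulative 10) and 4 days before the next (cumulative 17) takes the closer report's value with the larger weight.

time_diff <- c(3, -4)                            # target date - input dates
weights <- abs(time_diff) / sum(abs(time_diff))  # c(3/7, 4/7)
sum(c(10, 17) * rev(weights))                    # 10 * 4/7 + 17 * 3/7 = 13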
diff --git a/R/retrospective_evaluate.r b/R/retrospective_evaluate.r
deleted file mode 100644
index 679503d..0000000
--- a/R/retrospective_evaluate.r
+++ /dev/null
@@ -1,82 +0,0 @@
-library(dplyr)
-
-# Retrospective evaluation of forecasts
-# MSPE only in this version.
-# Updates for the future:
-#   1. Public-health-oriented metrics: end-of-season totals, high-demand periods, etc.
-#   2. Probabilistic metrics: QS, WIS, etc.
-
-
-#' @description Get the mean squared prediction error
-#' @param data observed data; can be a constant or a vector
-#' @param pred the prediction output from models; can be a constant or a vector
-#' @return MSPE
-get_mspe <- function(data, pred) {
-  mean((data - pred)^2)
-}
-
-#' @description Retrospectively evaluate the forecast given the test data
-#' @param data_option which uptake data type to evaluate: "rate" or "cumulative"
-#' @param evaluate_option which metric to use; "mspe" only so far.
-#' Adding metrics means changing the output from the embedded functions.
-#' @param all_data data used to iteratively train and test,
-#' with each iteration moving forward one data point
-#' @param min_data_size the minimum data size required for model training;
-#' so far, this number is the same for model testing.
-#' @return a data frame with the MSPE for each initialization date
-
-evaluate_real_time_projection <- function(
-    data_option = "rate",
-    evaluate_option = "mspe",
-    all_data, min_data_size = 8) {
-  start_date <- sort(all_data$date)[1 + min_data_size - 1]
-  end_date <- sort(all_data$date)[nrow(all_data) - min_data_size + 1]
-
-  date_series <- all_data %>%
-    filter(date >= start_date, date <= end_date) %>%
-    select(date)
-
-  # convert single-column data.frame to vector #
-  date_series <- as.vector(date_series$date) + as.Date("1970-01-01")
-  metrics <- data.frame()
-
-  for (split_date in date_series) {
-    train_data <- all_data %>%
-      filter(date < split_date)
-
-    test_data <- all_data %>%
-      filter(date >= split_date)
-
-    output <- real_time_projection(
-      train_data = train_data,
-      test_data = test_data
-    )
-
-    if (data_option == "rate") {
-      data_to_eval <- output$rate
-    } else if (data_option == "cumulative") {
-      data_to_eval <- output$cumulative
-    } else {
-      stop("data_option is not valid.")
-    }
-
-    ## evaluation ##
-
-    # MSPE #
-    # So far, only MSPE is used.
-    # Note: what should the code structure be if multiple metrics are evaluated simultaneously?
-    if (evaluate_option == "mspe") {
-      metric <- get_mspe(data_to_eval$obs, data_to_eval$mean)
-    }
-
-    metrics <- rbind(
-      metrics,
-      data.frame(
-        forecast_date = split_date,
-        mspe = metric
-      )
-    )
-  }
-
-  return(metrics)
-}
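A worked check of the metric, with invented numbers:

obs <- c(1.0, 2.0, 3.0)
pred <- c(1.1, 1.8, 3.3)
get_mspe(obs, pred)  # mean(c(0.01, 0.04, 0.09)) = 0.0467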
diff --git a/scripts/evaluation_script.r b/scripts/evaluation_script.r
deleted file mode 100644
index 3242498..0000000
--- a/scripts/evaluation_script.r
+++ /dev/null
@@ -1,58 +0,0 @@
-rm(list = ls())
-# read all the R files under the R/ folder #
-R_path <- "R/"
-purrr::map(paste0(R_path, list.files("R/")), source)
-
-# load observed data #
-
-# Load 2022 NIS data for USA
-nis_usa_2022 <- get_uptake_data(
-  "https://data.cdc.gov/api/views/akkj-j5ru/rows.csv?accessType=DOWNLOAD",
-  state = "Geography",
-  date = c("Time.Period", "Year"),
-  cumulative = "Estimate....",
-  state_key = "data/USA_Name_Key.csv",
-  date_format = "%m/%d/%Y",
-  start_date = "9/2/2022",
-  filters = list(c(
-    "Indicator.Category",
-    "Received updated bivalent booster dose (among adults who completed primary series)"
-  ))
-)
-
-# Load 2023 NIS data for USA
-nis_usa_2023 <- get_uptake_data(
-  "data/NIS_2023-24.csv",
-  state = "geography",
-  date = "date",
-  cumulative = "estimate",
-  state_key = "data/USA_Name_Key.csv",
-  date_format = "%m/%d/%Y",
-  start_date = "9/12/2023",
-  filters = list(c("time_type", "Weekly"), c("group_name", "Overall"))
-)
-
-
-# generate projections initiated at different times #
-all_data <- rbind(nis_usa_2022, nis_usa_2023)
-
-# make sure there are enough data points for model training (min_data_size, default 8); 30 min to run #
-plot_real_time_projection(
-  data_option = "rate",
-  all_data = all_data
-)
-
-# evaluate MSPE between projections and data initiated at different times; 30 min to run #
-mspe_df <- evaluate_real_time_projection(evaluate_option = "mspe", data_option = "rate", all_data = all_data)
-
-mspe_df %>%
-  mutate(forecast_date = forecast_date + as.Date("1970-01-01")) %>%
-  filter(mspe < 1) %>%
-  ggplot() +
-  geom_point(aes(x = forecast_date, y = mspe)) +
-  xlab("Forecast date") +
-  ylab("MSPE") +
-  theme_bw()
-ggsave("mspe.jpeg", width = 4, height = 3, units = "in")
-# Note for future upgrade: separate model fitting from plotting and evaluation.
-# Repeatedly fitting the model to plot and evaluate is not time-efficient.
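The `+ as.Date("1970-01-01")` idiom above compensates for R's `for` loop stripping the Date class from `split_date`; a quick illustration:

d <- as.Date("2022-10-01")
for (x in d) print(x)          # prints 19266: days since the epoch, class lost
as.Date("1970-01-01") + 19266  # "2022-10-01" again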
diff --git a/scripts/projection_script.r b/scripts/projection_script.r
deleted file mode 100644
index 1201d02..0000000
--- a/scripts/projection_script.r
+++ /dev/null
@@ -1,65 +0,0 @@
-setwd("/home/tec0/covid_booster_uptake")
-source("get_uptake_data.r")
-source("interpolate_dates.r")
-source("build_scaling_model.r")
-source("evaluate_scaling.r")
-source("build_projection_model.r")
-source("evaluate_projection.r")
-source("projection_functions.r")
-
-# Load 2022 IIS data for USA
-iis_usa_2022 <- get_uptake_data(
-  "https://data.cdc.gov/api/views/unsk-b7fc/rows.csv?accessType=DOWNLOAD",
-  state = "Location",
-  date = "Date",
-  cumulative = "Bivalent_Booster_18Plus_Pop_Pct",
-  state_key = "USA_Name_Key.csv",
-  date_format = "%m/%d/%Y",
-  start_date = "9/2/2022"
-)
-
-# Load 2022 NIS data for USA
-nis_usa_2022 <- get_uptake_data(
-  "https://data.cdc.gov/api/views/akkj-j5ru/rows.csv?accessType=DOWNLOAD",
-  state = "Geography",
-  date = c("Time.Period", "Year"),
-  cumulative = "Estimate....",
-  state_key = "USA_Name_Key.csv",
-  date_format = "%m/%d/%Y",
-  start_date = "9/2/2022",
-  filters = list(c("Indicator.Category",
-                   "Received updated bivalent booster dose (among adults who completed primary series)"))
-)
-
-# Load 2023 NIS data for USA
-nis_usa_2023 <- get_uptake_data(
-  "NIS_2023-24.csv",
-  state = "geography",
-  date = "date",
-  cumulative = "estimate",
-  state_key = "USA_Name_Key.csv",
-  date_format = "%m/%d/%Y",
-  start_date = "9/12/2023",
-  filters = list(c("time_type", "Weekly"), c("group_name", "Overall"))
-)
-
-# Interpolate 2022 IIS dates to match 2022 NIS
-iis_usa_2022_interp <- interpolate_dates(iis_usa_2022, nis_usa_2022)
-
-# Build and evaluate a model to scale 2022 IIS to 2022 NIS
-iis_to_nis_model <- build_scaling_model(iis_usa_2022_interp, nis_usa_2022)
-evaluate_scaling(iis_usa_2022_interp, nis_usa_2022, iis_to_nis_model)
-
-# Build and evaluate a model to project 2022 IIS data forward
-iis_forward_model <- build_projection_model(iis_usa_2022)
-evaluate_projection(iis_usa_2022, iis_forward_model)
-evaluate_projection(iis_usa_2022, iis_forward_model, start_date = "2022-10-19")
-
-# Build and evaluate a model to project 2022 NIS data forward
-nis_forward_model <- build_projection_model(nis_usa_2022)
-evaluate_projection(nis_usa_2022, nis_forward_model)
-evaluate_projection(nis_usa_2022, nis_forward_model, start_date = "2022-09-10")
-
-# Does the 2022 NIS projection model work on the 2023 NIS data?
-evaluate_projection(nis_usa_2022, nis_forward_model, nis_usa_2023)
-evaluate_projection(nis_usa_2022, nis_forward_model, nis_usa_2023, start_date = "2022-09-30")