From c6bbee379a94e0dfef89f01671ecc9d22db49bd3 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Sat, 27 Jul 2024 10:19:56 -0700 Subject: [PATCH 1/2] use dplyr::is_else() instead of base::ifelse() to deal with factors --- R/step_bin2factor.R | 2 +- R/step_impute_mean.R | 2 +- R/step_impute_median.R | 2 +- R/step_impute_mode.R | 2 +- R/step_log.R | 2 +- R/step_novel.R | 2 +- R/step_other.R | 2 +- R/step_unknown.R | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/step_bin2factor.R b/R/step_bin2factor.R index c2565dd..3cc1688 100644 --- a/R/step_bin2factor.R +++ b/R/step_bin2factor.R @@ -8,7 +8,7 @@ orbital.step_bin2factor <- function(x, all_vars, ...) { } out <- glue::glue( - "ifelse({columns} == 1, \"{x$levels[1]}\", \"{x$levels[2]}\")" + "dplyr::if_else({columns} == 1, \"{x$levels[1]}\", \"{x$levels[2]}\")" ) names(out) <- columns diff --git a/R/step_impute_mean.R b/R/step_impute_mean.R index 605b32e..5bd623e 100644 --- a/R/step_impute_mean.R +++ b/R/step_impute_mean.R @@ -8,7 +8,7 @@ orbital.step_impute_mean <- function(x, all_vars, ...) { return(NULL) } - out <- glue::glue("ifelse(is.na({names(means)}), {means}, {names(means)})") + out <- glue::glue("dplyr::if_else(is.na({names(means)}), {means}, {names(means)})") names(out) <- names(means) out diff --git a/R/step_impute_median.R b/R/step_impute_median.R index 397e35d..86dc469 100644 --- a/R/step_impute_median.R +++ b/R/step_impute_median.R @@ -9,7 +9,7 @@ orbital.step_impute_median <- function(x, all_vars, ...) { } out <- glue::glue( - "ifelse(is.na({names(medians)}), {medians}, {names(medians)})" + "dplyr::if_else(is.na({names(medians)}), {medians}, {names(medians)})" ) names(out) <- names(medians) diff --git a/R/step_impute_mode.R b/R/step_impute_mode.R index 7754cd5..c9a5083 100644 --- a/R/step_impute_mode.R +++ b/R/step_impute_mode.R @@ -8,7 +8,7 @@ orbital.step_impute_mode <- function(x, all_vars, ...) { return(NULL) } - out <- glue::glue("ifelse(is.na({names(modes)}), \"{modes}\", {names(modes)})") + out <- glue::glue("dplyr::if_else(is.na({names(modes)}), \"{modes}\", {names(modes)})") names(out) <- names(modes) out diff --git a/R/step_log.R b/R/step_log.R index 90dab48..3587311 100644 --- a/R/step_log.R +++ b/R/step_log.R @@ -11,7 +11,7 @@ orbital.step_log <- function(x, all_vars, ...) { if (x$signed) { out <- glue::glue( - "ifelse(abs({columns}) < 1, 0, sign({columns}) * log(abs({columns}), base = {x$base}))" + "dplyr::if_else(abs({columns}) < 1, 0, sign({columns}) * log(abs({columns}), base = {x$base}))" ) } else { out <- glue::glue( diff --git a/R/step_novel.R b/R/step_novel.R index ae26f57..a0fcda1 100644 --- a/R/step_novel.R +++ b/R/step_novel.R @@ -15,7 +15,7 @@ orbital.step_novel <- function(x, all_vars, ...) { levels <- paste(levels, collapse = ", ") levels <- glue::glue("c({levels})") out[[col]] <- glue::glue( - "ifelse(is.na({col}), NA, ifelse({col} %in% {levels}, {col}, \"{x$new_level}\"))" + "dplyr::if_else(is.na({col}), NA, dplyr::if_else({col} %in% {levels}, {col}, \"{x$new_level}\"))" ) } out diff --git a/R/step_other.R b/R/step_other.R index d1231aa..cdf9dd0 100644 --- a/R/step_other.R +++ b/R/step_other.R @@ -18,7 +18,7 @@ orbital.step_other <- function(x, all_vars, ...) { levels <- paste(levels, collapse = ", ") levels <- glue::glue("c({levels})") out[[col]] <- glue::glue( - "ifelse(is.na({col}), NA, ifelse({col} %in% {levels}, {col}, \"{objects[[col]]$other}\"))" + "dplyr::if_else(is.na({col}), NA, dplyr::if_else({col} %in% {levels}, {col}, \"{objects[[col]]$other}\"))" ) } out diff --git a/R/step_unknown.R b/R/step_unknown.R index d1ed5b1..d923be7 100644 --- a/R/step_unknown.R +++ b/R/step_unknown.R @@ -8,7 +8,7 @@ orbital.step_unknown <- function(x, all_vars, ...) { return(NULL) } - out <- glue::glue("ifelse(is.na({vars}), \"{x$new_level}\", {vars})") + out <- glue::glue("dplyr::if_else(is.na({vars}), \"{x$new_level}\", {vars})") names(out) <- vars out From c8038614b68bf77cfdeed1c9a74672a34d1c86b3 Mon Sep 17 00:00:00 2001 From: Emil Hvitfeldt Date: Sat, 27 Jul 2024 11:14:11 -0700 Subject: [PATCH 2/2] add getting started vignette --- DESCRIPTION | 1 + vignettes/articles/orbital.Rmd | 111 +++++++++++++++++++++++++++++++++ 2 files changed, 112 insertions(+) create mode 100644 vignettes/articles/orbital.Rmd diff --git a/DESCRIPTION b/DESCRIPTION index f516b23..944b8fc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,6 +28,7 @@ Suggests: jsonlite, kknn, knitr, + modeldata, parsnip, R6, recipes, diff --git a/vignettes/articles/orbital.Rmd b/vignettes/articles/orbital.Rmd new file mode 100644 index 0000000..70f472a --- /dev/null +++ b/vignettes/articles/orbital.Rmd @@ -0,0 +1,111 @@ +--- +title: "Introduction to orbital" +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +## Introduction + +The orbital package allows you to turn a fitted workflow into a new object, that retains all the information needed to perform prediction. These predictions should be identical to predictions made using the original workflow objects but with smaller objects and fewer dependencies needed. + +## Creating a fitted model + +```{r setup} +library(orbital) +library(recipes) +library(parsnip) +library(workflows) +library(modeldata) +``` + +We will be using the Ames housing data: + +```{r} +ames +``` + +We won't do a [data split](https://www.tmwr.org/splitting) here to get to the point of the package faster, but you should do that in practice. + +our model will be in two parts. First, we will create a recipe to do the preprocessing, then specify a parsnip model to go along with it. + +This is the recipe we will be going with + +```{r} +rec_spec <- recipe(Sale_Price ~ ., data = ames) |> + step_impute_median(all_numeric_predictors()) |> + step_unknown(all_nominal_predictors()) |> + step_other(all_nominal_predictors()) |> + step_dummy(all_nominal_predictors()) |> + step_nzv(all_numeric_predictors()) |> + step_normalize(all_numeric_predictors()) |> + step_corr(all_numeric_predictors()) +``` + +we will be using a standard linear regression + +```{r} +lm_spec <- linear_reg() +``` + +Putting them together in a workflow gives us this fitted model. + +```{r} +wf_spec <- workflow(rec_spec, lm_spec) +wf_fit <- fit(wf_spec, data = ames) +wf_fit +``` + +## Converting model + +Once we have a fitted workflow all we have to do is call the `orbital()` function on the fitted workflow. This will create an orbital object that we will use from here on out. + +```{r} +orbital_obj <- orbital(wf_fit) +orbital_obj +``` + +One of the neat things about orbital objects is that they only require the orbital package to be loaded, compared to the workflow object which needs recipes, parsnip, workflows and the engine package to be loaded. It is also substantially smaller in size. + +```{r} +object.size(orbital_obj) +object.size(wf_fit) +``` + +## Predicting + +Predicting with an orbital object is done using the `predict()` function, the same way it is done with workflows objects. + +```{r} +predict(orbital_obj, ames) +``` + +Notice how it produces the same results as if we were to `predict()` on the workflow object. + +```{r} +predict(wf_fit, ames) +``` + +orbital objects also allow for prediction in database objects such as SQL or spark databases. Below is a small example using an ephemeral in-memory RSQLite database. + +```{r} +library(DBI) +library(RSQLite) + +con <- dbConnect(SQLite(), path = ":memory:") +ames_sqlite <- copy_to(con, ames) + +predict(orbital_obj, ames_sqlite) +``` + +## Code Generation + +In the same way that you can predict in databases, you can also get the code needed to run the query. + +```{r} +orbital_sql(orbital_obj, con) +``` \ No newline at end of file