From 4c177a56468c16dbe334e82668d5047074b02943 Mon Sep 17 00:00:00 2001 From: David Cortes Date: Thu, 14 Oct 2021 20:34:38 -0300 Subject: [PATCH 01/40] idiomatic serialization --- R-package/R/lgb.Booster.R | 54 +++++++++--- R-package/R/lgb.Predictor.R | 7 +- R-package/R/lgb.cv.R | 5 ++ R-package/R/lgb.drop_serialized.R | 17 ++++ R-package/R/lgb.importance.R | 1 + R-package/R/lgb.make_serializable.R | 17 ++++ R-package/R/lgb.restore_handle.R | 18 ++++ R-package/R/lgb.train.R | 4 + R-package/R/lightgbm.R | 20 +++++ R-package/R/readRDS.lgb.Booster.R | 62 ------------- R-package/R/saveRDS.lgb.Booster.R | 89 ------------------- R-package/man/lgb.cv.Rd | 4 + R-package/man/lgb.drop_serialized.Rd | 23 +++++ R-package/man/lgb.load.Rd | 2 +- R-package/man/lgb.make_serializable.Rd | 23 +++++ R-package/man/lgb.restore_handle.Rd | 24 ++++++ R-package/man/lgb.train.Rd | 4 + R-package/man/lgb_shared_params.Rd | 21 +++++ R-package/man/lightgbm.Rd | 4 + R-package/man/readRDS.lgb.Booster.Rd | 47 ---------- R-package/man/saveRDS.lgb.Booster.Rd | 70 --------------- R-package/src/lightgbm_R.cpp | 21 +++-- R-package/tests/testthat/test_lgb.Booster.R | 96 ++++----------------- 23 files changed, 265 insertions(+), 368 deletions(-) create mode 100644 R-package/R/lgb.drop_serialized.R create mode 100644 R-package/R/lgb.make_serializable.R create mode 100644 R-package/R/lgb.restore_handle.R delete mode 100644 R-package/R/readRDS.lgb.Booster.R delete mode 100644 R-package/R/saveRDS.lgb.Booster.R create mode 100644 R-package/man/lgb.drop_serialized.Rd create mode 100644 R-package/man/lgb.make_serializable.Rd create mode 100644 R-package/man/lgb.restore_handle.Rd delete mode 100644 R-package/man/readRDS.lgb.Booster.Rd delete mode 100644 R-package/man/saveRDS.lgb.Booster.Rd diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index 586f3a38899d..8f61e8869851 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -84,9 +84,11 @@ Booster <- R6::R6Class( } else if 
(!is.null(model_str)) { - # Do we have a model_str as character? - if (!is.character(model_str)) { - stop("lgb.Booster: Can only use a string as model_str") + # Do we have a model_str as character/raw? + if (is.character(model_str)) + model_str <- charToRaw(model_str) + if (!is.raw(model_str)) { + stop("lgb.Booster: Can only use a character/raw vector as model_str") } # Create booster from model @@ -436,7 +438,7 @@ Booster <- R6::R6Class( return(invisible(self)) }, - save_model_to_string = function(num_iteration = NULL, feature_importance_type = 0L) { + save_model_to_string = function(num_iteration = NULL, feature_importance_type = 0L, as_char=TRUE) { if (is.null(num_iteration)) { num_iteration <- self$best_iter @@ -449,6 +451,9 @@ Booster <- R6::R6Class( , as.integer(feature_importance_type) ) + if (as_char) + model_str <- rawToChar(model_str) + return(model_str) }, @@ -527,17 +532,37 @@ Booster <- R6::R6Class( return(Predictor$new(modelfile = private$handle)) }, - # Used for save - raw = NA, + # Used for serialization + raw = NULL, + + # Store serialized raw bytes in model object + save_raw = function() { + if (is.null(self$raw)) + self$raw <- self$save_model_to_string(NULL, as_char=FALSE) + return(invisible(NULL)) + + }, - # Save model to temporary file for in-memory saving - save = function() { + drop_raw = function() { + self$raw <- NULL + return(invisible(NULL)) + }, - # Overwrite model in object - self$raw <- self$save_model_to_string(NULL) + check_null_handle = function() { + return(lgb.is.null.handle(private$handle)) + }, + restore_handle = function() { + if (self$check_null_handle()) { + if (is.null(self$raw)) + stop("LightGBM model is not de-serializable. 
Try using 'serializable=TRUE'.") + private$handle <- .Call(LGBM_BoosterLoadModelFromString_R, self$raw) + } return(invisible(NULL)) + }, + get_handle = function() { + return(private$handle) } ), @@ -784,6 +809,7 @@ predict.lgb.Booster <- function(object, if (!lgb.is.Booster(x = object)) { stop("predict.lgb.Booster: object should be an ", sQuote("lgb.Booster")) } + object$restore_handle() additional_params <- list(...) if (length(additional_params) > 0L) { @@ -815,7 +841,7 @@ predict.lgb.Booster <- function(object, #' @description Load LightGBM takes in either a file path or model string. #' If both are provided, Load will default to loading from file #' @param filename path of model file -#' @param model_str a str containing the model +#' @param model_str a str containing the model (as a `character` or `raw` vector) #' #' @return lgb.Booster #' @@ -863,9 +889,11 @@ lgb.load <- function(filename = NULL, model_str = NULL) { return(invisible(Booster$new(modelfile = filename))) } + if (is.character(model_str)) + model_str <- charToRaw(model_str) if (model_str_provided) { - if (!is.character(model_str)) { - stop("lgb.load: model_str should be character") + if (!is.raw(model_str)) { + stop("lgb.load: model_str should be a character/raw vector") } return(invisible(Booster$new(model_str = model_str))) } diff --git a/R-package/R/lgb.Predictor.R b/R-package/R/lgb.Predictor.R index 147bed11ae7f..4d11e3833b73 100644 --- a/R-package/R/lgb.Predictor.R +++ b/R-package/R/lgb.Predictor.R @@ -39,12 +39,17 @@ Predictor <- R6::R6Class( ) private$need_free_handle <- TRUE - } else if (methods::is(modelfile, "lgb.Booster.handle")) { + } else if (methods::is(modelfile, "lgb.Booster.handle") || inherits(modelfile, "externalptr")) { # Check if model file is a booster handle already handle <- modelfile private$need_free_handle <- FALSE + } else if (lgb.is.Booster(modelfile)) { + + handle <- modelfile$get_handle() + private$need_free_handle <- FALSE + } else { stop("lgb.Predictor: modelfile 
must be either a character filename or an lgb.Booster.handle") diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R index 039411e4cf40..8836428732c7 100644 --- a/R-package/R/lgb.cv.R +++ b/R-package/R/lgb.cv.R @@ -97,6 +97,7 @@ lgb.cv <- function(params = list() , categorical_feature = NULL , early_stopping_rounds = NULL , callbacks = list() + , serializable = TRUE , reset_data = FALSE , ... ) { @@ -456,6 +457,10 @@ lgb.cv <- function(params = list() }) } + if (serializable) { + lapply(cv_booster$boosters, function(model) model$booster$save_raw()) + } + return(cv_booster) } diff --git a/R-package/R/lgb.drop_serialized.R b/R-package/R/lgb.drop_serialized.R new file mode 100644 index 000000000000..afffe1226455 --- /dev/null +++ b/R-package/R/lgb.drop_serialized.R @@ -0,0 +1,17 @@ +#' @name lgb.drop_serialized +#' @title Drop serialized raw bytes in a LightGBM model object +#' @description If a LightGBM model object was produced with argument `serializable=TRUE`, the R object will keep +#' a copy of the underlying C++ object as raw bytes, which can be used to reconstruct such object after getting +#' serialized and de-serialized, but at the cost of extra memory usage. If these raw bytes are not needed anymore, +#' they can be dropped through this function in order to save memory. Note that the object will be modified in-place. +#' @param model \code{lgb.Booster} object which was produced with `serializable=TRUE`. +#' +#' @return \code{lgb.Booster} (the same `model` object that was passed as input, as invisible). +#' @seealso \link{lgb.restore_handle}, \link{lgb.make_serializable}. 
+#' @examples +#' @export +lgb.drop_serialized <- function(model) { + stopifnot(lgb.is.Booster(model)) + model$drop_raw() + return(invisible(model)) +} diff --git a/R-package/R/lgb.importance.R b/R-package/R/lgb.importance.R index c05c6628be34..687e710ccff3 100644 --- a/R-package/R/lgb.importance.R +++ b/R-package/R/lgb.importance.R @@ -41,6 +41,7 @@ lgb.importance <- function(model, percentage = TRUE) { if (!lgb.is.Booster(x = model)) { stop("'model' has to be an object of class lgb.Booster") } + model$restore_handle() # Setup importance tree_dt <- lgb.model.dt.tree(model = model) diff --git a/R-package/R/lgb.make_serializable.R b/R-package/R/lgb.make_serializable.R new file mode 100644 index 000000000000..5e552f8fd925 --- /dev/null +++ b/R-package/R/lgb.make_serializable.R @@ -0,0 +1,17 @@ +#' @name lgb.make_serializable +#' @title Make a LightGBM object serializable by keeping raw bytes +#' @description If a LightGBM model object was produced with argument `serializable=FALSE`, the R object will not +#' be serializable (e.g. cannot save and load with \code{saveRDS} and \code{readRDS}) as it will lack the raw bytes +#' needed to reconstruct its underlying C++ object. This function can be used to forcibly produce those serialized +#' raw bytes and make the object serializable. Note that the object will be modified in-place. +#' @param model \code{lgb.Booster} object which was produced with `serializable=FALSE`. +#' +#' @return \code{lgb.Booster} (the same `model` object that was passed as input, as invisible). +#' @seealso \link{lgb.restore_handle}, \link{lgb.drop_serialized}. 
+#' @examples +#' @export +lgb.make_serializable <- function(model) { + stopifnot(lgb.is.Booster(model)) + model$save_raw() + return(invisible(model)) +} diff --git a/R-package/R/lgb.restore_handle.R b/R-package/R/lgb.restore_handle.R new file mode 100644 index 000000000000..1fd01ab7c308 --- /dev/null +++ b/R-package/R/lgb.restore_handle.R @@ -0,0 +1,18 @@ +#' @name lgb.restore_handle +#' @title Restore the C++ component of a deserialized LGB model +#' @description After a LightGBM model object is de-serialized through functions such as \code{load} or +#' \code{readRDS}, its underlying C++ object will be blank and needs to be restored to be able to use it. Such +#' object is restored automatically when calling functions such as \code{predict}, but this function can be +#' used to forcibly restore it beforehand. Note that the object will be modified in-place. +#' @param model \code{lgb.Booster} object which was de-serialized and whose underlying C++ object and R handle +#' need to be restored. +#' +#' @return \code{lgb.Booster} (the same `model` object that was passed as input, as invisible). +#' @seealso \link{lgb.make_serializable}, \link{lgb.drop_serialized}. +#' @examples +#' @export +lgb.restore_handle <- function(model) { + stopifnot(lgb.is.Booster(model)) + model$restore_handle() + return(invisible(model)) +} diff --git a/R-package/R/lgb.train.R b/R-package/R/lgb.train.R index 6f602fe416a4..d579aa1b60be 100644 --- a/R-package/R/lgb.train.R +++ b/R-package/R/lgb.train.R @@ -66,6 +66,7 @@ lgb.train <- function(params = list(), categorical_feature = NULL, early_stopping_rounds = NULL, callbacks = list(), + serializable = TRUE, reset_data = FALSE, ...) 
{ @@ -395,6 +396,9 @@ lgb.train <- function(params = list(), } + if (serializable) + booster$save_raw() + return(booster) } diff --git a/R-package/R/lightgbm.R b/R-package/R/lightgbm.R index e2df9063ed26..faceb91555cc 100644 --- a/R-package/R/lightgbm.R +++ b/R-package/R/lightgbm.R @@ -52,6 +52,9 @@ #' @param params a list of parameters. See \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html}{ #' the "Parameters" section of the documentation} for a list of parameters and valid values. #' @param verbose verbosity for output, if <= 0, also will disable the print of evaluation during training +#' @param serializable whether to make the resulting objects serializable through functions such as +#' \code{save} or \code{saveRDS} (see section "Model serialization"). +#' #' @section Early Stopping: #' #' "early stopping" refers to stopping the training process if the model's performance on a given @@ -66,6 +69,21 @@ #' in \code{params}, that metric will be considered the "first" one. If you omit \code{metric}, #' a default metric will be used based on your choice for the parameter \code{obj} (keyword argument) #' or \code{objective} (passed into \code{params}). +#' @section Model serialization: +#' +#' LightGBM model objects can be serialized and de-serialized through functions such as \code{save} +#' or \code{saveRDS}, but similarly to libraries such as 'xgboost', serialization works a bit differently +#' from typical R objects. In order to make models serializable in R, a copy of the underlying C++ object +#' as serialized raw bytes is produced and stored in the R model object, and when this R object is +#' de-serialized, the underlying C++ model object gets reconstructed from these raw bytes, but will only +#' do so once some function that uses it is called, such as \code{predict}. In order to forcibly +#' reconstruct the C++ object after deserialization (e.g. 
after calling \code{readRDS} or similar), one +#' can use the function \link{lgb.restore_handle} (for example, if one makes predictions in parallel or in +#' forked processes, it will be faster to restore the handle beforehand). +#' +#' Producing and keeping these raw bytes however uses extra memory, and if they are not required, +#' it is possible to avoid producing them by passing `serializable=FALSE`. In such cases, these raw +#' bytes can be added to the model on demand through function \link{lgb.make_serializable}. #' @keywords internal NULL @@ -113,6 +131,7 @@ lightgbm <- function(data, save_name = "lightgbm.model", init_model = NULL, callbacks = list(), + serializable = TRUE, ...) { # validate inputs early to avoid unnecessary computation @@ -137,6 +156,7 @@ lightgbm <- function(data, , "early_stopping_rounds" = early_stopping_rounds , "init_model" = init_model , "callbacks" = callbacks + , "serializable" = serializable ) train_args <- append(train_args, list(...)) diff --git a/R-package/R/readRDS.lgb.Booster.R b/R-package/R/readRDS.lgb.Booster.R deleted file mode 100644 index 4844abeb3aa9..000000000000 --- a/R-package/R/readRDS.lgb.Booster.R +++ /dev/null @@ -1,62 +0,0 @@ -#' @name readRDS.lgb.Booster -#' @title readRDS for \code{lgb.Booster} models -#' @description Attempts to load a model stored in a \code{.rds} file, using \code{\link[base]{readRDS}} -#' @param file a connection or the name of the file where the R object is saved to or read from. -#' @param refhook a hook function for handling reference objects. 
-#' -#' @return \code{lgb.Booster} -#' -#' @examples -#' \donttest{ -#' library(lightgbm) -#' data(agaricus.train, package = "lightgbm") -#' train <- agaricus.train -#' dtrain <- lgb.Dataset(train$data, label = train$label) -#' data(agaricus.test, package = "lightgbm") -#' test <- agaricus.test -#' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) -#' params <- list( -#' objective = "regression" -#' , metric = "l2" -#' , min_data = 1L -#' , learning_rate = 1.0 -#' ) -#' valids <- list(test = dtest) -#' model <- lgb.train( -#' params = params -#' , data = dtrain -#' , nrounds = 10L -#' , valids = valids -#' , early_stopping_rounds = 5L -#' ) -#' model_file <- tempfile(fileext = ".rds") -#' saveRDS.lgb.Booster(model, model_file) -#' new_model <- readRDS.lgb.Booster(model_file) -#' } -#' @export -readRDS.lgb.Booster <- function(file, refhook = NULL) { - - object <- readRDS(file = file, refhook = refhook) - - # Check if object has the model stored - if (!is.na(object$raw)) { - - # Create temporary model for the model loading - object2 <- lgb.load(model_str = object$raw) - - # Restore best iteration and recorded evaluations - object2$best_iter <- object$best_iter - object2$record_evals <- object$record_evals - object2$params <- object$params - - # Return newly loaded object - return(object2) - - } else { - - # Return RDS loaded object - return(object) - - } - -} diff --git a/R-package/R/saveRDS.lgb.Booster.R b/R-package/R/saveRDS.lgb.Booster.R deleted file mode 100644 index 705cc266fcbb..000000000000 --- a/R-package/R/saveRDS.lgb.Booster.R +++ /dev/null @@ -1,89 +0,0 @@ -#' @name saveRDS.lgb.Booster -#' @title saveRDS for \code{lgb.Booster} models -#' @description Attempts to save a model using RDS. Has an additional parameter (\code{raw}) -#' which decides whether to save the raw model or not. -#' @param object \code{lgb.Booster} object to serialize. -#' @param file a connection or the name of the file where the R object is saved to or read from. 
-#' @param ascii a logical. If TRUE or NA, an ASCII representation is written; otherwise (default), -#' a binary one is used. See the comments in the help for save. -#' @param version the workspace format version to use. \code{NULL} specifies the current default -#' version (2). Versions prior to 2 are not supported, so this will only be relevant -#' when there are later versions. -#' @param compress a logical specifying whether saving to a named file is to use "gzip" compression, -#' or one of \code{"gzip"}, \code{"bzip2"} or \code{"xz"} to indicate the type of -#' compression to be used. Ignored if file is a connection. -#' @param refhook a hook function for handling reference objects. -#' @param raw whether to save the model in a raw variable or not, recommended to leave it to \code{TRUE}. -#' -#' @return NULL invisibly. -#' -#' @examples -#' \donttest{ -#' library(lightgbm) -#' data(agaricus.train, package = "lightgbm") -#' train <- agaricus.train -#' dtrain <- lgb.Dataset(train$data, label = train$label) -#' data(agaricus.test, package = "lightgbm") -#' test <- agaricus.test -#' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) -#' params <- list( -#' objective = "regression" -#' , metric = "l2" -#' , min_data = 1L -#' , learning_rate = 1.0 -#' ) -#' valids <- list(test = dtest) -#' model <- lgb.train( -#' params = params -#' , data = dtrain -#' , nrounds = 10L -#' , valids = valids -#' , early_stopping_rounds = 5L -#' ) -#' model_file <- tempfile(fileext = ".rds") -#' saveRDS.lgb.Booster(model, model_file) -#' } -#' @export -saveRDS.lgb.Booster <- function(object, - file, - ascii = FALSE, - version = NULL, - compress = TRUE, - refhook = NULL, - raw = TRUE) { - - # Check if object has a raw value (and if the user wants to store the raw) - if (is.na(object$raw) && raw) { - - object$save() - - saveRDS( - object - , file = file - , ascii = ascii - , version = version - , compress = compress - , refhook = refhook - ) - - # Free model from 
memory - object$raw <- NA - - return(invisible(NULL)) - - } else { - - saveRDS( - object - , file = file - , ascii = ascii - , version = version - , compress = compress - , refhook = refhook - ) - - return(invisible(NULL)) - - } - -} diff --git a/R-package/man/lgb.cv.Rd b/R-package/man/lgb.cv.Rd index bd098a81dbdc..5940b657e5ce 100644 --- a/R-package/man/lgb.cv.Rd +++ b/R-package/man/lgb.cv.Rd @@ -24,6 +24,7 @@ lgb.cv( categorical_feature = NULL, early_stopping_rounds = NULL, callbacks = list(), + serializable = TRUE, reset_data = FALSE, ... ) @@ -115,6 +116,9 @@ set to the iteration number of the best iteration.} \item{callbacks}{List of callback functions that are applied at each iteration.} +\item{serializable}{whether to make the resulting objects serializable through functions such as +\code{save} or \code{saveRDS} (see section "Model serialization").} + \item{reset_data}{Boolean, setting it to TRUE (not the default value) will transform the booster model into a predictor model which frees up memory and the original datasets} diff --git a/R-package/man/lgb.drop_serialized.Rd b/R-package/man/lgb.drop_serialized.Rd new file mode 100644 index 000000000000..3c7d08fe9aea --- /dev/null +++ b/R-package/man/lgb.drop_serialized.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lgb.drop_serialized.R +\name{lgb.drop_serialized} +\alias{lgb.drop_serialized} +\title{Drop serialized raw bytes in a LightGBM model object} +\usage{ +lgb.drop_serialized(model) +} +\arguments{ +\item{model}{\code{lgb.Booster} object which was produced with `serializable=TRUE`.} +} +\value{ +\code{lgb.Booster} (the same `model` object that was passed as input, as invisible). 
+} +\description{ +If a LightGBM model object was produced with argument `serializable=TRUE`, the R object will keep +a copy of the underlying C++ object as raw bytes, which can be used to reconstruct such object after getting +serialized and de-serialized, but at the cost of extra memory usage. If these raw bytes are not needed anymore, +they can be dropped through this function in order to save memory. Note that the object will be modified in-place. +} +\seealso{ +\link{lgb.restore_handle}, \link{lgb.make_serializable}. +} diff --git a/R-package/man/lgb.load.Rd b/R-package/man/lgb.load.Rd index 55d241b2d670..775003c3279f 100644 --- a/R-package/man/lgb.load.Rd +++ b/R-package/man/lgb.load.Rd @@ -9,7 +9,7 @@ lgb.load(filename = NULL, model_str = NULL) \arguments{ \item{filename}{path of model file} -\item{model_str}{a str containing the model} +\item{model_str}{a str containing the model (as a `character` or `raw` vector)} } \value{ lgb.Booster diff --git a/R-package/man/lgb.make_serializable.Rd b/R-package/man/lgb.make_serializable.Rd new file mode 100644 index 000000000000..0a237c4eeb3e --- /dev/null +++ b/R-package/man/lgb.make_serializable.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lgb.make_serializable.R +\name{lgb.make_serializable} +\alias{lgb.make_serializable} +\title{Make a LightGBM object serializable by keeping raw bytes} +\usage{ +lgb.make_serializable(model) +} +\arguments{ +\item{model}{\code{lgb.Booster} object which was produced with `serializable=FALSE`.} +} +\value{ +\code{lgb.Booster} (the same `model` object that was passed as input, as invisible). +} +\description{ +If a LightGBM model object was produced with argument `serializable=FALSE`, the R object will not +be serializable (e.g. cannot save and load with \code{saveRDS} and \code{readRDS}) as it will lack the raw bytes +needed to reconstruct its underlying C++ object. 
This function can be used to forcibly produce those serialized +raw bytes and make the object serializable. Note that the object will be modified in-place. +} +\seealso{ +\link{lgb.restore_handle}, \link{lgb.drop_serialized}. +} diff --git a/R-package/man/lgb.restore_handle.Rd b/R-package/man/lgb.restore_handle.Rd new file mode 100644 index 000000000000..c4f4ed102a45 --- /dev/null +++ b/R-package/man/lgb.restore_handle.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lgb.restore_handle.R +\name{lgb.restore_handle} +\alias{lgb.restore_handle} +\title{Restore the C++ component of a deserialized LGB model} +\usage{ +lgb.restore_handle(model) +} +\arguments{ +\item{model}{\code{lgb.Booster} object which was de-serialized and whose underlying C++ object and R handle +need to be restored.} +} +\value{ +\code{lgb.Booster} (the same `model` object that was passed as input, as invisible). +} +\description{ +After a LightGBM model object is de-serialized through functions such as \code{load} or +\code{readRDS}, its underlying C++ object will be blank and needs to be restored to be able to use it. Such +object is restored automatically when calling functions such as \code{predict}, but this function can be +used to forcibly restore it beforehand. Note that the object will be modified in-place. +} +\seealso{ +\link{lgb.make_serializable}, \link{lgb.drop_serialized}. +} diff --git a/R-package/man/lgb.train.Rd b/R-package/man/lgb.train.Rd index 4c8935df4f2d..085b89a665cb 100644 --- a/R-package/man/lgb.train.Rd +++ b/R-package/man/lgb.train.Rd @@ -19,6 +19,7 @@ lgb.train( categorical_feature = NULL, early_stopping_rounds = NULL, callbacks = list(), + serializable = TRUE, reset_data = FALSE, ... 
) @@ -95,6 +96,9 @@ set to the iteration number of the best iteration.} \item{callbacks}{List of callback functions that are applied at each iteration.} +\item{serializable}{whether to make the resulting objects serializable through functions such as +\code{save} or \code{saveRDS} (see section "Model serialization").} + \item{reset_data}{Boolean, setting it to TRUE (not the default value) will transform the booster model into a predictor model which frees up memory and the original datasets} diff --git a/R-package/man/lgb_shared_params.Rd b/R-package/man/lgb_shared_params.Rd index e5288e0450ae..0caa3eb4cfab 100644 --- a/R-package/man/lgb_shared_params.Rd +++ b/R-package/man/lgb_shared_params.Rd @@ -64,6 +64,9 @@ set to the iteration number of the best iteration.} the "Parameters" section of the documentation} for a list of parameters and valid values.} \item{verbose}{verbosity for output, if <= 0, also will disable the print of evaluation during training} + +\item{serializable}{whether to make the resulting objects serializable through functions such as +\code{save} or \code{saveRDS} (see section "Model serialization").} } \description{ Parameter docs shared by \code{lgb.train}, \code{lgb.cv}, and \code{lightgbm} @@ -85,4 +88,22 @@ Parameter docs shared by \code{lgb.train}, \code{lgb.cv}, and \code{lightgbm} or \code{objective} (passed into \code{params}). } +\section{Model serialization}{ + + + LightGBM model objects can be serialized and de-serialized through functions such as \code{save} + or \code{saveRDS}, but similarly to libraries such as 'xgboost', serialization works a bit differently + from typical R objects. 
In order to make models serializable in R, a copy of the underlying C++ object + as serialized raw bytes is produced and stored in the R model object, and when this R object is + de-serialized, the underlying C++ model object gets reconstructed from these raw bytes, but will only + do so once some function that uses it is called, such as \code{predict}. In order to forcibly + reconstruct the C++ object after deserialization (e.g. after calling \code{readRDS} or similar), one + can use the function \link{lgb.restore_handle} (for example, if one makes predictions in parallel or in + forked processes, it will be faster to restore the handle beforehand). + + Producing and keeping these raw bytes however uses extra memory, and if they are not required, + it is possible to avoid producing them by passing `serializable=FALSE`. In such cases, these raw + bytes can be added to the model on demand through function \link{lgb.make_serializable}. +} + \keyword{internal} diff --git a/R-package/man/lightgbm.Rd b/R-package/man/lightgbm.Rd index 6512dbc6b23a..c506ee1465d7 100644 --- a/R-package/man/lightgbm.Rd +++ b/R-package/man/lightgbm.Rd @@ -16,6 +16,7 @@ lightgbm( save_name = "lightgbm.model", init_model = NULL, callbacks = list(), + serializable = TRUE, ... ) } @@ -49,6 +50,9 @@ set to the iteration number of the best iteration.} \item{callbacks}{List of callback functions that are applied at each iteration.} +\item{serializable}{whether to make the resulting objects serializable through functions such as +\code{save} or \code{saveRDS} (see section "Model serialization").} + \item{...}{Additional arguments passed to \code{\link{lgb.train}}. 
For example \itemize{ \item{\code{valids}: a list of \code{lgb.Dataset} objects, used for validation} diff --git a/R-package/man/readRDS.lgb.Booster.Rd b/R-package/man/readRDS.lgb.Booster.Rd deleted file mode 100644 index 3592148ac128..000000000000 --- a/R-package/man/readRDS.lgb.Booster.Rd +++ /dev/null @@ -1,47 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/readRDS.lgb.Booster.R -\name{readRDS.lgb.Booster} -\alias{readRDS.lgb.Booster} -\title{readRDS for \code{lgb.Booster} models} -\usage{ -readRDS.lgb.Booster(file, refhook = NULL) -} -\arguments{ -\item{file}{a connection or the name of the file where the R object is saved to or read from.} - -\item{refhook}{a hook function for handling reference objects.} -} -\value{ -\code{lgb.Booster} -} -\description{ -Attempts to load a model stored in a \code{.rds} file, using \code{\link[base]{readRDS}} -} -\examples{ -\donttest{ -library(lightgbm) -data(agaricus.train, package = "lightgbm") -train <- agaricus.train -dtrain <- lgb.Dataset(train$data, label = train$label) -data(agaricus.test, package = "lightgbm") -test <- agaricus.test -dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) -params <- list( - objective = "regression" - , metric = "l2" - , min_data = 1L - , learning_rate = 1.0 -) -valids <- list(test = dtest) -model <- lgb.train( - params = params - , data = dtrain - , nrounds = 10L - , valids = valids - , early_stopping_rounds = 5L -) -model_file <- tempfile(fileext = ".rds") -saveRDS.lgb.Booster(model, model_file) -new_model <- readRDS.lgb.Booster(model_file) -} -} diff --git a/R-package/man/saveRDS.lgb.Booster.Rd b/R-package/man/saveRDS.lgb.Booster.Rd deleted file mode 100644 index 02f5a715762c..000000000000 --- a/R-package/man/saveRDS.lgb.Booster.Rd +++ /dev/null @@ -1,70 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/saveRDS.lgb.Booster.R -\name{saveRDS.lgb.Booster} -\alias{saveRDS.lgb.Booster} 
-\title{saveRDS for \code{lgb.Booster} models} -\usage{ -saveRDS.lgb.Booster( - object, - file, - ascii = FALSE, - version = NULL, - compress = TRUE, - refhook = NULL, - raw = TRUE -) -} -\arguments{ -\item{object}{\code{lgb.Booster} object to serialize.} - -\item{file}{a connection or the name of the file where the R object is saved to or read from.} - -\item{ascii}{a logical. If TRUE or NA, an ASCII representation is written; otherwise (default), -a binary one is used. See the comments in the help for save.} - -\item{version}{the workspace format version to use. \code{NULL} specifies the current default -version (2). Versions prior to 2 are not supported, so this will only be relevant -when there are later versions.} - -\item{compress}{a logical specifying whether saving to a named file is to use "gzip" compression, -or one of \code{"gzip"}, \code{"bzip2"} or \code{"xz"} to indicate the type of -compression to be used. Ignored if file is a connection.} - -\item{refhook}{a hook function for handling reference objects.} - -\item{raw}{whether to save the model in a raw variable or not, recommended to leave it to \code{TRUE}.} -} -\value{ -NULL invisibly. -} -\description{ -Attempts to save a model using RDS. Has an additional parameter (\code{raw}) - which decides whether to save the raw model or not. 
-} -\examples{ -\donttest{ -library(lightgbm) -data(agaricus.train, package = "lightgbm") -train <- agaricus.train -dtrain <- lgb.Dataset(train$data, label = train$label) -data(agaricus.test, package = "lightgbm") -test <- agaricus.test -dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) -params <- list( - objective = "regression" - , metric = "l2" - , min_data = 1L - , learning_rate = 1.0 -) -valids <- list(test = dtest) -model <- lgb.train( - params = params - , data = dtrain - , nrounds = 10L - , valids = valids - , early_stopping_rounds = 5L -) -model_file <- tempfile(fileext = ".rds") -saveRDS.lgb.Booster(model, model_file) -} -} diff --git a/R-package/src/lightgbm_R.cpp b/R-package/src/lightgbm_R.cpp index 99dd666dbf9e..ac55112da0ac 100644 --- a/R-package/src/lightgbm_R.cpp +++ b/R-package/src/lightgbm_R.cpp @@ -60,6 +60,10 @@ SEXP wrapped_R_string(void *len) { return Rf_allocVector(STRSXP, *(reinterpret_cast(len))); } +SEXP wrapped_R_raw(void *len) { + return Rf_allocVector(RAWSXP, *(reinterpret_cast(len))); +} + SEXP wrapped_Rf_mkChar(void *txt) { return Rf_mkChar(reinterpret_cast(txt)); } @@ -75,6 +79,10 @@ SEXP safe_R_string(R_xlen_t len, SEXP *cont_token) { return R_UnwindProtect(wrapped_R_string, reinterpret_cast(&len), throw_R_memerr, cont_token, *cont_token); } +SEXP safe_R_raw(R_xlen_t len, SEXP *cont_token) { + return R_UnwindProtect(wrapped_R_raw, reinterpret_cast(&len), throw_R_memerr, cont_token, *cont_token); +} + SEXP safe_R_mkChar(char *txt, SEXP *cont_token) { return R_UnwindProtect(wrapped_Rf_mkChar, reinterpret_cast(txt), throw_R_memerr, cont_token, *cont_token); } @@ -463,12 +471,12 @@ SEXP LGBM_BoosterLoadModelFromString_R(SEXP model_str) { R_API_BEGIN(); SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue)); int out_num_iterations = 0; - const char* model_str_ptr = CHAR(PROTECT(Rf_asChar(model_str))); + const char* model_str_ptr = reinterpret_cast(RAW(model_str)); BoosterHandle handle = nullptr; 
CHECK_CALL(LGBM_BoosterLoadModelFromString(model_str_ptr, &out_num_iterations, &handle)); R_SetExternalPtrAddr(ret, handle); R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE); - UNPROTECT(2); + UNPROTECT(1); return ret; R_API_END(); } @@ -819,20 +827,19 @@ SEXP LGBM_BoosterSaveModelToString_R(SEXP handle, SEXP cont_token = PROTECT(R_MakeUnwindCont()); R_API_BEGIN(); _AssertBoosterHandleNotNull(handle); - SEXP model_str; int64_t out_len = 0; int64_t buf_len = 1024 * 1024; int num_iter = Rf_asInteger(num_iteration); int importance_type = Rf_asInteger(feature_importance_type); std::vector inner_char_buf(buf_len); CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), 0, num_iter, importance_type, buf_len, &out_len, inner_char_buf.data())); + SEXP model_str = PROTECT(safe_R_raw(out_len, &cont_token)); // if the model string was larger than the initial buffer, allocate a bigger buffer and try again if (out_len > buf_len) { - inner_char_buf.resize(out_len); - CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), 0, num_iter, importance_type, out_len, &out_len, inner_char_buf.data())); + CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), 0, num_iter, importance_type, out_len, &out_len, reinterpret_cast(RAW(model_str)))); + } else { + std::copy(inner_char_buf.begin(), inner_char_buf.begin() + out_len, reinterpret_cast(RAW(model_str))); } - model_str = PROTECT(safe_R_string(static_cast(1), &cont_token)); - SET_STRING_ELT(model_str, 0, safe_R_mkChar(inner_char_buf.data(), &cont_token)); UNPROTECT(2); return model_str; R_API_END(); diff --git a/R-package/tests/testthat/test_lgb.Booster.R b/R-package/tests/testthat/test_lgb.Booster.R index b76acd9d9d09..8cc048be93aa 100644 --- a/R-package/tests/testthat/test_lgb.Booster.R +++ b/R-package/tests/testthat/test_lgb.Booster.R @@ -819,79 +819,6 @@ test_that("early_stopping, num_iterations are stored correctly in model string e }) -test_that("Booster: method calls Booster with a null 
handle should raise an informative error and not segfault", { - data(agaricus.train, package = "lightgbm") - train <- agaricus.train - dtrain <- lgb.Dataset(train$data, label = train$label) - bst <- lgb.train( - params = list( - objective = "regression" - , metric = "l2" - , num_leaves = 8L - ) - , data = dtrain - , verbose = -1L - , nrounds = 5L - , valids = list( - train = dtrain - ) - ) - tmp_file <- tempfile(fileext = ".rds") - saveRDS(bst, tmp_file) - rm(bst) - bst <- readRDS(tmp_file) - .expect_booster_error <- function(object) { - error_regexp <- "Attempting to use a Booster which no longer exists" - expect_error(object, regexp = error_regexp) - } - .expect_booster_error({ - bst$current_iter() - }) - .expect_booster_error({ - bst$dump_model() - }) - .expect_booster_error({ - bst$eval(data = dtrain, name = "valid") - }) - .expect_booster_error({ - bst$eval_train() - }) - .expect_booster_error({ - bst$lower_bound() - }) - .expect_booster_error({ - bst$predict(data = train$data[seq_len(5L), ]) - }) - .expect_booster_error({ - bst$reset_parameter(params = list(learning_rate = 0.123)) - }) - .expect_booster_error({ - bst$rollback_one_iter() - }) - .expect_booster_error({ - bst$save() - }) - .expect_booster_error({ - bst$save_model(filename = tempfile(fileext = ".model")) - }) - .expect_booster_error({ - bst$save_model_to_string() - }) - .expect_booster_error({ - bst$update() - }) - .expect_booster_error({ - bst$upper_bound() - }) - predictor <- bst$to_predictor() - .expect_booster_error({ - predictor$current_iter() - }) - .expect_booster_error({ - predictor$predict(data = train$data[seq_len(5L), ]) - }) -}) - test_that("Booster$new() using a Dataset with a null handle should raise an informative error and not segfault", { data(agaricus.train, package = "lightgbm") train <- agaricus.train @@ -964,7 +891,7 @@ test_that("lgb.cv() correctly handles passing through params to the model file", }) -context("saveRDS.lgb.Booster() and readRDS.lgb.Booster()") 
+context("saveRDS and readRDS work on Booster") test_that("params (including dataset params) should be stored in .rds file for Booster", { data(agaricus.train, package = "lightgbm") @@ -985,9 +912,9 @@ test_that("params (including dataset params) should be stored in .rds file for B , train_set = dtrain ) bst_file <- tempfile(fileext = ".rds") - saveRDS.lgb.Booster(bst, file = bst_file) + saveRDS(bst, file = bst_file) - bst_from_file <- readRDS.lgb.Booster(file = bst_file) + bst_from_file <- readRDS(file = bst_file) expect_identical( bst_from_file$params , list( @@ -999,6 +926,19 @@ test_that("params (including dataset params) should be stored in .rds file for B ) }) +test_that("Handle is automatically restored when calling predict", { + data(agaricus.train, package = "lightgbm") + bst <- lightgbm(agaricus.train$data, agaricus.train$label, nrounds=5L, obj="binary") + bst_file <- tempfile(fileext = ".rds") + saveRDS(bst, file = bst_file) + + bst_from_file <- readRDS(file = bst_file) + + pred_before <- predict(bst, agaricus.train$data) + pred_after <- predict(bst_from_file, agaricus.train$data) + expect_equal(pred_before, pred_after) +}) + test_that("boosters with linear models at leaves can be written to RDS and re-loaded successfully", { X <- matrix(rnorm(100L), ncol = 1L) labels <- 2L * X + runif(nrow(X), 0L, 0.1) @@ -1025,13 +965,13 @@ test_that("boosters with linear models at leaves can be written to RDS and re-lo # save predictions, then write the model to a file and destroy it in R preds <- predict(bst, X) model_file <- tempfile(fileext = ".rds") - saveRDS.lgb.Booster(bst, file = model_file) + saveRDS(bst, file = model_file) bst$finalize() expect_null(bst$.__enclos_env__$private$handle) rm(bst) # load the booster and make predictions...should be the same - bst2 <- readRDS.lgb.Booster(file = model_file) + bst2 <- readRDS(file = model_file) preds2 <- predict(bst2, X) expect_identical(preds, preds2) }) From ada260bc15865e5bfeccc959584e102bc6057d3c Mon Sep 17 
00:00:00 2001 From: David Cortes Date: Thu, 14 Oct 2021 20:47:35 -0300 Subject: [PATCH 02/40] linter --- R-package/R/lgb.Booster.R | 2 +- R-package/R/lightgbm.R | 1 - R-package/tests/testthat/test_lgb.Booster.R | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index 8f61e8869851..315542bbefd3 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -438,7 +438,7 @@ Booster <- R6::R6Class( return(invisible(self)) }, - save_model_to_string = function(num_iteration = NULL, feature_importance_type = 0L, as_char=TRUE) { + save_model_to_string = function(num_iteration = NULL, feature_importance_type = 0L, as_char = TRUE) { if (is.null(num_iteration)) { num_iteration <- self$best_iter diff --git a/R-package/R/lightgbm.R b/R-package/R/lightgbm.R index faceb91555cc..79815dbb41ed 100644 --- a/R-package/R/lightgbm.R +++ b/R-package/R/lightgbm.R @@ -54,7 +54,6 @@ #' @param verbose verbosity for output, if <= 0, also will disable the print of evaluation during training #' @param serializable whether to make the resulting objects serializable through functions such as #' \code{save} or \code{saveRDS} (see section "Model serialization"). 
-#' #' @section Early Stopping: #' #' "early stopping" refers to stopping the training process if the model's performance on a given diff --git a/R-package/tests/testthat/test_lgb.Booster.R b/R-package/tests/testthat/test_lgb.Booster.R index 8cc048be93aa..ad96f334f41b 100644 --- a/R-package/tests/testthat/test_lgb.Booster.R +++ b/R-package/tests/testthat/test_lgb.Booster.R @@ -928,7 +928,7 @@ test_that("params (including dataset params) should be stored in .rds file for B test_that("Handle is automatically restored when calling predict", { data(agaricus.train, package = "lightgbm") - bst <- lightgbm(agaricus.train$data, agaricus.train$label, nrounds=5L, obj="binary") + bst <- lightgbm(agaricus.train$data, agaricus.train$label, nrounds = 5L, obj = "binary") bst_file <- tempfile(fileext = ".rds") saveRDS(bst, file = bst_file) From 0d551a065a15a386d4dbf6132dea45020a88c2be Mon Sep 17 00:00:00 2001 From: David Cortes Date: Thu, 14 Oct 2021 20:57:13 -0300 Subject: [PATCH 03/40] linter, namespace --- R-package/NAMESPACE | 5 +++-- R-package/R/lgb.Booster.R | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index 8df060d28605..6b584567cc0a 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -19,20 +19,21 @@ export(lgb.Dataset.set.categorical) export(lgb.Dataset.set.reference) export(lgb.convert_with_rules) export(lgb.cv) +export(lgb.drop_serialized) export(lgb.dump) export(lgb.get.eval.result) export(lgb.importance) export(lgb.interprete) export(lgb.load) +export(lgb.make_serializable) export(lgb.model.dt.tree) export(lgb.plot.importance) export(lgb.plot.interpretation) +export(lgb.restore_handle) export(lgb.save) export(lgb.train) export(lgb.unloader) export(lightgbm) -export(readRDS.lgb.Booster) -export(saveRDS.lgb.Booster) export(set_field) export(setinfo) export(slice) diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index 315542bbefd3..236d29b4d3fd 100644 --- 
a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -538,7 +538,7 @@ Booster <- R6::R6Class( # Store serialized raw bytes in model object save_raw = function() { if (is.null(self$raw)) - self$raw <- self$save_model_to_string(NULL, as_char=FALSE) + self$raw <- self$save_model_to_string(NULL, as_char = FALSE) return(invisible(NULL)) }, From 088eef3426ac310a8e3b6a3eae4536212d6b091f Mon Sep 17 00:00:00 2001 From: David Cortes Date: Thu, 14 Oct 2021 21:28:23 -0300 Subject: [PATCH 04/40] comments, linter, fix failing test --- R-package/R/lgb.Booster.R | 12 ++-- R-package/R/lgb.train.R | 2 +- R-package/tests/testthat/test_lgb.Booster.R | 76 ++++++++++++++++++++- 3 files changed, 84 insertions(+), 6 deletions(-) diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index 236d29b4d3fd..fb915fbce672 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -85,8 +85,9 @@ Booster <- R6::R6Class( } else if (!is.null(model_str)) { # Do we have a model_str as character/raw? - if (is.character(model_str)) + if (is.character(model_str)) { model_str <- charToRaw(model_str) + } if (!is.raw(model_str)) { stop("lgb.Booster: Can only use a character/raw vector as model_str") } @@ -451,8 +452,9 @@ Booster <- R6::R6Class( , as.integer(feature_importance_type) ) - if (as_char) + if (as_char) { model_str <- rawToChar(model_str) + } return(model_str) @@ -554,8 +556,9 @@ Booster <- R6::R6Class( restore_handle = function() { if (self$check_null_handle()) { - if (is.null(self$raw)) + if (is.null(self$raw)) { stop("LightGBM model is not de-serializable. 
Try using 'serializable=TRUE'.") + } private$handle <- .Call(LGBM_BoosterLoadModelFromString_R, self$raw) } return(invisible(NULL)) @@ -889,8 +892,9 @@ lgb.load <- function(filename = NULL, model_str = NULL) { return(invisible(Booster$new(modelfile = filename))) } - if (is.character(model_str)) + if (is.character(model_str)) { model_str <- charToRaw(model_str) + } if (model_str_provided) { if (!is.raw(model_str)) { stop("lgb.load: model_str should be a character/raw vector") diff --git a/R-package/R/lgb.train.R b/R-package/R/lgb.train.R index d579aa1b60be..a9ff59914e6f 100644 --- a/R-package/R/lgb.train.R +++ b/R-package/R/lgb.train.R @@ -66,8 +66,8 @@ lgb.train <- function(params = list(), categorical_feature = NULL, early_stopping_rounds = NULL, callbacks = list(), - serializable = TRUE, reset_data = FALSE, + serializable = TRUE, ...) { # validate inputs early to avoid unnecessary computation diff --git a/R-package/tests/testthat/test_lgb.Booster.R b/R-package/tests/testthat/test_lgb.Booster.R index ad96f334f41b..8b0efad382d2 100644 --- a/R-package/tests/testthat/test_lgb.Booster.R +++ b/R-package/tests/testthat/test_lgb.Booster.R @@ -157,7 +157,7 @@ test_that("lgb.load() gives the expected error messages given different incorrec # if given, model_str should be a string expect_error({ lgb.load(model_str = c(4.0, 5.0, 6.0)) - }, regexp = "model_str should be character") + }, regexp = "lgb.load: model_str should be a character/raw vector") }) @@ -819,6 +819,80 @@ test_that("early_stopping, num_iterations are stored correctly in model string e }) +test_that("Booster: method calls Booster with a null handle should raise an informative error and not segfault", { + data(agaricus.train, package = "lightgbm") + train <- agaricus.train + dtrain <- lgb.Dataset(train$data, label = train$label) + bst <- lgb.train( + params = list( + objective = "regression" + , metric = "l2" + , num_leaves = 8L + ) + , data = dtrain + , verbose = -1L + , nrounds = 5L + , valids = list( + 
train = dtrain + ) + , serializable = FALSE + ) + tmp_file <- tempfile(fileext = ".rds") + saveRDS(bst, tmp_file) + rm(bst) + bst <- readRDS(tmp_file) + .expect_booster_error <- function(object) { + error_regexp <- "Attempting to use a Booster which no longer exists" + expect_error(object, regexp = error_regexp) + } + .expect_booster_error({ + bst$current_iter() + }) + .expect_booster_error({ + bst$dump_model() + }) + .expect_booster_error({ + bst$eval(data = dtrain, name = "valid") + }) + .expect_booster_error({ + bst$eval_train() + }) + .expect_booster_error({ + bst$lower_bound() + }) + .expect_booster_error({ + bst$predict(data = train$data[seq_len(5L), ]) + }) + .expect_booster_error({ + bst$reset_parameter(params = list(learning_rate = 0.123)) + }) + .expect_booster_error({ + bst$rollback_one_iter() + }) + .expect_booster_error({ + bst$save() + }) + .expect_booster_error({ + bst$save_model(filename = tempfile(fileext = ".model")) + }) + .expect_booster_error({ + bst$save_model_to_string() + }) + .expect_booster_error({ + bst$update() + }) + .expect_booster_error({ + bst$upper_bound() + }) + predictor <- bst$to_predictor() + .expect_booster_error({ + predictor$current_iter() + }) + .expect_booster_error({ + predictor$predict(data = train$data[seq_len(5L), ]) + }) +}) + test_that("Booster$new() using a Dataset with a null handle should raise an informative error and not segfault", { data(agaricus.train, package = "lightgbm") train <- agaricus.train From 5e2a922a6d2d2ac19dcd6918d8f774f6999588c6 Mon Sep 17 00:00:00 2001 From: David Cortes Date: Thu, 14 Oct 2021 21:43:32 -0300 Subject: [PATCH 05/40] standardize error messages for null handles --- R-package/R/lgb.Booster.R | 5 +++-- R-package/src/lightgbm_R.cpp | 14 ++++++++++---- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index fb915fbce672..6d334bbb5336 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -539,8 +539,9 
@@ Booster <- R6::R6Class( # Store serialized raw bytes in model object save_raw = function() { - if (is.null(self$raw)) + if (is.null(self$raw)) { self$raw <- self$save_model_to_string(NULL, as_char = FALSE) + } return(invisible(NULL)) }, @@ -557,7 +558,7 @@ Booster <- R6::R6Class( restore_handle = function() { if (self$check_null_handle()) { if (is.null(self$raw)) { - stop("LightGBM model is not de-serializable. Try using 'serializable=TRUE'.") + .Call(LGBM_NullHandleError) } private$handle <- .Call(LGBM_BoosterLoadModelFromString_R, self$raw) } diff --git a/R-package/src/lightgbm_R.cpp b/R-package/src/lightgbm_R.cpp index ac55112da0ac..9fc0237dd7a9 100644 --- a/R-package/src/lightgbm_R.cpp +++ b/R-package/src/lightgbm_R.cpp @@ -98,12 +98,17 @@ void _DatasetFinalizer(SEXP handle) { LGBM_DatasetFree_R(handle); } +SEXP LGBM_NullHandleError() { + Rf_error( + "Attempting to use a Booster which no longer exists and/or cannot be restored. " + "This can happen if you have called Booster$finalize() " + "or if this Booster was saved through saveRDS() using 'serializable=FALSE'."); + return R_NilValue; +} + void _AssertBoosterHandleNotNull(SEXP handle) { if (Rf_isNull(handle) || !R_ExternalPtrAddr(handle)) { - Rf_error( - "Attempting to use a Booster which no longer exists. " - "This can happen if you have called Booster$finalize() or if this Booster was saved with saveRDS(). 
" - "To avoid this error in the future, use saveRDS.lgb.Booster() or Booster$save_model() to save lightgbm Boosters."); + LGBM_NullHandleError(); } } @@ -913,6 +918,7 @@ static const R_CallMethodDef CallEntries[] = { {"LGBM_BoosterSaveModel_R" , (DL_FUNC) &LGBM_BoosterSaveModel_R , 4}, {"LGBM_BoosterSaveModelToString_R" , (DL_FUNC) &LGBM_BoosterSaveModelToString_R , 3}, {"LGBM_BoosterDumpModel_R" , (DL_FUNC) &LGBM_BoosterDumpModel_R , 3}, + {"LGBM_NullHandleError" , (DL_FUNC) &LGBM_NullHandleError , 0}, {NULL, NULL, 0} }; From 5c1d260fa40f595033ac1fbab9a543ebc3b9af47 Mon Sep 17 00:00:00 2001 From: David Cortes Date: Thu, 14 Oct 2021 21:53:08 -0300 Subject: [PATCH 06/40] auto-restore handle in more functions --- R-package/R/lgb.Booster.R | 16 ++++++++++++++++ R-package/tests/testthat/test_lgb.Booster.R | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index 6d334bbb5336..f4909d02315d 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -197,6 +197,8 @@ Booster <- R6::R6Class( params <- utils::modifyList(params, additional_params) params_str <- lgb.params2str(params = params) + self$restore_handle() + .Call( LGBM_BoosterResetParameter_R , private$handle @@ -290,6 +292,8 @@ Booster <- R6::R6Class( # Return one iteration behind rollback_one_iter = function() { + self$restore_handle() + .Call( LGBM_BoosterRollbackOneIter_R , private$handle @@ -307,6 +311,8 @@ Booster <- R6::R6Class( # Get current iteration current_iter = function() { + self$restore_handle() + cur_iter <- 0L .Call( LGBM_BoosterGetCurrentIteration_R @@ -320,6 +326,8 @@ Booster <- R6::R6Class( # Get upper bound upper_bound = function() { + self$restore_handle() + upper_bound <- 0.0 .Call( LGBM_BoosterGetUpperBoundValue_R @@ -333,6 +341,8 @@ Booster <- R6::R6Class( # Get lower bound lower_bound = function() { + self$restore_handle() + lower_bound <- 0.0 .Call( LGBM_BoosterGetLowerBoundValue_R @@ -424,6 +434,8 @@ 
Booster <- R6::R6Class( # Save model save_model = function(filename, num_iteration = NULL, feature_importance_type = 0L) { + self$restore_handle() + if (is.null(num_iteration)) { num_iteration <- self$best_iter } @@ -441,6 +453,8 @@ Booster <- R6::R6Class( save_model_to_string = function(num_iteration = NULL, feature_importance_type = 0L, as_char = TRUE) { + self$restore_handle() + if (is.null(num_iteration)) { num_iteration <- self$best_iter } @@ -665,6 +679,8 @@ Booster <- R6::R6Class( stop("data_idx should not be greater than num_dataset") } + self$restore_handle() + private$get_eval_info() ret <- list() diff --git a/R-package/tests/testthat/test_lgb.Booster.R b/R-package/tests/testthat/test_lgb.Booster.R index 8b0efad382d2..6ae84c86bae7 100644 --- a/R-package/tests/testthat/test_lgb.Booster.R +++ b/R-package/tests/testthat/test_lgb.Booster.R @@ -870,7 +870,7 @@ test_that("Booster: method calls Booster with a null handle should raise an info bst$rollback_one_iter() }) .expect_booster_error({ - bst$save() + bst$save_raw() }) .expect_booster_error({ bst$save_model(filename = tempfile(fileext = ".model")) From 8ed14a6d0ce82ecb05c8d754f0160db51a230ae1 Mon Sep 17 00:00:00 2001 From: David Cortes Date: Thu, 14 Oct 2021 22:00:36 -0300 Subject: [PATCH 07/40] linter --- R-package/R/lightgbm.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/R/lightgbm.R b/R-package/R/lightgbm.R index 79815dbb41ed..e2b36e4ac407 100644 --- a/R-package/R/lightgbm.R +++ b/R-package/R/lightgbm.R @@ -79,7 +79,7 @@ #' reconstruct the C++ object after deserialization (e.g. after calling \code{readRDS} or similar), one #' can use the function \link{lgb.restore_handle} (for example, if one makes predictions in parallel or in #' forked processes, it will be faster to restore the handle beforehand). 
-#' +#' #' Producing and keeping these raw bytes however uses extra memory, and if they are not required, #' it is possible to avoid producing them by passing `serializable=FALSE`. In such cases, these raw #' bytes can be added to the model on demand through function \link{lgb.make_serializable}. From 840de5e78046e37e531b35006aa418817d7e21ac Mon Sep 17 00:00:00 2001 From: David Cortes Date: Thu, 14 Oct 2021 22:18:22 -0300 Subject: [PATCH 08/40] missing declaration --- R-package/R/lgb.Booster.R | 2 +- R-package/src/lightgbm_R.cpp | 6 +++--- R-package/src/lightgbm_R.h | 8 ++++++++ 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index f4909d02315d..9cf9fc36a0e9 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -572,7 +572,7 @@ Booster <- R6::R6Class( restore_handle = function() { if (self$check_null_handle()) { if (is.null(self$raw)) { - .Call(LGBM_NullHandleError) + .Call(LGBM_NullHandleError_R) } private$handle <- .Call(LGBM_BoosterLoadModelFromString_R, self$raw) } diff --git a/R-package/src/lightgbm_R.cpp b/R-package/src/lightgbm_R.cpp index 9fc0237dd7a9..e16e7c2fe156 100644 --- a/R-package/src/lightgbm_R.cpp +++ b/R-package/src/lightgbm_R.cpp @@ -98,7 +98,7 @@ void _DatasetFinalizer(SEXP handle) { LGBM_DatasetFree_R(handle); } -SEXP LGBM_NullHandleError() { +SEXP LGBM_NullHandleError_R() { Rf_error( "Attempting to use a Booster which no longer exists and/or cannot be restored. 
" "This can happen if you have called Booster$finalize() " @@ -108,7 +108,7 @@ SEXP LGBM_NullHandleError() { void _AssertBoosterHandleNotNull(SEXP handle) { if (Rf_isNull(handle) || !R_ExternalPtrAddr(handle)) { - LGBM_NullHandleError(); + LGBM_NullHandleError_R(); } } @@ -918,7 +918,7 @@ static const R_CallMethodDef CallEntries[] = { {"LGBM_BoosterSaveModel_R" , (DL_FUNC) &LGBM_BoosterSaveModel_R , 4}, {"LGBM_BoosterSaveModelToString_R" , (DL_FUNC) &LGBM_BoosterSaveModelToString_R , 3}, {"LGBM_BoosterDumpModel_R" , (DL_FUNC) &LGBM_BoosterDumpModel_R , 3}, - {"LGBM_NullHandleError" , (DL_FUNC) &LGBM_NullHandleError , 0}, + {"LGBM_NullHandleError_R" , (DL_FUNC) &LGBM_NullHandleError_R , 0}, {NULL, NULL, 0} }; diff --git a/R-package/src/lightgbm_R.h b/R-package/src/lightgbm_R.h index 16a87c3a611f..1d0bd67d6727 100644 --- a/R-package/src/lightgbm_R.h +++ b/R-package/src/lightgbm_R.h @@ -20,6 +20,14 @@ LIGHTGBM_C_EXPORT SEXP LGBM_HandleIsNull_R( SEXP handle ); +/*! +* \brief Throw a standardized error message when encountering a null Booster handle +* \return No return, will throw an error +*/ +LIGHTGBM_C_EXPORT SEXP LGBM_NullHandleError_R( + SEXP handle +); + // --- start Dataset interface /*! 
From af16b2d7236a6620116d9f2ab4ed462ea572205b Mon Sep 17 00:00:00 2001 From: David Cortes Date: Thu, 14 Oct 2021 23:41:26 -0300 Subject: [PATCH 09/40] correct wrong signature --- R-package/src/lightgbm_R.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/R-package/src/lightgbm_R.h b/R-package/src/lightgbm_R.h index 1d0bd67d6727..819987067575 100644 --- a/R-package/src/lightgbm_R.h +++ b/R-package/src/lightgbm_R.h @@ -24,9 +24,7 @@ LIGHTGBM_C_EXPORT SEXP LGBM_HandleIsNull_R( * \brief Throw a standardized error message when encountering a null Booster handle * \return No return, will throw an error */ -LIGHTGBM_C_EXPORT SEXP LGBM_NullHandleError_R( - SEXP handle -); +LIGHTGBM_C_EXPORT SEXP LGBM_NullHandleError_R(); // --- start Dataset interface From 9d7e6f80fb2cb5efa3e578550be4fc8d6f69ef06 Mon Sep 17 00:00:00 2001 From: David Cortes Date: Fri, 15 Oct 2021 00:40:15 -0300 Subject: [PATCH 10/40] fix docs --- R-package/R/lgb.drop_serialized.R | 1 - R-package/R/lgb.make_serializable.R | 1 - R-package/R/lgb.restore_handle.R | 15 +++++++++++++++ R-package/man/lgb.restore_handle.Rd | 17 +++++++++++++++++ R-package/man/lgb.train.Rd | 8 ++++---- R-package/man/lgb_shared_params.Rd | 2 +- 6 files changed, 37 insertions(+), 7 deletions(-) diff --git a/R-package/R/lgb.drop_serialized.R b/R-package/R/lgb.drop_serialized.R index afffe1226455..de114538a32b 100644 --- a/R-package/R/lgb.drop_serialized.R +++ b/R-package/R/lgb.drop_serialized.R @@ -8,7 +8,6 @@ #' #' @return \code{lgb.Booster} (the same `model` object that was passed as input, as invisible). #' @seealso \link{lgb.restore_handle}, \link{lgb.make_serializable}. 
-#' @examples #' @export lgb.drop_serialized <- function(model) { stopifnot(lgb.is.Booster(model)) diff --git a/R-package/R/lgb.make_serializable.R b/R-package/R/lgb.make_serializable.R index 5e552f8fd925..2f149382503b 100644 --- a/R-package/R/lgb.make_serializable.R +++ b/R-package/R/lgb.make_serializable.R @@ -8,7 +8,6 @@ #' #' @return \code{lgb.Booster} (the same `model` object that was passed as input, as invisible). #' @seealso \link{lgb.restore_handle}, \link{lgb.drop_serialized}. -#' @examples #' @export lgb.make_serializable <- function(model) { stopifnot(lgb.is.Booster(model)) diff --git a/R-package/R/lgb.restore_handle.R b/R-package/R/lgb.restore_handle.R index 1fd01ab7c308..6d890711ee9e 100644 --- a/R-package/R/lgb.restore_handle.R +++ b/R-package/R/lgb.restore_handle.R @@ -10,6 +10,21 @@ #' @return \code{lgb.Booster} (the same `model` object that was passed as input, as invisible). #' @seealso \link{lgb.make_serializable}, \link{lgb.drop_serialized}. #' @examples +#' library(lightgbm) +#' data("agaricus.train") +#' model <- lightgbm( +#' agaricus.train$data +#' , agaricus.train$label +#' , params = list(objective = "binary", nthreads = 1L) +#' , nrounds = 5L +#' , verbose = 0) +#' fname <- tempfile(fileext="rds") +#' saveRDS(model, fname) +#' +#' model_new <- readRDS(fname) +#' model_new$check_null_handle() +#' lgb.restore_handle(model_new) +#' model_new$check_null_handle() #' @export lgb.restore_handle <- function(model) { stopifnot(lgb.is.Booster(model)) diff --git a/R-package/man/lgb.restore_handle.Rd b/R-package/man/lgb.restore_handle.Rd index c4f4ed102a45..6817f5b6a180 100644 --- a/R-package/man/lgb.restore_handle.Rd +++ b/R-package/man/lgb.restore_handle.Rd @@ -19,6 +19,23 @@ After a LightGBM model object is de-serialized through functions such as \code{s object is restored automatically when calling functions such as \code{predict}, but this function can be used to forcibly restore it beforehand. Note that the object will be modified in-place. 
} +\examples{ +library(lightgbm) +data("agaricus.train") +model <- lightgbm( + agaricus.train$data + , agaricus.train$label + , params = list(objective = "binary", nthreads = 1L) + , nrounds = 5L + , verbose = 0) +fname <- tempfile(fileext="rds") +saveRDS(model, fname) + +model_new <- readRDS(fname) +model_new$check_null_handle() +lgb.restore_handle(model_new) +model_new$check_null_handle() +} \seealso{ \link{lgb.make_serializable}, \link{lgb.drop_serialized}. } diff --git a/R-package/man/lgb.train.Rd b/R-package/man/lgb.train.Rd index 085b89a665cb..be48d38f2738 100644 --- a/R-package/man/lgb.train.Rd +++ b/R-package/man/lgb.train.Rd @@ -19,8 +19,8 @@ lgb.train( categorical_feature = NULL, early_stopping_rounds = NULL, callbacks = list(), - serializable = TRUE, reset_data = FALSE, + serializable = TRUE, ... ) } @@ -96,13 +96,13 @@ set to the iteration number of the best iteration.} \item{callbacks}{List of callback functions that are applied at each iteration.} -\item{serializable}{whether to make the resulting objects serializable through functions such as -\code{save} or \code{saveRDS} (see section "Model serialization").} - \item{reset_data}{Boolean, setting it to TRUE (not the default value) will transform the booster model into a predictor model which frees up memory and the original datasets} +\item{serializable}{whether to make the resulting objects serializable through functions such as +\code{save} or \code{saveRDS} (see section "Model serialization").} + \item{...}{other parameters, see \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html}{ the "Parameters" section of the documentation} for more information. 
A few key parameters: \itemize{ diff --git a/R-package/man/lgb_shared_params.Rd b/R-package/man/lgb_shared_params.Rd index 0caa3eb4cfab..053b378ddf42 100644 --- a/R-package/man/lgb_shared_params.Rd +++ b/R-package/man/lgb_shared_params.Rd @@ -100,7 +100,7 @@ Parameter docs shared by \code{lgb.train}, \code{lgb.cv}, and \code{lightgbm} reconstruct the C++ object after deserialization (e.g. after calling \code{readRDS} or similar), one can use the function \link{lgb.restore_handle} (for example, if one makes predictions in parallel or in forked processes, it will be faster to restore the handle beforehand). - + Producing and keeping these raw bytes however uses extra memory, and if they are not required, it is possible to avoid producing them by passing `serializable=FALSE`. In such cases, these raw bytes can be added to the model on demand through function \link{lgb.make_serializable}. From 4428a235fe475618e1ea3632fd75ad9d33449841 Mon Sep 17 00:00:00 2001 From: david-cortes Date: Fri, 15 Oct 2021 18:41:37 +0300 Subject: [PATCH 11/40] Update R-package/R/lgb.train.R Co-authored-by: James Lamb --- R-package/R/lgb.train.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R-package/R/lgb.train.R b/R-package/R/lgb.train.R index a9ff59914e6f..06ab8d94559b 100644 --- a/R-package/R/lgb.train.R +++ b/R-package/R/lgb.train.R @@ -396,8 +396,9 @@ lgb.train <- function(params = list(), } - if (serializable) + if (serializable) { booster$save_raw() + } return(booster) From 730f2e6fdc247999194ab310ebf7f5c038d530c9 Mon Sep 17 00:00:00 2001 From: david-cortes Date: Fri, 15 Oct 2021 18:50:36 +0300 Subject: [PATCH 12/40] Update R-package/R/lgb.drop_serialized.R Co-authored-by: James Lamb --- R-package/R/lgb.drop_serialized.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R-package/R/lgb.drop_serialized.R b/R-package/R/lgb.drop_serialized.R index de114538a32b..edec4472feb6 100644 --- a/R-package/R/lgb.drop_serialized.R +++ 
b/R-package/R/lgb.drop_serialized.R @@ -10,7 +10,9 @@ #' @seealso \link{lgb.restore_handle}, \link{lgb.make_serializable}. #' @export lgb.drop_serialized <- function(model) { - stopifnot(lgb.is.Booster(model)) + if (!lgb.is.Booster(x = model)) { + stop("lgb.drop_serialized: model should be an ", sQuote("lgb.Booster")) + } model$drop_raw() return(invisible(model)) } From 719af93cc978af0bf6424db105c262c270e51b0d Mon Sep 17 00:00:00 2001 From: david-cortes Date: Fri, 15 Oct 2021 18:50:45 +0300 Subject: [PATCH 13/40] Update R-package/R/lgb.restore_handle.R Co-authored-by: James Lamb --- R-package/R/lgb.restore_handle.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R-package/R/lgb.restore_handle.R b/R-package/R/lgb.restore_handle.R index 6d890711ee9e..f28c38290bf7 100644 --- a/R-package/R/lgb.restore_handle.R +++ b/R-package/R/lgb.restore_handle.R @@ -27,7 +27,9 @@ #' model_new$check_null_handle() #' @export lgb.restore_handle <- function(model) { - stopifnot(lgb.is.Booster(model)) + if (!lgb.is.Booster(x = model)) { + stop("lgb.restore_handle: model should be an ", sQuote("lgb.Booster")) + } model$restore_handle() return(invisible(model)) } From 41a75bd74622390875f53d84efea763c1493ac14 Mon Sep 17 00:00:00 2001 From: david-cortes Date: Fri, 15 Oct 2021 18:50:58 +0300 Subject: [PATCH 14/40] Update R-package/R/lgb.restore_handle.R Co-authored-by: James Lamb --- R-package/R/lgb.restore_handle.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/R/lgb.restore_handle.R b/R-package/R/lgb.restore_handle.R index f28c38290bf7..075acf8131f1 100644 --- a/R-package/R/lgb.restore_handle.R +++ b/R-package/R/lgb.restore_handle.R @@ -7,7 +7,7 @@ #' @param model \code{lgb.Booster} object which was de-serialized and whose underlying C++ object and R handle #' need to be restored. #' -#' @return \code{lgb.Booster} (the same `model` object that was passed as input, as invisible). 
+#' @return \code{lgb.Booster} (the same `model` object that was passed as input, invisibly). #' @seealso \link{lgb.make_serializable}, \link{lgb.drop_serialized}. #' @examples #' library(lightgbm) From 9b5de4d95fbe78424a1af9b6cf0db5dc1546fd61 Mon Sep 17 00:00:00 2001 From: david-cortes Date: Fri, 15 Oct 2021 18:51:16 +0300 Subject: [PATCH 15/40] Update R-package/R/lgb.make_serializable.R Co-authored-by: James Lamb --- R-package/R/lgb.make_serializable.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R-package/R/lgb.make_serializable.R b/R-package/R/lgb.make_serializable.R index 2f149382503b..e2fde54df610 100644 --- a/R-package/R/lgb.make_serializable.R +++ b/R-package/R/lgb.make_serializable.R @@ -10,7 +10,9 @@ #' @seealso \link{lgb.restore_handle}, \link{lgb.drop_serialized}. #' @export lgb.make_serializable <- function(model) { - stopifnot(lgb.is.Booster(model)) + if (!lgb.is.Booster(x = model)) { + stop("lgb.make_serializable: model should be an ", sQuote("lgb.Booster")) + } model$save_raw() return(invisible(model)) } From 1f4aa912bbe3af28e8411d6e0b543b14bd878f58 Mon Sep 17 00:00:00 2001 From: David Cortes Date: Fri, 15 Oct 2021 12:53:05 -0300 Subject: [PATCH 16/40] move 'restore_handle' from feature importance to dump method --- R-package/R/lgb.Booster.R | 2 ++ R-package/R/lgb.importance.R | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index 9cf9fc36a0e9..4ad69f953879 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -477,6 +477,8 @@ Booster <- R6::R6Class( # Dump model in memory dump_model = function(num_iteration = NULL, feature_importance_type = 0L) { + self$restore_handle() + if (is.null(num_iteration)) { num_iteration <- self$best_iter } diff --git a/R-package/R/lgb.importance.R b/R-package/R/lgb.importance.R index 687e710ccff3..c05c6628be34 100644 --- a/R-package/R/lgb.importance.R +++ b/R-package/R/lgb.importance.R @@ -41,7 +41,6 @@ 
lgb.importance <- function(model, percentage = TRUE) { if (!lgb.is.Booster(x = model)) { stop("'model' has to be an object of class lgb.Booster") } - model$restore_handle() # Setup importance tree_dt <- lgb.model.dt.tree(model = model) From 84af4e782288944b0b90a733554949e831a8794f Mon Sep 17 00:00:00 2001 From: David Cortes Date: Fri, 15 Oct 2021 12:54:06 -0300 Subject: [PATCH 17/40] missing header --- R-package/src/lightgbm_R.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/R-package/src/lightgbm_R.cpp b/R-package/src/lightgbm_R.cpp index e16e7c2fe156..ffe448094d4c 100644 --- a/R-package/src/lightgbm_R.cpp +++ b/R-package/src/lightgbm_R.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #define COL_MAJOR (0) From 25557f7fcf83a5edac69115b8e0a97641e94ad15 Mon Sep 17 00:00:00 2001 From: David Cortes Date: Fri, 15 Oct 2021 13:01:13 -0300 Subject: [PATCH 18/40] move arguments order, update docs --- R-package/R/lgb.cv.R | 2 +- R-package/man/lgb.cv.Rd | 8 ++++---- R-package/man/lgb.restore_handle.Rd | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R index 8836428732c7..db30bbe9fe02 100644 --- a/R-package/R/lgb.cv.R +++ b/R-package/R/lgb.cv.R @@ -97,8 +97,8 @@ lgb.cv <- function(params = list() , categorical_feature = NULL , early_stopping_rounds = NULL , callbacks = list() - , serializable = TRUE , reset_data = FALSE + , serializable = TRUE , ... ) { diff --git a/R-package/man/lgb.cv.Rd b/R-package/man/lgb.cv.Rd index 5940b657e5ce..6a5f18a512be 100644 --- a/R-package/man/lgb.cv.Rd +++ b/R-package/man/lgb.cv.Rd @@ -24,8 +24,8 @@ lgb.cv( categorical_feature = NULL, early_stopping_rounds = NULL, callbacks = list(), - serializable = TRUE, reset_data = FALSE, + serializable = TRUE, ... 
) } @@ -116,12 +116,12 @@ set to the iteration number of the best iteration.} \item{callbacks}{List of callback functions that are applied at each iteration.} -\item{serializable}{whether to make the resulting objects serializable through functions such as -\code{save} or \code{saveRDS} (see section "Model serialization").} - \item{reset_data}{Boolean, setting it to TRUE (not the default value) will transform the booster model into a predictor model which frees up memory and the original datasets} +\item{serializable}{whether to make the resulting objects serializable through functions such as +\code{save} or \code{saveRDS} (see section "Model serialization").} + \item{...}{other parameters, see Parameters.rst for more information. A few key parameters: \itemize{ \item{\code{boosting}: Boosting type. \code{"gbdt"}, \code{"rf"}, \code{"dart"} or \code{"goss"}.} diff --git a/R-package/man/lgb.restore_handle.Rd b/R-package/man/lgb.restore_handle.Rd index 6817f5b6a180..dcc32b94b4de 100644 --- a/R-package/man/lgb.restore_handle.Rd +++ b/R-package/man/lgb.restore_handle.Rd @@ -11,7 +11,7 @@ lgb.restore_handle(model) need to be restored.} } \value{ -\code{lgb.Booster} (the same `model` object that was passed as input, as invisible). +\code{lgb.Booster} (the same `model` object that was passed as input, invisibly). 
} \description{ After a LightGBM model object is de-serialized through functions such as \code{save} or From ff78dd2355fb2d45ca4c7cc74155a35a448732e4 Mon Sep 17 00:00:00 2001 From: David Cortes Date: Fri, 15 Oct 2021 13:06:52 -0300 Subject: [PATCH 19/40] linter --- R-package/R/lgb.drop_serialized.R | 6 +++--- R-package/R/lgb.make_serializable.R | 6 +++--- R-package/R/lgb.restore_handle.R | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/R-package/R/lgb.drop_serialized.R b/R-package/R/lgb.drop_serialized.R index edec4472feb6..1e1157ff997f 100644 --- a/R-package/R/lgb.drop_serialized.R +++ b/R-package/R/lgb.drop_serialized.R @@ -10,9 +10,9 @@ #' @seealso \link{lgb.restore_handle}, \link{lgb.make_serializable}. #' @export lgb.drop_serialized <- function(model) { - if (!lgb.is.Booster(x = model)) { - stop("lgb.drop_serialized: model should be an ", sQuote("lgb.Booster")) - } + if (!lgb.is.Booster(x = model)) { + stop("lgb.drop_serialized: model should be an ", sQuote("lgb.Booster")) + } model$drop_raw() return(invisible(model)) } diff --git a/R-package/R/lgb.make_serializable.R b/R-package/R/lgb.make_serializable.R index e2fde54df610..515341f275e1 100644 --- a/R-package/R/lgb.make_serializable.R +++ b/R-package/R/lgb.make_serializable.R @@ -10,9 +10,9 @@ #' @seealso \link{lgb.restore_handle}, \link{lgb.drop_serialized}. 
#' @export lgb.make_serializable <- function(model) { - if (!lgb.is.Booster(x = model)) { - stop("lgb.make_serializable: model should be an ", sQuote("lgb.Booster")) - } + if (!lgb.is.Booster(x = model)) { + stop("lgb.make_serializable: model should be an ", sQuote("lgb.Booster")) + } model$save_raw() return(invisible(model)) } diff --git a/R-package/R/lgb.restore_handle.R b/R-package/R/lgb.restore_handle.R index 075acf8131f1..d9911a8f7a4d 100644 --- a/R-package/R/lgb.restore_handle.R +++ b/R-package/R/lgb.restore_handle.R @@ -27,9 +27,9 @@ #' model_new$check_null_handle() #' @export lgb.restore_handle <- function(model) { - if (!lgb.is.Booster(x = model)) { - stop("lgb.restore_handle: model should be an ", sQuote("lgb.Booster")) - } + if (!lgb.is.Booster(x = model)) { + stop("lgb.restore_handle: model should be an ", sQuote("lgb.Booster")) + } model$restore_handle() return(invisible(model)) } From 19f3c4a6629a03b05b71ae25ccac2fa0fdc92370 Mon Sep 17 00:00:00 2001 From: David Cortes Date: Fri, 15 Oct 2021 13:44:50 -0300 Subject: [PATCH 20/40] avoid leaving files in working directory --- R-package/R/lgb.restore_handle.R | 1 + R-package/R/lightgbm.R | 5 ++++- R-package/man/lgb.restore_handle.Rd | 1 + R-package/man/lightgbm.Rd | 3 ++- 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/R-package/R/lgb.restore_handle.R b/R-package/R/lgb.restore_handle.R index d9911a8f7a4d..c6d554649aaa 100644 --- a/R-package/R/lgb.restore_handle.R +++ b/R-package/R/lgb.restore_handle.R @@ -17,6 +17,7 @@ #' , agaricus.train$label #' , params = list(objective = "binary", nthreads = 1L) #' , nrounds = 5L +#' , save_name = NULL #' , verbose = 0) #' fname <- tempfile(fileext="rds") #' saveRDS(model, fname) diff --git a/R-package/R/lightgbm.R b/R-package/R/lightgbm.R index e2b36e4ac407..ba19fc5791a0 100644 --- a/R-package/R/lightgbm.R +++ b/R-package/R/lightgbm.R @@ -93,6 +93,7 @@ NULL #' @param label Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}} #' 
@param weight vector of response values. If not NULL, will set to dataset #' @param save_name File name to use when writing the trained model to disk. Should end in ".model". +#' If passing `NULL`, will not save the trained model to disk. #' @param ... Additional arguments passed to \code{\link{lgb.train}}. For example #' \itemize{ #' \item{\code{valids}: a list of \code{lgb.Dataset} objects, used for validation} @@ -175,7 +176,9 @@ lightgbm <- function(data, ) # Store model under a specific name - bst$save_model(filename = save_name) + if (!is.null(save_name)) { + bst$save_model(filename = save_name) + } return(bst) } diff --git a/R-package/man/lgb.restore_handle.Rd b/R-package/man/lgb.restore_handle.Rd index dcc32b94b4de..983de9ac3a67 100644 --- a/R-package/man/lgb.restore_handle.Rd +++ b/R-package/man/lgb.restore_handle.Rd @@ -27,6 +27,7 @@ model <- lightgbm( , agaricus.train$label , params = list(objective = "binary", nthreads = 1L) , nrounds = 5L + , save_name = NULL , verbose = 0) fname <- tempfile(fileext="rds") saveRDS(model, fname) diff --git a/R-package/man/lightgbm.Rd b/R-package/man/lightgbm.Rd index c506ee1465d7..edd129e0e2ef 100644 --- a/R-package/man/lightgbm.Rd +++ b/R-package/man/lightgbm.Rd @@ -44,7 +44,8 @@ fails to improve for \code{early_stopping_rounds} consecutive boosting rounds. If training stops early, the returned model will have attribute \code{best_iter} set to the iteration number of the best iteration.} -\item{save_name}{File name to use when writing the trained model to disk. Should end in ".model".} +\item{save_name}{File name to use when writing the trained model to disk. Should end in ".model". 
+If passing `NULL`, will not save the trained model to disk.} \item{init_model}{path of model file of \code{lgb.Booster} object, will continue training from this model} From 2f3a334cdaead6ccdc67e041dc3ddc1bd4d2fb9e Mon Sep 17 00:00:00 2001 From: David Cortes Date: Fri, 15 Oct 2021 14:33:11 -0300 Subject: [PATCH 21/40] add test for save_name=NULL --- R-package/tests/testthat/test_basic.R | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index 5aff8e2c2d2c..3ffe442e918d 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -231,6 +231,23 @@ test_that("lightgbm() performs evaluation on validation sets if they are provide expect_true(abs(bst$record_evals[["valid2"]][["binary_error"]][["eval"]][[1L]] - 0.02226317) < TOLERANCE) }) +test_that("lightgbm() accepts a NULL for save_name"
{ +test_that("lightgbm() accepts a NULL for save_name", { files_before <- list.files(getwd()) model <- lightgbm( From 617b226a28962ec9325cbc9e081daf7e1619623f Mon Sep 17 00:00:00 2001 From: david-cortes Date: Sat, 16 Oct 2021 19:57:22 +0300 Subject: [PATCH 23/40] Update R-package/R/lgb.restore_handle.R Co-authored-by: Nikita Titov --- R-package/R/lgb.restore_handle.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/R/lgb.restore_handle.R b/R-package/R/lgb.restore_handle.R index c6d554649aaa..be3036a52986 100644 --- a/R-package/R/lgb.restore_handle.R +++ b/R-package/R/lgb.restore_handle.R @@ -1,5 +1,5 @@ #' @name lgb.restore_handle -#' @title Restore the C++ component of a deserialized LGB model +#' @title Restore the C++ component of a de-serialized LightGBM model #' @description After a LightGBM model object is de-serialized through functions such as \code{save} or #' \code{saveRDS}, its underlying C++ object will be blank and needs to be restored to able to use it. Such #' object is restored automatically when calling functions such as \code{predict}, but this function can be From 8a078f47eb28641caa142dff379fa381fc3dafb4 Mon Sep 17 00:00:00 2001 From: david-cortes Date: Sat, 16 Oct 2021 19:57:32 +0300 Subject: [PATCH 24/40] Update R-package/src/lightgbm_R.cpp Co-authored-by: Nikita Titov --- R-package/src/lightgbm_R.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/src/lightgbm_R.cpp b/R-package/src/lightgbm_R.cpp index ffe448094d4c..dc03148514f6 100644 --- a/R-package/src/lightgbm_R.cpp +++ b/R-package/src/lightgbm_R.cpp @@ -99,7 +99,7 @@ void _DatasetFinalizer(SEXP handle) { LGBM_DatasetFree_R(handle); } -SEXP LGBM_NullHandleError_R() { +SEXP LGBM_NullBoosterHandleError_R() { Rf_error( "Attempting to use a Booster which no longer exists and/or cannot be restored. 
" "This can happen if you have called Booster$finalize() " From 8e194afc1f260edec19c366a397618817138fa28 Mon Sep 17 00:00:00 2001 From: David Cortes Date: Sat, 16 Oct 2021 13:58:54 -0300 Subject: [PATCH 25/40] change name of error function --- R-package/R/lgb.Booster.R | 2 +- R-package/src/lightgbm_R.cpp | 4 ++-- R-package/src/lightgbm_R.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index 4ad69f953879..e98a1704a1f8 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -574,7 +574,7 @@ Booster <- R6::R6Class( restore_handle = function() { if (self$check_null_handle()) { if (is.null(self$raw)) { - .Call(LGBM_NullHandleError_R) + .Call(LGBM_NullBoosterHandleError_R) } private$handle <- .Call(LGBM_BoosterLoadModelFromString_R, self$raw) } diff --git a/R-package/src/lightgbm_R.cpp b/R-package/src/lightgbm_R.cpp index dc03148514f6..0af118ba7144 100644 --- a/R-package/src/lightgbm_R.cpp +++ b/R-package/src/lightgbm_R.cpp @@ -109,7 +109,7 @@ SEXP LGBM_NullBoosterHandleError_R() { void _AssertBoosterHandleNotNull(SEXP handle) { if (Rf_isNull(handle) || !R_ExternalPtrAddr(handle)) { - LGBM_NullHandleError_R(); + LGBM_NullBoosterHandleError_R(); } } @@ -919,7 +919,7 @@ static const R_CallMethodDef CallEntries[] = { {"LGBM_BoosterSaveModel_R" , (DL_FUNC) &LGBM_BoosterSaveModel_R , 4}, {"LGBM_BoosterSaveModelToString_R" , (DL_FUNC) &LGBM_BoosterSaveModelToString_R , 3}, {"LGBM_BoosterDumpModel_R" , (DL_FUNC) &LGBM_BoosterDumpModel_R , 3}, - {"LGBM_NullHandleError_R" , (DL_FUNC) &LGBM_NullHandleError_R , 0}, + {"LGBM_NullBoosterHandleError_R" , (DL_FUNC) &LGBM_NullBoosterHandleError_R , 0}, {NULL, NULL, 0} }; diff --git a/R-package/src/lightgbm_R.h b/R-package/src/lightgbm_R.h index 819987067575..37da664d91fa 100644 --- a/R-package/src/lightgbm_R.h +++ b/R-package/src/lightgbm_R.h @@ -24,7 +24,7 @@ LIGHTGBM_C_EXPORT SEXP LGBM_HandleIsNull_R( * \brief Throw a standardized error 
message when encountering a null Booster handle * \return No return, will throw an error */ -LIGHTGBM_C_EXPORT SEXP LGBM_NullHandleError_R(); +LIGHTGBM_C_EXPORT SEXP LGBM_NullBoosterHandleError_R(); // --- start Dataset interface From d4c8ef1d501b564f5b7552c46ebf2ba9739f2d11 Mon Sep 17 00:00:00 2001 From: David Cortes Date: Sat, 16 Oct 2021 14:01:11 -0300 Subject: [PATCH 26/40] update comment --- R-package/src/lightgbm_R.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/src/lightgbm_R.cpp b/R-package/src/lightgbm_R.cpp index 0af118ba7144..9f1af96d9120 100644 --- a/R-package/src/lightgbm_R.cpp +++ b/R-package/src/lightgbm_R.cpp @@ -840,7 +840,7 @@ SEXP LGBM_BoosterSaveModelToString_R(SEXP handle, std::vector inner_char_buf(buf_len); CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), 0, num_iter, importance_type, buf_len, &out_len, inner_char_buf.data())); SEXP model_str = PROTECT(safe_R_raw(out_len, &cont_token)); - // if the model string was larger than the initial buffer, allocate a bigger buffer and try again + // if the model string was larger than the initial buffer, call the function again, writing directly to the R object if (out_len > buf_len) { CHECK_CALL(LGBM_BoosterSaveModelToString(R_ExternalPtrAddr(handle), 0, num_iter, importance_type, out_len, &out_len, reinterpret_cast(RAW(model_str)))); } else { From 44ca8db89c4a8a0fd66398fe51c969c5e6e2f355 Mon Sep 17 00:00:00 2001 From: David Cortes Date: Sat, 16 Oct 2021 14:19:39 -0300 Subject: [PATCH 27/40] restore old serialization functions but set as deprecated --- R-package/NAMESPACE | 2 + R-package/R/readRDS.lgb.Booster.R | 47 +++++++++++++++++ R-package/R/saveRDS.lgb.Booster.R | 77 ++++++++++++++++++++++++++++ R-package/man/lgb.restore_handle.Rd | 2 +- R-package/man/readRDS.lgb.Booster.Rd | 50 ++++++++++++++++++ R-package/man/saveRDS.lgb.Booster.Rd | 72 ++++++++++++++++++++++++++ 6 files changed, 249 insertions(+), 1 deletion(-) create mode 100644 
R-package/R/readRDS.lgb.Booster.R create mode 100644 R-package/R/saveRDS.lgb.Booster.R create mode 100644 R-package/man/readRDS.lgb.Booster.Rd create mode 100644 R-package/man/saveRDS.lgb.Booster.Rd diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index 6b584567cc0a..6bcf971bc020 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -34,6 +34,8 @@ export(lgb.save) export(lgb.train) export(lgb.unloader) export(lightgbm) +export(readRDS.lgb.Booster) +export(saveRDS.lgb.Booster) export(set_field) export(setinfo) export(slice) diff --git a/R-package/R/readRDS.lgb.Booster.R b/R-package/R/readRDS.lgb.Booster.R new file mode 100644 index 000000000000..0739f80cb067 --- /dev/null +++ b/R-package/R/readRDS.lgb.Booster.R @@ -0,0 +1,47 @@ +#' @name readRDS.lgb.Booster +#' @title readRDS for \code{lgb.Booster} models (DEPRECATED) +#' @description Calls \code{readRDS} in what is expected to a serialized \code{lgb.Booster} object, +#' and then restores its handle through \code{lgb.restore_handle}. +#' +#' \bold{This function throws a warning and will be deprecated in future versions.} +#' @param file a connection or the name of the file where the R object is saved to or read from. +#' @param refhook a hook function for handling reference objects. 
+#' +#' @return \code{lgb.Booster} +#' +#' @examples +#' \donttest{ +#' library(lightgbm) +#' data(agaricus.train, package = "lightgbm") +#' train <- agaricus.train +#' dtrain <- lgb.Dataset(train$data, label = train$label) +#' data(agaricus.test, package = "lightgbm") +#' test <- agaricus.test +#' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) +#' params <- list( +#' objective = "regression" +#' , metric = "l2" +#' , min_data = 1L +#' , learning_rate = 1.0 +#' ) +#' valids <- list(test = dtest) +#' model <- lgb.train( +#' params = params +#' , data = dtrain +#' , nrounds = 10L +#' , valids = valids +#' , early_stopping_rounds = 5L +#' ) +#' model_file <- tempfile(fileext = ".rds") +#' saveRDS.lgb.Booster(model, model_file) +#' new_model <- readRDS.lgb.Booster(model_file) +#' } +#' @export +readRDS.lgb.Booster <- function(file, refhook = NULL) { + + warning("'readRDS.lgb.Booster' is deprecated and will be removed in a future release.") + + object <- readRDS(file = file, refhook = refhook) + lgb.restore_handle(object) + return(object) +} diff --git a/R-package/R/saveRDS.lgb.Booster.R b/R-package/R/saveRDS.lgb.Booster.R new file mode 100644 index 000000000000..a618dd7abe9b --- /dev/null +++ b/R-package/R/saveRDS.lgb.Booster.R @@ -0,0 +1,77 @@ +#' @name saveRDS.lgb.Booster +#' @title saveRDS for \code{lgb.Booster} models (DEPRECATED) +#' @description Calls \code{saveRDS} on an \code{lgb.Booster} object, making it serializable before the call if +#' it isn't already. +#' +#' \bold{This function throws a warning and will be deprecated in future versions.} +#' @param object \code{lgb.Booster} object to serialize. +#' @param file a connection or the name of the file where the R object is saved to or read from. +#' @param ascii a logical. If TRUE or NA, an ASCII representation is written; otherwise (default), +#' a binary one is used. See the comments in the help for save. +#' @param version the workspace format version to use. 
\code{NULL} specifies the current default +#' version (2). Versions prior to 2 are not supported, so this will only be relevant +#' when there are later versions. +#' @param compress a logical specifying whether saving to a named file is to use "gzip" compression, +#' or one of \code{"gzip"}, \code{"bzip2"} or \code{"xz"} to indicate the type of +#' compression to be used. Ignored if file is a connection. +#' @param refhook a hook function for handling reference objects. +#' @param raw whether to save the model in a raw variable or not, recommended to leave it to \code{TRUE}. +#' +#' @return NULL invisibly. +#' +#' @examples +#' \donttest{ +#' library(lightgbm) +#' data(agaricus.train, package = "lightgbm") +#' train <- agaricus.train +#' dtrain <- lgb.Dataset(train$data, label = train$label) +#' data(agaricus.test, package = "lightgbm") +#' test <- agaricus.test +#' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) +#' params <- list( +#' objective = "regression" +#' , metric = "l2" +#' , min_data = 1L +#' , learning_rate = 1.0 +#' ) +#' valids <- list(test = dtest) +#' model <- lgb.train( +#' params = params +#' , data = dtrain +#' , nrounds = 10L +#' , valids = valids +#' , early_stopping_rounds = 5L +#' ) +#' model_file <- tempfile(fileext = ".rds") +#' saveRDS.lgb.Booster(model, model_file) +#' } +#' @export +saveRDS.lgb.Booster <- function(object, + file, + ascii = FALSE, + version = NULL, + compress = TRUE, + refhook = NULL, + raw = TRUE) { + + warning("'saveRDS.lgb.Booster' is deprecated and will be removed in a future release.") + + if (!lgb.is.Booster(x = object)) { + stop("saveRDS.lgb.Booster: object should be an ", sQuote("lgb.Booster")) + } + + if (is.null(object$raw)) { + lgb.make_serializable(object) + } + + saveRDS( + object + , file = file + , ascii = ascii + , version = version + , compress = compress + , refhook = refhook + ) + + return(invisible(NULL)) +} diff --git a/R-package/man/lgb.restore_handle.Rd 
b/R-package/man/lgb.restore_handle.Rd index 983de9ac3a67..199614241502 100644 --- a/R-package/man/lgb.restore_handle.Rd +++ b/R-package/man/lgb.restore_handle.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/lgb.restore_handle.R \name{lgb.restore_handle} \alias{lgb.restore_handle} -\title{Restore the C++ component of a deserialized LGB model} +\title{Restore the C++ component of a de-serialized LightGBM model} \usage{ lgb.restore_handle(model) } diff --git a/R-package/man/readRDS.lgb.Booster.Rd b/R-package/man/readRDS.lgb.Booster.Rd new file mode 100644 index 000000000000..618e1417715a --- /dev/null +++ b/R-package/man/readRDS.lgb.Booster.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/readRDS.lgb.Booster.R +\name{readRDS.lgb.Booster} +\alias{readRDS.lgb.Booster} +\title{readRDS for \code{lgb.Booster} models (DEPRECATED)} +\usage{ +readRDS.lgb.Booster(file, refhook = NULL) +} +\arguments{ +\item{file}{a connection or the name of the file where the R object is saved to or read from.} + +\item{refhook}{a hook function for handling reference objects.} +} +\value{ +\code{lgb.Booster} +} +\description{ +Calls \code{readRDS} in what is expected to a serialized \code{lgb.Booster} object, + and then restores its handle through \code{lgb.restore_handle}. 
+ + \bold{This function throws a warning and will be deprecated in future versions.} +} +\examples{ +\donttest{ +library(lightgbm) +data(agaricus.train, package = "lightgbm") +train <- agaricus.train +dtrain <- lgb.Dataset(train$data, label = train$label) +data(agaricus.test, package = "lightgbm") +test <- agaricus.test +dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) +params <- list( + objective = "regression" + , metric = "l2" + , min_data = 1L + , learning_rate = 1.0 +) +valids <- list(test = dtest) +model <- lgb.train( + params = params + , data = dtrain + , nrounds = 10L + , valids = valids + , early_stopping_rounds = 5L +) +model_file <- tempfile(fileext = ".rds") +saveRDS.lgb.Booster(model, model_file) +new_model <- readRDS.lgb.Booster(model_file) +} +} diff --git a/R-package/man/saveRDS.lgb.Booster.Rd b/R-package/man/saveRDS.lgb.Booster.Rd new file mode 100644 index 000000000000..9f307487c7b0 --- /dev/null +++ b/R-package/man/saveRDS.lgb.Booster.Rd @@ -0,0 +1,72 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/saveRDS.lgb.Booster.R +\name{saveRDS.lgb.Booster} +\alias{saveRDS.lgb.Booster} +\title{saveRDS for \code{lgb.Booster} models (DEPRECATED)} +\usage{ +saveRDS.lgb.Booster( + object, + file, + ascii = FALSE, + version = NULL, + compress = TRUE, + refhook = NULL, + raw = TRUE +) +} +\arguments{ +\item{object}{\code{lgb.Booster} object to serialize.} + +\item{file}{a connection or the name of the file where the R object is saved to or read from.} + +\item{ascii}{a logical. If TRUE or NA, an ASCII representation is written; otherwise (default), +a binary one is used. See the comments in the help for save.} + +\item{version}{the workspace format version to use. \code{NULL} specifies the current default +version (2). 
Versions prior to 2 are not supported, so this will only be relevant +when there are later versions.} + +\item{compress}{a logical specifying whether saving to a named file is to use "gzip" compression, +or one of \code{"gzip"}, \code{"bzip2"} or \code{"xz"} to indicate the type of +compression to be used. Ignored if file is a connection.} + +\item{refhook}{a hook function for handling reference objects.} + +\item{raw}{whether to save the model in a raw variable or not, recommended to leave it to \code{TRUE}.} +} +\value{ +NULL invisibly. +} +\description{ +Calls \code{saveRDS} on an \code{lgb.Booster} object, making it serializable before the call if + it isn't already. + + \bold{This function throws a warning and will be deprecated in future versions.} +} +\examples{ +\donttest{ +library(lightgbm) +data(agaricus.train, package = "lightgbm") +train <- agaricus.train +dtrain <- lgb.Dataset(train$data, label = train$label) +data(agaricus.test, package = "lightgbm") +test <- agaricus.test +dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) +params <- list( + objective = "regression" + , metric = "l2" + , min_data = 1L + , learning_rate = 1.0 +) +valids <- list(test = dtest) +model <- lgb.train( + params = params + , data = dtrain + , nrounds = 10L + , valids = valids + , early_stopping_rounds = 5L +) +model_file <- tempfile(fileext = ".rds") +saveRDS.lgb.Booster(model, model_file) +} +} From d6f4c747de70bfb9ff406f1fb3eb47829bac2b2e Mon Sep 17 00:00:00 2001 From: david-cortes Date: Sun, 17 Oct 2021 04:33:32 +0300 Subject: [PATCH 28/40] Update R-package/R/readRDS.lgb.Booster.R Co-authored-by: Nikita Titov --- R-package/R/readRDS.lgb.Booster.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/R/readRDS.lgb.Booster.R b/R-package/R/readRDS.lgb.Booster.R index 0739f80cb067..e1b7d6c01356 100644 --- a/R-package/R/readRDS.lgb.Booster.R +++ b/R-package/R/readRDS.lgb.Booster.R @@ -3,7 +3,7 @@ #' @description Calls \code{readRDS} 
in what is expected to a serialized \code{lgb.Booster} object, #' and then restores its handle through \code{lgb.restore_handle}. #' -#' \bold{This function throws a warning and will be deprecated in future versions.} +#' \bold{This function throws a warning and will be removed in future versions.} #' @param file a connection or the name of the file where the R object is saved to or read from. #' @param refhook a hook function for handling reference objects. #' From 8d282e4d7d13777edb31e68e84aeb9b036f43f18 Mon Sep 17 00:00:00 2001 From: david-cortes Date: Sun, 17 Oct 2021 04:33:39 +0300 Subject: [PATCH 29/40] Update R-package/R/saveRDS.lgb.Booster.R Co-authored-by: Nikita Titov --- R-package/R/saveRDS.lgb.Booster.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/R/saveRDS.lgb.Booster.R b/R-package/R/saveRDS.lgb.Booster.R index a618dd7abe9b..5ed1d5053f56 100644 --- a/R-package/R/saveRDS.lgb.Booster.R +++ b/R-package/R/saveRDS.lgb.Booster.R @@ -3,7 +3,7 @@ #' @description Calls \code{saveRDS} on an \code{lgb.Booster} object, making it serializable before the call if #' it isn't already. #' -#' \bold{This function throws a warning and will be deprecated in future versions.} +#' \bold{This function throws a warning and will be removed in future versions.} #' @param object \code{lgb.Booster} object to serialize. #' @param file a connection or the name of the file where the R object is saved to or read from. #' @param ascii a logical. 
If TRUE or NA, an ASCII representation is written; otherwise (default), From 0817eb0b0fca9d2a8d4ca407db988072d145ed6a Mon Sep 17 00:00:00 2001 From: David Cortes Date: Sat, 16 Oct 2021 22:46:50 -0300 Subject: [PATCH 30/40] update docs --- R-package/man/readRDS.lgb.Booster.Rd | 2 +- R-package/man/saveRDS.lgb.Booster.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R-package/man/readRDS.lgb.Booster.Rd b/R-package/man/readRDS.lgb.Booster.Rd index 618e1417715a..303c73015813 100644 --- a/R-package/man/readRDS.lgb.Booster.Rd +++ b/R-package/man/readRDS.lgb.Booster.Rd @@ -18,7 +18,7 @@ readRDS.lgb.Booster(file, refhook = NULL) Calls \code{readRDS} in what is expected to a serialized \code{lgb.Booster} object, and then restores its handle through \code{lgb.restore_handle}. - \bold{This function throws a warning and will be deprecated in future versions.} + \bold{This function throws a warning and will be removed in future versions.} } \examples{ \donttest{ diff --git a/R-package/man/saveRDS.lgb.Booster.Rd b/R-package/man/saveRDS.lgb.Booster.Rd index 9f307487c7b0..e730f36b2caf 100644 --- a/R-package/man/saveRDS.lgb.Booster.Rd +++ b/R-package/man/saveRDS.lgb.Booster.Rd @@ -41,7 +41,7 @@ NULL invisibly. Calls \code{saveRDS} on an \code{lgb.Booster} object, making it serializable before the call if it isn't already. 
- \bold{This function throws a warning and will be deprecated in future versions.} + \bold{This function throws a warning and will be removed in future versions.} } \examples{ \donttest{ From f84555493b1d77d321d726a24f832b15e561d7a6 Mon Sep 17 00:00:00 2001 From: david-cortes Date: Tue, 26 Oct 2021 18:59:55 +0300 Subject: [PATCH 31/40] Update R-package/R/readRDS.lgb.Booster.R Co-authored-by: James Lamb --- R-package/R/readRDS.lgb.Booster.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/R/readRDS.lgb.Booster.R b/R-package/R/readRDS.lgb.Booster.R index e1b7d6c01356..5001558f087a 100644 --- a/R-package/R/readRDS.lgb.Booster.R +++ b/R-package/R/readRDS.lgb.Booster.R @@ -39,7 +39,7 @@ #' @export readRDS.lgb.Booster <- function(file, refhook = NULL) { - warning("'readRDS.lgb.Booster' is deprecated and will be removed in a future release.") + warning("'readRDS.lgb.Booster' is deprecated and will be removed in a future release. Use readRDS() instead.") object <- readRDS(file = file, refhook = refhook) lgb.restore_handle(object) From 51fa08807c1c77f42e4d3af9be6ae15d92cfddc9 Mon Sep 17 00:00:00 2001 From: david-cortes Date: Tue, 26 Oct 2021 19:00:05 +0300 Subject: [PATCH 32/40] Update R-package/R/saveRDS.lgb.Booster.R Co-authored-by: James Lamb --- R-package/R/saveRDS.lgb.Booster.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/R/saveRDS.lgb.Booster.R b/R-package/R/saveRDS.lgb.Booster.R index 5ed1d5053f56..eb71e7a2f08c 100644 --- a/R-package/R/saveRDS.lgb.Booster.R +++ b/R-package/R/saveRDS.lgb.Booster.R @@ -54,7 +54,7 @@ saveRDS.lgb.Booster <- function(object, refhook = NULL, raw = TRUE) { - warning("'saveRDS.lgb.Booster' is deprecated and will be removed in a future release.") + warning("'saveRDS.lgb.Booster' is deprecated and will be removed in a future release. 
Use saveRDS() instead.") if (!lgb.is.Booster(x = object)) { stop("saveRDS.lgb.Booster: object should be an ", sQuote("lgb.Booster")) From 8522ce72a3668d90459fa0872383f1440b07cafa Mon Sep 17 00:00:00 2001 From: david-cortes Date: Tue, 26 Oct 2021 19:00:31 +0300 Subject: [PATCH 33/40] Update R-package/tests/testthat/test_basic.R Co-authored-by: James Lamb --- R-package/tests/testthat/test_basic.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index 4af26740a595..d9518d8a18cf 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -231,7 +231,7 @@ test_that("lightgbm() performs evaluation on validation sets if they are provide expect_true(abs(bst$record_evals[["valid2"]][["binary_error"]][["eval"]][[1L]] - 0.02226317) < TOLERANCE) }) -test_that("lightgbm() accepts a NULL for save_name", { +test_that("lightgbm() does not write model to disk if save_name=NULL", { files_before <- list.files(getwd()) model <- lightgbm( From c1162707a30d99a4d592185d0a8417583418594b Mon Sep 17 00:00:00 2001 From: david-cortes Date: Tue, 26 Oct 2021 19:00:50 +0300 Subject: [PATCH 34/40] Update R-package/R/readRDS.lgb.Booster.R Co-authored-by: James Lamb --- R-package/R/readRDS.lgb.Booster.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/R/readRDS.lgb.Booster.R b/R-package/R/readRDS.lgb.Booster.R index 5001558f087a..a995d804adc5 100644 --- a/R-package/R/readRDS.lgb.Booster.R +++ b/R-package/R/readRDS.lgb.Booster.R @@ -1,6 +1,6 @@ #' @name readRDS.lgb.Booster #' @title readRDS for \code{lgb.Booster} models (DEPRECATED) -#' @description Calls \code{readRDS} in what is expected to a serialized \code{lgb.Booster} object, +#' @description Calls \code{readRDS} in what is expected to be a serialized \code{lgb.Booster} object, #' and then restores its handle through \code{lgb.restore_handle}. 
#' #' \bold{This function throws a warning and will be removed in future versions.} From bee5bc1692823eab1345b8fe21efb3f2b05bbade Mon Sep 17 00:00:00 2001 From: David Cortes Date: Tue, 26 Oct 2021 14:38:54 -0300 Subject: [PATCH 35/40] comments --- R-package/R/lgb.Booster.R | 3 +- R-package/man/readRDS.lgb.Booster.Rd | 2 +- R-package/tests/testthat/test_lgb.Booster.R | 35 +++++++++++++++++++++ 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index e98a1704a1f8..9434486fe1ee 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -506,6 +506,8 @@ Booster <- R6::R6Class( params = list(), ...) { + object$restore_handle() + additional_params <- list(...) if (length(additional_params) > 0L) { warning(paste0( @@ -831,7 +833,6 @@ predict.lgb.Booster <- function(object, if (!lgb.is.Booster(x = object)) { stop("predict.lgb.Booster: object should be an ", sQuote("lgb.Booster")) } - object$restore_handle() additional_params <- list(...) if (length(additional_params) > 0L) { diff --git a/R-package/man/readRDS.lgb.Booster.Rd b/R-package/man/readRDS.lgb.Booster.Rd index 303c73015813..5a1c647a0f74 100644 --- a/R-package/man/readRDS.lgb.Booster.Rd +++ b/R-package/man/readRDS.lgb.Booster.Rd @@ -15,7 +15,7 @@ readRDS.lgb.Booster(file, refhook = NULL) \code{lgb.Booster} } \description{ -Calls \code{readRDS} in what is expected to a serialized \code{lgb.Booster} object, +Calls \code{readRDS} in what is expected to be a serialized \code{lgb.Booster} object, and then restores its handle through \code{lgb.restore_handle}. 
\bold{This function throws a warning and will be removed in future versions.} diff --git a/R-package/tests/testthat/test_lgb.Booster.R b/R-package/tests/testthat/test_lgb.Booster.R index 6ae84c86bae7..56f15cc8b15b 100644 --- a/R-package/tests/testthat/test_lgb.Booster.R +++ b/R-package/tests/testthat/test_lgb.Booster.R @@ -965,6 +965,41 @@ test_that("lgb.cv() correctly handles passing through params to the model file", }) +context("saveRDS.lgb.Booster() and readRDS.lgb.Booster()") + +test_that("params (including dataset params) should be stored in .rds file for Booster", { + data(agaricus.train, package = "lightgbm") + dtrain <- lgb.Dataset( + agaricus.train$data + , label = agaricus.train$label + , params = list( + max_bin = 17L + ) + ) + params <- list( + objective = "binary" + , max_depth = 4L + , bagging_fraction = 0.8 + ) + bst <- Booster$new( + params = params + , train_set = dtrain + ) + bst_file <- tempfile(fileext = ".rds") + expect_warning(saveRDS.lgb.Booster(bst, file = bst_file)) + + expect_warning(bst_from_file <- readRDS.lgb.Booster(file = bst_file)) + expect_identical( + bst_from_file$params + , list( + objective = "binary" + , max_depth = 4L + , bagging_fraction = 0.8 + , max_bin = 17L + ) + ) +}) + context("saveRDS and readRDS work on Booster") test_that("params (including dataset params) should be stored in .rds file for Booster", { From b0f9f9329ad0c1f0028afcb520c89ee15a8bae95 Mon Sep 17 00:00:00 2001 From: David Cortes Date: Tue, 26 Oct 2021 15:28:32 -0300 Subject: [PATCH 36/40] fix variable name --- R-package/R/lgb.Booster.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index 9434486fe1ee..a782f98532cf 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -506,7 +506,7 @@ Booster <- R6::R6Class( params = list(), ...) { - object$restore_handle() + self$restore_handle() additional_params <- list(...) 
if (length(additional_params) > 0L) { From 2d3a13230e0be82bf2ed4872647e594a33a86880 Mon Sep 17 00:00:00 2001 From: David Cortes Date: Tue, 26 Oct 2021 16:02:06 -0300 Subject: [PATCH 37/40] restore serialization test for linear models --- R-package/tests/testthat/test_lgb.Booster.R | 37 +++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/R-package/tests/testthat/test_lgb.Booster.R b/R-package/tests/testthat/test_lgb.Booster.R index 56f15cc8b15b..87bb761b96e4 100644 --- a/R-package/tests/testthat/test_lgb.Booster.R +++ b/R-package/tests/testthat/test_lgb.Booster.R @@ -1048,6 +1048,43 @@ test_that("Handle is automatically restored when calling predict", { expect_equal(pred_before, pred_after) }) +test_that("boosters with linear models at leaves work with saveRDS.lgb.Booster and readRDS.lgb.Booster", { + X <- matrix(rnorm(100L), ncol = 1L) + labels <- 2L * X + runif(nrow(X), 0L, 0.1) + dtrain <- lgb.Dataset( + data = X + , label = labels + ) + + params <- list( + objective = "regression" + , verbose = -1L + , metric = "mse" + , seed = 0L + , num_leaves = 2L + ) + + bst <- lgb.train( + data = dtrain + , nrounds = 10L + , params = params + ) + expect_true(lgb.is.Booster(bst)) + + # save predictions, then write the model to a file and destroy it in R + preds <- predict(bst, X) + model_file <- tempfile(fileext = ".rds") + expect_warning(saveRDS.lgb.Booster(bst, file = model_file)) + bst$finalize() + expect_null(bst$.__enclos_env__$private$handle) + rm(bst) + + # load the booster and make predictions...should be the same + expect_warning({bst2 <- readRDS.lgb.Booster(file = model_file)}) + preds2 <- predict(bst2, X) + expect_identical(preds, preds2) +}) + test_that("boosters with linear models at leaves can be written to RDS and re-loaded successfully", { X <- matrix(rnorm(100L), ncol = 1L) labels <- 2L * X + runif(nrow(X), 0L, 0.1) From c53495287f8420f8543e5be7825e543b2d6feff8 Mon Sep 17 00:00:00 2001 From: david-cortes Date: Thu, 18 Nov 2021 18:11:38 +0200 
Subject: [PATCH 38/40] Update R-package/R/lightgbm.R Co-authored-by: James Lamb --- R-package/R/lightgbm.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/R/lightgbm.R b/R-package/R/lightgbm.R index ba19fc5791a0..6f7e538b5c20 100644 --- a/R-package/R/lightgbm.R +++ b/R-package/R/lightgbm.R @@ -70,7 +70,7 @@ #' or \code{objective} (passed into \code{params}). #' @section Model serialization: #' -#' LightGBM models objects can be serialized and de-serialized through functions such as \code{save} +#' LightGBM model objects can be serialized and de-serialized through functions such as \code{save} #' or \code{saveRDS}, but similarly to libraries such as 'xgboost', serialization works a bit differently #' from typical R objects. In order to make models serializable in R, a copy of the underlying C++ object #' as serialized raw bytes is produced and stored in the R model object, and when this R object is From b1b4e2b778549af06ea02e9bbd14cbd5d606a475 Mon Sep 17 00:00:00 2001 From: David Cortes Date: Thu, 18 Nov 2021 13:23:47 -0300 Subject: [PATCH 39/40] update docs --- R-package/man/lgb_shared_params.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/man/lgb_shared_params.Rd b/R-package/man/lgb_shared_params.Rd index 053b378ddf42..b95c258e5f75 100644 --- a/R-package/man/lgb_shared_params.Rd +++ b/R-package/man/lgb_shared_params.Rd @@ -91,7 +91,7 @@ Parameter docs shared by \code{lgb.train}, \code{lgb.cv}, and \code{lightgbm} \section{Model serialization}{ - LightGBM models objects can be serialized and de-serialized through functions such as \code{save} + LightGBM model objects can be serialized and de-serialized through functions such as \code{save} or \code{saveRDS}, but similarly to libraries such as 'xgboost', serialization works a bit differently from typical R objects. 
In order to make models serializable in R, a copy of the underlying C++ object as serialized raw bytes is produced and stored in the R model object, and when this R object is From eb7fd32f6cb96be3c3d108bf7f2f79222d759429 Mon Sep 17 00:00:00 2001 From: David Cortes Date: Fri, 3 Dec 2021 21:31:43 -0300 Subject: [PATCH 40/40] fix issues with null terminator --- R-package/R/lgb.Booster.R | 10 ++-------- R-package/src/lightgbm_R.cpp | 21 +++++++++++++++++++-- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index f364f31df1fe..c7c50864d7e0 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -87,10 +87,7 @@ Booster <- R6::R6Class( } else if (!is.null(model_str)) { # Do we have a model_str as character/raw? - if (is.character(model_str)) { - model_str <- charToRaw(model_str) - } - if (!is.raw(model_str)) { + if (!is.raw(model_str) && !is.character(model_str)) { stop("lgb.Booster: Can only use a character/raw vector as model_str") } @@ -976,11 +973,8 @@ lgb.load <- function(filename = NULL, model_str = NULL) { return(invisible(Booster$new(modelfile = filename))) } - if (is.character(model_str)) { - model_str <- charToRaw(model_str) - } if (model_str_provided) { - if (!is.raw(model_str)) { + if (!is.raw(model_str) && !is.character(model_str)) { stop("lgb.load: model_str should be a character/raw vector") } return(invisible(Booster$new(model_str = model_str))) diff --git a/R-package/src/lightgbm_R.cpp b/R-package/src/lightgbm_R.cpp index c05a5ca44807..87a84c36162c 100644 --- a/R-package/src/lightgbm_R.cpp +++ b/R-package/src/lightgbm_R.cpp @@ -476,13 +476,30 @@ SEXP LGBM_BoosterCreateFromModelfile_R(SEXP filename) { SEXP LGBM_BoosterLoadModelFromString_R(SEXP model_str) { R_API_BEGIN(); SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue)); + SEXP temp = NULL; + int n_protected = 1; int out_num_iterations = 0; - const char* model_str_ptr = 
reinterpret_cast<const char*>(RAW(model_str)); + const char* model_str_ptr = nullptr; + switch (TYPEOF(model_str)) { + case RAWSXP: { + model_str_ptr = reinterpret_cast<const char*>(RAW(model_str)); + break; + } + case CHARSXP: { + model_str_ptr = reinterpret_cast<const char*>(CHAR(model_str)); + break; + } + case STRSXP: { + temp = PROTECT(STRING_ELT(model_str, 0)); + n_protected++; + model_str_ptr = reinterpret_cast<const char*>(CHAR(temp)); + } + } BoosterHandle handle = nullptr; CHECK_CALL(LGBM_BoosterLoadModelFromString(model_str_ptr, &out_num_iterations, &handle)); R_SetExternalPtrAddr(ret, handle); R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE); - UNPROTECT(1); + UNPROTECT(n_protected); return ret; R_API_END(); }