diff --git a/R-package/CMakeLists.txt b/R-package/CMakeLists.txt index b11447251f9a..ac763dc61185 100644 --- a/R-package/CMakeLists.txt +++ b/R-package/CMakeLists.txt @@ -32,3 +32,7 @@ set_target_properties( set(XGBOOST_DEFINITIONS "${XGBOOST_DEFINITIONS};${R_DEFINITIONS}" PARENT_SCOPE) set(XGBOOST_OBJ_SOURCES $ PARENT_SCOPE) set(LINKED_LIBRARIES_PRIVATE ${LINKED_LIBRARIES_PRIVATE} ${LIBR_CORE_LIBRARY} PARENT_SCOPE) + +if (USE_OPENMP) + target_link_libraries(xgboost-r PRIVATE OpenMP::OpenMP_CXX) +endif () diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 23e64c22a545..794abdb6d775 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -63,5 +63,5 @@ Imports: data.table (>= 1.9.6), magrittr (>= 1.5), stringi (>= 0.5.2) -RoxygenNote: 7.0.2 +RoxygenNote: 7.1.0 SystemRequirements: GNU make, C++11 diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index cd2a3b92b70d..fb0ac542f8bf 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -40,6 +40,7 @@ export(xgb.ggplot.deepness) export(xgb.ggplot.importance) export(xgb.importance) export(xgb.load) +export(xgb.load.raw) export(xgb.model.dt.tree) export(xgb.plot.deepness) export(xgb.plot.importance) @@ -48,7 +49,9 @@ export(xgb.plot.shap) export(xgb.plot.tree) export(xgb.save) export(xgb.save.raw) +export(xgb.serialize) export(xgb.train) +export(xgb.unserialize) export(xgboost) import(methods) importClassesFrom(Matrix,dgCMatrix) diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R index dd901b07d162..41475a1d494f 100644 --- a/R-package/R/xgb.Booster.R +++ b/R-package/R/xgb.Booster.R @@ -5,20 +5,34 @@ xgb.Booster.handle <- function(params = list(), cachelist = list(), modelfile = !all(vapply(cachelist, inherits, logical(1), what = 'xgb.DMatrix'))) { stop("cachelist must be a list of xgb.DMatrix objects") } - - handle <- .Call(XGBoosterCreate_R, cachelist) + ## Load existing model, dispatch for on disk model file and in memory buffer if (!is.null(modelfile)) { if (typeof(modelfile) == "character") { + ## A filename + handle <- .Call(XGBoosterCreate_R, cachelist) .Call(XGBoosterLoadModel_R, handle, modelfile[1]) + class(handle) <- "xgb.Booster.handle" + if (length(params) > 0) { + xgb.parameters(handle) <- params + } + return(handle) } else if (typeof(modelfile) == "raw") { - .Call(XGBoosterLoadModelFromRaw_R, handle, modelfile) + ## A memory buffer + bst <- xgb.unserialize(modelfile) + xgb.parameters(bst) <- params + return (bst) } else if (inherits(modelfile, "xgb.Booster")) { + ## A booster object bst <- xgb.Booster.complete(modelfile, saveraw = TRUE) - .Call(XGBoosterLoadModelFromRaw_R, handle, bst$raw) + bst <- xgb.unserialize(bst$raw) + xgb.parameters(bst) <- params + return (bst) } else { stop("modelfile must be either character filename, or raw booster dump, or xgb.Booster object") } } + ## Create new model + handle <- .Call(XGBoosterCreate_R, cachelist) class(handle) <- "xgb.Booster.handle" if (length(params) > 0) { xgb.parameters(handle) <- params @@ -113,8 +127,9 @@ xgb.Booster.complete <- function(object, saveraw = TRUE) { if (is.null.handle(object$handle)) { object$handle <- xgb.Booster.handle(modelfile = object$raw) } else { - if (is.null(object$raw) && saveraw) - object$raw <- xgb.save.raw(object$handle) + if (is.null(object$raw) && saveraw) { + object$raw <- xgb.serialize(object$handle) + } } return(object) } @@ -399,7 +414,7 @@ predict.xgb.Booster.handle <- function(object, ...) { #' That would only matter if attributes need to be set many times. #' Note, however, that when feeding a handle of an \code{xgb.Booster} object to the attribute setters, #' the raw model cache of an \code{xgb.Booster} object would not be automatically updated, -#' and it would be user's responsibility to call \code{xgb.save.raw} to update it. +#' and it would be user's responsibility to call \code{xgb.serialize} to update it. #' #' The \code{xgb.attributes<-} setter either updates the existing or adds one or several attributes, #' but it doesn't delete the other existing attributes. @@ -458,7 +473,7 @@ xgb.attr <- function(object, name) { } .Call(XGBoosterSetAttr_R, handle, as.character(name[1]), value) if (is(object, 'xgb.Booster') && !is.null(object$raw)) { - object$raw <- xgb.save.raw(object$handle) + object$raw <- xgb.serialize(object$handle) } object } @@ -498,7 +513,7 @@ xgb.attributes <- function(object) { .Call(XGBoosterSetAttr_R, handle, names(a[i]), a[[i]]) } if (is(object, 'xgb.Booster') && !is.null(object$raw)) { - object$raw <- xgb.save.raw(object$handle) + object$raw <- xgb.serialize(object$handle) } object } @@ -528,7 +543,8 @@ xgb.config <- function(object) { `xgb.config<-` <- function(object, value) { handle <- xgb.get.handle(object) .Call(XGBoosterLoadJsonConfig_R, handle, value) - object$raw <- xgb.Booster.complete(object) + object$raw <- NULL # force renew the raw buffer + object <- xgb.Booster.complete(object) object } @@ -568,7 +584,7 @@ xgb.config <- function(object) { .Call(XGBoosterSetParam_R, handle, names(p[i]), p[[i]]) } if (is(object, 'xgb.Booster') && !is.null(object$raw)) { - object$raw <- xgb.save.raw(object$handle) + object$raw <- xgb.serialize(object$handle) } object } diff --git a/R-package/R/xgb.load.raw.R b/R-package/R/xgb.load.raw.R new file mode 100644 index 000000000000..2a7d375a9b05 --- /dev/null +++ b/R-package/R/xgb.load.raw.R @@ -0,0 +1,14 @@ +#' Load serialised xgboost model from R's raw vector +#' +#' User can generate raw memory buffer by calling xgb.save.raw +#' +#' @param buffer the buffer returned by xgb.save.raw +#' +#' @export +xgb.load.raw <- function(buffer) { + cachelist <- list() + handle <- .Call(XGBoosterCreate_R, cachelist) + .Call(XGBoosterLoadModelFromRaw_R, handle, buffer) + class(handle) <- "xgb.Booster.handle" + return (handle) +} diff --git a/R-package/R/xgb.save.raw.R b/R-package/R/xgb.save.raw.R index ae27d04f0390..967a314827f7 100644 --- a/R-package/R/xgb.save.raw.R +++ b/R-package/R/xgb.save.raw.R @@ -1,23 +1,23 @@ #' Save xgboost model to R's raw vector, -#' user can call xgb.load to load the model back from raw vector -#' +#' user can call xgb.load.raw to load the model back from raw vector +#' #' Save xgboost model from xgboost or xgb.train -#' +#' #' @param model the model object. -#' +#' #' @examples #' data(agaricus.train, package='xgboost') #' data(agaricus.test, package='xgboost') #' train <- agaricus.train #' test <- agaricus.test -#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2, +#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2, #' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic") #' raw <- xgb.save.raw(bst) -#' bst <- xgb.load(raw) +#' bst <- xgb.load.raw(raw) #' pred <- predict(bst, test$data) #' #' @export xgb.save.raw <- function(model) { - model <- xgb.get.handle(model) - .Call(XGBoosterModelToRaw_R, model) + handle <- xgb.get.handle(model) + .Call(XGBoosterModelToRaw_R, handle) } diff --git a/R-package/R/xgb.serialize.R b/R-package/R/xgb.serialize.R new file mode 100644 index 000000000000..00bbb429320c --- /dev/null +++ b/R-package/R/xgb.serialize.R @@ -0,0 +1,21 @@ +#' Serialize the booster instance into R's raw vector. The serialization method differs +#' from \code{\link{xgb.save.raw}} as the latter one saves only the model but not +#' parameters. This serialization format is not stable across different xgboost versions. +#' +#' @param booster the booster instance +#' +#' @examples +#' data(agaricus.train, package='xgboost') +#' data(agaricus.test, package='xgboost') +#' train <- agaricus.train +#' test <- agaricus.test +#' bst <- xgboost(data = train$data, label = train$label, max_depth = 2, +#' eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic") +#' raw <- xgb.serialize(bst) +#' bst <- xgb.unserialize(raw) +#' +#' @export +xgb.serialize <- function(booster) { + handle <- xgb.get.handle(booster) + .Call(XGBoosterSerializeToBuffer_R, handle) +} diff --git a/R-package/R/xgb.unserialize.R b/R-package/R/xgb.unserialize.R new file mode 100644 index 000000000000..1a62e4cde304 --- /dev/null +++ b/R-package/R/xgb.unserialize.R @@ -0,0 +1,12 @@ +#' Load the instance back from \code{\link{xgb.serialize}} +#' +#' @param buffer the buffer containing booster instance saved by \code{\link{xgb.serialize}} +#' +#' @export +xgb.unserialize <- function(buffer) { + cachelist <- list() + handle <- .Call(XGBoosterCreate_R, cachelist) + .Call(XGBoosterUnserializeFromBuffer_R, handle, buffer) + class(handle) <- "xgb.Booster.handle" + return (handle) +} diff --git a/R-package/man/agaricus.test.Rd b/R-package/man/agaricus.test.Rd index b88b340966dc..e3694ae0dfe8 100644 --- a/R-package/man/agaricus.test.Rd +++ b/R-package/man/agaricus.test.Rd @@ -4,8 +4,10 @@ \name{agaricus.test} \alias{agaricus.test} \title{Test part from Mushroom Data Set} -\format{A list containing a label vector, and a dgCMatrix object with 1611 -rows and 126 variables} +\format{ +A list containing a label vector, and a dgCMatrix object with 1611 +rows and 126 variables +} \usage{ data(agaricus.test) } diff --git a/R-package/man/agaricus.train.Rd b/R-package/man/agaricus.train.Rd index 6df609699dd9..92692c965438 100644 --- a/R-package/man/agaricus.train.Rd +++ b/R-package/man/agaricus.train.Rd @@ -4,8 +4,10 @@ \name{agaricus.train} \alias{agaricus.train} \title{Training part from Mushroom Data Set} -\format{A list containing a label vector, and a dgCMatrix object with 6513 -rows and 127 variables} +\format{ +A list containing a label vector, and a dgCMatrix object with 6513 +rows and 127 variables +} \usage{ data(agaricus.train) } diff --git a/R-package/man/xgb.attr.Rd b/R-package/man/xgb.attr.Rd index ff55d432f067..03779e4202d9 100644 --- a/R-package/man/xgb.attr.Rd +++ b/R-package/man/xgb.attr.Rd @@ -55,7 +55,7 @@ than for \code{xgb.Booster}, since only just a handle (pointer) would need to be That would only matter if attributes need to be set many times. Note, however, that when feeding a handle of an \code{xgb.Booster} object to the attribute setters, the raw model cache of an \code{xgb.Booster} object would not be automatically updated, -and it would be user's responsibility to call \code{xgb.save.raw} to update it. +and it would be user's responsibility to call \code{xgb.serialize} to update it. The \code{xgb.attributes<-} setter either updates the existing or adds one or several attributes, but it doesn't delete the other existing attributes. diff --git a/R-package/man/xgb.cv.Rd b/R-package/man/xgb.cv.Rd index cbe9966fd003..8cb03a5ef3f0 100644 --- a/R-package/man/xgb.cv.Rd +++ b/R-package/man/xgb.cv.Rd @@ -135,7 +135,7 @@ An object of class \code{xgb.cv.synchronous} with the following elements: (only available with early stopping). \item \code{pred} CV prediction values available when \code{prediction} is set. It is either vector or matrix (see \code{\link{cb.cv.predict}}). - \item \code{models} a liost of the CV folds' models. It is only available with the explicit + \item \code{models} a list of the CV folds' models. It is only available with the explicit setting of the \code{cb.cv.predict(save_models = TRUE)} callback. } } diff --git a/R-package/man/xgb.load.raw.Rd b/R-package/man/xgb.load.raw.Rd new file mode 100644 index 000000000000..f0248cd9e002 --- /dev/null +++ b/R-package/man/xgb.load.raw.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xgb.load.raw.R +\name{xgb.load.raw} +\alias{xgb.load.raw} +\title{Load serialised xgboost model from R's raw vector} +\usage{ +xgb.load.raw(buffer) +} +\arguments{ +\item{buffer}{the buffer returned by xgb.save.raw} +} +\description{ +User can generate raw memory buffer by calling xgb.save.raw +} diff --git a/R-package/man/xgb.save.raw.Rd b/R-package/man/xgb.save.raw.Rd index bbe7faf2ce50..6f2faa0ef118 100644 --- a/R-package/man/xgb.save.raw.Rd +++ b/R-package/man/xgb.save.raw.Rd @@ -3,7 +3,7 @@ \name{xgb.save.raw} \alias{xgb.save.raw} \title{Save xgboost model to R's raw vector, -user can call xgb.load to load the model back from raw vector} +user can call xgb.load.raw to load the model back from raw vector} \usage{ xgb.save.raw(model) } @@ -18,10 +18,10 @@ data(agaricus.train, package='xgboost') data(agaricus.test, package='xgboost') train <- agaricus.train test <- agaricus.test -bst <- xgboost(data = train$data, label = train$label, max_depth = 2, +bst <- xgboost(data = train$data, label = train$label, max_depth = 2, eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic") raw <- xgb.save.raw(bst) -bst <- xgb.load(raw) +bst <- xgb.load.raw(raw) pred <- predict(bst, test$data) } diff --git a/R-package/man/xgb.serialize.Rd b/R-package/man/xgb.serialize.Rd new file mode 100644 index 000000000000..952441d98bea --- /dev/null +++ b/R-package/man/xgb.serialize.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xgb.serialize.R +\name{xgb.serialize} +\alias{xgb.serialize} +\title{Serialize the booster instance into R's raw vector. The serialization method differs +from \code{\link{xgb.save.raw}} as the latter one saves only the model but not +parameters. This serialization format is not stable across different xgboost versions.} +\usage{ +xgb.serialize(booster) +} +\arguments{ +\item{booster}{the booster instance} +} +\description{ +Serialize the booster instance into R's raw vector. The serialization method differs +from \code{\link{xgb.save.raw}} as the latter one saves only the model but not +parameters. This serialization format is not stable across different xgboost versions. +} +\examples{ +data(agaricus.train, package='xgboost') +data(agaricus.test, package='xgboost') +train <- agaricus.train +test <- agaricus.test +bst <- xgboost(data = train$data, label = train$label, max_depth = 2, + eta = 1, nthread = 2, nrounds = 2,objective = "binary:logistic") +raw <- xgb.serialize(bst) +bst <- xgb.unserialize(raw) + +} diff --git a/R-package/man/xgb.unserialize.Rd b/R-package/man/xgb.unserialize.Rd new file mode 100644 index 000000000000..7a11c5c5eca2 --- /dev/null +++ b/R-package/man/xgb.unserialize.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/xgb.unserialize.R +\name{xgb.unserialize} +\alias{xgb.unserialize} +\title{Load the instance back from \code{\link{xgb.serialize}}} +\usage{ +xgb.unserialize(buffer) +} +\arguments{ +\item{buffer}{the buffer containing booster instance saved by \code{\link{xgb.serialize}}} +} +\description{ +Load the instance back from \code{\link{xgb.serialize}} +} diff --git a/R-package/src/init.c b/R-package/src/init.c index b85d4e756207..2093059df083 100644 --- a/R-package/src/init.c +++ b/R-package/src/init.c @@ -25,6 +25,8 @@ extern SEXP XGBoosterLoadModelFromRaw_R(SEXP, SEXP); extern SEXP XGBoosterLoadModel_R(SEXP, SEXP); extern SEXP XGBoosterSaveJsonConfig_R(SEXP handle); extern SEXP XGBoosterLoadJsonConfig_R(SEXP handle, SEXP value); +extern SEXP XGBoosterSerializeToBuffer_R(SEXP handle); +extern SEXP XGBoosterUnserializeFromBuffer_R(SEXP handle, SEXP raw); extern SEXP XGBoosterModelToRaw_R(SEXP); extern SEXP XGBoosterPredict_R(SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP XGBoosterSaveModel_R(SEXP, SEXP); @@ -53,6 +55,8 @@ static const R_CallMethodDef CallEntries[] = { {"XGBoosterLoadModel_R", (DL_FUNC) &XGBoosterLoadModel_R, 2}, {"XGBoosterSaveJsonConfig_R", (DL_FUNC) &XGBoosterSaveJsonConfig_R, 1}, {"XGBoosterLoadJsonConfig_R", (DL_FUNC) &XGBoosterLoadJsonConfig_R, 2}, + {"XGBoosterSerializeToBuffer_R", (DL_FUNC) &XGBoosterSerializeToBuffer_R, 1}, + {"XGBoosterUnserializeFromBuffer_R", (DL_FUNC) &XGBoosterUnserializeFromBuffer_R, 2}, {"XGBoosterModelToRaw_R", (DL_FUNC) &XGBoosterModelToRaw_R, 1}, {"XGBoosterPredict_R", (DL_FUNC) &XGBoosterPredict_R, 5}, {"XGBoosterSaveModel_R", (DL_FUNC) &XGBoosterSaveModel_R, 2}, diff --git a/R-package/src/xgboost_R.cc b/R-package/src/xgboost_R.cc index d89ea7bed249..cb86ef46905c 100644 --- a/R-package/src/xgboost_R.cc +++ b/R-package/src/xgboost_R.cc @@ -338,15 +338,6 @@ SEXP XGBoosterSaveModel_R(SEXP handle, SEXP fname) { return R_NilValue; } -SEXP XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw) { - R_API_BEGIN(); - CHECK_CALL(XGBoosterLoadModelFromBuffer(R_ExternalPtrAddr(handle), - RAW(raw), - length(raw))); - R_API_END(); - return R_NilValue; -} - SEXP XGBoosterModelToRaw_R(SEXP handle) { SEXP ret; R_API_BEGIN(); @@ -362,6 +353,15 @@ SEXP XGBoosterModelToRaw_R(SEXP handle) { return ret; } +SEXP XGBoosterLoadModelFromRaw_R(SEXP handle, SEXP raw) { + R_API_BEGIN(); + CHECK_CALL(XGBoosterLoadModelFromBuffer(R_ExternalPtrAddr(handle), + RAW(raw), + length(raw))); + R_API_END(); + return R_NilValue; +} + SEXP XGBoosterSaveJsonConfig_R(SEXP handle) { const char* ret; R_API_BEGIN(); @@ -380,6 +380,30 @@ SEXP XGBoosterLoadJsonConfig_R(SEXP handle, SEXP value) { return R_NilValue; } +SEXP XGBoosterSerializeToBuffer_R(SEXP handle) { + SEXP ret; + R_API_BEGIN(); + bst_ulong out_len; + const char *raw; + CHECK_CALL(XGBoosterSerializeToBuffer(R_ExternalPtrAddr(handle), &out_len, &raw)); + ret = PROTECT(allocVector(RAWSXP, out_len)); + if (out_len != 0) { + memcpy(RAW(ret), raw, out_len); + } + R_API_END(); + UNPROTECT(1); + return ret; +} + +SEXP XGBoosterUnserializeFromBuffer_R(SEXP handle, SEXP raw) { + R_API_BEGIN(); + XGBoosterUnserializeFromBuffer(R_ExternalPtrAddr(handle), + RAW(raw), + length(raw)); + R_API_END(); + return R_NilValue; +} + SEXP XGBoosterDumpModel_R(SEXP handle, SEXP fmap, SEXP with_stats, SEXP dump_format) { SEXP out; R_API_BEGIN(); diff --git a/R-package/src/xgboost_R.h b/R-package/src/xgboost_R.h index 05cd7afbe48d..be16ff974c36 100644 --- a/R-package/src/xgboost_R.h +++ b/R-package/src/xgboost_R.h @@ -187,6 +187,7 @@ XGB_DLL SEXP XGBoosterModelToRaw_R(SEXP handle); * \param handle handle * \return JSON string */ + XGB_DLL SEXP XGBoosterSaveJsonConfig_R(SEXP handle); /*! * \brief Load the JSON string returnd by XGBoosterSaveJsonConfig_R @@ -195,6 +196,22 @@ XGB_DLL SEXP XGBoosterSaveJsonConfig_R(SEXP handle); * \return R_NilValue */ XGB_DLL SEXP XGBoosterLoadJsonConfig_R(SEXP handle, SEXP value); + +/*! + * \brief Memory snapshot based serialization method. Saves everything states + * into buffer. + * \param handle handle to booster + */ +XGB_DLL SEXP XGBoosterSerializeToBuffer_R(SEXP handle); + +/*! + * \brief Memory snapshot based serialization method. Loads the buffer returned + * from `XGBoosterSerializeToBuffer'. + * \param handle handle to booster + * \return raw byte array + */ +XGB_DLL SEXP XGBoosterUnserializeFromBuffer_R(SEXP handle, SEXP raw); + /*! * \brief dump model into a string * \param handle handle diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index 733c38080409..7d6918027df6 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -219,6 +219,21 @@ test_that("training continuation works", { expect_equal(dim(bst2$evaluation_log), c(2, 2)) }) +test_that("model serialization works", { + out_path <- "model_serialization" + dtrain <- xgb.DMatrix(train$data, label = train$label) + watchlist = list(train=dtrain) + param <- list(objective = "binary:logistic") + booster <- xgb.train(param, dtrain, nrounds = 4, watchlist) + raw <- xgb.serialize(booster) + saveRDS(raw, out_path) + raw <- readRDS(out_path) + + loaded <- xgb.unserialize(raw) + raw_from_loaded <- xgb.serialize(loaded) + expect_equal(raw, raw_from_loaded) + file.remove(out_path) +}) test_that("xgb.cv works", { set.seed(11) diff --git a/R-package/tests/testthat/test_callbacks.R b/R-package/tests/testthat/test_callbacks.R index 59daaf448c5f..76bcd484d5c5 100644 --- a/R-package/tests/testthat/test_callbacks.R +++ b/R-package/tests/testthat/test_callbacks.R @@ -30,16 +30,16 @@ param <- list(objective = "binary:logistic", max_depth = 2, nthread = 2) test_that("cb.print.evaluation works as expected", { - + bst_evaluation <- c('train-auc'=0.9, 'test-auc'=0.8) bst_evaluation_err <- NULL begin_iteration <- 1 end_iteration <- 7 - + f0 <- cb.print.evaluation(period=0) f1 <- cb.print.evaluation(period=1) f5 <- cb.print.evaluation(period=5) - + expect_false(is.null(attr(f1, 'call'))) expect_equal(attr(f1, 'name'), 'cb.print.evaluation') @@ -48,15 +48,15 @@ test_that("cb.print.evaluation works as expected", { expect_output(f1(), "\\[1\\]\ttrain-auc:0.900000\ttest-auc:0.800000") expect_output(f5(), "\\[1\\]\ttrain-auc:0.900000\ttest-auc:0.800000") expect_null(f1()) - + iteration <- 2 expect_output(f1(), "\\[2\\]\ttrain-auc:0.900000\ttest-auc:0.800000") expect_silent(f5()) - + iteration <- 7 expect_output(f1(), "\\[7\\]\ttrain-auc:0.900000\ttest-auc:0.800000") expect_output(f5(), "\\[7\\]\ttrain-auc:0.900000\ttest-auc:0.800000") - + bst_evaluation_err <- c('train-auc'=0.1, 'test-auc'=0.2) expect_output(f1(), "\\[7\\]\ttrain-auc:0.900000\\+0.100000\ttest-auc:0.800000\\+0.200000") }) @@ -65,40 +65,40 @@ test_that("cb.evaluation.log works as expected", { bst_evaluation <- c('train-auc'=0.9, 'test-auc'=0.8) bst_evaluation_err <- NULL - + evaluation_log <- list() f <- cb.evaluation.log() - + expect_false(is.null(attr(f, 'call'))) expect_equal(attr(f, 'name'), 'cb.evaluation.log') - + iteration <- 1 expect_silent(f()) - expect_equal(evaluation_log, + expect_equal(evaluation_log, list(c(iter=1, bst_evaluation))) iteration <- 2 expect_silent(f()) - expect_equal(evaluation_log, + expect_equal(evaluation_log, list(c(iter=1, bst_evaluation), c(iter=2, bst_evaluation))) expect_silent(f(finalize = TRUE)) - expect_equal(evaluation_log, + expect_equal(evaluation_log, data.table(iter=1:2, train_auc=c(0.9,0.9), test_auc=c(0.8,0.8))) - + bst_evaluation_err <- c('train-auc'=0.1, 'test-auc'=0.2) evaluation_log <- list() f <- cb.evaluation.log() - + iteration <- 1 expect_silent(f()) - expect_equal(evaluation_log, + expect_equal(evaluation_log, list(c(iter=1, c(bst_evaluation, bst_evaluation_err)))) iteration <- 2 expect_silent(f()) - expect_equal(evaluation_log, + expect_equal(evaluation_log, list(c(iter=1, c(bst_evaluation, bst_evaluation_err)), c(iter=2, c(bst_evaluation, bst_evaluation_err)))) expect_silent(f(finalize = TRUE)) - expect_equal(evaluation_log, + expect_equal(evaluation_log, data.table(iter=1:2, train_auc_mean=c(0.9,0.9), train_auc_std=c(0.1,0.1), test_auc_mean=c(0.8,0.8), test_auc_std=c(0.2,0.2))) @@ -130,18 +130,18 @@ test_that("cb.reset.parameters works as expected", { bst1 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, callbacks = list(cb.reset.parameters(my_par))) expect_false(is.null(bst1$evaluation_log$train_error)) - expect_equal(bst0$evaluation_log$train_error, + expect_equal(bst0$evaluation_log$train_error, bst1$evaluation_log$train_error) - + # same eta but re-set via a function in the callback set.seed(111) my_par <- list(eta = function(itr, itr_end) 0.9) bst2 <- xgb.train(param, dtrain, nrounds = 2, watchlist, verbose = 0, callbacks = list(cb.reset.parameters(my_par))) expect_false(is.null(bst2$evaluation_log$train_error)) - expect_equal(bst0$evaluation_log$train_error, + expect_equal(bst0$evaluation_log$train_error, bst2$evaluation_log$train_error) - + # different eta re-set as a vector parameter in the callback set.seed(111) my_par <- list(eta = c(0.6, 0.5)) @@ -149,7 +149,7 @@ test_that("cb.reset.parameters works as expected", { callbacks = list(cb.reset.parameters(my_par))) expect_false(is.null(bst3$evaluation_log$train_error)) expect_false(all(bst0$evaluation_log$train_error == bst3$evaluation_log$train_error)) - + # resetting multiple parameters at the same time runs with no error my_par <- list(eta = c(1., 0.5), gamma = c(1, 2), max_depth = c(4, 8)) expect_error( @@ -175,7 +175,7 @@ test_that("cb.reset.parameters works as expected", { test_that("cb.save.model works as expected", { files <- c('xgboost_01.model', 'xgboost_02.model', 'xgboost.model') for (f in files) if (file.exists(f)) file.remove(f) - + bst <- xgb.train(param, dtrain, nrounds = 2, watchlist, eta = 1, verbose = 0, save_period = 1, save_name = "xgboost_%02d.model") expect_true(file.exists('xgboost_01.model')) @@ -184,6 +184,9 @@ test_that("cb.save.model works as expected", { expect_equal(xgb.ntree(b1), 1) b2 <- xgb.load('xgboost_02.model') expect_equal(xgb.ntree(b2), 2) + + xgb.config(b2) <- xgb.config(bst) + expect_equal(xgb.config(bst), xgb.config(b2)) expect_equal(bst$raw, b2$raw) # save_period = 0 saves the last iteration's model @@ -191,8 +194,9 @@ test_that("cb.save.model works as expected", { save_period = 0) expect_true(file.exists('xgboost.model')) b2 <- xgb.load('xgboost.model') + xgb.config(b2) <- xgb.config(bst) expect_equal(bst$raw, b2$raw) - + for (f in files) if (file.exists(f)) file.remove(f) }) @@ -211,7 +215,7 @@ test_that("early stopping xgb.train works", { err_pred <- err(ltest, pred) err_log <- bst$evaluation_log[bst$best_iteration, test_error] expect_equal(err_log, err_pred, tolerance = 5e-6) - + set.seed(11) expect_silent( bst0 <- xgb.train(param, dtrain, nrounds = 20, watchlist, eta = 0.3, @@ -288,13 +292,13 @@ test_that("prediction in early-stopping xgb.cv works", { early_stopping_rounds = 5, maximize = FALSE, stratified = FALSE, prediction = TRUE) , "Stopping. Best iteration") - + expect_false(is.null(cv$best_iteration)) expect_lt(cv$best_iteration, 19) expect_false(is.null(cv$evaluation_log)) expect_false(is.null(cv$pred)) expect_length(cv$pred, nrow(train$data)) - + err_pred <- mean( sapply(cv$folds, function(f) mean(err(ltrain[f], cv$pred[f]))) ) err_log <- cv$evaluation_log[cv$best_iteration, test_error_mean] expect_equal(err_pred, err_log, tolerance = 1e-6) diff --git a/jvm-packages/dev/Dockerfile b/jvm-packages/dev/Dockerfile index a767163e6bc7..02df129cf946 100644 --- a/jvm-packages/dev/Dockerfile +++ b/jvm-packages/dev/Dockerfile @@ -24,7 +24,7 @@ RUN \ yum install -y tar unzip wget xz git centos-release-scl yum-utils java-1.8.0-openjdk-devel && \ yum-config-manager --enable centos-sclo-rh-testing && \ yum -y update && \ - yum install -y devtoolset-4-gcc devtoolset-4-binutils devtoolset-4-gcc-c++ && \ + yum install -y devtoolset-6-gcc devtoolset-6-binutils devtoolset-6-gcc-c++ && \ # Python wget https://repo.continuum.io/miniconda/Miniconda3-4.5.12-Linux-x86_64.sh && \ bash Miniconda3-4.5.12-Linux-x86_64.sh -b -p /opt/python && \ @@ -38,9 +38,9 @@ RUN \ # Set the required environment variables ENV PATH=/opt/python/bin:/opt/maven/bin:$PATH -ENV CC=/opt/rh/devtoolset-4/root/usr/bin/gcc -ENV CXX=/opt/rh/devtoolset-4/root/usr/bin/c++ -ENV CPP=/opt/rh/devtoolset-4/root/usr/bin/cpp +ENV CC=/opt/rh/devtoolset-6/root/usr/bin/gcc +ENV CXX=/opt/rh/devtoolset-6/root/usr/bin/c++ +ENV CPP=/opt/rh/devtoolset-6/root/usr/bin/cpp ENV JAVA_HOME=/usr/lib/jvm/java # Install Python packages diff --git a/tests/ci_build/Dockerfile.gpu_build b/tests/ci_build/Dockerfile.gpu_build index 8db1c723097f..c460db4cd93e 100644 --- a/tests/ci_build/Dockerfile.gpu_build +++ b/tests/ci_build/Dockerfile.gpu_build @@ -3,6 +3,7 @@ FROM nvidia/cuda:$CUDA_VERSION-devel-centos6 # Environment ENV DEBIAN_FRONTEND noninteractive +ENV DEVTOOLSET_URL_ROOT http://mirror.centos.org/centos/6/sclo/x86_64/rh/devtoolset-4 # Install all basic requirements RUN \ @@ -10,7 +11,11 @@ RUN \ yum install -y tar unzip wget xz git centos-release-scl yum-utils && \ yum-config-manager --enable centos-sclo-rh-testing && \ yum -y update && \ - yum install -y devtoolset-4-gcc devtoolset-4-binutils devtoolset-4-gcc-c++ && \ + yum install -y $DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-5.3.1-6.1.el6.x86_64.rpm \ + $DEVTOOLSET_URL_ROOT/devtoolset-4-gcc-c++-5.3.1-6.1.el6.x86_64.rpm \ + $DEVTOOLSET_URL_ROOT/devtoolset-4-binutils-2.25.1-8.el6.x86_64.rpm \ + $DEVTOOLSET_URL_ROOT/devtoolset-4-runtime-4.1-3.sc1.el6.x86_64.rpm \ + $DEVTOOLSET_URL_ROOT/devtoolset-4-libstdc++-devel-5.3.1-6.1.el6.x86_64.rpm && \ # Python wget https://repo.continuum.io/miniconda/Miniconda3-4.5.12-Linux-x86_64.sh && \ bash Miniconda3-4.5.12-Linux-x86_64.sh -b -p /opt/python && \ diff --git a/tests/ci_build/Dockerfile.jvm b/tests/ci_build/Dockerfile.jvm index d1974c4d6fc9..17285237c2d2 100644 --- a/tests/ci_build/Dockerfile.jvm +++ b/tests/ci_build/Dockerfile.jvm @@ -6,7 +6,7 @@ RUN \ yum install -y tar unzip wget xz git centos-release-scl yum-utils java-1.8.0-openjdk-devel && \ yum-config-manager --enable centos-sclo-rh-testing && \ yum -y update && \ - yum install -y devtoolset-4-gcc devtoolset-4-binutils devtoolset-4-gcc-c++ && \ + yum install -y devtoolset-6-gcc devtoolset-6-binutils devtoolset-6-gcc-c++ && \ # Python wget https://repo.continuum.io/miniconda/Miniconda3-4.5.12-Linux-x86_64.sh && \ bash Miniconda3-4.5.12-Linux-x86_64.sh -b -p /opt/python && \ @@ -19,9 +19,9 @@ RUN \ ln -s /opt/apache-maven-3.6.1/ /opt/maven ENV PATH=/opt/python/bin:/opt/maven/bin:$PATH -ENV CC=/opt/rh/devtoolset-4/root/usr/bin/gcc -ENV CXX=/opt/rh/devtoolset-4/root/usr/bin/c++ -ENV CPP=/opt/rh/devtoolset-4/root/usr/bin/cpp +ENV CC=/opt/rh/devtoolset-6/root/usr/bin/gcc +ENV CXX=/opt/rh/devtoolset-6/root/usr/bin/c++ +ENV CPP=/opt/rh/devtoolset-6/root/usr/bin/cpp # Install Python packages RUN \