From db2792088f9bbd2272ba203360057cd6d5cdd551 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 4 Aug 2020 21:39:50 -0500 Subject: [PATCH] remove lgb.convert() --- R-package/NAMESPACE | 2 - R-package/R/lgb.convert.R | 159 --------------- R-package/R/lgb.convert_with_rules.R | 4 +- R-package/R/removed.R | 4 +- R-package/demo/categorical_features_prepare.R | 88 --------- R-package/man/lgb.convert.Rd | 51 ----- R-package/man/lgb.convert_with_rules.Rd | 4 +- R-package/pkgdown/_pkgdown.yml | 1 - R-package/tests/testthat/test_lgb.convert.R | 185 ------------------ 9 files changed, 6 insertions(+), 492 deletions(-) delete mode 100644 R-package/R/lgb.convert.R delete mode 100644 R-package/demo/categorical_features_prepare.R delete mode 100644 R-package/man/lgb.convert.Rd delete mode 100644 R-package/tests/testthat/test_lgb.convert.R diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index e3b1816bed37..17ae4683bdb8 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -14,7 +14,6 @@ export(lgb.Dataset.create.valid) export(lgb.Dataset.save) export(lgb.Dataset.set.categorical) export(lgb.Dataset.set.reference) -export(lgb.convert) export(lgb.convert_with_rules) export(lgb.cv) export(lgb.dump) @@ -43,7 +42,6 @@ importFrom(R6,R6Class) importFrom(data.table,":=") importFrom(data.table,as.data.table) importFrom(data.table,data.table) -importFrom(data.table,is.data.table) importFrom(data.table,rbindlist) importFrom(data.table,set) importFrom(data.table,setnames) diff --git a/R-package/R/lgb.convert.R b/R-package/R/lgb.convert.R deleted file mode 100644 index 64a64322ce6b..000000000000 --- a/R-package/R/lgb.convert.R +++ /dev/null @@ -1,159 +0,0 @@ -# [description] get all column classes of a data.table or data.frame. -# This function collapses the result of class() into a single string -.get_column_classes <- function(df) { - return( - vapply( - X = df - , FUN = function(x) {paste0(class(x), collapse = ",")} - , FUN.VALUE = character(1L) - ) - ) -} - -# [description] check a data frame or data table for columns tthat are any -# type other than numeric and integer. This is used by lgb.convert() -# and lgb.convert_with_rules() too warn if more action is needed by users -# before a dataset can be converted to a lgb.Dataset. -.warn_for_unconverted_columns <- function(df, function_name) { - column_classes <- .get_column_classes(df) - unconverted_columns <- column_classes[!(column_classes %in% c("numeric", "integer"))] - if (length(unconverted_columns) > 0L) { - col_detail_string <- paste0( - paste0( - names(unconverted_columns) - , " (" - , unconverted_columns - , ")" - ) - , collapse = ", " - ) - msg <- paste0( - function_name - , ": " - , length(unconverted_columns) - , " columns are not numeric or integer. These need to be dropped or converted to " - , "be used in an lgb.Dataset object. " - , col_detail_string - ) - warning(msg) - } - return(invisible(NULL)) -} - -.LGB_CONVERT_DEFAULT_FOR_LOGICAL_NA <- function() {return(-1L)} -.LGB_CONVERT_DEFAULT_FOR_NON_LOGICAL_NA <- function() {return(0L)} - -#' @name lgb.convert -#' @title Data preparator for LightGBM datasets (integer) -#' @description Attempts to prepare a clean dataset to put in a \code{lgb.Dataset}. -#' Factor, character, and logical columns are converted to integer. -#' -#' Missing values in factor and character columns will be replaced with 0. Missing -#' values in logical columns will be replaced with -1. -#' -#' Please use \code{\link{lgb.convert_with_rules}} if you want to apply this -#' transformation to other datasets. -#' -#' NOTE: In previous releases of LightGBM, this function was called \code{lgb.prepare}. -#' @param data A data.frame or \code{data.table} to prepare. If a \code{data.table} is provided, -#' it will be modified in place for speed and too avoid out-of-memory erros. -#' @return The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) -#' for input in \code{lgb.Dataset}. If \code{data} is a \code{data.table}, it will be -#' modified in place. -#' -#' @examples -#' data(iris) -#' -#' str(iris) -#' -#' # Convert all factors/chars to integer -#' str(lgb.convert(data = iris)) -#' -#' \dontrun{ -#' # When lightgbm package is installed, and you do not want to load it -#' # You can still use the function! -#' lgb.unloader() -#' str(lightgbm::lgb.convert(data = iris)) -#' # 'data.frame': 150 obs. of 5 variables: -#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -#' # $ Species : int 1 1 1 1 1 1 1 1 1 1 ... -#' } -#' -#' @importFrom data.table := is.data.table -#' @export -lgb.convert <- function(data) { - - column_classes <- .get_column_classes(data) - - is_char <- which(column_classes == "character") - is_factor <- which(column_classes == "factor") - is_logical <- which(column_classes == "logical") - - is_data_table <- data.table::is.data.table(data) - is_data_frame <- is.data.frame(data) - - if (!(is_data_table || is_data_frame)) { - stop( - "lgb.convert: you provided " - , paste(class(data), collapse = " & ") - , " but data should have class data.frame or data.table" - ) - } - - # data.table not behaving like data.frame - if (is_data_table) { - - if (length(is_char) > 0L) { - for (col_name in names(is_char)) { - data[, (col_name) := as.integer(as.factor(get(col_name)))] - data[is.na(get(col_name)), (col_name) := .LGB_CONVERT_DEFAULT_FOR_NON_LOGICAL_NA()] - } - } - - if (length(is_factor) > 0L) { - for (col_name in names(is_factor)) { - data[, (col_name) := as.integer(get(col_name))] - data[is.na(get(col_name)), (col_name) := .LGB_CONVERT_DEFAULT_FOR_NON_LOGICAL_NA()] - } - } - - if (length(is_logical) > 0L) { - for (col_name in names(is_logical)) { - data[, (col_name) := as.integer(get(col_name))] - data[is.na(get(col_name)), (col_name) := .LGB_CONVERT_DEFAULT_FOR_LOGICAL_NA()] - } - } - - } else if (is_data_frame) { - - if (length(is_char) > 0L) { - for (col_name in names(is_char)) { - data[[col_name]] <- as.integer(as.factor(data[[col_name]])) - data[is.na(data[col_name]), col_name] <- .LGB_CONVERT_DEFAULT_FOR_NON_LOGICAL_NA() - } - } - - if (length(is_factor) > 0L) { - for (col_name in names(is_factor)) { - data[[col_name]] <- as.integer(data[[col_name]]) - data[is.na(data[col_name]), col_name] <- .LGB_CONVERT_DEFAULT_FOR_NON_LOGICAL_NA() - } - } - - if (length(is_logical) > 0L) { - for (col_name in names(is_logical)) { - data[[col_name]] <- as.integer(data[[col_name]]) - data[is.na(data[col_name]), col_name] <- .LGB_CONVERT_DEFAULT_FOR_LOGICAL_NA() - } - } - - } - - .warn_for_unconverted_columns(df = data, function_name = "lgb.convert") - - return(data) - -} diff --git a/R-package/R/lgb.convert_with_rules.R b/R-package/R/lgb.convert_with_rules.R index 176957719a48..6d13c5761034 100644 --- a/R-package/R/lgb.convert_with_rules.R +++ b/R-package/R/lgb.convert_with_rules.R @@ -5,8 +5,8 @@ #' in factors and characters will be filled with 0L. Missing values in logicals #' will be filled with -1L. #' -#' Unlike \code{\link{lgb.convert}}, this function returns and optionally takes -#' in "rules" the describe exactly how to convert values in columns. +#' This function returns and optionally takes in "rules" the describe exactly +#' how to convert values in columns. #' #' Columns that contain only NA values will be converted by this function but will #' not show up in the returned \code{rules}. diff --git a/R-package/R/removed.R b/R-package/R/removed.R index 733c59765810..3fda5f27c07b 100644 --- a/R-package/R/removed.R +++ b/R-package/R/removed.R @@ -4,7 +4,7 @@ #' @param ... catch-all too match old calls #' @export lgb.prepare <- function(...) { - stop("lgb.prepare() was removed in LightGBM 3.0.0. Please use lgb.convert()") + stop("lgb.prepare() was removed in LightGBM 3.0.0. Please use lgb.convert_with_rules()") } #' @title removed functions @@ -13,7 +13,7 @@ lgb.prepare <- function(...) { #' @param ... catch-all too match old calls #' @export lgb.prepare2 <- function(...) { - stop("lgb.prepare2() was removed in LightGBM 3.0.0. Please use lgb.convert()") + stop("lgb.prepare2() was removed in LightGBM 3.0.0. Please use lgb.convert_with_rules()") } #' @title removed functions diff --git a/R-package/demo/categorical_features_prepare.R b/R-package/demo/categorical_features_prepare.R deleted file mode 100644 index 6830e5a1232c..000000000000 --- a/R-package/demo/categorical_features_prepare.R +++ /dev/null @@ -1,88 +0,0 @@ -# Here we are going to try training a model with categorical features - -# Load libraries -library(data.table) -library(lightgbm) - -# Load data and look at the structure -# -# Classes 'data.table' and 'data.frame': 4521 obs. of 17 variables: -# $ age : int 30 33 35 30 59 35 36 39 41 43 ... -# $ job : chr "unemployed" "services" "management" "management" ... -# $ marital : chr "married" "married" "single" "married" ... -# $ education: chr "primary" "secondary" "tertiary" "tertiary" ... -# $ default : chr "no" "no" "no" "no" ... -# $ balance : int 1787 4789 1350 1476 0 747 307 147 221 -88 ... -# $ housing : chr "no" "yes" "yes" "yes" ... -# $ loan : chr "no" "yes" "no" "yes" ... -# $ contact : chr "cellular" "cellular" "cellular" "unknown" ... -# $ day : int 19 11 16 3 5 23 14 6 14 17 ... -# $ month : chr "oct" "may" "apr" "jun" ... -# $ duration : int 79 220 185 199 226 141 341 151 57 313 ... -# $ campaign : int 1 1 1 4 1 2 1 2 2 1 ... -# $ pdays : int -1 339 330 -1 -1 176 330 -1 -1 147 ... -# $ previous : int 0 4 1 0 0 3 2 0 0 2 ... -# $ poutcome : chr "unknown" "failure" "failure" "unknown" ... -# $ y : chr "no" "no" "no" "no" ... -data(bank, package = "lightgbm") -str(bank) - -# We must now transform the data to fit in LightGBM -# For this task, we use lgb.prepare -# The function transforms the data into a fittable data -# -# Classes 'data.table' and 'data.frame': 4521 obs. of 17 variables: -# $ age : int 30 33 35 30 59 35 36 39 41 43 ... -# $ job : num 11 8 5 5 2 5 7 10 3 8 ... -# $ marital : num 2 2 3 2 2 3 2 2 2 2 ... -# $ education: num 1 2 3 3 2 3 3 2 3 1 ... -# $ default : num 1 1 1 1 1 1 1 1 1 1 ... -# $ balance : int 1787 4789 1350 1476 0 747 307 147 221 -88 ... -# $ housing : num 1 2 2 2 2 1 2 2 2 2 ... -# $ loan : num 1 2 1 2 1 1 1 1 1 2 ... -# $ contact : num 1 1 1 3 3 1 1 1 3 1 ... -# $ day : int 19 11 16 3 5 23 14 6 14 17 ... -# $ month : num 11 9 1 7 9 4 9 9 9 1 ... -# $ duration : int 79 220 185 199 226 141 341 151 57 313 ... -# $ campaign : int 1 1 1 4 1 2 1 2 2 1 ... -# $ pdays : int -1 339 330 -1 -1 176 330 -1 -1 147 ... -# $ previous : int 0 4 1 0 0 3 2 0 0 2 ... -# $ poutcome : num 4 1 1 4 4 1 2 4 4 1 ... -# $ y : num 1 1 1 1 1 1 1 1 1 1 ... -bank <- lgb.prepare(data = bank) -str(bank) - -# Remove 1 to label because it must be between 0 and 1 -bank$y <- bank$y - 1L - -# Data input to LightGBM must be a matrix, without the label -my_data <- as.matrix(bank[, 1L:16L, with = FALSE]) - -# Creating the LightGBM dataset with categorical features -# The categorical features must be indexed like in R (1-indexed, not 0-indexed) -lgb_data <- lgb.Dataset( - data = my_data - , label = bank$y - , categorical_feature = c(2L, 3L, 4L, 5L, 7L, 8L, 9L, 11L, 16L) -) - -# We can now train a model -params <- list( - objective = "binary" - , metric = "l2" - , min_data = 1L - , learning_rate = 0.1 - , min_data = 0L - , min_hessian = 1.0 - , max_depth = 2L -) -model <- lgb.train( - params = params - , data = lgb_data - , nrounds = 100L - , valids = list(train = lgb_data) -) - -# Try to find split_feature: 2 -# If you find it, it means it used a categorical feature in the first tree -lgb.dump(model, num_iteration = 1L) diff --git a/R-package/man/lgb.convert.Rd b/R-package/man/lgb.convert.Rd deleted file mode 100644 index b6a30415db42..000000000000 --- a/R-package/man/lgb.convert.Rd +++ /dev/null @@ -1,51 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lgb.convert.R -\name{lgb.convert} -\alias{lgb.convert} -\title{Data preparator for LightGBM datasets (integer)} -\usage{ -lgb.convert(data) -} -\arguments{ -\item{data}{A data.frame or \code{data.table} to prepare. If a \code{data.table} is provided, -it will be modified in place for speed and too avoid out-of-memory erros.} -} -\value{ -The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) - for input in \code{lgb.Dataset}. If \code{data} is a \code{data.table}, it will be - modified in place. -} -\description{ -Attempts to prepare a clean dataset to put in a \code{lgb.Dataset}. - Factor, character, and logical columns are converted to integer. - - Missing values in factor and character columns will be replaced with 0. Missing - values in logical columns will be replaced with -1. - - Please use \code{\link{lgb.convert_with_rules}} if you want to apply this - transformation to other datasets. - - NOTE: In previous releases of LightGBM, this function was called \code{lgb.prepare}. -} -\examples{ -data(iris) - -str(iris) - -# Convert all factors/chars to integer -str(lgb.convert(data = iris)) - -\dontrun{ -# When lightgbm package is installed, and you do not want to load it -# You can still use the function! -lgb.unloader() -str(lightgbm::lgb.convert(data = iris)) -# 'data.frame': 150 obs. of 5 variables: -# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -# $ Species : int 1 1 1 1 1 1 1 1 1 1 ... -} - -} diff --git a/R-package/man/lgb.convert_with_rules.Rd b/R-package/man/lgb.convert_with_rules.Rd index 40b7ead12b98..8ca842fa1ec9 100644 --- a/R-package/man/lgb.convert_with_rules.Rd +++ b/R-package/man/lgb.convert_with_rules.Rd @@ -25,8 +25,8 @@ Attempts to prepare a clean dataset to prepare to put in a \code{lgb.Dataset}. in factors and characters will be filled with 0L. Missing values in logicals will be filled with -1L. - Unlike \code{\link{lgb.convert}}, this function returns and optionally takes - in "rules" the describe exactly how to convert values in columns. + This function returns and optionally takes in "rules" the describe exactly + how to convert values in columns. Columns that contain only NA values will be converted by this function but will not show up in the returned \code{rules}. diff --git a/R-package/pkgdown/_pkgdown.yml b/R-package/pkgdown/_pkgdown.yml index 803dbbe08343..0a509df2d54c 100644 --- a/R-package/pkgdown/_pkgdown.yml +++ b/R-package/pkgdown/_pkgdown.yml @@ -65,7 +65,6 @@ reference: - '`lgb.Dataset.save`' - '`lgb.Dataset.set.categorical`' - '`lgb.Dataset.set.reference`' - - '`lgb.convert`' - '`lgb.convert_with_rules`' - title: Machine Learning desc: Train models with LightGBM diff --git a/R-package/tests/testthat/test_lgb.convert.R b/R-package/tests/testthat/test_lgb.convert.R deleted file mode 100644 index e97b5cb01c1e..000000000000 --- a/R-package/tests/testthat/test_lgb.convert.R +++ /dev/null @@ -1,185 +0,0 @@ -context("lgb.convert()") - -test_that("lgb.convert() rejects inputs that are not a data.table or data.frame", { - bad_inputs <- list( - matrix(1.0:10.0, 2L, 5L) - , TRUE - , c("a", "b") - , NA - , 10L - , lgb.Dataset( - data = matrix(1.0:10.0, 2L, 5L) - , params = list() - ) - ) - for (bad_input in bad_inputs) { - expect_error({ - converted_dataset <- lgb.convert(bad_input) - }, regexp = "lgb.convert: you provided", fixed = TRUE) - } -}) - -test_that("lgb.convert() should work correctly for a dataset with only character columns", { - testDF <- data.frame( - col1 = c("a", "b", "c") - , col2 = c("green", "green", "red") - , stringsAsFactors = FALSE - ) - testDT <- data.table::as.data.table(testDF) - for (input_data in list(testDF, testDT)) { - converted_dataset <- lgb.convert(input_data) - expect_identical(class(input_data), class(converted_dataset)) - expect_identical(class(converted_dataset[["col1"]]), "integer") - expect_identical(class(converted_dataset[["col2"]]), "integer") - expect_identical(converted_dataset[["col1"]], c(1L, 2L, 3L)) - expect_identical(converted_dataset[["col2"]], c(1L, 1L, 2L)) - } -}) - -test_that("lgb.convert() should work correctly for a dataset with only factor columns", { - testDF <- data.frame( - col1 = as.factor(c("a", "b", "c")) - , col2 = as.factor(c("green", "green", "red")) - , stringsAsFactors = FALSE - ) - testDT <- data.table::as.data.table(testDF) - for (input_data in list(testDF, testDT)) { - converted_dataset <- lgb.convert(input_data) - expect_identical(class(input_data), class(converted_dataset)) - expect_identical(class(converted_dataset[["col1"]]), "integer") - expect_identical(class(converted_dataset[["col2"]]), "integer") - expect_identical(converted_dataset[["col1"]], c(1L, 2L, 3L)) - expect_identical(converted_dataset[["col2"]], c(1L, 1L, 2L)) - } -}) - -test_that("lgb.convert() should not change a dataset with only integer columns", { - testDF <- data.frame( - col1 = 11L:15L - , col2 = 16L:20L - , stringsAsFactors = FALSE - ) - testDT <- data.table::as.data.table(testDF) - for (input_data in list(testDF, testDT)) { - converted_dataset <- lgb.convert(input_data) - expect_identical(converted_dataset, input_data) - } -}) - -test_that("lgb.convert() should work correctly for a dataset with numeric, factor, and character columns", { - testDF <- data.frame( - character_col = c("a", "b", "c") - , numeric_col = c(1.0, 9.0, 10.0) - , factor_col = as.factor(c("n", "n", "y")) - , stringsAsFactors = FALSE - ) - testDT <- data.table::as.data.table(testDF) - for (input_data in list(testDF, testDT)) { - converted_dataset <- lgb.convert(input_data) - expect_identical(class(input_data), class(converted_dataset)) - expect_identical(class(converted_dataset[["character_col"]]), "integer") - expect_identical(class(converted_dataset[["factor_col"]]), "integer") - expect_identical(converted_dataset[["character_col"]], c(1L, 2L, 3L)) - expect_identical(converted_dataset[["factor_col"]], c(1L, 1L, 2L)) - - # today, lgb.convert() does not convert numeric columns - expect_identical(class(converted_dataset[["numeric_col"]]), "numeric") - expect_identical(converted_dataset[["numeric_col"]], c(1.0, 9.0, 10.0)) - } -}) - - -test_that("lgb.convert() should work correctly for a dataset with all logical columns", { - testDF <- data.frame( - all_trues = rep(TRUE, 5L) - , all_falses = rep(FALSE, 5L) - , back_and_forth = c(TRUE, FALSE, TRUE, FALSE, TRUE) - ) - testDT <- data.table::as.data.table(testDF) - for (input_data in list(testDF, testDT)) { - converted_dataset <- lgb.convert(input_data) - expect_identical(class(input_data), class(converted_dataset)) - for (col_name in names(input_data)) { - expect_identical(class(converted_dataset[[col_name]]), "integer") - } - expect_identical(converted_dataset[["all_trues"]], rep(1L, 5L)) - expect_identical(converted_dataset[["all_falses"]], rep(0L, 5L)) - expect_identical(converted_dataset[["back_and_forth"]], c(1L, 0L, 1L, 0L, 1L)) - } -}) - -test_that("lgb.convert() should convert missing values and should work with columns that have NAs", { - testDF <- data.frame( - character_col = c("a", NA_character_, "c") - , na_col = rep(NA, 3L) - , na_real_col = rep(NA_real_, 3L) - , na_int_col = rep(NA_integer_, 3L) - , na_character_col = rep(NA_character_, 3L) - , numeric_col = c(1.0, 9.0, NA_real_) - , factor_col = as.factor(c("n", "n", "y")) - , integer_col = c(1L, 9L, NA_integer_) - , stringsAsFactors = FALSE - ) - testDT <- data.table::as.data.table(testDF) - for (input_data in list(testDF, testDT)) { - converted_dataset <- lgb.convert(input_data) - expect_identical(class(input_data), class(converted_dataset)) - - expect_identical(class(converted_dataset[["character_col"]]), "integer") - expect_identical(converted_dataset[["character_col"]], c(1L, 0L, 2L)) - - # should not convert integer columns - expect_identical(class(converted_dataset[["integer_col"]]), "integer") - expect_identical(converted_dataset[["integer_col"]], c(1L, 9L, NA_integer_)) - expect_identical(class(converted_dataset[["na_int_col"]]), "integer") - expect_identical(converted_dataset[["na_int_col"]], rep(NA_integer_, nrow(converted_dataset))) - - expect_identical(class(converted_dataset[["factor_col"]]), "integer") - expect_identical(converted_dataset[["factor_col"]], c(1L, 1L, 2L)) - - - # even columns of all NAs should be converted if they are character - expect_identical(class(converted_dataset[["na_character_col"]]), "integer") - expect_identical(converted_dataset[["na_character_col"]], rep(0L, nrow(converted_dataset))) - - # columns of all logical NAs should have been converted to integer too - expect_identical(class(converted_dataset[["na_col"]]), "integer") - expect_identical(converted_dataset[["na_col"]], rep(-1L, 3L)) - - # lgb.convert() should convert numeric columns to integer - expect_identical(class(converted_dataset[["na_real_col"]]), "numeric") - expect_identical(converted_dataset[["na_real_col"]], rep(NA_real_, nrow(converted_dataset))) - expect_identical(class(converted_dataset[["numeric_col"]]), "numeric") - expect_identical(converted_dataset[["numeric_col"]], c(1.0, 9.0, NA_real_)) - } -}) - -test_that("lgb.convert() should modify data.tables in-place", { - testDT <- data.table::data.table( - character_col = c("a", NA_character_, "c") - , na_col = rep(NA, 3L) - , na_real_col = rep(NA_real_, 3L) - , na_int_col = rep(NA_integer_, 3L) - , na_character_col = rep(NA_character_, 3L) - , numeric_col = c(1.0, 9.0, NA_real_) - , factor_col = as.factor(c("n", "n", "y")) - , integer_col = c(1L, 9L, NA_integer_) - ) - resultDT <- lgb.convert(testDT) - expect_identical(resultDT, testDT) -}) - -test_that("lgb.convert() should warn on the presence of columns it cannot convert", { - testDF <- data.frame( - character_col = c("a", NA_character_, "c") - , posix_col = rep(as.POSIXct(Sys.time()), 3L) - ) - testDT <- data.table::as.data.table(testDF) - for (input_data in list(testDF, testDT)) { - expect_warning({ - converted_dataset <- lgb.convert(input_data) - }, regexp = "columns are not numeric or integer") - expect_identical(converted_dataset[["character_col"]], c(1L, 0L, 2L)) - expect_identical(converted_dataset[["posix_col"]], input_data[["posix_col"]]) - } -})