# Patch summary (text before the first "diff --git" header is skipped by patch tools).
#
# 1. R-package/R/lgb.Dataset.R: deletes the block that derived a local
#    `has_header` flag from `private$params$has_header` / `private$params$header`.
# 2. R-package/tests/testthat/test_dataset.R: adds two tests that build an
#    lgb.Dataset from a CSV written with and without a header row and check the
#    resulting column names, stored params and dimensions.
#
# NOTE(review): this patch had been collapsed onto two physical lines; the line
# structure below was restored to agree with the hunk headers (16 -> 6 lines and
# 3 -> 45 lines). Leading whitespace on each line was reconstructed and the
# position of the blank context lines in the first hunk is inferred from the
# line counts - confirm against the target files, or apply with
# `git apply --ignore-whitespace`.
diff --git a/R-package/R/lgb.Dataset.R b/R-package/R/lgb.Dataset.R
index 276429e64f8b..0b9c7ba42a9a 100644
--- a/R-package/R/lgb.Dataset.R
+++ b/R-package/R/lgb.Dataset.R
@@ -171,16 +171,6 @@ Dataset <- R6::R6Class(
 
       }
 
-      # Check has header or not
-      has_header <- FALSE
-      if (!is.null(private$params$has_header) || !is.null(private$params$header)) {
-        params_has_header <- tolower(as.character(private$params$has_header)) == "true"
-        params_header <- tolower(as.character(private$params$header)) == "true"
-        if (params_has_header || params_header) {
-          has_header <- TRUE
-        }
-      }
-
       # Generate parameter str
       params_str <- lgb.params2str(params = private$params)
 
diff --git a/R-package/tests/testthat/test_dataset.R b/R-package/tests/testthat/test_dataset.R
index a0b0670f8745..ec2250fbdbc6 100644
--- a/R-package/tests/testthat/test_dataset.R
+++ b/R-package/tests/testthat/test_dataset.R
@@ -310,3 +310,45 @@ test_that("lgb.Dataset: should be able to use and retrieve long feature names",
     expect_equal(col_names[1L], long_name)
     expect_equal(nchar(col_names[1L]), 1000L)
 })
+
+test_that("lgb.Dataset: should be able to create a Dataset from a text file with a header", {
+    train_file <- tempfile(pattern = "train_", fileext = ".csv")
+    write.table(
+        data.frame(y = rnorm(100L), x1 = rnorm(100L), x2 = rnorm(100L))
+        , file = train_file
+        , sep = ","
+        , col.names = TRUE
+        , row.names = FALSE
+        , quote = FALSE
+    )
+
+    dtrain <- lgb.Dataset(
+        data = train_file
+        , params = list(header = TRUE)
+    )
+    dtrain$construct()
+    expect_identical(dtrain$get_colnames(), c("x1", "x2"))
+    expect_identical(dtrain$get_params(), list(header = TRUE))
+    expect_identical(dtrain$dim(), c(100L, 2L))
+})
+
+test_that("lgb.Dataset: should be able to create a Dataset from a text file without a header", {
+    train_file <- tempfile(pattern = "train_", fileext = ".csv")
+    write.table(
+        data.frame(y = rnorm(100L), x1 = rnorm(100L), x2 = rnorm(100L))
+        , file = train_file
+        , sep = ","
+        , col.names = FALSE
+        , row.names = FALSE
+        , quote = FALSE
+    )
+
+    dtrain <- lgb.Dataset(
+        data = train_file
+        , params = list(header = FALSE)
+    )
+    dtrain$construct()
+    expect_identical(dtrain$get_colnames(), c("Column_0", "Column_1"))
+    expect_identical(dtrain$get_params(), list(header = FALSE))
+    expect_identical(dtrain$dim(), c(100L, 2L))
+})