From 52c603bdb26aa86711940ab354a30a0b35ceff53 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Tue, 7 Jul 2020 15:52:19 -0700 Subject: [PATCH 1/2] fix device sketch with weights in external memory mode --- src/common/hist_util.cu | 1 - tests/cpp/common/test_hist_util.cu | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/common/hist_util.cu b/src/common/hist_util.cu index 5da1006ed972..fe1305d4bf99 100644 --- a/src/common/hist_util.cu +++ b/src/common/hist_util.cu @@ -264,7 +264,6 @@ void ProcessWeightedBatch(int device, const SparsePage& page, d_temp_weights[idx] = weights[group]; }); } else { - CHECK_EQ(weights.size(), page.offset.Size() - 1); dh::LaunchN(device, temp_weights.size(), [=] __device__(size_t idx) { size_t element_idx = idx + begin; size_t ridx = thrust::upper_bound(thrust::seq, row_ptrs.begin(), diff --git a/tests/cpp/common/test_hist_util.cu b/tests/cpp/common/test_hist_util.cu index d8a75ba49ef2..60c85e556bce 100644 --- a/tests/cpp/common/test_hist_util.cu +++ b/tests/cpp/common/test_hist_util.cu @@ -209,6 +209,24 @@ TEST(HistUtil, DeviceSketchMultipleColumnsExternal) { } } +// See https://github.com/dmlc/xgboost/issues/5866. +TEST(HistUtil, DeviceSketchExternalMemoryWithWeights) { + int bin_sizes[] = {2, 16, 256, 512}; + int sizes[] = {100, 1000, 1500}; + int num_columns = 5; + for (auto num_rows : sizes) { + auto x = GenerateRandom(num_rows, num_columns); + dmlc::TemporaryDirectory temp; + auto dmat = + GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, 100, temp); + dmat->Info().weights_.HostVector() = GenerateRandomWeights(num_rows); + for (auto num_bins : bin_sizes) { + auto cuts = DeviceSketch(0, dmat.get(), num_bins); + ValidateCuts(cuts, dmat.get(), num_bins); + } + } +} + template void ValidateBatchedCuts(Adapter adapter, int num_bins, int num_columns, int num_rows, DMatrix* dmat) { From 15fcf20aa6c516da5d50a93e2a74d71e1654a97b Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Tue, 7 Jul 2020 16:41:42 -0700 Subject: [PATCH 2/2] review feedback --- tests/cpp/common/test_hist_util.cu | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/cpp/common/test_hist_util.cu b/tests/cpp/common/test_hist_util.cu index 60c85e556bce..3ec49668a7eb 100644 --- a/tests/cpp/common/test_hist_util.cu +++ b/tests/cpp/common/test_hist_util.cu @@ -214,11 +214,10 @@ TEST(HistUtil, DeviceSketchExternalMemoryWithWeights) { int bin_sizes[] = {2, 16, 256, 512}; int sizes[] = {100, 1000, 1500}; int num_columns = 5; + dmlc::TemporaryDirectory temp; for (auto num_rows : sizes) { auto x = GenerateRandom(num_rows, num_columns); - dmlc::TemporaryDirectory temp; - auto dmat = - GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, 100, temp); + auto dmat = GetExternalMemoryDMatrixFromData(x, num_rows, num_columns, 100, temp); dmat->Info().weights_.HostVector() = GenerateRandomWeights(num_rows); for (auto num_bins : bin_sizes) { auto cuts = DeviceSketch(0, dmat.get(), num_bins);