diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu index 8625395c2ae4..e85c767265e2 100644 --- a/src/tree/updater_gpu_hist.cu +++ b/src/tree/updater_gpu_hist.cu @@ -286,7 +286,7 @@ struct GPUHistMakerDevice { matrix.feature_segments, matrix.gidx_fvalue_map, matrix.min_fvalue, - matrix.is_dense + matrix.is_dense && !collective::IsDistributed() }; auto split = this->evaluator_.EvaluateSingleSplit(inputs, shared_inputs); return split; @@ -300,11 +300,11 @@ struct GPUHistMakerDevice { std::vector nidx(2 * candidates.size()); auto h_node_inputs = pinned2.GetSpan(2 * candidates.size()); auto matrix = page->GetDeviceAccessor(ctx_->gpu_id); - EvaluateSplitSharedInputs shared_inputs{ - GPUTrainingParam{param}, *quantiser, feature_types, matrix.feature_segments, - matrix.gidx_fvalue_map, matrix.min_fvalue, - matrix.is_dense - }; + EvaluateSplitSharedInputs shared_inputs{GPUTrainingParam{param}, *quantiser, feature_types, + matrix.feature_segments, matrix.gidx_fvalue_map, + matrix.min_fvalue, + // is_dense represents the local data + matrix.is_dense && !collective::IsDistributed()}; dh::TemporaryArray entries(2 * candidates.size()); // Store the feature set ptrs so they dont go out of scope before the kernel is called std::vector>> feature_sets; diff --git a/src/tree/updater_quantile_hist.cc b/src/tree/updater_quantile_hist.cc index 1e9d76d4fe11..9155204b5c25 100644 --- a/src/tree/updater_quantile_hist.cc +++ b/src/tree/updater_quantile_hist.cc @@ -78,7 +78,7 @@ CPUExpandEntry QuantileHistMaker::Builder::InitRoot( { GradientPairPrecise grad_stat; - if (p_fmat->IsDense()) { + if (p_fmat->IsDense() && !collective::IsDistributed()) { /** * Specialized code for dense data: For dense data (with no missing value), the sum * of gradient histogram is equal to snode[nid]