From 12102cc22096f44fff79f472b2051cc4a74f8f9b Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@abv.bg>
Date: Wed, 24 Jul 2024 17:46:07 +0300
Subject: [PATCH 01/31] Fix value calculation in root node

---
 include/LightGBM/tree.h                 | 2 +-
 src/boosting/gbdt.cpp                   | 3 +++
 src/treelearner/serial_tree_learner.cpp | 6 ++++++
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/include/LightGBM/tree.h b/include/LightGBM/tree.h
index 0c4a41f46a87..8718a0577e19 100644
--- a/include/LightGBM/tree.h
+++ b/include/LightGBM/tree.h
@@ -563,7 +563,7 @@ inline void Tree::Split(int leaf, int feature, int real_feature,
   leaf_parent_[leaf] = new_node_idx;
   leaf_parent_[num_leaves_] = new_node_idx;
   // save current leaf value to internal node before change
-  internal_weight_[new_node_idx] = leaf_weight_[leaf];
+  internal_weight_[new_node_idx] = left_weight + right_weight;
   internal_value_[new_node_idx] = leaf_value_[leaf];
   internal_count_[new_node_idx] = left_cnt + right_cnt;
   leaf_value_[leaf] = std::isnan(left_value) ? 0.0f : left_value;
diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp
index 937b44fcc8aa..3a83323b372d 100644
--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -420,6 +420,9 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
           }
         }
         new_tree->AsConstantTree(init_scores[cur_tree_id]);
+      } else {
+        // extend init_scores with zeros
+        new_tree->AsConstantTree(0);
       }
     }
     // add model
diff --git a/src/treelearner/serial_tree_learner.cpp b/src/treelearner/serial_tree_learner.cpp
index f3a88bd18679..14ede072dc9e 100644
--- a/src/treelearner/serial_tree_learner.cpp
+++ b/src/treelearner/serial_tree_learner.cpp
@@ -201,6 +201,12 @@ Tree* SerialTreeLearner::Train(const score_t* gradients, const score_t *hessians
   auto tree_ptr = tree.get();
   constraints_->ShareTreePointer(tree_ptr);
 
+  // set the root value by hand, as it is not handled by splits
+  tree->SetLeafOutput(0, FeatureHistogram::CalculateSplittedLeafOutput<true, true, true, false>(
+    smaller_leaf_splits_->sum_gradients(), smaller_leaf_splits_->sum_hessians(),
+    config_->lambda_l1, config_->lambda_l2, config_->max_delta_step,
+    BasicConstraint(), config_->path_smooth, static_cast<data_size_t>(num_data_), 0));
+
   // root leaf
   int left_leaf = 0;
   int cur_depth = 1;

From c933399c07537c2d3b1721dc155a332192f2a2c3 Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@abv.bg>
Date: Fri, 26 Jul 2024 17:53:32 +0300
Subject: [PATCH 02/31] Fix dask tests

---
 tests/python_package_test/test_dask.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/python_package_test/test_dask.py b/tests/python_package_test/test_dask.py
index 37d6db2541f5..7dcc21ac4d6a 100644
--- a/tests/python_package_test/test_dask.py
+++ b/tests/python_package_test/test_dask.py
@@ -1464,8 +1464,7 @@ def test_init_score(task, output, cluster):
             init_scores = dy.map_blocks(lambda x: np.full((x.size, size_factor), init_score))
         model = model_factory(client=client, **params)
         model.fit(dX, dy, sample_weight=dw, init_score=init_scores, group=dg)
-        # value of the root node is 0 when init_score is set
-        assert model.booster_.trees_to_dataframe()["value"][0] == 0
+        assert model.fitted_
 
 
 def sklearn_checks_to_run():

From 2f1de57e341d601ee690871993a810e35fdcc259 Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@abv.bg>
Date: Mon, 29 Jul 2024 14:47:35 +0300
Subject: [PATCH 03/31] Create proper tests

---
 tests/python_package_test/test_engine.py | 17 ++++++++++--
 tests/python_package_test/utils.py       | 35 ++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 9ff56206ca70..73793e48e6ac 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -24,6 +24,7 @@
 
 from .utils import (
     SERIALIZERS,
+    assert_all_trees_valid,
     dummy_obj,
     load_breast_cancer,
     load_digits,
@@ -3857,16 +3858,26 @@ def test_dump_model():
     train_data = lgb.Dataset(X, label=y)
     params = {"objective": "binary", "verbose": -1}
     bst = lgb.train(params, train_data, num_boost_round=5)
-    dumped_model_str = str(bst.dump_model(5, 0))
+    dumped_model = bst.dump_model(5, 0)
+    assert_all_trees_valid(dumped_model)
+    dumped_model_str = str(dumped_model)
     assert "leaf_features" not in dumped_model_str
     assert "leaf_coeff" not in dumped_model_str
     assert "leaf_const" not in dumped_model_str
     assert "leaf_value" in dumped_model_str
     assert "leaf_count" in dumped_model_str
-    params["linear_tree"] = True
+    for tree in dumped_model["tree_info"]:
+        assert not np.allclose(tree["tree_structure"]["internal_value"], 0)
+
+
+def test_dump_model_linear():
+    X, y = load_breast_cancer(return_X_y=True)
+    params = {"objective": "binary", "verbose": -1, "linear_tree": True}
     train_data = lgb.Dataset(X, label=y)
     bst = lgb.train(params, train_data, num_boost_round=5)
-    dumped_model_str = str(bst.dump_model(5, 0))
+    dumped_model = bst.dump_model(5, 0)
+    assert_all_trees_valid(dumped_model)
+    dumped_model_str = str(dumped_model)
     assert "leaf_features" in dumped_model_str
     assert "leaf_coeff" in dumped_model_str
     assert "leaf_const" in dumped_model_str
diff --git a/tests/python_package_test/utils.py b/tests/python_package_test/utils.py
index 66298b8190fa..60274dea4322 100644
--- a/tests/python_package_test/utils.py
+++ b/tests/python_package_test/utils.py
@@ -206,3 +206,38 @@ def np_assert_array_equal(*args, **kwargs):
     if not _numpy_testing_supports_strict_kwarg:
         kwargs.pop("strict")
     np.testing.assert_array_equal(*args, **kwargs)
+
+
+def assert_subtree_valid(root):
+    """Recursively checks the validity of a subtree rooted at `root`.
+
+    Currently it only checks whether weights and counts are consistent between
+    all parent nodes and their children.
+
+    Parameters
+    ----------
+    root : dict
+        A dictionary representing the root of the subtree.
+        It should be produced by dump_model()
+
+    Returns
+    -------
+    tuple
+        A tuple containing the weight and count of the subtree rooted at `root`.
+    """
+    if "leaf_count" in root:
+        return (root["leaf_weight"], root["leaf_count"])
+
+    left_child = root["left_child"]
+    right_child = root["right_child"]
+    (l_w, l_c) = assert_subtree_valid(left_child)
+    (r_w, r_c) = assert_subtree_valid(right_child)
+    assert np.allclose(root["internal_weight"], l_w + r_w)
+    assert np.allclose(root["internal_count"], l_c + r_c)
+    return (root["internal_weight"], root["internal_count"])
+
+
+def assert_all_trees_valid(model_dump):
+    for idx, tree in enumerate(model_dump["tree_info"]):
+        assert tree["tree_index"] == idx
+        assert_subtree_valid(tree["tree_structure"])

From 208df85064e3f4625ebf3a0e660728301ec2d10a Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@abv.bg>
Date: Tue, 30 Jul 2024 02:10:11 +0300
Subject: [PATCH 04/31] Test only on cpu

---
 tests/python_package_test/test_engine.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 73793e48e6ac..056bd7cdc78c 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3859,7 +3859,6 @@ def test_dump_model():
     params = {"objective": "binary", "verbose": -1}
     bst = lgb.train(params, train_data, num_boost_round=5)
     dumped_model = bst.dump_model(5, 0)
-    assert_all_trees_valid(dumped_model)
     dumped_model_str = str(dumped_model)
     assert "leaf_features" not in dumped_model_str
     assert "leaf_coeff" not in dumped_model_str
@@ -3869,6 +3868,10 @@ def test_dump_model():
     for tree in dumped_model["tree_info"]:
         assert not np.allclose(tree["tree_structure"]["internal_value"], 0)
 
+    # Cuda seems to report innacurately
+    if getenv("TASK", "") != "cuda":
+        assert_all_trees_valid(dumped_model)
+
 
 def test_dump_model_linear():
     X, y = load_breast_cancer(return_X_y=True)

From 48e6b968dfdc16c5c899e2feef533a642fd55f33 Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@abv.bg>
Date: Tue, 30 Jul 2024 12:21:35 +0300
Subject: [PATCH 05/31] Disable new tests for CUDA

---
 tests/python_package_test/test_engine.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 056bd7cdc78c..6c117a4ccfaa 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3865,11 +3865,11 @@ def test_dump_model():
     assert "leaf_const" not in dumped_model_str
     assert "leaf_value" in dumped_model_str
     assert "leaf_count" in dumped_model_str
-    for tree in dumped_model["tree_info"]:
-        assert not np.allclose(tree["tree_structure"]["internal_value"], 0)
 
-    # Cuda seems to report innacurately
+    # CUDA does not return correct values for the root
     if getenv("TASK", "") != "cuda":
+        for tree in dumped_model["tree_info"]:
+            assert not np.allclose(tree["tree_structure"]["internal_value"], 0)
         assert_all_trees_valid(dumped_model)
 
 

From 26b9859de75cabf6105d6da7f10d13a9360cf1cd Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@abv.bg>
Date: Sat, 3 Aug 2024 19:10:29 +0300
Subject: [PATCH 06/31] Merge with #5964

---
 src/io/tree.cpp                          | 22 ++++++++++--------
 tests/python_package_test/test_engine.py | 29 +++++++++++++++++++++++-
 2 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/src/io/tree.cpp b/src/io/tree.cpp
index 4312b4f65002..c051c8060a33 100644
--- a/src/io/tree.cpp
+++ b/src/io/tree.cpp
@@ -416,12 +416,16 @@ std::string Tree::ToJSON() const {
   str_buf << "\"num_cat\":" << num_cat_ << "," << '\n';
   str_buf << "\"shrinkage\":" << shrinkage_ << "," << '\n';
   if (num_leaves_ == 1) {
+    str_buf << "\"tree_structure\":{";
     if (is_linear_) {
-      str_buf << "\"tree_structure\":{" << "\"leaf_value\":" << leaf_value_[0] << ", " << "\n";
-      str_buf << LinearModelToJSON(0) << "}" << "\n";
+      str_buf << "\"leaf_value\":" << leaf_value_[0] << ", " << '\n';
+      str_buf << "\"leaf_count\":" << leaf_count_[0] << ", " << '\n';
+      str_buf << LinearModelToJSON(0);
     } else {
-      str_buf << "\"tree_structure\":{" << "\"leaf_value\":" << leaf_value_[0] << "}" << '\n';
+      str_buf << "\"leaf_value\":" << leaf_value_[0] << ", " << '\n';
+      str_buf << "\"leaf_count\":" << leaf_count_[0];
     }
+    str_buf << "}" << '\n';
   } else {
     str_buf << "\"tree_structure\":" << NodeToJSON(0) << '\n';
   }
@@ -731,6 +735,12 @@ Tree::Tree(const char* str, size_t* used_len) {
     is_linear_ = false;
   }
 
+  if (key_vals.count("leaf_count")) {
+    leaf_count_ = CommonC::StringToArrayFast<int>(key_vals["leaf_count"], num_leaves_);
+  } else {
+    leaf_count_.resize(num_leaves_);
+  }
+
   #ifdef USE_CUDA
   is_cuda_tree_ = false;
   #endif  // USE_CUDA
@@ -793,12 +803,6 @@ Tree::Tree(const char* str, size_t* used_len) {
     leaf_weight_.resize(num_leaves_);
   }
 
-  if (key_vals.count("leaf_count")) {
-    leaf_count_ = CommonC::StringToArrayFast<int>(key_vals["leaf_count"], num_leaves_);
-  } else {
-    leaf_count_.resize(num_leaves_);
-  }
-
   if (key_vals.count("decision_type")) {
     decision_type_ = CommonC::StringToArrayFast<int8_t>(key_vals["decision_type"], num_leaves_ - 1);
   } else {
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 6c117a4ccfaa..e2338a739e73 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3853,10 +3853,37 @@ def test_reset_params_works_with_metric_num_class_and_boosting():
     assert new_bst.params == expected_params
 
 
+def test_dump_model_stump():
+    X, y = load_breast_cancer(return_X_y=True)
+    # intentionally create a stump (tree with only a root-node)
+    # using restricted # samples
+    subidx = random.sample(range(len(y)), 30)
+    X = X[subidx]
+    y = y[subidx]
+
+    train_data = lgb.Dataset(X, label=y)
+    params = {
+        "objective": "binary",
+        "verbose": -1,
+        "n_jobs": 1,
+    }
+    bst = lgb.train(params, train_data, num_boost_round=5)
+    dumped_model = bst.dump_model(5, 0)
+    print(dumped_model)
+    assert len(dumped_model["tree_info"]) == 1
+    tree_structure = dumped_model["tree_info"][0]["tree_structure"]
+    assert "leaf_value" in tree_structure
+    assert "leaf_count" in tree_structure
+    assert tree_structure["leaf_count"] == 30
+
+
 def test_dump_model():
     X, y = load_breast_cancer(return_X_y=True)
     train_data = lgb.Dataset(X, label=y)
-    params = {"objective": "binary", "verbose": -1}
+    params = {
+        "objective": "binary",
+        "verbose": -1,
+    }
     bst = lgb.train(params, train_data, num_boost_round=5)
     dumped_model = bst.dump_model(5, 0)
     dumped_model_str = str(dumped_model)

From 88e3dec15a63c698458ce1a0e9c84a34f6dd784d Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@abv.bg>
Date: Sat, 3 Aug 2024 19:43:21 +0300
Subject: [PATCH 07/31] Finish merging with dump_model unification

---
 include/LightGBM/cuda/cuda_tree.hpp      |  2 +-
 include/LightGBM/tree.h                  |  3 ++-
 python-package/lightgbm/basic.py         |  2 +-
 src/boosting/gbdt.cpp                    |  4 ++--
 src/boosting/rf.hpp                      |  2 +-
 src/io/cuda/cuda_tree.cpp                |  5 +++--
 tests/python_package_test/test_engine.py | 14 +++++++-------
 7 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/include/LightGBM/cuda/cuda_tree.hpp b/include/LightGBM/cuda/cuda_tree.hpp
index e2836baa2be5..7ab06190481b 100644
--- a/include/LightGBM/cuda/cuda_tree.hpp
+++ b/include/LightGBM/cuda/cuda_tree.hpp
@@ -77,7 +77,7 @@ class CUDATree : public Tree {
                             const data_size_t* used_data_indices,
                             data_size_t num_data, double* score) const override;
 
-  inline void AsConstantTree(double val) override;
+  inline void AsConstantTree(double val, int count) override;
 
   const int* cuda_leaf_parent() const { return cuda_leaf_parent_; }
 
diff --git a/include/LightGBM/tree.h b/include/LightGBM/tree.h
index 8718a0577e19..c28ddd140c48 100644
--- a/include/LightGBM/tree.h
+++ b/include/LightGBM/tree.h
@@ -228,13 +228,14 @@ class Tree {
     shrinkage_ = 1.0f;
   }
 
-  virtual inline void AsConstantTree(double val) {
+  virtual inline void AsConstantTree(double val, int count = 0) {
     num_leaves_ = 1;
     shrinkage_ = 1.0f;
     leaf_value_[0] = val;
     if (is_linear_) {
       leaf_const_[0] = val;
     }
+    leaf_count_[0] = count;
   }
 
   /*! \brief Serialize this object to string*/
diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py
index af4d757f480b..90b01f25fa80 100644
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -3913,7 +3913,7 @@ def _get_split_feature(
                 return feature_name
 
             def _is_single_node_tree(tree: Dict[str, Any]) -> bool:
-                return set(tree.keys()) == {"leaf_value"}
+                return set(tree.keys()) == {"leaf_value", "leaf_count"}
 
             # Create the node record, and populate universal data members
             node: Dict[str, Union[int, str, None]] = OrderedDict()
diff --git a/src/boosting/gbdt.cpp b/src/boosting/gbdt.cpp
index 3a83323b372d..f966a275ae8b 100644
--- a/src/boosting/gbdt.cpp
+++ b/src/boosting/gbdt.cpp
@@ -419,10 +419,10 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
             score_updater->AddScore(init_scores[cur_tree_id], cur_tree_id);
           }
         }
-        new_tree->AsConstantTree(init_scores[cur_tree_id]);
+        new_tree->AsConstantTree(init_scores[cur_tree_id], num_data_);
       } else {
         // extend init_scores with zeros
-        new_tree->AsConstantTree(0);
+        new_tree->AsConstantTree(0, num_data_);
       }
     }
     // add model
diff --git a/src/boosting/rf.hpp b/src/boosting/rf.hpp
index e6101dc30a39..3eb065b30f2d 100644
--- a/src/boosting/rf.hpp
+++ b/src/boosting/rf.hpp
@@ -168,7 +168,7 @@ class RF : public GBDT {
               output = init_scores_[cur_tree_id];
             }
           }
-          new_tree->AsConstantTree(output);
+          new_tree->AsConstantTree(output, num_data_);
           MultiplyScore(cur_tree_id, (iter_ + num_init_iteration_));
           UpdateScore(new_tree.get(), cur_tree_id);
           MultiplyScore(cur_tree_id, 1.0 / (iter_ + num_init_iteration_ + 1));
diff --git a/src/io/cuda/cuda_tree.cpp b/src/io/cuda/cuda_tree.cpp
index 923e51961e0b..d7da93daeed1 100644
--- a/src/io/cuda/cuda_tree.cpp
+++ b/src/io/cuda/cuda_tree.cpp
@@ -330,9 +330,10 @@ void CUDATree::SyncLeafOutputFromCUDAToHost() {
   CopyFromCUDADeviceToHost<double>(leaf_value_.data(), cuda_leaf_value_, leaf_value_.size(), __FILE__, __LINE__);
 }
 
-void CUDATree::AsConstantTree(double val) {
-  Tree::AsConstantTree(val);
+void CUDATree::AsConstantTree(double val, int count) {
+  Tree::AsConstantTree(val, int count);
   CopyFromHostToCUDADevice<double>(cuda_leaf_value_, &val, 1, __FILE__, __LINE__);
+  CopyFromHostToCUDADevice<double>(cuda_leaf_count_, &count, 1, __FILE__, __LINE__);
 }
 
 }  // namespace LightGBM
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index e2338a739e73..8b781705bab3 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3858,10 +3858,8 @@ def test_dump_model_stump():
     # intentionally create a stump (tree with only a root-node)
     # using restricted # samples
     subidx = random.sample(range(len(y)), 30)
-    X = X[subidx]
-    y = y[subidx]
 
-    train_data = lgb.Dataset(X, label=y)
+    train_data = lgb.Dataset(X[subidx], label=y[subidx])
     params = {
         "objective": "binary",
         "verbose": -1,
@@ -3869,11 +3867,9 @@ def test_dump_model_stump():
     }
     bst = lgb.train(params, train_data, num_boost_round=5)
     dumped_model = bst.dump_model(5, 0)
-    print(dumped_model)
-    assert len(dumped_model["tree_info"]) == 1
     tree_structure = dumped_model["tree_info"][0]["tree_structure"]
+    assert len(dumped_model["tree_info"]) == 1
     assert "leaf_value" in tree_structure
-    assert "leaf_count" in tree_structure
     assert tree_structure["leaf_count"] == 30
 
 
@@ -3902,7 +3898,11 @@ def test_dump_model():
 
 def test_dump_model_linear():
     X, y = load_breast_cancer(return_X_y=True)
-    params = {"objective": "binary", "verbose": -1, "linear_tree": True}
+    params = {
+        "objective": "binary",
+        "verbose": -1,
+        "linear_tree": True,
+    }
     train_data = lgb.Dataset(X, label=y)
     bst = lgb.train(params, train_data, num_boost_round=5)
     dumped_model = bst.dump_model(5, 0)

From e1274dc167c2064c6ae888dd711b8e6a9707bc2f Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@abv.bg>
Date: Sat, 3 Aug 2024 20:28:45 +0300
Subject: [PATCH 08/31] Improve tests

---
 tests/python_package_test/test_engine.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 8b781705bab3..5fbf2e6386cb 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3874,11 +3874,14 @@ def test_dump_model_stump():
 
 
 def test_dump_model():
-    X, y = load_breast_cancer(return_X_y=True)
-    train_data = lgb.Dataset(X, label=y)
+    offset = 100
+    X, y = make_synthetic_regression()
+    train_data = lgb.Dataset(X, label=y+offset)
+
     params = {
-        "objective": "binary",
+        "objective": "regression",
         "verbose": -1,
+        "boost_from_average": True,
     }
     bst = lgb.train(params, train_data, num_boost_round=5)
     dumped_model = bst.dump_model(5, 0)
@@ -3892,7 +3895,12 @@ def test_dump_model():
     # CUDA does not return correct values for the root
     if getenv("TASK", "") != "cuda":
         for tree in dumped_model["tree_info"]:
-            assert not np.allclose(tree["tree_structure"]["internal_value"], 0)
+            assert not np.all(tree["tree_structure"]["internal_value"] == 0)
+        np.testing.assert_allclose(
+            dumped_model["tree_info"][0]["tree_structure"]["internal_value"],
+            offset,
+            atol=1
+        )
         assert_all_trees_valid(dumped_model)
 
 

From 38ee92c2d9c3aafa5a8afb4943585ab0a26e136b Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@abv.bg>
Date: Sun, 4 Aug 2024 20:44:04 +0300
Subject: [PATCH 09/31] Add linear test for stump

---
 tests/python_package_test/test_engine.py | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 5fbf2e6386cb..8e41d29c1456 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3853,7 +3853,8 @@ def test_reset_params_works_with_metric_num_class_and_boosting():
     assert new_bst.params == expected_params
 
 
-def test_dump_model_stump():
+@pytest.mark.parametrize("linear_tree", [False, True])
+def test_dump_model_stump(linear_tree):
     X, y = load_breast_cancer(return_X_y=True)
     # intentionally create a stump (tree with only a root-node)
     # using restricted # samples
@@ -3864,6 +3865,7 @@ def test_dump_model_stump():
         "objective": "binary",
         "verbose": -1,
         "n_jobs": 1,
+        "linear_tree": linear_tree,
     }
     bst = lgb.train(params, train_data, num_boost_round=5)
     dumped_model = bst.dump_model(5, 0)
@@ -3876,7 +3878,7 @@ def test_dump_model_stump():
 def test_dump_model():
     offset = 100
     X, y = make_synthetic_regression()
-    train_data = lgb.Dataset(X, label=y+offset)
+    train_data = lgb.Dataset(X, label=y + offset)
 
     params = {
         "objective": "regression",
@@ -3893,15 +3895,14 @@ def test_dump_model():
     assert "leaf_count" in dumped_model_str
 
     # CUDA does not return correct values for the root
-    if getenv("TASK", "") != "cuda":
-        for tree in dumped_model["tree_info"]:
-            assert not np.all(tree["tree_structure"]["internal_value"] == 0)
-        np.testing.assert_allclose(
-            dumped_model["tree_info"][0]["tree_structure"]["internal_value"],
-            offset,
-            atol=1
-        )
-        assert_all_trees_valid(dumped_model)
+    if getenv("TASK", "") == "cuda":
+        return
+
+    for tree in dumped_model["tree_info"]:
+        assert not np.all(tree["tree_structure"]["internal_value"] == 0)
+
+    np.testing.assert_allclose(dumped_model["tree_info"][0]["tree_structure"]["internal_value"], offset, atol=1)
+    assert_all_trees_valid(dumped_model)
 
 
 def test_dump_model_linear():

From 3b423de1f6a559590fbe7363b8d49326de8c07ad Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@abv.bg>
Date: Mon, 5 Aug 2024 18:32:51 +0300
Subject: [PATCH 10/31] Fix CUDA compilation

---
 src/io/cuda/cuda_tree.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/io/cuda/cuda_tree.cpp b/src/io/cuda/cuda_tree.cpp
index d7da93daeed1..c5dee89ca3af 100644
--- a/src/io/cuda/cuda_tree.cpp
+++ b/src/io/cuda/cuda_tree.cpp
@@ -331,9 +331,9 @@ void CUDATree::SyncLeafOutputFromCUDAToHost() {
 }
 
 void CUDATree::AsConstantTree(double val, int count) {
-  Tree::AsConstantTree(val, int count);
+  Tree::AsConstantTree(val, count);
   CopyFromHostToCUDADevice<double>(cuda_leaf_value_, &val, 1, __FILE__, __LINE__);
-  CopyFromHostToCUDADevice<double>(cuda_leaf_count_, &count, 1, __FILE__, __LINE__);
+  CopyFromHostToCUDADevice<int>(cuda_leaf_count_, &count, 1, __FILE__, __LINE__);
 }
 
 }  // namespace LightGBM

From 3ffcac65bf148343306fe7a50312dff1c77e8bbc Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@abv.bg>
Date: Thu, 15 Aug 2024 01:27:06 +0300
Subject: [PATCH 11/31] Comments after code review

---
 src/io/tree.cpp                          | 3 +--
 tests/python_package_test/test_dask.py   | 3 ++-
 tests/python_package_test/test_engine.py | 1 -
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/io/tree.cpp b/src/io/tree.cpp
index c051c8060a33..975f09d209df 100644
--- a/src/io/tree.cpp
+++ b/src/io/tree.cpp
@@ -417,12 +417,11 @@ std::string Tree::ToJSON() const {
   str_buf << "\"shrinkage\":" << shrinkage_ << "," << '\n';
   if (num_leaves_ == 1) {
     str_buf << "\"tree_structure\":{";
+    str_buf << "\"leaf_value\":" << leaf_value_[0] << ", " << '\n';
     if (is_linear_) {
-      str_buf << "\"leaf_value\":" << leaf_value_[0] << ", " << '\n';
       str_buf << "\"leaf_count\":" << leaf_count_[0] << ", " << '\n';
       str_buf << LinearModelToJSON(0);
     } else {
-      str_buf << "\"leaf_value\":" << leaf_value_[0] << ", " << '\n';
       str_buf << "\"leaf_count\":" << leaf_count_[0];
     }
     str_buf << "}" << '\n';
diff --git a/tests/python_package_test/test_dask.py b/tests/python_package_test/test_dask.py
index 7dcc21ac4d6a..37d6db2541f5 100644
--- a/tests/python_package_test/test_dask.py
+++ b/tests/python_package_test/test_dask.py
@@ -1464,7 +1464,8 @@ def test_init_score(task, output, cluster):
             init_scores = dy.map_blocks(lambda x: np.full((x.size, size_factor), init_score))
         model = model_factory(client=client, **params)
         model.fit(dX, dy, sample_weight=dw, init_score=init_scores, group=dg)
-        assert model.fitted_
+        # value of the root node is 0 when init_score is set
+        assert model.booster_.trees_to_dataframe()["value"][0] == 0
 
 
 def sklearn_checks_to_run():
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index c490583b6032..afc2ddc535e1 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3865,7 +3865,6 @@ def test_dump_model_stump(linear_tree):
     params = {
         "objective": "binary",
         "verbose": -1,
-        "n_jobs": 1,
         "linear_tree": linear_tree,
     }
     bst = lgb.train(params, train_data, num_boost_round=5)

From d5a82c49182a490283376d9abaa3e654e5eaacd8 Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@abv.bg>
Date: Thu, 15 Aug 2024 03:27:14 +0300
Subject: [PATCH 12/31] Fix test

---
 tests/python_package_test/test_dask.py | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/tests/python_package_test/test_dask.py b/tests/python_package_test/test_dask.py
index 37d6db2541f5..a0c3ba850b2b 100644
--- a/tests/python_package_test/test_dask.py
+++ b/tests/python_package_test/test_dask.py
@@ -1444,8 +1444,8 @@ def test_training_succeeds_when_data_is_dataframe_and_label_is_column_array(task
 @pytest.mark.parametrize("task", tasks)
 @pytest.mark.parametrize("output", data_output)
 def test_init_score(task, output, cluster):
-    if task == "ranking" and output == "scipy_csr_matrix":
-        pytest.skip("LGBMRanker is not currently tested on sparse matrices")
+    if task == "ranking":
+        pytest.skip("LGBMRanker is not currently tested")
 
     with Client(cluster) as client:
         _, _, _, _, dX, dy, dw, dg = _create_data(objective=task, output=output, group=None)
@@ -1462,10 +1462,18 @@ def test_init_score(task, output, cluster):
             init_scores = dy.map_partitions(lambda x: pd.DataFrame([[init_score] * size_factor] * x.size))
         else:
             init_scores = dy.map_blocks(lambda x: np.full((x.size, size_factor), init_score))
+
         model = model_factory(client=client, **params)
-        model.fit(dX, dy, sample_weight=dw, init_score=init_scores, group=dg)
-        # value of the root node is 0 when init_score is set
-        assert model.booster_.trees_to_dataframe()["value"][0] == 0
+        model.fit(dX, dy, sample_weight=dw, group=dg)
+
+        model_init_score = model_factory(client=client, **params)
+        model_init_score.fit(dX, dy, sample_weight=dw, init_score=init_scores, group=dg)
+
+        # check if init score changes root value
+        assert (
+            model.booster_.trees_to_dataframe()["value"][0]
+            != model_init_score.booster_.trees_to_dataframe()["value"][0]
+        )
 
 
 def sklearn_checks_to_run():

From be7675d7b63b65438e1dd2fcd9d362b560e73ede Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@abv.bg>
Date: Thu, 15 Aug 2024 03:40:25 +0300
Subject: [PATCH 13/31] Reenable cuda testing

---
 tests/python_package_test/test_engine.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index afc2ddc535e1..b29f9abfa961 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3894,10 +3894,6 @@ def test_dump_model():
     assert "leaf_value" in dumped_model_str
     assert "leaf_count" in dumped_model_str
 
-    # CUDA does not return correct values for the root
-    if getenv("TASK", "") == "cuda":
-        return
-
     for tree in dumped_model["tree_info"]:
         assert not np.all(tree["tree_structure"]["internal_value"] == 0)
 

From f616e0384f52c099a273b9831f2668da0bf2b732 Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@abv.bg>
Date: Thu, 15 Aug 2024 03:41:05 +0300
Subject: [PATCH 14/31] Tests

---
 tests/python_package_test/test_dask.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python_package_test/test_dask.py b/tests/python_package_test/test_dask.py
index a0c3ba850b2b..3af65fc945f1 100644
--- a/tests/python_package_test/test_dask.py
+++ b/tests/python_package_test/test_dask.py
@@ -1445,7 +1445,7 @@ def test_training_succeeds_when_data_is_dataframe_and_label_is_column_array(task
 @pytest.mark.parametrize("output", data_output)
 def test_init_score(task, output, cluster):
     if task == "ranking":
-        pytest.skip("LGBMRanker is not currently tested")
+        pytest.skip("LGBMRanker is not currently tested for init_score")
 
     with Client(cluster) as client:
         _, _, _, _, dX, dy, dw, dg = _create_data(objective=task, output=output, group=None)

From c28a2cfde81c7654ea9f99dd709beac68c726708 Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@abv.bg>
Date: Thu, 15 Aug 2024 13:12:05 +0300
Subject: [PATCH 15/31] test cuda

---
 src/treelearner/cuda/cuda_leaf_splits.cpp             | 7 +++++--
 src/treelearner/cuda/cuda_leaf_splits.hpp             | 4 ++--
 src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp | 8 ++++++++
 src/treelearner/cuda/cuda_single_gpu_tree_learner.hpp | 1 +
 4 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/src/treelearner/cuda/cuda_leaf_splits.cpp b/src/treelearner/cuda/cuda_leaf_splits.cpp
index 57b5b777c142..803d4674ee48 100644
--- a/src/treelearner/cuda/cuda_leaf_splits.cpp
+++ b/src/treelearner/cuda/cuda_leaf_splits.cpp
@@ -38,12 +38,14 @@ void CUDALeafSplits::InitValues(
   const double lambda_l1, const double lambda_l2,
   const score_t* cuda_gradients, const score_t* cuda_hessians,
   const data_size_t* cuda_bagging_data_indices, const data_size_t* cuda_data_indices_in_leaf,
-  const data_size_t num_used_indices, hist_t* cuda_hist_in_leaf, double* root_sum_hessians) {
+  const data_size_t num_used_indices, hist_t* cuda_hist_in_leaf,
+  double* root_sum_gradients, double* root_sum_hessians) {
   cuda_gradients_ = cuda_gradients;
   cuda_hessians_ = cuda_hessians;
   cuda_sum_of_gradients_buffer_.SetValue(0);
   cuda_sum_of_hessians_buffer_.SetValue(0);
   LaunchInitValuesKernal(lambda_l1, lambda_l2, cuda_bagging_data_indices, cuda_data_indices_in_leaf, num_used_indices, cuda_hist_in_leaf);
+  CopyFromCUDADeviceToHost<double>(root_sum_gradients, cuda_sum_of_gradients_buffer_.RawData(), 1, __FILE__, __LINE__);
   CopyFromCUDADeviceToHost<double>(root_sum_hessians, cuda_sum_of_hessians_buffer_.RawData(), 1, __FILE__, __LINE__);
   SynchronizeCUDADevice(__FILE__, __LINE__);
 }
@@ -53,11 +55,12 @@ void CUDALeafSplits::InitValues(
   const int16_t* cuda_gradients_and_hessians,
   const data_size_t* cuda_bagging_data_indices,
   const data_size_t* cuda_data_indices_in_leaf, const data_size_t num_used_indices,
-  hist_t* cuda_hist_in_leaf, double* root_sum_hessians,
+  hist_t* cuda_hist_in_leaf, double* root_sum_gradients, double* root_sum_hessians,
   const score_t* grad_scale, const score_t* hess_scale) {
   cuda_gradients_ = reinterpret_cast<const score_t*>(cuda_gradients_and_hessians);
   cuda_hessians_ = nullptr;
   LaunchInitValuesKernal(lambda_l1, lambda_l2, cuda_bagging_data_indices, cuda_data_indices_in_leaf, num_used_indices, cuda_hist_in_leaf, grad_scale, hess_scale);
+  CopyFromCUDADeviceToHost<double>(root_sum_gradients, cuda_sum_of_gradients_buffer_.RawData(), 1, __FILE__, __LINE__);
   CopyFromCUDADeviceToHost<double>(root_sum_hessians, cuda_sum_of_hessians_buffer_.RawData(), 1, __FILE__, __LINE__);
   SynchronizeCUDADevice(__FILE__, __LINE__);
 }
diff --git a/src/treelearner/cuda/cuda_leaf_splits.hpp b/src/treelearner/cuda/cuda_leaf_splits.hpp
index 33a9ea578a1f..c2635346098b 100644
--- a/src/treelearner/cuda/cuda_leaf_splits.hpp
+++ b/src/treelearner/cuda/cuda_leaf_splits.hpp
@@ -44,14 +44,14 @@ class CUDALeafSplits {
     const score_t* cuda_gradients, const score_t* cuda_hessians,
     const data_size_t* cuda_bagging_data_indices,
     const data_size_t* cuda_data_indices_in_leaf, const data_size_t num_used_indices,
-    hist_t* cuda_hist_in_leaf, double* root_sum_hessians);
+    hist_t* cuda_hist_in_leaf, double* root_sum_gradients, double* root_sum_hessians);
 
   void InitValues(
     const double lambda_l1, const double lambda_l2,
     const int16_t* cuda_gradients_and_hessians,
     const data_size_t* cuda_bagging_data_indices,
     const data_size_t* cuda_data_indices_in_leaf, const data_size_t num_used_indices,
-    hist_t* cuda_hist_in_leaf, double* root_sum_hessians,
+    hist_t* cuda_hist_in_leaf, double* root_sum_gradients, double* root_sum_hessians,
     const score_t* grad_scale, const score_t* hess_scale);
 
   void InitValues();
diff --git a/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp b/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp
index 8f8ff15f0715..755fb1486245 100644
--- a/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp
+++ b/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp
@@ -66,6 +66,7 @@ void CUDASingleGPUTreeLearner::Init(const Dataset* train_data, bool is_constant_
   leaf_best_split_default_left_.resize(config_->num_leaves, 0);
   leaf_num_data_.resize(config_->num_leaves, 0);
   leaf_data_start_.resize(config_->num_leaves, 0);
+  leaf_sum_gradients_.resize(config_->num_leaves, 0.0f);
   leaf_sum_hessians_.resize(config_->num_leaves, 0.0f);
 
   if (!boosting_on_cuda_) {
@@ -122,6 +123,7 @@ void CUDASingleGPUTreeLearner::BeforeTrain() {
       cuda_data_partition_->cuda_data_indices(),
       root_num_data,
       cuda_histogram_constructor_->cuda_hist_pointer(),
+      &leaf_sum_gradients_[0],
       &leaf_sum_hessians_[0],
       cuda_gradient_discretizer_->grad_scale_ptr(),
       cuda_gradient_discretizer_->hess_scale_ptr());
@@ -137,6 +139,7 @@ void CUDASingleGPUTreeLearner::BeforeTrain() {
       cuda_data_partition_->cuda_data_indices(),
       root_num_data,
       cuda_histogram_constructor_->cuda_hist_pointer(),
+      &leaf_sum_gradients_[0],
       &leaf_sum_hessians_[0]);
   }
   leaf_num_data_[0] = root_num_data;
@@ -162,6 +165,11 @@ Tree* CUDASingleGPUTreeLearner::Train(const score_t* gradients,
   const bool track_branch_features = !(config_->interaction_constraints_vector.empty());
   std::unique_ptr<CUDATree> tree(new CUDATree(config_->num_leaves, track_branch_features,
     config_->linear_tree, config_->gpu_device_id, has_categorical_feature_));
+  // set the root value by hand, as it is not handled by splits
+  tree->SetLeafOutput(0, CUDALeafSplits::CalculateSplittedLeafOutput<true, false>(
+    leaf_sum_gradients_[smaller_leaf_index_], leaf_sum_hessians_[smaller_leaf_index_],
+    config_->lambda_l1, config_->lambda_l2,  config_->path_smooth,
+    static_cast<data_size_t>(num_data_), 0));
   for (int i = 0; i < config_->num_leaves - 1; ++i) {
     global_timer.Start("CUDASingleGPUTreeLearner::ConstructHistogramForLeaf");
     const data_size_t num_data_in_smaller_leaf = leaf_num_data_[smaller_leaf_index_];
diff --git a/src/treelearner/cuda/cuda_single_gpu_tree_learner.hpp b/src/treelearner/cuda/cuda_single_gpu_tree_learner.hpp
index a1ea79efa1a1..6121ae5d22a5 100644
--- a/src/treelearner/cuda/cuda_single_gpu_tree_learner.hpp
+++ b/src/treelearner/cuda/cuda_single_gpu_tree_learner.hpp
@@ -103,6 +103,7 @@ class CUDASingleGPUTreeLearner: public SerialTreeLearner {
   std::vector<uint8_t> leaf_best_split_default_left_;
   std::vector<data_size_t> leaf_num_data_;
   std::vector<data_size_t> leaf_data_start_;
+  std::vector<double> leaf_sum_gradients_;
   std::vector<double> leaf_sum_hessians_;
   int smaller_leaf_index_;
   int larger_leaf_index_;

From 6113f9095120f7697d9be55a91156063927c52b6 Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@abv.bg>
Date: Thu, 15 Aug 2024 13:21:52 +0300
Subject: [PATCH 16/31] .

---
 .../cuda/cuda_single_gpu_tree_learner.cpp         | 15 +++++++--------
 .../cuda/cuda_single_gpu_tree_learner.hpp         |  3 +--
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp b/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp
index 755fb1486245..3a62d7ec26e5 100644
--- a/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp
+++ b/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp
@@ -321,10 +321,10 @@ Tree* CUDASingleGPUTreeLearner::Train(const score_t* gradients,
                                 &leaf_data_start_[right_leaf_index],
                                 &leaf_sum_hessians_[best_leaf_index_],
                                 &leaf_sum_hessians_[right_leaf_index],
-                                &sum_left_gradients,
-                                &sum_right_gradients);
+                                &leaf_sum_gradients_[best_leaf_index_],
+                                &leaf_sum_gradients_[right_leaf_index]);
     #ifdef DEBUG
-    CheckSplitValid(best_leaf_index_, right_leaf_index, sum_left_gradients, sum_right_gradients);
+    CheckSplitValid(best_leaf_index_, right_leaf_index);
     #endif  // DEBUG
     smaller_leaf_index_ = (leaf_num_data_[best_leaf_index_] < leaf_num_data_[right_leaf_index] ? best_leaf_index_ : right_leaf_index);
     larger_leaf_index_ = (smaller_leaf_index_ == best_leaf_index_ ? right_leaf_index : best_leaf_index_);
@@ -382,6 +382,7 @@ void CUDASingleGPUTreeLearner::ResetConfig(const Config* config) {
     leaf_best_split_default_left_.resize(config_->num_leaves, 0);
     leaf_num_data_.resize(config_->num_leaves, 0);
     leaf_data_start_.resize(config_->num_leaves, 0);
+    leaf_sum_gradients_.resize(config_->num_leaves, 0.0f);
     leaf_sum_hessians_.resize(config_->num_leaves, 0.0f);
   }
   cuda_histogram_constructor_->ResetConfig(config);
@@ -570,9 +571,7 @@ void CUDASingleGPUTreeLearner::SelectFeatureByNode(const Tree* tree) {
 #ifdef DEBUG
 void CUDASingleGPUTreeLearner::CheckSplitValid(
   const int left_leaf,
-  const int right_leaf,
-  const double split_sum_left_gradients,
-  const double split_sum_right_gradients) {
+  const int right_leaf) {
   std::vector<data_size_t> left_data_indices(leaf_num_data_[left_leaf]);
   std::vector<data_size_t> right_data_indices(leaf_num_data_[right_leaf]);
   CopyFromCUDADeviceToHost<data_size_t>(left_data_indices.data(),
@@ -593,9 +592,9 @@ void CUDASingleGPUTreeLearner::CheckSplitValid(
     sum_right_gradients += host_gradients_[index];
     sum_right_hessians += host_hessians_[index];
   }
-  CHECK_LE(std::fabs(sum_left_gradients - split_sum_left_gradients), 1e-6f);
+  CHECK_LE(std::fabs(sum_left_gradients - leaf_sum_gradients_[left_leaf]), 1e-6f);
   CHECK_LE(std::fabs(sum_left_hessians - leaf_sum_hessians_[left_leaf]), 1e-6f);
-  CHECK_LE(std::fabs(sum_right_gradients - split_sum_right_gradients), 1e-6f);
+  CHECK_LE(std::fabs(sum_right_gradients - leaf_sum_gradients_[right_leaf]), 1e-6f);
   CHECK_LE(std::fabs(sum_right_hessians - leaf_sum_hessians_[right_leaf]), 1e-6f);
 }
 #endif  // DEBUG
diff --git a/src/treelearner/cuda/cuda_single_gpu_tree_learner.hpp b/src/treelearner/cuda/cuda_single_gpu_tree_learner.hpp
index 6121ae5d22a5..b50d3a8884ca 100644
--- a/src/treelearner/cuda/cuda_single_gpu_tree_learner.hpp
+++ b/src/treelearner/cuda/cuda_single_gpu_tree_learner.hpp
@@ -71,8 +71,7 @@ class CUDASingleGPUTreeLearner: public SerialTreeLearner {
 
   #ifdef DEBUG
   void CheckSplitValid(
-    const int left_leaf, const int right_leaf,
-    const double sum_left_gradients, const double sum_right_gradients);
+    const int left_leaf, const int right_leaf);
   #endif  // DEBUG
 
   void RenewDiscretizedTreeLeaves(CUDATree* cuda_tree);

From 94cf7f040cabbdd51284b4b176d41407cd296834 Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@abv.bg>
Date: Thu, 15 Aug 2024 13:22:26 +0300
Subject: [PATCH 17/31] Fix warning

---
 src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp b/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp
index 3a62d7ec26e5..55040caf454d 100644
--- a/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp
+++ b/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp
@@ -301,8 +301,6 @@ Tree* CUDASingleGPUTreeLearner::Train(const score_t* gradients,
                                        best_split_info);
     }
 
-    double sum_left_gradients = 0.0f;
-    double sum_right_gradients = 0.0f;
     cuda_data_partition_->Split(best_split_info,
                                 best_leaf_index_,
                                 right_leaf_index,

From 01aa952391c1ed4a8bbd9a907506646027abc682 Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@abv.bg>
Date: Thu, 15 Aug 2024 13:32:17 +0300
Subject: [PATCH 18/31] reenable tests

---
 tests/python_package_test/test_engine.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index c490583b6032..92541d3a3f47 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3895,10 +3895,6 @@ def test_dump_model():
     assert "leaf_value" in dumped_model_str
     assert "leaf_count" in dumped_model_str
 
-    # CUDA does not return correct values for the root
-    if getenv("TASK", "") == "cuda":
-        return
-
     for tree in dumped_model["tree_info"]:
         assert not np.all(tree["tree_structure"]["internal_value"] == 0)
 

From fadaa83bf20f20e59e33b254952845810f9e47e7 Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@abv.bg>
Date: Thu, 15 Aug 2024 13:35:02 +0300
Subject: [PATCH 19/31] .

---
 tests/python_package_test/test_engine.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 92541d3a3f47..b29f9abfa961 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3865,7 +3865,6 @@ def test_dump_model_stump(linear_tree):
     params = {
         "objective": "binary",
         "verbose": -1,
-        "n_jobs": 1,
         "linear_tree": linear_tree,
     }
     bst = lgb.train(params, train_data, num_boost_round=5)

From a323acb76d1fe2c3ea4853f67340d614ef0c3bae Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@abv.bg>
Date: Thu, 15 Aug 2024 14:00:46 +0300
Subject: [PATCH 20/31] fix cuda

---
 src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp b/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp
index 55040caf454d..59131befef32 100644
--- a/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp
+++ b/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp
@@ -170,6 +170,7 @@ Tree* CUDASingleGPUTreeLearner::Train(const score_t* gradients,
     leaf_sum_gradients_[smaller_leaf_index_], leaf_sum_hessians_[smaller_leaf_index_],
     config_->lambda_l1, config_->lambda_l2,  config_->path_smooth,
     static_cast<data_size_t>(num_data_), 0));
+  cuda_tree->SyncLeafOutputFromHostToCUDA();
   for (int i = 0; i < config_->num_leaves - 1; ++i) {
     global_timer.Start("CUDASingleGPUTreeLearner::ConstructHistogramForLeaf");
     const data_size_t num_data_in_smaller_leaf = leaf_num_data_[smaller_leaf_index_];

From 0fd0c59308a2f83736a308553d80df4c002a0c7b Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@abv.bg>
Date: Thu, 15 Aug 2024 14:06:39 +0300
Subject: [PATCH 21/31] Fix compilation error

---
 src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp b/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp
index 59131befef32..952ef52f8023 100644
--- a/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp
+++ b/src/treelearner/cuda/cuda_single_gpu_tree_learner.cpp
@@ -170,7 +170,7 @@ Tree* CUDASingleGPUTreeLearner::Train(const score_t* gradients,
     leaf_sum_gradients_[smaller_leaf_index_], leaf_sum_hessians_[smaller_leaf_index_],
     config_->lambda_l1, config_->lambda_l2,  config_->path_smooth,
     static_cast<data_size_t>(num_data_), 0));
-  cuda_tree->SyncLeafOutputFromHostToCUDA();
+  tree->SyncLeafOutputFromHostToCUDA();
   for (int i = 0; i < config_->num_leaves - 1; ++i) {
     global_timer.Start("CUDASingleGPUTreeLearner::ConstructHistogramForLeaf");
     const data_size_t num_data_in_smaller_leaf = leaf_num_data_[smaller_leaf_index_];

From 4cc5dd45a31d74ad832a285bb8a60611a340b41a Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@abv.bg>
Date: Thu, 15 Aug 2024 14:27:30 +0300
Subject: [PATCH 22/31] Fix weight

---
 src/io/cuda/cuda_tree.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/io/cuda/cuda_tree.cu b/src/io/cuda/cuda_tree.cu
index 62020c3a09ae..09a3164dbf52 100644
--- a/src/io/cuda/cuda_tree.cu
+++ b/src/io/cuda/cuda_tree.cu
@@ -210,7 +210,7 @@ __global__ void SplitCategoricalKernel(  // split information
     split_gain[new_node_index] = static_cast<float>(cuda_split_info->gain);
   } else if (thread_index == 4) {
     // save current leaf value to internal node before change
-    internal_weight[new_node_index] = leaf_weight[leaf_index];
+    internal_weight[new_node_index] = cuda_split_info->left_sum_hessians + cuda_split_info->right_sum_hessians;
     leaf_weight[leaf_index] = cuda_split_info->left_sum_hessians;
   } else if (thread_index == 5) {
     internal_value[new_node_index] = leaf_value[leaf_index];

From a743a87739b2cf04040f36344483d71e32e02b50 Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@abv.bg>
Date: Thu, 15 Aug 2024 15:18:06 +0300
Subject: [PATCH 23/31] Fix numerical

---
 src/io/cuda/cuda_tree.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/io/cuda/cuda_tree.cu b/src/io/cuda/cuda_tree.cu
index 09a3164dbf52..87abfc1353b4 100644
--- a/src/io/cuda/cuda_tree.cu
+++ b/src/io/cuda/cuda_tree.cu
@@ -94,7 +94,7 @@ __global__ void SplitKernel(  // split information
     split_gain[new_node_index] = static_cast<float>(cuda_split_info->gain);
   } else if (thread_index == 4) {
     // save current leaf value to internal node before change
-    internal_weight[new_node_index] = leaf_weight[leaf_index];
+    internal_weight[new_node_index] = cuda_split_info->left_sum_hessians + cuda_split_info->right_sum_hessians;
     leaf_weight[leaf_index] = cuda_split_info->left_sum_hessians;
   } else if (thread_index == 5) {
     internal_value[new_node_index] = leaf_value[leaf_index];

From 031c945ba3e065357488effb4d7454d99f7bad46 Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@abv.bg>
Date: Fri, 16 Aug 2024 13:20:28 +0300
Subject: [PATCH 24/31] Make tests more robust

---
 tests/python_package_test/test_dask.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tests/python_package_test/test_dask.py b/tests/python_package_test/test_dask.py
index 3af65fc945f1..6e1389b0ecd5 100644
--- a/tests/python_package_test/test_dask.py
+++ b/tests/python_package_test/test_dask.py
@@ -1465,15 +1465,14 @@ def test_init_score(task, output, cluster):
 
         model = model_factory(client=client, **params)
         model.fit(dX, dy, sample_weight=dw, group=dg)
+        pred = model.predict(dX, raw_score=True)
 
         model_init_score = model_factory(client=client, **params)
         model_init_score.fit(dX, dy, sample_weight=dw, init_score=init_scores, group=dg)
+        pred_init_score = model_init_score.predict(dX, raw_score=True)
 
-        # check if init score changes root value
-        assert (
-            model.booster_.trees_to_dataframe()["value"][0]
-            != model_init_score.booster_.trees_to_dataframe()["value"][0]
-        )
+        # check if init score changes predictions
+        assert not np.allclose(pred, pred_init_score)
 
 
 def sklearn_checks_to_run():

From 634b0fcffba2d828391c0d54c5f15c788b7243a5 Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@abv.bg>
Date: Tue, 17 Sep 2024 16:42:57 +0300
Subject: [PATCH 25/31] Fix test failing because of accuracy reasons

---
 tests/python_package_test/test_dask.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/python_package_test/test_dask.py b/tests/python_package_test/test_dask.py
index dd7b1817448b..b47d31f16507 100644
--- a/tests/python_package_test/test_dask.py
+++ b/tests/python_package_test/test_dask.py
@@ -2,7 +2,6 @@
 """Tests for lightgbm.dask module"""
 
 import inspect
-import random
 import socket
 from itertools import groupby
 from os import getenv
@@ -1444,16 +1443,16 @@ def test_training_succeeds_when_data_is_dataframe_and_label_is_column_array(task
 @pytest.mark.parametrize("task", tasks)
 @pytest.mark.parametrize("output", data_output)
 def test_init_score(task, output, cluster):
-    if task == "ranking":
-        pytest.skip("LGBMRanker is not currently tested for init_score")
+    if task == "ranking" and output == "scipy_csr_matrix":
+        pytest.skip("LGBMRanker is not currently tested on sparse matrices")
 
     with Client(cluster) as client:
         _, _, _, _, dX, dy, dw, dg = _create_data(objective=task, output=output, group=None)
 
         model_factory = task_to_dask_factory[task]
 
-        params = {"n_estimators": 1, "num_leaves": 2, "time_out": 5}
-        init_score = random.random()
+        params = {"n_estimators": 30, "num_leaves": 15, "time_out": 5}
+        init_score = 1
         size_factor = 1
         if task == "multiclass-classification":
             size_factor = 3  # number of classes
@@ -1472,7 +1471,8 @@ def test_init_score(task, output, cluster):
         pred_init_score = model_init_score.predict(dX, raw_score=True)
 
         # check if init score changes predictions
-        assert not np.allclose(pred, pred_init_score)
+        with pytest.raises(AssertionError):
+            assert_eq(pred, pred_init_score, atol=0.0001)
 
 
 def sklearn_checks_to_run():

From 3fe4577da970fdd6a055721a724ee9aa22dafee1 Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@abv.bg>
Date: Sat, 21 Sep 2024 19:01:07 +0300
Subject: [PATCH 26/31] Fix test_dask::test_init_scores

---
 tests/python_package_test/test_dask.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/tests/python_package_test/test_dask.py b/tests/python_package_test/test_dask.py
index b47d31f16507..15f9bed859b7 100644
--- a/tests/python_package_test/test_dask.py
+++ b/tests/python_package_test/test_dask.py
@@ -1452,15 +1452,14 @@ def test_init_score(task, output, cluster):
         model_factory = task_to_dask_factory[task]
 
         params = {"n_estimators": 30, "num_leaves": 15, "time_out": 5}
-        init_score = 1
-        size_factor = 1
-        if task == "multiclass-classification":
-            size_factor = 3  # number of classes
+        num_classes = 3 if task == "multiclass-classification" else 1
+
+        rnd = np.random.RandomState(42)
 
         if output.startswith("dataframe"):
-            init_scores = dy.map_partitions(lambda x: pd.DataFrame([[init_score] * size_factor] * x.size))
+            init_scores = dy.map_partitions(lambda x: pd.DataFrame(rnd.uniform(size=(x.size, num_classes))))
         else:
-            init_scores = dy.map_blocks(lambda x: np.full((x.size, size_factor), init_score))
+            init_scores = dy.map_blocks(lambda x: rnd.uniform(size=(x.size, num_classes)))
 
         model = model_factory(client=client, **params)
         model.fit(dX, dy, sample_weight=dw, group=dg)
@@ -1472,7 +1471,7 @@ def test_init_score(task, output, cluster):
 
         # check if init score changes predictions
         with pytest.raises(AssertionError):
-            assert_eq(pred, pred_init_score, atol=0.0001)
+            assert_eq(pred, pred_init_score)
 
 
 def sklearn_checks_to_run():

From 9e3e8eda11cc900f73c8ad29563ebb49d806b737 Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@abv.bg>
Date: Sat, 21 Sep 2024 19:22:40 +0300
Subject: [PATCH 27/31] Decrease size of trees in test

---
 tests/python_package_test/test_dask.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tests/python_package_test/test_dask.py b/tests/python_package_test/test_dask.py
index 15f9bed859b7..1929d4ba53c5 100644
--- a/tests/python_package_test/test_dask.py
+++ b/tests/python_package_test/test_dask.py
@@ -1450,12 +1450,11 @@ def test_init_score(task, output, cluster):
         _, _, _, _, dX, dy, dw, dg = _create_data(objective=task, output=output, group=None)
 
         model_factory = task_to_dask_factory[task]
+        rnd = np.random.RandomState(42)
 
-        params = {"n_estimators": 30, "num_leaves": 15, "time_out": 5}
+        params = {"n_estimators": 1, "num_leaves": 2, "time_out": 5}
         num_classes = 3 if task == "multiclass-classification" else 1
 
-        rnd = np.random.RandomState(42)
-
         if output.startswith("dataframe"):
             init_scores = dy.map_partitions(lambda x: pd.DataFrame(rnd.uniform(size=(x.size, num_classes))))
         else:

From e76d5bc62f1d9010f4322193b3f8be567b75fa2a Mon Sep 17 00:00:00 2001
From: James Lamb <jaylamb20@gmail.com>
Date: Mon, 7 Oct 2024 23:16:00 -0500
Subject: [PATCH 28/31] add a test on predictions from a model of all stumps

---
 tests/python_package_test/test_engine.py | 28 ++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index a2573899d499..4e36072131fc 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3812,6 +3812,34 @@ def inner_test(X, y, params, early_stopping_rounds):
     inner_test(X, y, params, early_stopping_rounds=None)
 
 
+@pytest.mark.parametrize("use_init_score", [False, True])
+def test_predict_stump(rng, use_init_score):
+    X, y = load_breast_cancer(return_X_y=True)
+    dataset_kwargs = {"data": X, "label": y}
+    if use_init_score:
+        dataset_kwargs.update({"init_score": rng.uniform(size=y.shape)})
+    bst = lgb.train(
+        train_set=lgb.Dataset(**dataset_kwargs),
+        params={"objective": "binary", "min_data_in_leaf": X.shape[0]},
+        num_boost_round=5,
+    )
+    # checking prediction from 1 iteration and the whole model, to prevent bugs
+    # of the form "a model of n stumps predicts n * initial_score"
+    preds_1 = bst.predict(X, raw_score=True, num_iteration=1)
+    preds_all = bst.predict(X, raw_score=True)
+    if use_init_score:
+        # if init_score was provided, a model of stumps should predict all 0s
+        all_zeroes = np.full_like(preds_1, fill_value=0.0)
+        np.testing.assert_allclose(preds_1, all_zeroes)
+        np.testing.assert_allclose(preds_all, all_zeroes)
+    else:
+        # if init_score was not provided, prediction for a model of stumps should be
+        # the "average" of the labels
+        y_avg = np.log(y.mean() / (1.0 - y.mean()))
+        np.testing.assert_allclose(preds_1, np.full_like(preds_1, fill_value=y_avg))
+        np.testing.assert_allclose(preds_all, np.full_like(preds_all, fill_value=y_avg))
+
+
 def test_average_precision_metric():
     # test against sklearn average precision metric
     X, y = load_breast_cancer(return_X_y=True)

From 0af46318eb3a9c45089582c8c80cd96fa1678c74 Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@gmail.com>
Date: Tue, 8 Oct 2024 20:18:25 +0300
Subject: [PATCH 29/31] Comments after code review

---
 tests/python_package_test/test_dask.py   | 11 +++++----
 tests/python_package_test/test_engine.py | 29 ++++++++++--------------
 tests/python_package_test/utils.py       | 10 +++++---
 3 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/tests/python_package_test/test_dask.py b/tests/python_package_test/test_dask.py
index 1929d4ba53c5..dccc4065ba55 100644
--- a/tests/python_package_test/test_dask.py
+++ b/tests/python_package_test/test_dask.py
@@ -1442,7 +1442,7 @@ def test_training_succeeds_when_data_is_dataframe_and_label_is_column_array(task
 
 @pytest.mark.parametrize("task", tasks)
 @pytest.mark.parametrize("output", data_output)
-def test_init_score(task, output, cluster):
+def test_init_score(task, output, cluster, rng):
     if task == "ranking" and output == "scipy_csr_matrix":
         pytest.skip("LGBMRanker is not currently tested on sparse matrices")
 
@@ -1450,15 +1450,16 @@ def test_init_score(task, output, cluster):
         _, _, _, _, dX, dy, dw, dg = _create_data(objective=task, output=output, group=None)
 
         model_factory = task_to_dask_factory[task]
-        rnd = np.random.RandomState(42)
 
         params = {"n_estimators": 1, "num_leaves": 2, "time_out": 5}
-        num_classes = 3 if task == "multiclass-classification" else 1
+        num_classes = 1
+        if task == "multiclass-classification":
+            num_classes = 3
 
         if output.startswith("dataframe"):
-            init_scores = dy.map_partitions(lambda x: pd.DataFrame(rnd.uniform(size=(x.size, num_classes))))
+            init_scores = dy.map_partitions(lambda x: pd.DataFrame(rng.uniform(size=(x.size, num_classes))))
         else:
-            init_scores = dy.map_blocks(lambda x: rnd.uniform(size=(x.size, num_classes)))
+            init_scores = dy.map_blocks(lambda x: rng.uniform(size=(x.size, num_classes)))
 
         model = model_factory(client=client, **params)
         model.fit(dX, dy, sample_weight=dw, group=dg)
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 4e36072131fc..286f066a3526 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -3887,28 +3887,21 @@ def test_reset_params_works_with_metric_num_class_and_boosting():
 @pytest.mark.parametrize("linear_tree", [False, True])
 def test_dump_model_stump(linear_tree):
     X, y = load_breast_cancer(return_X_y=True)
-    # intentionally create a stump (tree with only a root-node)
-    # using restricted # samples
-    subidx = random.sample(range(len(y)), 30)
 
-    train_data = lgb.Dataset(X[subidx], label=y[subidx])
-    params = {
-        "objective": "binary",
-        "verbose": -1,
-        "linear_tree": linear_tree,
-    }
+    train_data = lgb.Dataset(X, label=y)
+    params = {"objective": "binary", "verbose": -1, "linear_tree": linear_tree, "min_data_in_leaf": len(y)}
     bst = lgb.train(params, train_data, num_boost_round=5)
-    dumped_model = bst.dump_model(5, 0)
+    dumped_model = bst.dump_model(num_iteration=5, start_iteration=0)
     tree_structure = dumped_model["tree_info"][0]["tree_structure"]
     assert len(dumped_model["tree_info"]) == 1
     assert "leaf_value" in tree_structure
-    assert tree_structure["leaf_count"] == 30
+    assert tree_structure["leaf_count"] == len(y)
 
 
 def test_dump_model():
-    offset = 100
+    initial_score_offset = 57.5
     X, y = make_synthetic_regression()
-    train_data = lgb.Dataset(X, label=y + offset)
+    train_data = lgb.Dataset(X, label=y + initial_score_offset)
 
     params = {
         "objective": "regression",
@@ -3916,7 +3909,7 @@ def test_dump_model():
         "boost_from_average": True,
     }
     bst = lgb.train(params, train_data, num_boost_round=5)
-    dumped_model = bst.dump_model(5, 0)
+    dumped_model = bst.dump_model(num_iteration=5, start_iteration=0)
     dumped_model_str = str(dumped_model)
     assert "leaf_features" not in dumped_model_str
     assert "leaf_coeff" not in dumped_model_str
@@ -3925,9 +3918,11 @@ def test_dump_model():
     assert "leaf_count" in dumped_model_str
 
     for tree in dumped_model["tree_info"]:
-        assert not np.all(tree["tree_structure"]["internal_value"] == 0)
+        assert tree["tree_structure"]["internal_value"] != 0
 
-    np.testing.assert_allclose(dumped_model["tree_info"][0]["tree_structure"]["internal_value"], offset, atol=1)
+    assert dumped_model["tree_info"][0]["tree_structure"]["internal_value"] == pytest.approx(
+        initial_score_offset, abs=1
+    )
     assert_all_trees_valid(dumped_model)
 
 
@@ -3940,7 +3935,7 @@ def test_dump_model_linear():
     }
     train_data = lgb.Dataset(X, label=y)
     bst = lgb.train(params, train_data, num_boost_round=5)
-    dumped_model = bst.dump_model(5, 0)
+    dumped_model = bst.dump_model(num_iteration=5, start_iteration=0)
     assert_all_trees_valid(dumped_model)
     dumped_model_str = str(dumped_model)
     assert "leaf_features" in dumped_model_str
diff --git a/tests/python_package_test/utils.py b/tests/python_package_test/utils.py
index c8ef7a9fa139..d5e233c579de 100644
--- a/tests/python_package_test/utils.py
+++ b/tests/python_package_test/utils.py
@@ -251,12 +251,16 @@ def assert_subtree_valid(root):
     right_child = root["right_child"]
     (l_w, l_c) = assert_subtree_valid(left_child)
     (r_w, r_c) = assert_subtree_valid(right_child)
-    assert np.allclose(root["internal_weight"], l_w + r_w)
-    assert np.allclose(root["internal_count"], l_c + r_c)
+    assert (
+        abs(root["internal_weight"] - (l_w + r_w)) <= 1e-3
+    ), "root node's internal weight should be exactly the sum of its child nodes' internal counts"
+    assert (
+        root["internal_count"] == l_c + r_c
+    ), "root node's internal count should be exactly the sum of its child nodes' internal counts"
     return (root["internal_weight"], root["internal_count"])
 
 
 def assert_all_trees_valid(model_dump):
     for idx, tree in enumerate(model_dump["tree_info"]):
-        assert tree["tree_index"] == idx
+        assert tree["tree_index"] == idx, f"tree {idx} should have tree_index={idx}. Full tree: {tree}"
         assert_subtree_valid(tree["tree_structure"])

From 04886c0d6d2da2e3d145916556ef34d012b43ec9 Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@gmail.com>
Date: Wed, 9 Oct 2024 23:52:13 +0300
Subject: [PATCH 30/31] Small text QOL

---
 tests/python_package_test/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python_package_test/utils.py b/tests/python_package_test/utils.py
index d5e233c579de..85bf796b2cc4 100644
--- a/tests/python_package_test/utils.py
+++ b/tests/python_package_test/utils.py
@@ -253,7 +253,7 @@ def assert_subtree_valid(root):
     (r_w, r_c) = assert_subtree_valid(right_child)
     assert (
         abs(root["internal_weight"] - (l_w + r_w)) <= 1e-3
-    ), "root node's internal weight should be exactly the sum of its child nodes' internal counts"
+    ), "root node's internal weight should be approximately the sum of its child nodes' internal weights"
     assert (
         root["internal_count"] == l_c + r_c
     ), "root node's internal count should be exactly the sum of its child nodes' internal counts"

From 15fc3bfee90846a626560f4fc2acea991ca38f27 Mon Sep 17 00:00:00 2001
From: Atanas Dimitrov <nasko119@gmail.com>
Date: Thu, 10 Oct 2024 01:32:55 +0300
Subject: [PATCH 31/31] Add test_predict_stump on dask

---
 tests/python_package_test/test_dask.py | 47 +++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/tests/python_package_test/test_dask.py b/tests/python_package_test/test_dask.py
index dccc4065ba55..ff1fb2e155ed 100644
--- a/tests/python_package_test/test_dask.py
+++ b/tests/python_package_test/test_dask.py
@@ -1451,7 +1451,15 @@ def test_init_score(task, output, cluster, rng):
 
         model_factory = task_to_dask_factory[task]
 
-        params = {"n_estimators": 1, "num_leaves": 2, "time_out": 5}
+        params = {
+            "n_estimators": 1,
+            "num_leaves": 2,
+            "time_out": 5,
+            "seed": 708,
+            "deterministic": True,
+            "force_row_wise": True,
+            "num_thread": 1,
+        }
         num_classes = 1
         if task == "multiclass-classification":
             num_classes = 3
@@ -1533,6 +1541,43 @@ def test_predict_with_raw_score(task, output, cluster):
             assert_eq(raw_predictions, pred_proba_raw)
 
 
+@pytest.mark.parametrize("output", data_output)
+@pytest.mark.parametrize("use_init_score", [False, True])
+def test_predict_stump(output, use_init_score, cluster, rng):
+    with Client(cluster) as client:
+        task = "binary-classification"
+        n_samples = 1_000
+        _, _, _, _, dX, dy, _, dg = _create_data(objective=task, n_samples=n_samples, output=output)
+
+        model_factory = task_to_dask_factory[task]
+
+        params = {"objective": "binary", "n_estimators": 5, "min_data_in_leaf": n_samples}
+
+        if not use_init_score:
+            init_scores = None
+        elif output.startswith("dataframe"):
+            init_scores = dy.map_partitions(lambda x: pd.DataFrame(rng.uniform(size=x.size)))
+        else:
+            init_scores = dy.map_blocks(lambda x: rng.uniform(size=x.size))
+
+        model = model_factory(client=client, **params)
+        model.fit(dX, dy, group=dg, init_score=init_scores)
+        preds_1 = model.predict(dX, raw_score=True, num_iteration=1).compute()
+        preds_all = model.predict(dX, raw_score=True).compute()
+
+        if use_init_score:
+            # if init_score was provided, a model of stumps should predict all 0s
+            all_zeroes = np.full_like(preds_1, fill_value=0.0)
+            assert_eq(preds_1, all_zeroes)
+            assert_eq(preds_all, all_zeroes)
+        else:
+            # if init_score was not provided, prediction for a model of stumps should be
+            # the "average" of the labels
+            y_avg = np.log(dy.mean() / (1.0 - dy.mean()))
+            assert_eq(preds_1, np.full_like(preds_1, fill_value=y_avg))
+            assert_eq(preds_all, np.full_like(preds_all, fill_value=y_avg))
+
+
 def test_distributed_quantized_training(tmp_path, cluster):
     with Client(cluster) as client:
         X, y, w, _, dX, dy, dw, _ = _create_data(objective="regression", output="array")