From 51cac221b062094f5206de4e9ce1491817649ff0 Mon Sep 17 00:00:00 2001
From: Malte Foerster <mfoerster@nvidia.com>
Date: Mon, 25 Sep 2023 14:29:32 +0000
Subject: [PATCH] added dedicated utilities for get/set/mod columns by index

---
 legateboost/__init__.py   |  5 +++++
 legateboost/metrics.py    | 11 +++++++++--
 legateboost/objectives.py |  8 +++++---
 legateboost/utils.py      | 40 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 59 insertions(+), 5 deletions(-)

diff --git a/legateboost/__init__.py b/legateboost/__init__.py
index 611dd62b..816d9c12 100644
--- a/legateboost/__init__.py
+++ b/legateboost/__init__.py
@@ -15,3 +15,8 @@
     ExponentialObjective,
     BaseObjective,
 )
+from .utils import (
+    pick_col_by_idx,
+    set_col_by_idx,
+    mod_col_by_idx,
+)
diff --git a/legateboost/metrics.py b/legateboost/metrics.py
index e4cfd06c..74fc9cee 100644
--- a/legateboost/metrics.py
+++ b/legateboost/metrics.py
@@ -2,6 +2,8 @@
 
 import cunumeric as cn
 
+from .utils import pick_col_by_idx, set_col_by_idx
+
 
 class BaseMetric(ABC):
     """The base class for metrics.
@@ -169,7 +171,10 @@ def metric(self, y: cn.ndarray, pred: cn.ndarray, w: cn.ndarray) -> float:
         # multi-class case
         assert pred.ndim == 2
         label = y.astype(cn.int32)
-        logloss = -cn.log(pred[cn.arange(label.size), label])
+
+        logloss = -cn.log(pick_col_by_idx(pred, label))
+        # logloss = -cn.log(pred[cn.arange(label.size), label])
+
         return float((logloss * w).sum() / w_sum)
 
     def name(self) -> str:
@@ -201,7 +206,9 @@ def metric(self, y: cn.ndarray, pred: cn.ndarray, w: cn.ndarray) -> float:
         K = pred.shape[1]  # number of classes
         f = cn.log(pred) * (K - 1)  # undo softmax
         y_k = cn.full((y.size, K), -1.0 / (K - 1.0))
-        y_k[cn.arange(y.size), y.astype(cn.int32)] = 1.0
+
+        set_col_by_idx(y_k, y.astype(cn.int32), 1.0)
+        # y_k[cn.arange(y.size), y.astype(cn.int32)] = 1.0
 
         exp = cn.exp(-1 / K * cn.sum(y_k * f, axis=1))
         return float((exp * w).sum() / w.sum())
diff --git a/legateboost/objectives.py b/legateboost/objectives.py
index 3945a15d..30807243 100644
--- a/legateboost/objectives.py
+++ b/legateboost/objectives.py
@@ -13,7 +13,7 @@
     NormalLLMetric,
     QuantileMetric,
 )
-from .utils import preround
+from .utils import mod_col_by_idx, preround, set_col_by_idx
 
 
 class BaseObjective(ABC):
@@ -254,7 +254,8 @@ def gradient(
         label = y.astype(cn.int32).squeeze()
         h = pred * (1.0 - pred)
         g = pred.copy()
-        g[cn.arange(y.size), label] -= 1.0
+        mod_col_by_idx(g, label, -1.0)
+        # g[cn.arange(y.size), label] -= 1.0
         return g, cn.maximum(h, eps)
 
     def transform(self, pred: cn.ndarray) -> cn.ndarray:
@@ -324,7 +325,8 @@ def gradient(self, y: cn.ndarray, pred: cn.ndarray) -> cn.ndarray:
         f = cn.log(pred) * (K - 1)  # undo softmax
         y_k = cn.full((y.size, K), -1.0 / (K - 1.0))
         labels = y.astype(cn.int32).squeeze()
-        y_k[cn.arange(y.size), labels] = 1.0
+        set_col_by_idx(y_k, labels, 1.0)
+        # y_k[cn.arange(y.size), labels] = 1.0
         exp = cn.exp(-1 / K * cn.sum(y_k * f, axis=1))
 
         return (
diff --git a/legateboost/utils.py b/legateboost/utils.py
index e06ae4e5..bbebcd16 100644
--- a/legateboost/utils.py
+++ b/legateboost/utils.py
@@ -25,6 +25,46 @@ def replace(data: Any) -> None:
         self.__dict__.update(state)
 
 
+def pick_col_by_idx(a: cn.ndarray, b: cn.ndarray) -> cn.ndarray:
+    """Return a[i, b[i]] for every row i, i.e. a[cn.arange(b.size), b].
+
+    Builds a one-hot row mask rather than using advanced indexing; `a` must
+    be 2-D and `b` a 1-D array holding one column index per row of `a`.
+    """
+    assert a.ndim == 2 and b.ndim == 1 and a.shape[0] == b.shape[0]
+    cols = cn.arange(a.shape[1])
+    mask = b[:, cn.newaxis] == cols[cn.newaxis, :]
+    # where() instead of a * mask: inf/nan in unselected cells is ignored.
+    return cn.where(mask, a, 0).sum(axis=1)
+
+
+def set_col_by_idx(a: cn.ndarray, b: cn.ndarray, delta: float) -> None:
+    """Set a[i, b[i]] = delta for every row i, in place.
+
+    Equivalent to a[cn.arange(b.size), b] = delta, using a one-hot row mask
+    rather than advanced indexing; `a` is 2-D, `b` holds one column per row.
+    """
+    assert a.ndim == 2 and b.ndim == 1 and a.shape[0] == b.shape[0]
+    cols = cn.arange(a.shape[1])
+    mask = b[:, cn.newaxis] == cols[cn.newaxis, :]
+    # Select rather than subtract-then-add: the arithmetic form produces nan
+    # whenever `a` or `delta` is non-finite (inf * 0, inf - inf).
+    a[:] = cn.where(mask, delta, a)
+
+
+def mod_col_by_idx(a: cn.ndarray, b: cn.ndarray, delta: float) -> None:
+    """Add delta to a[i, b[i]] for every row i, in place.
+
+    Equivalent to a[cn.arange(b.size), b] += delta, using a one-hot row mask
+    rather than advanced indexing; `a` is 2-D, `b` holds one column per row.
+    """
+    assert a.ndim == 2 and b.ndim == 1 and a.shape[0] == b.shape[0]
+    cols = cn.arange(a.shape[1])
+    mask = b[:, cn.newaxis] == cols[cn.newaxis, :]
+    # where() keeps a non-finite delta from turning untouched cells into nan.
+    a += cn.where(mask, delta, 0)
+
+
 def preround(x: cn.ndarray) -> cn.ndarray:
     """Apply this function to grad/hess ensure reproducible floating point
     summation.