From 51cac221b062094f5206de4e9ce1491817649ff0 Mon Sep 17 00:00:00 2001 From: Malte Foerster Date: Mon, 25 Sep 2023 14:29:32 +0000 Subject: [PATCH] added dedicated utilities for get/set/mod columns by index --- legateboost/__init__.py | 5 +++++ legateboost/metrics.py | 11 +++++++++-- legateboost/objectives.py | 8 +++++--- legateboost/utils.py | 40 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 59 insertions(+), 5 deletions(-) diff --git a/legateboost/__init__.py b/legateboost/__init__.py index 611dd62b..816d9c12 100644 --- a/legateboost/__init__.py +++ b/legateboost/__init__.py @@ -15,3 +15,8 @@ ExponentialObjective, BaseObjective, ) +from .utils import ( + pick_col_by_idx, + set_col_by_idx, + mod_col_by_idx, +) diff --git a/legateboost/metrics.py b/legateboost/metrics.py index e4cfd06c..74fc9cee 100644 --- a/legateboost/metrics.py +++ b/legateboost/metrics.py @@ -2,6 +2,8 @@ import cunumeric as cn +from .utils import pick_col_by_idx, set_col_by_idx + class BaseMetric(ABC): """The base class for metrics. 
@@ -169,7 +171,10 @@ def metric(self, y: cn.ndarray, pred: cn.ndarray, w: cn.ndarray) -> float: # multi-class case assert pred.ndim == 2 label = y.astype(cn.int32) - logloss = -cn.log(pred[cn.arange(label.size), label]) + + logloss = -cn.log(pick_col_by_idx(pred, label)) + # logloss = -cn.log(pred[cn.arange(label.size), label]) + return float((logloss * w).sum() / w_sum) def name(self) -> str: @@ -201,7 +206,9 @@ def metric(self, y: cn.ndarray, pred: cn.ndarray, w: cn.ndarray) -> float: K = pred.shape[1] # number of classes f = cn.log(pred) * (K - 1) # undo softmax y_k = cn.full((y.size, K), -1.0 / (K - 1.0)) - y_k[cn.arange(y.size), y.astype(cn.int32)] = 1.0 + + set_col_by_idx(y_k, y.astype(cn.int32), 1.0) + # y_k[cn.arange(y.size), y.astype(cn.int32)] = 1.0 exp = cn.exp(-1 / K * cn.sum(y_k * f, axis=1)) return float((exp * w).sum() / w.sum()) diff --git a/legateboost/objectives.py b/legateboost/objectives.py index 3945a15d..30807243 100644 --- a/legateboost/objectives.py +++ b/legateboost/objectives.py @@ -13,7 +13,7 @@ NormalLLMetric, QuantileMetric, ) -from .utils import preround +from .utils import mod_col_by_idx, preround, set_col_by_idx class BaseObjective(ABC): @@ -254,7 +254,8 @@ def gradient( label = y.astype(cn.int32).squeeze() h = pred * (1.0 - pred) g = pred.copy() - g[cn.arange(y.size), label] -= 1.0 + mod_col_by_idx(g, label, -1.0) + # g[cn.arange(y.size), label] -= 1.0 return g, cn.maximum(h, eps) def transform(self, pred: cn.ndarray) -> cn.ndarray: @@ -324,7 +325,8 @@ def gradient(self, y: cn.ndarray, pred: cn.ndarray) -> cn.ndarray: f = cn.log(pred) * (K - 1) # undo softmax y_k = cn.full((y.size, K), -1.0 / (K - 1.0)) labels = y.astype(cn.int32).squeeze() - y_k[cn.arange(y.size), labels] = 1.0 + set_col_by_idx(y_k, labels, 1.0) + # y_k[cn.arange(y.size), labels] = 1.0 exp = cn.exp(-1 / K * cn.sum(y_k * f, axis=1)) return ( diff --git a/legateboost/utils.py b/legateboost/utils.py index e06ae4e5..bbebcd16 100644 --- a/legateboost/utils.py +++ 
b/legateboost/utils.py
@@ -25,6 +25,72 @@ def replace(data: Any) -> None:
         self.__dict__.update(state)
 
 
+def pick_col_by_idx(a: cn.ndarray, b: cn.ndarray) -> cn.ndarray:
+    """Return ``a[i, b[i]]`` for every row ``i``.
+
+    Alternative implementation for ``a[cn.arange(b.size), b]`` that builds a
+    broadcast comparison mask instead of using advanced indexing.
+
+    Args:
+        a: 2-D array to read from.
+        b: 1-D array with one column index per row of ``a``.
+
+    Returns:
+        1-D array holding the selected entry of each row; a row whose index
+        equals no column position yields 0.
+    """
+    assert a.ndim == 2
+    assert b.ndim == 1
+    assert a.shape[0] == b.shape[0]
+
+    cols = cn.arange(a.shape[1])
+    mask = b[:, cn.newaxis] == cols[cn.newaxis, :]
+    # Select rather than multiply by the mask: ``inf * 0`` is NaN, so a
+    # non-finite entry in an unselected column would poison the row sum.
+    return cn.where(mask, a, cn.zeros_like(a)).sum(axis=1)
+
+
+def set_col_by_idx(a: cn.ndarray, b: cn.ndarray, delta: float) -> None:
+    """Assign ``delta`` to ``a[i, b[i]]`` for every row ``i``, in place.
+
+    Alternative implementation for ``a[cn.arange(b.size), b] = delta``.
+
+    Args:
+        a: 2-D array, modified in place.
+        b: 1-D array with one column index per row of ``a``.
+        delta: Value stored at the selected positions (assigned, not added).
+    """
+    assert a.ndim == 2
+    assert b.ndim == 1
+    assert a.shape[0] == b.shape[0]
+
+    cols = cn.arange(a.shape[1])
+    mask = b[:, cn.newaxis] == cols[cn.newaxis, :]
+    # Overwrite through the mask instead of ``a -= a * mask``: that form
+    # turns every NaN/inf already in ``a`` into NaN, selected or not.
+    a[:] = cn.where(mask, delta, a)
+
+
+def mod_col_by_idx(a: cn.ndarray, b: cn.ndarray, delta: float) -> None:
+    """Add ``delta`` to ``a[i, b[i]]`` for every row ``i``, in place.
+
+    Alternative implementation for ``a[cn.arange(b.size), b] += delta``.
+
+    Args:
+        a: 2-D array, modified in place.
+        b: 1-D array with one column index per row of ``a``.
+        delta: Amount added at the selected positions.
+    """
+    assert a.ndim == 2
+    assert b.ndim == 1
+    assert a.shape[0] == b.shape[0]
+
+    cols = cn.arange(a.shape[1])
+    mask = b[:, cn.newaxis] == cols[cn.newaxis, :]
+    # The mask scales ``delta`` to 0 everywhere outside the selection.
+    a += delta * mask
+
+
 def preround(x: cn.ndarray) -> cn.ndarray:
     """Apply this function to grad/hess ensure reproducible floating point
     summation.