
Commit

Add linear models
RAMitchell committed Sep 7, 2023
1 parent 5f6f6d3 commit f8f9248
Showing 12 changed files with 242 additions and 57 deletions.
10 changes: 10 additions & 0 deletions README.md
@@ -49,6 +49,16 @@ for i in range(total_estimators // estimators_per_batch):

The above example can be found here: [examples/batch_training](examples/batch_training/README.md).

### Different model types
Legateboost supports tree models, linear models, kernel ridge regression models, custom user models, and any combination of these.

The following example shows a model combining linear and decision tree base learners.

```python
model = lb.LBRegressor(base_models=(lb.models.Linear(),)*5 + (lb.models.Tree(max_depth=1),)*15, **params).fit(X, y)
```

<img src="examples/linear_model/linear_model.png" alt="drawing" width="800"/>

## Installation

4 changes: 4 additions & 0 deletions examples/linear_model/README.md
@@ -0,0 +1,4 @@
# Linear model
This example shows how to train a mixed model with linear and tree components. The dataset is a linear function with added noise and a step in the middle. The step is difficult for a linear model to capture, while the sloped sections are difficult for a tree model (note the characteristic axis-aligned step function it produces). We create a combined model by boosting 5 iterations of a linear base learner followed by 15 iterations of a tree base learner, as sketched below. The result is a model that fits both the linear trend and the step better than either model alone.
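
The combined model is constructed roughly as follows (a condensed sketch; the full, runnable script is `linear_model.py` in this directory):

```python
# 5 rounds with a linear base learner, then 15 rounds with depth-1 trees.
model = lb.LBRegressor(
    n_estimators=20,
    base_models=(lb.models.Linear(),) * 5 + (lb.models.Tree(max_depth=1),) * 15,
).fit(X, y)
```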

<img src="linear_model.png" alt="drawing" width="800"/>
3 changes: 3 additions & 0 deletions examples/linear_model/linear_model.png
Binary file not shown.
60 changes: 60 additions & 0 deletions examples/linear_model/linear_model.py
@@ -0,0 +1,60 @@
from pathlib import Path

import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib.ticker import FuncFormatter

import cunumeric as cn
import legateboost as lb

sns.set()
plt.rcParams["font.family"] = "serif"

rs = cn.random.RandomState(42)
X = cn.linspace(0, 10, 200)[:, cn.newaxis]
y_true = X[:, 0].copy()
y_true[X.shape[0] // 2 :] += 3.0
y = y_true + rs.normal(0, 0.25, X.shape[0])
params = {"n_estimators": 20, "learning_rate": 0.5, "verbose": True, "random_state": 20}
eval_result = {}
linear_model = lb.LBRegressor(base_models=(lb.models.Linear(),), **params).fit(
X, y, eval_set=[(X, y_true)], eval_result=eval_result
)
linear_test_error = cn.sqrt(eval_result["eval-0"]["mse"])
tree_model = lb.LBRegressor(base_models=(lb.models.Tree(max_depth=1),), **params).fit(
X, y, eval_set=[(X, y_true)], eval_result=eval_result
)
tree_test_error = cn.sqrt(eval_result["eval-0"]["mse"])
model = lb.LBRegressor(
base_models=(lb.models.Linear(),) * 5 + (lb.models.Tree(max_depth=1),) * 15,
**params
).fit(X, y, eval_set=[(X, y_true)], eval_result=eval_result)
mixed_test_error = cn.sqrt(eval_result["eval-0"]["mse"])

# plot
fig, ax = plt.subplots(1, 2, figsize=(12, 6))
plt.gca().xaxis.set_major_formatter(FuncFormatter(lambda x, _: int(x)))
sns.scatterplot(x=X[:, 0], y=y, color=".2", alpha=0.5, label="f(x)+noise", ax=ax[0])
sns.lineplot(x=X[:, 0], y=linear_model.predict(X), label="linear model", ax=ax[0])
sns.lineplot(x=X[:, 0], y=tree_model.predict(X), label="tree model", ax=ax[0])
sns.lineplot(x=X[:, 0], y=model.predict(X), label="linear + tree model", ax=ax[0])
ax[0].set_xlabel("X")

sns.lineplot(
x=range(params["n_estimators"]), y=linear_test_error, label="linear model", ax=ax[1]
)
sns.lineplot(
x=range(params["n_estimators"]), y=tree_test_error, label="tree model", ax=ax[1]
)
sns.lineplot(
x=range(params["n_estimators"]),
y=mixed_test_error,
label="linear + tree model",
ax=ax[1],
)
ax[1].set_xlabel("n_estimators")
ax[1].set_ylabel("test error")
plt.suptitle("Linear Models + Tree Models")
plt.tight_layout()
image_dir = Path(__file__).parent
plt.savefig(image_dir / "linear_model.png")
52 changes: 25 additions & 27 deletions legateboost/legateboost.py
@@ -1,6 +1,7 @@
from __future__ import annotations

import warnings
from copy import deepcopy
from typing import Any, List, Optional, Tuple, Union

import numpy as np
@@ -12,7 +13,7 @@

from .input_validation import check_sample_weight, check_X_y
from .metrics import BaseMetric, metrics
from .models import Tree
from .models import BaseModel, Tree
from .objectives import BaseObjective, objectives
from .utils import PickleCunumericMixin, preround

@@ -25,9 +26,9 @@ def __init__(
metric: Union[str, BaseMetric, list[Union[str, BaseMetric]]] = "default",
learning_rate: float = 0.1,
init: Union[str, None] = "average",
base_models: Tuple[BaseModel, ...] = (Tree(max_depth=3),),
verbose: int = 0,
random_state: Optional[np.random.RandomState] = None,
max_depth: int = 3,
version: str = "native",
) -> None:
self.n_estimators = n_estimators
Expand All @@ -37,9 +38,9 @@ def __init__(
self.init = init
self.verbose = verbose
self.random_state = random_state
self.max_depth = max_depth
self.version = version
self.model_init_: cn.ndarray
self.base_models = base_models

def _more_tags(self) -> Any:
return {
@@ -215,21 +216,19 @@ def _partial_fit(
# current model prediction
train_pred = self._predict(X)
eval_preds = [self._predict(X_eval) for X_eval, _, _ in _eval_set]
for _ in range(self.n_estimators):
for i in range(self.n_estimators):
# obtain gradients
g, h = self._get_weighted_gradient(
y, train_pred, sample_weight, self.learning_rate
)
# build new tree

# build new model
self.models_.append(
Tree(
X,
g,
h,
self.max_depth,
self.random_state_,
deepcopy(self.base_models[i % len(self.base_models)]).set_random_state(
self.random_state_
)
)
self.models_[-1].fit(X, g, h)

# update current predictions
train_pred += self.models_[-1].predict(X)
@@ -368,7 +367,7 @@ def fit(
"""
sample_weight = check_sample_weight(sample_weight, len(y))
self.n_features_in_ = X.shape[1]
self.models_: List[Tree] = []
self.models_: List[BaseModel] = []
# initialise random state if an integer was passed
self.random_state_ = check_random_state(self.random_state)

@@ -387,8 +386,6 @@
self.model_init_ = self._objective_instance.initialise_prediction(
y, sample_weight, self.init == "average"
)
self.sum_model_weights_ = sample_weight.sum()

self.is_fitted_ = True

return self._partial_fit(X, y, sample_weight, eval_set, eval_result)
@@ -407,7 +404,7 @@ def _predict(self, X: cn.ndarray) -> cn.ndarray:
pred += m.predict(X)
return pred

def dump_trees(self) -> str:
def dump_models(self) -> str:
check_is_fitted(self, "is_fitted_")
text = "init={}\n".format(self.model_init_)
for m in self.models_:
@@ -417,8 +414,7 @@ def dump_trees(self) -> str:

class LBRegressor(LBBase, RegressorMixin):
"""Implementation of a gradient boosting algorithm for regression problems.
Uses decision trees as weak learners and iteratively improves the model by
minimizing a loss function.
Learns component models that iteratively minimise a loss function.
Parameters
----------
Expand All @@ -431,19 +427,20 @@ class LBRegressor(LBBase, RegressorMixin):
the accompanying metric. Possible values: ['mse'] or instance of BaseMetric. Can
be a list of multiple metrics.
learning_rate :
The learning rate shrinks the contribution of each tree.
The learning rate shrinks the contribution of each model.
init :
The initial prediction of the model. If `None`, the initial prediction
is zero. If 'average', the initial prediction minimises a second order
approximation of the loss-function (simply the mean label in the case of
regression).
base_models :
The base models to use at each boosting iteration. The model used at
iteration i is base_models[i % len(base_models)].
verbose :
Controls the verbosity when fitting and predicting.
random_state :
Controls the randomness of the estimator. Pass an int for reproducible
results across multiple function calls.
max_depth :
The maximum depth of the decision trees.
Attributes
----------
@@ -477,19 +474,19 @@ def __init__(
metric: Union[str, BaseMetric, list[Union[str, BaseMetric]]] = "default",
learning_rate: float = 0.1,
init: Union[str, None] = "average",
base_models: Tuple[BaseModel, ...] = (Tree(max_depth=3),),
verbose: int = 0,
random_state: Optional[np.random.RandomState] = None,
max_depth: int = 3,
) -> None:
super().__init__(
n_estimators=n_estimators,
objective=objective,
metric=metric,
learning_rate=learning_rate,
init=init,
base_models=base_models,
verbose=verbose,
random_state=random_state,
max_depth=max_depth,
)

def _more_tags(self) -> Any:
@@ -581,18 +578,19 @@ class LBClassifier(LBBase, ClassifierMixin):
choose the accompanying metric. Possible values: ['log_loss', 'exp'] or
instance of BaseMetric. Can be a list of multiple metrics.
learning_rate :
The learning rate shrinks the contribution of each tree by `learning_rate`.
The learning rate shrinks the contribution of each model.
init :
The initial prediction of the model. If `None`, the initial prediction
is zero. If 'average', the initial prediction minimises a second order
approximation of the loss-function.
base_models :
The base models to use at each boosting iteration. The model used at
iteration i is base_models[i % len(base_models)].
verbose :
Controls the verbosity of the boosting process.
random_state :
Controls the randomness of the estimator. Pass an int for reproducible output
across multiple function calls.
max_depth :
The maximum depth of the individual trees.
Attributes
----------
@@ -627,19 +625,19 @@ def __init__(
metric: Union[str, BaseMetric, list[Union[str, BaseMetric]]] = "default",
learning_rate: float = 0.1,
init: Union[str, None] = "average",
base_models: Tuple[BaseModel, ...] = (Tree(max_depth=3),),
verbose: int = 0,
random_state: Optional[np.random.RandomState] = None,
max_depth: int = 3,
) -> None:
super().__init__(
n_estimators=n_estimators,
objective=objective,
metric=metric,
learning_rate=learning_rate,
init=init,
base_models=base_models,
verbose=verbose,
random_state=random_state,
max_depth=max_depth,
)

def partial_fit(
2 changes: 2 additions & 0 deletions legateboost/models/__init__.py
@@ -1 +1,3 @@
from .tree import Tree
from .linear import Linear
from .base_model import BaseModel
43 changes: 43 additions & 0 deletions legateboost/models/base_model.py
@@ -0,0 +1,43 @@
from abc import ABC, abstractmethod

import numpy as np

import cunumeric as cn

from ..utils import PickleCunumericMixin


class BaseModel(PickleCunumericMixin, ABC):
def set_random_state(self, random_state: np.random.RandomState) -> "BaseModel":
self.random_state = random_state
return self

@abstractmethod
def fit(
self,
X: cn.ndarray,
g: cn.ndarray,
h: cn.ndarray,
) -> "BaseModel":
pass

@abstractmethod
def update(
self,
X: cn.ndarray,
g: cn.ndarray,
h: cn.ndarray,
) -> "BaseModel":
pass

@abstractmethod
def predict(self, X: cn.ndarray) -> cn.ndarray:
pass

@abstractmethod
def __str__(self) -> str:
pass

@abstractmethod
def __eq__(self, other: object) -> bool:
pass
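
For illustration, a user-defined model only needs to implement these abstract methods. Below is a hedged sketch of a hypothetical constant-prediction model (not part of this commit); the fitted value is a single Newton step, analogous to the bias term in the `Linear` model introduced alongside this class.

```python
# Hypothetical example, shown only to illustrate the BaseModel interface.
import cunumeric as cn

from legateboost.models import BaseModel


class Constant(BaseModel):
    def fit(self, X: cn.ndarray, g: cn.ndarray, h: cn.ndarray) -> "Constant":
        # Single Newton step per output: minimises sum_i (g_i * c + 0.5 * h_i * c^2).
        self.value = -g.sum(axis=0) / h.sum(axis=0)
        return self

    def update(self, X: cn.ndarray, g: cn.ndarray, h: cn.ndarray) -> "Constant":
        return self.fit(X, g, h)

    def predict(self, X: cn.ndarray) -> cn.ndarray:
        # Broadcast the fitted constant to one prediction per row of X.
        return cn.tile(self.value, (X.shape[0], 1))

    def __str__(self) -> str:
        return "Constant: " + str(self.value) + "\n"

    def __eq__(self, other: object) -> bool:
        return isinstance(other, Constant) and bool((other.value == self.value).all())
```

Such a model could then be passed to an estimator via `base_models=(Constant(),)` like any other base learner.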
44 changes: 44 additions & 0 deletions legateboost/models/linear.py
@@ -0,0 +1,44 @@
import cunumeric as cn

from .base_model import BaseModel


class Linear(BaseModel):
def fit(
self,
X: cn.ndarray,
g: cn.ndarray,
h: cn.ndarray,
) -> "Linear":

num_outputs = g.shape[1]
self.bias = -g.sum(axis=0) / h.sum(axis=0)
g = g + self.bias[cn.newaxis, :] * h
self.betas = cn.zeros((X.shape[1], num_outputs))
for k in range(num_outputs):
W = cn.sqrt(h[:, k])
Xw = X * W[:, cn.newaxis]
yw = W * (-g[:, k] / h[:, k])
self.betas[:, k] = cn.linalg.lstsq(Xw, yw)[0]
return self

def clear(self) -> None:
self.bias.fill(0)
self.betas.fill(0)

def update(
self,
X: cn.ndarray,
g: cn.ndarray,
h: cn.ndarray,
) -> "Linear":
return self.fit(X, g, h)

def predict(self, X: cn.ndarray) -> cn.ndarray:
return self.bias + X.dot(self.betas)

def __str__(self) -> str:
return "Bias: " + str(self.bias) + "\nCoefficients: " + str(self.betas) + "\n"

def __eq__(self, other: object) -> bool:
return (other.betas == self.betas).all()
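
As a rough usage sketch, the model can be fitted directly on gradient/hessian pairs. The data and gradients below are illustrative, hand-constructed for a squared-error objective (gradient proportional to prediction − y, constant hessian); in normal use the estimators build these gradients internally, so this only demonstrates the `fit`/`predict` contract.

```python
# Minimal sketch: fit Linear on hand-constructed gradients and hessians.
import cunumeric as cn
import legateboost as lb

X = cn.linspace(0, 1, 100)[:, cn.newaxis]
y = 3.0 * X[:, 0] + 0.5

pred = cn.zeros(X.shape[0])          # current model prediction (all zeros)
g = (pred - y)[:, cn.newaxis]        # first-order gradients, shape (n, 1)
h = cn.ones_like(g)                  # second-order gradients, shape (n, 1)

linear = lb.models.Linear().fit(X, g, h)
print(linear)                        # prints the fitted bias and coefficients
```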
