Accuracy tuning #189

Merged
24 commits, merged on Oct 3, 2023

Commits
4daae02
Add `TunablePrecision` object and example on how to use it
stijnh Mar 8, 2023
ee93e2a
Add initial support for `AccuracyObserver`
stijnh Mar 15, 2023
2b99fc7
Extend documentation in `accuracy.py`
stijnh Mar 29, 2023
777a2ff
Use `bfloat16` from `bfloat16` package instead of tensorflow
stijnh Apr 5, 2023
95c2d99
Make `SequentialRunner` always compute metrics for every configuration
stijnh Apr 19, 2023
16d1764
Fix test for `process_metrics` to allow overwriting existing results
stijnh Apr 19, 2023
74f87e0
Re-add `Observer.register_configuration` that was lost due in merge w…
stijnh Apr 19, 2023
ee351be
Support scalars in `TunablePrecision`
stijnh May 9, 2023
0789e01
Rename `flag_gpu_args` to `flat_gpu_args` in `core.py`
stijnh May 9, 2023
00d6522
Add support for observers in C backend
stijnh Jun 13, 2023
24e14ed
Allow `run_kernel` to deal with `Tunable` arguments
stijnh Jun 27, 2023
cd996b9
Support more names for floating-point types in TunablePrecision
stijnh Jul 3, 2023
c798e0f
Support several well-known error metrics in ErrorObserver
stijnh Jul 3, 2023
4bffd6e
Fix division by zero in ErrorObserver when ground-truth contains zeros
stijnh Jul 4, 2023
d67eefd
Add RMSRE error metric
stijnh Jul 4, 2023
8412f78
Fix crash when object is `ErrorConfig`
stijnh Aug 8, 2023
6bb6156
Add support for custom tolerances for relative error metrics
stijnh Oct 2, 2023
5070c97
Rename `AccuracyObserver` to `OutputObserver` and `ErrorObserver` to …
stijnh Oct 2, 2023
ff10346
Make `error_metric_from_name` more resilient in handling user provide…
stijnh Oct 2, 2023
afc8c7f
Move `OutputObserver` from `accuracy` to `observers`
stijnh Oct 2, 2023
754fc45
Rename method of `OutputObserver` from `process_kernel_output` to `pr…
stijnh Oct 2, 2023
91ed892
Add tests for `kernel_tuner.accuracy`
stijnh Oct 2, 2023
24346cd
Update accuracy example to use currect API
stijnh Oct 2, 2023
647c04b
Fix failing test for `error_metric_from_name`
stijnh Oct 2, 2023
64 changes: 64 additions & 0 deletions examples/cuda/accuracy.py
@@ -0,0 +1,64 @@
#!/usr/bin/env python

import numpy
from pprint import pprint
from kernel_tuner import tune_kernel
from kernel_tuner.accuracy import TunablePrecision, AccuracyObserver


def tune():
kernel_string = """
#include <cuda_fp16.h>
using half = __half;

template <typename T>
__global__ void vector_add(int n, const T* left, const T* right, T* output) {
int i = blockDim.x * blockIdx.x + threadIdx.x;

if (i < n) {
output[i] = left[i] + right[i];
}
}
"""

size = 100000000

n = numpy.int32(size)
a = numpy.random.randn(size).astype(numpy.float64)
b = numpy.random.randn(size).astype(numpy.float64)
c = numpy.zeros_like(b)

args = [
n,
TunablePrecision("float_type", a),
TunablePrecision("float_type", b),
TunablePrecision("float_type", c),
]

answer = [None, None, None, a + b]

tune_params = dict()
tune_params["block_size_x"] = [32, 64, 128, 256, 512, 1024]
tune_params["float_type"] = ["float", "double", "half"]

observers = [
AccuracyObserver("RMSE", "error_rmse"),
AccuracyObserver("MRE", "error_relative"),
]

results, env = tune_kernel(
"vector_add<float_type>",
kernel_string,
size,
args,
tune_params,
answer=answer,
observers=observers,
lang="CUDA",
)

pprint(results)


if __name__ == "__main__":
tune()
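
The two ``AccuracyObserver`` instances above add ``error_rmse`` and ``error_relative`` entries to every benchmarked configuration, next to the usual timing results. As a rough, hypothetical post-processing sketch (not part of this PR), the returned ``results`` list could be filtered to find the fastest configuration per precision that stays within an error budget; it assumes each record exposes kernel_tuner's ``time`` key and the tunable parameters as dictionary entries::

    # Hypothetical helper: pick the fastest configuration per float_type whose
    # RMSE stays below a chosen budget. Assumes each entry in `results` carries
    # "time", "float_type", and the "error_rmse" key added by AccuracyObserver.
    def best_within_budget(results, max_rmse=1e-3):
        best = {}
        for record in results:
            if record["error_rmse"] > max_rmse:
                continue
            precision = record["float_type"]
            if precision not in best or record["time"] < best[precision]["time"]:
                best[precision] = record
        return best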
308 changes: 308 additions & 0 deletions kernel_tuner/accuracy.py
@@ -0,0 +1,308 @@
from collections import UserDict
from typing import Dict
import numpy as np
import logging
import re

from kernel_tuner.observers import OutputObserver


class Tunable(UserDict):
    def __init__(self, param_key: str, arrays: Dict):
        """The ``Tunable`` object can be used as an input argument when tuning
        kernels. It is a container that holds several arrays internally and
        selects one array during benchmarking based on the value of a tunable parameter.

        Example
        -------
        Consider this example::

            arg = Tunable("matrix_layout", {"c": matrix, "f": matrix.transpose()})

        In this example, we create a Tunable object that selects either matrix
        or matrix.transpose() for benchmarking, depending on the value of the
        tunable parameter "matrix_layout". The first argument is the name of the tunable
        parameter. The second argument is a dictionary that maps the tunable parameter
        values "c" and "f" to the arrays ``matrix`` and ``matrix.transpose()``, respectively.
        During benchmarking, the Tunable object selects the array passed to the kernel based
        on the value of "matrix_layout".

        :param param_key: The tunable parameter used to select the array for benchmarking.
        :param arrays: A dictionary that maps the value of that tunable parameter to options.
        """
        if isinstance(arrays, (tuple, list)):
            arrays = dict(enumerate(arrays))

        super().__init__(arrays)
        self.param_key = param_key

    def select_for_configuration(self, params):
        if callable(self.param_key):
            option = self.param_key(params)
        elif self.param_key in params:
            option = params[self.param_key]
        else:
            option = eval(self.param_key, params, params)

        if option not in self.data:
            valid_options = ", ".join(map(str, self.data.keys()))
            raise KeyError(
                f"'{option}' is not a valid parameter value, should be one of: {valid_options}"
            )

        return self.data[option]

    def __call__(self, params):
        return self.select_for_configuration(params)


def _find_bfloat16_if_available():
    # Try to get bfloat16 if available.
    try:
        from bfloat16 import bfloat16
        return bfloat16
    except ImportError:
        pass

    try:
        from tensorflow import bfloat16
        return bfloat16.as_numpy_dtype
    except ImportError:
        pass

    logging.warning(
        "could not find `bfloat16` data type for numpy, "
        + "please install either the package `bfloat16` or `tensorflow`"
    )

    return None


def _to_float_dtype(x: str) -> np.dtype:
    """Convert a string to a numpy data type (``dtype``). This function recognizes
    common names (such as ``f16`` or ``kfloat``), and uses ``np.dtype(x)`` as a
    fallback.
    """
    if isinstance(x, str):
        x = x.lower()

    if x in ("bfloat16", "bf16", "kbfloat16", "__nv_bfloat16"):
        result = _find_bfloat16_if_available()
        if result is not None:
            return result

    if x in ("half", "f16", "float16", "__half", "khalf", 16):
        return np.half
    if x in ("float", "single", "f32", "float32", "kfloat", 32):
        return np.float32
    if x in ("double", "f64", "float64", "kdouble", 64):
        return np.float64

    return np.dtype(x)


class TunablePrecision(Tunable):
    def __init__(
        self, param_key: str, array: np.ndarray, dtypes: Dict[str, np.dtype] = None
    ):
        """The ``TunablePrecision`` object can be used as an input argument when tuning
        kernels. It is a container that internally holds several arrays
        containing the same data, but stored using different levels of
        precision. During benchmarking, one array is selected based on the value
        of the tunable parameter called ``param_key``.

        Example
        -------
        Consider this example::

            arg = TunablePrecision("matrix_type", matrix)

        This creates a ``TunablePrecision`` argument that selects the required
        floating-point precision for ``matrix`` based on the tunable parameter
        ``"matrix_type"``.

        :param param_key: The tunable parameter used to select the level of precision.
        :param array: The input array. It will automatically be converted to
            all data types given by ``dtypes``.
        :param dtypes: Dictionary that maps names to numpy data types. The default
            types are ``double``, ``float``, and ``half``.
        """
        # If no dtypes are given, generate a default list
        if not dtypes:
            dtypes = dict(half=np.half, float=np.single, double=np.double)

            bfloat16 = _find_bfloat16_if_available()
            if bfloat16 is not None:
                dtypes["bfloat16"] = bfloat16

        # If dtypes is a list, convert it to a dictionary
        if isinstance(dtypes, (list, tuple)):
            dtypes = dict((name, _to_float_dtype(name)) for name in dtypes)

        arrays = dict()
        for precision, dtype in dtypes.items():
            # We convert the array into a `np.ndarray` by using `np.array`.
            # However, if the value is a numpy scalar, then we do not want to
            # convert it into an array but instead keep the original value.
            if not isinstance(array, np.generic):
                array = np.array(array)

            arrays[precision] = array.astype(dtype)

        super().__init__(param_key, arrays)


def error_metric_from_name(user_key, EPS=1e-8):
    """Find the error metric function for the given name.

    Returns a function that takes two parameters (the ground-truth and the
    estimated values) as numpy arrays and returns the error between the two
    according to the given error metric.

    Valid values for the metric name are:

    * MSE (mean square error)
    * RMSE (root mean square error)
    * NRMSE (normalized root mean square error)
    * RMSRE (root mean square relative error)
    * RMSLE (root mean square log error)
    * MAE (mean absolute error)
    * MRE (mean relative error)
    * MALE (mean absolute log error)
    * max (maximum absolute error)
    * max rel (maximum relative error)

    The value of ``EPS`` is used for relative errors to prevent division by zero.
    """

    # Preprocess the provided name:
    # - convert to lowercase
    # - remove the word "error"
    # - remove underscores and dashes
    # - strip whitespace
    # - replace common abbreviations
    key = user_key.lower()
    key = re.sub(r"\berror\b", " ", key)
    key = re.sub(r"[\s_-]+", " ", key)
    key = key.strip()

    replacements = {
        "average": "mean",
        "avg": "mean",
        "square": "squared",
        "sq": "squared",
        "max": "maximum",
        "rel": "relative",
        "abs": "absolute",
        "log": "logarithmic",
    }

    for pattern, replacement in replacements.items():
        key = re.sub(rf"\b{pattern}\b", replacement, key)

    # Select the right metric
    if key in ("mse", "mean squared"):

        def metric(a, b):
            return np.average(np.square(a - b))

    elif key in ("rmse", "root mean squared"):

        def metric(a, b):
            return np.sqrt(np.average(np.square(a - b)))

    elif key in ("nrmse", "normalized root mean squared"):

        def metric(a, b):
            return np.sqrt(np.average(np.square(a - b)) / np.average(np.square(a)))

    elif key in ("mae", "absolute", "mean absolute"):

        def metric(a, b):
            return np.average(np.abs(a - b))

    elif key in ("mre", "relative", "mean relative"):

        def metric(a, b):
            return np.average(np.abs(a - b) / np.maximum(np.abs(a), EPS))

    elif key in ("rmsre", "root mean squared relative"):

        def metric(a, b):
            return np.sqrt(np.average(np.square(a - b) / np.square(np.maximum(a, EPS))))

    elif key in ("male", "mean absolute logarithmic"):

        def metric(a, b):
            return np.average(np.abs(np.log10(a + EPS) - np.log10(b + EPS)))

    elif key in ("rmsle", "root mean squared logarithmic"):

        def metric(a, b):
            return np.sqrt(np.average(np.square(np.log10(a + EPS) - np.log10(b + EPS))))

    elif key in ("maximum absolute", "maximum"):

        def metric(a, b):
            return np.amax(np.abs(a - b))

    elif key in ("maximum relative",):

        def metric(a, b):
            return np.amax(np.abs(a - b) / np.maximum(np.abs(a), EPS))

    else:
        raise ValueError(f"invalid error metric provided: {user_key}")

    # cast both arguments to f64 before passing them to the metric
    return lambda a, b: metric(
        a.astype(np.float64, copy=False), b.astype(np.float64, copy=False)
    )


class AccuracyObserver(OutputObserver):
    """``AccuracyObserver`` measures the error of the output produced by a kernel
    by comparing the output against a reference output.

    By default, it uses the root mean square error (RMSE) and reports it
    under the metric name ``"error"``.
    """

    def __init__(self, metric=None, key="error", *, atol=1e-8):
        """Create a new ``AccuracyObserver``.

        :param metric: The error metric. This should be a string that is
            accepted by ``error_metric_from_name`` such as ``"absolute error"``
            or ``"relative error"``. Alternatively, it can be a
            function that accepts two numpy arrays as arguments
            (the reference output and the kernel output).
        :param key: The name of this metric in the results.
        :param atol: The tolerance used in relative metrics to prevent
            division by zero. It is ignored by absolute error metrics.
        """

        # Default metric is RMSE
        if not metric:
            metric = "rmse"

        # If it is a string, convert it to a function
        if isinstance(metric, str):
            metric = error_metric_from_name(metric, atol)

        self.key = key
        self.metric = metric
        self.result = None

    def process_output(self, answers, outputs):
        errors = []

        for answer, output in zip(answers, outputs):
            if answer is not None:
                errors.append(self.metric(answer, output))

        self.result = max(errors)

    def get_results(self):
        return dict([(self.key, self.result)])
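
Two extension points documented above are worth noting: ``Tunable`` accepts a callable ``param_key`` (evaluated against the parameter dictionary), and ``AccuracyObserver`` accepts a callable ``metric`` taking the reference and kernel outputs. A minimal sketch of combining them, with illustrative names and a made-up 99th-percentile metric, might look like this::

    import numpy as np
    from kernel_tuner.accuracy import Tunable, AccuracyObserver

    matrix = np.random.randn(1024, 1024).astype(np.float32)

    # Callable param_key: derive the selected option from the full parameter dictionary.
    layouts = {"c": matrix, "f": np.asfortranarray(matrix)}
    arg = Tunable(lambda params: params["matrix_layout"], layouts)

    # Callable metric: a custom error function over (reference, kernel output).
    def p99_abs_error(reference, output):
        return np.percentile(np.abs(reference - output), 99)

    observer = AccuracyObserver(p99_abs_error, key="error_p99")

Note also that ``error_metric_from_name`` normalizes its input, so ``"RMSE"``, ``"rmse error"``, and ``"root mean square error"`` all resolve to the same metric.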
6 changes: 4 additions & 2 deletions kernel_tuner/backends/c.py
@@ -43,20 +43,22 @@
 class CFunctions(CompilerBackend):
     """Class that groups the code for running and compiling C functions"""
 
-    def __init__(self, iterations=7, compiler_options=None, compiler=None):
+    def __init__(self, iterations=7, compiler_options=None, compiler=None, observers=None):
         """instantiate CFunctions object used for interacting with C code
 
         :param iterations: Number of iterations used while benchmarking a kernel, 7 by default.
         :type iterations: int
         """
+        self.observers = observers or []
+        self.observers.append(CRuntimeObserver(self))
+
         self.iterations = iterations
         self.max_threads = 1024
         self.compiler_options = compiler_options
         # if no compiler is specified, use g++ by default
         self.compiler = compiler or "g++"
         self.lib = None
         self.using_openmp = False
-        self.observers = [CRuntimeObserver(self)]
         self.last_result = None
 
         try:
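
For completeness, the constructor change above means user-supplied observers now reach the compiler backend as well. A small, hypothetical illustration (whether a particular observer is useful for plain C kernels depends on the rest of the tuning setup)::

    from kernel_tuner.backends.c import CFunctions
    from kernel_tuner.accuracy import AccuracyObserver

    # User observers are kept and CRuntimeObserver is appended afterwards,
    # so both report their results during benchmarking.
    backend = CFunctions(iterations=7, observers=[AccuracyObserver("MRE", "error_relative")])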