cuDF numba cuda 12 updates #13337

Merged

Commits (42)
76109ce
move functions, use config option to enable mvc, do so before importi…
brandon-b-miller May 11, 2023
de2b678
move more of numbas setup to _numba_setup
brandon-b-miller May 11, 2023
442fefc
update comment in __init__
brandon-b-miller May 11, 2023
f5f915d
add a few docs
brandon-b-miller May 11, 2023
19dd82c
add a debug statement for now
brandon-b-miller May 11, 2023
d360008
only raise in cec mode
brandon-b-miller May 11, 2023
9c76c61
try bumping to numba 0.57
brandon-b-miller May 11, 2023
950f98f
conditionally import ptxcompiler
brandon-b-miller May 12, 2023
c8142ea
update comments a bit
brandon-b-miller May 15, 2023
8c7bae8
Apply suggestions from code review
brandon-b-miller May 15, 2023
c4edd0e
Merge branch 'cudf-numba-cuda12-updates' of github.com:brandon-b-mill…
brandon-b-miller May 15, 2023
b8d290d
_numba_setup -> _setup_numba
brandon-b-miller May 15, 2023
a50c642
address more reviews
brandon-b-miller May 15, 2023
6ba957c
Merge branch 'branch-23.06' into cudf-numba-cuda12-updates
brandon-b-miller May 16, 2023
96b6f01
use a context manager to squash occupancy warnings for numba kernels
brandon-b-miller May 16, 2023
47d8a2e
revert numba upgrade
brandon-b-miller May 17, 2023
66226c6
Merge branch 'branch-23.06' into cudf-numba-cuda12-updates
brandon-b-miller May 17, 2023
b9634f9
adjust logic, introduce runtime check in apply/groupby udfs
brandon-b-miller May 17, 2023
7a594b3
Address reviews
brandon-b-miller May 17, 2023
cf642d0
partially address reviews
brandon-b-miller May 18, 2023
e7b49e9
merge latest and resolve conflicts
brandon-b-miller May 19, 2023
cb5a756
Revert "revert numba upgrade"
brandon-b-miller May 19, 2023
dcc73e1
_setup_numba.py -> _numba.py, CUDFNumbaConfig -> _CUDFNumbaConfig
brandon-b-miller May 19, 2023
053193a
try vendoring some ptxcompiler code
brandon-b-miller May 19, 2023
c2285fa
add the comment about the MVC config option and numba.cuda imports ba…
brandon-b-miller May 19, 2023
b72eef0
fix imports
brandon-b-miller May 19, 2023
bd27a2f
switch error
brandon-b-miller May 19, 2023
8c9c070
slightly adjust logic
brandon-b-miller May 19, 2023
662b30b
add missing return
brandon-b-miller May 22, 2023
93af613
shuffle imports
brandon-b-miller May 22, 2023
2ff5c5d
delete explicit runtime check for MVC in cuda 12+ as it's needed more…
brandon-b-miller May 22, 2023
5cb0ce6
attempt a simplifying change
brandon-b-miller May 23, 2023
fc69663
update ptx/ctk version mapping table
brandon-b-miller May 23, 2023
0f1079c
merge latest and resolve conflicts
brandon-b-miller May 23, 2023
0797cde
fix local imports
brandon-b-miller May 23, 2023
e799992
remove extraneous testing code
brandon-b-miller May 23, 2023
41e92a9
Apply suggestions from code review
brandon-b-miller May 23, 2023
8839f8c
cleanup
brandon-b-miller May 23, 2023
c27a4b1
clarify cuda 12 comments
brandon-b-miller May 23, 2023
6925612
version map changes
brandon-b-miller May 23, 2023
439a667
remove function from ptxcompiler that is not used
brandon-b-miller May 23, 2023
1bfb382
address remaining reviews
brandon-b-miller May 23, 2023
conda/environments/all_cuda-118_arch-x86_64.yaml (1 addition, 1 deletion)
@@ -48,7 +48,7 @@ dependencies:
  - nbsphinx
  - ninja
  - notebook
- - numba>=0.56.4,<0.57
+ - numba>=0.57
  - numpy>=1.21,<1.24
  - numpydoc
  - nvcc_linux-64=11.8
conda/recipes/cudf/meta.yaml (1 addition, 1 deletion)
@@ -64,7 +64,7 @@ requirements:
  - typing_extensions
  - pandas >=1.3,<1.6.0dev0
  - cupy >=12.0.0
- - numba >=0.56.4,<0.57
+ - numba >=0.57
  - numpy >=1.21,<1.24 # Temporarily upper bound numpy to avoid overflow deprecations
  - {{ pin_compatible('pyarrow', max_pin='x.x.x') }}
  - libcudf {{ version }}
dependencies.yaml (1 addition, 1 deletion)
@@ -350,7 +350,7 @@ dependencies:
  packages:
  - cachetools
  - cuda-python>=11.7.1,<12.0
- - &numba numba>=0.56.4,<0.57
+ - &numba numba>=0.57
  - nvtx>=0.2.1
  - packaging
  - rmm==23.6.*
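In plain terms, the pin moves from 0.56.4 <= numba < 0.57 to numba >= 0.57, the Numba release that added CUDA 12 support. An illustrative environment probe, not part of the PR (packaging is already a cudf dependency per this diff):

# Illustrative check (not part of the PR) that an environment
# satisfies the new pin.
from packaging.version import Version
import numba

assert Version(numba.__version__) >= Version("0.57"), (
    f"cudf now requires numba>=0.57, found {numba.__version__}"
)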
python/cudf/cudf/__init__.py (5 additions, 23 deletions)
@@ -1,7 +1,12 @@
  # Copyright (c) 2018-2023, NVIDIA CORPORATION.

+ # this must be called before numba.cuda is imported, because
+ # it sets the numba config variable responsible for enabling
+ # MVC. Setting it after importing cuda has no effect.
+ from cudf.utils._numba import _setup_numba
  from cudf.utils.gpu_utils import validate_setup

+ _setup_numba()
  validate_setup()

  import cupy
@@ -83,32 +88,9 @@
  from cudf.utils.dtypes import _NA_REP
  from cudf.utils.utils import clear_cache, set_allocator

- try:
-     from cubinlinker.patch import patch_numba_linker_if_needed
- except ImportError:
-     pass
- else:
-     # Patch Numba to support CUDA enhanced compatibility.
-     # cuDF requires a stronger set of conditions than what is
-     # checked by patch_numba_linker_if_needed due to the PTX
-     # files needed for JIT Groupby Apply and string UDFs
-     from cudf.core.udf.utils import _PTX_FILE, _setup_numba_linker
-
-     _setup_numba_linker(_PTX_FILE)
-
-     del patch_numba_linker_if_needed
-
  cuda.set_memory_manager(RMMNumbaManager)
  cupy.cuda.set_allocator(rmm_cupy_allocator)

- try:
-     # Numba 0.54: Disable low occupancy warnings
-     numba_config.CUDA_LOW_OCCUPANCY_WARNINGS = 0
- except AttributeError:
-     # Numba < 0.54: No occupancy warnings
-     pass
- del numba_config


  rmm.register_reinitialize_hook(clear_cache)

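The ordering matters because Numba reads its MVC (minor version compatibility) flag once, when the CUDA subsystem initializes. A minimal sketch of the constraint, assuming Numba's CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY config attribute; the real _setup_numba in cudf/utils/_numba.py also does driver/runtime version checks and chooses between ptxcompiler (CUDA 11) and the vendored MVC path (see commit 053193a), none of which is reproduced here:

# Minimal sketch of the import-order constraint; _setup_numba_sketch
# is a hypothetical stand-in for cudf's real _setup_numba.
from numba import config as numba_config

def _setup_numba_sketch():
    # Effective only if numba.cuda has not been imported yet: the flag
    # is read when the CUDA target initializes.
    numba_config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY = 1

_setup_numba_sketch()
import numba.cuda  # noqa: E402 -- must come after the config flip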
python/cudf/cudf/core/indexed_frame.py (3 additions, 1 deletion)
@@ -68,6 +68,7 @@
    _return_arr_from_dtype,
)
from cudf.utils import docutils
+ from cudf.utils._numba import _CUDFNumbaConfig
from cudf.utils.utils import _cudf_nvtx_annotate

doc_reset_index_template = """
@@ -2193,7 +2194,8 @@ def _apply(self, func, kernel_getter, *args, **kwargs):
        input_args = _get_input_args_from_frame(self)
        launch_args = output_args + input_args + list(args)
        try:
-           kernel.forall(len(self))(*launch_args)
+           with _CUDFNumbaConfig():
+               kernel.forall(len(self))(*launch_args)
        except Exception as e:
            raise RuntimeError("UDF kernel execution failed.") from e

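_CUDFNumbaConfig is defined in cudf/utils/_numba.py and does not appear in this diff. Going by commit 96b6f01 ("use a context manager to squash occupancy warnings for numba kernels"), it plausibly scopes the CUDA_LOW_OCCUPANCY_WARNINGS toggle that the old __init__.py flipped globally; a sketch under that assumption, with the class renamed to mark it as hypothetical:

# Sketch only: the real _CUDFNumbaConfig is not shown in this diff.
# This version scopes Numba's low-occupancy warning suppression to a
# single kernel launch instead of disabling it process-wide.
from numba import config as numba_config

class _CUDFNumbaConfigSketch:
    def __enter__(self):
        self._saved = numba_config.CUDA_LOW_OCCUPANCY_WARNINGS
        numba_config.CUDA_LOW_OCCUPANCY_WARNINGS = 0
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Restore the previous setting even if the launch raised.
        numba_config.CUDA_LOW_OCCUPANCY_WARNINGS = self._saved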
python/cudf/cudf/core/udf/groupby_utils.py (3 additions, 1 deletion)
@@ -27,6 +27,7 @@
    _supported_cols_from_frame,
    _supported_dtypes_from_frame,
)
+ from cudf.utils._numba import _CUDFNumbaConfig
from cudf.utils.utils import _cudf_nvtx_annotate


@@ -196,7 +197,8 @@ def jit_groupby_apply(offsets, grouped_values, function, *args):
    )

    # Launch kernel
-   specialized[ngroups, tpb](*launch_args)
+   with _CUDFNumbaConfig():
+       specialized[ngroups, tpb](*launch_args)

    return output

python/cudf/cudf/core/udf/utils.py (3 additions, 145 deletions)
@@ -1,6 +1,5 @@
  # Copyright (c) 2020-2023, NVIDIA CORPORATION.

- import glob
  import os
  from typing import Any, Callable, Dict

@@ -13,7 +12,6 @@
  from numba.core.datamodel import default_manager, models
  from numba.core.errors import TypingError
  from numba.core.extending import register_model
- from numba.cuda.cudadrv.driver import Linker
  from numba.np import numpy_support
  from numba.types import CPointer, Poison, Record, Tuple, boolean, int64, void

@@ -33,6 +31,7 @@
    udf_string,
)
from cudf.utils import cudautils
+ from cudf.utils._numba import _get_ptx_file
from cudf.utils.dtypes import (
    BOOL_TYPES,
    DATETIME_TYPES,
@@ -63,58 +62,6 @@
precompiled: cachetools.LRUCache = cachetools.LRUCache(maxsize=32)
launch_arg_getters: Dict[Any, Any] = {}


- def _get_best_ptx_file(archs, max_compute_capability):
-     """
-     Determine which of the available PTX files is the most
-     recent, up to and including the device cc
-     """
-     filtered_archs = [x for x in archs if x[0] <= max_compute_capability]
-     if filtered_archs:
-         return max(filtered_archs, key=lambda y: y[0])
-     else:
-         return None
-
-
- def _get_ptx_file(path, prefix):
-     if "RAPIDS_NO_INITIALIZE" in os.environ:
-         # cc=60 ptx is always built
-         cc = int(os.environ.get("STRINGS_UDF_CC", "60"))
-     else:
-         dev = cuda.get_current_device()
-
-         # Load the highest compute capability file available that is less than
-         # the current device's.
-         cc = int("".join(str(x) for x in dev.compute_capability))
-     files = glob.glob(os.path.join(path, f"{prefix}*.ptx"))
-     if len(files) == 0:
-         raise RuntimeError(f"Missing PTX files for cc={cc}")
-     regular_sms = []
-
-     for f in files:
-         file_name = os.path.basename(f)
-         sm_number = file_name.rstrip(".ptx").lstrip(prefix)
-         if sm_number.endswith("a"):
-             processed_sm_number = int(sm_number.rstrip("a"))
-             if processed_sm_number == cc:
-                 return f
-         else:
-             regular_sms.append((int(sm_number), f))
-
-     regular_result = None
-
-     if regular_sms:
-         regular_result = _get_best_ptx_file(regular_sms, cc)
-
-     if regular_result is None:
-         raise RuntimeError(
-             "This cuDF installation is missing the necessary PTX "
-             f"files that are <={cc}."
-         )
-     else:
-         return regular_result[1]


_PTX_FILE = _get_ptx_file(os.path.dirname(__file__), "shim_")
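These helpers are being relocated to cudf/utils/_numba.py, not rewritten. The selection rule they encode: an "a"-suffixed file (e.g. shim_90a.ptx) is used only on an exact compute-capability match, while regular files fall back to the newest architecture at or below the device's. A self-contained illustration, with hypothetical (cc, filename) pairs:

# Self-contained illustration of the selection rule; file names
# are hypothetical.
def _get_best_ptx_file(archs, max_compute_capability):
    # Newest architecture no newer than the device's compute capability.
    filtered_archs = [x for x in archs if x[0] <= max_compute_capability]
    return max(filtered_archs, key=lambda y: y[0]) if filtered_archs else None

archs = [(60, "shim_60.ptx"), (70, "shim_70.ptx"), (80, "shim_80.ptx")]
print(_get_best_ptx_file(archs, 75))  # (70, 'shim_70.ptx')
print(_get_best_ptx_file(archs, 86))  # (80, 'shim_80.ptx')
print(_get_best_ptx_file(archs, 50))  # None -> RuntimeError upstream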


@@ -306,6 +253,8 @@ def _compile_or_get(
    we then obtain the return type from that separate compilation and
    use it to allocate an output column of the right dtype.
    """
+   # runtime check for CEC mode which is disabled for CUDA 12 for now
+
    if not all(is_scalar(arg) for arg in args):
        raise TypeError("only scalar valued args are supported by apply")

@@ -392,97 +341,6 @@ def _get_extensionty_size(ty):
    return llty.get_abi_size(target_data)


- def _get_cuda_version_from_ptx_file(path):
-     """
-     https://docs.nvidia.com/cuda/parallel-thread-execution/
-     Each PTX module must begin with a .version
-     directive specifying the PTX language version
-
-     example header:
-     //
-     // Generated by NVIDIA NVVM Compiler
-     //
-     // Compiler Build ID: CL-31057947
-     // Cuda compilation tools, release 11.6, V11.6.124
-     // Based on NVVM 7.0.1
-     //
-
-     .version 7.6
-     .target sm_52
-     .address_size 64
-
-     """
-     with open(path) as ptx_file:
-         for line in ptx_file:
-             if line.startswith(".version"):
-                 ver_line = line
-                 break
-         else:
-             raise ValueError("Could not read CUDA version from ptx file.")
-     version = ver_line.strip("\n").split(" ")[1]
-     # from ptx_docs/release_notes above:
-     ver_map = {
-         "7.5": (11, 5),
-         "7.6": (11, 6),
-         "7.7": (11, 7),
-         "7.8": (11, 8),
-         "8.0": (12, 0),
-     }
-
-     cuda_ver = ver_map.get(version)
-     if cuda_ver is None:
-         raise ValueError(
-             f"Could not map PTX version {version} to a CUDA version"
-         )
-
-     return cuda_ver
-
-
- def _setup_numba_linker(path):
-     from ptxcompiler.patch import NO_DRIVER, safe_get_versions
-
-     from cudf.core.udf.utils import (
-         _get_cuda_version_from_ptx_file,
-         maybe_patch_numba_linker,
-     )
-
-     versions = safe_get_versions()
-     if versions != NO_DRIVER:
-         driver_version, runtime_version = versions
-         ptx_toolkit_version = _get_cuda_version_from_ptx_file(path)
-         maybe_patch_numba_linker(
-             driver_version, runtime_version, ptx_toolkit_version
-         )
-
-
- def maybe_patch_numba_linker(
-     driver_version, runtime_version, ptx_toolkit_version
- ):
-     from cubinlinker.patch import (
-         _numba_version_ok,
-         get_logger,
-         new_patched_linker,
-     )
-
-     # Numba thinks cubinlinker is only needed if the driver is older than
-     # the ctk, but when PTX files are present, it might also need to patch
-     # because those PTX files may be newer than the driver as well
-     logger = get_logger()
-
-     if (driver_version < ptx_toolkit_version) or (
-         driver_version < runtime_version
-     ):
-         logger.debug(
-             "Driver version %s.%s needs patching due to PTX files"
-             % driver_version
-         )
-         if _numba_version_ok:
-             logger.debug("Patching Numba Linker")
-             Linker.new = new_patched_linker
-         else:
-             logger.debug("Cannot patch Numba Linker - unsupported version")
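The comment in maybe_patch_numba_linker captures the subtlety that motivated it: Numba's own check compares only driver and runtime versions, but cuDF also ships prebuilt PTX whose toolkit version can exceed the driver's. A worked example with illustrative version tuples:

# Illustrative version tuples; (11, 6) is what ver_map above would
# return for a ".version 7.6" PTX header.
driver_version = (11, 5)
runtime_version = (11, 5)
ptx_toolkit_version = (11, 6)

# The driver-vs-runtime comparison alone says no patch is needed...
print(driver_version < runtime_version)      # False
# ...but the shipped PTX is newer than the driver, so cuDF patched anyway.
print(driver_version < ptx_toolkit_version)  # True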


@initfunc
def set_malloc_heap_size(size=None):
"""
python/cudf/cudf/tests/test_extension_compilation.py (10 additions, 5 deletions)
@@ -1,4 +1,4 @@
- # Copyright (c) 2021-2022, NVIDIA CORPORATION.
+ # Copyright (c) 2021-2023, NVIDIA CORPORATION.
import operator

import cupy as cp
@@ -12,6 +12,7 @@
from cudf.core.udf.api import Masked
from cudf.core.udf.masked_typing import MaskedType
from cudf.testing._utils import parametrize_numeric_dtypes_pairwise
+ from cudf.utils._numba import _CUDFNumbaConfig

arith_ops = (
operator.add,
@@ -106,7 +107,8 @@ def test_kernel(x, y, err):
            err[0] = 3

    err = cp.asarray([0], dtype="int8")
-   test_kernel[1, 1](1, 2, err)
+   with _CUDFNumbaConfig():
+       test_kernel[1, 1](1, 2, err)
    assert err[0] == 0


@@ -214,7 +216,8 @@ def test_kernel(err):
            err[0] = 2

    err = cp.asarray([0], dtype="int8")
-   test_kernel[1, 1](err)
+   with _CUDFNumbaConfig():
+       test_kernel[1, 1](err)
    assert err[0] == 0


@@ -304,7 +307,8 @@ def test_kernel(err):
            err[0] = 2

    err = cp.asarray([0], dtype="int8")
-   test_kernel[1, 1](err)
+   with _CUDFNumbaConfig():
+       test_kernel[1, 1](err)
    assert err[0] == 0


@@ -326,5 +330,6 @@ def test_kernel(err):
            err[0] = 1

    err = cp.asarray([0], dtype="int8")
-   test_kernel[1, 1](err)
+   with _CUDFNumbaConfig():
+       test_kernel[1, 1](err)
    assert err[0] == 0
python/cudf/cudf/tests/test_string_udfs.py (5 additions, 3 deletions)
@@ -22,6 +22,7 @@
)
from cudf.core.udf.utils import _PTX_FILE, _get_extensionty_size
from cudf.testing._utils import assert_eq, sv_to_udf_str
+ from cudf.utils._numba import _CUDFNumbaConfig


def get_kernels(func, dtype, size):
@@ -85,16 +86,17 @@ def run_udf_test(data, func, dtype):
    sv_kernel, udf_str_kernel = get_kernels(func, dtype, len(data))

    expect = pd.Series(data).apply(func)
-
-   sv_kernel.forall(len(data))(str_views, output)
+   with _CUDFNumbaConfig():
+       sv_kernel.forall(len(data))(str_views, output)
    if dtype == "str":
        result = column_from_udf_string_array(output)
    else:
        result = output

    got = cudf.Series(result, dtype=dtype)
    assert_eq(expect, got, check_dtype=False)
-   udf_str_kernel.forall(len(data))(str_views, output)
+   with _CUDFNumbaConfig():
+       udf_str_kernel.forall(len(data))(str_views, output)
    if dtype == "str":
        result = column_from_udf_string_array(output)
    else: