From 76109ce40b3694a8b61ae39fa44d9e87468abc47 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 11 May 2023 04:35:52 -0700 Subject: [PATCH 01/37] move functions, use config option to enable mvc, do so before importing driver --- python/cudf/cudf/__init__.py | 7 +- python/cudf/cudf/core/udf/utils.py | 146 +------------------------ python/cudf/cudf/utils/_numba_setup.py | 131 ++++++++++++++++++++++ 3 files changed, 136 insertions(+), 148 deletions(-) create mode 100644 python/cudf/cudf/utils/_numba_setup.py diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index 06310e278a2..dfcd9f11236 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -5,7 +5,7 @@ validate_setup() import cupy -from numba import config as numba_config, cuda +from numba import config as numba_config import rmm from rmm.allocators.cupy import rmm_cupy_allocator @@ -92,11 +92,12 @@ # cuDF requires a stronger set of conditions than what is # checked by patch_numba_linker_if_needed due to the PTX # files needed for JIT Groupby Apply and string UDFs - from cudf.core.udf.utils import _PTX_FILE, _setup_numba_linker + from cudf.utils._numba_setup import ANY_PTX_FILE, _setup_numba_linker - _setup_numba_linker(_PTX_FILE) + _setup_numba_linker(ANY_PTX_FILE) del patch_numba_linker_if_needed +from numba import cuda cuda.set_memory_manager(RMMNumbaManager) cupy.cuda.set_allocator(rmm_cupy_allocator) diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py index d890b94127f..a387880c55d 100644 --- a/python/cudf/cudf/core/udf/utils.py +++ b/python/cudf/cudf/core/udf/utils.py @@ -1,6 +1,5 @@ # Copyright (c) 2020-2023, NVIDIA CORPORATION. 
-import glob import os from typing import Any, Callable, Dict @@ -13,7 +12,6 @@ from numba.core.datamodel import default_manager, models from numba.core.errors import TypingError from numba.core.extending import register_model -from numba.cuda.cudadrv.driver import Linker from numba.np import numpy_support from numba.types import CPointer, Poison, Record, Tuple, boolean, int64, void @@ -33,6 +31,7 @@ udf_string, ) from cudf.utils import cudautils +from cudf.utils._numba_setup import _get_ptx_file from cudf.utils.dtypes import ( BOOL_TYPES, DATETIME_TYPES, @@ -63,58 +62,6 @@ precompiled: cachetools.LRUCache = cachetools.LRUCache(maxsize=32) launch_arg_getters: Dict[Any, Any] = {} - -def _get_best_ptx_file(archs, max_compute_capability): - """ - Determine of the available PTX files which one is - the most recent up to and including the device cc - """ - filtered_archs = [x for x in archs if x[0] <= max_compute_capability] - if filtered_archs: - return max(filtered_archs, key=lambda y: y[0]) - else: - return None - - -def _get_ptx_file(path, prefix): - if "RAPIDS_NO_INITIALIZE" in os.environ: - # cc=60 ptx is always built - cc = int(os.environ.get("STRINGS_UDF_CC", "60")) - else: - dev = cuda.get_current_device() - - # Load the highest compute capability file available that is less than - # the current device's. 
- cc = int("".join(str(x) for x in dev.compute_capability)) - files = glob.glob(os.path.join(path, f"{prefix}*.ptx")) - if len(files) == 0: - raise RuntimeError(f"Missing PTX files for cc={cc}") - regular_sms = [] - - for f in files: - file_name = os.path.basename(f) - sm_number = file_name.rstrip(".ptx").lstrip(prefix) - if sm_number.endswith("a"): - processed_sm_number = int(sm_number.rstrip("a")) - if processed_sm_number == cc: - return f - else: - regular_sms.append((int(sm_number), f)) - - regular_result = None - - if regular_sms: - regular_result = _get_best_ptx_file(regular_sms, cc) - - if regular_result is None: - raise RuntimeError( - "This cuDF installation is missing the necessary PTX " - f"files that are <={cc}." - ) - else: - return regular_result[1] - - _PTX_FILE = _get_ptx_file(os.path.dirname(__file__), "shim_") @@ -390,97 +337,6 @@ def _get_extensionty_size(ty): return llty.get_abi_size(target_data) -def _get_cuda_version_from_ptx_file(path): - """ - https://docs.nvidia.com/cuda/parallel-thread-execution/ - Each PTX module must begin with a .version - directive specifying the PTX language version - - example header: - // - // Generated by NVIDIA NVVM Compiler - // - // Compiler Build ID: CL-31057947 - // Cuda compilation tools, release 11.6, V11.6.124 - // Based on NVVM 7.0.1 - // - - .version 7.6 - .target sm_52 - .address_size 64 - - """ - with open(path) as ptx_file: - for line in ptx_file: - if line.startswith(".version"): - ver_line = line - break - else: - raise ValueError("Could not read CUDA version from ptx file.") - version = ver_line.strip("\n").split(" ")[1] - # from ptx_docs/release_notes above: - ver_map = { - "7.5": (11, 5), - "7.6": (11, 6), - "7.7": (11, 7), - "7.8": (11, 8), - "8.0": (12, 0), - } - - cuda_ver = ver_map.get(version) - if cuda_ver is None: - raise ValueError( - f"Could not map PTX version {version} to a CUDA version" - ) - - return cuda_ver - - -def _setup_numba_linker(path): - from ptxcompiler.patch import 
NO_DRIVER, safe_get_versions - - from cudf.core.udf.utils import ( - _get_cuda_version_from_ptx_file, - maybe_patch_numba_linker, - ) - - versions = safe_get_versions() - if versions != NO_DRIVER: - driver_version, runtime_version = versions - ptx_toolkit_version = _get_cuda_version_from_ptx_file(path) - maybe_patch_numba_linker( - driver_version, runtime_version, ptx_toolkit_version - ) - - -def maybe_patch_numba_linker( - driver_version, runtime_version, ptx_toolkit_version -): - from cubinlinker.patch import ( - _numba_version_ok, - get_logger, - new_patched_linker, - ) - - # Numba thinks cubinlinker is only needed if the driver is older than - # the ctk, but when PTX files are present, it might also need to patch - # because those PTX files may newer than the driver as well - logger = get_logger() - - if (driver_version < ptx_toolkit_version) or ( - driver_version < runtime_version - ): - logger.debug( - "Driver version %s.%s needs patching due to PTX files" - % driver_version - ) - if _numba_version_ok: - logger.debug("Patching Numba Linker") - Linker.new = new_patched_linker - else: - logger.debug("Cannot patch Numba Linker - unsupported version") - - @initfunc def set_malloc_heap_size(size=None): """ diff --git a/python/cudf/cudf/utils/_numba_setup.py b/python/cudf/cudf/utils/_numba_setup.py new file mode 100644 index 00000000000..4a67055d60a --- /dev/null +++ b/python/cudf/cudf/utils/_numba_setup.py @@ -0,0 +1,131 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. 
+ +import glob +import os + +from numba import config + +ANY_PTX_FILE = os.path.dirname(__file__) + "/../core/udf/shim_60.ptx" + + +def _get_best_ptx_file(archs, max_compute_capability): + """ + Determine of the available PTX files which one is + the most recent up to and including the device cc + """ + filtered_archs = [x for x in archs if x[0] <= max_compute_capability] + if filtered_archs: + return max(filtered_archs, key=lambda y: y[0]) + else: + return None + + +def _get_ptx_file(path, prefix): + if "RAPIDS_NO_INITIALIZE" in os.environ: + # cc=60 ptx is always built + cc = int(os.environ.get("STRINGS_UDF_CC", "60")) + else: + from numba import cuda + + dev = cuda.get_current_device() + + # Load the highest compute capability file available that is less than + # the current device's. + cc = int("".join(str(x) for x in dev.compute_capability)) + files = glob.glob(os.path.join(path, f"{prefix}*.ptx")) + if len(files) == 0: + raise RuntimeError(f"Missing PTX files for cc={cc}") + regular_sms = [] + + for f in files: + file_name = os.path.basename(f) + sm_number = file_name.rstrip(".ptx").lstrip(prefix) + if sm_number.endswith("a"): + processed_sm_number = int(sm_number.rstrip("a")) + if processed_sm_number == cc: + return f + else: + regular_sms.append((int(sm_number), f)) + + regular_result = None + + if regular_sms: + regular_result = _get_best_ptx_file(regular_sms, cc) + + if regular_result is None: + raise RuntimeError( + "This cuDF installation is missing the necessary PTX " + f"files that are <={cc}." 
+ ) + else: + return regular_result[1] + + +def _setup_numba_linker(path): + from ptxcompiler.patch import NO_DRIVER, safe_get_versions + + versions = safe_get_versions() + if versions != NO_DRIVER: + driver_version, runtime_version = versions + ptx_toolkit_version = _get_cuda_version_from_ptx_file(path) + maybe_patch_numba_linker( + driver_version, runtime_version, ptx_toolkit_version + ) + + +def maybe_patch_numba_linker( + driver_version, runtime_version, ptx_toolkit_version +): + # Numba thinks cubinlinker is only needed if the driver is older than + # the ctk, but when PTX files are present, it might also need to patch + # because those PTX files may newer than the driver as well + if (driver_version < ptx_toolkit_version) or ( + driver_version < runtime_version + ): + config.NUMBA_CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY = 1 + + +def _get_cuda_version_from_ptx_file(path): + """ + https://docs.nvidia.com/cuda/parallel-thread-execution/ + Each PTX module must begin with a .version + directive specifying the PTX language version + + example header: + // + // Generated by NVIDIA NVVM Compiler + // + // Compiler Build ID: CL-31057947 + // Cuda compilation tools, release 11.6, V11.6.124 + // Based on NVVM 7.0.1 + // + + .version 7.6 + .target sm_52 + .address_size 64 + + """ + with open(path) as ptx_file: + for line in ptx_file: + if line.startswith(".version"): + ver_line = line + break + else: + raise ValueError("Could not read CUDA version from ptx file.") + version = ver_line.strip("\n").split(" ")[1] + # from ptx_docs/release_notes above: + ver_map = { + "7.5": (11, 5), + "7.6": (11, 6), + "7.7": (11, 7), + "7.8": (11, 8), + "8.0": (12, 0), + } + + cuda_ver = ver_map.get(version) + if cuda_ver is None: + raise ValueError( + f"Could not map PTX version {version} to a CUDA version" + ) + + return cuda_ver From de2b678944294dd8040b1a352da88a1ac2f5dd3f Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 11 May 2023 04:53:14 -0700 Subject: [PATCH 02/37] move 
more of numbas setup to _numba_setup --- python/cudf/cudf/__init__.py | 25 ++++--------------------- python/cudf/cudf/utils/_numba_setup.py | 5 +++++ 2 files changed, 9 insertions(+), 21 deletions(-) diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index dfcd9f11236..acc9e6e4b04 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -80,36 +80,19 @@ read_text, ) from cudf.options import describe_option, get_option, set_option +from cudf.utils._numba_setup import _setup_numba from cudf.utils.dtypes import _NA_REP from cudf.utils.utils import clear_cache, set_allocator -try: - from cubinlinker.patch import patch_numba_linker_if_needed -except ImportError: - pass -else: - # Patch Numba to support CUDA enhanced compatibility. - # cuDF requires a stronger set of conditions than what is - # checked by patch_numba_linker_if_needed due to the PTX - # files needed for JIT Groupby Apply and string UDFs - from cudf.utils._numba_setup import ANY_PTX_FILE, _setup_numba_linker +_setup_numba() - _setup_numba_linker(ANY_PTX_FILE) - - del patch_numba_linker_if_needed +# This must be imported after _setup_numba_linker is called and the numba +# config is modified otherwise the config option will have no effect from numba import cuda cuda.set_memory_manager(RMMNumbaManager) cupy.cuda.set_allocator(rmm_cupy_allocator) -try: - # Numba 0.54: Disable low occupancy warnings - numba_config.CUDA_LOW_OCCUPANCY_WARNINGS = 0 -except AttributeError: - # Numba < 0.54: No occupancy warnings - pass -del numba_config - rmm.register_reinitialize_hook(clear_cache) diff --git a/python/cudf/cudf/utils/_numba_setup.py b/python/cudf/cudf/utils/_numba_setup.py index 4a67055d60a..8abb86bdce4 100644 --- a/python/cudf/cudf/utils/_numba_setup.py +++ b/python/cudf/cudf/utils/_numba_setup.py @@ -8,6 +8,11 @@ ANY_PTX_FILE = os.path.dirname(__file__) + "/../core/udf/shim_60.ptx" +def _setup_numba(): + _setup_numba_linker(ANY_PTX_FILE) + 
config.CUDA_LOW_OCCUPANCY_WARNINGS = 0 + + def _get_best_ptx_file(archs, max_compute_capability): """ Determine of the available PTX files which one is From 442fefc537949f0fdf9b624e8e4ef0135f161566 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 11 May 2023 05:03:13 -0700 Subject: [PATCH 03/37] update comment in __init__ --- python/cudf/cudf/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index acc9e6e4b04..c510ededeba 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -86,8 +86,8 @@ _setup_numba() -# This must be imported after _setup_numba_linker is called and the numba -# config is modified otherwise the config option will have no effect +# This must be imported after _setup_numba is called and the numba +# config is modified otherwise the config options will have no effect from numba import cuda cuda.set_memory_manager(RMMNumbaManager) From f5f915d86dabea2ad013c2416964ad8821a160f9 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 11 May 2023 05:22:14 -0700 Subject: [PATCH 04/37] add a few docs --- python/cudf/cudf/utils/_numba_setup.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/python/cudf/cudf/utils/_numba_setup.py b/python/cudf/cudf/utils/_numba_setup.py index 8abb86bdce4..ddcada62eee 100644 --- a/python/cudf/cudf/utils/_numba_setup.py +++ b/python/cudf/cudf/utils/_numba_setup.py @@ -9,6 +9,14 @@ def _setup_numba(): + """ + Configure numba for use with cuDF. This consists of potentially + putting numba into enhanced compatibility mode based on the user + driver and runtime versions as well as the version of the cuda + toolkit used to build the PTX files shipped with the user cuDF + package. It also sets any other config options within numba that + are desired for cuDF's operation. 
+ """ _setup_numba_linker(ANY_PTX_FILE) config.CUDA_LOW_OCCUPANCY_WARNINGS = 0 From 19dd82c49874bcf0404eec9e92c719645a010e02 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 11 May 2023 09:30:25 -0700 Subject: [PATCH 05/37] add a debug statement for now --- python/cudf/cudf/utils/_numba_setup.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/utils/_numba_setup.py b/python/cudf/cudf/utils/_numba_setup.py index ddcada62eee..d1d2ef6fd32 100644 --- a/python/cudf/cudf/utils/_numba_setup.py +++ b/python/cudf/cudf/utils/_numba_setup.py @@ -92,10 +92,11 @@ def maybe_patch_numba_linker( # Numba thinks cubinlinker is only needed if the driver is older than # the ctk, but when PTX files are present, it might also need to patch # because those PTX files may newer than the driver as well - if (driver_version < ptx_toolkit_version) or ( - driver_version < runtime_version - ): - config.NUMBA_CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY = 1 + raise AssertionError( + f"driver_version={driver_version}," + f"runtime_version={runtime_version}," + f"ptx_toolkit_version={ptx_toolkit_version}" + ) def _get_cuda_version_from_ptx_file(path): From d360008c258007560fb5b3bcaa5b3a8bff16d8fd Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 11 May 2023 09:32:00 -0700 Subject: [PATCH 06/37] only raise in cec mode --- python/cudf/cudf/utils/_numba_setup.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/python/cudf/cudf/utils/_numba_setup.py b/python/cudf/cudf/utils/_numba_setup.py index d1d2ef6fd32..434bdd66394 100644 --- a/python/cudf/cudf/utils/_numba_setup.py +++ b/python/cudf/cudf/utils/_numba_setup.py @@ -92,11 +92,15 @@ def maybe_patch_numba_linker( # Numba thinks cubinlinker is only needed if the driver is older than # the ctk, but when PTX files are present, it might also need to patch # because those PTX files may newer than the driver as well - raise AssertionError( - 
f"driver_version={driver_version}," - f"runtime_version={runtime_version}," - f"ptx_toolkit_version={ptx_toolkit_version}" - ) + if (driver_version < ptx_toolkit_version) or ( + driver_version < runtime_version + ): + config.NUMBA_CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY = 1 + raise AssertionError( + f"driver_version={driver_version}," + f"runtime_version={runtime_version}," + f"ptx_toolkit_version={ptx_toolkit_version}" + ) def _get_cuda_version_from_ptx_file(path): From 9c76c6138fe098be2ee9a3b6f9583bf6769cd33b Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 11 May 2023 11:23:11 -0700 Subject: [PATCH 07/37] try bumping to numba 0.57 --- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/recipes/cudf/meta.yaml | 4 ++-- dependencies.yaml | 2 +- python/cudf/cudf/utils/_numba_setup.py | 5 ----- python/cudf/pyproject.toml | 2 +- python/dask_cudf/pyproject.toml | 2 +- 6 files changed, 6 insertions(+), 11 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 4031f1aa1c3..b6daea7c2bc 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -48,7 +48,7 @@ dependencies: - nbsphinx - ninja - notebook -- numba>=0.56.4,<0.57 +- numba>=0.57 - numpy>=1.21,<1.24 - numpydoc - nvcc_linux-64=11.8 diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index f8074711b88..97c438bec59 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -53,7 +53,7 @@ requirements: - cython >=0.29,<0.30 - scikit-build >=0.13.1 - setuptools - - numba >=0.56.4,<0.57 + - numba >=0.57 - dlpack >=0.5,<0.6.0a0 - pyarrow =11 - libcudf ={{ version }} @@ -65,7 +65,7 @@ requirements: - typing_extensions - pandas >=1.3,<1.6.0dev0 - cupy >=12.0.0 - - numba >=0.56.4,<0.57 + - numba >=0.57 - numpy >=1.21,<1.24 # Temporarily upper bound numpy to avoid overflow deprecations - {{ pin_compatible('pyarrow', 
max_pin='x.x.x') }} - libcudf {{ version }} diff --git a/dependencies.yaml b/dependencies.yaml index 70d7f8c1ec8..e3fcbe69932 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -350,7 +350,7 @@ dependencies: packages: - cachetools - cuda-python>=11.7.1,<12.0 - - &numba numba>=0.56.4,<0.57 + - &numba numba>=0.57 - nvtx>=0.2.1 - packaging - rmm==23.6.* diff --git a/python/cudf/cudf/utils/_numba_setup.py b/python/cudf/cudf/utils/_numba_setup.py index 434bdd66394..ddcada62eee 100644 --- a/python/cudf/cudf/utils/_numba_setup.py +++ b/python/cudf/cudf/utils/_numba_setup.py @@ -96,11 +96,6 @@ def maybe_patch_numba_linker( driver_version < runtime_version ): config.NUMBA_CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY = 1 - raise AssertionError( - f"driver_version={driver_version}," - f"runtime_version={runtime_version}," - f"ptx_toolkit_version={ptx_toolkit_version}" - ) def _get_cuda_version_from_ptx_file(path): diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index d13324a7404..b08dd92d52f 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -31,7 +31,7 @@ dependencies = [ "cuda-python>=11.7.1,<12.0", "cupy-cuda11x>=12.0.0", "fsspec>=0.6.0", - "numba>=0.56.4,<0.57", + "numba>=0.57", "numpy>=1.21,<1.24", "nvtx>=0.2.1", "packaging", diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index ff2a3f2d095..42b6c26c002 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -40,7 +40,7 @@ dynamic = ["entry-points"] [project.optional-dependencies] test = [ "dask-cuda==23.6.*", - "numba>=0.56.4,<0.57", + "numba>=0.57", "pytest", "pytest-cov", "pytest-xdist", From 950f98f08d228f23a26d0a51709d2e316427d9c8 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 12 May 2023 09:56:53 -0700 Subject: [PATCH 08/37] conditionally import ptxcompiler --- python/cudf/cudf/utils/_numba_setup.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git 
a/python/cudf/cudf/utils/_numba_setup.py b/python/cudf/cudf/utils/_numba_setup.py index ddcada62eee..02541d8a0cc 100644 --- a/python/cudf/cudf/utils/_numba_setup.py +++ b/python/cudf/cudf/utils/_numba_setup.py @@ -75,15 +75,19 @@ def _get_ptx_file(path, prefix): def _setup_numba_linker(path): - from ptxcompiler.patch import NO_DRIVER, safe_get_versions - - versions = safe_get_versions() - if versions != NO_DRIVER: - driver_version, runtime_version = versions - ptx_toolkit_version = _get_cuda_version_from_ptx_file(path) - maybe_patch_numba_linker( - driver_version, runtime_version, ptx_toolkit_version - ) + try: + # ptxcompiler will not be present for cuda 12+ + from ptxcompiler.patch import NO_DRIVER, safe_get_versions + + versions = safe_get_versions() + if versions != NO_DRIVER: + driver_version, runtime_version = versions + ptx_toolkit_version = _get_cuda_version_from_ptx_file(path) + maybe_patch_numba_linker( + driver_version, runtime_version, ptx_toolkit_version + ) + except ImportError: + pass def maybe_patch_numba_linker( From c8142ea542e5f1036ffafef3381d9471c728229b Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 15 May 2023 06:05:17 -0700 Subject: [PATCH 09/37] update comments a bit --- python/cudf/cudf/utils/_numba_setup.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/python/cudf/cudf/utils/_numba_setup.py b/python/cudf/cudf/utils/_numba_setup.py index 02541d8a0cc..ddc6c38dc1d 100644 --- a/python/cudf/cudf/utils/_numba_setup.py +++ b/python/cudf/cudf/utils/_numba_setup.py @@ -76,16 +76,22 @@ def _get_ptx_file(path, prefix): def _setup_numba_linker(path): try: - # ptxcompiler will not be present for cuda 12+ + # By default, ptxcompiler will not be installed with CUDA 12 + # packages. This is ok, because in this situation putting + # numba in enhanced compatibility mode is not necessary. 
from ptxcompiler.patch import NO_DRIVER, safe_get_versions versions = safe_get_versions() if versions != NO_DRIVER: driver_version, runtime_version = versions - ptx_toolkit_version = _get_cuda_version_from_ptx_file(path) - maybe_patch_numba_linker( - driver_version, runtime_version, ptx_toolkit_version - ) + # Don't check if CEC is necessary in the possible edge + # case where a user has a CUDA 12 package and ptxcompiler + # in their environment anyways, perhaps installed separately + if driver_version < (12, 0): + ptx_toolkit_version = _get_cuda_version_from_ptx_file(path) + maybe_patch_numba_linker( + driver_version, runtime_version, ptx_toolkit_version + ) except ImportError: pass From 8c7bae8861a101334cd3733a90e6594eb1fc2533 Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Mon, 15 May 2023 09:18:10 -0500 Subject: [PATCH 10/37] Apply suggestions from code review Co-authored-by: Bradley Dice --- python/cudf/cudf/utils/_numba_setup.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/python/cudf/cudf/utils/_numba_setup.py b/python/cudf/cudf/utils/_numba_setup.py index 02541d8a0cc..109e6c6c4b9 100644 --- a/python/cudf/cudf/utils/_numba_setup.py +++ b/python/cudf/cudf/utils/_numba_setup.py @@ -12,8 +12,8 @@ def _setup_numba(): """ Configure numba for use with cuDF. This consists of potentially putting numba into enhanced compatibility mode based on the user - driver and runtime versions as well as the version of the cuda - toolkit used to build the PTX files shipped with the user cuDF + driver and runtime versions as well as the version of the CUDA + Toolkit used to build the PTX files shipped with the user cuDF package. It also sets any other config options within numba that are desired for cuDF's operation. 
""" @@ -24,11 +24,11 @@ def _setup_numba(): def _get_best_ptx_file(archs, max_compute_capability): """ Determine of the available PTX files which one is - the most recent up to and including the device cc + the most recent up to and including the device compute capability. """ filtered_archs = [x for x in archs if x[0] <= max_compute_capability] if filtered_archs: - return max(filtered_archs, key=lambda y: y[0]) + return max(filtered_archs, key=lambda x: x[0]) else: return None @@ -94,8 +94,8 @@ def maybe_patch_numba_linker( driver_version, runtime_version, ptx_toolkit_version ): # Numba thinks cubinlinker is only needed if the driver is older than - # the ctk, but when PTX files are present, it might also need to patch - # because those PTX files may newer than the driver as well + # the CUDA runtime, but when PTX files are present, it might also need to patch + # because those PTX files may be compiled by a CUDA version that is newer than the driver as well if (driver_version < ptx_toolkit_version) or ( driver_version < runtime_version ): From b8d290d34793681eef4c23a6b42f7d5215e70164 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 15 May 2023 07:20:54 -0700 Subject: [PATCH 11/37] _numba_setup -> _setup_numba --- python/cudf/cudf/__init__.py | 2 +- python/cudf/cudf/utils/{_numba_setup.py => _setup_numba.py} | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) rename python/cudf/cudf/utils/{_numba_setup.py => _setup_numba.py} (97%) diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index c510ededeba..b4fcc4a63e0 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -80,7 +80,7 @@ read_text, ) from cudf.options import describe_option, get_option, set_option -from cudf.utils._numba_setup import _setup_numba +from cudf.utils._setup_numba import _setup_numba from cudf.utils.dtypes import _NA_REP from cudf.utils.utils import clear_cache, set_allocator diff --git a/python/cudf/cudf/utils/_numba_setup.py 
b/python/cudf/cudf/utils/_setup_numba.py similarity index 97% rename from python/cudf/cudf/utils/_numba_setup.py rename to python/cudf/cudf/utils/_setup_numba.py index dca58fed77b..5db9af110e2 100644 --- a/python/cudf/cudf/utils/_numba_setup.py +++ b/python/cudf/cudf/utils/_setup_numba.py @@ -100,8 +100,9 @@ def maybe_patch_numba_linker( driver_version, runtime_version, ptx_toolkit_version ): # Numba thinks cubinlinker is only needed if the driver is older than - # the CUDA runtime, but when PTX files are present, it might also need to patch - # because those PTX files may be compiled by a CUDA version that is newer than the driver as well + # the CUDA runtime, but when PTX files are present, it might also need + # to patch because those PTX files may be compiled by a CUDA version + # that is newer than the driver as well if (driver_version < ptx_toolkit_version) or ( driver_version < runtime_version ): From a50c642025af900762bd403be68be8fbe32dc611 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 15 May 2023 07:49:36 -0700 Subject: [PATCH 12/37] address more reviews --- python/cudf/cudf/utils/_setup_numba.py | 32 ++++++++++++++------------ 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/python/cudf/cudf/utils/_setup_numba.py b/python/cudf/cudf/utils/_setup_numba.py index 5db9af110e2..762dee18dc1 100644 --- a/python/cudf/cudf/utils/_setup_numba.py +++ b/python/cudf/cudf/utils/_setup_numba.py @@ -5,7 +5,7 @@ from numba import config -ANY_PTX_FILE = os.path.dirname(__file__) + "/../core/udf/shim_60.ptx" +CC_60_PTX_FILE = os.path.dirname(__file__) + "/../core/udf/shim_60.ptx" def _setup_numba(): @@ -17,7 +17,10 @@ def _setup_numba(): package. It also sets any other config options within numba that are desired for cuDF's operation. 
""" - _setup_numba_linker(ANY_PTX_FILE) + _setup_numba_linker(CC_60_PTX_FILE) + + # disable low occupancy warnings for internal usages of numba, + # such as in our iloc implementation config.CUDA_LOW_OCCUPANCY_WARNINGS = 0 @@ -80,20 +83,19 @@ def _setup_numba_linker(path): # packages. This is ok, because in this situation putting # numba in enhanced compatibility mode is not necessary. from ptxcompiler.patch import NO_DRIVER, safe_get_versions - - versions = safe_get_versions() - if versions != NO_DRIVER: - driver_version, runtime_version = versions - # Don't check if CEC is necessary in the possible edge - # case where a user has a CUDA 12 package and ptxcompiler - # in their environment anyways, perhaps installed separately - if driver_version < (12, 0): - ptx_toolkit_version = _get_cuda_version_from_ptx_file(path) - maybe_patch_numba_linker( - driver_version, runtime_version, ptx_toolkit_version - ) except ImportError: - pass + return + versions = safe_get_versions() + if versions != NO_DRIVER: + driver_version, runtime_version = versions + # Don't check if CEC is necessary in the possible edge + # case where a user has a CUDA 12 package and ptxcompiler + # in their environment anyways, perhaps installed separately + if driver_version < (12, 0): + ptx_toolkit_version = _get_cuda_version_from_ptx_file(path) + maybe_patch_numba_linker( + driver_version, runtime_version, ptx_toolkit_version + ) def maybe_patch_numba_linker( From 96b6f01fda82f3dcc074959fc51975d0cdc3c881 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 16 May 2023 09:15:14 -0700 Subject: [PATCH 13/37] use a context manager to squash occupancy warnings for numba kernels --- python/cudf/cudf/core/indexed_frame.py | 4 +- python/cudf/cudf/core/udf/groupby_utils.py | 4 +- python/cudf/cudf/core/udf/utils.py | 2 +- python/cudf/cudf/tests/test_dataframe_copy.py | 5 ++- .../cudf/tests/test_extension_compilation.py | 15 ++++--- python/cudf/cudf/tests/test_string_udfs.py | 8 ++-- 
python/cudf/cudf/utils/_setup_numba.py | 13 ++++-- python/cudf/cudf/utils/applyutils.py | 10 +++-- python/cudf/cudf/utils/cudautils.py | 42 ++++++++++--------- python/cudf/cudf/utils/queryutils.py | 4 +- 10 files changed, 67 insertions(+), 40 deletions(-) diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 7141958f62d..c6b2c10b9da 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -68,6 +68,7 @@ _return_arr_from_dtype, ) from cudf.utils import docutils +from cudf.utils._setup_numba import CUDFNumbaConfig from cudf.utils.utils import _cudf_nvtx_annotate doc_reset_index_template = """ @@ -2193,7 +2194,8 @@ def _apply(self, func, kernel_getter, *args, **kwargs): input_args = _get_input_args_from_frame(self) launch_args = output_args + input_args + list(args) try: - kernel.forall(len(self))(*launch_args) + with CUDFNumbaConfig(): + kernel.forall(len(self))(*launch_args) except Exception as e: raise RuntimeError("UDF kernel execution failed.") from e diff --git a/python/cudf/cudf/core/udf/groupby_utils.py b/python/cudf/cudf/core/udf/groupby_utils.py index ae09dd1d704..30f27f1b654 100644 --- a/python/cudf/cudf/core/udf/groupby_utils.py +++ b/python/cudf/cudf/core/udf/groupby_utils.py @@ -27,6 +27,7 @@ _supported_dtypes_from_frame, precompiled, ) +from cudf.utils._setup_numba import CUDFNumbaConfig from cudf.utils.utils import _cudf_nvtx_annotate @@ -198,6 +199,7 @@ def jit_groupby_apply(offsets, grouped_values, function, *args): ) # Launch kernel - specialized[ngroups, tpb](*launch_args) + with CUDFNumbaConfig(): + specialized[ngroups, tpb](*launch_args) return output diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py index a387880c55d..075d9677209 100644 --- a/python/cudf/cudf/core/udf/utils.py +++ b/python/cudf/cudf/core/udf/utils.py @@ -31,7 +31,7 @@ udf_string, ) from cudf.utils import cudautils -from cudf.utils._numba_setup import 
_get_ptx_file +from cudf.utils._setup_numba import _get_ptx_file from cudf.utils.dtypes import ( BOOL_TYPES, DATETIME_TYPES, diff --git a/python/cudf/cudf/tests/test_dataframe_copy.py b/python/cudf/cudf/tests/test_dataframe_copy.py index 85e994bd733..3f75a50fb40 100644 --- a/python/cudf/cudf/tests/test_dataframe_copy.py +++ b/python/cudf/cudf/tests/test_dataframe_copy.py @@ -8,6 +8,7 @@ from cudf.core.dataframe import DataFrame from cudf.testing._utils import ALL_TYPES, assert_eq +from cudf.utils._setup_numba import CUDFNumbaConfig """ DataFrame copy expectations @@ -159,8 +160,8 @@ def test_kernel_deep_copy(): gdf = DataFrame.from_pandas(pdf) cdf = gdf.copy(deep=True) sr = gdf["b"] - - add_one[1, len(sr)](sr._column.data_array_view(mode="write")) + with CUDFNumbaConfig(): + add_one[1, len(sr)](sr._column.data_array_view(mode="write")) assert not gdf.to_string().split() == cdf.to_string().split() diff --git a/python/cudf/cudf/tests/test_extension_compilation.py b/python/cudf/cudf/tests/test_extension_compilation.py index f1ed17c5df5..525204bf002 100644 --- a/python/cudf/cudf/tests/test_extension_compilation.py +++ b/python/cudf/cudf/tests/test_extension_compilation.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2022, NVIDIA CORPORATION. +# Copyright (c) 2021-2023, NVIDIA CORPORATION. 
import operator import cupy as cp @@ -12,6 +12,7 @@ from cudf.core.udf.api import Masked from cudf.core.udf.masked_typing import MaskedType from cudf.testing._utils import parametrize_numeric_dtypes_pairwise +from cudf.utils._setup_numba import CUDFNumbaConfig arith_ops = ( operator.add, @@ -106,7 +107,8 @@ def test_kernel(x, y, err): err[0] = 3 err = cp.asarray([0], dtype="int8") - test_kernel[1, 1](1, 2, err) + with CUDFNumbaConfig(): + test_kernel[1, 1](1, 2, err) assert err[0] == 0 @@ -214,7 +216,8 @@ def test_kernel(err): err[0] = 2 err = cp.asarray([0], dtype="int8") - test_kernel[1, 1](err) + with CUDFNumbaConfig(): + test_kernel[1, 1](err) assert err[0] == 0 @@ -304,7 +307,8 @@ def test_kernel(err): err[0] = 2 err = cp.asarray([0], dtype="int8") - test_kernel[1, 1](err) + with CUDFNumbaConfig(): + test_kernel[1, 1](err) assert err[0] == 0 @@ -326,5 +330,6 @@ def test_kernel(err): err[0] = 1 err = cp.asarray([0], dtype="int8") - test_kernel[1, 1](err) + with CUDFNumbaConfig(): + test_kernel[1, 1](err) assert err[0] == 0 diff --git a/python/cudf/cudf/tests/test_string_udfs.py b/python/cudf/cudf/tests/test_string_udfs.py index 049dfdc8e30..62bffad0a01 100644 --- a/python/cudf/cudf/tests/test_string_udfs.py +++ b/python/cudf/cudf/tests/test_string_udfs.py @@ -22,6 +22,7 @@ ) from cudf.core.udf.utils import _PTX_FILE, _get_extensionty_size from cudf.testing._utils import assert_eq, sv_to_udf_str +from cudf.utils._setup_numba import CUDFNumbaConfig def get_kernels(func, dtype, size): @@ -85,8 +86,8 @@ def run_udf_test(data, func, dtype): sv_kernel, udf_str_kernel = get_kernels(func, dtype, len(data)) expect = pd.Series(data).apply(func) - - sv_kernel.forall(len(data))(str_views, output) + with CUDFNumbaConfig(): + sv_kernel.forall(len(data))(str_views, output) if dtype == "str": result = column_from_udf_string_array(output) else: @@ -94,7 +95,8 @@ def run_udf_test(data, func, dtype): got = cudf.Series(result, dtype=dtype) assert_eq(expect, got, check_dtype=False) 
- udf_str_kernel.forall(len(data))(str_views, output) + with CUDFNumbaConfig(): + udf_str_kernel.forall(len(data))(str_views, output) if dtype == "str": result = column_from_udf_string_array(output) else: diff --git a/python/cudf/cudf/utils/_setup_numba.py b/python/cudf/cudf/utils/_setup_numba.py index 762dee18dc1..287f5958fb6 100644 --- a/python/cudf/cudf/utils/_setup_numba.py +++ b/python/cudf/cudf/utils/_setup_numba.py @@ -19,10 +19,6 @@ def _setup_numba(): """ _setup_numba_linker(CC_60_PTX_FILE) - # disable low occupancy warnings for internal usages of numba, - # such as in our iloc implementation - config.CUDA_LOW_OCCUPANCY_WARNINGS = 0 - def _get_best_ptx_file(archs, max_compute_capability): """ @@ -155,3 +151,12 @@ def _get_cuda_version_from_ptx_file(path): ) return cuda_ver + + +class CUDFNumbaConfig: + def __enter__(self): + self.enter_val = config.CUDA_LOW_OCCUPANCY_WARNINGS + config.CUDA_LOW_OCCUPANCY_WARNINGS = 0 + + def __exit__(self, exc_type, exc_value, traceback): + config.CUDA_LOW_OCCUPANCY_WARNINGS = self.enter_val diff --git a/python/cudf/cudf/utils/applyutils.py b/python/cudf/cudf/utils/applyutils.py index 933b98367b6..14709bcbbfb 100644 --- a/python/cudf/cudf/utils/applyutils.py +++ b/python/cudf/cudf/utils/applyutils.py @@ -12,6 +12,7 @@ from cudf.core.buffer import acquire_spill_lock from cudf.core.column import column from cudf.utils import utils +from cudf.utils._setup_numba import CUDFNumbaConfig from cudf.utils.docutils import docfmt_partial _doc_applyparams = """ @@ -195,7 +196,8 @@ def compile(self, func, argnames, extra_argnames): return kernel def launch_kernel(self, df, args): - self.kernel.forall(len(df))(*args) + with CUDFNumbaConfig(): + self.kernel.forall(len(df))(*args) class ApplyChunksCompiler(ApplyKernelCompilerBase): @@ -209,12 +211,14 @@ def compile(self, func, argnames, extra_argnames): def launch_kernel(self, df, args, chunks, blkct=None, tpb=None): chunks = self.normalize_chunks(len(df), chunks) if blkct is None and tpb 
is None: - self.kernel.forall(len(df))(len(df), chunks, *args) + with CUDFNumbaConfig(): + self.kernel.forall(len(df))(len(df), chunks, *args) else: assert tpb is not None if blkct is None: blkct = chunks.size - self.kernel[blkct, tpb](len(df), chunks, *args) + with CUDFNumbaConfig(): + self.kernel[blkct, tpb](len(df), chunks, *args) def normalize_chunks(self, size, chunks): if isinstance(chunks, int): diff --git a/python/cudf/cudf/utils/cudautils.py b/python/cudf/cudf/utils/cudautils.py index e2bd4556ce8..4f047d3c125 100755 --- a/python/cudf/cudf/utils/cudautils.py +++ b/python/cudf/cudf/utils/cudautils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2022, NVIDIA CORPORATION. +# Copyright (c) 2018-2023, NVIDIA CORPORATION. from pickle import dumps @@ -8,6 +8,7 @@ from numba.np import numpy_support import cudf +from cudf.utils._setup_numba import CUDFNumbaConfig # # Misc kernels @@ -80,19 +81,20 @@ def find_index_of_val(arr, val, mask=None, compare="eq"): """ found = cuda.device_array(shape=(arr.shape), dtype="int32") if found.size > 0: - if compare == "gt": - gpu_mark_gt.forall(found.size)(arr, val, found, arr.size) - elif compare == "lt": - gpu_mark_lt.forall(found.size)(arr, val, found, arr.size) - else: - if arr.dtype in ("float32", "float64"): - gpu_mark_found_float.forall(found.size)( - arr, val, found, arr.size - ) + with CUDFNumbaConfig(): + if compare == "gt": + gpu_mark_gt.forall(found.size)(arr, val, found, arr.size) + elif compare == "lt": + gpu_mark_lt.forall(found.size)(arr, val, found, arr.size) else: - gpu_mark_found_int.forall(found.size)( - arr, val, found, arr.size - ) + if arr.dtype in ("float32", "float64"): + gpu_mark_found_float.forall(found.size)( + arr, val, found, arr.size + ) + else: + gpu_mark_found_int.forall(found.size)( + arr, val, found, arr.size + ) return cudf.core.column.column.as_column(found).set_mask(mask) @@ -154,9 +156,10 @@ def gpu_window_sizes_from_offset(arr, window_sizes, offset): def window_sizes_from_offset(arr, offset): 
window_sizes = cuda.device_array(shape=(arr.shape), dtype="int32") if arr.size > 0: - gpu_window_sizes_from_offset.forall(arr.size)( - arr, window_sizes, offset - ) + with CUDFNumbaConfig(): + gpu_window_sizes_from_offset.forall(arr.size)( + arr, window_sizes, offset + ) return window_sizes @@ -177,9 +180,10 @@ def gpu_grouped_window_sizes_from_offset( def grouped_window_sizes_from_offset(arr, group_starts, offset): window_sizes = cuda.device_array(shape=(arr.shape), dtype="int32") if arr.size > 0: - gpu_grouped_window_sizes_from_offset.forall(arr.size)( - arr, window_sizes, group_starts, offset - ) + with CUDFNumbaConfig(): + gpu_grouped_window_sizes_from_offset.forall(arr.size)( + arr, window_sizes, group_starts, offset + ) return window_sizes diff --git a/python/cudf/cudf/utils/queryutils.py b/python/cudf/cudf/utils/queryutils.py index 4ce89b526d6..6cbe702db90 100644 --- a/python/cudf/cudf/utils/queryutils.py +++ b/python/cudf/cudf/utils/queryutils.py @@ -11,6 +11,7 @@ from cudf.core.buffer import acquire_spill_lock from cudf.core.column import column_empty from cudf.utils import applyutils +from cudf.utils._setup_numba import CUDFNumbaConfig from cudf.utils.dtypes import ( BOOL_TYPES, DATETIME_TYPES, @@ -247,6 +248,7 @@ def query_execute(df, expr, callenv): out = column_empty(nrows, dtype=np.bool_) # run kernel args = [out] + colarrays + envargs - kernel.forall(nrows)(*args) + with CUDFNumbaConfig(): + kernel.forall(nrows)(*args) out_mask = applyutils.make_aggregate_nullmask(df, columns=columns) return out.set_mask(out_mask).fillna(False) From 47d8a2e7149edd5071ffe0349d1cc393f9c88302 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 17 May 2023 09:43:07 -0700 Subject: [PATCH 14/37] revert numba upgrade --- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/recipes/cudf/meta.yaml | 4 ++-- dependencies.yaml | 2 +- python/cudf/pyproject.toml | 2 +- python/dask_cudf/pyproject.toml | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff 
--git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index b6daea7c2bc..4031f1aa1c3 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -48,7 +48,7 @@ dependencies: - nbsphinx - ninja - notebook -- numba>=0.57 +- numba>=0.56.4,<0.57 - numpy>=1.21,<1.24 - numpydoc - nvcc_linux-64=11.8 diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index 97c438bec59..f8074711b88 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -53,7 +53,7 @@ requirements: - cython >=0.29,<0.30 - scikit-build >=0.13.1 - setuptools - - numba >=0.57 + - numba >=0.56.4,<0.57 - dlpack >=0.5,<0.6.0a0 - pyarrow =11 - libcudf ={{ version }} @@ -65,7 +65,7 @@ requirements: - typing_extensions - pandas >=1.3,<1.6.0dev0 - cupy >=12.0.0 - - numba >=0.57 + - numba >=0.56.4,<0.57 - numpy >=1.21,<1.24 # Temporarily upper bound numpy to avoid overflow deprecations - {{ pin_compatible('pyarrow', max_pin='x.x.x') }} - libcudf {{ version }} diff --git a/dependencies.yaml b/dependencies.yaml index e3fcbe69932..70d7f8c1ec8 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -350,7 +350,7 @@ dependencies: packages: - cachetools - cuda-python>=11.7.1,<12.0 - - &numba numba>=0.57 + - &numba numba>=0.56.4,<0.57 - nvtx>=0.2.1 - packaging - rmm==23.6.* diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index b08dd92d52f..d13324a7404 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -31,7 +31,7 @@ dependencies = [ "cuda-python>=11.7.1,<12.0", "cupy-cuda11x>=12.0.0", "fsspec>=0.6.0", - "numba>=0.57", + "numba>=0.56.4,<0.57", "numpy>=1.21,<1.24", "nvtx>=0.2.1", "packaging", diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index 42b6c26c002..ff2a3f2d095 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -40,7 +40,7 @@ dynamic = 
["entry-points"] [project.optional-dependencies] test = [ "dask-cuda==23.6.*", - "numba>=0.57", + "numba>=0.56.4,<0.57", "pytest", "pytest-cov", "pytest-xdist", From b9634f9e240217f6f95a27fcfaf7c29a2a67f28f Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 17 May 2023 11:34:00 -0700 Subject: [PATCH 15/37] adjust logic, introduce runtime check in apply/groupby udfs --- python/cudf/cudf/core/udf/utils.py | 10 ++++++++++ python/cudf/cudf/utils/_setup_numba.py | 27 +++++++++++--------------- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py index 218e3c3c294..4a3ed23b6bd 100644 --- a/python/cudf/cudf/core/udf/utils.py +++ b/python/cudf/cudf/core/udf/utils.py @@ -253,6 +253,16 @@ def _compile_or_get( we then obtain the return type from that separate compilation and use it to allocate an output column of the right dtype. """ + # runtime check for CEC mode which is disabled for CUDA 12 for now + if cuda.cudadrv.driver.get_version() == ( + 12, + 0, + ) and cuda.cudadrv.runtime.get_version() > (12, 0): + raise ValueError( + "Minor version compatibility not yet supported for " + "CUDA driver versions newer than 12.0" + ) + if not all(is_scalar(arg) for arg in args): raise TypeError("only scalar valued args are supported by apply") diff --git a/python/cudf/cudf/utils/_setup_numba.py b/python/cudf/cudf/utils/_setup_numba.py index 287f5958fb6..127c1e3e489 100644 --- a/python/cudf/cudf/utils/_setup_numba.py +++ b/python/cudf/cudf/utils/_setup_numba.py @@ -4,6 +4,7 @@ import os from numba import config +from numba.cuda.cudadrv.driver import Linker CC_60_PTX_FILE = os.path.dirname(__file__) + "/../core/udf/shim_60.ptx" @@ -78,6 +79,7 @@ def _setup_numba_linker(path): # By default, ptxcompiler will not be installed with CUDA 12 # packages. This is ok, because in this situation putting # numba in enhanced compatibility mode is not necessary. 
+ from cubinlinker.patch import _numba_version_ok, new_patched_linker from ptxcompiler.patch import NO_DRIVER, safe_get_versions except ImportError: return @@ -89,22 +91,15 @@ def _setup_numba_linker(path): # in their environment anyways, perhaps installed separately if driver_version < (12, 0): ptx_toolkit_version = _get_cuda_version_from_ptx_file(path) - maybe_patch_numba_linker( - driver_version, runtime_version, ptx_toolkit_version - ) - - -def maybe_patch_numba_linker( - driver_version, runtime_version, ptx_toolkit_version -): - # Numba thinks cubinlinker is only needed if the driver is older than - # the CUDA runtime, but when PTX files are present, it might also need - # to patch because those PTX files may be compiled by a CUDA version - # that is newer than the driver as well - if (driver_version < ptx_toolkit_version) or ( - driver_version < runtime_version - ): - config.NUMBA_CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY = 1 + # Numba thinks cubinlinker is only needed if the driver is older + # than the CUDA runtime, but when PTX files are present, it might + # also need to patch because those PTX files may be compiled by + # a CUDA version that is newer than the driver as well + if (driver_version < ptx_toolkit_version) or ( + driver_version < runtime_version + ): + if _numba_version_ok: + Linker.new = new_patched_linker def _get_cuda_version_from_ptx_file(path): From 7a594b3c7da9064e0d9baa3c747c291c27c7fa76 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Wed, 17 May 2023 13:55:30 -0700 Subject: [PATCH 16/37] Address reviews --- python/cudf/cudf/__init__.py | 2 -- python/cudf/cudf/utils/_setup_numba.py | 15 +++++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index b4fcc4a63e0..170b808b42c 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -86,8 +86,6 @@ _setup_numba() -# This must be imported after _setup_numba is called and the numba -# 
config is modified otherwise the config options will have no effect from numba import cuda cuda.set_memory_manager(RMMNumbaManager) diff --git a/python/cudf/cudf/utils/_setup_numba.py b/python/cudf/cudf/utils/_setup_numba.py index 127c1e3e489..4fe7f29e939 100644 --- a/python/cudf/cudf/utils/_setup_numba.py +++ b/python/cudf/cudf/utils/_setup_numba.py @@ -11,12 +11,8 @@ def _setup_numba(): """ - Configure numba for use with cuDF. This consists of potentially - putting numba into enhanced compatibility mode based on the user - driver and runtime versions as well as the version of the CUDA - Toolkit used to build the PTX files shipped with the user cuDF - package. It also sets any other config options within numba that - are desired for cuDF's operation. + Perform any numba patching or configuration desired upon + cuDF import. """ _setup_numba_linker(CC_60_PTX_FILE) @@ -75,6 +71,13 @@ def _get_ptx_file(path, prefix): def _setup_numba_linker(path): + """ + Configure the numba linker for use with cuDF. This consists of + potentially putting numba into enhanced compatibility mode + based on the user driver and runtime versions as well as the + version of the CUDA Toolkit used to build the PTX files shipped + with the user cuDF package. + """ try: # By default, ptxcompiler will not be installed with CUDA 12 # packages. 
This is ok, because in this situation putting From cf642d0bde1377cc2809c89de6ef125892d17e50 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 18 May 2023 11:08:17 -0700 Subject: [PATCH 17/37] partially address reviews --- python/cudf/cudf/utils/_setup_numba.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/python/cudf/cudf/utils/_setup_numba.py b/python/cudf/cudf/utils/_setup_numba.py index 4fe7f29e939..b0d863be1c1 100644 --- a/python/cudf/cudf/utils/_setup_numba.py +++ b/python/cudf/cudf/utils/_setup_numba.py @@ -9,14 +9,6 @@ CC_60_PTX_FILE = os.path.dirname(__file__) + "/../core/udf/shim_60.ptx" -def _setup_numba(): - """ - Perform any numba patching or configuration desired upon - cuDF import. - """ - _setup_numba_linker(CC_60_PTX_FILE) - - def _get_best_ptx_file(archs, max_compute_capability): """ Determine of the available PTX files which one is @@ -70,7 +62,7 @@ def _get_ptx_file(path, prefix): return regular_result[1] -def _setup_numba_linker(path): +def _setup_numba(): """ Configure the numba linker for use with cuDF. 
This consists of potentially putting numba into enhanced compatibility mode @@ -93,7 +85,9 @@ def _setup_numba_linker(path): # case where a user has a CUDA 12 package and ptxcompiler # in their environment anyways, perhaps installed separately if driver_version < (12, 0): - ptx_toolkit_version = _get_cuda_version_from_ptx_file(path) + ptx_toolkit_version = _get_cuda_version_from_ptx_file( + CC_60_PTX_FILE + ) # Numba thinks cubinlinker is only needed if the driver is older # than the CUDA runtime, but when PTX files are present, it might # also need to patch because those PTX files may be compiled by @@ -103,6 +97,8 @@ def _setup_numba_linker(path): ): if _numba_version_ok: Linker.new = new_patched_linker + else: + config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY = 1 def _get_cuda_version_from_ptx_file(path): From cb5a75604363390f48847f2b788cb662f5438ad3 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 19 May 2023 06:23:08 -0700 Subject: [PATCH 18/37] Revert "revert numba upgrade" This reverts commit 47d8a2e7149edd5071ffe0349d1cc393f9c88302. 
--- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/recipes/cudf/meta.yaml | 4 ++-- dependencies.yaml | 2 +- python/cudf/pyproject.toml | 2 +- python/dask_cudf/pyproject.toml | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 4031f1aa1c3..b6daea7c2bc 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -48,7 +48,7 @@ dependencies: - nbsphinx - ninja - notebook -- numba>=0.56.4,<0.57 +- numba>=0.57 - numpy>=1.21,<1.24 - numpydoc - nvcc_linux-64=11.8 diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index f8074711b88..97c438bec59 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -53,7 +53,7 @@ requirements: - cython >=0.29,<0.30 - scikit-build >=0.13.1 - setuptools - - numba >=0.56.4,<0.57 + - numba >=0.57 - dlpack >=0.5,<0.6.0a0 - pyarrow =11 - libcudf ={{ version }} @@ -65,7 +65,7 @@ requirements: - typing_extensions - pandas >=1.3,<1.6.0dev0 - cupy >=12.0.0 - - numba >=0.56.4,<0.57 + - numba >=0.57 - numpy >=1.21,<1.24 # Temporarily upper bound numpy to avoid overflow deprecations - {{ pin_compatible('pyarrow', max_pin='x.x.x') }} - libcudf {{ version }} diff --git a/dependencies.yaml b/dependencies.yaml index 70d7f8c1ec8..e3fcbe69932 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -350,7 +350,7 @@ dependencies: packages: - cachetools - cuda-python>=11.7.1,<12.0 - - &numba numba>=0.56.4,<0.57 + - &numba numba>=0.57 - nvtx>=0.2.1 - packaging - rmm==23.6.* diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index d13324a7404..b08dd92d52f 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -31,7 +31,7 @@ dependencies = [ "cuda-python>=11.7.1,<12.0", "cupy-cuda11x>=12.0.0", "fsspec>=0.6.0", - "numba>=0.56.4,<0.57", + "numba>=0.57", "numpy>=1.21,<1.24", 
"nvtx>=0.2.1", "packaging", diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index ff2a3f2d095..42b6c26c002 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -40,7 +40,7 @@ dynamic = ["entry-points"] [project.optional-dependencies] test = [ "dask-cuda==23.6.*", - "numba>=0.56.4,<0.57", + "numba>=0.57", "pytest", "pytest-cov", "pytest-xdist", From dcc73e10a0704182d9ff84eaa352018f9ed37172 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 19 May 2023 07:21:21 -0700 Subject: [PATCH 19/37] _setup_numba.py -> _numba.py, CUDFNumbaConfig -> _CUDFNumbaConfig --- python/cudf/cudf/core/indexed_frame.py | 4 ++-- python/cudf/cudf/core/udf/groupby_utils.py | 4 ++-- python/cudf/cudf/tests/test_extension_compilation.py | 10 +++++----- python/cudf/cudf/tests/test_string_udfs.py | 6 +++--- python/cudf/cudf/utils/{_setup_numba.py => _numba.py} | 9 ++------- python/cudf/cudf/utils/applyutils.py | 8 ++++---- python/cudf/cudf/utils/cudautils.py | 8 ++++---- python/cudf/cudf/utils/queryutils.py | 4 ++-- 8 files changed, 24 insertions(+), 29 deletions(-) rename python/cudf/cudf/utils/{_setup_numba.py => _numba.py} (93%) diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index c6b2c10b9da..abd8ad162c7 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -68,7 +68,7 @@ _return_arr_from_dtype, ) from cudf.utils import docutils -from cudf.utils._setup_numba import CUDFNumbaConfig +from cudf.utils._numba import _CUDFNumbaConfig from cudf.utils.utils import _cudf_nvtx_annotate doc_reset_index_template = """ @@ -2194,7 +2194,7 @@ def _apply(self, func, kernel_getter, *args, **kwargs): input_args = _get_input_args_from_frame(self) launch_args = output_args + input_args + list(args) try: - with CUDFNumbaConfig(): + with _CUDFNumbaConfig(): kernel.forall(len(self))(*launch_args) except Exception as e: raise RuntimeError("UDF kernel 
execution failed.") from e diff --git a/python/cudf/cudf/core/udf/groupby_utils.py b/python/cudf/cudf/core/udf/groupby_utils.py index e0d41515f9b..60eba7eb37b 100644 --- a/python/cudf/cudf/core/udf/groupby_utils.py +++ b/python/cudf/cudf/core/udf/groupby_utils.py @@ -27,7 +27,7 @@ _supported_cols_from_frame, _supported_dtypes_from_frame, ) -from cudf.utils._setup_numba import CUDFNumbaConfig +from cudf.utils._numba import _CUDFNumbaConfig from cudf.utils.utils import _cudf_nvtx_annotate @@ -197,7 +197,7 @@ def jit_groupby_apply(offsets, grouped_values, function, *args): ) # Launch kernel - with CUDFNumbaConfig(): + with _CUDFNumbaConfig(): specialized[ngroups, tpb](*launch_args) return output diff --git a/python/cudf/cudf/tests/test_extension_compilation.py b/python/cudf/cudf/tests/test_extension_compilation.py index 525204bf002..857cc114ffa 100644 --- a/python/cudf/cudf/tests/test_extension_compilation.py +++ b/python/cudf/cudf/tests/test_extension_compilation.py @@ -12,7 +12,7 @@ from cudf.core.udf.api import Masked from cudf.core.udf.masked_typing import MaskedType from cudf.testing._utils import parametrize_numeric_dtypes_pairwise -from cudf.utils._setup_numba import CUDFNumbaConfig +from cudf.utils._numba import _CUDFNumbaConfig arith_ops = ( operator.add, @@ -107,7 +107,7 @@ def test_kernel(x, y, err): err[0] = 3 err = cp.asarray([0], dtype="int8") - with CUDFNumbaConfig(): + with _CUDFNumbaConfig(): test_kernel[1, 1](1, 2, err) assert err[0] == 0 @@ -216,7 +216,7 @@ def test_kernel(err): err[0] = 2 err = cp.asarray([0], dtype="int8") - with CUDFNumbaConfig(): + with _CUDFNumbaConfig(): test_kernel[1, 1](err) assert err[0] == 0 @@ -307,7 +307,7 @@ def test_kernel(err): err[0] = 2 err = cp.asarray([0], dtype="int8") - with CUDFNumbaConfig(): + with _CUDFNumbaConfig(): test_kernel[1, 1](err) assert err[0] == 0 @@ -330,6 +330,6 @@ def test_kernel(err): err[0] = 1 err = cp.asarray([0], dtype="int8") - with CUDFNumbaConfig(): + with _CUDFNumbaConfig(): 
test_kernel[1, 1](err) assert err[0] == 0 diff --git a/python/cudf/cudf/tests/test_string_udfs.py b/python/cudf/cudf/tests/test_string_udfs.py index 62bffad0a01..88c73ccf964 100644 --- a/python/cudf/cudf/tests/test_string_udfs.py +++ b/python/cudf/cudf/tests/test_string_udfs.py @@ -22,7 +22,7 @@ ) from cudf.core.udf.utils import _PTX_FILE, _get_extensionty_size from cudf.testing._utils import assert_eq, sv_to_udf_str -from cudf.utils._setup_numba import CUDFNumbaConfig +from cudf.utils._numba import _CUDFNumbaConfig def get_kernels(func, dtype, size): @@ -86,7 +86,7 @@ def run_udf_test(data, func, dtype): sv_kernel, udf_str_kernel = get_kernels(func, dtype, len(data)) expect = pd.Series(data).apply(func) - with CUDFNumbaConfig(): + with _CUDFNumbaConfig(): sv_kernel.forall(len(data))(str_views, output) if dtype == "str": result = column_from_udf_string_array(output) @@ -95,7 +95,7 @@ def run_udf_test(data, func, dtype): got = cudf.Series(result, dtype=dtype) assert_eq(expect, got, check_dtype=False) - with CUDFNumbaConfig(): + with _CUDFNumbaConfig(): udf_str_kernel.forall(len(data))(str_views, output) if dtype == "str": result = column_from_udf_string_array(output) diff --git a/python/cudf/cudf/utils/_setup_numba.py b/python/cudf/cudf/utils/_numba.py similarity index 93% rename from python/cudf/cudf/utils/_setup_numba.py rename to python/cudf/cudf/utils/_numba.py index b0d863be1c1..053d7a0e1cb 100644 --- a/python/cudf/cudf/utils/_setup_numba.py +++ b/python/cudf/cudf/utils/_numba.py @@ -4,7 +4,6 @@ import os from numba import config -from numba.cuda.cudadrv.driver import Linker CC_60_PTX_FILE = os.path.dirname(__file__) + "/../core/udf/shim_60.ptx" @@ -74,7 +73,6 @@ def _setup_numba(): # By default, ptxcompiler will not be installed with CUDA 12 # packages. This is ok, because in this situation putting # numba in enhanced compatibility mode is not necessary. 
- from cubinlinker.patch import _numba_version_ok, new_patched_linker from ptxcompiler.patch import NO_DRIVER, safe_get_versions except ImportError: return @@ -95,10 +93,7 @@ def _setup_numba(): if (driver_version < ptx_toolkit_version) or ( driver_version < runtime_version ): - if _numba_version_ok: - Linker.new = new_patched_linker - else: - config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY = 1 + config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY = 1 def _get_cuda_version_from_ptx_file(path): @@ -147,7 +142,7 @@ def _get_cuda_version_from_ptx_file(path): return cuda_ver -class CUDFNumbaConfig: +class _CUDFNumbaConfig: def __enter__(self): self.enter_val = config.CUDA_LOW_OCCUPANCY_WARNINGS config.CUDA_LOW_OCCUPANCY_WARNINGS = 0 diff --git a/python/cudf/cudf/utils/applyutils.py b/python/cudf/cudf/utils/applyutils.py index 14709bcbbfb..b8cf6910402 100644 --- a/python/cudf/cudf/utils/applyutils.py +++ b/python/cudf/cudf/utils/applyutils.py @@ -12,7 +12,7 @@ from cudf.core.buffer import acquire_spill_lock from cudf.core.column import column from cudf.utils import utils -from cudf.utils._setup_numba import CUDFNumbaConfig +from cudf.utils._numba import _CUDFNumbaConfig from cudf.utils.docutils import docfmt_partial _doc_applyparams = """ @@ -196,7 +196,7 @@ def compile(self, func, argnames, extra_argnames): return kernel def launch_kernel(self, df, args): - with CUDFNumbaConfig(): + with _CUDFNumbaConfig(): self.kernel.forall(len(df))(*args) @@ -211,13 +211,13 @@ def compile(self, func, argnames, extra_argnames): def launch_kernel(self, df, args, chunks, blkct=None, tpb=None): chunks = self.normalize_chunks(len(df), chunks) if blkct is None and tpb is None: - with CUDFNumbaConfig(): + with _CUDFNumbaConfig(): self.kernel.forall(len(df))(len(df), chunks, *args) else: assert tpb is not None if blkct is None: blkct = chunks.size - with CUDFNumbaConfig(): + with _CUDFNumbaConfig(): self.kernel[blkct, tpb](len(df), chunks, *args) def normalize_chunks(self, size, chunks): diff 
--git a/python/cudf/cudf/utils/cudautils.py b/python/cudf/cudf/utils/cudautils.py index 4f047d3c125..a10eaab0bff 100755 --- a/python/cudf/cudf/utils/cudautils.py +++ b/python/cudf/cudf/utils/cudautils.py @@ -8,7 +8,7 @@ from numba.np import numpy_support import cudf -from cudf.utils._setup_numba import CUDFNumbaConfig +from cudf.utils._numba import _CUDFNumbaConfig # # Misc kernels @@ -81,7 +81,7 @@ def find_index_of_val(arr, val, mask=None, compare="eq"): """ found = cuda.device_array(shape=(arr.shape), dtype="int32") if found.size > 0: - with CUDFNumbaConfig(): + with _CUDFNumbaConfig(): if compare == "gt": gpu_mark_gt.forall(found.size)(arr, val, found, arr.size) elif compare == "lt": @@ -156,7 +156,7 @@ def gpu_window_sizes_from_offset(arr, window_sizes, offset): def window_sizes_from_offset(arr, offset): window_sizes = cuda.device_array(shape=(arr.shape), dtype="int32") if arr.size > 0: - with CUDFNumbaConfig(): + with _CUDFNumbaConfig(): gpu_window_sizes_from_offset.forall(arr.size)( arr, window_sizes, offset ) @@ -180,7 +180,7 @@ def gpu_grouped_window_sizes_from_offset( def grouped_window_sizes_from_offset(arr, group_starts, offset): window_sizes = cuda.device_array(shape=(arr.shape), dtype="int32") if arr.size > 0: - with CUDFNumbaConfig(): + with _CUDFNumbaConfig(): gpu_grouped_window_sizes_from_offset.forall(arr.size)( arr, window_sizes, group_starts, offset ) diff --git a/python/cudf/cudf/utils/queryutils.py b/python/cudf/cudf/utils/queryutils.py index 6cbe702db90..51093375eda 100644 --- a/python/cudf/cudf/utils/queryutils.py +++ b/python/cudf/cudf/utils/queryutils.py @@ -11,7 +11,7 @@ from cudf.core.buffer import acquire_spill_lock from cudf.core.column import column_empty from cudf.utils import applyutils -from cudf.utils._setup_numba import CUDFNumbaConfig +from cudf.utils._numba import _CUDFNumbaConfig from cudf.utils.dtypes import ( BOOL_TYPES, DATETIME_TYPES, @@ -248,7 +248,7 @@ def query_execute(df, expr, callenv): out = column_empty(nrows, 
dtype=np.bool_) # run kernel args = [out] + colarrays + envargs - with CUDFNumbaConfig(): + with _CUDFNumbaConfig(): kernel.forall(nrows)(*args) out_mask = applyutils.make_aggregate_nullmask(df, columns=columns) return out.set_mask(out_mask).fillna(False) From 053193a8add7d015e984562c714faf40807a06d5 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 19 May 2023 07:55:28 -0700 Subject: [PATCH 20/37] try vendoring some ptxcompiler code --- python/cudf/cudf/utils/_numba.py | 46 +++++++++++++++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/utils/_numba.py b/python/cudf/cudf/utils/_numba.py index 053d7a0e1cb..06aa51aa7fb 100644 --- a/python/cudf/cudf/utils/_numba.py +++ b/python/cudf/cudf/utils/_numba.py @@ -1,11 +1,43 @@ # Copyright (c) 2023, NVIDIA CORPORATION. import glob +import math import os +import subprocess +import sys +import warnings from numba import config CC_60_PTX_FILE = os.path.dirname(__file__) + "/../core/udf/shim_60.ptx" +NO_DRIVER = (math.inf, math.inf) + +CMD = """\ +from ctypes import c_int, byref +from numba import cuda +dv = c_int(0) +cuda.cudadrv.driver.driver.cuDriverGetVersion(byref(dv)) +drv_major = dv.value // 1000 +drv_minor = (dv.value - (drv_major * 1000)) // 10 +run_major, run_minor = cuda.runtime.get_version() +print(f'{drv_major} {drv_minor} {run_major} {run_minor}') +""" + + +def _get_versions(): + """ + This function is mostly vendored from ptxcompiler and is used + to check the system CUDA driver and runtime versions in its absence. + """ + cp = subprocess.run([sys.executable, "-c", CMD], capture_output=True) + if cp.returncode: + return NO_DRIVER + + versions = [int(s) for s in cp.stdout.strip().split()] + driver_version = tuple(versions[:2]) + runtime_version = tuple(versions[2:]) + + return driver_version, runtime_version def _get_best_ptx_file(archs, max_compute_capability): @@ -75,7 +107,19 @@ def _setup_numba(): # numba in enhanced compatibility mode is not necessary. 
from ptxcompiler.patch import NO_DRIVER, safe_get_versions except ImportError: - return + versions = _get_versions() + if versions != NO_DRIVER: + driver_version, runtime_version = versions + if runtime_version > driver_version: + warnings.warn( + f"Using CUDA toolkit version {runtime_version} with CUDA " + f"driver version {driver_version} requires minor version " + "compatibility, which is not yet supported for CUDA " + "driver versions newer than 12.0. It is likely that many " + "cuDF operations will not work in this state. Please " + f"install CUDA toolkit version {driver_version} to " + "continue using cuDF." + ) versions = safe_get_versions() if versions != NO_DRIVER: driver_version, runtime_version = versions From c2285fa67adec6d5f836fe6903c5ec685964a559 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 19 May 2023 07:59:22 -0700 Subject: [PATCH 21/37] add the comment about the MVC config option and numba.cuda imports back in --- python/cudf/cudf/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index 170b808b42c..4ea19d543ec 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -84,6 +84,9 @@ from cudf.utils.dtypes import _NA_REP from cudf.utils.utils import clear_cache, set_allocator +# this must be called before numba.cuda is imported, because +# it sets the numba config variable responsible for enabling +# MVC. Setting it after importing cuda has no effect. 
_setup_numba() from numba import cuda From b72eef004618e209bc69c3b82749e566ba1709f6 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 19 May 2023 08:24:16 -0700 Subject: [PATCH 22/37] fix imports --- python/cudf/cudf/__init__.py | 2 +- python/cudf/cudf/core/udf/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index 4ea19d543ec..d44d622bdb7 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -80,7 +80,7 @@ read_text, ) from cudf.options import describe_option, get_option, set_option -from cudf.utils._setup_numba import _setup_numba +from cudf.utils._numba import _setup_numba from cudf.utils.dtypes import _NA_REP from cudf.utils.utils import clear_cache, set_allocator diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py index 4a3ed23b6bd..fbddd75344f 100644 --- a/python/cudf/cudf/core/udf/utils.py +++ b/python/cudf/cudf/core/udf/utils.py @@ -31,7 +31,7 @@ udf_string, ) from cudf.utils import cudautils -from cudf.utils._setup_numba import _get_ptx_file +from cudf.utils._numba import _get_ptx_file from cudf.utils.dtypes import ( BOOL_TYPES, DATETIME_TYPES, From bd27a2f2bd8e09fa6d7d179b163636e5c7453503 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 19 May 2023 09:04:13 -0700 Subject: [PATCH 23/37] switch error --- python/cudf/cudf/utils/_numba.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/utils/_numba.py b/python/cudf/cudf/utils/_numba.py index 06aa51aa7fb..e275c3fa881 100644 --- a/python/cudf/cudf/utils/_numba.py +++ b/python/cudf/cudf/utils/_numba.py @@ -106,7 +106,7 @@ def _setup_numba(): # packages. This is ok, because in this situation putting # numba in enhanced compatibility mode is not necessary. 
from ptxcompiler.patch import NO_DRIVER, safe_get_versions - except ImportError: + except ModuleNotFoundError: versions = _get_versions() if versions != NO_DRIVER: driver_version, runtime_version = versions From 8c9c070927901e6f45f613929f0332ad1dda5f6d Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 19 May 2023 09:29:20 -0700 Subject: [PATCH 24/37] slightly adjust logic --- python/cudf/cudf/utils/_numba.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/utils/_numba.py b/python/cudf/cudf/utils/_numba.py index e275c3fa881..ec6421886c4 100644 --- a/python/cudf/cudf/utils/_numba.py +++ b/python/cudf/cudf/utils/_numba.py @@ -10,7 +10,7 @@ from numba import config CC_60_PTX_FILE = os.path.dirname(__file__) + "/../core/udf/shim_60.ptx" -NO_DRIVER = (math.inf, math.inf) +_NO_DRIVER = (math.inf, math.inf) CMD = """\ from ctypes import c_int, byref @@ -31,7 +31,7 @@ def _get_versions(): """ cp = subprocess.run([sys.executable, "-c", CMD], capture_output=True) if cp.returncode: - return NO_DRIVER + return _NO_DRIVER versions = [int(s) for s in cp.stdout.strip().split()] driver_version = tuple(versions[:2]) @@ -108,7 +108,7 @@ def _setup_numba(): from ptxcompiler.patch import NO_DRIVER, safe_get_versions except ModuleNotFoundError: versions = _get_versions() - if versions != NO_DRIVER: + if versions != _NO_DRIVER: driver_version, runtime_version = versions if runtime_version > driver_version: warnings.warn( From 662b30bba001beea5dff19621c1dd14d5ff9c4bd Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Sun, 21 May 2023 19:13:27 -0700 Subject: [PATCH 25/37] add missing return --- python/cudf/cudf/utils/_numba.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/cudf/cudf/utils/_numba.py b/python/cudf/cudf/utils/_numba.py index ec6421886c4..fd59244985f 100644 --- a/python/cudf/cudf/utils/_numba.py +++ b/python/cudf/cudf/utils/_numba.py @@ -120,6 +120,7 @@ def _setup_numba(): f"install CUDA toolkit version 
{driver_version} to " "continue using cuDF." ) + return versions = safe_get_versions() if versions != NO_DRIVER: driver_version, runtime_version = versions From 93af61398b8d2fd50a51b0fa2d8ace1ceda49ce9 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 22 May 2023 04:05:02 -0700 Subject: [PATCH 26/37] shuffle imports --- python/cudf/cudf/__init__.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index d44d622bdb7..96aee9f065e 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -1,11 +1,16 @@ # Copyright (c) 2018-2023, NVIDIA CORPORATION. +# this must be called before numba.cuda is imported, because +# it sets the numba config variable responsible for enabling +# MVC. Setting it after importing cuda has no effect. +from cudf.utils._numba import _setup_numba from cudf.utils.gpu_utils import validate_setup +_setup_numba() validate_setup() import cupy -from numba import config as numba_config +from numba import config as numba_config, cuda import rmm from rmm.allocators.cupy import rmm_cupy_allocator @@ -80,17 +85,9 @@ read_text, ) from cudf.options import describe_option, get_option, set_option -from cudf.utils._numba import _setup_numba from cudf.utils.dtypes import _NA_REP from cudf.utils.utils import clear_cache, set_allocator -# this must be called before numba.cuda is imported, because -# it sets the numba config variable responsible for enabling -# MVC. Setting it after importing cuda has no effect. 
-_setup_numba() - -from numba import cuda - cuda.set_memory_manager(RMMNumbaManager) cupy.cuda.set_allocator(rmm_cupy_allocator) From 2ff5c5d8f31196bd2de158f89e964ec305e8d711 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 22 May 2023 06:40:31 -0700 Subject: [PATCH 27/37] delete explicit runtime check for MVC in cuda 12+ as it's needed more globally now --- python/cudf/cudf/core/udf/utils.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py index fbddd75344f..944e5a05842 100644 --- a/python/cudf/cudf/core/udf/utils.py +++ b/python/cudf/cudf/core/udf/utils.py @@ -254,14 +254,6 @@ def _compile_or_get( use it to allocate an output column of the right dtype. """ # runtime check for CEC mode which is disabled for CUDA 12 for now - if cuda.cudadrv.driver.get_version() == ( - 12, - 0, - ) and cuda.cudadrv.runtime.get_version() > (12, 0): - raise ValueError( - "Minor version compatibility not yet supported for " - "CUDA driver versions newer than 12.0" - ) if not all(is_scalar(arg) for arg in args): raise TypeError("only scalar valued args are supported by apply") From 5cb0ce6d67b2f2c8813f3140cbbf3311ca4c7743 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 22 May 2023 17:06:43 -0700 Subject: [PATCH 28/37] attempt a simplifying change --- python/cudf/cudf/__init__.py | 3 + python/cudf/cudf/utils/_numba.py | 64 ++++---------- python/cudf/cudf/utils/_ptxcompiler.py | 118 +++++++++++++++++++++++++ 3 files changed, 136 insertions(+), 49 deletions(-) create mode 100644 python/cudf/cudf/utils/_ptxcompiler.py diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index 96aee9f065e..ad99b583045 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -1,5 +1,8 @@ # Copyright (c) 2018-2023, NVIDIA CORPORATION. 
+from cuda import cuda, cudart +cuda.cuDriverGetVersion() +cudart.cudaRuntimeGetVersion() # this must be called before numba.cuda is imported, because # it sets the numba config variable responsible for enabling # MVC. Setting it after importing cuda has no effect. diff --git a/python/cudf/cudf/utils/_numba.py b/python/cudf/cudf/utils/_numba.py index fd59244985f..6daf41a1f73 100644 --- a/python/cudf/cudf/utils/_numba.py +++ b/python/cudf/cudf/utils/_numba.py @@ -1,43 +1,12 @@ # Copyright (c) 2023, NVIDIA CORPORATION. import glob -import math import os -import subprocess -import sys import warnings from numba import config CC_60_PTX_FILE = os.path.dirname(__file__) + "/../core/udf/shim_60.ptx" -_NO_DRIVER = (math.inf, math.inf) - -CMD = """\ -from ctypes import c_int, byref -from numba import cuda -dv = c_int(0) -cuda.cudadrv.driver.driver.cuDriverGetVersion(byref(dv)) -drv_major = dv.value // 1000 -drv_minor = (dv.value - (drv_major * 1000)) // 10 -run_major, run_minor = cuda.runtime.get_version() -print(f'{drv_major} {drv_minor} {run_major} {run_minor}') -""" - - -def _get_versions(): - """ - This function is mostly vendored from ptxcompiler and is used - to check the system CUDA driver and runtime versions in its absence. - """ - cp = subprocess.run([sys.executable, "-c", CMD], capture_output=True) - if cp.returncode: - return _NO_DRIVER - - versions = [int(s) for s in cp.stdout.strip().split()] - driver_version = tuple(versions[:2]) - runtime_version = tuple(versions[2:]) - - return driver_version, runtime_version def _get_best_ptx_file(archs, max_compute_capability): @@ -107,27 +76,24 @@ def _setup_numba(): # numba in enhanced compatibility mode is not necessary. 
from ptxcompiler.patch import NO_DRIVER, safe_get_versions except ModuleNotFoundError: - versions = _get_versions() - if versions != _NO_DRIVER: - driver_version, runtime_version = versions - if runtime_version > driver_version: - warnings.warn( - f"Using CUDA toolkit version {runtime_version} with CUDA " - f"driver version {driver_version} requires minor version " - "compatibility, which is not yet supported for CUDA " - "driver versions newer than 12.0. It is likely that many " - "cuDF operations will not work in this state. Please " - f"install CUDA toolkit version {driver_version} to " - "continue using cuDF." - ) - return + # use vendored version + from _ptxcompiler import NO_DRIVER, safe_get_versions + versions = safe_get_versions() if versions != NO_DRIVER: driver_version, runtime_version = versions - # Don't check if CEC is necessary in the possible edge - # case where a user has a CUDA 12 package and ptxcompiler - # in their environment anyways, perhaps installed separately - if driver_version < (12, 0): + if driver_version >= (12, 0) and runtime_version > driver_version: + warnings.warn( + f"Using CUDA toolkit version {runtime_version} with CUDA " + f"driver version {driver_version} requires minor version " + "compatibility, which is not yet supported for CUDA " + "driver versions newer than 12.0. It is likely that many " + "cuDF operations will not work in this state. Please " + f"install CUDA toolkit version {driver_version} to " + "continue using cuDF." + ) + else: + # Support MVC for all CUDA versions in the 11.x range ptx_toolkit_version = _get_cuda_version_from_ptx_file( CC_60_PTX_FILE ) diff --git a/python/cudf/cudf/utils/_ptxcompiler.py b/python/cudf/cudf/utils/_ptxcompiler.py new file mode 100644 index 00000000000..c4b8eda2337 --- /dev/null +++ b/python/cudf/cudf/utils/_ptxcompiler.py @@ -0,0 +1,118 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import math +import os +import subprocess +import sys +import warnings + +NO_DRIVER = (math.inf, math.inf) + +CMD = """\ +from ctypes import c_int, byref +from numba import cuda +dv = c_int(0) +cuda.cudadrv.driver.driver.cuDriverGetVersion(byref(dv)) +drv_major = dv.value // 1000 +drv_minor = (dv.value - (drv_major * 1000)) // 10 +run_major, run_minor = cuda.runtime.get_version() +print(f'{drv_major} {drv_minor} {run_major} {run_minor}') +""" + + +def patch_forced_by_user(): + # The patch is needed if the user explicitly + # forced it with an environment variable. + apply = os.getenv("PTXCOMPILER_APPLY_NUMBA_CODEGEN_PATCH") + if apply is not None: + try: + apply = int(apply) + except ValueError: + apply = False + + return bool(apply) + + +def check_disabled_in_env(): + # We should avoid checking whether the patch is + # needed if the user requested that we don't check + # (e.g. 
in a non-fork-safe environment) + check = os.getenv("PTXCOMPILER_CHECK_NUMBA_CODEGEN_PATCH_NEEDED") + if check is not None: + try: + check = int(check) + except ValueError: + check = False + else: + check = True + + return not check + + +def get_versions(): + cp = subprocess.run([sys.executable, "-c", CMD], capture_output=True) + if cp.returncode: + msg = ( + f"Error getting driver and runtime versions:\n\nstdout:\n\n" + f"{cp.stdout.decode()}\n\nstderr:\n\n{cp.stderr.decode()}\n\n" + "Not patching Numba" + ) + warnings.warn(msg, UserWarning) + return NO_DRIVER + + versions = [int(s) for s in cp.stdout.strip().split()] + driver_version = tuple(versions[:2]) + runtime_version = tuple(versions[2:]) + + return driver_version, runtime_version + + +def safe_get_versions(): + """ + Return a 2-tuple of deduced driver and runtime versions. + + To ensure that this function does not initialize a CUDA context, + calls to the runtime and driver are made in a subprocess. + + If PTXCOMPILER_CHECK_NUMBA_CODEGEN_PATCH_NEEDED is set + in the environment, then this subprocess call is not launched. + To specify the driver and runtime versions of the environment + in this case, set PTXCOMPILER_KNOWN_DRIVER_VERSION and + PTXCOMPILER_KNOWN_RUNTIME_VERSION appropriately. 
+ """ + if check_disabled_in_env(): + try: + # allow user to specify driver/runtime + # versions manually, if necessary + driver_version = os.environ[ + "PTXCOMPILER_KNOWN_DRIVER_VERSION" + ].split(".") + runtime_version = os.environ[ + "PTXCOMPILER_KNOWN_RUNTIME_VERSION" + ].split(".") + driver_version, runtime_version = ( + tuple(map(int, driver_version)), + tuple(map(int, runtime_version)), + ) + except (KeyError, ValueError): + warnings.warn( + "No way to determine driver and runtime versions for " + "patching, set PTXCOMPILER_KNOWN_DRIVER_VERSION and " + "PTXCOMPILER_KNOWN_RUNTIME_VERSION" + ) + return NO_DRIVER + else: + driver_version, runtime_version = get_versions() + return driver_version, runtime_version From fc69663fc8ed27b4817c104a2f238dcc5d4bbe5e Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 22 May 2023 17:35:44 -0700 Subject: [PATCH 29/37] update ptx/ctk version mapping table --- python/cudf/cudf/utils/_numba.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/cudf/cudf/utils/_numba.py b/python/cudf/cudf/utils/_numba.py index 6daf41a1f73..71c2a2932bc 100644 --- a/python/cudf/cudf/utils/_numba.py +++ b/python/cudf/cudf/utils/_numba.py @@ -137,11 +137,17 @@ def _get_cuda_version_from_ptx_file(path): version = ver_line.strip("\n").split(" ")[1] # from ptx_docs/release_notes above: ver_map = { + "7.0": (11, 0), + "7.1": (11, 1), + "7.2": (11, 2), + "7.3": (11, 3), + "7.4": (11, 4), "7.5": (11, 5), "7.6": (11, 6), "7.7": (11, 7), "7.8": (11, 8), "8.0": (12, 0), + "8.1": (12, 1), } cuda_ver = ver_map.get(version) From 0797cdeab35128eba56e3419522d9c1ac78542cd Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 22 May 2023 18:22:32 -0700 Subject: [PATCH 30/37] fix local imports --- python/cudf/cudf/utils/_numba.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/utils/_numba.py b/python/cudf/cudf/utils/_numba.py index 71c2a2932bc..f691553e9d2 100644 --- a/python/cudf/cudf/utils/_numba.py +++ 
b/python/cudf/cudf/utils/_numba.py @@ -77,7 +77,7 @@ def _setup_numba(): from ptxcompiler.patch import NO_DRIVER, safe_get_versions except ModuleNotFoundError: # use vendored version - from _ptxcompiler import NO_DRIVER, safe_get_versions + from cudf.utils._ptxcompiler import NO_DRIVER, safe_get_versions versions = safe_get_versions() if versions != NO_DRIVER: From e7999927f17cfb5adbec5b52f2aa5ebf801897e7 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 22 May 2023 20:29:40 -0700 Subject: [PATCH 31/37] remove extraneous testing code --- python/cudf/cudf/__init__.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index ad99b583045..96aee9f065e 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -1,8 +1,5 @@ # Copyright (c) 2018-2023, NVIDIA CORPORATION. -from cuda import cuda, cudart -cuda.cuDriverGetVersion() -cudart.cudaRuntimeGetVersion() # this must be called before numba.cuda is imported, because # it sets the numba config variable responsible for enabling # MVC. Setting it after importing cuda has no effect. From 41e92a9d90a32740733bd37d13ffb71b8884fbb3 Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Tue, 23 May 2023 07:17:35 -0500 Subject: [PATCH 32/37] Apply suggestions from code review Co-authored-by: Bradley Dice Co-authored-by: Graham Markall <535640+gmarkall@users.noreply.github.com> --- python/cudf/cudf/__init__.py | 4 ++-- python/cudf/cudf/utils/_numba.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index 96aee9f065e..de0f2d67add 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -1,8 +1,8 @@ # Copyright (c) 2018-2023, NVIDIA CORPORATION. 
-# this must be called before numba.cuda is imported, because +# _setup_numba must be called before numba.cuda is imported, because # it sets the numba config variable responsible for enabling -# MVC. Setting it after importing cuda has no effect. +# Minor Version Compatibility. Setting it after importing numba.cuda has no effect. from cudf.utils._numba import _setup_numba from cudf.utils.gpu_utils import validate_setup diff --git a/python/cudf/cudf/utils/_numba.py b/python/cudf/cudf/utils/_numba.py index f691553e9d2..49d9ea679fd 100644 --- a/python/cudf/cudf/utils/_numba.py +++ b/python/cudf/cudf/utils/_numba.py @@ -87,7 +87,7 @@ def _setup_numba(): f"Using CUDA toolkit version {runtime_version} with CUDA " f"driver version {driver_version} requires minor version " "compatibility, which is not yet supported for CUDA " - "driver versions newer than 12.0. It is likely that many " + "driver versions 12.0 and above. It is likely that many " "cuDF operations will not work in this state. Please " f"install CUDA toolkit version {driver_version} to " "continue using cuDF." From 8839f8c7157d3f3c8bddef39cbf5cc376087701e Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 23 May 2023 05:19:48 -0700 Subject: [PATCH 33/37] cleanup --- python/cudf/cudf/core/udf/utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py index 944e5a05842..35a3f6c1ffd 100644 --- a/python/cudf/cudf/core/udf/utils.py +++ b/python/cudf/cudf/core/udf/utils.py @@ -253,8 +253,6 @@ def _compile_or_get( we then obtain the return type from that separate compilation and use it to allocate an output column of the right dtype.
""" - # runtime check for CEC mode which is disabled for CUDA 12 for now - if not all(is_scalar(arg) for arg in args): raise TypeError("only scalar valued args are supported by apply") From c27a4b1312bbb23f5e52d6d548659e2f2cd1351e Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 23 May 2023 05:26:08 -0700 Subject: [PATCH 34/37] clarify cuda 12 comments --- python/cudf/cudf/utils/_numba.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/utils/_numba.py b/python/cudf/cudf/utils/_numba.py index 49d9ea679fd..b381ad93ec5 100644 --- a/python/cudf/cudf/utils/_numba.py +++ b/python/cudf/cudf/utils/_numba.py @@ -70,10 +70,12 @@ def _setup_numba(): version of the CUDA Toolkit used to build the PTX files shipped with the user cuDF package. """ + # ptxcompiler is a requirement for cuda 11.x packages but not + # cuda 12.x packages. However its version checking machinery + # is still necessary. If a user happens to have ptxcompiler + # in a cuda 12 environment, its use for the purposes of + # checking the driver and runtime versions is harmless try: - # By default, ptxcompiler will not be installed with CUDA 12 - # packages. This is ok, because in this situation putting - # numba in enhanced compatibility mode is not necessary.
from ptxcompiler.patch import NO_DRIVER, safe_get_versions except ModuleNotFoundError: # use vendored version From 6925612438d6efb7c6c1f8104ed8dd861419d80e Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 23 May 2023 05:28:31 -0700 Subject: [PATCH 35/37] version map changes --- python/cudf/cudf/utils/_numba.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/python/cudf/cudf/utils/_numba.py b/python/cudf/cudf/utils/_numba.py index b381ad93ec5..4e9ef65b918 100644 --- a/python/cudf/cudf/utils/_numba.py +++ b/python/cudf/cudf/utils/_numba.py @@ -137,13 +137,12 @@ def _get_cuda_version_from_ptx_file(path): else: raise ValueError("Could not read CUDA version from ptx file.") version = ver_line.strip("\n").split(" ")[1] - # from ptx_docs/release_notes above: + # This dictionary maps from a PTX version to the CUDA toolkit + # version that produces it. The lowest value should be the minimum + # CUDA version required to compile the library. Currently CUDA 11.5 + # or higher is required to build cudf. New CUDA versions should + # be added to this dictionary when officially supported. ver_map = { - "7.0": (11, 0), - "7.1": (11, 1), - "7.2": (11, 2), - "7.3": (11, 3), - "7.4": (11, 4), "7.5": (11, 5), "7.6": (11, 6), "7.7": (11, 7), "7.8": (11, 8), From 439a667966d25056932010a05d92e327c789a831 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 23 May 2023 05:42:11 -0700 Subject: [PATCH 36/37] remove function from ptxcompiler that is not used --- python/cudf/cudf/utils/_ptxcompiler.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/python/cudf/cudf/utils/_ptxcompiler.py b/python/cudf/cudf/utils/_ptxcompiler.py index c4b8eda2337..5ef27916da7 100644 --- a/python/cudf/cudf/utils/_ptxcompiler.py +++ b/python/cudf/cudf/utils/_ptxcompiler.py @@ -32,19 +32,6 @@ """ -def patch_forced_by_user(): - # The patch is needed if the user explicitly - # forced it with an environment variable.
- apply = os.getenv("PTXCOMPILER_APPLY_NUMBA_CODEGEN_PATCH") - if apply is not None: - try: - apply = int(apply) - except ValueError: - apply = False - - return bool(apply) - - def check_disabled_in_env(): # We should avoid checking whether the patch is # needed if the user requested that we don't check From 1bfb3827f3ac14b7ea9717944aee67e4ec72a4d4 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 23 May 2023 09:05:44 -0700 Subject: [PATCH 37/37] address remaining reviews --- python/cudf/cudf/utils/_numba.py | 4 +++- python/cudf/cudf/utils/_ptxcompiler.py | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/utils/_numba.py b/python/cudf/cudf/utils/_numba.py index 4e9ef65b918..194db9c90a6 100644 --- a/python/cudf/cudf/utils/_numba.py +++ b/python/cudf/cudf/utils/_numba.py @@ -6,7 +6,9 @@ from numba import config -CC_60_PTX_FILE = os.path.dirname(__file__) + "/../core/udf/shim_60.ptx" +CC_60_PTX_FILE = os.path.join( + os.path.dirname(__file__), "../core/udf/shim_60.ptx" +) def _get_best_ptx_file(archs, max_compute_capability): diff --git a/python/cudf/cudf/utils/_ptxcompiler.py b/python/cudf/cudf/utils/_ptxcompiler.py index 5ef27916da7..54f5ea08ee1 100644 --- a/python/cudf/cudf/utils/_ptxcompiler.py +++ b/python/cudf/cudf/utils/_ptxcompiler.py @@ -20,7 +20,7 @@ NO_DRIVER = (math.inf, math.inf) -CMD = """\ +NUMBA_CHECK_VERSION_CMD = """\ from ctypes import c_int, byref from numba import cuda dv = c_int(0) @@ -49,7 +49,9 @@ def check_disabled_in_env(): def get_versions(): - cp = subprocess.run([sys.executable, "-c", CMD], capture_output=True) + cp = subprocess.run( + [sys.executable, "-c", NUMBA_CHECK_VERSION_CMD], capture_output=True + ) if cp.returncode: msg = ( f"Error getting driver and runtime versions:\n\nstdout:\n\n"