diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 4031f1aa1c3..b6daea7c2bc 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -48,7 +48,7 @@ dependencies:
 - nbsphinx
 - ninja
 - notebook
-- numba>=0.56.4,<0.57
+- numba>=0.57
 - numpy>=1.21,<1.24
 - numpydoc
 - nvcc_linux-64=11.8
diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml
index 005792d187f..6a0faa0ebbc 100644
--- a/conda/recipes/cudf/meta.yaml
+++ b/conda/recipes/cudf/meta.yaml
@@ -64,7 +64,7 @@ requirements:
     - typing_extensions
     - pandas >=1.3,<1.6.0dev0
     - cupy >=12.0.0
-    - numba >=0.56.4,<0.57
+    - numba >=0.57
     - numpy >=1.21,<1.24  # Temporarily upper bound numpy to avoid overflow deprecations
     - {{ pin_compatible('pyarrow', max_pin='x.x.x') }}
     - libcudf {{ version }}
diff --git a/dependencies.yaml b/dependencies.yaml
index 70d7f8c1ec8..e3fcbe69932 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -350,7 +350,7 @@ dependencies:
         packages:
           - cachetools
           - cuda-python>=11.7.1,<12.0
-          - &numba numba>=0.56.4,<0.57
+          - &numba numba>=0.57
           - nvtx>=0.2.1
           - packaging
           - rmm==23.6.*
diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py
index 06310e278a2..de0f2d67add 100644
--- a/python/cudf/cudf/__init__.py
+++ b/python/cudf/cudf/__init__.py
@@ -1,7 +1,12 @@
 # Copyright (c) 2018-2023, NVIDIA CORPORATION.
 
+# _setup_numba must be called before numba.cuda is imported, because
+# it sets the numba config variable responsible for enabling
+# Minor Version Compatibility. Setting it after importing numba.cuda has no effect.
+from cudf.utils._numba import _setup_numba
 from cudf.utils.gpu_utils import validate_setup
 
+_setup_numba()
 validate_setup()
 
 import cupy
@@ -83,32 +88,9 @@
 from cudf.utils.dtypes import _NA_REP
 from cudf.utils.utils import clear_cache, set_allocator
 
-try:
-    from cubinlinker.patch import patch_numba_linker_if_needed
-except ImportError:
-    pass
-else:
-    # Patch Numba to support CUDA enhanced compatibility.
-    # cuDF requires a stronger set of conditions than what is
-    # checked by patch_numba_linker_if_needed due to the PTX
-    # files needed for JIT Groupby Apply and string UDFs
-    from cudf.core.udf.utils import _PTX_FILE, _setup_numba_linker
-
-    _setup_numba_linker(_PTX_FILE)
-
-    del patch_numba_linker_if_needed
-
 cuda.set_memory_manager(RMMNumbaManager)
 cupy.cuda.set_allocator(rmm_cupy_allocator)
 
-try:
-    # Numba 0.54: Disable low occupancy warnings
-    numba_config.CUDA_LOW_OCCUPANCY_WARNINGS = 0
-except AttributeError:
-    # Numba < 0.54: No occupancy warnings
-    pass
-del numba_config
-
 
 rmm.register_reinitialize_hook(clear_cache)
 
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 7141958f62d..abd8ad162c7 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -68,6 +68,7 @@
     _return_arr_from_dtype,
 )
 from cudf.utils import docutils
+from cudf.utils._numba import _CUDFNumbaConfig
 from cudf.utils.utils import _cudf_nvtx_annotate
 
 doc_reset_index_template = """
@@ -2193,7 +2194,8 @@ def _apply(self, func, kernel_getter, *args, **kwargs):
         input_args = _get_input_args_from_frame(self)
         launch_args = output_args + input_args + list(args)
         try:
-            kernel.forall(len(self))(*launch_args)
+            with _CUDFNumbaConfig():
+                kernel.forall(len(self))(*launch_args)
         except Exception as e:
             raise RuntimeError("UDF kernel execution failed.") from e
 
diff --git a/python/cudf/cudf/core/udf/groupby_utils.py b/python/cudf/cudf/core/udf/groupby_utils.py
index f0d168fb733..60eba7eb37b 100644
--- a/python/cudf/cudf/core/udf/groupby_utils.py
+++ b/python/cudf/cudf/core/udf/groupby_utils.py
@@ -27,6 +27,7 @@
     _supported_cols_from_frame,
     _supported_dtypes_from_frame,
 )
+from cudf.utils._numba import _CUDFNumbaConfig
 from cudf.utils.utils import _cudf_nvtx_annotate
 
 
@@ -196,7 +197,8 @@ def jit_groupby_apply(offsets, grouped_values, function, *args):
     )
 
     # Launch kernel
-    specialized[ngroups, tpb](*launch_args)
+    with _CUDFNumbaConfig():
+        specialized[ngroups, tpb](*launch_args)
 
     return output
 
diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py
index 9d7df530ccc..35a3f6c1ffd 100644
--- a/python/cudf/cudf/core/udf/utils.py
+++ b/python/cudf/cudf/core/udf/utils.py
@@ -1,6 +1,5 @@
 # Copyright (c) 2020-2023, NVIDIA CORPORATION.
 
-import glob
 import os
 from typing import Any, Callable, Dict
 
@@ -13,7 +12,6 @@
 from numba.core.datamodel import default_manager, models
 from numba.core.errors import TypingError
 from numba.core.extending import register_model
-from numba.cuda.cudadrv.driver import Linker
 from numba.np import numpy_support
 from numba.types import CPointer, Poison, Record, Tuple, boolean, int64, void
 
@@ -33,6 +31,7 @@
     udf_string,
 )
 from cudf.utils import cudautils
+from cudf.utils._numba import _get_ptx_file
 from cudf.utils.dtypes import (
     BOOL_TYPES,
     DATETIME_TYPES,
@@ -63,58 +62,6 @@
 precompiled: cachetools.LRUCache = cachetools.LRUCache(maxsize=32)
 launch_arg_getters: Dict[Any, Any] = {}
 
-
-def _get_best_ptx_file(archs, max_compute_capability):
-    """
-    Determine of the available PTX files which one is
-    the most recent up to and including the device cc
-    """
-    filtered_archs = [x for x in archs if x[0] <= max_compute_capability]
-    if filtered_archs:
-        return max(filtered_archs, key=lambda y: y[0])
-    else:
-        return None
-
-
-def _get_ptx_file(path, prefix):
-    if "RAPIDS_NO_INITIALIZE" in os.environ:
-        # cc=60 ptx is always built
-        cc = int(os.environ.get("STRINGS_UDF_CC", "60"))
-    else:
-        dev = cuda.get_current_device()
-
-        # Load the highest compute capability file available that is less than
-        # the current device's.
-        cc = int("".join(str(x) for x in dev.compute_capability))
-    files = glob.glob(os.path.join(path, f"{prefix}*.ptx"))
-    if len(files) == 0:
-        raise RuntimeError(f"Missing PTX files for cc={cc}")
-    regular_sms = []
-
-    for f in files:
-        file_name = os.path.basename(f)
-        sm_number = file_name.rstrip(".ptx").lstrip(prefix)
-        if sm_number.endswith("a"):
-            processed_sm_number = int(sm_number.rstrip("a"))
-            if processed_sm_number == cc:
-                return f
-        else:
-            regular_sms.append((int(sm_number), f))
-
-    regular_result = None
-
-    if regular_sms:
-        regular_result = _get_best_ptx_file(regular_sms, cc)
-
-    if regular_result is None:
-        raise RuntimeError(
-            "This cuDF installation is missing the necessary PTX "
-            f"files that are <={cc}."
-        )
-    else:
-        return regular_result[1]
-
-
 _PTX_FILE = _get_ptx_file(os.path.dirname(__file__), "shim_")
 
 
@@ -392,97 +339,6 @@ def _get_extensionty_size(ty):
     return llty.get_abi_size(target_data)
 
 
-def _get_cuda_version_from_ptx_file(path):
-    """
-    https://docs.nvidia.com/cuda/parallel-thread-execution/
-    Each PTX module must begin with a .version
-    directive specifying the PTX language version
-
-    example header:
-    //
-    // Generated by NVIDIA NVVM Compiler
-    //
-    // Compiler Build ID: CL-31057947
-    // Cuda compilation tools, release 11.6, V11.6.124
-    // Based on NVVM 7.0.1
-    //
-
-    .version 7.6
-    .target sm_52
-    .address_size 64
-
-    """
-    with open(path) as ptx_file:
-        for line in ptx_file:
-            if line.startswith(".version"):
-                ver_line = line
-                break
-        else:
-            raise ValueError("Could not read CUDA version from ptx file.")
-    version = ver_line.strip("\n").split(" ")[1]
-    # from ptx_docs/release_notes above:
-    ver_map = {
-        "7.5": (11, 5),
-        "7.6": (11, 6),
-        "7.7": (11, 7),
-        "7.8": (11, 8),
-        "8.0": (12, 0),
-    }
-
-    cuda_ver = ver_map.get(version)
-    if cuda_ver is None:
-        raise ValueError(
-            f"Could not map PTX version {version} to a CUDA version"
-        )
-
-    return cuda_ver
-
-
-def _setup_numba_linker(path):
-    from ptxcompiler.patch import NO_DRIVER, safe_get_versions
-
-    from cudf.core.udf.utils import (
-        _get_cuda_version_from_ptx_file,
-        maybe_patch_numba_linker,
-    )
-
-    versions = safe_get_versions()
-    if versions != NO_DRIVER:
-        driver_version, runtime_version = versions
-        ptx_toolkit_version = _get_cuda_version_from_ptx_file(path)
-        maybe_patch_numba_linker(
-            driver_version, runtime_version, ptx_toolkit_version
-        )
-
-
-def maybe_patch_numba_linker(
-    driver_version, runtime_version, ptx_toolkit_version
-):
-    from cubinlinker.patch import (
-        _numba_version_ok,
-        get_logger,
-        new_patched_linker,
-    )
-
-    # Numba thinks cubinlinker is only needed if the driver is older than
-    # the ctk, but when PTX files are present, it might also need to patch
-    # because those PTX files may newer than the driver as well
-    logger = get_logger()
-
-    if (driver_version < ptx_toolkit_version) or (
-        driver_version < runtime_version
-    ):
-        logger.debug(
-            "Driver version %s.%s needs patching due to PTX files"
-            % driver_version
-        )
-        if _numba_version_ok:
-            logger.debug("Patching Numba Linker")
-            Linker.new = new_patched_linker
-        else:
-            logger.debug("Cannot patch Numba Linker - unsupported version")
-
-
 @initfunc
 def set_malloc_heap_size(size=None):
     """
diff --git a/python/cudf/cudf/tests/test_extension_compilation.py b/python/cudf/cudf/tests/test_extension_compilation.py
index f1ed17c5df5..857cc114ffa 100644
--- a/python/cudf/cudf/tests/test_extension_compilation.py
+++ b/python/cudf/cudf/tests/test_extension_compilation.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2022, NVIDIA CORPORATION.
+# Copyright (c) 2021-2023, NVIDIA CORPORATION.
 import operator
 
 import cupy as cp
@@ -12,6 +12,7 @@
 from cudf.core.udf.api import Masked
 from cudf.core.udf.masked_typing import MaskedType
 from cudf.testing._utils import parametrize_numeric_dtypes_pairwise
+from cudf.utils._numba import _CUDFNumbaConfig
 
 arith_ops = (
     operator.add,
@@ -106,7 +107,8 @@ def test_kernel(x, y, err):
             err[0] = 3
 
     err = cp.asarray([0], dtype="int8")
-    test_kernel[1, 1](1, 2, err)
+    with _CUDFNumbaConfig():
+        test_kernel[1, 1](1, 2, err)
     assert err[0] == 0
 
 
@@ -214,7 +216,8 @@ def test_kernel(err):
             err[0] = 2
 
     err = cp.asarray([0], dtype="int8")
-    test_kernel[1, 1](err)
+    with _CUDFNumbaConfig():
+        test_kernel[1, 1](err)
     assert err[0] == 0
 
 
@@ -304,7 +307,8 @@ def test_kernel(err):
             err[0] = 2
 
     err = cp.asarray([0], dtype="int8")
-    test_kernel[1, 1](err)
+    with _CUDFNumbaConfig():
+        test_kernel[1, 1](err)
     assert err[0] == 0
 
 
@@ -326,5 +330,6 @@ def test_kernel(err):
             err[0] = 1
 
     err = cp.asarray([0], dtype="int8")
-    test_kernel[1, 1](err)
+    with _CUDFNumbaConfig():
+        test_kernel[1, 1](err)
     assert err[0] == 0
diff --git a/python/cudf/cudf/tests/test_string_udfs.py b/python/cudf/cudf/tests/test_string_udfs.py
index 049dfdc8e30..88c73ccf964 100644
--- a/python/cudf/cudf/tests/test_string_udfs.py
+++ b/python/cudf/cudf/tests/test_string_udfs.py
@@ -22,6 +22,7 @@
 )
 from cudf.core.udf.utils import _PTX_FILE, _get_extensionty_size
 from cudf.testing._utils import assert_eq, sv_to_udf_str
+from cudf.utils._numba import _CUDFNumbaConfig
 
 
 def get_kernels(func, dtype, size):
@@ -85,8 +86,8 @@ def run_udf_test(data, func, dtype):
     sv_kernel, udf_str_kernel = get_kernels(func, dtype, len(data))
 
     expect = pd.Series(data).apply(func)
-
-    sv_kernel.forall(len(data))(str_views, output)
+    with _CUDFNumbaConfig():
+        sv_kernel.forall(len(data))(str_views, output)
     if dtype == "str":
         result = column_from_udf_string_array(output)
     else:
@@ -94,7 +95,8 @@ def run_udf_test(data, func, dtype):
 
     got = cudf.Series(result, dtype=dtype)
     assert_eq(expect, got, check_dtype=False)
-    udf_str_kernel.forall(len(data))(str_views, output)
+    with _CUDFNumbaConfig():
+        udf_str_kernel.forall(len(data))(str_views, output)
     if dtype == "str":
         result = column_from_udf_string_array(output)
     else:
diff --git a/python/cudf/cudf/utils/_numba.py b/python/cudf/cudf/utils/_numba.py
new file mode 100644
index 00000000000..194db9c90a6
--- /dev/null
+++ b/python/cudf/cudf/utils/_numba.py
@@ -0,0 +1,171 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+
+import glob
+import os
+import warnings
+
+from numba import config
+
+CC_60_PTX_FILE = os.path.join(
+    os.path.dirname(__file__), "../core/udf/shim_60.ptx"
+)
+
+
+def _get_best_ptx_file(archs, max_compute_capability):
+    """
+    Determine which of the available PTX files is the most recent,
+    up to and including the device compute capability.
+    """
+    filtered_archs = [x for x in archs if x[0] <= max_compute_capability]
+    if filtered_archs:
+        return max(filtered_archs, key=lambda x: x[0])
+    else:
+        return None
+
+
+def _get_ptx_file(path, prefix):
+    if "RAPIDS_NO_INITIALIZE" in os.environ:
+        # cc=60 ptx is always built
+        cc = int(os.environ.get("STRINGS_UDF_CC", "60"))
+    else:
+        from numba import cuda
+
+        dev = cuda.get_current_device()
+
+        # Load the highest compute capability file available that is less than
+        # or equal to the current device's.
+        cc = int("".join(str(x) for x in dev.compute_capability))
+    files = glob.glob(os.path.join(path, f"{prefix}*.ptx"))
+    if len(files) == 0:
+        raise RuntimeError(f"Missing PTX files for cc={cc}")
+    regular_sms = []
+
+    for f in files:
+        file_name = os.path.basename(f)
+        sm_number = file_name.rstrip(".ptx").lstrip(prefix)
+        if sm_number.endswith("a"):
+            processed_sm_number = int(sm_number.rstrip("a"))
+            if processed_sm_number == cc:
+                return f
+        else:
+            regular_sms.append((int(sm_number), f))
+
+    regular_result = None
+
+    if regular_sms:
+        regular_result = _get_best_ptx_file(regular_sms, cc)
+
+    if regular_result is None:
+        raise RuntimeError(
+            "This cuDF installation is missing the necessary PTX "
+            f"files that are <={cc}."
+        )
+    else:
+        return regular_result[1]
+
+
+def _setup_numba():
+    """
+    Configure the numba linker for use with cuDF. This consists of
+    potentially putting numba into enhanced compatibility mode
+    based on the user driver and runtime versions as well as the
+    version of the CUDA Toolkit used to build the PTX files shipped
+    with the user cuDF package.
+    """
+    # ptxcompiler is a requirement for cuda 11.x packages but not
+    # cuda 12.x packages. However its version checking machinery
+    # is still necessary. If a user happens to have ptxcompiler
+    # in a cuda 12 environment, its use for the purposes of
+    # checking the driver and runtime versions is harmless.
+    try:
+        from ptxcompiler.patch import NO_DRIVER, safe_get_versions
+    except ModuleNotFoundError:
+        # use vendored version
+        from cudf.utils._ptxcompiler import NO_DRIVER, safe_get_versions
+
+    versions = safe_get_versions()
+    if versions != NO_DRIVER:
+        driver_version, runtime_version = versions
+        if driver_version >= (12, 0) and runtime_version > driver_version:
+            warnings.warn(
+                f"Using CUDA toolkit version {runtime_version} with CUDA "
+                f"driver version {driver_version} requires minor version "
+                "compatibility, which is not yet supported for CUDA "
+                "driver versions 12.0 and above. It is likely that many "
+                "cuDF operations will not work in this state. Please "
+                f"install CUDA toolkit version {driver_version} to "
+                "continue using cuDF."
+            )
+        else:
+            # Support MVC for all CUDA versions in the 11.x range
+            ptx_toolkit_version = _get_cuda_version_from_ptx_file(
+                CC_60_PTX_FILE
+            )
+            # Numba thinks cubinlinker is only needed if the driver is older
+            # than the CUDA runtime, but when PTX files are present, it might
+            # also need to patch because those PTX files may be compiled by
+            # a CUDA version that is newer than the driver as well
+            if (driver_version < ptx_toolkit_version) or (
+                driver_version < runtime_version
+            ):
+                config.CUDA_ENABLE_MINOR_VERSION_COMPATIBILITY = 1
+
+
+def _get_cuda_version_from_ptx_file(path):
+    """
+    https://docs.nvidia.com/cuda/parallel-thread-execution/
+    Each PTX module must begin with a .version
+    directive specifying the PTX language version
+
+    example header:
+    //
+    // Generated by NVIDIA NVVM Compiler
+    //
+    // Compiler Build ID: CL-31057947
+    // Cuda compilation tools, release 11.6, V11.6.124
+    // Based on NVVM 7.0.1
+    //
+
+    .version 7.6
+    .target sm_52
+    .address_size 64
+
+    """
+    with open(path) as ptx_file:
+        for line in ptx_file:
+            if line.startswith(".version"):
+                ver_line = line
+                break
+        else:
+            raise ValueError("Could not read CUDA version from ptx file.")
+    version = ver_line.strip("\n").split(" ")[1]
+    # This dictionary maps each supported PTX ISA version to the CUDA
+    # toolkit version that produces it. The lowest value should be the minimum
+    # CUDA version required to compile the library. Currently CUDA 11.5
+    # or higher is required to build cudf. New CUDA versions should
+    # be added to this dictionary when officially supported.
+    ver_map = {
+        "7.5": (11, 5),
+        "7.6": (11, 6),
+        "7.7": (11, 7),
+        "7.8": (11, 8),
+        "8.0": (12, 0),
+        "8.1": (12, 1),
+    }
+
+    cuda_ver = ver_map.get(version)
+    if cuda_ver is None:
+        raise ValueError(
+            f"Could not map PTX version {version} to a CUDA version"
+        )
+
+    return cuda_ver
+
+
+class _CUDFNumbaConfig:
+    def __enter__(self):
+        self.enter_val = config.CUDA_LOW_OCCUPANCY_WARNINGS
+        config.CUDA_LOW_OCCUPANCY_WARNINGS = 0
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        config.CUDA_LOW_OCCUPANCY_WARNINGS = self.enter_val
diff --git a/python/cudf/cudf/utils/_ptxcompiler.py b/python/cudf/cudf/utils/_ptxcompiler.py
new file mode 100644
index 00000000000..54f5ea08ee1
--- /dev/null
+++ b/python/cudf/cudf/utils/_ptxcompiler.py
@@ -0,0 +1,107 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import math
+import os
+import subprocess
+import sys
+import warnings
+
+NO_DRIVER = (math.inf, math.inf)
+
+NUMBA_CHECK_VERSION_CMD = """\
+from ctypes import c_int, byref
+from numba import cuda
+dv = c_int(0)
+cuda.cudadrv.driver.driver.cuDriverGetVersion(byref(dv))
+drv_major = dv.value // 1000
+drv_minor = (dv.value - (drv_major * 1000)) // 10
+run_major, run_minor = cuda.runtime.get_version()
+print(f'{drv_major} {drv_minor} {run_major} {run_minor}')
+"""
+
+
+def check_disabled_in_env():
+    # We should avoid checking whether the patch is
+    # needed if the user requested that we don't check
+    # (e.g. in a non-fork-safe environment)
+    check = os.getenv("PTXCOMPILER_CHECK_NUMBA_CODEGEN_PATCH_NEEDED")
+    if check is not None:
+        try:
+            check = int(check)
+        except ValueError:
+            check = False
+    else:
+        check = True
+
+    return not check
+
+
+def get_versions():
+    cp = subprocess.run(
+        [sys.executable, "-c", NUMBA_CHECK_VERSION_CMD], capture_output=True
+    )
+    if cp.returncode:
+        msg = (
+            f"Error getting driver and runtime versions:\n\nstdout:\n\n"
+            f"{cp.stdout.decode()}\n\nstderr:\n\n{cp.stderr.decode()}\n\n"
+            "Not patching Numba"
+        )
+        warnings.warn(msg, UserWarning)
+        return NO_DRIVER
+
+    versions = [int(s) for s in cp.stdout.strip().split()]
+    driver_version = tuple(versions[:2])
+    runtime_version = tuple(versions[2:])
+
+    return driver_version, runtime_version
+
+
+def safe_get_versions():
+    """
+    Return a 2-tuple of deduced driver and runtime versions.
+
+    To ensure that this function does not initialize a CUDA context,
+    calls to the runtime and driver are made in a subprocess.
+
+    If PTXCOMPILER_CHECK_NUMBA_CODEGEN_PATCH_NEEDED is set to 0 (or to a
+    non-integer value) in the environment, the subprocess is not launched.
+    To specify the driver and runtime versions of the environment
+    in this case, set PTXCOMPILER_KNOWN_DRIVER_VERSION and
+    PTXCOMPILER_KNOWN_RUNTIME_VERSION appropriately.
+    """
+    if check_disabled_in_env():
+        try:
+            # allow user to specify driver/runtime
+            # versions manually, if necessary
+            driver_version = os.environ[
+                "PTXCOMPILER_KNOWN_DRIVER_VERSION"
+            ].split(".")
+            runtime_version = os.environ[
+                "PTXCOMPILER_KNOWN_RUNTIME_VERSION"
+            ].split(".")
+            driver_version, runtime_version = (
+                tuple(map(int, driver_version)),
+                tuple(map(int, runtime_version)),
+            )
+        except (KeyError, ValueError):
+            warnings.warn(
+                "No way to determine driver and runtime versions for "
+                "patching, set PTXCOMPILER_KNOWN_DRIVER_VERSION and "
+                "PTXCOMPILER_KNOWN_RUNTIME_VERSION"
+            )
+            return NO_DRIVER
+    else:
+        driver_version, runtime_version = get_versions()
+    return driver_version, runtime_version
diff --git a/python/cudf/cudf/utils/applyutils.py b/python/cudf/cudf/utils/applyutils.py
index 933b98367b6..b8cf6910402 100644
--- a/python/cudf/cudf/utils/applyutils.py
+++ b/python/cudf/cudf/utils/applyutils.py
@@ -12,6 +12,7 @@
 from cudf.core.buffer import acquire_spill_lock
 from cudf.core.column import column
 from cudf.utils import utils
+from cudf.utils._numba import _CUDFNumbaConfig
 from cudf.utils.docutils import docfmt_partial
 
 _doc_applyparams = """
@@ -195,7 +196,8 @@ def compile(self, func, argnames, extra_argnames):
         return kernel
 
     def launch_kernel(self, df, args):
-        self.kernel.forall(len(df))(*args)
+        with _CUDFNumbaConfig():
+            self.kernel.forall(len(df))(*args)
 
 
 class ApplyChunksCompiler(ApplyKernelCompilerBase):
@@ -209,12 +211,14 @@ def compile(self, func, argnames, extra_argnames):
     def launch_kernel(self, df, args, chunks, blkct=None, tpb=None):
         chunks = self.normalize_chunks(len(df), chunks)
         if blkct is None and tpb is None:
-            self.kernel.forall(len(df))(len(df), chunks, *args)
+            with _CUDFNumbaConfig():
+                self.kernel.forall(len(df))(len(df), chunks, *args)
         else:
             assert tpb is not None
             if blkct is None:
                 blkct = chunks.size
-            self.kernel[blkct, tpb](len(df), chunks, *args)
+            with _CUDFNumbaConfig():
+                self.kernel[blkct, tpb](len(df), chunks, *args)
 
     def normalize_chunks(self, size, chunks):
         if isinstance(chunks, int):
diff --git a/python/cudf/cudf/utils/cudautils.py b/python/cudf/cudf/utils/cudautils.py
index e2bd4556ce8..a10eaab0bff 100755
--- a/python/cudf/cudf/utils/cudautils.py
+++ b/python/cudf/cudf/utils/cudautils.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2022, NVIDIA CORPORATION.
+# Copyright (c) 2018-2023, NVIDIA CORPORATION.
 
 from pickle import dumps
 
@@ -8,6 +8,7 @@
 from numba.np import numpy_support
 
 import cudf
+from cudf.utils._numba import _CUDFNumbaConfig
 
 #
 # Misc kernels
@@ -80,19 +81,20 @@ def find_index_of_val(arr, val, mask=None, compare="eq"):
     """
     found = cuda.device_array(shape=(arr.shape), dtype="int32")
     if found.size > 0:
-        if compare == "gt":
-            gpu_mark_gt.forall(found.size)(arr, val, found, arr.size)
-        elif compare == "lt":
-            gpu_mark_lt.forall(found.size)(arr, val, found, arr.size)
-        else:
-            if arr.dtype in ("float32", "float64"):
-                gpu_mark_found_float.forall(found.size)(
-                    arr, val, found, arr.size
-                )
+        with _CUDFNumbaConfig():
+            if compare == "gt":
+                gpu_mark_gt.forall(found.size)(arr, val, found, arr.size)
+            elif compare == "lt":
+                gpu_mark_lt.forall(found.size)(arr, val, found, arr.size)
             else:
-                gpu_mark_found_int.forall(found.size)(
-                    arr, val, found, arr.size
-                )
+                if arr.dtype in ("float32", "float64"):
+                    gpu_mark_found_float.forall(found.size)(
+                        arr, val, found, arr.size
+                    )
+                else:
+                    gpu_mark_found_int.forall(found.size)(
+                        arr, val, found, arr.size
+                    )
 
     return cudf.core.column.column.as_column(found).set_mask(mask)
 
@@ -154,9 +156,10 @@ def gpu_window_sizes_from_offset(arr, window_sizes, offset):
 def window_sizes_from_offset(arr, offset):
     window_sizes = cuda.device_array(shape=(arr.shape), dtype="int32")
     if arr.size > 0:
-        gpu_window_sizes_from_offset.forall(arr.size)(
-            arr, window_sizes, offset
-        )
+        with _CUDFNumbaConfig():
+            gpu_window_sizes_from_offset.forall(arr.size)(
+                arr, window_sizes, offset
+            )
     return window_sizes
 
 
@@ -177,9 +180,10 @@ def gpu_grouped_window_sizes_from_offset(
 def grouped_window_sizes_from_offset(arr, group_starts, offset):
     window_sizes = cuda.device_array(shape=(arr.shape), dtype="int32")
     if arr.size > 0:
-        gpu_grouped_window_sizes_from_offset.forall(arr.size)(
-            arr, window_sizes, group_starts, offset
-        )
+        with _CUDFNumbaConfig():
+            gpu_grouped_window_sizes_from_offset.forall(arr.size)(
+                arr, window_sizes, group_starts, offset
+            )
     return window_sizes
 
 
diff --git a/python/cudf/cudf/utils/queryutils.py b/python/cudf/cudf/utils/queryutils.py
index 4ce89b526d6..51093375eda 100644
--- a/python/cudf/cudf/utils/queryutils.py
+++ b/python/cudf/cudf/utils/queryutils.py
@@ -11,6 +11,7 @@
 from cudf.core.buffer import acquire_spill_lock
 from cudf.core.column import column_empty
 from cudf.utils import applyutils
+from cudf.utils._numba import _CUDFNumbaConfig
 from cudf.utils.dtypes import (
     BOOL_TYPES,
     DATETIME_TYPES,
@@ -247,6 +248,7 @@ def query_execute(df, expr, callenv):
     out = column_empty(nrows, dtype=np.bool_)
     # run kernel
     args = [out] + colarrays + envargs
-    kernel.forall(nrows)(*args)
+    with _CUDFNumbaConfig():
+        kernel.forall(nrows)(*args)
     out_mask = applyutils.make_aggregate_nullmask(df, columns=columns)
     return out.set_mask(out_mask).fillna(False)
diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml
index d13324a7404..b08dd92d52f 100644
--- a/python/cudf/pyproject.toml
+++ b/python/cudf/pyproject.toml
@@ -31,7 +31,7 @@ dependencies = [
     "cuda-python>=11.7.1,<12.0",
     "cupy-cuda11x>=12.0.0",
     "fsspec>=0.6.0",
-    "numba>=0.56.4,<0.57",
+    "numba>=0.57",
     "numpy>=1.21,<1.24",
     "nvtx>=0.2.1",
     "packaging",
diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml
index ff2a3f2d095..42b6c26c002 100644
--- a/python/dask_cudf/pyproject.toml
+++ b/python/dask_cudf/pyproject.toml
@@ -40,7 +40,7 @@ dynamic = ["entry-points"]
 [project.optional-dependencies]
 test = [
     "dask-cuda==23.6.*",
-    "numba>=0.56.4,<0.57",
+    "numba>=0.57",
     "pytest",
     "pytest-cov",
     "pytest-xdist",