Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes for timeouts in tests #5598

Merged
merged 5 commits into from
Oct 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 12 additions & 2 deletions python/cuml/internals/available_devices.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,22 @@
# Memoizing decorator with no size limit (lru_cache with maxsize=None); used
# below so that the availability check is evaluated once and then reused.
cache = lru_cache(maxsize=None)


# NOTE(review): presumably resolves rmm's getDeviceCount through the project's
# gpu_only_import_from helper -- confirm against that helper's definition.
get_cuda_count = gpu_only_import_from("rmm._cuda.gpu", "getDeviceCount")
def gpu_available_no_context_creation():
    """
    Check whether GPUs look available without creating a CUDA context.

    The presence of an importable CuPy is used as a proxy: no device query
    is issued, the function only attempts ``import cupy``.

    Returns
    -------
    bool
        True if ``import cupy`` succeeds, False if it raises ImportError.
    """
    try:
        # Only the import itself is probed; the module object is not used.
        import cupy  # noqa: F401
    except ImportError:
        return False
    return True


@cache
def is_cuda_available():
    """
    Report whether a GPU should be treated as usable.

    The answer is ``GPU_ENABLED and gpu_available_no_context_creation()``,
    i.e. it is truthy only when GPU support is enabled and the CuPy import
    probe succeeds. The device-count query that used to sit here is not
    used any more, so that this check does not have to create a CUDA
    context (see ``gpu_available_no_context_creation``).

    The result is memoized by ``cache``, so the probe runs at most once.

    Returns
    -------
    bool
        Truthy when a GPU is considered available; False when
        ``UnavailableError`` is raised while evaluating the check.
    """
    try:
        return GPU_ENABLED and gpu_available_no_context_creation()
    except UnavailableError:
        return False
2 changes: 1 addition & 1 deletion python/cuml/solvers/cd.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ class CD(Base,
<double>self.tol,
<double*>sample_weight_ptr)

self.intercept_ = _c_intercept2_f64
self.intercept_ = _c_intercept2_f64
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unrelated fix?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, it just seemed way too small of a change to warrant a full PR with CI run


self.handle.sync()
del X_m
Expand Down
2 changes: 2 additions & 0 deletions python/cuml/tests/test_nearest_neighbors.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,8 @@ def test_ivfflat_pred(nrows, ncols, n_neighbors, nlist):
def test_ivfpq_pred(
nrows, ncols, n_neighbors, nlist, M, n_bits, usePrecomputedTables
):
if ncols == 512 and usePrecomputedTables is True:
pytest.skip("https://github.com/rapidsai/cuml/issues/5603")
algo_params = {
"nlist": nlist,
"nprobe": int(nlist * 0.2),
Expand Down
109 changes: 109 additions & 0 deletions python/cuml/tests/test_no_cuinit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# Copyright (c) 2023, NVIDIA CORPORATION.

import os
import subprocess
import sys
from shutil import which

import pytest

# Command script handed to cuda-gdb on stdin (via ``-x -``) by the tests in
# this module: turn off confirmation prompts, let the breakpoint stay pending
# until its symbol is loaded, break on cuInit, run the target once, then exit.
# A hit breakpoint shows up in gdb's stdout, which is what the tests look for.
GDB_COMMANDS = """
set confirm off
set breakpoint pending on
break cuInit
run
exit
"""


@pytest.fixture(scope="module")
def cuda_gdb(request):
    """
    Locate ``cuda-gdb`` on PATH for the tests in this module.

    When the debugger is missing, or ``cuda-gdb --version`` exits with a
    non-zero status, an ``xfail`` marker carrying the reason is applied
    through ``request`` instead of raising here.

    Returns
    -------
    str or None
        The path reported by ``shutil.which``, or None if it was not found.
    """
    gdb_path = which("cuda-gdb")
    if gdb_path is None:
        missing_reason = "No cuda-gdb found, can't detect cuInit"
        request.applymarker(pytest.mark.xfail(reason=missing_reason))
        return gdb_path

    # Make sure the binary actually starts on this platform before using it.
    version_check = subprocess.run(
        [gdb_path, "--version"], capture_output=True, text=True
    )
    if version_check.returncode:
        broken_reason = (
            "cuda-gdb not working on this platform, "
            f"can't detect cuInit: {version_check.stderr}"
        )
        request.applymarker(pytest.mark.xfail(reason=broken_reason))
    return gdb_path


def test_cuml_import_no_cuinit(cuda_gdb):
    """
    Check that ``import cuml`` does not hit cuInit under RAPIDS_NO_INITIALIZE.

    Runs ``python -c "import cuml"`` under cuda-gdb with a breakpoint on
    cuInit and asserts that gdb exits cleanly and never reports the
    breakpoint in its stdout.
    """
    # When RAPIDS_NO_INITIALIZE is set, importing cuml should _not_
    # create a CUDA context (i.e. cuInit should not be called).
    # Intercepting the call to cuInit programmatically is tricky since
    # cuda-python/numba/cupy resolve it from dynamic libraries in many
    # different ways (see the discussion at
    # https://github.com/rapidsai/cuml/pull/12361, which does this but
    # needs to provide hooks that override dlsym, cuGetProcAddress, and
    # cuInit).
    # NOTE(review): that PR number looks too high for this repository;
    # the link may have been meant for another RAPIDS repo -- confirm.
    # Instead, we just run under GDB and see if we hit a breakpoint.
    env = os.environ.copy()
    env["RAPIDS_NO_INITIALIZE"] = "1"
    output = subprocess.run(
        [
            cuda_gdb,
            "-x",
            "-",
            "--args",
            sys.executable,
            "-c",
            "import cuml",
        ],
        input=GDB_COMMANDS,
        env=env,
        capture_output=True,
        text=True,
    )

    # gdb prints "in cuInit ()" when the breakpoint is hit.
    cuInit_called = "in cuInit ()" in output.stdout
    # Dump both streams so a failure can be diagnosed from the test log.
    print("Command output:\n")
    print("*** STDOUT ***")
    print(output.stdout)
    print("*** STDERR ***")
    print(output.stderr)
    assert output.returncode == 0
    assert not cuInit_called


def test_cuml_create_estimator_cuinit(cuda_gdb):
    """
    Sanity-check the gdb scripting against a case that must call cuInit.

    Runs a snippet that allocates a CuPy array under cuda-gdb with a
    breakpoint on cuInit and asserts that gdb exits cleanly and reports the
    breakpoint in its stdout. Despite the name, the payload only uses CuPy;
    no estimator is constructed.
    """
    # This tests that our gdb scripting correctly identifies cuInit
    # when it definitely should have been called.
    env = os.environ.copy()
    env["RAPIDS_NO_INITIALIZE"] = "1"
    output = subprocess.run(
        [
            cuda_gdb,
            "-x",
            "-",
            "--args",
            sys.executable,
            "-c",
            "import cupy as cp; a = cp.ones(10)",
        ],
        input=GDB_COMMANDS,
        env=env,
        capture_output=True,
        text=True,
    )

    # gdb prints "in cuInit ()" when the breakpoint is hit.
    cuInit_called = "in cuInit ()" in output.stdout
    # Dump both streams so a failure can be diagnosed from the test log.
    print("Command output:\n")
    print("*** STDOUT ***")
    print(output.stdout)
    print("*** STDERR ***")
    print(output.stderr)
    assert output.returncode == 0
    assert cuInit_called