From 107d44389c6a98d5c80287ba65238d2a8455e853 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 23 Nov 2021 11:31:50 -0600 Subject: [PATCH 01/16] DBG Add debug print to ci build.sh --- ci/gpu/build.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 2d19f017da..a49aa043b7 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -202,6 +202,7 @@ else gpuci_logger "Building cuml" "$WORKSPACE/build.sh" -v cuml --codecov + gpuci_logger "Debugging PR for pytests" gpuci_logger "Python pytest for cuml" cd $WORKSPACE/python From 1adb81fa773c948e509a86137e722c59913732b4 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 23 Nov 2021 14:56:51 -0600 Subject: [PATCH 02/16] DBG Skip hdbscan in test_api --- python/cuml/test/test_api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/cuml/test/test_api.py b/python/cuml/test/test_api.py index c4efab811a..6e48c9927b 100644 --- a/python/cuml/test/test_api.py +++ b/python/cuml/test/test_api.py @@ -231,6 +231,7 @@ def test_fit_function(dataset, model_name): "AutoARIMA", "MultinomialNB", "LabelEncoder", + "HDBSCAN" ]: pytest.xfail("These models are not tested yet") From 8d0bce71a0a97cc8fcb7ccb99ef10c99cc50ebc7 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 23 Nov 2021 16:06:27 -0600 Subject: [PATCH 03/16] DBG Skip test_fit_function in test_api --- python/cuml/test/test_api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/cuml/test/test_api.py b/python/cuml/test/test_api.py index 6e48c9927b..1e20975c57 100644 --- a/python/cuml/test/test_api.py +++ b/python/cuml/test/test_api.py @@ -222,6 +222,7 @@ def test_mro(model): @pytest.mark.parametrize("model_name", list(models.keys())) # ignore random forest float64 warnings @pytest.mark.filterwarnings("ignore:To use pickling or GPU-based") +@pytest.mark.skip(reason="CEC debugging") def test_fit_function(dataset, model_name): # This test ensures that our estimators return self after a call to fit if 
model_name in [ From 276b40afda4f1c0b7c9e4dba8bad47743ae4c5fe Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Tue, 23 Nov 2021 20:47:57 -0600 Subject: [PATCH 04/16] DBG Upgrade libcusolver --- conda/recipes/libcuml/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/recipes/libcuml/meta.yaml b/conda/recipes/libcuml/meta.yaml index 6333e5dc84..c3f3f0550f 100644 --- a/conda/recipes/libcuml/meta.yaml +++ b/conda/recipes/libcuml/meta.yaml @@ -57,7 +57,7 @@ requirements: - treelite=2.1.0 - faiss-proc=*=cuda - libfaiss 1.7.0 *_cuda - - libcusolver>=11.2.1 + - libcusolver>=11.3.2 about: home: http://rapids.ai/ From 923ca75f61980710dbd326c46c78eed4681870c7 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Wed, 24 Nov 2021 02:52:41 -0600 Subject: [PATCH 05/16] DBG roll back libcusolver version --- conda/recipes/libcuml/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/recipes/libcuml/meta.yaml b/conda/recipes/libcuml/meta.yaml index c3f3f0550f..6333e5dc84 100644 --- a/conda/recipes/libcuml/meta.yaml +++ b/conda/recipes/libcuml/meta.yaml @@ -57,7 +57,7 @@ requirements: - treelite=2.1.0 - faiss-proc=*=cuda - libfaiss 1.7.0 *_cuda - - libcusolver>=11.3.2 + - libcusolver>=11.2.1 about: home: http://rapids.ai/ From 2f289ea94f757e0904888a8e2f34e9b8d517fded Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Wed, 24 Nov 2021 03:15:15 -0600 Subject: [PATCH 06/16] DBG reset ld_library_path in project flash code path --- ci/gpu/build.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index b8c3564269..d91e28351b 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -202,6 +202,10 @@ else gpuci_logger "Building cuml" "$WORKSPACE/build.sh" -v cuml --codecov + gpuci_logger "Resetting LD_LIBRARY_PATH" + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH_CACHED + export LD_LIBRARY_PATH_CACHED="" + gpuci_logger "Debugging PR for pytests" gpuci_logger "Python pytest for cuml" cd 
$WORKSPACE/python From f80a1dc1af1f1c24ae98a938efdfb0795a2cecc6 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 26 Nov 2021 11:20:24 -0500 Subject: [PATCH 07/16] DBG Use project flash to build cuml python package --- ci/cpu/build.sh | 9 +++++++++ ci/gpu/build.sh | 19 ++++++++++++------- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index c2f6411496..fe3f1ac104 100755 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -60,6 +60,13 @@ conda list --show-channel-urls # FIX Added to deal with Anancoda SSL verification issues during conda builds conda config --set ssl_verify False +# FIXME: for now, force the building of all packages so they are built on a +# machine with a single CUDA version, then have the gpu/build.sh script simply +# install. This should eliminate a mismatch between different CUDA versions on +# cpu vs. gpu builds that is problematic with CUDA 11.5 Enhanced Compat. +BUILD_LIBCUML=1 +BUILD_CUML=1 + ################################################################################ # BUILD - Conda package builds (conda deps: libcuml <- cuml) ################################################################################ @@ -85,6 +92,8 @@ if [ "$BUILD_CUML" == '1' ]; then else gpuci_logger "PROJECT FLASH: Build conda pkg for cuml" gpuci_conda_retry build --croot ${CONDA_BLD_DIR} -c ci/artifacts/cuml/cpu/.conda-bld/ --dirty --no-remove-work-dir conda/recipes/cuml --python=${PYTHON} + mkdir -p ${CONDA_BLD_DIR}/libcuml/work + cp -r ${CONDA_BLD_DIR}/work/* ${CONDA_BLD_DIR}/libcuml/work fi fi diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index d91e28351b..8dbb20ad25 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -192,6 +192,15 @@ else CONDA_FILE=${CONDA_FILE//-/=} #convert to conda install gpuci_logger "Installing $CONDA_FILE" gpuci_mamba_retry install -c ${CONDA_ARTIFACT_PATH} "$CONDA_FILE" + + # FIXME: also install the python package here (see FIXME in cpu build + # script, and below) + 
gpuci_logger "DEBUG: Attempt 1 to install cuml conda" + CONDA_FILE=`find ${CONDA_ARTIFACT_PATH} -name "cuml*.tar.bz2"` + CONDA_FILE=`basename "$CONDA_FILE" .tar.bz2` #get filename without extension + CONDA_FILE=${CONDA_FILE//-/=} #convert to conda install + echo "Installing $CONDA_FILE" + gpuci_mamba_retry install -c ${CONDA_ARTIFACT_PATH} "$CONDA_FILE" gpuci_logger "Install the main version of dask and distributed" set -x @@ -199,13 +208,9 @@ else pip install "git+https://github.com/dask/dask.git@2021.11.2" --upgrade --no-deps set +x - gpuci_logger "Building cuml" - "$WORKSPACE/build.sh" -v cuml --codecov - - gpuci_logger "Resetting LD_LIBRARY_PATH" - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH_CACHED - export LD_LIBRARY_PATH_CACHED="" - + # gpuci_logger "Building cuml" + # "$WORKSPACE/build.sh" -v cuml --codecov + gpuci_logger "Debugging PR for pytests" gpuci_logger "Python pytest for cuml" cd $WORKSPACE/python From 39c2ecf21ed63dbc3881853610623296116d22ae Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 26 Nov 2021 11:52:15 -0500 Subject: [PATCH 08/16] DBG Add no build id conda flag --- ci/cpu/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index fe3f1ac104..75e4a10504 100755 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -91,7 +91,7 @@ if [ "$BUILD_CUML" == '1' ]; then gpuci_conda_retry build --croot ${CONDA_BLD_DIR} conda/recipes/cuml --python=${PYTHON} else gpuci_logger "PROJECT FLASH: Build conda pkg for cuml" - gpuci_conda_retry build --croot ${CONDA_BLD_DIR} -c ci/artifacts/cuml/cpu/.conda-bld/ --dirty --no-remove-work-dir conda/recipes/cuml --python=${PYTHON} + gpuci_conda_retry build --no-build-id --croot ${CONDA_BLD_DIR} -c ci/artifacts/cuml/cpu/.conda-bld/ --dirty --no-remove-work-dir conda/recipes/cuml --python=${PYTHON} mkdir -p ${CONDA_BLD_DIR}/libcuml/work cp -r ${CONDA_BLD_DIR}/work/* ${CONDA_BLD_DIR}/libcuml/work fi From 79c67e79247f90ed80347088b6a93be4fd6b5fe0 Mon Sep 17 
00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 26 Nov 2021 12:29:58 -0500 Subject: [PATCH 09/16] DBG conda-build flags correction --- ci/cpu/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index 75e4a10504..9f5148d099 100755 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -91,7 +91,7 @@ if [ "$BUILD_CUML" == '1' ]; then gpuci_conda_retry build --croot ${CONDA_BLD_DIR} conda/recipes/cuml --python=${PYTHON} else gpuci_logger "PROJECT FLASH: Build conda pkg for cuml" - gpuci_conda_retry build --no-build-id --croot ${CONDA_BLD_DIR} -c ci/artifacts/cuml/cpu/.conda-bld/ --dirty --no-remove-work-dir conda/recipes/cuml --python=${PYTHON} + gpuci_conda_retry build --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/cuml -c $CONDA_BLD_DIR --dirty --no-remove-work-dir --python=${PYTHON} mkdir -p ${CONDA_BLD_DIR}/libcuml/work cp -r ${CONDA_BLD_DIR}/work/* ${CONDA_BLD_DIR}/libcuml/work fi From 9f299f45f39afc5d575c787c302116349dd56184 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 26 Nov 2021 13:26:05 -0500 Subject: [PATCH 10/16] DBG Fix for folder permission issue --- ci/cpu/build.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index 9f5148d099..b923795dc0 100755 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -82,6 +82,7 @@ else gpuci_conda_retry build --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/libcuml --dirty --no-remove-work-dir mkdir -p ${CONDA_BLD_DIR}/libcuml/work cp -r ${CONDA_BLD_DIR}/work/* ${CONDA_BLD_DIR}/libcuml/work + rm -rf ${CONDA_BLD_DIR}/work fi fi @@ -92,8 +93,9 @@ if [ "$BUILD_CUML" == '1' ]; then else gpuci_logger "PROJECT FLASH: Build conda pkg for cuml" gpuci_conda_retry build --no-build-id --croot ${CONDA_BLD_DIR} conda/recipes/cuml -c $CONDA_BLD_DIR --dirty --no-remove-work-dir --python=${PYTHON} - mkdir -p ${CONDA_BLD_DIR}/libcuml/work - cp -r ${CONDA_BLD_DIR}/work/* ${CONDA_BLD_DIR}/libcuml/work + mkdir 
-p ${CONDA_BLD_DIR}/cuml/work + cp -r ${CONDA_BLD_DIR}/work/* ${CONDA_BLD_DIR}/cuml/work + rm -rf ${CONDA_BLD_DIR}/work fi fi From 17c2201da635e728962545ef76487f17c6dc3568 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 26 Nov 2021 15:00:10 -0500 Subject: [PATCH 11/16] DBG modification to run pytests and skip c++ tests temporarily --- ci/gpu/build.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 8dbb20ad25..9bacef3186 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -181,11 +181,11 @@ else chrpath -d libcuml++.so patchelf --replace-needed `patchelf --print-needed libcuml++.so | grep faiss` libfaiss.so libcuml++.so - gpuci_logger "GoogleTest for libcuml" + gpuci_logger "DEBUG: skipping GoogleTest for libcuml" cd $LIBCUML_BUILD_DIR chrpath -d ./test/ml patchelf --replace-needed `patchelf --print-needed ./test/ml | grep faiss` libfaiss.so ./test/ml - GTEST_OUTPUT="xml:${WORKSPACE}/test-results/libcuml_cpp/" ./test/ml + # GTEST_OUTPUT="xml:${WORKSPACE}/test-results/libcuml_cpp/" ./test/ml CONDA_FILE=`find ${CONDA_ARTIFACT_PATH} -name "libcuml*.tar.bz2"` CONDA_FILE=`basename "$CONDA_FILE" .tar.bz2` #get filename without extension @@ -211,9 +211,11 @@ else # gpuci_logger "Building cuml" # "$WORKSPACE/build.sh" -v cuml --codecov - gpuci_logger "Debugging PR for pytests" gpuci_logger "Python pytest for cuml" cd $WORKSPACE/python + + # Removing all folders except cuml/test since we are not building cython extensions in place + find ./cuml -mindepth 1 ! -regex '^./cuml/test\(/.*\)?' 
-delete pytest --cache-clear --basetemp=${WORKSPACE}/cuml-cuda-tmp --junitxml=${WORKSPACE}/junit-cuml.xml -v -s -m "not memleak" --durations=50 --timeout=300 --ignore=cuml/test/dask --ignore=cuml/raft --cov-config=.coveragerc --cov=cuml --cov-report=xml:${WORKSPACE}/python/cuml/cuml-coverage.xml --cov-report term From 9b55d2597fdc9097709b1a9182be960815dce00d Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 26 Nov 2021 16:49:26 -0500 Subject: [PATCH 12/16] DBG temporarily use project flash cuml python only for 11.0 --- ci/gpu/build.sh | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 9bacef3186..7a52fa1683 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -193,29 +193,35 @@ else gpuci_logger "Installing $CONDA_FILE" gpuci_mamba_retry install -c ${CONDA_ARTIFACT_PATH} "$CONDA_FILE" - # FIXME: also install the python package here (see FIXME in cpu build - # script, and below) - gpuci_logger "DEBUG: Attempt 1 to install cuml conda" - CONDA_FILE=`find ${CONDA_ARTIFACT_PATH} -name "cuml*.tar.bz2"` - CONDA_FILE=`basename "$CONDA_FILE" .tar.bz2` #get filename without extension - CONDA_FILE=${CONDA_FILE//-/=} #convert to conda install - echo "Installing $CONDA_FILE" - gpuci_mamba_retry install -c ${CONDA_ARTIFACT_PATH} "$CONDA_FILE" - + # FIXME: Project FLASH only builds for python version 3.7 which is the one used in + # the CUDA 11.0 job, need to change all versions to project flash + if [ "$py_ver" == "3.7" ];then + gpuci_logger "Using Project FLASH to install cuml python" + CONDA_FILE=`find ${CONDA_ARTIFACT_PATH} -name "cuml*.tar.bz2"` + CONDA_FILE=`basename "$CONDA_FILE" .tar.bz2` #get filename without extension + CONDA_FILE=${CONDA_FILE//-/=} #convert to conda install + echo "Installing $CONDA_FILE" + gpuci_mamba_retry install -c ${CONDA_ARTIFACT_PATH} "$CONDA_FILE" + + else + gpuci_logger "Building cuml python in gpu job" + "$WORKSPACE/build.sh" -v cuml 
--codecov + fi + gpuci_logger "Install the main version of dask and distributed" set -x pip install "git+https://github.com/dask/distributed.git@2021.11.2" --upgrade --no-deps pip install "git+https://github.com/dask/dask.git@2021.11.2" --upgrade --no-deps set +x - - # gpuci_logger "Building cuml" - # "$WORKSPACE/build.sh" -v cuml --codecov + + # When installing cuml with project flash, we need to delete all folders except + # cuml/test since we are not building cython extensions in place + if [ "$py_ver" == "3.7" ];then + find ./cuml -mindepth 1 ! -regex '^./cuml/test\(/.*\)?' -delete + fi gpuci_logger "Python pytest for cuml" cd $WORKSPACE/python - - # Removing all folders except cuml/test since we are not building cython extensions in place - find ./cuml -mindepth 1 ! -regex '^./cuml/test\(/.*\)?' -delete pytest --cache-clear --basetemp=${WORKSPACE}/cuml-cuda-tmp --junitxml=${WORKSPACE}/junit-cuml.xml -v -s -m "not memleak" --durations=50 --timeout=300 --ignore=cuml/test/dask --ignore=cuml/raft --cov-config=.coveragerc --cov=cuml --cov-report=xml:${WORKSPACE}/python/cuml/cuml-coverage.xml --cov-report term From 66251ae677b9d1c09fe77e17e5c0d00096b15f49 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Fri, 26 Nov 2021 17:46:12 -0500 Subject: [PATCH 13/16] FIX wrong location of rm command --- ci/gpu/build.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 7a52fa1683..c872f034f2 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -214,14 +214,14 @@ else pip install "git+https://github.com/dask/dask.git@2021.11.2" --upgrade --no-deps set +x + gpuci_logger "Python pytest for cuml" + cd $WORKSPACE/python + # When installing cuml with project flash, we need to delete all folders except # cuml/test since we are not building cython extensions in place if [ "$py_ver" == "3.7" ];then find ./cuml -mindepth 1 ! -regex '^./cuml/test\(/.*\)?' 
-delete fi - - gpuci_logger "Python pytest for cuml" - cd $WORKSPACE/python pytest --cache-clear --basetemp=${WORKSPACE}/cuml-cuda-tmp --junitxml=${WORKSPACE}/junit-cuml.xml -v -s -m "not memleak" --durations=50 --timeout=300 --ignore=cuml/test/dask --ignore=cuml/raft --cov-config=.coveragerc --cov=cuml --cov-report=xml:${WORKSPACE}/python/cuml/cuml-coverage.xml --cov-report term From a49a48ad7af3e52e9540a08a6b4cc685c51e5e14 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 29 Nov 2021 08:25:20 -0500 Subject: [PATCH 14/16] FIX Remove test that was being skipped for debugging --- python/cuml/test/test_api.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/cuml/test/test_api.py b/python/cuml/test/test_api.py index 1e20975c57..c4efab811a 100644 --- a/python/cuml/test/test_api.py +++ b/python/cuml/test/test_api.py @@ -222,7 +222,6 @@ def test_mro(model): @pytest.mark.parametrize("model_name", list(models.keys())) # ignore random forest float64 warnings @pytest.mark.filterwarnings("ignore:To use pickling or GPU-based") -@pytest.mark.skip(reason="CEC debugging") def test_fit_function(dataset, model_name): # This test ensures that our estimators return self after a call to fit if model_name in [ @@ -232,7 +231,6 @@ def test_fit_function(dataset, model_name): "AutoARIMA", "MultinomialNB", "LabelEncoder", - "HDBSCAN" ]: pytest.xfail("These models are not tested yet") From 71204756df13d5068faf0cb788bef6fd734fa8ce Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 29 Nov 2021 08:26:43 -0500 Subject: [PATCH 15/16] DBG remove skipping libcuml gtests --- ci/gpu/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index c872f034f2..ebce8ea876 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -185,7 +185,7 @@ else cd $LIBCUML_BUILD_DIR chrpath -d ./test/ml patchelf --replace-needed `patchelf --print-needed ./test/ml | grep faiss` libfaiss.so ./test/ml - # 
GTEST_OUTPUT="xml:${WORKSPACE}/test-results/libcuml_cpp/" ./test/ml + GTEST_OUTPUT="xml:${WORKSPACE}/test-results/libcuml_cpp/" ./test/ml CONDA_FILE=`find ${CONDA_ARTIFACT_PATH} -name "libcuml*.tar.bz2"` CONDA_FILE=`basename "$CONDA_FILE" .tar.bz2` #get filename without extension From cf31cc006a92fe4f8a59be4e04bc4646dabf69e8 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Mon, 29 Nov 2021 08:27:50 -0500 Subject: [PATCH 16/16] DBG remove skipping libcuml gtests --- ci/gpu/build.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index ebce8ea876..efcff5b9c2 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -181,7 +181,6 @@ else chrpath -d libcuml++.so patchelf --replace-needed `patchelf --print-needed libcuml++.so | grep faiss` libfaiss.so libcuml++.so - gpuci_logger "DEBUG: skipping GoogleTest for libcuml" cd $LIBCUML_BUILD_DIR chrpath -d ./test/ml patchelf --replace-needed `patchelf --print-needed ./test/ml | grep faiss` libfaiss.so ./test/ml