-
Notifications
You must be signed in to change notification settings - Fork 889
/
build.sh
executable file
·265 lines (214 loc) · 10.7 KB
/
build.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
#!/bin/bash
# Copyright (c) 2018-2022, NVIDIA CORPORATION.
##############################################
# cuDF GPU build and test script for CI #
##############################################
set -e
NUMARGS=$#
ARGS=$*
# Arg parsing function
function hasArg {
(( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ")
}
# Set path and build parallel level
export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH
export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4}
# Set home to the job's workspace
export HOME="$WORKSPACE"
# Switch to project root; also root of repo checkout
cd "$WORKSPACE"
# Determine CUDA release version
export CUDA_REL=${CUDA_VERSION%.*}
export CONDA_ARTIFACT_PATH="$WORKSPACE/ci/artifacts/cudf/cpu/.conda-bld/"
# Parse git describe
export GIT_DESCRIBE_TAG=`git describe --tags`
export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'`
unset GIT_DESCRIBE_TAG
# Dask & Distributed option to install main(nightly) or `conda-forge` packages.
export INSTALL_DASK_MAIN=0
# ucx-py version
export UCX_PY_VERSION='0.27.*'
################################################################################
# TRAP - Setup trap for removing jitify cache
################################################################################
# Set `LIBCUDF_KERNEL_CACHE_PATH` environment variable to $HOME/.jitify-cache
# because it's local to the container's virtual file system, and not shared with
# other CI jobs like `/tmp` is
export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache"
function remove_libcudf_kernel_cache_dir {
EXITCODE=$?
gpuci_logger "TRAP: Removing kernel cache dir: $LIBCUDF_KERNEL_CACHE_PATH"
rm -rf "$LIBCUDF_KERNEL_CACHE_PATH" \
|| gpuci_logger "[ERROR] TRAP: Could not rm -rf $LIBCUDF_KERNEL_CACHE_PATH"
exit $EXITCODE
}
# Set trap to run on exit
gpuci_logger "TRAP: Set trap to remove jitify cache on exit"
trap remove_libcudf_kernel_cache_dir EXIT
mkdir -p "$LIBCUDF_KERNEL_CACHE_PATH" \
|| gpuci_logger "[ERROR] TRAP: Could not mkdir -p $LIBCUDF_KERNEL_CACHE_PATH"
################################################################################
# SETUP - Check environment
################################################################################
gpuci_logger "Check environment variables"
env
gpuci_logger "Check GPU usage"
nvidia-smi
gpuci_logger "Activate conda env"
. /opt/conda/etc/profile.d/conda.sh
conda activate rapids
gpuci_logger "Check conda environment"
conda info
conda config --show-sources
conda list --show-channel-urls
gpuci_logger "Check compiler versions"
python --version
function install_dask {
# Install the conda-forge or nightly version of dask and distributed
gpuci_logger "Install the conda-forge or nightly version of dask and distributed"
set -x
if [[ "${INSTALL_DASK_MAIN}" == 1 ]]; then
gpuci_logger "gpuci_mamba_retry update dask"
gpuci_mamba_retry update dask
conda list
else
gpuci_logger "gpuci_mamba_retry install conda-forge::dask==2022.7.1 conda-forge::distributed==2022.7.1 conda-forge::dask-core==2022.7.1 --force-reinstall"
gpuci_mamba_retry install conda-forge::dask==2022.7.1 conda-forge::distributed==2022.7.1 conda-forge::dask-core==2022.7.1 --force-reinstall
fi
# Install the main version of streamz
gpuci_logger "Install the main version of streamz"
# Need to uninstall streamz that is already in the env.
pip uninstall -y streamz
pip install "git+https://github.com/python-streamz/streamz.git@master" --upgrade --no-deps
set +x
}
if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then
gpuci_logger "Install dependencies"
gpuci_mamba_retry install -y \
"cudatoolkit=$CUDA_REL" \
"rapids-build-env=$MINOR_VERSION.*" \
"rapids-notebook-env=$MINOR_VERSION.*" \
"dask-cuda=${MINOR_VERSION}" \
"rmm=$MINOR_VERSION.*" \
"ucx-py=${UCX_PY_VERSION}"
# https://docs.rapids.ai/maintainers/depmgmt/
# gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env
# gpuci_mamba_retry install -y "your-pkg=1.0.0"
install_dask
################################################################################
# BUILD - Build libcudf, cuDF, libcudf_kafka, and dask_cudf from source
################################################################################
gpuci_logger "Build from source"
"$WORKSPACE/build.sh" clean libcudf cudf dask_cudf libcudf_kafka cudf_kafka benchmarks tests --ptds
################################################################################
# TEST - Run GoogleTest
################################################################################
set +e -Eo pipefail
EXITCODE=0
trap "EXITCODE=1" ERR
if hasArg --skip-tests; then
gpuci_logger "Skipping Tests"
exit 0
else
gpuci_logger "Check GPU usage"
nvidia-smi
gpuci_logger "GoogleTests"
set -x
cd "$WORKSPACE/cpp/build"
for gt in "$WORKSPACE/cpp/build/gtests/"* ; do
test_name=$(basename ${gt})
echo "Running GoogleTest $test_name"
${gt} --gtest_output=xml:"$WORKSPACE/test-results/"
done
fi
else
#Project Flash
if hasArg --skip-tests; then
gpuci_logger "Skipping Tests"
exit 0
fi
gpuci_logger "Check GPU usage"
nvidia-smi
gpuci_logger "Installing libcudf, libcudf_kafka and libcudf-tests"
gpuci_mamba_retry install -y -c ${CONDA_ARTIFACT_PATH} libcudf libcudf_kafka libcudf-tests
# TODO: Move boa install to gpuci/rapidsai
gpuci_mamba_retry install boa
gpuci_logger "Building cudf, dask-cudf, cudf_kafka and custreamz"
export CONDA_BLD_DIR="$WORKSPACE/.conda-bld"
gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/cudf --python=$PYTHON -c ${CONDA_ARTIFACT_PATH}
gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/dask-cudf --python=$PYTHON -c ${CONDA_ARTIFACT_PATH}
gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/cudf_kafka --python=$PYTHON -c ${CONDA_ARTIFACT_PATH}
gpuci_conda_retry mambabuild --croot ${CONDA_BLD_DIR} conda/recipes/custreamz --python=$PYTHON -c ${CONDA_ARTIFACT_PATH}
gpuci_logger "Installing cudf, dask-cudf, cudf_kafka and custreamz"
gpuci_mamba_retry install cudf dask-cudf cudf_kafka custreamz -c "${CONDA_BLD_DIR}" -c "${CONDA_ARTIFACT_PATH}"
gpuci_logger "GoogleTests"
# Run libcudf and libcudf_kafka gtests from libcudf-tests package
for gt in "$CONDA_PREFIX/bin/gtests/libcudf"*/* ; do
echo "Running GoogleTest $test_name"
${gt} --gtest_output=xml:"$WORKSPACE/test-results/"
done
export LIB_BUILD_DIR="$WORKSPACE/ci/artifacts/cudf/cpu/libcudf_work/cpp/build"
# Copy libcudf build time results
echo "Checking for build time log $LIB_BUILD_DIR/ninja_log.xml"
if [[ -f "$LIB_BUILD_DIR/ninja_log.xml" ]]; then
gpuci_logger "Copying build time results"
cp "$LIB_BUILD_DIR/ninja_log.xml" "$WORKSPACE/test-results/buildtimes-junit.xml"
fi
################################################################################
# MEMCHECK - Run compute-sanitizer on GoogleTest (only in nightly builds)
################################################################################
if [[ "$BUILD_MODE" == "branch" && "$BUILD_TYPE" == "gpu" ]]; then
if [[ "$COMPUTE_SANITIZER_ENABLE" == "true" ]]; then
gpuci_logger "Memcheck on GoogleTests with rmm_mode=cuda"
export GTEST_CUDF_RMM_MODE=cuda
COMPUTE_SANITIZER_CMD="compute-sanitizer --tool memcheck"
mkdir -p "$WORKSPACE/test-results/"
for gt in "$CONDA_PREFIX/bin/gtests/libcudf"*/* ; do
test_name=$(basename ${gt})
if [[ "$test_name" == "ERROR_TEST" ]]; then
continue
fi
echo "Running GoogleTest $test_name"
${COMPUTE_SANITIZER_CMD} ${gt} | tee "$WORKSPACE/test-results/${test_name}.cs.log"
done
unset GTEST_CUDF_RMM_MODE
# test-results/*.cs.log are processed in gpuci
fi
fi
fi
# Both regular and Project Flash proceed here
# set environment variable for numpy 1.16
# will be enabled for later versions by default
np_ver=$(python -c "import numpy; print('.'.join(numpy.__version__.split('.')[:-1]))")
if [ "$np_ver" == "1.16" ];then
export NUMPY_EXPERIMENTAL_ARRAY_FUNCTION=1
fi
################################################################################
# TEST - Run py.test, notebooks
################################################################################
cd "$WORKSPACE/python/cudf/cudf"
# It is essential to cd into $WORKSPACE/python/cudf/cudf as `pytest-xdist` + `coverage` seem to work only at this directory level.
gpuci_logger "Python py.test for cuDF"
py.test -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" --ignore="$WORKSPACE/python/cudf/cudf/benchmarks" --junitxml="$WORKSPACE/junit-cudf.xml" -v --cov-config="$WORKSPACE/python/cudf/.coveragerc" --cov=cudf --cov-report=xml:"$WORKSPACE/python/cudf/cudf-coverage.xml" --cov-report term --dist=loadscope tests
cd "$WORKSPACE/python/dask_cudf"
gpuci_logger "Python py.test for dask-cudf"
py.test -n 8 --cache-clear --basetemp="$WORKSPACE/dask-cudf-cuda-tmp" --junitxml="$WORKSPACE/junit-dask-cudf.xml" -v --cov-config=.coveragerc --cov=dask_cudf --cov-report=xml:"$WORKSPACE/python/dask_cudf/dask-cudf-coverage.xml" --cov-report term dask_cudf
cd "$WORKSPACE/python/custreamz"
gpuci_logger "Python py.test for cuStreamz"
py.test -n 8 --cache-clear --basetemp="$WORKSPACE/custreamz-cuda-tmp" --junitxml="$WORKSPACE/junit-custreamz.xml" -v --cov-config=.coveragerc --cov=custreamz --cov-report=xml:"$WORKSPACE/python/custreamz/custreamz-coverage.xml" --cov-report term custreamz
# Run benchmarks with both cudf and pandas to ensure compatibility is maintained.
# Benchmarks are run in DEBUG_ONLY mode, meaning that only small data sizes are used.
# Therefore, these runs only verify that benchmarks are valid.
# They do not generate meaningful performance measurements.
cd "$WORKSPACE/python/cudf"
gpuci_logger "Python pytest for cuDF benchmarks"
CUDF_BENCHMARKS_DEBUG_ONLY=ON pytest -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" -v --dist=loadscope benchmarks
gpuci_logger "Python pytest for cuDF benchmarks using pandas"
CUDF_BENCHMARKS_USE_PANDAS=ON CUDF_BENCHMARKS_DEBUG_ONLY=ON pytest -n 8 --cache-clear --basetemp="$WORKSPACE/cudf-cuda-tmp" -v --dist=loadscope benchmarks
gpuci_logger "Test notebooks"
"$WORKSPACE/ci/gpu/test-notebooks.sh" 2>&1 | tee nbtest.log
python "$WORKSPACE/ci/utils/nbtestlog2junitxml.py" nbtest.log
if [ -n "${CODECOV_TOKEN}" ]; then
codecov -t $CODECOV_TOKEN
fi
return ${EXITCODE}