diff --git a/integration_tests/run_pyspark_from_build.sh b/integration_tests/run_pyspark_from_build.sh
index 853fae66316..e31e462e1a8 100755
--- a/integration_tests/run_pyspark_from_build.sh
+++ b/integration_tests/run_pyspark_from_build.sh
@@ -309,7 +309,35 @@ EOF
     fi
     export PYSP_TEST_spark_rapids_memory_gpu_allocSize=${PYSP_TEST_spark_rapids_memory_gpu_allocSize:-'1536m'}
 
-    if ((${#TEST_PARALLEL_OPTS[@]} > 0));
+    SPARK_SHELL_SMOKE_TEST="${SPARK_SHELL_SMOKE_TEST:-0}"
+    if [[ "${SPARK_SHELL_SMOKE_TEST}" != "0" ]]; then
+        echo "Running spark-shell smoke test..."
+        SPARK_SHELL_ARGS_ARR=(
+            --master local-cluster[1,2,1024]
+            --conf spark.plugins=com.nvidia.spark.SQLPlugin
+            --conf spark.deploy.maxExecutorRetries=0
+        )
+        if [[ "${PYSP_TEST_spark_shuffle_manager}" != "" ]]; then
+            SPARK_SHELL_ARGS_ARR+=(
+                --conf spark.shuffle.manager="${PYSP_TEST_spark_shuffle_manager}"
+                --driver-class-path "${PYSP_TEST_spark_driver_extraClassPath}"
+                --conf spark.executor.extraClassPath="${PYSP_TEST_spark_driver_extraClassPath}"
+            )
+        else
+            SPARK_SHELL_ARGS_ARR+=(--jars "${PYSP_TEST_spark_jars}")
+        fi
+
+        # NOTE grep is used not only to check the output but also to work
+        # around the fact that spark-shell catches all failures. In this test
+        # the shell exits not because of a failure but because it encounters
+        # an EOF on stdin and injects a ":quit" command. Without the grep
+        # check the exit code would be a success (0) regardless of any
+        # exceptions.
+        <<< 'spark.range(100).agg(Map("id" -> "sum")).collect()' \
+            "${SPARK_HOME}"/bin/spark-shell "${SPARK_SHELL_ARGS_ARR[@]}" 2>/dev/null \
+            | grep -F 'res0: Array[org.apache.spark.sql.Row] = Array([4950])'
+        echo "SUCCESS spark-shell smoke test"
+    elif ((${#TEST_PARALLEL_OPTS[@]} > 0));
     then
         exec python "${RUN_TESTS_COMMAND[@]}" "${TEST_PARALLEL_OPTS[@]}" "${TEST_COMMON_OPTS[@]}"
     else
diff --git a/jenkins/spark-premerge-build.sh b/jenkins/spark-premerge-build.sh
index 9b509208986..15c5166001b 100755
--- a/jenkins/spark-premerge-build.sh
+++ b/jenkins/spark-premerge-build.sh
@@ -88,6 +88,9 @@ mvn_verify() {
 
     # Triggering here until we change the jenkins file
     rapids_shuffle_smoke_test
+    SPARK_SHELL_SMOKE_TEST=1 \
+        PYSP_TEST_spark_shuffle_manager=com.nvidia.spark.rapids.${SHUFFLE_SPARK_SHIM}.RapidsShuffleManager \
+        ./integration_tests/run_pyspark_from_build.sh
 }
 
 rapids_shuffle_smoke_test() {
diff --git a/jenkins/spark-tests.sh b/jenkins/spark-tests.sh
index 4a062f63871..e28799c28d4 100755
--- a/jenkins/spark-tests.sh
+++ b/jenkins/spark-tests.sh
@@ -270,6 +270,10 @@ TEST_MODE=${TEST_MODE:-'DEFAULT'}
 if [[ $TEST_MODE == "DEFAULT" ]]; then
     ./run_pyspark_from_build.sh
 
+    SPARK_SHELL_SMOKE_TEST=1 \
+        PYSP_TEST_spark_shuffle_manager=com.nvidia.spark.rapids.${SHUFFLE_SPARK_SHIM}.RapidsShuffleManager \
+        ./integration_tests/run_pyspark_from_build.sh
+
     # ParquetCachedBatchSerializer cache_test
     PYSP_TEST_spark_sql_cache_serializer=com.nvidia.spark.ParquetCachedBatchSerializer \
         ./run_pyspark_from_build.sh -k cache_test
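
Note on the grep pattern in the first hunk: spark-shell exits 0 once it sees
EOF on stdin, even when the piped expression threw an exception, so the test
asserts success by matching the expected REPL output instead of trusting the
exit code. A minimal standalone sketch of the same idea, assuming only that
SPARK_HOME points at a local Spark install (the local[2] master and the
shortened grep pattern are illustrative, not part of the patch):

    #!/bin/bash
    # spark.range(100) yields ids 0..99, whose sum is 4950. grep is the
    # last command in the pipeline, so its status (1 when the expected
    # result is missing) becomes the pipeline's exit code even though
    # spark-shell itself reports success.
    <<< 'spark.range(100).agg(Map("id" -> "sum")).collect()' \
        "${SPARK_HOME}"/bin/spark-shell --master 'local[2]' 2>/dev/null \
        | grep -F 'Array([4950])'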