Add a spark-shell smoke test to premerge and nightly [databricks] (#9504)

Contributes to #5704

This PR aims to catch issues like #9500. It modifies run_pyspark_from_build.sh, mostly to avoid recreating the logic for figuring out the jar location and similar details.

Currently CI may not catch this class of failure because we do not have Spark 3.5.0 CI yet, but this is how the smoke test reproduces #9500:

```Bash
$ SPARK_HOME=~/dist/spark-3.1.1-bin-hadoop3.2 SPARK_SHELL_SMOKE_TEST=1 ./integration_tests/run_pyspark_from_build.sh
...
+ grep -F 'res0: Array[org.apache.spark.sql.Row] = Array([4950])'
res0: Array[org.apache.spark.sql.Row] = Array([4950])
+ echo 'SUCCESS spark-shell smoke test'
SUCCESS spark-shell smoke test
$ echo $?
0

$ SPARK_HOME=~/dist/spark-3.5.0-bin-hadoop3 SPARK_SHELL_SMOKE_TEST=1 ./integration_tests/run_pyspark_from_build.sh
$ echo $?
1

$ SPARK_SHELL_SMOKE_TEST=1 \
  PYSP_TEST_spark_shuffle_manager=com.nvidia.spark.rapids.spark311.RapidsShuffleManager \
  SPARK_HOME=~/dist/spark-3.1.1-bin-hadoop3.2 \
  ./integration_tests/run_pyspark_from_build.sh
+ echo 'SUCCESS spark-shell smoke test'
SUCCESS spark-shell smoke test
$ echo $?
0

$ SPARK_SHELL_SMOKE_TEST=1 \
  PYSP_TEST_spark_shuffle_manager=com.nvidia.spark.rapids.spark350.RapidsShuffleManager \
  SPARK_HOME=~/dist/spark-3.5.0-bin-hadoop3 \
  ./integration_tests/run_pyspark_from_build.sh
$ echo $?
1
```
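For the record, `spark.range(100)` produces the ids 0 through 99, so the expected aggregate is 0 + 1 + ... + 99 = 99 * 100 / 2 = 4950, which is exactly the `Array([4950])` the grep checks for.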

Signed-off-by: Gera Shegalov <[email protected]>
gerashegalov authored Oct 24, 2023
1 parent 9716e6e commit 2c78df5
Showing 3 changed files with 36 additions and 1 deletion.
30 changes: 29 additions & 1 deletion integration_tests/run_pyspark_from_build.sh
@@ -309,7 +309,35 @@ EOF
 fi
 export PYSP_TEST_spark_rapids_memory_gpu_allocSize=${PYSP_TEST_spark_rapids_memory_gpu_allocSize:-'1536m'}
 
-if ((${#TEST_PARALLEL_OPTS[@]} > 0));
+SPARK_SHELL_SMOKE_TEST="${SPARK_SHELL_SMOKE_TEST:-0}"
+if [[ "${SPARK_SHELL_SMOKE_TEST}" != "0" ]]; then
+    echo "Running spark-shell smoke test..."
+    SPARK_SHELL_ARGS_ARR=(
+        --master local-cluster[1,2,1024]
+        --conf spark.plugins=com.nvidia.spark.SQLPlugin
+        --conf spark.deploy.maxExecutorRetries=0
+    )
+    if [[ "${PYSP_TEST_spark_shuffle_manager}" != "" ]]; then
+        SPARK_SHELL_ARGS_ARR+=(
+            --conf spark.shuffle.manager="${PYSP_TEST_spark_shuffle_manager}"
+            --driver-class-path "${PYSP_TEST_spark_driver_extraClassPath}"
+            --conf spark.executor.extraClassPath="${PYSP_TEST_spark_driver_extraClassPath}"
+        )
+    else
+        SPARK_SHELL_ARGS_ARR+=(--jars "${PYSP_TEST_spark_jars}")
+    fi
+
+    # NOTE grep is used not only for checking the output but also
+    # to work around the fact that spark-shell catches all failures.
+    # In this test it exits not because of a failure but because it encounters
+    # an EOF on stdin and injects a ":quit" command. Without the grep check
+    # the exit code would be 0 (success) regardless of any exceptions.
+    #
+    <<< 'spark.range(100).agg(Map("id" -> "sum")).collect()' \
+        "${SPARK_HOME}"/bin/spark-shell "${SPARK_SHELL_ARGS_ARR[@]}" 2>/dev/null \
+        | grep -F 'res0: Array[org.apache.spark.sql.Row] = Array([4950])'
+    echo "SUCCESS spark-shell smoke test"
+elif ((${#TEST_PARALLEL_OPTS[@]} > 0));
 then
     exec python "${RUN_TESTS_COMMAND[@]}" "${TEST_PARALLEL_OPTS[@]}" "${TEST_COMMON_OPTS[@]}"
 else
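A note on the two non-obvious choices above. `--master local-cluster[1,2,1024]` starts Spark's pseudo-distributed mode (1 worker, 2 cores, 1024 MB per worker), so the executor classpath gets exercised without a real cluster. And the grep really is load-bearing: spark-shell catches exceptions and then exits 0 when it hits EOF on stdin, so only the presence of the expected output distinguishes success from failure. A minimal sketch, assuming a local Spark install at `$SPARK_HOME`:

```Bash
# An exception does not fail the REPL: spark-shell prints the stack trace,
# then exits 0 once it reads EOF on stdin.
<<< 'throw new RuntimeException("boom")' "${SPARK_HOME}"/bin/spark-shell
echo $?   # 0 despite the exception

# With grep as the last stage of the pipeline, the exit status is grep's:
# 0 only if the expected line appeared, 1 otherwise.
<<< 'spark.range(100).agg(Map("id" -> "sum")).collect()' \
    "${SPARK_HOME}"/bin/spark-shell 2>/dev/null \
    | grep -F 'res0: Array[org.apache.spark.sql.Row] = Array([4950])'
echo $?   # reflects whether the expected result was printed
```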
3 changes: 3 additions & 0 deletions jenkins/spark-premerge-build.sh
@@ -88,6 +88,9 @@ mvn_verify() {
 
     # Triggering here until we change the jenkins file
     rapids_shuffle_smoke_test
+    SPARK_SHELL_SMOKE_TEST=1 \
+        PYSP_TEST_spark_shuffle_manager=com.nvidia.spark.rapids.${SHUFFLE_SPARK_SHIM}.RapidsShuffleManager \
+        ./integration_tests/run_pyspark_from_build.sh
 }
 
 rapids_shuffle_smoke_test() {
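For context, `SHUFFLE_SPARK_SHIM` picks the shim-specific `RapidsShuffleManager` class for the Spark version under test. A hypothetical expansion for a Spark 3.1.1 premerge run, mirroring the manual invocations in the description above:

```Bash
# Hypothetical values: SHUFFLE_SPARK_SHIM is assumed to be set elsewhere in
# the CI scripts; spark311 matches the Spark 3.1.1 example in the description.
SHUFFLE_SPARK_SHIM=spark311
SPARK_SHELL_SMOKE_TEST=1 \
    PYSP_TEST_spark_shuffle_manager=com.nvidia.spark.rapids.${SHUFFLE_SPARK_SHIM}.RapidsShuffleManager \
    ./integration_tests/run_pyspark_from_build.sh
```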
4 changes: 4 additions & 0 deletions jenkins/spark-tests.sh
@@ -270,6 +270,10 @@ TEST_MODE=${TEST_MODE:-'DEFAULT'}
 if [[ $TEST_MODE == "DEFAULT" ]]; then
     ./run_pyspark_from_build.sh
 
+    SPARK_SHELL_SMOKE_TEST=1 \
+        PYSP_TEST_spark_shuffle_manager=com.nvidia.spark.rapids.${SHUFFLE_SPARK_SHIM}.RapidsShuffleManager \
+        ./integration_tests/run_pyspark_from_build.sh
+
     # ParquetCachedBatchSerializer cache_test
     PYSP_TEST_spark_sql_cache_serializer=com.nvidia.spark.ParquetCachedBatchSerializer \
         ./run_pyspark_from_build.sh -k cache_test
