diff --git a/ci/cscs-ci.yml b/ci/cscs-ci.yml
index 243716c459..aa3f35a486 100644
--- a/ci/cscs-ci.yml
+++ b/ci/cscs-ci.yml
@@ -31,7 +31,8 @@ stages:
   # Since the base image name is runtime dependent, we need to carry the value of it to
   # the following jobs via a dotenv file.
   before_script:
-  - DOCKER_TAG=`sha256sum $DOCKERFILE | head -c 16`
+  # Hash the Dockerfile together with its build arguments, since the Dockerfile is parameterized.
+  - DOCKER_TAG=$(echo "$(cat "$DOCKERFILE") $DOCKER_BUILD_ARGS" | sha256sum | head -c 16)
   - export PERSIST_IMAGE_NAME=$CSCS_REGISTRY_PATH/public/$ARCH/base/gt4py-ci:$DOCKER_TAG-$PYVERSION
   - echo "BASE_IMAGE_${PYVERSION_PREFIX}=$PERSIST_IMAGE_NAME" >> build.env
   artifacts:
@@ -149,7 +150,7 @@ build_py38_image_x86_64:
   variables:
     CRAY_CUDA_MPS: 1
     SLURM_JOB_NUM_NODES: 1
-    SLURM_TIMELIMIT: 120
+    SLURM_TIMELIMIT: 15  # NOTE(review): bare number, presumably minutes (as for srun --time) — confirm
     NUM_PROCESSES: auto
     VIRTUALENV_SYSTEM_SITE_PACKAGES: 1
 .test_helper_x86_64:
@@ -177,8 +178,10 @@ build_py38_image_x86_64:
   variables:
     # Grace-Hopper gpu architecture is not enabled by default in CUDA build
     CUDAARCHS: "90"
-    # limit test parallelism to avoid "OSError: too many open files" in the gt4py build stage
-    NUM_PROCESSES: 32
+    # Limit test parallelism to avoid "OSError: too many open files" in the gt4py build stage.
+    # In addition, GPU tests were observed to hang in the test stage when CUDA MPS is combined
+    # with high test parallelism.
+    NUM_PROCESSES: 16
 
 test_py311_x86_64:
   extends: [.test_helper_x86_64]