diff --git a/ci/cscs-ci.yml b/ci/cscs-ci.yml index 243716c459..aa3f35a486 100644 --- a/ci/cscs-ci.yml +++ b/ci/cscs-ci.yml @@ -31,7 +31,8 @@ stages: # Since the base image name is runtime dependent, we need to carry the value of it to # the following jobs via a dotenv file. before_script: - - DOCKER_TAG=`sha256sum $DOCKERFILE | head -c 16` + # include build arguments in hash since we use a parameterized Dockerfile + - DOCKER_TAG=`echo "$(cat $DOCKERFILE) $DOCKER_BUILD_ARGS" | sha256sum | head -c 16` - export PERSIST_IMAGE_NAME=$CSCS_REGISTRY_PATH/public/$ARCH/base/gt4py-ci:$DOCKER_TAG-$PYVERSION - echo "BASE_IMAGE_${PYVERSION_PREFIX}=$PERSIST_IMAGE_NAME" >> build.env artifacts: @@ -149,7 +150,7 @@ build_py38_image_x86_64: variables: CRAY_CUDA_MPS: 1 SLURM_JOB_NUM_NODES: 1 - SLURM_TIMELIMIT: 120 + SLURM_TIMELIMIT: 15 NUM_PROCESSES: auto VIRTUALENV_SYSTEM_SITE_PACKAGES: 1 .test_helper_x86_64: @@ -177,8 +178,10 @@ build_py38_image_x86_64: variables: # Grace-Hopper gpu architecture is not enabled by default in CUDA build CUDAARCHS: "90" - # limit test parallelism to avoid "OSError: too many open files" in the gt4py build stage - NUM_PROCESSES: 32 + # Limit test parallelism to avoid "OSError: too many open files" in the gt4py build stage. + # Another problem, observed in the test stage, is that GPU tests hang in combination with CUDA MPS + # when high test parallelism is used. + NUM_PROCESSES: 16 test_py311_x86_64: extends: [.test_helper_x86_64]