diff --git a/.buildkite/test-template-aws.j2 b/.buildkite/test-template-aws.j2 index 09649b625c319..01f7ff1e0e2b5 100644 --- a/.buildkite/test-template-aws.j2 +++ b/.buildkite/test-template-aws.j2 @@ -30,6 +30,7 @@ steps: command: bash .buildkite/run-amd-test.sh "cd {{ (step.working_dir or default_working_dir) | safe }} ; {{ step.command or (step.commands | join(" ; ")) | safe }}" env: DOCKER_BUILDKIT: "1" + priority: 100 soft_fail: true {% endif %} {% endfor %} diff --git a/Dockerfile.rocm b/Dockerfile.rocm index 954958df88fc0..724fa1673c3b3 100644 --- a/Dockerfile.rocm +++ b/Dockerfile.rocm @@ -42,6 +42,7 @@ RUN apt-get update && apt-get install -y \ unzip \ nvidia-cuda-toolkit \ tmux \ + ccache \ && rm -rf /var/lib/apt/lists/* ### Mount Point ### @@ -102,7 +103,9 @@ ENV RAY_EXPERIMENTAL_NOSET_ROCR_VISIBLE_DEVICES=1 ENV VLLM_NCCL_SO_PATH=/opt/rocm/lib/librccl.so -RUN --mount=type=cache,target=/root/.cache/pip \ +ENV CCACHE_DIR=/root/.cache/ccache +RUN --mount=type=cache,target=/root/.cache/ccache \ + --mount=type=cache,target=/root/.cache/pip \ pip install -U -r requirements-rocm.txt \ && patch /opt/rocm/include/hip/amd_detail/amd_hip_bf16.h ./rocm_patch/rocm_bf16.patch \ && python3 setup.py install \