Fix the silent return issue in AOT launcher #4011
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Integration Tests | |
on: | |
workflow_dispatch: | |
pull_request: | |
branches: [main] | |
merge_group: | |
branches: [main] | |
types: [checks_requested] | |
concurrency: | |
group: ${{ github.ref }} | |
cancel-in-progress: ${{ github.ref != 'refs/heads/master' }} | |
env: | |
TRITON_USE_ASSERT_ENABLED_LLVM: "TRUE" | |
jobs: | |
Runner-Preparation: | |
runs-on: ubuntu-latest | |
outputs: | |
matrix-required: ${{ steps.set-matrix.outputs.matrix-required }} | |
matrix-optional: ${{ steps.set-matrix.outputs.matrix-optional }} | |
steps: | |
- name: Prepare runner matrix | |
id: set-matrix | |
run: | | |
if [ x"${{ github.repository }}" == x"openai/triton" ]; then | |
echo '::set-output name=matrix-required::[["self-hosted", "A100"], ["self-hosted", "H100"]]' | |
echo '::set-output name=matrix-optional::[["self-hosted", "gfx908"], ["self-hosted", "arc770"]]' | |
else | |
echo '::set-output name=matrix-required::["ubuntu-latest"]' | |
echo '::set-output name=matrix-optional::["ubuntu-latest"]' | |
fi | |
Integration-Tests-Nvidia: | |
needs: Runner-Preparation | |
runs-on: ${{ matrix.runner }} | |
strategy: | |
matrix: | |
runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-required)}} | |
steps: | |
- name: Checkout | |
uses: actions/checkout@v2 | |
- name: Set CUDA ENV | |
if: ${{(matrix.runner[0] == 'self-hosted') && (matrix.runner[1] == 'V100' || matrix.runner[1] == 'A100' || matrix.runner[1] == 'H100')}} | |
run: | | |
echo "BACKEND=CUDA" >> "${GITHUB_ENV}" | |
- name: Clear cache | |
run: | | |
rm -rf ~/.triton | |
- name: Update PATH | |
run: | | |
echo "PATH=${HOME}/.local/bin:${PATH}" >> "${GITHUB_ENV}" | |
- name: Install Triton | |
if: ${{ env.BACKEND == 'CUDA'}} | |
run: | | |
cd python | |
python3 -m pip install --upgrade pip | |
python3 -m pip install cmake==3.24 | |
python3 -m pip install --no-build-isolation -vvv '.[tests]' | |
python3 -m pip install pytest-xdist | |
- name: Run lit tests | |
if: ${{ env.BACKEND == 'CUDA'}} | |
run: | | |
python3 -m pip install lit | |
cd python | |
LIT_TEST_DIR="build/$(ls build | grep -i cmake)/test" | |
if [ ! -d "${LIT_TEST_DIR}" ]; then | |
echo "Coult not find '${LIT_TEST_DIR}'" ; exit -1 | |
fi | |
lit -v "${LIT_TEST_DIR}" | |
- name: Run python tests on CUDA | |
if: ${{ env.BACKEND == 'CUDA'}} | |
run: | | |
cd python/test/unit | |
python3 -m pytest -n 8 --ignore=runtime | |
# run runtime tests serially to avoid race condition with cache handling. | |
python3 -m pytest runtime/ | |
- name: Create artifacts archive | |
if: ${{(matrix.runner[0] == 'self-hosted') && (matrix.runner[1] == 'V100' || matrix.runner[1] == 'A100' || matrix.runner[1] == 'H100')}} | |
run: | | |
cd ~/.triton | |
tar -czvf artifacts.tar.gz cache | |
- name: Upload artifacts archive | |
if: ${{(matrix.runner[0] == 'self-hosted') && (matrix.runner[1] == 'V100' || matrix.runner[1] == 'A100' || matrix.runner[1] == 'H100')}} | |
uses: actions/upload-artifact@v2 | |
with: | |
name: artifacts ${{ matrix.runner[1] }} | |
path: ~/.triton/artifacts.tar.gz | |
- name: Run CXX unittests | |
if: ${{ env.BACKEND == 'CUDA'}} | |
run: | | |
cd python | |
cd "build/$(ls build | grep -i cmake)" | |
ctest | |
- name: Regression tests | |
if: ${{ contains(matrix.runner, 'A100') }} | |
run: | | |
python3 -m pip install pytest-rerunfailures | |
cd python/test/regression | |
sudo nvidia-smi -i 0 -pm 1 | |
sudo nvidia-smi -i 0 --lock-gpu-clocks=1280,1280 | |
python3 -m pytest -vs . --reruns 10 | |
sudo nvidia-smi -i 0 -rgc | |
Integration-Tests-Third-Party: | |
needs: Runner-Preparation | |
runs-on: ${{ matrix.runner }} | |
strategy: | |
matrix: | |
runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-optional)}} | |
steps: | |
- name: Checkout | |
uses: actions/checkout@v2 | |
- name: Set ROCM ENV | |
if: ${{(matrix.runner[0] == 'self-hosted') && (matrix.runner[1] == 'gfx908')}} | |
run: | | |
echo "BACKEND=ROCM" >> "${GITHUB_ENV}" | |
- name: Set XPU ENV | |
if: ${{(matrix.runner[0] == 'self-hosted') && (matrix.runner[1] == 'arc770')}} | |
run: | | |
echo "BACKEND=XPU" >> "${GITHUB_ENV}" | |
- name: Clear cache | |
run: | | |
rm -rf ~/.triton | |
- name: Update PATH | |
run: | | |
echo "PATH=${HOME}/.local/bin:${PATH}" >> "${GITHUB_ENV}" | |
- name: Check pre-commit | |
if: ${{ matrix.runner != 'macos-10.15' && (matrix.runner[1] != 'arc770') }} | |
run: | | |
python3 -m pip install --upgrade pre-commit | |
python3 -m pre_commit run --all-files --verbose | |
- name: Check pre-commit arc770 | |
if: ${{ matrix.runner != 'macos-10.15' && (matrix.runner[1] == 'arc770') }} | |
run: | | |
source ${HOME}/triton_vars.sh | |
source ${HOME}/miniconda3/bin/activate | |
conda activate triton-xpu-ci | |
python3 -m pip install --upgrade pre-commit | |
python3 -m pre_commit run --all-files | |
- name: Install Triton on ROCM | |
if: ${{ env.BACKEND == 'ROCM'}} | |
run: | | |
cd python | |
python3 -m pip install --upgrade pip | |
python3 -m pip install cmake==3.24 | |
python3 -m pip install torch==1.13.1 --index-url https://download.pytorch.org/whl/rocm5.2 | |
python3 -m pip install --no-build-isolation -vvv '.[tests]' | |
- name: Install Triton on XPU | |
if: ${{ env.BACKEND == 'XPU'}} | |
run: | | |
source ${HOME}/triton_vars.sh | |
source ${HOME}/miniconda3/bin/activate | |
conda activate triton-xpu-ci | |
git submodule update --init --recursive | |
cd python | |
python3 -m pip install --upgrade pip | |
python3 -m pip install cmake==3.24 | |
export TRITON_CODEGEN_INTEL_XPU_BACKEND=1 | |
python3 -m pip uninstall -y triton | |
python3 setup.py build | |
python3 -m pip install --no-build-isolation -vvv '.[tests]' | |
- name: Run python tests on ROCM | |
if: ${{ env.BACKEND == 'ROCM'}} | |
run: | | |
cd python/test/unit/language | |
python3 -m pytest --capture=tee-sys -rfs --verbose "test_core.py::test_empty_kernel" | |
- name: Run python tests on XPU | |
if: ${{ env.BACKEND == 'XPU'}} | |
run: | | |
source ${HOME}/triton_vars.sh | |
source ${HOME}/miniconda3/bin/activate | |
conda activate triton-xpu-ci | |
cd python/test/backend/third_party_backends | |
python3 -m pytest --capture=tee-sys -rfs --verbose --backend xpu | |
Compare-artifacts: | |
needs: Integration-Tests-Nvidia | |
runs-on: ubuntu-latest | |
steps: | |
- name: Checkout | |
uses: actions/checkout@v2 | |
- name: Install gh CLI | |
run: | | |
sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-key 23F3D4EA75716059 | |
echo "deb [arch=$(dpkg --print-architecture)] https://cli.github.com/packages focal main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null | |
sudo apt update | |
sudo apt install gh | |
- name: Download latest main artifacts | |
env: | |
ARTIFACT_NAME: artifacts A100 | |
ARTIFACT_JOB_NAME: Integration-Tests-Nvidia | |
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
run: | | |
OWNER_REPO="${{ github.repository }}" | |
echo "OWNER_REPO: $OWNER_REPO" | |
PR_NUMBERS=($(gh api --method GET repos/$OWNER_REPO/pulls -f state=closed | jq -r ".[] | select(.merged_at != null) | .number")) | |
# Not all PRs go through integration tests | |
success=0 | |
for PR_NUMBER in "${PR_NUMBERS[@]}" | |
do | |
echo "Last merged PR number: $PR_NUMBER" | |
BRANCH_NAME=$(gh api repos/$OWNER_REPO/pulls/$PR_NUMBER --jq '.head.ref') | |
echo "BRANCH_NAME: $BRANCH_NAME" | |
USER_ID=$(gh api repos/$OWNER_REPO/pulls/$PR_NUMBER --jq '.user.id') | |
echo "USER_ID: $USER_ID" | |
page=1 | |
while true; do | |
run_id=$(gh api --method GET "repos/$OWNER_REPO/actions/runs?page=$page&per_page=100" | jq --arg branch_name "$BRANCH_NAME" --arg run_name "Integration Tests" --arg user_id "$USER_ID" '.workflow_runs[] | select(.head_branch == $branch_name and .name == $run_name and .actor.id == ($user_id | tonumber))' | jq '.id' | head -1) | |
if [ "$run_id" != "" ]; then | |
echo "First run ID on branch $BRANCH_NAME is: $run_id" | |
WORKFLOW_RUN_ID=$run_id | |
break | |
fi | |
((page++)) | |
done | |
echo "WORKFLOW_RUN_ID: $WORKFLOW_RUN_ID" | |
ARTIFACT_URL=$(gh api repos/$OWNER_REPO/actions/runs/$WORKFLOW_RUN_ID/artifacts | jq --arg artifact_name "$ARTIFACT_NAME" '.artifacts[] | select(.name == $artifact_name).archive_download_url' --raw-output) | |
echo "ARTIFACT_URL: $ARTIFACT_URL" | |
if [ -n "$ARTIFACT_URL" ]; then | |
echo "Downloading artifact: $ARTIFACT_URL" | |
curl --location --remote-header-name -H "Authorization: token $GH_TOKEN" -o reference.zip "$ARTIFACT_URL" | |
# Print the size of the downloaded artifact | |
echo "Artifact size (stat): $(stat --printf="%s bytes" reference.zip)" | |
echo "Artifact size (du): $(du -sh reference.zip)" | |
unzip reference.zip | |
tar -xzf artifacts.tar.gz | |
rm reference.zip | |
rm artifacts.tar.gz | |
mv cache reference | |
success=1 | |
break | |
fi | |
done | |
if [ $success -eq 0 ]; then | |
echo "No artifact found with the name: $ARTIFACT_NAME" | |
exit 1 | |
fi | |
- name: Download current job artifacts | |
uses: actions/download-artifact@v2 | |
with: | |
name: artifacts A100 | |
- name: Unzip current job artifacts | |
run: | | |
# Print the size of the downloaded artifact | |
echo "Artifact size (stat): $(stat --printf="%s bytes" artifacts.tar.gz)" | |
echo "Artifact size (du): $(du -sh artifacts.tar.gz)" | |
tar -xzf artifacts.tar.gz | |
rm artifacts.tar.gz | |
mv cache current | |
- name: Compare artifacts | |
run: | | |
set +e | |
python3 python/test/tools/compare_files.py --path1 reference --path2 current --kernels python/test/kernel_comparison/kernels.yml | |
exit_code=$? | |
set -e | |
echo $exit_code | |
if [ $exit_code -eq 0 ]; then | |
echo "Artifacts are identical" | |
echo "COMPARISON_RESULT=true" >> $GITHUB_ENV | |
elif [ $exit_code -eq 1 ]; then | |
echo "Artifacts are different" | |
echo "COMPARISON_RESULT=false" >> $GITHUB_ENV | |
else | |
echo "Error while comparing artifacts" | |
echo "COMPARISON_RESULT=error" >> $GITHUB_ENV | |
fi | |
echo "COMPARISON_RESULT=env.COMPARISON_RESULT" | |
- name: Check exit code and handle failure | |
if: ${{ env.COMPARISON_RESULT == 'error' }} | |
run: | | |
echo "Error while comparing artifacts" | |
exit 1 | |
- name: Fetch Run ID | |
id: get_run_id | |
run: echo "RUN_ID=${{ github.run_id }}" >> $GITHUB_ENV | |
- name: Upload results as artifact | |
uses: actions/upload-artifact@v2 | |
with: | |
name: kernels-reference-check | |
path: kernels_reference_check.txt | |
- name: Check output and comment on PR | |
if: ${{ env.COMPARISON_RESULT == 'false' }} | |
uses: actions/github-script@v5 | |
with: | |
github-token: ${{ secrets.CI_ACCESS_TOKEN }} | |
script: | | |
const run_id = ${{ env.RUN_ID }}; | |
const issue_number = context.payload.pull_request.number; | |
const message = `:warning: **This PR does not produce bitwise identical kernels as the branch it's merged against.** Please check artifacts for details. [Download the output file here](https://github.com/${{ github.repository }}/actions/runs/${run_id}).`; | |
await github.rest.issues.createComment({ | |
owner: context.repo.owner, | |
repo: context.repo.repo, | |
issue_number: issue_number, | |
body: message | |
}); |