# Test extracting matmul tests into iree-test-suites. #3

# Copyright 2023 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
# Regression-test workflow. In this revision only the `test_matmul` job is
# live; the `test_onnx` job and the reusable-workflow triggers are kept as
# commented-out reference while the matmul test extraction is being tried out.
name: PkgCI Regression Test

on:
  pull_request:
  # workflow_call:
  #   inputs:
  #     artifact_run_id:
  #       type: string
  #       default: ""
  # workflow_dispatch:
  #   inputs:
  #     artifact_run_id:
  #       type: string
  #       default: ""

jobs:
  # test_onnx:
  #   name: "test_onnx :: ${{ matrix.name }}"
  #   runs-on: ${{ matrix.runs-on }}
  #   strategy:
  #     fail-fast: false
  #     matrix:
  #       include:
  #         # CPU
  #         - name: cpu_llvm_sync
  #           config-file: onnx_ops_cpu_llvm_sync.json
  #           numprocesses: auto
  #           runs-on: ubuntu-20.04
  #         # AMD GPU
  #         - name: amdgpu_rocm_rdna3
  #           numprocesses: 1
  #           config-file: onnx_ops_gpu_rocm_rdna3.json
  #           runs-on: nodai-amdgpu-w7900-x86-64
  #         - name: amdgpu_vulkan
  #           numprocesses: 4
  #           config-file: onnx_ops_gpu_vulkan.json
  #           runs-on: nodai-amdgpu-w7900-x86-64
  #         # NVIDIA GPU
  #         - name: nvidiagpu_cuda
  #           config-file: onnx_ops_gpu_cuda.json
  #           numprocesses: 4
  #           runs-on:
  #             - self-hosted # must come first
  #             - runner-group=${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }}
  #             - environment=prod
  #             - gpu # TODO(scotttodd): qualify further with vendor/model
  #             - os-family=Linux
  #         - name: nvidiagpu_vulkan
  #           config-file: onnx_ops_gpu_vulkan.json
  #           numprocesses: 4
  #           runs-on:
  #             - self-hosted # must come first
  #             - runner-group=${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }}
  #             - environment=prod
  #             - gpu # TODO(scotttodd): qualify further with vendor/model
  #             - os-family=Linux
  #   env:
  #     PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
  #     CONFIG_FILE_PATH: tests/external/iree-test-suites/onnx_ops/${{ matrix.config-file }}
  #     NUMPROCESSES: ${{ matrix.numprocesses }}
  #     LOG_FILE_PATH: /tmp/test_onnx_ops_${{ matrix.name }}_logs.json
  #     VENV_DIR: ${{ github.workspace }}/venv
  #   steps:
  #     - name: Checking out IREE repository
  #       uses: actions/checkout@v4
  #       with:
  #         submodules: false
  #     - uses: actions/setup-python@v5
  #       with:
  #         # Must match the subset of versions built in pkgci_build_packages.
  #         python-version: "3.11"
  #     - uses: actions/download-artifact@v4
  #       with:
  #         name: linux_x86_64_release_packages
  #         path: ${{ env.PACKAGE_DOWNLOAD_DIR }}
  #     - name: Setup venv
  #       run: |
  #         ./build_tools/pkgci/setup_venv.py ${VENV_DIR} \
  #           --artifact-path=${PACKAGE_DOWNLOAD_DIR} \
  #           --fetch-gh-workflow=${{ inputs.artifact_run_id }}
  #     - name: Checkout test suites repository
  #       uses: actions/checkout@v4
  #       with:
  #         repository: iree-org/iree-test-suites
  #         ref: 323c2df477544d789c56e4dfda9b047580b77cbf
  #         path: iree-test-suites
  #     - name: Install ONNX ops test suite requirements
  #       run: |
  #         source ${VENV_DIR}/bin/activate
  #         python -m pip install -r iree-test-suites/onnx_ops/requirements.txt
  #     - name: Run ONNX ops test suite
  #       run: |
  #         source ${VENV_DIR}/bin/activate
  #         pytest iree-test-suites/onnx_ops/ \
  #           -rpfE \
  #           --numprocesses ${NUMPROCESSES} \
  #           --timeout=30 \
  #           --durations=20 \
  #           --config-files=${CONFIG_FILE_PATH} \
  #           --report-log=${LOG_FILE_PATH}
  #     - name: "Updating config file with latest XFAIL lists"
  #       if: failure()
  #       run: |
  #         source ${VENV_DIR}/bin/activate
  #         python iree-test-suites/onnx_ops/update_config_xfails.py \
  #           --log-file=${LOG_FILE_PATH} \
  #           --config-file=${CONFIG_FILE_PATH}
  #         cat ${CONFIG_FILE_PATH}
  #     - name: "Uploading new config file"
  #       if: failure()
  #       uses: actions/upload-artifact@v4
  #       with:
  #         name: ${{ matrix.config-file }}
  #         path: ${{ env.CONFIG_FILE_PATH }}

  # Builds the matmul test suite from the iree-test-suites checkout with CMake
  # and runs it through CTest, once per matrix entry (currently CPU only).
  test_matmul:
    name: "test_matmul :: ${{ matrix.name }}"
    runs-on: ${{ matrix.runs-on }}
    strategy:
      fail-fast: false
      matrix:
        include:
          # CPU
          - name: cpu
            runs-on: ubuntu-20.04
    env:
      # Only referenced by the commented-out artifact download below.
      PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
      VENV_DIR: ${{ github.workspace }}/venv
    steps:
      # NOTE(review): the action version tags were unreadable in the captured
      # source; major-version tags are used here. Confirm the exact pins.
      - name: Checking out IREE repository
        uses: actions/checkout@v4
        with:
          submodules: false
      - uses: actions/setup-python@v5
        with:
          # Must match the subset of versions built in pkgci_build_packages.
          python-version: "3.11"
      # - uses: actions/download-artifact@v4
      #   with:
      #     name: linux_x86_64_release_packages
      #     path: ${{ env.PACKAGE_DOWNLOAD_DIR }}
      # - name: Setup venv
      #   run: |
      #     ./build_tools/pkgci/setup_venv.py ${VENV_DIR} \
      #       --artifact-path=${PACKAGE_DOWNLOAD_DIR} \
      #       --fetch-gh-workflow=${{ inputs.artifact_run_id }}
      # Installs iree-compiler / iree-runtime from the pip release links page
      # rather than from the workflow artifacts used by the disabled step above.
      - name: Setup venv
        run: |
          python -m venv "${VENV_DIR}"
          source "${VENV_DIR}/bin/activate"
          python -m pip install \
            --find-links https://iree.dev/pip-release-links.html \
            --upgrade iree-compiler iree-runtime
      # Checked out into a subdirectory so it sits alongside the IREE checkout.
      - name: Checkout test suites repository
        uses: actions/checkout@v4
        with:
          repository: ScottTodd/iree-test-suites
          ref: affdb261852372f80fb6ab1e58d695fdf43b3997
          path: iree-test-suites
      - name: Install build dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y cmake clang ninja-build libstdc++-12-dev
          # Written to GITHUB_ENV so later steps compile with clang.
          echo "CC=clang" >> "$GITHUB_ENV"
          echo "CXX=clang++" >> "$GITHUB_ENV"
      # NOTE(review): IREE_LOCAL_REPO_PATH=. presumably refers to the IREE
      # checkout at the workspace root, and IREE_HOST_BIN_DIR to the tools
      # installed into the venv by pip. Confirm against the matmul CMake project.
      - name: Configure test suite CMake project
        run: |
          source "${VENV_DIR}/bin/activate"
          cmake -G Ninja -S iree-test-suites/matmul -B build/ \
            -DCMAKE_BUILD_TYPE=RelWithDebInfo \
            -DIREE_USE_LOCAL_REPO=ON \
            -DIREE_LOCAL_REPO_PATH=. \
            -DIREE_HOST_BIN_DIR="${VENV_DIR}/bin"
      - name: Build test suite CMake project
        run: |
          cmake --build build/
          cmake --build build/ --target iree-test-suites-matmul-deps
      - name: CTest test suite CMake project
        run: |
          ctest --test-dir build/ -R iree-test-suites

# test_models:
# name: "test_models :: ${{ matrix.name }}"
# runs-on: ${{ matrix.runs-on }}
# strategy:
# fail-fast: false
# # Note: these jobs should use persistent runners with local caches.
# # Downloading test files (50GB+) without a cache can take 20+ minutes.
# matrix:
# include:
# # CPU
# - name: cpu_llvm_task
# models-config-file: models_cpu_llvm_task.json
# iree-tests-cache: ~/iree_tests_cache
# runs-on: nodai-amdgpu-w7900-x86-64
# # AMD GPU
# - name: amdgpu_rocm_mi250_gfx90a
# models-config-file: models_gpu_rocm_gfx90a.json
# iree-tests-cache: /groups/aig_sharks/iree-tests-cache
# runs-on: nodai-amdgpu-mi250-x86-64
# - name: amdgpu_rocm_mi300_gfx942
# models-config-file: models_gpu_rocm_gfx942.json
# iree-tests-cache: ~/iree_tests_cache
# runs-on: nodai-amdgpu-mi300-x86-64
# - name: amdgpu_vulkan
# models-config-file: models_gpu_vulkan.json
# iree-tests-cache: ~/iree_tests_cache
# runs-on: nodai-amdgpu-w7900-x86-64
# # NVIDIA GPU
# # None at the moment. Could maybe use the persistent a100 runners:
# # - self-hosted # must come first
# # - runner-group=${{ needs.setup.outputs.runner-group }}
# # - environment=${{ needs.setup.outputs.runner-env }}
# # - a100
# # - os-family=Linux
# # (note: would need to plumb the presubmit/postsubmit runner-group through to here too)
# env:
# PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
# IREE_TEST_FILES: ${{ matrix.iree-tests-cache }}
# IREE_TEST_PATH_EXTENSION: ${{ github.workspace }}/build_tools/pkgci/external_test_suite
# MODELS_CONFIG_FILE_PATH: build_tools/pkgci/external_test_suite/${{ matrix.models-config-file }}
# VENV_DIR: ${{ github.workspace }}/venv
# LD_LIBRARY_PATH: /home/esaimana/Python-3.11.9
# steps:
# # TODO(saienduri): Find alternative to this temporary step that manipulates permission of github actions
# # directory to be able to clean after every PR
# - name: Pre Checkout MI300 Step
# if: contains(matrix.name, 'gfx942')
# run: |
# sudo chmod -R 777 ~/actions-runner/_work
# - name: Checking out IREE repository
# uses: actions/checkout@v4
# with:
# submodules: false
# - uses: actions/setup-python@v5
# with:
# # Must match the subset of versions built in pkgci_build_packages.
# python-version: "3.11"
# - uses: actions/download-artifact@v4
# with:
# name: linux_x86_64_release_packages
# path: ${{ env.PACKAGE_DOWNLOAD_DIR }}
# - name: Setup venv
# run: |
# ./build_tools/pkgci/setup_venv.py ${VENV_DIR} \
# --artifact-path=${PACKAGE_DOWNLOAD_DIR} \
# --fetch-gh-workflow=${{ inputs.artifact_run_id }}
# # Out of tree tests
# - name: Check out external TestSuite repository
# uses: actions/checkout@v4
# with:
# repository: nod-ai/SHARK-TestSuite
# ref: f5615ab29da491c0047146258dfa3a0c40c735e5
# path: SHARK-TestSuite
# submodules: false
# lfs: true
# - name: Install external TestSuite Python requirements
# run: |
# source ${VENV_DIR}/bin/activate
# python3 -m pip install -r SHARK-TestSuite/iree_tests/requirements.txt
# pip install --no-compile --pre --upgrade -e SHARK-TestSuite/common_tools
# - name: Download remote files for real weight model tests
# run: |
# source ${VENV_DIR}/bin/activate
# python SHARK-TestSuite/iree_tests/download_remote_files.py --root-dir iree_tests/pytorch/models
# python SHARK-TestSuite/iree_tests/download_remote_files.py --root-dir iree_tests/sharktank
# - name: Run external tests - models with real weights
# if: "matrix.models-config-file != '' && !cancelled()"
# run: |
# source ${VENV_DIR}/bin/activate
# pytest \
# SHARK-TestSuite/iree_tests/pytorch/models \
# SHARK-TestSuite/iree_tests/sharktank \
# -rA \
# -k real_weights \
# --no-skip-tests-missing-files \
# --capture=no \
# --log-cli-level=info \
# --timeout=1200 \
# --durations=0 \
# --config-files=${MODELS_CONFIG_FILE_PATH}
# test_regression_suite:
# name: "test_regression_suite :: ${{ matrix.name }}"
# runs-on: ${{ matrix.runs-on }}
# strategy:
# fail-fast: false
# # Note: these jobs should use persistent runners with local caches.
# # Downloading test files (50GB+) without a cache can take 20+ minutes.
# matrix:
# include:
# # CPU
# - name: cpu_llvm_task
# models-config-file: models_cpu_llvm_task.json
# backend: cpu
# iree-tests-cache: ~/iree_tests_cache
# runs-on: nodai-amdgpu-w7900-x86-64
# # AMD GPU
# - name: amdgpu_rocm_mi250_gfx90a
# rocm-chip: gfx90a
# backend: rocm
# iree-tests-cache: /groups/aig_sharks/iree-tests-cache
# runs-on: nodai-amdgpu-mi250-x86-64
# - name: amdgpu_rocm_mi300_gfx942
# rocm-chip: gfx942
# backend: rocm
# iree-tests-cache: ~/iree_tests_cache
# runs-on: nodai-amdgpu-mi300-x86-64
# env:
# PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
# IREE_TEST_FILES: ${{ matrix.iree-tests-cache }}
# IREE_TEST_PATH_EXTENSION: ${{ github.workspace }}/build_tools/pkgci/external_test_suite
# VENV_DIR: ${{ github.workspace }}/venv
# LD_LIBRARY_PATH: /home/esaimana/Python-3.11.9
# steps:
# # TODO(saienduri): Find alternative to this temporary step that manipulates permission of github actions
# # directory to be able to clean after every PR
# - name: Pre Checkout MI300 Step
# if: contains(matrix.name, 'gfx942')
# run: |
# sudo chmod -R 777 ~/actions-runner/_work
# - name: Checking out IREE repository
# uses: actions/checkout@v4
# with:
# submodules: false
# - uses: actions/setup-python@v5
# with:
# # Must match the subset of versions built in pkgci_build_packages.
# python-version: "3.11"
# - uses: actions/download-artifact@v4
# with:
# name: linux_x86_64_release_packages
# path: ${{ env.PACKAGE_DOWNLOAD_DIR }}
# - name: Setup venv
# run: |
# ./build_tools/pkgci/setup_venv.py ${VENV_DIR} \
# --artifact-path=${PACKAGE_DOWNLOAD_DIR} \
# --fetch-gh-workflow=${{ inputs.artifact_run_id }}
# source ${VENV_DIR}/bin/activate
# pip install -e experimental/regression_suite
# # TODO(#17344): regenerate .mlirbc files, test plat_rdna3_rocm on rocm
# # # In-tree tests
# # - name: Run experimental/regression_suite tests
# # run: |
# # source ${VENV_DIR}/bin/activate
# # pytest \
# # -rA -s -m "plat_host_cpu and presubmit" \
# # experimental/regression_suite
# - name: "Running SDXL special model tests"
# if: "!cancelled()"
# run: |
# source ${VENV_DIR}/bin/activate
# pytest ./experimental/regression_suite/shark-test-suite-models/sdxl \
# -k ${{ matrix.backend }} \
# -rpfE \
# --capture=no \
# --log-cli-level=info \
# --timeout=1200 \
# --durations=0
# env:
# ROCM_CHIP: ${{ matrix.rocm-chip }}
# - name: "Running SD3 special model tests"
# if: "!cancelled()"
# run: |
# source ${VENV_DIR}/bin/activate
# pytest ./experimental/regression_suite/shark-test-suite-models/sd3 \
# -k ${{ matrix.backend }} \
# -rpfE \
# --capture=no \
# --log-cli-level=info \
# --timeout=1200 \
# --durations=0
# env:
# ROCM_CHIP: ${{ matrix.rocm-chip }}
# # Note: mi250 benchmark times are more lenient than mi300 (allowing about
# # 10% deviation from observed averages), since the mi250 runners we use
# # are more unstable and we care most about peak performance on mi300.
# - name: "Running SDXL rocm pipeline benchmark (mi250)"
# if: contains(matrix.name, 'rocm_mi250_gfx90a')
# run: |
# source ${VENV_DIR}/bin/activate
# pytest ./experimental/benchmarks/sdxl/benchmark_sdxl_rocm.py \
# --goldentime-rocm-e2e-ms 1450.0 \
# --goldentime-rocm-unet-ms 370.0 \
# --goldentime-rocm-clip-ms 18.5 \
# --goldentime-rocm-vae-ms 315.0 \
# --goldendispatch-rocm-unet 1691 \
# --goldendispatch-rocm-clip 1225 \
# --goldendispatch-rocm-vae 248 \
# --goldensize-rocm-unet-bytes 2280000 \
# --goldensize-rocm-clip-bytes 860000 \
# --goldensize-rocm-vae-bytes 840000 \
# --gpu-number 6 \
# --rocm-chip gfx90a \
# --log-cli-level=info \
# --retries 7
# echo "$(<job_summary.md )" >> $GITHUB_STEP_SUMMARY
# rm job_summary.md
# - name: "Running SDXL rocm pipeline benchmark (mi300)"
# if: contains(matrix.name, 'rocm_mi300_gfx942')
# run: |
# source ${VENV_DIR}/bin/activate
# pytest ./experimental/benchmarks/sdxl/benchmark_sdxl_rocm.py \
# --goldentime-rocm-e2e-ms 325.0 \
# --goldentime-rocm-unet-ms 77.0 \
# --goldentime-rocm-clip-ms 15.5 \
# --goldentime-rocm-vae-ms 74.0 \
# --goldendispatch-rocm-unet 1691 \
# --goldendispatch-rocm-clip 1225 \
# --goldendispatch-rocm-vae 248 \
# --goldensize-rocm-unet-bytes 2270000 \
# --goldensize-rocm-clip-bytes 860000 \
# --goldensize-rocm-vae-bytes 840000 \
# --gpu-number 0 \
# --rocm-chip gfx942 \
# --log-cli-level=info \
# --retries 7
# echo "$(<job_summary.md )" >> $GITHUB_STEP_SUMMARY