Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Run all framework sanity check tests and organize jobs. #18420

Merged
merged 11 commits into from
Sep 4, 2024
Merged
30 changes: 24 additions & 6 deletions .github/workflows/pkgci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,24 @@ jobs:
with:
package_version: 0.dev1

# Package sanity checks, delegated to the pkgci_unit_test.yml workflow.
# Waits on `setup` and `build_packages`; skipped unless 'unit_test' is in the
# enabled-jobs list output by `setup`.
unit_test:
  name: Unit Test
  uses: ./.github/workflows/pkgci_unit_test.yml
  needs:
    - setup
    - build_packages
  if: ${{ contains(fromJson(needs.setup.outputs.enabled-jobs), 'unit_test') }}

# Tests for large programs, delegated to the pkgci_regression_test.yml workflow.
# Waits on `setup` and `build_packages`; skipped unless 'regression_test' is in
# the enabled-jobs list output by `setup`.
regression_test:
  name: Regression Test
  uses: ./.github/workflows/pkgci_regression_test.yml
  needs:
    - setup
    - build_packages
  if: ${{ contains(fromJson(needs.setup.outputs.enabled-jobs), 'regression_test') }}

########################### Hardware/accelerators ###########################
# Jobs that run unit tests on special hardware platforms or accelerators
#############################################################################

test_amd_mi250:
name: Test AMD MI250
needs: [setup, build_packages]
Expand All @@ -73,22 +79,34 @@ jobs:
if: contains(fromJson(needs.setup.outputs.enabled-jobs), 'test_nvidia_t4')
uses: ./.github/workflows/pkgci_test_nvidia_t4.yml

# Calls the pkgci_test_tensorflow_cpu.yml workflow once `setup` and
# `build_packages` finish; skipped unless 'test_tensorflow_cpu' is in the
# enabled-jobs list output by `setup`.
test_tensorflow_cpu:
  name: Test TensorFlow CPU
  uses: ./.github/workflows/pkgci_test_tensorflow_cpu.yml
  needs:
    - setup
    - build_packages
  if: ${{ contains(fromJson(needs.setup.outputs.enabled-jobs), 'test_tensorflow_cpu') }}

# Calls the pkgci_test_android.yml workflow once `setup` and `build_packages`
# finish; skipped unless 'test_android' is in the enabled-jobs list output by
# `setup`. The `write-caches` output of `setup` is forwarded as an input.
test_android:
  name: Test Android
  uses: ./.github/workflows/pkgci_test_android.yml
  with:
    write-caches: ${{ needs.setup.outputs.write-caches }}
  needs:
    - setup
    - build_packages
  if: ${{ contains(fromJson(needs.setup.outputs.enabled-jobs), 'test_android') }}

# Calls the pkgci_test_riscv64.yml workflow once `setup` and `build_packages`
# finish; skipped unless 'test_riscv64' is in the enabled-jobs list output by
# `setup`. The `write-caches` output of `setup` is forwarded as an input.
test_riscv64:
  name: Test RISC-V 64
  uses: ./.github/workflows/pkgci_test_riscv64.yml
  with:
    write-caches: ${{ needs.setup.outputs.write-caches }}
  needs:
    - setup
    - build_packages
  if: ${{ contains(fromJson(needs.setup.outputs.enabled-jobs), 'test_riscv64') }}

################################# Frameworks ################################
# Jobs that test machine learning frameworks or their Python APIs
#############################################################################

# Calls the pkgci_test_onnx.yml workflow once `setup` and `build_packages`
# finish; skipped unless 'test_onnx' is in the enabled-jobs list output by
# `setup`.
test_onnx:
  name: Test ONNX
  uses: ./.github/workflows/pkgci_test_onnx.yml
  needs:
    - setup
    - build_packages
  if: ${{ contains(fromJson(needs.setup.outputs.enabled-jobs), 'test_onnx') }}

# Calls the pkgci_test_tensorflow.yml workflow once `setup` and
# `build_packages` finish; skipped unless 'test_tensorflow' is in the
# enabled-jobs list output by `setup`.
test_tensorflow:
  name: Test TensorFlow
  uses: ./.github/workflows/pkgci_test_tensorflow.yml
  needs:
    - setup
    - build_packages
  if: ${{ contains(fromJson(needs.setup.outputs.enabled-jobs), 'test_tensorflow') }}
102 changes: 0 additions & 102 deletions .github/workflows/pkgci_regression_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,108 +18,6 @@ on:
default: ""

jobs:
test_onnx:
name: "test_onnx :: ${{ matrix.name }}"
runs-on: ${{ matrix.runs-on }}
strategy:
fail-fast: false
matrix:
include:
# CPU
- name: cpu_llvm_sync
config-file: onnx_ops_cpu_llvm_sync.json
numprocesses: auto
runs-on: ubuntu-20.04

# AMD GPU
- name: amdgpu_rocm_rdna3
numprocesses: 1
config-file: onnx_ops_gpu_rocm_rdna3.json
runs-on: nodai-amdgpu-w7900-x86-64
- name: amdgpu_vulkan
numprocesses: 4
config-file: onnx_ops_gpu_vulkan.json
runs-on: nodai-amdgpu-w7900-x86-64

# NVIDIA GPU
- name: nvidiagpu_cuda
config-file: onnx_ops_gpu_cuda.json
numprocesses: 4
runs-on:
- self-hosted # must come first
- runner-group=${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }}
- environment=prod
- gpu # TODO(scotttodd): qualify further with vendor/model
- os-family=Linux
- name: nvidiagpu_vulkan
config-file: onnx_ops_gpu_vulkan.json
numprocesses: 4
runs-on:
- self-hosted # must come first
- runner-group=${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }}
- environment=prod
- gpu # TODO(scotttodd): qualify further with vendor/model
- os-family=Linux
env:
PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
CONFIG_FILE_PATH: tests/external/iree-test-suites/onnx_ops/${{ matrix.config-file }}
NUMPROCESSES: ${{ matrix.numprocesses }}
LOG_FILE_PATH: /tmp/test_onnx_ops_${{ matrix.name }}_logs.json
VENV_DIR: ${{ github.workspace }}/venv
steps:
- name: Checking out IREE repository
uses: actions/[email protected]
with:
submodules: false
- uses: actions/[email protected]
with:
# Must match the subset of versions built in pkgci_build_packages.
python-version: "3.11"
- uses: actions/[email protected]
with:
name: linux_x86_64_release_packages
path: ${{ env.PACKAGE_DOWNLOAD_DIR }}
- name: Setup venv
run: |
./build_tools/pkgci/setup_venv.py ${VENV_DIR} \
--artifact-path=${PACKAGE_DOWNLOAD_DIR} \
--fetch-gh-workflow=${{ inputs.artifact_run_id }}

- name: Checkout test suites repository
uses: actions/[email protected]
with:
repository: iree-org/iree-test-suites
ref: 9e921d0ea271a85f772eee22965585461c9b14c2
path: iree-test-suites
- name: Install ONNX ops test suite requirements
run: |
source ${VENV_DIR}/bin/activate
python -m pip install -r iree-test-suites/onnx_ops/requirements.txt
- name: Run ONNX ops test suite
run: |
source ${VENV_DIR}/bin/activate
pytest iree-test-suites/onnx_ops/ \
-rpfE \
--numprocesses ${NUMPROCESSES} \
--timeout=30 \
--durations=20 \
--config-files=${CONFIG_FILE_PATH} \
--report-log=${LOG_FILE_PATH}
- name: "Updating config file with latest XFAIL lists"
if: failure()
run: |
source ${VENV_DIR}/bin/activate
python iree-test-suites/onnx_ops/update_config_xfails.py \
--log-file=${LOG_FILE_PATH} \
--config-file=${CONFIG_FILE_PATH}
cat ${CONFIG_FILE_PATH}
- name: "Uploading new config file"
if: failure()
uses: actions/[email protected]
with:
name: ${{ matrix.config-file }}
path: ${{ env.CONFIG_FILE_PATH }}

test_models:
name: "test_models :: ${{ matrix.name }}"
runs-on: ${{ matrix.runs-on }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pkgci_test_amd_mi250.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ on:
default: ""

jobs:
test:
test_mi250:
runs-on: nodai-amdgpu-mi250-x86-64
env:
PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pkgci_test_amd_mi300.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ on:
default: ""

jobs:
test:
test_mi300:
runs-on: nodai-amdgpu-mi300-x86-64
env:
PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pkgci_test_amd_w7900.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ on:
default: ""

jobs:
test:
test_w7900:
runs-on: nodai-amdgpu-w7900-x86-64
env:
PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pkgci_test_android.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ permissions:
contents: read

jobs:
cross_compile:
android_arm64:
Comment on lines 33 to +34
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just noticed that the Android and RISCV cross compile jobs are sharing a cache (named "cross_compile") when they shouldn't. Pushed another commit that gives each a unique cache key.

runs-on: ubuntu-20.04
env:
PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pkgci_test_nvidia_t4.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ on:
default: ""

jobs:
test:
test_t4:
runs-on:
- self-hosted # must come first
- runner-group=${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }}
Expand Down
121 changes: 121 additions & 0 deletions .github/workflows/pkgci_test_onnx.yml
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From a naming perspective, I am not super happy with having tests like pkgci_test_amd_mi300.yml, where the platform stands in the foreground, alongside tests where a framework stands in the foreground and that run on multiple hardware platforms. Maybe we can rework this someday.

Anyway, definitely a step in the right direction 👍

Agreed. Currently, some large chunks of code are copy/pasted, and grouping by platform would make that worse. I had ideas for making that easier on nod-ai/SHARK-TestSuite#288.

Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
# Copyright 2024 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

name: PkgCI Test ONNX
on:
  workflow_call:
    inputs:
      artifact_run_id:
        type: string
        default: ""
  workflow_dispatch:
    inputs:
      artifact_run_id:
        type: string
        default: ""

# NOTE(review): every step-level `uses:` reference in this file was garbled in
# the reviewed copy (it read `actions/[email protected]`). The action names below
# are restored from each step's name and `with:` keys; the major-version tags
# are an assumption. Restore the exact pinned versions if they are known.

jobs:
  # Runs the ONNX operator tests from iree-org/iree-test-suites, once per
  # matrix entry. Each entry selects a config file, a pytest worker count and
  # the runner to execute on.
  test_onnx_ops:
    name: "test_onnx :: ${{ matrix.name }}"
    runs-on: ${{ matrix.runs-on }}
    strategy:
      # Let the remaining matrix entries finish when one of them fails.
      fail-fast: false
      matrix:
        include:
          # CPU
          - name: cpu_llvm_sync
            config-file: onnx_ops_cpu_llvm_sync.json
            numprocesses: auto
            runs-on: ubuntu-20.04

          # AMD GPU
          - name: amdgpu_rocm_rdna3
            numprocesses: 1
            config-file: onnx_ops_gpu_rocm_rdna3.json
            runs-on: nodai-amdgpu-w7900-x86-64
          - name: amdgpu_vulkan
            numprocesses: 4
            config-file: onnx_ops_gpu_vulkan.json
            runs-on: nodai-amdgpu-w7900-x86-64

          # NVIDIA GPU
          - name: nvidiagpu_cuda
            config-file: onnx_ops_gpu_cuda.json
            numprocesses: 4
            runs-on:
              - self-hosted  # must come first
              - runner-group=${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }}
              - environment=prod
              - gpu  # TODO(scotttodd): qualify further with vendor/model
              - os-family=Linux
          - name: nvidiagpu_vulkan
            config-file: onnx_ops_gpu_vulkan.json
            numprocesses: 4
            runs-on:
              - self-hosted  # must come first
              - runner-group=${{ github.event_name == 'pull_request' && 'presubmit' || 'postsubmit' }}
              - environment=prod
              - gpu  # TODO(scotttodd): qualify further with vendor/model
              - os-family=Linux
    env:
      PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
      CONFIG_FILE_PATH: tests/external/iree-test-suites/onnx_ops/${{ matrix.config-file }}
      NUMPROCESSES: ${{ matrix.numprocesses }}
      LOG_FILE_PATH: /tmp/test_onnx_ops_${{ matrix.name }}_logs.json
      VENV_DIR: ${{ github.workspace }}/venv
      # Handed to the shell through the environment instead of being
      # interpolated into the script text, so the input value is never parsed
      # as shell code.
      ARTIFACT_RUN_ID: ${{ inputs.artifact_run_id }}
    steps:
      - name: Checking out IREE repository
        uses: actions/checkout@v4
        with:
          submodules: false
      - uses: actions/setup-python@v5
        with:
          # Must match the subset of versions built in pkgci_build_packages.
          python-version: "3.11"
      - uses: actions/download-artifact@v4
        with:
          name: linux_x86_64_release_packages
          path: ${{ env.PACKAGE_DOWNLOAD_DIR }}
      - name: Setup venv
        run: |
          ./build_tools/pkgci/setup_venv.py "${VENV_DIR}" \
            --artifact-path="${PACKAGE_DOWNLOAD_DIR}" \
            --fetch-gh-workflow="${ARTIFACT_RUN_ID}"

      # The test suite is pinned to a fixed commit of iree-test-suites and
      # checked out next to the IREE sources.
      - name: Checkout test suites repository
        uses: actions/checkout@v4
        with:
          repository: iree-org/iree-test-suites
          ref: 9e921d0ea271a85f772eee22965585461c9b14c2
          path: iree-test-suites
      - name: Install ONNX ops test suite requirements
        run: |
          source "${VENV_DIR}/bin/activate"
          python -m pip install -r iree-test-suites/onnx_ops/requirements.txt
      - name: Run ONNX ops test suite
        run: |
          source "${VENV_DIR}/bin/activate"
          pytest iree-test-suites/onnx_ops/ \
            -rpfE \
            --numprocesses "${NUMPROCESSES}" \
            --timeout=30 \
            --durations=20 \
            --config-files="${CONFIG_FILE_PATH}" \
            --report-log="${LOG_FILE_PATH}"

      # The two steps below run only after a failure: they rewrite the config
      # file from the pytest report log, print it, and upload it.
      - name: "Updating config file with latest XFAIL lists"
        if: failure()
        run: |
          source "${VENV_DIR}/bin/activate"
          python iree-test-suites/onnx_ops/update_config_xfails.py \
            --log-file="${LOG_FILE_PATH}" \
            --config-file="${CONFIG_FILE_PATH}"
          cat "${CONFIG_FILE_PATH}"
      - name: "Uploading new config file"
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          # amdgpu_vulkan and nvidiagpu_vulkan share onnx_ops_gpu_vulkan.json,
          # and upload-artifact v4 rejects a second artifact with the same
          # name in one run. Prefixing the matrix entry name keeps the
          # artifact names unique; the uploaded file keeps its own name.
          name: ${{ matrix.name }}_${{ matrix.config-file }}
          path: ${{ env.CONFIG_FILE_PATH }}
2 changes: 1 addition & 1 deletion .github/workflows/pkgci_test_riscv64.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ permissions:
contents: read

jobs:
cross_compile:
riscv64:
runs-on: ubuntu-20.04
env:
PACKAGE_DOWNLOAD_DIR: ${{ github.workspace }}/.packages
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

name: PkgCI Test TensorFlow (CPU)
name: PkgCI Test TensorFlow
on:
workflow_call:
inputs:
Expand Down
19 changes: 19 additions & 0 deletions .github/workflows/pkgci_unit_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,22 @@ jobs:
source ${VENV_DIR}/bin/activate
echo "Testing compiler package:"
python -m iree.compiler._package_test

# Test tools and importers that require additional deps.
# Installs the onnx package into the venv, then runs the ONNX importer tool
# test and the ONNX importer API test.
- name: Test ONNX importer
  run: |
    source ${VENV_DIR}/bin/activate
    # The requirement must be quoted: unquoted, the shell parses `>` as an
    # output redirect, installs an unconstrained `onnx`, and writes pip's
    # output to a file named `=1.16.0`.
    python -m pip install "onnx>=1.16.0"
    python compiler/bindings/python/test/tools/import_onnx_test.py
    python compiler/bindings/python/test/extras/onnx_importer_test.py
ScottTodd marked this conversation as resolved.
Show resolved Hide resolved
# Installs the torch package into the venv, then runs the FX importer test.
- name: Test FX (PyTorch) importer
  run: |
    source ${VENV_DIR}/bin/activate
    # The requirement must be quoted: unquoted, the shell parses `>` as an
    # output redirect, installs an unconstrained `torch`, and writes pip's
    # output to a file named `=2.3.0`.
    python -m pip install "torch>=2.3.0"
    python compiler/bindings/python/test/extras/fx_importer_test.py
# Activates the venv, runs the TensorFlow Python setup script, then runs the
# TFLite and TF compiler tool tests in that order.
- name: Test TensorFlow importer
  run: |
    source ${VENV_DIR}/bin/activate
    bash ./build_tools/scripts/setup_tf_python.sh
    for test_file in \
        compiler/bindings/python/test/tools/compiler_tflite_test.py \
        compiler/bindings/python/test/tools/compiler_tf_test.py; do
      python "${test_file}"
    done
6 changes: 0 additions & 6 deletions build_tools/cmake/run_tf_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,3 @@ if (( ${tests_passed} != 1 )); then
echo "Some tests failed!!!"
exit 1
fi

echo "***** Running TF and TFLite python api tests *****"

# Directory that holds both Python API test files passed to pytest below.
TF_API_TEST_DIR="compiler/bindings/python/test/tools"

# A single pytest invocation covers the TFLite test and the TF test.
pytest \
  ${TF_API_TEST_DIR}/compiler_tflite_test.py \
  ${TF_API_TEST_DIR}/compiler_tf_test.py
Loading
Loading