From ba1b77597c8ccf5cfef6c6c1f6cb95d3048d71b5 Mon Sep 17 00:00:00 2001 From: Julio Perez <37191411+jperez999@users.noreply.github.com> Date: Sat, 14 Oct 2023 23:53:38 -0400 Subject: [PATCH] use rapids infra to run testing (#1216) * use rapids infra to run testing * remove branch tags from logic * remove artifact dupe of jobs field * add back in logic for branch identification * add gpus flag to container call * checking gpu with nvidia-smi * using private container test * adding correct address for container * add new container pull logic to all test sets * consolidate testing because all in one container --- .github/workflows/gpu.yml | 43 +++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/.github/workflows/gpu.yml b/.github/workflows/gpu.yml index 694ff16a9e..7a137c68fe 100644 --- a/.github/workflows/gpu.yml +++ b/.github/workflows/gpu.yml @@ -1,22 +1,25 @@ -name: gpu-ci +name: GPU CI on: workflow_dispatch: push: - branches: [main] + branches: + - main + - "pull-request/[0-9]+" tags: - "v[0-9]+.[0-9]+.[0-9]+" - pull_request: - branches: [main] - types: [opened, synchronize, reopened] - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true jobs: gpu-ci: - runs-on: 1GPU + runs-on: linux-amd64-gpu-p100-latest-1 + container: + image: nvcr.io/nvstaging/merlin/merlin-ci-runner:latest + env: + NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} + options: --shm-size=1G + credentials: + username: $oauthtoken + password: ${{ secrets.NGC_TOKEN }} steps: - uses: actions/checkout@v3 @@ -24,6 +27,8 @@ jobs: fetch-depth: 0 - name: Run tests run: | + nvidia-smi + pip install tox ref_type=${{ github.ref_type }} branch=main if [[ $ref_type == "tag"* ]] @@ -34,17 +39,25 @@ jobs: if [[ "${{ github.ref }}" != 'refs/heads/main' ]]; then extra_pytest_markers="and changed" fi - cd ${{ github.workspace }}; PYTEST_MARKERS="unit and not (examples or integration or notebook) and (singlegpu or not multigpu) $extra_pytest_markers" MERLIN_BRANCH=$branch COMPARE_BRANCH=${{ github.base_ref }} tox -e gpu - - tests-examples: - runs-on: 1GPU + PYTEST_MARKERS="unit and not (examples or integration or notebook) and (singlegpu or not multigpu) $extra_pytest_markers" MERLIN_BRANCH=$branch COMPARE_BRANCH=${{ github.base_ref }} tox -e gpu + gpu-ci-examples: + runs-on: linux-amd64-gpu-p100-latest-1 + container: + image: nvcr.io/nvstaging/merlin/merlin-ci-runner:latest + env: + NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} + options: --shm-size=1G + credentials: + username: $oauthtoken + password: ${{ secrets.NGC_TOKEN }} steps: - uses: actions/checkout@v3 with: fetch-depth: 0 - name: Run tests run: | + pip install tox ref_type=${{ github.ref_type }} branch=main if [[ $ref_type == "tag"* ]] @@ -55,4 +68,4 @@ jobs: if [[ "${{ github.ref }}" != 'refs/heads/main' ]]; then extra_pytest_markers="and changed" fi - cd ${{ github.workspace }}; PYTEST_MARKERS="(examples or notebook) $extra_pytest_markers" MERLIN_BRANCH=$branch COMPARE_BRANCH=${{ github.base_ref }} tox -e gpu + PYTEST_MARKERS="(examples or notebook) $extra_pytest_markers" MERLIN_BRANCH=$branch COMPARE_BRANCH=${{ github.base_ref }} tox -e gpu