forked from NVIDIA/NeMo
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ci: Switch to FW templates for build (NVIDIA#11077)
* ci: Switch to FW templates for build
  Signed-off-by: Oliver Koenig <[email protected]>
* fix
  Signed-off-by: Oliver Koenig <[email protected]>
* fix
  Signed-off-by: Oliver Koenig <[email protected]>
* fix
  Signed-off-by: Oliver Koenig <[email protected]>
* fix image tag
  Signed-off-by: Oliver Koenig <[email protected]>
* fix
  Signed-off-by: Oliver Koenig <[email protected]>
* use labels for building and cleaning
  Signed-off-by: Oliver Koenig <[email protected]>
* fix tag
  Signed-off-by: Oliver Koenig <[email protected]>
* fix
  Signed-off-by: Oliver Koenig <[email protected]>
* fix
  Signed-off-by: Oliver Koenig <[email protected]>
---------
Signed-off-by: Oliver Koenig <[email protected]>
- Loading branch information
Showing 3 changed files with 21 additions and 61 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -48,69 +48,29 @@ jobs: | |
id: all | ||
run: | | ||
echo "main=${{ contains(fromJSON(steps.test_to_run.outputs.main), 'all') }}" | tee -a "$GITHUB_OUTPUT" | ||
gpu-test: | ||
needs: [pre-flight] | ||
runs-on: self-hosted-azure | ||
if: ${{ github.event.label.name == 'Run CICD' || github.event_name == 'workflow_dispatch' }} | ||
steps: | ||
- name: Run nvidia-smi test | ||
run: | | ||
whoami | ||
nvidia-smi | ||
|
||
cicd-cluster-clean: | ||
runs-on: self-hosted-azure-builder | ||
needs: [pre-flight] | ||
cicd-test-container-build: | ||
if: ${{ github.event.label.name == 'Run CICD' || github.event_name == 'workflow_dispatch' }} | ||
steps: | ||
- name: Clean server from old files | ||
run: | | ||
docker system prune --filter "until=24h" --filter "label=nemo.library=nemo-core" --force | ||
uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/[email protected] | ||
with: | ||
image-name: nemo_container | ||
dockerfile: Dockerfile.ci | ||
image-label: nemo-core | ||
build-args: | | ||
IMAGE_LABEL=nemo-core | ||
prune-filter-timerange: 24h | ||
|
||
cicd-test-container-setup: | ||
needs: [cicd-cluster-clean, pre-flight] | ||
needs: [cicd-test-container-build, pre-flight] | ||
runs-on: self-hosted-azure-builder | ||
if: ${{ github.event.label.name == 'Run CICD' || github.event_name == 'workflow_dispatch' }} | ||
outputs: | ||
test_to_run: ${{ needs.pre-flight.outputs.test_to_run }} | ||
all: ${{ needs.pre-flight.outputs.all }} | ||
steps: | ||
- name: Checkout repository | ||
uses: actions/checkout@v4 | ||
with: | ||
path: ${{ github.run_id }} | ||
|
||
- name: Set up Docker Buildx | ||
uses: docker/setup-buildx-action@v3 | ||
with: | ||
# We use `docker` driver as this speeds things up for | ||
# trivial (non-multi-stage) builds. | ||
driver: docker | ||
|
||
- name: Restore cache | ||
run: | | ||
docker pull nemoci.azurecr.io/nemo_container:latest | ||
docker pull nemoci.azurecr.io/nemo_container_${{ github.event.number || 'noop' }} || true | ||
- name: Build and push | ||
uses: docker/build-push-action@v5 | ||
with: | ||
file: Dockerfile.ci | ||
push: true | ||
cache-from: | | ||
nemoci.azurecr.io/nemo_container:latest | ||
nemoci.azurecr.io/nemo_container_${{ github.event.number || 'noop' }} | ||
cache-to: type=inline | ||
tags: | | ||
nemoci.azurecr.io/nemo_container_${{ github.run_id }} | ||
nemoci.azurecr.io/nemo_container_${{ github.event.number || 'noop' }} | ||
nemoci.azurecr.io/nemo_container:latest | ||
- name: Run some checks | ||
run: | | ||
docker run --rm --device=/dev/nvidia0 --gpus all --shm-size=8g --env TRANSFORMERS_OFFLINE=0 --env HYDRA_FULL_ERROR=1 --env PYTHONUNBUFFERED=1 nemoci.azurecr.io/nemo_container_${{ github.run_id }} bash -c '\ | ||
docker run --rm --device=/dev/nvidia0 --gpus all --shm-size=8g --env TRANSFORMERS_OFFLINE=0 --env HYDRA_FULL_ERROR=1 --env PYTHONUNBUFFERED=1 nemoci.azurecr.io/nemo_container:${{ github.run_id }} bash -c '\ | ||
# PyTorch Lightning version | ||
python -c "import pytorch_lightning; print(pytorch_lightning.__version__)" | ||
|
@@ -475,7 +435,7 @@ jobs: | |
# needs: [cicd-test-container-setup] | ||
# runs-on: self-hosted-azure | ||
# container: | ||
# image: nemoci.azurecr.io/nemo_container_${{ github.run_id }} | ||
# image: nemoci.azurecr.io/nemo_container:${{ github.run_id }} | ||
# options: | ||
# # --user 0:128 | ||
# --device=/dev/nvidia0 | ||
|
@@ -527,7 +487,7 @@ jobs: | |
# runs-on: self-hosted-azure | ||
# timeout-minutes: 10 | ||
# container: | ||
# image: nemoci.azurecr.io/nemo_container_${{ github.run_id }} | ||
# image: nemoci.azurecr.io/nemo_container:${{ github.run_id }} | ||
# options: | ||
# # --user 0:128 | ||
# --device=/dev/nvidia0 | ||
|
@@ -3611,7 +3571,7 @@ jobs: | |
# runs-on: self-hosted-azure | ||
# timeout-minutes: 10 | ||
# container: | ||
# image: nemoci.azurecr.io/nemo_container_${{ github.run_id }} | ||
# image: nemoci.azurecr.io/nemo_container:${{ github.run_id }} | ||
# options: | ||
# # --user 0:128 | ||
# --device=/dev/nvidia0 | ||
|
@@ -3676,7 +3636,7 @@ jobs: | |
# needs: [cicd-test-container-setup] | ||
# runs-on: self-hosted-azure | ||
# container: | ||
# image: nemoci.azurecr.io/nemo_container_${{ github.run_id }} | ||
# image: nemoci.azurecr.io/nemo_container:${{ github.run_id }} | ||
# options: | ||
# # --user 0:128 | ||
# --device=/dev/nvidia0 | ||
|
@@ -4341,7 +4301,6 @@ jobs: | |
Nemo_CICD_Test: | ||
needs: | ||
- pre-flight | ||
- gpu-test | ||
- cicd-test-container-setup | ||
|
||
- L0_Unit_Tests_GPU_ASR | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters