Skip to content

Commit

Permalink
test
Browse files Browse the repository at this point in the history
  • Loading branch information
ko3n1g committed Apr 26, 2024
1 parent 0fa40ad commit 29541c9
Showing 1 changed file with 276 additions and 38 deletions.
314 changes: 276 additions & 38 deletions .github/workflows/_sandbox.yaml
Original file line number Diff line number Diff line change
@@ -1,33 +1,68 @@
name: Sandbox
run-name: CI-amd64

# Triggers:
#   * schedule          - once a day at 09:30 UTC
#   * pull_request      - on open/reopen/ready-for-review/new commits, except
#                         when only Markdown files changed
#   * workflow_dispatch - manual run with the boolean inputs declared below
on:
  schedule:
    - cron: '30 9 * * *' # Pacific Time 01:30 AM in UTC
  pull_request:
    types:
      - opened
      - reopened
      - ready_for_review
      - synchronize
    # The Markdown pattern was previously listed twice (once per quote style);
    # a single entry is sufficient.
    paths-ignore:
      - '**.md'
  workflow_dispatch:
    inputs:
      PUBLISH:
        type: boolean
        description: Publish dated images and update the 'latest' tag?
        default: false
        required: false
      BUMP_MANIFEST:
        type: boolean
        description: Bump git repos in manifest.yaml to head of tree?
        default: false
        required: false
      MERGE_BUMPED_MANIFEST:
        type: boolean
        description: '(used if BUMP_MANIFEST=true) If true: attempt to PR/merge manifest branch'
        default: false
        required: false

# One run per workflow and PR head ref (falling back to the run id, i.e. no
# grouping, for non-PR triggers). Runs on refs other than main are cancelled
# when a newer run joins the same group.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

# `contents` was declared twice (read, then write). Duplicate keys are invalid
# YAML, so only the broader grant that the manifest-merge job relies on is kept.
permissions:
  contents: write # to fetch code and push branch
  actions: write # to cancel previous workflows
  packages: write # to upload container
  pull-requests: write # to make pull request for manifest bump

# Declared once; the earlier duplicate top-level `env` mapping was removed.
env:
  DEFAULT_MANIFEST_ARTIFACT_NAME: bumped-manifest

jobs:
metadata:
runs-on: ubuntu-22.04
# Job outputs consumed by downstream jobs through `needs.metadata.outputs.*`.
outputs:
  BUILD_DATE: ${{ steps.date.outputs.BUILD_DATE }}
  PUBLISH: ${{ steps.if-publish.outputs.PUBLISH }}
  BUMP_MANIFEST: ${{ steps.manifest-branch.outputs.BUMP_MANIFEST }}
  MANIFEST_ARTIFACT_NAME: ${{ steps.manifest-branch.outputs.MANIFEST_ARTIFACT_NAME }}
  MANIFEST_BRANCH: ${{ steps.manifest-branch.outputs.MANIFEST_BRANCH }}
  # Was `outputs.MERGE_BUMBED_MANIFEST` (typo). The manifest-branch step's
  # variable is spelled MERGE_BUMPED_MANIFEST, and a misspelled lookup evaluates
  # to an empty string, which keeps the merge-new-manifest job from ever running.
  # NOTE(review): the lines that write to $GITHUB_OUTPUT are not visible in this
  # view - confirm the step exports the value under this exact name.
  MERGE_BUMPED_MANIFEST: ${{ steps.manifest-branch.outputs.MERGE_BUMPED_MANIFEST }}
steps:
# Draft pull requests should not consume CI: ask the API to cancel this very
# run, then idle until the cancellation takes effect.
- name: Cancel workflow run if the trigger is a draft PR
  id: cancel-if-draft
  if: github.event_name == 'pull_request' && github.event.pull_request.draft == true
  # Upper bound for the wait loop below, so the job cannot idle until the
  # job-level timeout if the cancellation never takes effect.
  timeout-minutes: 5
  run: |
    echo "Cancelling workflow for draft PR"
    # --fail makes curl exit non-zero on an HTTP error (e.g. missing
    # `actions: write` permission), so the step fails immediately instead of
    # falling through to the wait loop.
    curl --fail -X POST -H "Authorization: token ${{ github.token }}" \
      -H "Accept: application/vnd.github.v3+json" \
      "https://api.github.com/repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/cancel"
    while true; do sleep 1; done # blocks execution in case workflow cancellation takes time
- name: Set build date
id: date
shell: bash -x -e {0}
Expand All @@ -45,7 +80,7 @@ jobs:
id: manifest-branch
shell: bash -x -e {0}
run: |
BUMP_MANIFEST=${{ 'true' }}
BUMP_MANIFEST=${{ github.event_name == 'schedule' || inputs.BUMP_MANIFEST || 'false' }}
MERGE_BUMPED_MANIFEST=${{ github.event_name == 'schedule' || inputs.MERGE_BUMPED_MANIFEST || 'false' }}
# Prepend nightly manifest branch with "z" to make it appear at the end
if [[ "$BUMP_MANIFEST" == "true" ]]; then
Expand Down Expand Up @@ -103,48 +138,251 @@ jobs:
.github/container/manifest.yaml
.github/container/patches
build-base:
uses: ./.github/workflows/_build_base.yaml
# Runs the reusable CI workflow for amd64 once the metadata and bump-manifest
# jobs have finished.
amd64:
  needs: [metadata, bump-manifest]
  uses: ./.github/workflows/_ci.yaml
  with:
    ARCHITECTURE: amd64
    # BUILD_DATE used to be listed twice (a hard-coded 20240418 plus this
    # expression). Duplicate keys are invalid YAML, so only the value computed
    # by the metadata job is kept.
    BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }}
    MANIFEST_ARTIFACT_NAME: ${{ needs.metadata.outputs.MANIFEST_ARTIFACT_NAME }}
  secrets: inherit

build-jax:
needs: build-base
uses: ./.github/workflows/_build.yaml
# Runs the reusable CI workflow for arm64; mirrors the amd64 job.
arm64:
  needs: [metadata, bump-manifest]
  uses: ./.github/workflows/_ci.yaml
  with:
    # The `with` mapping previously carried a second ARCHITECTURE (amd64) and a
    # second, hard-coded BUILD_DATE, plus container-build inputs whose
    # BASE_IMAGE referenced `needs.build-base`, a job this one does not depend
    # on. Only the inputs that the sibling amd64 call passes are kept.
    # NOTE(review): confirm against the inputs declared by _ci.yaml.
    ARCHITECTURE: arm64
    BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }}
    MANIFEST_ARTIFACT_NAME: ${{ needs.metadata.outputs.MANIFEST_ARTIFACT_NAME }}
  secrets: inherit

build-upstream-maxtext:
needs: build-jax
uses: ./.github/workflows/_build.yaml
# Only merge if everything succeeds
#
# Commits the bumped manifest/patches onto a local branch and then either
#   * fast-forwards the triggering ref and pushes it (tests passed), or
#   * pushes the branch and opens a draft PR (tests failed or merge failed).
# Finally the remote manifest branch is deleted again.
merge-new-manifest:
  runs-on: ubuntu-22.04
  if: ${{ !cancelled() && needs.metadata.outputs.MERGE_BUMPED_MANIFEST == 'true' && needs.metadata.outputs.MANIFEST_BRANCH != github.sha }}
  needs:
    - metadata
    - amd64
    - arm64
  steps:
    # Records whether any needed job failed or was cancelled; later steps
    # branch on steps.test_result.outputs.SUCCEEDED.
    - name: "Tests Succeeded: ${{ !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') }}"
      id: test_result
      run: echo "SUCCEEDED=${{ !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') }}" | tee -a "$GITHUB_OUTPUT"

    - name: Check out the repository under ${GITHUB_WORKSPACE}
      uses: actions/checkout@v4

    # Remove the checked-out copies first so the downloaded artifact fully
    # replaces them.
    - name: Delete checked-out manifest and patches
      run: |
        rm .github/container/manifest.yaml
        rm -rf .github/container/patches

    - name: Replace checked-out manifest file/patches with bumped one
      uses: actions/download-artifact@v4
      with:
        name: ${{ needs.metadata.outputs.MANIFEST_ARTIFACT_NAME }}
        path: .github/container/

    - name: 'Create local manifest branch: ${{ needs.metadata.outputs.MANIFEST_BRANCH }}'
      id: local_branch
      shell: bash -x -e {0}
      run: |
        git config user.name "JAX-Toolbox CI"
        # NOTE(review): this address was replaced by the source page's e-mail
        # obfuscation placeholder - restore the real committer address.
        git config user.email "[email protected]"
        git switch -c ${{ needs.metadata.outputs.MANIFEST_BRANCH }}
        git status
        git add .github/container/patches/
        git status
        # In the unusual situation where the manifest is the same even after bumping,
        # we will produce an empty commit with --allow-empty, which allows a PR to be
        # made and merged even with no changeset.
        git commit --allow-empty -a -m "Nightly Manifest Bump (${{ needs.metadata.outputs.BUILD_DATE }}) from: https://github.com/NVIDIA/JAX-Toolbox/actions/runs/${{ github.run_id }}"

    - name: Try to merge manifest branch
      id: merge_local
      if: steps.test_result.outputs.SUCCEEDED == 'true'
      # Merge can fail
      continue-on-error: true
      shell: bash -x -e {0}
      run: |
        git switch ${{ github.ref_name }}
        # Pull this ref in case it was updated
        git pull --rebase
        git merge --ff-only ${{ needs.metadata.outputs.MANIFEST_BRANCH }}
        # Push the new change
        git push origin ${{ github.ref_name }}

    # We will create a Draft PR & remote branch if:
    #   1. The tests failed
    #   2. The merge failed
    - name: Create remote manifest branch
      id: create_remote_branch
      if: steps.test_result.outputs.SUCCEEDED == 'false' || steps.merge_local.outcome != 'success'
      shell: bash -x -e {0}
      run: |
        # Always abort in case in-progress merge
        git merge --abort || true
        git switch ${{ needs.metadata.outputs.MANIFEST_BRANCH }}
        # Since the merge failed, create a remote and follow up with a PR
        git push --set-upstream origin ${{ needs.metadata.outputs.MANIFEST_BRANCH }}

    - name: Creating Draft PR for MANIFEST_BRANCH=${{ needs.metadata.outputs.MANIFEST_BRANCH }}
      id: create_pr
      if: steps.test_result.outputs.SUCCEEDED == 'false' || steps.merge_local.outcome != 'success'
      # The action reference had been garbled into an obfuscated-e-mail
      # placeholder, which is not a valid `uses:` value; restored to the
      # octokit request action.
      # NOTE(review): confirm the intended version tag.
      uses: octokit/request-action@v2.x
      with:
        route: POST /repos/{owner_and_repo}/pulls
        owner_and_repo: ${{ github.repository }}
        head: ${{ needs.metadata.outputs.MANIFEST_BRANCH }}
        # Always try to merge back into the branch that triggered this workflow
        base: ${{ github.ref }}
        body: |
          https://github.com/NVIDIA/JAX-Toolbox/actions/runs/${{ github.run_id }}
        title: Nightly Manifest Bump (${{ needs.metadata.outputs.BUILD_DATE }})
        draft: true
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

    - name: 'Log created PR: #${{ fromJson(steps.create_pr.outputs.data).number }}'
      if: steps.create_pr.outcome == 'success'
      run: |
        echo "https://github.com/NVIDIA/JAX-Toolbox/pull/${{ fromJson(steps.create_pr.outputs.data).number }}" | tee -a "$GITHUB_STEP_SUMMARY"

    # Guard delete in simple check to protect other branches
    - name: Check that the branch matches znightly- prefix
      run: |
        if [[ "${{ needs.metadata.outputs.MANIFEST_BRANCH }}" != znightly-* ]]; then
          echo Tried to delete MANIFEST_BRANCH=${{ needs.metadata.outputs.MANIFEST_BRANCH }}, but did not start with "znightly-"
          exit 1
        fi

    # If merging fails b/c upstream conflict, branch is deleted to avoid clutter since changeset is preserved in PR
    - name: Deleting remote MANIFEST_BRANCH=${{ needs.metadata.outputs.MANIFEST_BRANCH }}
      # Delete can fail if branch was already deleted or not created, e.g., if the PR successfully merges, then branch is also already deleted.
      continue-on-error: true
      # Same restoration of the garbled action reference as in create_pr.
      uses: octokit/request-action@v2.x
      with:
        route: DELETE /repos/{owner_and_repo}/git/refs/heads/${{ needs.metadata.outputs.MANIFEST_BRANCH }}
        owner_and_repo: ${{ github.repository }}
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

# Builds the JSON matrix consumed by publish-containers: one entry per
# (stage, flavor) pair for which at least one architecture produced an image.
# Output PUBLISH_CONFIGS has the shape {"config": [ {flavor, target_image,
# priority, source_image, stage}, ... ]}.
make-publish-configs:
  runs-on: ubuntu-22.04
  if: ${{ !cancelled() }}
  env:
    # Images go to the mock-* repositories unless PUBLISH is 'true'.
    MEALKIT_IMAGE_REPO: ${{ needs.metadata.outputs.PUBLISH == 'true' && 'jax-mealkit' || 'mock-jax-mealkit' }}
    FINAL_IMAGE_REPO: ${{ needs.metadata.outputs.PUBLISH == 'true' && 'jax' || 'mock-jax' }}
  needs:
    - metadata
    - amd64
    - arm64
  outputs:
    PUBLISH_CONFIGS: ${{ steps.generate-configs.outputs.PUBLISH_CONFIGS }}
  steps:
    - id: generate-configs
      shell: bash -eu -o pipefail {0}
      run: |
        declare -a FLAVORS=(
          base
          jax
          triton
          equinox
          maxtext
          levanter
          upstream-t5x
          upstream-pax
          upstream-maxtext
          t5x
          pax
          grok
        )
        declare -a STAGES=(
          mealkit
          final
        )

        ## create JSON specs for a 1D matrix of container publication jobs
        # `add // []`: when neither architecture job produced DOCKER_TAGS the
        # slurped input is empty and `add` yields null; fall back to an empty
        # list so the `.[]` iteration below does not abort the script.
        ALL_TAGS=$(
          echo '${{ needs.amd64.outputs.DOCKER_TAGS }}' \
               '${{ needs.arm64.outputs.DOCKER_TAGS }}' |\
          jq -s 'add // []'
        )
        PUBLISH_CONFIGS='[]'
        for stage in "${STAGES[@]}"; do
          for flavor in "${FLAVORS[@]}"; do
            # collect images for different platforms, e.g. amd64 and arm64
            matching_tags=$(
              echo "$ALL_TAGS" |\
              jq -c ".[] | select(.stage == \"${stage}\" and .flavor == \"${flavor}\" and .tag != \"\")"
            )
            # source_image is a list of all platform-specific tags
            source_image=$(echo "${matching_tags}" | jq -c "[.tag]" | jq -s 'add')
            # if the build job failed without producing any images, skip this flavor
            n_source_images=$(echo "$source_image" | jq 'length')
            if [[ $n_source_images -gt 0 ]]; then
              echo "PUBLISH image $flavor with $n_source_images $stage containers"
              # tag priority is the highest priority of all platform-specific tags
              priority=$(echo "${matching_tags}" | jq -r ".priority" | jq -s 'max')
              # final images go to the FINAL_IMAGE_REPO repository and mealkit
              # images to the MEALKIT_IMAGE_REPO repository (see job-level env)
              case ${stage} in
                mealkit)
                  target_image=${MEALKIT_IMAGE_REPO}
                  ;;
                final)
                  target_image=${FINAL_IMAGE_REPO}
                  ;;
              esac
              # Quoted so the JSON text (which starts with `[`) is never
              # treated as a glob pattern or word-split by the shell.
              PUBLISH_CONFIGS=$(
                echo "${PUBLISH_CONFIGS}" | jq -c ". + [{
                  \"flavor\": \"${flavor}\",
                  \"target_image\": \"${target_image}\",
                  \"priority\": \"${priority}\",
                  \"source_image\": ${source_image},
                  \"stage\": \"${stage}\"
                }]"
              )
            else
              echo "SKIPPED image $flavor with 0 $stage containers"
            fi
          done
        done
        # Wrap the list so it can be fed to `strategy.matrix` as {"config": [...]}
        PUBLISH_CONFIGS=$(echo "$PUBLISH_CONFIGS" | jq -c '{"config": .}')
        echo "${PUBLISH_CONFIGS}" | jq .
        echo "PUBLISH_CONFIGS=${PUBLISH_CONFIGS}" >> "$GITHUB_OUTPUT"
# Fans out over the matrix produced by make-publish-configs and publishes one
# image per (stage, flavor) entry through the reusable publish workflow.
publish-containers:
  needs:
    - metadata
    - make-publish-configs
  # Job outputs are plain strings, so the previous `PUBLISH_CONFIGS.config`
  # lookup never equalled the JSON literal and the guard was always true.
  # Compare the whole output so an empty matrix skips the job instead of
  # failing it.
  if: ${{ !cancelled() && needs.make-publish-configs.outputs.PUBLISH_CONFIGS != '{"config":[]}' }}
  strategy:
    fail-fast: false
    matrix: ${{ fromJson(needs.make-publish-configs.outputs.PUBLISH_CONFIGS) }}
  uses: ./.github/workflows/_publish_container.yaml
  with:
    # Stale container-build inputs were dropped from this mapping: they
    # produced a duplicate ARTIFACT_NAME key, and their BASE_IMAGE referenced
    # `needs.build-jax`, a job this one does not depend on.
    # NOTE(review): confirm against the inputs declared by
    # _publish_container.yaml.
    ARTIFACT_NAME: ${{ matrix.config.stage }}-${{ matrix.config.flavor }}
    ARTIFACT_TAG: ${{ matrix.config.flavor }}-${{ needs.metadata.outputs.BUILD_DATE }}
    SOURCE_IMAGE: ${{ join(matrix.config.source_image, ' ') }}
    TARGET_IMAGE: ${{ matrix.config.target_image }}
    TARGET_TAGS: |
      type=raw,value=${{ matrix.config.flavor }},priority=${{ matrix.config.priority }}
      type=raw,value=${{ matrix.config.flavor }}-${{ needs.metadata.outputs.BUILD_DATE }},priority=${{ matrix.config.priority }}
  # Previously sat between the `with` entries, where it would be read as an
  # input named `secrets`; moved to job level, matching the sibling jobs.
  secrets: inherit
build-rosetta-maxtext:
needs: build-upstream-maxtext
uses: ./.github/workflows/_build_rosetta.yaml
# Runs the reusable finalize workflow after the build and publish jobs,
# unless the run was cancelled.
finalize:
  needs: [metadata, amd64, arm64, publish-containers]
  if: '!cancelled()'
  uses: ./.github/workflows/_finalize.yaml
  with:
    # BUILD_DATE used to be listed twice (hard-coded 20240418 plus this
    # expression), next to build-only inputs whose BASE_IMAGE referenced
    # `needs.build-upstream-maxtext`, a job this one does not depend on.
    # Only the metadata-derived inputs are kept.
    # NOTE(review): confirm against the inputs declared by _finalize.yaml.
    BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }}
    PUBLISH_BADGE: ${{ needs.metadata.outputs.PUBLISH == 'true' }}
  secrets: inherit

0 comments on commit 29541c9

Please sign in to comment.