-
Notifications
You must be signed in to change notification settings - Fork 47
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
276 additions
and
38 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,33 +1,68 @@ | ||
name: Sandbox | ||
run-name: CI-amd64 | ||
|
||
on: | ||
schedule: | ||
- cron: '30 9 * * *' # Pacific Time 01:30 AM in UTC | ||
pull_request: | ||
types: | ||
- opened | ||
- reopened | ||
- ready_for_review | ||
- synchronize | ||
paths-ignore: | ||
- "**.md" | ||
- '**.md' | ||
workflow_dispatch: | ||
inputs: | ||
PUBLISH: | ||
type: boolean | ||
description: Publish dated images and update the 'latest' tag? | ||
default: false | ||
required: false | ||
BUMP_MANIFEST: | ||
type: boolean | ||
description: Bump git repos in manifest.yaml to head of tree? | ||
default: false | ||
required: false | ||
MERGE_BUMPED_MANIFEST: | ||
type: boolean | ||
description: '(used if BUMP_MANIFEST=true) If true: attempt to PR/merge manifest branch' | ||
default: false | ||
required: false | ||
|
||
env: | ||
DEFAULT_MANIFEST_ARTIFACT_NAME: bumped-manifest | ||
concurrency: | ||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} | ||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }} | ||
|
||
permissions: | ||
contents: read # to fetch code | ||
contents: write # to fetch code and push branch | ||
actions: write # to cancel previous workflows | ||
packages: write # to upload container | ||
pull-requests: write # to make pull request for manifest bump | ||
|
||
env: | ||
DEFAULT_MANIFEST_ARTIFACT_NAME: bumped-manifest | ||
|
||
jobs: | ||
metadata: | ||
runs-on: ubuntu-22.04 | ||
outputs: | ||
BUILD_DATE: ${{ steps.date.outputs.BUILD_DATE }} | ||
PUBLISH: ${{ steps.if-publish.outputs.PUBLISH }} | ||
BUMP_MANIFEST: ${{ steps.manifest-branch.outputs.BUMP_MANIFEST }} | ||
MANIFEST_ARTIFACT_NAME: ${{ steps.manifest-branch.outputs.MANIFEST_ARTIFACT_NAME }} | ||
MANIFEST_BRANCH: ${{ steps.manifest-branch.outputs.MANIFEST_BRANCH }} | ||
MERGE_BUMPED_MANIFEST: ${{ steps.manifest-branch.outputs.MERGE_BUMBED_MANIFEST }} | ||
steps: | ||
- name: Cancel workflow run if the trigger is a draft PR | ||
id: cancel-if-draft | ||
if: github.event_name == 'pull_request' && github.event.pull_request.draft == true | ||
run: | | ||
echo "Cancelling workflow for draft PR" | ||
curl -X POST -H "Authorization: token ${{ github.token }}" \ | ||
-H "Accept: application/vnd.github.v3+json" \ | ||
"https://api.github.com/repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/cancel" | ||
while true; do sleep 1; done # blocks execution in case workflow cancellation takes time | ||
- name: Set build date | ||
id: date | ||
shell: bash -x -e {0} | ||
|
@@ -45,7 +80,7 @@ jobs: | |
id: manifest-branch | ||
shell: bash -x -e {0} | ||
run: | | ||
BUMP_MANIFEST=${{ 'true' }} | ||
BUMP_MANIFEST=${{ github.event_name == 'schedule' || inputs.BUMP_MANIFEST || 'false' }} | ||
MERGE_BUMPED_MANIFEST=${{ github.event_name == 'schedule' || inputs.MERGE_BUMPED_MANIFEST || 'false' }} | ||
# Prepend nightly manifest branch with "z" to make it appear at the end | ||
if [[ "$BUMP_MANIFEST" == "true" ]]; then | ||
|
@@ -103,48 +138,251 @@ jobs: | |
.github/container/manifest.yaml | ||
.github/container/patches | ||
build-base: | ||
uses: ./.github/workflows/_build_base.yaml | ||
amd64: | ||
needs: [metadata, bump-manifest] | ||
uses: ./.github/workflows/_ci.yaml | ||
with: | ||
ARCHITECTURE: amd64 | ||
BUILD_DATE: 20240418 | ||
BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }} | ||
MANIFEST_ARTIFACT_NAME: ${{ needs.metadata.outputs.MANIFEST_ARTIFACT_NAME }} | ||
secrets: inherit | ||
|
||
build-jax: | ||
needs: build-base | ||
uses: ./.github/workflows/_build.yaml | ||
arm64: | ||
needs: [metadata, bump-manifest] | ||
uses: ./.github/workflows/_ci.yaml | ||
with: | ||
ARCHITECTURE: amd64 | ||
ARTIFACT_NAME: artifact-jax-build | ||
BADGE_FILENAME: badge-jax-build | ||
BUILD_DATE: 20240418 | ||
BASE_IMAGE: ${{ needs.build-base.outputs.DOCKER_TAG }} | ||
CONTAINER_NAME: jax | ||
DOCKERFILE: .github/container/Dockerfile.jax | ||
RUNNER_SIZE: large | ||
ARCHITECTURE: arm64 | ||
BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }} | ||
MANIFEST_ARTIFACT_NAME: ${{ needs.metadata.outputs.MANIFEST_ARTIFACT_NAME }} | ||
secrets: inherit | ||
|
||
build-upstream-maxtext: | ||
needs: build-jax | ||
uses: ./.github/workflows/_build.yaml | ||
# Only merge if everything succeeds | ||
merge-new-manifest: | ||
runs-on: ubuntu-22.04 | ||
if: ${{ !cancelled() && needs.metadata.outputs.MERGE_BUMPED_MANIFEST == 'true' && needs.metadata.outputs.MANIFEST_BRANCH != github.sha }} | ||
needs: | ||
- metadata | ||
- amd64 | ||
- arm64 | ||
steps: | ||
- name: "Tests Succeeded: ${{ !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') }}" | ||
id: test_result | ||
run: echo "SUCCEEDED=${{ !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') }}" | tee -a $GITHUB_OUTPUT | ||
|
||
- name: Check out the repository under ${GITHUB_WORKSPACE} | ||
uses: actions/checkout@v4 | ||
|
||
- name: Delete checked-out manifest and patches | ||
run: | | ||
rm .github/container/manifest.yaml | ||
rm -rf .github/container/patches | ||
- name: Replace checked-out manifest file/patches with bumped one | ||
uses: actions/download-artifact@v4 | ||
with: | ||
name: ${{ needs.metadata.outputs.MANIFEST_ARTIFACT_NAME }} | ||
path: .github/container/ | ||
|
||
- name: 'Create local manifest branch: ${{ needs.metadata.outputs.MANIFEST_BRANCH }}' | ||
id: local_branch | ||
shell: bash -x -e {0} | ||
run: | | ||
git config user.name "JAX-Toolbox CI" | ||
git config user.email "[email protected]" | ||
git switch -c ${{ needs.metadata.outputs.MANIFEST_BRANCH }} | ||
git status | ||
git add .github/container/patches/ | ||
git status | ||
# In the unusual situation where the manifest is the same even after bumping, | ||
# we will produce an empty commit with --allow-empty, which allows a PR to be | ||
# made and merged even with no changeset. | ||
git commit --allow-empty -a -m "Nightly Manifest Bump (${{ needs.metadata.outputs.BUILD_DATE }}) from: https://github.com/NVIDIA/JAX-Toolbox/actions/runs/${{ github.run_id }}" | ||
- name: Try to merge manifest branch | ||
id: merge_local | ||
if: steps.test_result.outputs.SUCCEEDED == 'true' | ||
# Merge can fail | ||
continue-on-error: true | ||
shell: bash -x -e {0} | ||
run: | | ||
git switch ${{ github.ref_name }} | ||
# Pull this ref in case it was updated | ||
git pull --rebase | ||
git merge --ff-only ${{ needs.metadata.outputs.MANIFEST_BRANCH }} | ||
# Push the new change | ||
git push origin ${{ github.ref_name }} | ||
# We will create a Draft PR & remote branch if: | ||
# 1. The tests failed | ||
# 2. The merge failed | ||
- name: Create remote manifest branch | ||
id: create_remote_branch | ||
if: steps.test_result.outputs.SUCCEEDED == 'false' || steps.merge_local.outcome != 'success' | ||
shell: bash -x -e {0} | ||
run: | | ||
# Always abort in case in-progress merge | ||
git merge --abort || true | ||
git switch ${{ needs.metadata.outputs.MANIFEST_BRANCH }} | ||
# Since the merge failed, create a remote and follow up with a PR | ||
git push --set-upstream origin ${{ needs.metadata.outputs.MANIFEST_BRANCH }} | ||
- name: Creating Draft PR for MANIFEST_BRANCH=${{ needs.metadata.outputs.MANIFEST_BRANCH }} | ||
id: create_pr | ||
if: steps.test_result.outputs.SUCCEEDED == 'false' || steps.merge_local.outcome != 'success' | ||
uses: octokit/[email protected] | ||
with: | ||
route: POST /repos/{owner_and_repo}/pulls | ||
owner_and_repo: ${{ github.repository }} | ||
head: ${{ needs.metadata.outputs.MANIFEST_BRANCH }} | ||
# Always try to merge back into the branch that triggered this workflow | ||
base: ${{ github.ref }} | ||
body: | | ||
https://github.com/NVIDIA/JAX-Toolbox/actions/runs/${{ github.run_id }} | ||
title: Nightly Manifest Bump (${{ needs.metadata.outputs.BUILD_DATE }}) | ||
draft: true | ||
env: | ||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||
|
||
- name: 'Log created PR: #${{ fromJson(steps.create_pr.outputs.data).number }}' | ||
if: steps.create_pr.outcome == 'success' | ||
run: | | ||
echo "https://github.com/NVIDIA/JAX-Toolbox/pull/${{ fromJson(steps.create_pr.outputs.data).number }}" | tee -a $GITHUB_STEP_SUMMARY | ||
# Guard delete in simple check to protect other branches | ||
- name: Check that the branch matches znightly- prefix | ||
run: | | ||
if [[ "${{ needs.metadata.outputs.MANIFEST_BRANCH }}" != znightly-* ]]; then | ||
echo Tried to delete MANIFEST_BRANCH=${{ needs.metadata.outputs.MANIFEST_BRANCH }}, but did not start with "znightly-" | ||
exit 1 | ||
fi | ||
# If merging fails b/c upstream conflict, branch is deleted to avoid clutter since changeset is preserved in PR | ||
- name: Deleting remote MANIFEST_BRANCH=${{ needs.metadata.outputs.MANIFEST_BRANCH }} | ||
# Delete can fail if branch was already deleted or not created, e.g., if the PR successfully merges, then branch is also already deleted. | ||
continue-on-error: true | ||
uses: octokit/[email protected] | ||
with: | ||
route: DELETE /repos/{owner_and_repo}/git/refs/heads/${{ needs.metadata.outputs.MANIFEST_BRANCH }} | ||
owner_and_repo: ${{ github.repository }} | ||
env: | ||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | ||
|
||
make-publish-configs: | ||
runs-on: ubuntu-22.04 | ||
if: ${{ !cancelled() }} | ||
env: | ||
MEALKIT_IMAGE_REPO: ${{ needs.metadata.outputs.PUBLISH == 'true' && 'jax-mealkit' || 'mock-jax-mealkit' }} | ||
FINAL_IMAGE_REPO: ${{ needs.metadata.outputs.PUBLISH == 'true' && 'jax' || 'mock-jax' }} | ||
needs: | ||
- metadata | ||
- amd64 | ||
- arm64 | ||
outputs: | ||
PUBLISH_CONFIGS: ${{ steps.generate-configs.outputs.PUBLISH_CONFIGS }} | ||
steps: | ||
- id: generate-configs | ||
shell: bash -eu -o pipefail {0} | ||
run: | | ||
declare -a FLAVORS=( | ||
base | ||
jax | ||
triton | ||
equinox | ||
maxtext | ||
levanter | ||
upstream-t5x | ||
upstream-pax | ||
upstream-maxtext | ||
t5x | ||
pax | ||
grok | ||
) | ||
declare -a STAGES=( | ||
mealkit | ||
final | ||
) | ||
## create JSON specs for a 1D matrix of container publication jobs | ||
ALL_TAGS=$( | ||
echo '${{ needs.amd64.outputs.DOCKER_TAGS }}' \ | ||
'${{ needs.arm64.outputs.DOCKER_TAGS }}' |\ | ||
jq -s 'add' | ||
) | ||
PUBLISH_CONFIGS='[]' | ||
for stage in "${STAGES[@]}"; do | ||
for flavor in "${FLAVORS[@]}";do | ||
# collect images for different platforms, e.g. amd64 and arm64 | ||
matching_tags=$( | ||
echo "$ALL_TAGS" |\ | ||
jq -c ".[] | select(.stage == \"${stage}\" and .flavor == \"${flavor}\" and .tag != \"\")" | ||
) | ||
# source_image is a list of all platform-specific tags | ||
source_image=$(echo "${matching_tags}" | jq -c "[.tag]" | jq -s 'add') | ||
# if the build job failed without producing any images, skip this flavor | ||
n_source_images=$(echo "$source_image" | jq 'length') | ||
if [[ $n_source_images -gt 0 ]]; then | ||
echo "PUBLISH image $flavor with $n_source_images $stage containers" | ||
# tag priority is the highest priority of all platform-specific tags | ||
priority=$(echo "${matching_tags}" | jq -r ".priority" | jq -s 'max') | ||
# put all final images in the `ghcr.io/nvidia/jax` namespace | ||
# and mealkit images in `ghcr.io/nvidia/jax-toolbox-mealkit` namespace | ||
case ${stage} in | ||
mealkit) | ||
target_image=${MEALKIT_IMAGE_REPO} | ||
;; | ||
final) | ||
target_image=${FINAL_IMAGE_REPO} | ||
;; | ||
esac | ||
PUBLISH_CONFIGS=$( | ||
echo ${PUBLISH_CONFIGS} | jq -c ". + [{ | ||
\"flavor\": \"${flavor}\", | ||
\"target_image\": \"${target_image}\", | ||
\"priority\": \"${priority}\", | ||
\"source_image\": ${source_image}, | ||
\"stage\": \"${stage}\" | ||
}]" | ||
) | ||
else | ||
echo "SKIPPED image $flavor with 0 $stage containers" | ||
fi | ||
done | ||
done | ||
PUBLISH_CONFIGS=$(echo "$PUBLISH_CONFIGS" | jq -c '{"config": .}') | ||
echo ${PUBLISH_CONFIGS} | jq | ||
echo "PUBLISH_CONFIGS=${PUBLISH_CONFIGS}" >> $GITHUB_OUTPUT | ||
publish-containers: | ||
needs: | ||
- metadata | ||
- make-publish-configs | ||
if: ${{ !cancelled() && needs.make-publish-configs.outputs.PUBLISH_CONFIGS.config != '{"config":[]}' }} | ||
strategy: | ||
fail-fast: false | ||
matrix: ${{ fromJson(needs.make-publish-configs.outputs.PUBLISH_CONFIGS) }} | ||
uses: ./.github/workflows/_publish_container.yaml | ||
with: | ||
ARCHITECTURE: amd64 | ||
ARTIFACT_NAME: artifact-maxtext-build | ||
BADGE_FILENAME: badge-maxtext-build | ||
BUILD_DATE: 20240418 | ||
BASE_IMAGE: ${{ needs.build-jax.outputs.DOCKER_TAG_MEALKIT }} | ||
CONTAINER_NAME: maxtext | ||
DOCKERFILE: .github/container/Dockerfile.maxtext.amd64 | ||
secrets: inherit | ||
ARTIFACT_NAME: ${{ matrix.config.stage }}-${{ matrix.config.flavor }} | ||
ARTIFACT_TAG: ${{ matrix.config.flavor }}-${{ needs.metadata.outputs.BUILD_DATE }} | ||
SOURCE_IMAGE: ${{ join(matrix.config.source_image, ' ') }} | ||
TARGET_IMAGE: ${{ matrix.config.target_image }} | ||
TARGET_TAGS: | | ||
type=raw,value=${{ matrix.config.flavor }},priority=${{ matrix.config.priority }} | ||
type=raw,value=${{ matrix.config.flavor }}-${{ needs.metadata.outputs.BUILD_DATE }},priority=${{ matrix.config.priority }} | ||
build-rosetta-maxtext: | ||
needs: build-upstream-maxtext | ||
uses: ./.github/workflows/_build_rosetta.yaml | ||
finalize: | ||
needs: [metadata, amd64, arm64, publish-containers] | ||
if: '!cancelled()' | ||
uses: ./.github/workflows/_finalize.yaml | ||
with: | ||
ARCHITECTURE: amd64 | ||
BUILD_DATE: 20240418 | ||
BASE_IMAGE: ${{ needs.build-upstream-maxtext.outputs.DOCKER_TAG_MEALKIT }} | ||
BASE_LIBRARY: maxtext | ||
BUILD_DATE: ${{ needs.metadata.outputs.BUILD_DATE }} | ||
PUBLISH_BADGE: ${{ needs.metadata.outputs.PUBLISH == 'true' }} | ||
secrets: inherit |