Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ODH] Onboard rocm pytorch and tensorflow runtimes on the ci main config #54567

Merged
merged 1 commit into from
Jul 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,14 @@ images:
dockerfile_path: Dockerfile
from: base-anaconda-python-3.8
to: jupyter-datascience-anaconda-python-3.8
# ROCm PyTorch runtime image: built from the Dockerfile under context_dir,
# using amd-ubi9-python-3.9 as `from`, and published under the `to` name.
- context_dir: runtimes/rocm-pytorch/ubi9-python-3.9
  dockerfile_path: Dockerfile
  from: amd-ubi9-python-3.9
  to: runtime-rocm-pytorch-ubi9-python-3.9
# ROCm TensorFlow runtime image: built from the Dockerfile under context_dir,
# using amd-ubi9-python-3.9 as `from`, and published under the `to` name.
- context_dir: runtimes/rocm-tensorflow/ubi9-python-3.9
  dockerfile_path: Dockerfile
  from: amd-ubi9-python-3.9
  to: runtime-rocm-tensorflow-ubi9-python-3.9
promotion:
to:
- namespace: opendatahub-io
Expand Down Expand Up @@ -916,6 +924,24 @@ tests:
IMAGE_REPO: workbench-images
RELEASE_VERSION: amd-jupyter-minimal-c9s-python-3.9
workflow: opendatahub-io-ci-image-mirror
# Postsubmit test: runs the opendatahub-io-ci-image-mirror workflow against
# the runtime-rocm-pytorch-ubi9-python-3.9 image.
- as: runtime-rocm-pytorch-ubi9-python-3-9-image-mirror
  postsubmit: true
  steps:
    dependencies:
      # Image produced by the matching `images` entry (its `to` name).
      SOURCE_IMAGE_REF: runtime-rocm-pytorch-ubi9-python-3.9
    env:
      IMAGE_REPO: workbench-images
      RELEASE_VERSION: runtime-rocm-pytorch-ubi9-python-3.9
    workflow: opendatahub-io-ci-image-mirror
# Pull-request variant of the image-mirror test: triggered only when the
# changed paths match the AMD base image or the ROCm PyTorch runtime.
- as: runtime-rocm-pytorch-ubi9-python-3-9-pr-image-mirror
  run_if_changed: (base\/amd-ubi9-python-3.9\/*)|(runtimes\/rocm-pytorch\/ubi9-python-3.9\/*)
  steps:
    dependencies:
      # Image produced by the matching `images` entry (its `to` name).
      SOURCE_IMAGE_REF: runtime-rocm-pytorch-ubi9-python-3.9
    env:
      IMAGE_REPO: workbench-images
      RELEASE_VERSION: runtime-rocm-pytorch-ubi9-python-3.9
    workflow: opendatahub-io-ci-image-mirror
- as: jupyter-datascience-anaconda-python-3-8-pr-image-mirror
run_if_changed: (base\/anaconda-python-3.8\/*)|(jupyter\/datascience\/anaconda-python-3.8\/*)
steps:
Expand Down Expand Up @@ -1456,6 +1482,46 @@ tests:
cpu: 100m
memory: 200Mi
workflow: ipi-gcp
# End-to-end tests for the AMD ROCm runtime images (PyTorch and TensorFlow).
# Runs under the ipi-gcp workflow with the gcp-opendatahub cluster profile.
- as: amd-runtimes-ubi9-e2e-tests
  # Trigger on changes to either ROCm runtime directory. The second
  # alternative must spell "rocm-tensorflow" exactly as the image's
  # context_dir does, otherwise TensorFlow-only changes never run this test.
  run_if_changed: (runtimes\/rocm-pytorch\/ubi9-python-3.9\/*)|(runtimes\/rocm-tensorflow\/ubi9-python-3.9\/*)
  steps:
    cluster_profile: gcp-opendatahub
    test:
    # Deploy, validate and undeploy the ROCm PyTorch runtime image.
    - as: runtime-rocm-pytorch-ubi9-python-3.9-test-e2e
      cli: latest
      commands: |
        oc new-project runtimes-rocm-pytorch-ubi9-python-3-9
        IFS=':' read -r -a NOTEBOOK_IMAGE <<< "${RUNTIME_ROCM_PYTORCH_IMAGE}"
        make deploy9-runtimes-rocm-pytorch-ubi9-python-3.9 \
          -e IMAGE_REGISTRY="${NOTEBOOK_IMAGE[0]}" -e NOTEBOOK_TAG="${NOTEBOOK_IMAGE[1]}"
        make validate-runtime-image image=runtime-rocm-pytorch-ubi9-python-3.9
        make undeploy9-runtimes-rocm-pytorch-ubi9-python-3.9
      dependencies:
      # Exposes the built image's pull spec to the script as an env var;
      # the script splits it on ':' into registry path and tag.
      - env: RUNTIME_ROCM_PYTORCH_IMAGE
        name: runtime-rocm-pytorch-ubi9-python-3.9
      from: src
      resources:
        requests:
          cpu: 100m
          memory: 200Mi
    # Deploy, validate and undeploy the ROCm TensorFlow runtime image.
    - as: runtime-rocm-tensorflow-ubi9-python-3.9-test-e2e
      cli: latest
      commands: |
        oc new-project runtimes-rocm-tensorflow-ubi9-python-3-9
        IFS=':' read -r -a NOTEBOOK_IMAGE <<< "${RUNTIME_ROCM_TENSORFLOW_IMAGE}"
        make deploy9-runtimes-rocm-tensorflow-ubi9-python-3.9 \
          -e IMAGE_REGISTRY="${NOTEBOOK_IMAGE[0]}" -e NOTEBOOK_TAG="${NOTEBOOK_IMAGE[1]}"
        make validate-runtime-image image=runtime-rocm-tensorflow-ubi9-python-3.9
        make undeploy9-runtimes-rocm-tensorflow-ubi9-python-3.9
      dependencies:
      # Exposes the built image's pull spec to the script as an env var;
      # the script splits it on ':' into registry path and tag.
      - env: RUNTIME_ROCM_TENSORFLOW_IMAGE
        name: runtime-rocm-tensorflow-ubi9-python-3.9
      from: src
      resources:
        requests:
          cpu: 100m
          memory: 200Mi
    workflow: ipi-gcp
zz_generated_metadata:
branch: main
org: opendatahub-io
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2264,3 +2264,61 @@ postsubmits:
- name: result-aggregator
secret:
secretName: result-aggregator
# Postsubmit Prow job for branches matching ^main$: runs ci-operator with
# --target=runtime-rocm-pytorch-ubi9-python-3-9-image-mirror.
# Labelled as prowgen output (ci.openshift.io/generator: prowgen), so
# presumably this should be regenerated rather than hand-edited.
- agent: kubernetes
  always_run: true
  branches:
  - ^main$
  cluster: build03
  decorate: true
  decoration_config:
    skip_cloning: true
  labels:
    ci.openshift.io/generator: prowgen
  max_concurrency: 1
  name: branch-ci-opendatahub-io-notebooks-main-runtime-rocm-pytorch-ubi9-python-3-9-image-mirror
  spec:
    containers:
    - args:
      - --gcs-upload-secret=/secrets/gcs/service-account.json
      - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson
      - --report-credentials-file=/etc/report/credentials
      - --secret-dir=/secrets/ci-pull-credentials
      - --target=runtime-rocm-pytorch-ubi9-python-3-9-image-mirror
      command:
      - ci-operator
      image: ci-operator:latest
      imagePullPolicy: Always
      name: ""
      resources:
        requests:
          cpu: 10m
      volumeMounts:
      - mountPath: /secrets/ci-pull-credentials
        name: ci-pull-credentials
        readOnly: true
      # NOTE(review): gcs-credentials is mounted here but not declared under
      # `volumes` below; presumably supplied by Prow decoration. Confirm.
      - mountPath: /secrets/gcs
        name: gcs-credentials
        readOnly: true
      - mountPath: /secrets/manifest-tool
        name: manifest-tool-local-pusher
        readOnly: true
      - mountPath: /etc/pull-secret
        name: pull-secret
        readOnly: true
      - mountPath: /etc/report
        name: result-aggregator
        readOnly: true
    serviceAccountName: ci-operator
    volumes:
    - name: ci-pull-credentials
      secret:
        secretName: ci-pull-credentials
    - name: manifest-tool-local-pusher
      secret:
        secretName: manifest-tool-local-pusher
    - name: pull-secret
      secret:
        secretName: registry-pull-credentials
    - name: result-aggregator
      secret:
        secretName: result-aggregator
Original file line number Diff line number Diff line change
@@ -1,5 +1,86 @@
presubmits:
opendatahub-io/notebooks:
# Presubmit Prow job for the AMD ROCm runtime e2e tests: runs ci-operator
# with --target=amd-runtimes-ubi9-e2e-tests on the gcp-opendatahub cluster
# profile. Not always_run: it starts when changed paths match run_if_changed
# or when a comment matches `trigger` (e.g. the rerun_command).
# Labelled as prowgen output (ci.openshift.io/generator: prowgen), so
# presumably this should be regenerated rather than hand-edited.
- agent: kubernetes
  always_run: false
  branches:
  - ^main$
  - ^main-
  cluster: build02
  context: ci/prow/amd-runtimes-ubi9-e2e-tests
  decorate: true
  decoration_config:
    skip_cloning: true
  labels:
    ci-operator.openshift.io/cloud: gcp
    ci-operator.openshift.io/cloud-cluster-profile: gcp-opendatahub
    ci.openshift.io/generator: prowgen
    pj-rehearse.openshift.io/can-be-rehearsed: "true"
  name: pull-ci-opendatahub-io-notebooks-main-amd-runtimes-ubi9-e2e-tests
  rerun_command: /test amd-runtimes-ubi9-e2e-tests
  # The second alternative must read "rocm-tensorflow" (the real directory
  # name); a misspelling here means TensorFlow runtime changes never
  # trigger this job.
  run_if_changed: (runtimes\/rocm-pytorch\/ubi9-python-3.9\/*)|(runtimes\/rocm-tensorflow\/ubi9-python-3.9\/*)
  spec:
    containers:
    - args:
      - --gcs-upload-secret=/secrets/gcs/service-account.json
      - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson
      - --lease-server-credentials-file=/etc/boskos/credentials
      - --report-credentials-file=/etc/report/credentials
      - --secret-dir=/secrets/ci-pull-credentials
      - --secret-dir=/usr/local/amd-runtimes-ubi9-e2e-tests-cluster-profile
      - --target=amd-runtimes-ubi9-e2e-tests
      command:
      - ci-operator
      image: ci-operator:latest
      imagePullPolicy: Always
      name: ""
      resources:
        requests:
          cpu: 10m
      volumeMounts:
      - mountPath: /etc/boskos
        name: boskos
        readOnly: true
      - mountPath: /secrets/ci-pull-credentials
        name: ci-pull-credentials
        readOnly: true
      - mountPath: /usr/local/amd-runtimes-ubi9-e2e-tests-cluster-profile
        name: cluster-profile
      # NOTE(review): gcs-credentials is mounted here but not declared under
      # `volumes` below; presumably supplied by Prow decoration. Confirm.
      - mountPath: /secrets/gcs
        name: gcs-credentials
        readOnly: true
      - mountPath: /secrets/manifest-tool
        name: manifest-tool-local-pusher
        readOnly: true
      - mountPath: /etc/pull-secret
        name: pull-secret
        readOnly: true
      - mountPath: /etc/report
        name: result-aggregator
        readOnly: true
    serviceAccountName: ci-operator
    volumes:
    - name: boskos
      secret:
        items:
        - key: credentials
          path: credentials
        secretName: boskos-credentials
    - name: ci-pull-credentials
      secret:
        secretName: ci-pull-credentials
    - name: cluster-profile
      secret:
        secretName: cluster-secrets-gcp-opendatahub
    - name: manifest-tool-local-pusher
      secret:
        secretName: manifest-tool-local-pusher
    - name: pull-secret
      secret:
        secretName: registry-pull-credentials
    - name: result-aggregator
      secret:
        secretName: result-aggregator
  trigger: (?m)^/test( | .* )amd-runtimes-ubi9-e2e-tests,?($|\s.*)
- agent: kubernetes
always_run: false
branches:
Expand Down Expand Up @@ -3079,6 +3160,69 @@ presubmits:
secret:
secretName: result-aggregator
trigger: (?m)^/test( | .* )runtime-pytorch-ubi9-python-3-9-pr-image-mirror,?($|\s.*)
# Presubmit Prow job: runs ci-operator with
# --target=runtime-rocm-pytorch-ubi9-python-3-9-pr-image-mirror.
# Not always_run: it starts when changed paths match run_if_changed (AMD base
# image or ROCm PyTorch runtime) or when a comment matches `trigger`.
# Labelled as prowgen output (ci.openshift.io/generator: prowgen), so
# presumably this should be regenerated rather than hand-edited.
- agent: kubernetes
  always_run: false
  branches:
  - ^main$
  - ^main-
  cluster: build02
  context: ci/prow/runtime-rocm-pytorch-ubi9-python-3-9-pr-image-mirror
  decorate: true
  decoration_config:
    skip_cloning: true
  labels:
    ci.openshift.io/generator: prowgen
    pj-rehearse.openshift.io/can-be-rehearsed: "true"
  name: pull-ci-opendatahub-io-notebooks-main-runtime-rocm-pytorch-ubi9-python-3-9-pr-image-mirror
  rerun_command: /test runtime-rocm-pytorch-ubi9-python-3-9-pr-image-mirror
  run_if_changed: (base\/amd-ubi9-python-3.9\/*)|(runtimes\/rocm-pytorch\/ubi9-python-3.9\/*)
  spec:
    containers:
    - args:
      - --gcs-upload-secret=/secrets/gcs/service-account.json
      - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson
      - --report-credentials-file=/etc/report/credentials
      - --secret-dir=/secrets/ci-pull-credentials
      - --target=runtime-rocm-pytorch-ubi9-python-3-9-pr-image-mirror
      command:
      - ci-operator
      image: ci-operator:latest
      imagePullPolicy: Always
      name: ""
      resources:
        requests:
          cpu: 10m
      volumeMounts:
      - mountPath: /secrets/ci-pull-credentials
        name: ci-pull-credentials
        readOnly: true
      # NOTE(review): gcs-credentials is mounted here but not declared under
      # `volumes` below; presumably supplied by Prow decoration. Confirm.
      - mountPath: /secrets/gcs
        name: gcs-credentials
        readOnly: true
      - mountPath: /secrets/manifest-tool
        name: manifest-tool-local-pusher
        readOnly: true
      - mountPath: /etc/pull-secret
        name: pull-secret
        readOnly: true
      - mountPath: /etc/report
        name: result-aggregator
        readOnly: true
    serviceAccountName: ci-operator
    volumes:
    - name: ci-pull-credentials
      secret:
        secretName: ci-pull-credentials
    - name: manifest-tool-local-pusher
      secret:
        secretName: manifest-tool-local-pusher
    - name: pull-secret
      secret:
        secretName: registry-pull-credentials
    - name: result-aggregator
      secret:
        secretName: result-aggregator
  trigger: (?m)^/test( | .* )runtime-rocm-pytorch-ubi9-python-3-9-pr-image-mirror,?($|\s.*)
- agent: kubernetes
always_run: false
branches:
Expand Down