Skip to content

Commit

Permalink
Onboard rocm pytorch and tensorflow runtimes on the ci:main config
Browse files Browse the repository at this point in the history
  • Loading branch information
atheo89 committed Jul 19, 2024
1 parent a7bf52f commit 4df5bbf
Show file tree
Hide file tree
Showing 3 changed files with 268 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,14 @@ images:
dockerfile_path: Dockerfile
from: base-anaconda-python-3.8
to: jupyter-datascience-anaconda-python-3.8
# ROCm PyTorch runtime image: built from runtimes/rocm-pytorch/ubi9-python-3.9
# on top of the amd-ubi9-python-3.9 image.
- context_dir: runtimes/rocm-pytorch/ubi9-python-3.9
  dockerfile_path: Dockerfile
  from: amd-ubi9-python-3.9
  to: runtime-rocm-pytorch-ubi9-python-3.9
# ROCm TensorFlow runtime image: built from runtimes/rocm-tensorflow/ubi9-python-3.9
# on top of the amd-ubi9-python-3.9 image.
- context_dir: runtimes/rocm-tensorflow/ubi9-python-3.9
  dockerfile_path: Dockerfile
  from: amd-ubi9-python-3.9
  to: runtime-rocm-tensorflow-ubi9-python-3.9
promotion:
to:
- namespace: opendatahub-io
Expand Down Expand Up @@ -916,6 +924,24 @@ tests:
IMAGE_REPO: workbench-images
RELEASE_VERSION: amd-jupyter-minimal-c9s-python-3.9
workflow: opendatahub-io-ci-image-mirror
# Postsubmit run of the opendatahub-io-ci-image-mirror workflow for the
# runtime-rocm-pytorch-ubi9-python-3.9 image.
- as: runtime-rocm-pytorch-ubi9-python-3-9-image-mirror
  postsubmit: true
  steps:
    dependencies:
      SOURCE_IMAGE_REF: runtime-rocm-pytorch-ubi9-python-3.9
    env:
      IMAGE_REPO: workbench-images
      RELEASE_VERSION: runtime-rocm-pytorch-ubi9-python-3.9
    workflow: opendatahub-io-ci-image-mirror
# Pull-request run of the opendatahub-io-ci-image-mirror workflow for the
# runtime-rocm-pytorch-ubi9-python-3.9 image; limited by run_if_changed to
# changes under the AMD base image or the ROCm PyTorch runtime directory.
- as: runtime-rocm-pytorch-ubi9-python-3-9-pr-image-mirror
  run_if_changed: (base\/amd-ubi9-python-3.9\/*)|(runtimes\/rocm-pytorch\/ubi9-python-3.9\/*)
  steps:
    dependencies:
      SOURCE_IMAGE_REF: runtime-rocm-pytorch-ubi9-python-3.9
    env:
      IMAGE_REPO: workbench-images
      RELEASE_VERSION: runtime-rocm-pytorch-ubi9-python-3.9
    workflow: opendatahub-io-ci-image-mirror
- as: jupyter-datascience-anaconda-python-3-8-pr-image-mirror
run_if_changed: (base\/anaconda-python-3.8\/*)|(jupyter\/datascience\/anaconda-python-3.8\/*)
steps:
Expand Down Expand Up @@ -1456,6 +1482,46 @@ tests:
cpu: 100m
memory: 200Mi
workflow: ipi-gcp
# End-to-end suite for the AMD ROCm runtime images. For each image it creates
# a project, deploys the runtime, runs `make validate-runtime-image`, and then
# undeploys it. Runs on the gcp-opendatahub cluster profile via the ipi-gcp
# workflow.
- as: amd-runtimes-ubi9-e2e-tests
  # The two alternatives must match the context_dir of the ROCm runtime images
  # (runtimes/rocm-pytorch/... and runtimes/rocm-tensorflow/...); a misspelled
  # directory name silently prevents the suite from running for that image.
  run_if_changed: (runtimes\/rocm-pytorch\/ubi9-python-3.9\/*)|(runtimes\/rocm-tensorflow\/ubi9-python-3.9\/*)
  steps:
    cluster_profile: gcp-opendatahub
    test:
    # PyTorch runtime: the image pull spec arrives in RUNTIME_ROCM_PYTORCH_IMAGE
    # and is split on ':' into registry (index 0) and tag (index 1).
    - as: runtime-rocm-pytorch-ubi9-python-3.9-test-e2e
      cli: latest
      commands: |
        oc new-project runtimes-rocm-pytorch-ubi9-python-3-9
        IFS=':' read -r -a NOTEBOOK_IMAGE <<< "${RUNTIME_ROCM_PYTORCH_IMAGE}"
        make deploy9-runtimes-rocm-pytorch-ubi9-python-3.9 \
          -e IMAGE_REGISTRY="${NOTEBOOK_IMAGE[0]}" -e NOTEBOOK_TAG="${NOTEBOOK_IMAGE[1]}"
        make validate-runtime-image image=runtime-rocm-pytorch-ubi9-python-3.9
        make undeploy9-runtimes-rocm-pytorch-ubi9-python-3.9
      dependencies:
      - env: RUNTIME_ROCM_PYTORCH_IMAGE
        name: runtime-rocm-pytorch-ubi9-python-3.9
      from: src
      resources:
        requests:
          cpu: 100m
          memory: 200Mi
    # TensorFlow runtime: same flow, driven by RUNTIME_ROCM_TENSORFLOW_IMAGE.
    - as: runtime-rocm-tensorflow-ubi9-python-3.9-test-e2e
      cli: latest
      commands: |
        oc new-project runtimes-rocm-tensorflow-ubi9-python-3-9
        IFS=':' read -r -a NOTEBOOK_IMAGE <<< "${RUNTIME_ROCM_TENSORFLOW_IMAGE}"
        make deploy9-runtimes-rocm-tensorflow-ubi9-python-3.9 \
          -e IMAGE_REGISTRY="${NOTEBOOK_IMAGE[0]}" -e NOTEBOOK_TAG="${NOTEBOOK_IMAGE[1]}"
        make validate-runtime-image image=runtime-rocm-tensorflow-ubi9-python-3.9
        make undeploy9-runtimes-rocm-tensorflow-ubi9-python-3.9
      dependencies:
      - env: RUNTIME_ROCM_TENSORFLOW_IMAGE
        name: runtime-rocm-tensorflow-ubi9-python-3.9
      from: src
      resources:
        requests:
          cpu: 100m
          memory: 200Mi
    workflow: ipi-gcp
zz_generated_metadata:
branch: main
org: opendatahub-io
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2264,3 +2264,61 @@ postsubmits:
- name: result-aggregator
secret:
secretName: result-aggregator
# Prow postsubmit for the main branch that runs ci-operator with
# --target=runtime-rocm-pytorch-ubi9-python-3-9-image-mirror.
# Labelled ci.openshift.io/generator: prowgen.
- agent: kubernetes
  always_run: true
  branches:
  - ^main$
  cluster: build03
  decorate: true
  decoration_config:
    skip_cloning: true
  labels:
    ci.openshift.io/generator: prowgen
  max_concurrency: 1
  name: branch-ci-opendatahub-io-notebooks-main-runtime-rocm-pytorch-ubi9-python-3-9-image-mirror
  spec:
    containers:
    - args:
      - --gcs-upload-secret=/secrets/gcs/service-account.json
      - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson
      - --report-credentials-file=/etc/report/credentials
      - --secret-dir=/secrets/ci-pull-credentials
      - --target=runtime-rocm-pytorch-ubi9-python-3-9-image-mirror
      command:
      - ci-operator
      image: ci-operator:latest
      imagePullPolicy: Always
      name: ""
      resources:
        requests:
          cpu: 10m
      volumeMounts:
      - mountPath: /secrets/ci-pull-credentials
        name: ci-pull-credentials
        readOnly: true
      - mountPath: /secrets/gcs
        name: gcs-credentials
        readOnly: true
      - mountPath: /secrets/manifest-tool
        name: manifest-tool-local-pusher
        readOnly: true
      - mountPath: /etc/pull-secret
        name: pull-secret
        readOnly: true
      - mountPath: /etc/report
        name: result-aggregator
        readOnly: true
    serviceAccountName: ci-operator
    volumes:
    - name: ci-pull-credentials
      secret:
        secretName: ci-pull-credentials
    - name: manifest-tool-local-pusher
      secret:
        secretName: manifest-tool-local-pusher
    - name: pull-secret
      secret:
        secretName: registry-pull-credentials
    - name: result-aggregator
      secret:
        secretName: result-aggregator
Original file line number Diff line number Diff line change
@@ -1,5 +1,86 @@
presubmits:
opendatahub-io/notebooks:
# Prow presubmit that runs ci-operator with --target=amd-runtimes-ubi9-e2e-tests
# on the gcp-opendatahub cluster profile. Not always run: it starts when a
# changed path matches run_if_changed or when the /test command in `trigger`
# is posted. Labelled ci.openshift.io/generator: prowgen.
- agent: kubernetes
  always_run: false
  branches:
  - ^main$
  - ^main-
  cluster: build02
  context: ci/prow/amd-runtimes-ubi9-e2e-tests
  decorate: true
  decoration_config:
    skip_cloning: true
  labels:
    ci-operator.openshift.io/cloud: gcp
    ci-operator.openshift.io/cloud-cluster-profile: gcp-opendatahub
    ci.openshift.io/generator: prowgen
    pj-rehearse.openshift.io/can-be-rehearsed: "true"
  name: pull-ci-opendatahub-io-notebooks-main-amd-runtimes-ubi9-e2e-tests
  rerun_command: /test amd-runtimes-ubi9-e2e-tests
  # Directory names must be spelled exactly as in the repository
  # (rocm-pytorch, rocm-tensorflow); a misspelling means changes to that
  # runtime never trigger this job.
  run_if_changed: (runtimes\/rocm-pytorch\/ubi9-python-3.9\/*)|(runtimes\/rocm-tensorflow\/ubi9-python-3.9\/*)
  spec:
    containers:
    - args:
      - --gcs-upload-secret=/secrets/gcs/service-account.json
      - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson
      - --lease-server-credentials-file=/etc/boskos/credentials
      - --report-credentials-file=/etc/report/credentials
      - --secret-dir=/secrets/ci-pull-credentials
      - --secret-dir=/usr/local/amd-runtimes-ubi9-e2e-tests-cluster-profile
      - --target=amd-runtimes-ubi9-e2e-tests
      command:
      - ci-operator
      image: ci-operator:latest
      imagePullPolicy: Always
      name: ""
      resources:
        requests:
          cpu: 10m
      volumeMounts:
      - mountPath: /etc/boskos
        name: boskos
        readOnly: true
      - mountPath: /secrets/ci-pull-credentials
        name: ci-pull-credentials
        readOnly: true
      - mountPath: /usr/local/amd-runtimes-ubi9-e2e-tests-cluster-profile
        name: cluster-profile
      - mountPath: /secrets/gcs
        name: gcs-credentials
        readOnly: true
      - mountPath: /secrets/manifest-tool
        name: manifest-tool-local-pusher
        readOnly: true
      - mountPath: /etc/pull-secret
        name: pull-secret
        readOnly: true
      - mountPath: /etc/report
        name: result-aggregator
        readOnly: true
    serviceAccountName: ci-operator
    volumes:
    - name: boskos
      secret:
        items:
        - key: credentials
          path: credentials
        secretName: boskos-credentials
    - name: ci-pull-credentials
      secret:
        secretName: ci-pull-credentials
    - name: cluster-profile
      secret:
        secretName: cluster-secrets-gcp-opendatahub
    - name: manifest-tool-local-pusher
      secret:
        secretName: manifest-tool-local-pusher
    - name: pull-secret
      secret:
        secretName: registry-pull-credentials
    - name: result-aggregator
      secret:
        secretName: result-aggregator
  trigger: (?m)^/test( | .* )amd-runtimes-ubi9-e2e-tests,?($|\s.*)
- agent: kubernetes
always_run: false
branches:
Expand Down Expand Up @@ -3079,6 +3160,69 @@ presubmits:
secret:
secretName: result-aggregator
trigger: (?m)^/test( | .* )runtime-pytorch-ubi9-python-3-9-pr-image-mirror,?($|\s.*)
# Prow presubmit that runs ci-operator with
# --target=runtime-rocm-pytorch-ubi9-python-3-9-pr-image-mirror. Not always
# run: it starts when a changed path matches run_if_changed or when the /test
# command in `trigger` is posted. Labelled ci.openshift.io/generator: prowgen.
- agent: kubernetes
  always_run: false
  branches:
  - ^main$
  - ^main-
  cluster: build02
  context: ci/prow/runtime-rocm-pytorch-ubi9-python-3-9-pr-image-mirror
  decorate: true
  decoration_config:
    skip_cloning: true
  labels:
    ci.openshift.io/generator: prowgen
    pj-rehearse.openshift.io/can-be-rehearsed: "true"
  name: pull-ci-opendatahub-io-notebooks-main-runtime-rocm-pytorch-ubi9-python-3-9-pr-image-mirror
  rerun_command: /test runtime-rocm-pytorch-ubi9-python-3-9-pr-image-mirror
  run_if_changed: (base\/amd-ubi9-python-3.9\/*)|(runtimes\/rocm-pytorch\/ubi9-python-3.9\/*)
  spec:
    containers:
    - args:
      - --gcs-upload-secret=/secrets/gcs/service-account.json
      - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson
      - --report-credentials-file=/etc/report/credentials
      - --secret-dir=/secrets/ci-pull-credentials
      - --target=runtime-rocm-pytorch-ubi9-python-3-9-pr-image-mirror
      command:
      - ci-operator
      image: ci-operator:latest
      imagePullPolicy: Always
      name: ""
      resources:
        requests:
          cpu: 10m
      volumeMounts:
      - mountPath: /secrets/ci-pull-credentials
        name: ci-pull-credentials
        readOnly: true
      - mountPath: /secrets/gcs
        name: gcs-credentials
        readOnly: true
      - mountPath: /secrets/manifest-tool
        name: manifest-tool-local-pusher
        readOnly: true
      - mountPath: /etc/pull-secret
        name: pull-secret
        readOnly: true
      - mountPath: /etc/report
        name: result-aggregator
        readOnly: true
    serviceAccountName: ci-operator
    volumes:
    - name: ci-pull-credentials
      secret:
        secretName: ci-pull-credentials
    - name: manifest-tool-local-pusher
      secret:
        secretName: manifest-tool-local-pusher
    - name: pull-secret
      secret:
        secretName: registry-pull-credentials
    - name: result-aggregator
      secret:
        secretName: result-aggregator
  trigger: (?m)^/test( | .* )runtime-rocm-pytorch-ubi9-python-3-9-pr-image-mirror,?($|\s.*)
- agent: kubernetes
always_run: false
branches:
Expand Down

0 comments on commit 4df5bbf

Please sign in to comment.