From 4df5bbf005b087c6a796028f6cefbcf05925b3ea Mon Sep 17 00:00:00 2001
From: atheo89
Date: Fri, 19 Jul 2024 10:18:49 +0200
Subject: [PATCH] Onboard rocm pytorch and tensorflow runtimes on the ci:main config

---
Review notes (commentary only, not part of the commit message):

* Adds the runtime-rocm-pytorch and runtime-rocm-tensorflow ubi9-python-3.9
  images, the pytorch image-mirror tests (postsubmit and PR), and the
  amd-runtimes-ubi9-e2e-tests presubmit with one step per runtime image.
* The run_if_changed pattern of amd-runtimes-ubi9-e2e-tests now reads
  "rocm-tensorflow" (it was misspelled "rocm-tensorlow") in both the
  ci-operator config and the generated presubmit job, so it matches the
  context_dir runtimes/rocm-tensorflow/ubi9-python-3.9 declared under images.
* NOTE(review): the blob ids on the "index" lines were not recomputed after
  the spelling correction; regenerate the patch if exact ids are required.
* NOTE(review): no image-mirror tests are added for the tensorflow runtime
  here; confirm whether that is intentional.

 .../opendatahub-io-notebooks-main.yaml        | 66 ++++++++
 ...datahub-io-notebooks-main-postsubmits.yaml | 58 +++++++
 ...ndatahub-io-notebooks-main-presubmits.yaml | 144 ++++++++++++++++++
 3 files changed, 268 insertions(+)

diff --git a/ci-operator/config/opendatahub-io/notebooks/opendatahub-io-notebooks-main.yaml b/ci-operator/config/opendatahub-io/notebooks/opendatahub-io-notebooks-main.yaml
index 347770f8b175..3f01806c3deb 100644
--- a/ci-operator/config/opendatahub-io/notebooks/opendatahub-io-notebooks-main.yaml
+++ b/ci-operator/config/opendatahub-io/notebooks/opendatahub-io-notebooks-main.yaml
@@ -164,6 +164,14 @@ images:
   dockerfile_path: Dockerfile
   from: base-anaconda-python-3.8
   to: jupyter-datascience-anaconda-python-3.8
+- context_dir: runtimes/rocm-pytorch/ubi9-python-3.9
+  dockerfile_path: Dockerfile
+  from: amd-ubi9-python-3.9
+  to: runtime-rocm-pytorch-ubi9-python-3.9
+- context_dir: runtimes/rocm-tensorflow/ubi9-python-3.9
+  dockerfile_path: Dockerfile
+  from: amd-ubi9-python-3.9
+  to: runtime-rocm-tensorflow-ubi9-python-3.9
 promotion:
   to:
   - namespace: opendatahub-io
@@ -916,6 +924,24 @@ tests:
       IMAGE_REPO: workbench-images
       RELEASE_VERSION: amd-jupyter-minimal-c9s-python-3.9
     workflow: opendatahub-io-ci-image-mirror
+- as: runtime-rocm-pytorch-ubi9-python-3-9-image-mirror
+  postsubmit: true
+  steps:
+    dependencies:
+      SOURCE_IMAGE_REF: runtime-rocm-pytorch-ubi9-python-3.9
+    env:
+      IMAGE_REPO: workbench-images
+      RELEASE_VERSION: runtime-rocm-pytorch-ubi9-python-3.9
+    workflow: opendatahub-io-ci-image-mirror
+- as: runtime-rocm-pytorch-ubi9-python-3-9-pr-image-mirror
+  run_if_changed: (base\/amd-ubi9-python-3.9\/*)|(runtimes\/rocm-pytorch\/ubi9-python-3.9\/*)
+  steps:
+    dependencies:
+      SOURCE_IMAGE_REF: runtime-rocm-pytorch-ubi9-python-3.9
+    env:
+      IMAGE_REPO: workbench-images
+      RELEASE_VERSION: runtime-rocm-pytorch-ubi9-python-3.9
+    workflow: opendatahub-io-ci-image-mirror
 - as: jupyter-datascience-anaconda-python-3-8-pr-image-mirror
   run_if_changed: (base\/anaconda-python-3.8\/*)|(jupyter\/datascience\/anaconda-python-3.8\/*)
   steps:
@@ -1456,6 +1482,46 @@ tests:
           cpu: 100m
           memory: 200Mi
     workflow: ipi-gcp
+- as: amd-runtimes-ubi9-e2e-tests
+  run_if_changed: (runtimes\/rocm-pytorch\/ubi9-python-3.9\/*)|(runtimes\/rocm-tensorflow\/ubi9-python-3.9\/*)
+  steps:
+    cluster_profile: gcp-opendatahub
+    test:
+    - as: runtime-rocm-pytorch-ubi9-python-3.9-test-e2e
+      cli: latest
+      commands: |
+        oc new-project runtimes-rocm-pytorch-ubi9-python-3-9
+        IFS=':' read -r -a NOTEBOOK_IMAGE <<< "${RUNTIME_ROCM_PYTORCH_IMAGE}"
+        make deploy9-runtimes-rocm-pytorch-ubi9-python-3.9 \
+          -e IMAGE_REGISTRY="${NOTEBOOK_IMAGE[0]}" -e NOTEBOOK_TAG="${NOTEBOOK_IMAGE[1]}"
+        make validate-runtime-image image=runtime-rocm-pytorch-ubi9-python-3.9
+        make undeploy9-runtimes-rocm-pytorch-ubi9-python-3.9
+      dependencies:
+      - env: RUNTIME_ROCM_PYTORCH_IMAGE
+        name: runtime-rocm-pytorch-ubi9-python-3.9
+      from: src
+      resources:
+        requests:
+          cpu: 100m
+          memory: 200Mi
+    - as: runtime-rocm-tensorflow-ubi9-python-3.9-test-e2e
+      cli: latest
+      commands: |
+        oc new-project runtimes-rocm-tensorflow-ubi9-python-3-9
+        IFS=':' read -r -a NOTEBOOK_IMAGE <<< "${RUNTIME_ROCM_TENSORFLOW_IMAGE}"
+        make deploy9-runtimes-rocm-tensorflow-ubi9-python-3.9 \
+          -e IMAGE_REGISTRY="${NOTEBOOK_IMAGE[0]}" -e NOTEBOOK_TAG="${NOTEBOOK_IMAGE[1]}"
+        make validate-runtime-image image=runtime-rocm-tensorflow-ubi9-python-3.9
+        make undeploy9-runtimes-rocm-tensorflow-ubi9-python-3.9
+      dependencies:
+      - env: RUNTIME_ROCM_TENSORFLOW_IMAGE
+        name: runtime-rocm-tensorflow-ubi9-python-3.9
+      from: src
+      resources:
+        requests:
+          cpu: 100m
+          memory: 200Mi
+    workflow: ipi-gcp
 zz_generated_metadata:
   branch: main
   org: opendatahub-io
diff --git a/ci-operator/jobs/opendatahub-io/notebooks/opendatahub-io-notebooks-main-postsubmits.yaml b/ci-operator/jobs/opendatahub-io/notebooks/opendatahub-io-notebooks-main-postsubmits.yaml
index f9b851624988..b74668695572 100644
--- a/ci-operator/jobs/opendatahub-io/notebooks/opendatahub-io-notebooks-main-postsubmits.yaml
+++ b/ci-operator/jobs/opendatahub-io/notebooks/opendatahub-io-notebooks-main-postsubmits.yaml
@@ -2264,3 +2264,61 @@ postsubmits:
       - name: result-aggregator
         secret:
           secretName: result-aggregator
+  - agent: kubernetes
+    always_run: true
+    branches:
+    - ^main$
+    cluster: build03
+    decorate: true
+    decoration_config:
+      skip_cloning: true
+    labels:
+      ci.openshift.io/generator: prowgen
+    max_concurrency: 1
+    name: branch-ci-opendatahub-io-notebooks-main-runtime-rocm-pytorch-ubi9-python-3-9-image-mirror
+    spec:
+      containers:
+      - args:
+        - --gcs-upload-secret=/secrets/gcs/service-account.json
+        - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson
+        - --report-credentials-file=/etc/report/credentials
+        - --secret-dir=/secrets/ci-pull-credentials
+        - --target=runtime-rocm-pytorch-ubi9-python-3-9-image-mirror
+        command:
+        - ci-operator
+        image: ci-operator:latest
+        imagePullPolicy: Always
+        name: ""
+        resources:
+          requests:
+            cpu: 10m
+        volumeMounts:
+        - mountPath: /secrets/ci-pull-credentials
+          name: ci-pull-credentials
+          readOnly: true
+        - mountPath: /secrets/gcs
+          name: gcs-credentials
+          readOnly: true
+        - mountPath: /secrets/manifest-tool
+          name: manifest-tool-local-pusher
+          readOnly: true
+        - mountPath: /etc/pull-secret
+          name: pull-secret
+          readOnly: true
+        - mountPath: /etc/report
+          name: result-aggregator
+          readOnly: true
+      serviceAccountName: ci-operator
+      volumes:
+      - name: ci-pull-credentials
+        secret:
+          secretName: ci-pull-credentials
+      - name: manifest-tool-local-pusher
+        secret:
+          secretName: manifest-tool-local-pusher
+      - name: pull-secret
+        secret:
+          secretName: registry-pull-credentials
+      - name: result-aggregator
+        secret:
+          secretName: result-aggregator
diff --git a/ci-operator/jobs/opendatahub-io/notebooks/opendatahub-io-notebooks-main-presubmits.yaml b/ci-operator/jobs/opendatahub-io/notebooks/opendatahub-io-notebooks-main-presubmits.yaml
index 376b6d5ba655..8a6813059df8 100644
--- a/ci-operator/jobs/opendatahub-io/notebooks/opendatahub-io-notebooks-main-presubmits.yaml
+++ b/ci-operator/jobs/opendatahub-io/notebooks/opendatahub-io-notebooks-main-presubmits.yaml
@@ -1,5 +1,86 @@
 presubmits:
   opendatahub-io/notebooks:
+  - agent: kubernetes
+    always_run: false
+    branches:
+    - ^main$
+    - ^main-
+    cluster: build02
+    context: ci/prow/amd-runtimes-ubi9-e2e-tests
+    decorate: true
+    decoration_config:
+      skip_cloning: true
+    labels:
+      ci-operator.openshift.io/cloud: gcp
+      ci-operator.openshift.io/cloud-cluster-profile: gcp-opendatahub
+      ci.openshift.io/generator: prowgen
+      pj-rehearse.openshift.io/can-be-rehearsed: "true"
+    name: pull-ci-opendatahub-io-notebooks-main-amd-runtimes-ubi9-e2e-tests
+    rerun_command: /test amd-runtimes-ubi9-e2e-tests
+    run_if_changed: (runtimes\/rocm-pytorch\/ubi9-python-3.9\/*)|(runtimes\/rocm-tensorflow\/ubi9-python-3.9\/*)
+    spec:
+      containers:
+      - args:
+        - --gcs-upload-secret=/secrets/gcs/service-account.json
+        - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson
+        - --lease-server-credentials-file=/etc/boskos/credentials
+        - --report-credentials-file=/etc/report/credentials
+        - --secret-dir=/secrets/ci-pull-credentials
+        - --secret-dir=/usr/local/amd-runtimes-ubi9-e2e-tests-cluster-profile
+        - --target=amd-runtimes-ubi9-e2e-tests
+        command:
+        - ci-operator
+        image: ci-operator:latest
+        imagePullPolicy: Always
+        name: ""
+        resources:
+          requests:
+            cpu: 10m
+        volumeMounts:
+        - mountPath: /etc/boskos
+          name: boskos
+          readOnly: true
+        - mountPath: /secrets/ci-pull-credentials
+          name: ci-pull-credentials
+          readOnly: true
+        - mountPath: /usr/local/amd-runtimes-ubi9-e2e-tests-cluster-profile
+          name: cluster-profile
+        - mountPath: /secrets/gcs
+          name: gcs-credentials
+          readOnly: true
+        - mountPath: /secrets/manifest-tool
+          name: manifest-tool-local-pusher
+          readOnly: true
+        - mountPath: /etc/pull-secret
+          name: pull-secret
+          readOnly: true
+        - mountPath: /etc/report
+          name: result-aggregator
+          readOnly: true
+      serviceAccountName: ci-operator
+      volumes:
+      - name: boskos
+        secret:
+          items:
+          - key: credentials
+            path: credentials
+          secretName: boskos-credentials
+      - name: ci-pull-credentials
+        secret:
+          secretName: ci-pull-credentials
+      - name: cluster-profile
+        secret:
+          secretName: cluster-secrets-gcp-opendatahub
+      - name: manifest-tool-local-pusher
+        secret:
+          secretName: manifest-tool-local-pusher
+      - name: pull-secret
+        secret:
+          secretName: registry-pull-credentials
+      - name: result-aggregator
+        secret:
+          secretName: result-aggregator
+    trigger: (?m)^/test( | .* )amd-runtimes-ubi9-e2e-tests,?($|\s.*)
   - agent: kubernetes
     always_run: false
     branches:
@@ -3079,6 +3160,69 @@ presubmits:
         secret:
           secretName: result-aggregator
     trigger: (?m)^/test( | .* )runtime-pytorch-ubi9-python-3-9-pr-image-mirror,?($|\s.*)
+  - agent: kubernetes
+    always_run: false
+    branches:
+    - ^main$
+    - ^main-
+    cluster: build02
+    context: ci/prow/runtime-rocm-pytorch-ubi9-python-3-9-pr-image-mirror
+    decorate: true
+    decoration_config:
+      skip_cloning: true
+    labels:
+      ci.openshift.io/generator: prowgen
+      pj-rehearse.openshift.io/can-be-rehearsed: "true"
+    name: pull-ci-opendatahub-io-notebooks-main-runtime-rocm-pytorch-ubi9-python-3-9-pr-image-mirror
+    rerun_command: /test runtime-rocm-pytorch-ubi9-python-3-9-pr-image-mirror
+    run_if_changed: (base\/amd-ubi9-python-3.9\/*)|(runtimes\/rocm-pytorch\/ubi9-python-3.9\/*)
+    spec:
+      containers:
+      - args:
+        - --gcs-upload-secret=/secrets/gcs/service-account.json
+        - --image-import-pull-secret=/etc/pull-secret/.dockerconfigjson
+        - --report-credentials-file=/etc/report/credentials
+        - --secret-dir=/secrets/ci-pull-credentials
+        - --target=runtime-rocm-pytorch-ubi9-python-3-9-pr-image-mirror
+        command:
+        - ci-operator
+        image: ci-operator:latest
+        imagePullPolicy: Always
+        name: ""
+        resources:
+          requests:
+            cpu: 10m
+        volumeMounts:
+        - mountPath: /secrets/ci-pull-credentials
+          name: ci-pull-credentials
+          readOnly: true
+        - mountPath: /secrets/gcs
+          name: gcs-credentials
+          readOnly: true
+        - mountPath: /secrets/manifest-tool
+          name: manifest-tool-local-pusher
+          readOnly: true
+        - mountPath: /etc/pull-secret
+          name: pull-secret
+          readOnly: true
+        - mountPath: /etc/report
+          name: result-aggregator
+          readOnly: true
+      serviceAccountName: ci-operator
+      volumes:
+      - name: ci-pull-credentials
+        secret:
+          secretName: ci-pull-credentials
+      - name: manifest-tool-local-pusher
+        secret:
+          secretName: manifest-tool-local-pusher
+      - name: pull-secret
+        secret:
+          secretName: registry-pull-credentials
+      - name: result-aggregator
+        secret:
+          secretName: result-aggregator
+    trigger: (?m)^/test( | .* )runtime-rocm-pytorch-ubi9-python-3-9-pr-image-mirror,?($|\s.*)
   - agent: kubernetes
     always_run: false
     branches: