forked from openshift-psap/topsail
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Watsonx-serving: keep working on the scale test (openshift-psap#14)
- Loading branch information
Showing
15 changed files
with
362 additions
and
181 deletions.
There are no files selected for viewing
32 changes: 32 additions & 0 deletions
32
roles/watsonx_serving/watsonx_serving_deploy_model/defaults/main/config.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# Auto-generated file, do not edit manually ...
# Toolbox generate command: repo generate_ansible_default_settings
# Source component: Watsonx_Serving.deploy_model

# Every setting below is flagged as mandatory and defaults to null.

# Namespace in which the model should be deployed
# Mandatory value
watsonx_serving_deploy_model_namespace: null

# Name to give to the serving runtime
# Mandatory value
watsonx_serving_deploy_model_serving_runtime_name: null

# Image of the serving runtime
# Mandatory value
watsonx_serving_deploy_model_serving_runtime_image: null

# Resource request of the serving runtime
# Mandatory value
watsonx_serving_deploy_model_serving_runtime_resource_request: null

# Name to give to the inference service
# Mandatory value
watsonx_serving_deploy_model_inference_service_name: null

# [S3] URI where the model is stored
# Mandatory value
watsonx_serving_deploy_model_storage_uri: null

# Name of the service account to use for running the Pod
# Mandatory value
watsonx_serving_deploy_model_sa_name: null
|
Empty file.
3 changes: 3 additions & 0 deletions
3
roles/watsonx_serving/watsonx_serving_deploy_model/meta/main.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
---
# Role metadata: the roles this role depends on.
dependencies:
  - role: check_deps
125 changes: 125 additions & 0 deletions
125
roles/watsonx_serving/watsonx_serving_deploy_model/tasks/main.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
---
# Working directories created under artifact_extra_logs_dir:
#   src/       - holds the manifests rendered from the role templates
#   artifacts/ - holds the resources captured from the cluster
- name: Create the src directory
  file:
    state: directory
    path: "{{ artifact_extra_logs_dir }}/src"
    mode: "0755"

- name: Create the artifacts directory
  file:
    state: directory
    path: "{{ artifact_extra_logs_dir }}/artifacts"
    mode: "0755"
|
||
# SMMR | ||
|
||
# Remove the ConfigMaps carrying the topsail.time-tracking label from the
# target namespace before new markers are created.
- name: Delete the tracking timestamps
  command: >-
    oc delete cm
    -ltopsail.time-tracking
    -n {{ watsonx_serving_deploy_model_namespace }}

# Marker for the start of the deployment: an empty ConfigMap, labelled
# topsail.time-tracking=yes.
- name: Save timestamp
  shell: |
    NAME=start-deploy-model
    oc create configmap $NAME -n {{ watsonx_serving_deploy_model_namespace }}
    oc label cm/$NAME topsail.time-tracking=yes -n {{ watsonx_serving_deploy_model_namespace }}
# Add the model namespace to the members of smmr/default (istio-system), wait
# until the SMMR status lists it as configured, then record a time marker.
# The SMMR resource is captured in the artifacts directory whatever the outcome.
- name: Prepare the SMMR
  block:
    # Best effort: a non-zero exit code is tolerated here (failed_when: false)
    # and examined by the next task.
    - name: Patch the SMMR
      command: |
        oc patch smmr/default \
           -n istio-system \
           --type=json \
           -p="[{'op': 'add', 'path': '/spec/members/-', 'value': \"{{ watsonx_serving_deploy_model_namespace }}\"}]"
      register: patch_smmr_cmd
      failed_when: false

    # Only runs when the patch failed: the failure is accepted if the
    # namespace is already listed in .spec.members.
    - name: Check that the namespace is already registered
      when: patch_smmr_cmd.rc != 0
      shell:
        oc get smmr/default -n istio-system -ojsonpath={.spec.members} | jq .[] -r
      register: smmr_members_cmd
      failed_when: watsonx_serving_deploy_model_namespace not in smmr_members_cmd.stdout_lines

    # jq prints the index of the namespace in .status.configuredMembers, or
    # "null" when it is not listed. Polled for up to 60 x 10 seconds.
    - name: Wait for the namespace to be registered
      shell: >-
        set -o pipefail;
        oc get smmr/default
        -n istio-system
        -ojsonpath={.status.configuredMembers}
        | jq '. | index("{{ watsonx_serving_deploy_model_namespace }}")'
      register: smmr_registered_namespace_cmd
      retries: 60
      delay: 10
      # An empty stdout means that jq had nothing to parse (configuredMembers
      # not populated yet): it must not be mistaken for a successful lookup.
      until: smmr_registered_namespace_cmd.stdout not in ["", "null"]

    # Empty ConfigMap, labelled topsail.time-tracking=yes, marking the moment
    # the namespace was seen as registered.
    - name: Save timestamp
      shell: |
        NAME=smmr-registered-namespace
        oc create configmap $NAME -n {{ watsonx_serving_deploy_model_namespace }}
        oc label cm/$NAME topsail.time-tracking=yes -n {{ watsonx_serving_deploy_model_namespace }}
  always:
    - name: Capture the SMMR resource
      shell: >-
        oc get smmr/default
        -n istio-system
        -oyaml
        > {{ artifact_extra_logs_dir }}/artifacts/istio-system_smmr-default.yaml
|
||
# Serving Runtime | ||
|
||
# Render the ServingRuntime manifest into the src directory, then apply it.
- name: Prepare the ServingRuntime template
  template:
    src: "{{ serving_runtime_template }}"
    dest: "{{ artifact_extra_logs_dir }}/src/serving_runtime.yaml"
    # Quoted, like the directory modes at the top of this file, so that the
    # value does not depend on how the YAML parser reads a leading-zero number.
    mode: "0400"

- name: Create the ServingRuntime
  command:
    oc apply -f "{{ artifact_extra_logs_dir }}/src/serving_runtime.yaml"
|
||
# Inference Service | ||
|
||
# Render the InferenceService manifest into the src directory, then apply it.
- name: Prepare the InferenceService template
  template:
    src: "{{ inference_service_template }}"
    dest: "{{ artifact_extra_logs_dir }}/src/inference_service.yaml"
    # Quoted, like the directory modes at the top of this file, so that the
    # value does not depend on how the YAML parser reads a leading-zero number.
    mode: "0400"

- name: Create the InferenceService
  command:
    oc apply -f "{{ artifact_extra_logs_dir }}/src/inference_service.yaml"
|
||
# Poll the InferenceService until its target model state is "Loaded", record a
# time marker, and dump the resource to the artifacts directory whatever the
# outcome.
- name: Prepare the InferenceService
  block:
    - name: Wait for the InferenceService to be loaded
      shell: >-
        set -o pipefail;
        oc get -f "{{ artifact_extra_logs_dir }}/src/inference_service.yaml"
        -ojsonpath={.status.modelStatus.states.targetModelState}
      register: inference_service_state_cmd
      until: inference_service_state_cmd.stdout == "Loaded"
      # 240 attempts, 5 seconds apart: 20 minutes
      retries: 240
      delay: 5

    # Empty ConfigMap, labelled topsail.time-tracking=yes, marking the moment
    # the model was reported as loaded.
    - name: Save timestamp
      shell: |
        NAME=inference-service-loaded
        oc create configmap $NAME -n {{ watsonx_serving_deploy_model_namespace }}
        oc label cm/$NAME topsail.time-tracking=yes -n {{ watsonx_serving_deploy_model_namespace }}
  always:
    - name: Capture the state of the InferenceService resource
      shell: >-
        oc get -f "{{ artifact_extra_logs_dir }}/src/inference_service.yaml"
        -oyaml
        > {{ artifact_extra_logs_dir }}/artifacts/inference_service.yaml
|
||
# Export the ConfigMaps labelled topsail.time-tracking=yes from the target
# namespace into the artifacts directory.
- name: Save the timestamp configmaps
  shell: >-
    oc get cm -oyaml
    -ltopsail.time-tracking=yes
    -n {{ watsonx_serving_deploy_model_namespace }}
    > {{ artifact_extra_logs_dir }}/artifacts/time_tracking_cm.yaml
17 changes: 17 additions & 0 deletions
17
roles/watsonx_serving/watsonx_serving_deploy_model/templates/inference_service.yaml.j2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# InferenceService manifest, rendered from the
# watsonx_serving_deploy_model_* role settings.
apiVersion: serving.kserve.io/v1beta1
kind: InferenceService
metadata:
  name: {{ watsonx_serving_deploy_model_inference_service_name }}
  namespace: {{ watsonx_serving_deploy_model_namespace }}
  annotations:
    serving.knative.openshift.io/enablePassthrough: "true"
    sidecar.istio.io/inject: "true"
    sidecar.istio.io/rewriteAppHTTPProbers: "true"
spec:
  predictor:
    serviceAccountName: {{ watsonx_serving_deploy_model_sa_name }}
    model:
      runtime: {{ watsonx_serving_deploy_model_serving_runtime_name }}
      storageUri: {{ watsonx_serving_deploy_model_storage_uri }}
      modelFormat:
        name: caikit
25 changes: 25 additions & 0 deletions
25
roles/watsonx_serving/watsonx_serving_deploy_model/templates/serving_runtime.yaml.j2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# ServingRuntime manifest, rendered from the
# watsonx_serving_deploy_model_* role settings.
apiVersion: serving.kserve.io/v1alpha1
kind: ServingRuntime
metadata:
  name: {{ watsonx_serving_deploy_model_serving_runtime_name }}
  namespace: {{ watsonx_serving_deploy_model_namespace }}
spec:
  multiModel: false
  supportedModelFormats:
    # Note: this currently *only* supports caikit format models
    - name: caikit
      autoSelect: true
  containers:
    - name: kserve-container
      image: {{ watsonx_serving_deploy_model_serving_runtime_image }}
      env:
        - name: RUNTIME_LOCAL_MODELS_DIR
          value: /mnt/models
      ports:
        # Note, KServe only allows a single port, this is the gRPC port. Subject to change in the future
        - name: h2c
          containerPort: 8085
          protocol: TCP
      resources:
        # The setting is expanded on its own line, as the value of "requests".
        requests:
          {{ watsonx_serving_deploy_model_serving_runtime_resource_request }}
2 changes: 2 additions & 0 deletions
2
roles/watsonx_serving/watsonx_serving_deploy_model/vars/main/resources.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
# Paths of the Jinja2 manifest templates shipped with this role.
inference_service_template: templates/inference_service.yaml.j2
serving_runtime_template: templates/serving_runtime.yaml.j2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.