# katib-pipeline.yaml (forked from machine-learning-exchange/katalog)
# Copyright 2021 The MLX Contributors
#
# SPDX-License-Identifier: Apache-2.0
---
# Tekton PipelineRun with two tasks:
#   1. katib-launch-experiment - runs src/launch_experiment.py to launch the
#      Katib Experiment "median-stop" (random search + medianstop early
#      stopping) and writes the best parameter set to a Tekton result.
#   2. print - echoes that result.
# NOTE(review): the pipelines.kubeflow.org/* annotations suggest this file was
# generated by a Kubeflow Pipelines compiler; if so, prefer regenerating over
# hand-editing - confirm.
apiVersion: tekton.dev/v1beta1
kind: PipelineRun
metadata:
  name: launch-katib-early-stopping-experiment
  annotations:
    # Artifact bookkeeping, stored as JSON strings. The single-quoted scalars
    # are folded across lines: each line break is read as one space.
    tekton.dev/output_artifacts: '{"katib-launch-experiment": [{"key": "artifacts/$PIPELINERUN/katib-launch-experiment/Best-Parameter-Set.tgz",
      "name": "katib-launch-experiment-Best-Parameter-Set", "path": "/tmp/outputs/Best_Parameter_Set/data"}]}'
    tekton.dev/input_artifacts: '{"print": [{"name": "katib-launch-experiment-Best-Parameter-Set",
      "parent_task": "katib-launch-experiment"}]}'
    # Artifact store location: bucket, host:port and URL scheme.
    tekton.dev/artifact_bucket: mlpipeline
    tekton.dev/artifact_endpoint: minio-service.kubeflow:9000
    tekton.dev/artifact_endpoint_scheme: http://
    tekton.dev/artifact_items: '{"katib-launch-experiment": [["Best-Parameter-Set",
      "$(results.best-parameter-set.path)"]], "print": []}'
    # Quoted so the annotation value stays a string rather than a YAML boolean.
    sidecar.istio.io/inject: "false"
    pipelines.kubeflow.org/pipeline_spec: '{"description": "An example to launch Katib
      Experiment with early stopping", "name": "Katib Early-Stopping Experiment"}'
spec:
  pipelineSpec:
    tasks:
      # Task 1: launch the Katib Experiment and capture its best parameter set.
      - name: katib-launch-experiment
        taskSpec:
          steps:
            - name: main
              args:
                - --experiment-name
                - median-stop
                - --experiment-namespace
                - anonymous
                # Experiment spec as one JSON string. Summary of what it sets:
                #   algorithm: random; earlyStopping: medianstop with
                #   min_trials_required=2; maxTrialCount=18,
                #   parallelTrialCount=2, maxFailedTrialCount=3;
                #   objective: maximize Validation-accuracy (goal 0.99), also
                #   tracking Train-accuracy;
                #   parameters: lr (double, 0.01-0.3), num-layers (int, 2-5),
                #   optimizer (categorical: sgd, adam, ftrl);
                #   trial: a batch/v1 Job running /opt/mxnet-mnist/mnist.py
                #   in "training-container" with Istio injection disabled.
                - --experiment-spec
                - '{"algorithm": {"algorithmName": "random"}, "earlyStopping": {"algorithmName":
                  "medianstop", "algorithmSettings": [{"name": "min_trials_required", "value":
                  "2"}]}, "maxFailedTrialCount": 3, "maxTrialCount": 18, "objective": {"additionalMetricNames":
                  ["Train-accuracy"], "goal": 0.99, "objectiveMetricName": "Validation-accuracy",
                  "type": "maximize"}, "parallelTrialCount": 2, "parameters": [{"feasibleSpace":
                  {"max": "0.3", "min": "0.01"}, "name": "lr", "parameterType": "double"},
                  {"feasibleSpace": {"max": "5", "min": "2"}, "name": "num-layers", "parameterType":
                  "int"}, {"feasibleSpace": {"list": ["sgd", "adam", "ftrl"]}, "name": "optimizer",
                  "parameterType": "categorical"}], "trialTemplate": {"primaryContainerName":
                  "training-container", "retain": true, "trialParameters": [{"description":
                  "Learning rate for the training model", "name": "learningRate", "reference":
                  "lr"}, {"description": "Number of training model layers", "name": "numberLayers",
                  "reference": "num-layers"}, {"description": "Training model optimizer
                  (sgd, adam or ftrl)", "name": "optimizer", "reference": "optimizer"}],
                  "trialSpec": {"apiVersion": "batch/v1", "kind": "Job", "spec": {"template":
                  {"metadata": {"annotations": {"sidecar.istio.io/inject": "false"}}, "spec":
                  {"containers": [{"command": ["python3", "/opt/mxnet-mnist/mnist.py", "--batch-size=64",
                  "--lr=${trialParameters.learningRate}", "--num-layers=${trialParameters.numberLayers}",
                  "--optimizer=${trialParameters.optimizer}"], "image": "docker.io/kubeflowkatib/mxnet-mnist:v1beta1-45c5727",
                  "name": "training-container"}], "restartPolicy": "Never"}}}}}}'
                # Quoted: container args must be strings, not a YAML integer.
                - --experiment-timeout-minutes
                - '60'
                # Quoted for the same reason; passed to the launcher as the
                # literal text "False".
                - --delete-after-done
                - "False"
                # The launcher writes its output to the Tekton result file.
                - --output-file
                - $(results.best-parameter-set.path)
              command:
                - python
                - src/launch_experiment.py
              # NOTE(review): no tag or digest, so this resolves to `latest`;
              # consider pinning a specific launcher version.
              image: docker.io/kubeflowkatib/kubeflow-pipelines-launcher
          results:
            - name: best-parameter-set
              # Holds a path, not prose; it matches the "path" recorded in the
              # tekton.dev/output_artifacts annotation.
              description: /tmp/outputs/Best_Parameter_Set/data
          metadata:
            labels:
              pipelines.kubeflow.org/pipelinename: ''
              pipelines.kubeflow.org/generation: ''
              pipelines.kubeflow.org/cache_enabled: "true"
            annotations:
              # Copy of the component definition this task was built from
              # (JSON string).
              pipelines.kubeflow.org/component_spec: '{"description": "Katib Experiment
                launcher", "implementation": {"container": {"args": ["--experiment-name",
                {"inputValue": "Experiment Name"}, "--experiment-namespace", {"inputValue":
                "Experiment Namespace"}, "--experiment-spec", {"inputValue": "Experiment
                Spec"}, "--experiment-timeout-minutes", {"inputValue": "Experiment Timeout
                Minutes"}, "--delete-after-done", {"inputValue": "Delete Finished Experiment"},
                "--output-file", {"outputPath": "Best Parameter Set"}], "command": ["python",
                "src/launch_experiment.py"], "image": "docker.io/kubeflowkatib/kubeflow-pipelines-launcher"}},
                "inputs": [{"default": "", "description": "Experiment name", "name":
                "Experiment Name", "type": "String"}, {"default": "anonymous", "description":
                "Experiment namespace", "name": "Experiment Namespace", "type": "String"},
                {"default": "{}", "description": "Experiment specification in dict format",
                "name": "Experiment Spec", "type": "JsonObject"}, {"default": 1440,
                "description": "Time in minutes to wait for the Experiment to complete",
                "name": "Experiment Timeout Minutes", "type": "Integer"}, {"default":
                "True", "description": "Whether to delete the Experiment after it is
                finished", "name": "Delete Finished Experiment", "type": "Bool"}], "name":
                "Katib - Launch Experiment", "outputs": [{"description": "The hyperparameter
                set of the best Experiment Trial", "name": "Best Parameter Set", "type":
                "JsonObject"}]}'
              tekton.dev/template: ''
        # 0s: Tekton treats a zero timeout as "no timeout" for this task.
        timeout: 0s
      # Task 2: print the best parameter set produced by task 1. Referencing
      # the task-1 result in `params` is what orders this task after it.
      - name: print
        params:
          - name: katib-launch-experiment-Best-Parameter-Set
            value: $(tasks.katib-launch-experiment.results.best-parameter-set)
        taskSpec:
          steps:
            - name: main
              # Runs `sh -c <script> <value>`: the operand after the script
              # becomes $0, which is how the script reads the parameter.
              args:
                - |
                  echo "Best HyperParameters: $0"
                - $(inputs.params.katib-launch-experiment-Best-Parameter-Set)
              command:
                - sh
                - -c
              image: library/bash:4.4.23
          params:
            - name: katib-launch-experiment-Best-Parameter-Set
          metadata:
            labels:
              pipelines.kubeflow.org/pipelinename: ''
              pipelines.kubeflow.org/generation: ''
              pipelines.kubeflow.org/cache_enabled: "true"
            annotations:
              pipelines.kubeflow.org/component_spec: '{"description": "print msg", "implementation":
                {"container": {"args": ["echo \"Best HyperParameters: $0\"\n", {"inputValue":
                "message"}], "command": ["sh", "-c"], "image": "library/bash:4.4.23"}},
                "inputs": [{"name": "message", "type": "JsonObject"}], "name": "print"}'
              tekton.dev/template: ''
        # 0s: no timeout for this task.
        timeout: 0s
  # 0s: no timeout for the PipelineRun as a whole.
  timeout: 0s