Skip to content

Commit

Permalink
Fixes local tests (kubeflow#1255)
Browse files Browse the repository at this point in the history
  • Loading branch information
shawnzhu authored Jun 16, 2020
1 parent da59c83 commit 5b7c89b
Show file tree
Hide file tree
Showing 12 changed files with 234 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.namespace
image: gcr.io/kubeflow-images-public/katib/v1alpha3/katib-controller:v0.8.0
image: gcr.io/kubeflow-images-public/katib/v1alpha3/katib-controller:917164a
imagePullPolicy: IfNotPresent
name: katib-controller
ports:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ spec:
secretKeyRef:
key: MYSQL_ROOT_PASSWORD
name: katib-mysql-secrets
image: gcr.io/kubeflow-images-public/katib/v1alpha3/katib-db-manager:v0.8.0
image: gcr.io/kubeflow-images-public/katib/v1alpha3/katib-db-manager:917164a
imagePullPolicy: IfNotPresent
livenessProbe:
exec:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.namespace
image: gcr.io/kubeflow-images-public/katib/v1alpha3/katib-ui:v0.8.0
image: gcr.io/kubeflow-images-public/katib/v1alpha3/katib-ui:917164a
imagePullPolicy: IfNotPresent
name: katib-ui
ports:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,6 @@ rules:
resources:
- experiments
- trials
- suggestions
verbs:
- '*'
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ data:
metrics-collector-sidecar: |-
{
"StdOut": {
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/file-metrics-collector:v0.8.0"
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/file-metrics-collector:917164a"
},
"File": {
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/file-metrics-collector:v0.8.0"
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/file-metrics-collector:917164a"
},
"TensorFlowEvent": {
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/tfevent-metrics-collector:v0.8.0",
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/tfevent-metrics-collector:917164a",
"resources": {
"limits": {
"memory": "1Gi"
Expand All @@ -20,28 +20,34 @@ data:
suggestion: |-
{
"random": {
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-hyperopt:v0.8.0"
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-hyperopt:917164a"
},
"grid": {
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-chocolate:v0.8.0"
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-chocolate:917164a"
},
"hyperband": {
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-hyperband:v0.8.0"
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-hyperband:917164a"
},
"bayesianoptimization": {
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-skopt:v0.8.0"
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-skopt:917164a"
},
"tpe": {
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-hyperopt:v0.8.0"
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-hyperopt:917164a"
},
"nasrl": {
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-nasrl:v0.8.0",
"enas": {
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-enas:917164a",
"imagePullPolicy": "Always",
"resources": {
"limits": {
"memory": "200Mi"
}
}
},
"cmaes": {
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-goptuna:917164a"
},
"darts": {
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-darts:917164a"
}
}
kind: ConfigMap
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# ConfigMap "trial-template-labeled" in the "kubeflow" namespace.
# Each key under `data` holds one trial template as literal Go-template text
# (`|-` keeps newlines and strips the trailing one). The placeholders
# {{.Trial}}, {{.NameSpace}} and {{.HyperParameters}} are left unrendered here.
# NOTE(review): presumably the Katib controller renders these per trial; confirm.
# Do not add comments inside the block scalars: they would become part of the
# stored template text.
apiVersion: v1
data:
  # batch/v1 Job running the mxnet-mnist script; every hyperparameter is
  # appended to the command as "<Name>=<Value>".
  defaultTrialTemplate.yaml: |-
    apiVersion: batch/v1
    kind: Job
    metadata:
      name: {{.Trial}}
      namespace: {{.NameSpace}}
    spec:
      template:
        spec:
          containers:
            - name: {{.Trial}}
              image: docker.io/kubeflowkatib/mxnet-mnist
              command:
                - "python3"
                - "/opt/mxnet-mnist/mnist.py"
                - "--batch-size=64"
                {{- with .HyperParameters}}
                {{- range .}}
                - "{{.Name}}={{.Value}}"
                {{- end}}
                {{- end}}
          restartPolicy: Never
  # batch/v1 Job running RunTrial.py from the enas-cnn-cifar10-cpu image;
  # hyperparameters are passed as --<Name>="<Value>" and a fixed
  # --num_epochs=1 argument follows them.
  enasCPUTemplate: |-
    apiVersion: batch/v1
    kind: Job
    metadata:
      name: {{.Trial}}
      namespace: {{.NameSpace}}
    spec:
      template:
        spec:
          containers:
            - name: {{.Trial}}
              image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu
              command:
                - "python3.5"
                - "-u"
                - "RunTrial.py"
                {{- with .HyperParameters}}
                {{- range .}}
                - "--{{.Name}}=\"{{.Value}}\""
                {{- end}}
                {{- end}}
                - "--num_epochs=1"
          restartPolicy: Never
  # kubeflow.org/v1 PyTorchJob with 1 Master and 2 Worker replicas; both
  # replica types run the same image and command with the same hyperparameters.
  pytorchJobTemplate: |-
    apiVersion: "kubeflow.org/v1"
    kind: PyTorchJob
    metadata:
      name: {{.Trial}}
      namespace: {{.NameSpace}}
    spec:
      pytorchReplicaSpecs:
        Master:
          replicas: 1
          restartPolicy: OnFailure
          template:
            spec:
              containers:
                - name: pytorch
                  image: gcr.io/kubeflow-ci/pytorch-dist-mnist-test:v1.0
                  imagePullPolicy: Always
                  command:
                    - "python"
                    - "/var/mnist.py"
                    {{- with .HyperParameters}}
                    {{- range .}}
                    - "{{.Name}}={{.Value}}"
                    {{- end}}
                    {{- end}}
        Worker:
          replicas: 2
          restartPolicy: OnFailure
          template:
            spec:
              containers:
                - name: pytorch
                  image: gcr.io/kubeflow-ci/pytorch-dist-mnist-test:v1.0
                  imagePullPolicy: Always
                  command:
                    - "python"
                    - "/var/mnist.py"
                    {{- with .HyperParameters}}
                    {{- range .}}
                    - "{{.Name}}={{.Value}}"
                    {{- end}}
                    {{- end}}
kind: ConfigMap
metadata:
  labels:
    app: katib-trial-templates
    app.kubernetes.io/component: katib
    app.kubernetes.io/name: katib-controller
  name: trial-template-labeled
  namespace: kubeflow
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.namespace
image: gcr.io/kubeflow-images-public/katib/v1alpha3/katib-controller:v0.8.0
image: gcr.io/kubeflow-images-public/katib/v1alpha3/katib-controller:917164a
imagePullPolicy: IfNotPresent
name: katib-controller
ports:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ spec:
secretKeyRef:
key: MYSQL_ROOT_PASSWORD
name: katib-mysql-secrets
image: gcr.io/kubeflow-images-public/katib/v1alpha3/katib-db-manager:v0.8.0
image: gcr.io/kubeflow-images-public/katib/v1alpha3/katib-db-manager:917164a
imagePullPolicy: IfNotPresent
livenessProbe:
exec:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ spec:
valueFrom:
fieldRef:
fieldPath: metadata.namespace
image: gcr.io/kubeflow-images-public/katib/v1alpha3/katib-ui:v0.8.0
image: gcr.io/kubeflow-images-public/katib/v1alpha3/katib-ui:917164a
imagePullPolicy: IfNotPresent
name: katib-ui
ports:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,6 @@ rules:
resources:
- experiments
- trials
- suggestions
verbs:
- '*'
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@ data:
metrics-collector-sidecar: |-
{
"StdOut": {
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/file-metrics-collector:v0.8.0"
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/file-metrics-collector:917164a"
},
"File": {
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/file-metrics-collector:v0.8.0"
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/file-metrics-collector:917164a"
},
"TensorFlowEvent": {
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/tfevent-metrics-collector:v0.8.0",
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/tfevent-metrics-collector:917164a",
"resources": {
"limits": {
"memory": "1Gi"
Expand All @@ -20,28 +20,34 @@ data:
suggestion: |-
{
"random": {
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-hyperopt:v0.8.0"
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-hyperopt:917164a"
},
"grid": {
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-chocolate:v0.8.0"
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-chocolate:917164a"
},
"hyperband": {
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-hyperband:v0.8.0"
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-hyperband:917164a"
},
"bayesianoptimization": {
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-skopt:v0.8.0"
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-skopt:917164a"
},
"tpe": {
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-hyperopt:v0.8.0"
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-hyperopt:917164a"
},
"nasrl": {
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-nasrl:v0.8.0",
"enas": {
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-enas:917164a",
"imagePullPolicy": "Always",
"resources": {
"limits": {
"memory": "200Mi"
}
}
},
"cmaes": {
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-goptuna:917164a"
},
"darts": {
"image": "gcr.io/kubeflow-images-public/katib/v1alpha3/suggestion-darts:917164a"
}
}
kind: ConfigMap
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# ConfigMap "trial-template-labeled" in the "kubeflow" namespace.
# Each key under `data` holds one trial template as literal Go-template text
# (`|-` keeps newlines and strips the trailing one). The placeholders
# {{.Trial}}, {{.NameSpace}} and {{.HyperParameters}} are left unrendered here.
# NOTE(review): presumably the Katib controller renders these per trial; confirm.
# Do not add comments inside the block scalars: they would become part of the
# stored template text.
apiVersion: v1
data:
  # batch/v1 Job running the mxnet-mnist script; every hyperparameter is
  # appended to the command as "<Name>=<Value>".
  defaultTrialTemplate.yaml: |-
    apiVersion: batch/v1
    kind: Job
    metadata:
      name: {{.Trial}}
      namespace: {{.NameSpace}}
    spec:
      template:
        spec:
          containers:
            - name: {{.Trial}}
              image: docker.io/kubeflowkatib/mxnet-mnist
              command:
                - "python3"
                - "/opt/mxnet-mnist/mnist.py"
                - "--batch-size=64"
                {{- with .HyperParameters}}
                {{- range .}}
                - "{{.Name}}={{.Value}}"
                {{- end}}
                {{- end}}
          restartPolicy: Never
  # batch/v1 Job running RunTrial.py from the enas-cnn-cifar10-cpu image;
  # hyperparameters are passed as --<Name>="<Value>" and a fixed
  # --num_epochs=1 argument follows them.
  enasCPUTemplate: |-
    apiVersion: batch/v1
    kind: Job
    metadata:
      name: {{.Trial}}
      namespace: {{.NameSpace}}
    spec:
      template:
        spec:
          containers:
            - name: {{.Trial}}
              image: docker.io/kubeflowkatib/enas-cnn-cifar10-cpu
              command:
                - "python3.5"
                - "-u"
                - "RunTrial.py"
                {{- with .HyperParameters}}
                {{- range .}}
                - "--{{.Name}}=\"{{.Value}}\""
                {{- end}}
                {{- end}}
                - "--num_epochs=1"
          restartPolicy: Never
  # kubeflow.org/v1 PyTorchJob with 1 Master and 2 Worker replicas; both
  # replica types run the same image and command with the same hyperparameters.
  pytorchJobTemplate: |-
    apiVersion: "kubeflow.org/v1"
    kind: PyTorchJob
    metadata:
      name: {{.Trial}}
      namespace: {{.NameSpace}}
    spec:
      pytorchReplicaSpecs:
        Master:
          replicas: 1
          restartPolicy: OnFailure
          template:
            spec:
              containers:
                - name: pytorch
                  image: gcr.io/kubeflow-ci/pytorch-dist-mnist-test:v1.0
                  imagePullPolicy: Always
                  command:
                    - "python"
                    - "/var/mnist.py"
                    {{- with .HyperParameters}}
                    {{- range .}}
                    - "{{.Name}}={{.Value}}"
                    {{- end}}
                    {{- end}}
        Worker:
          replicas: 2
          restartPolicy: OnFailure
          template:
            spec:
              containers:
                - name: pytorch
                  image: gcr.io/kubeflow-ci/pytorch-dist-mnist-test:v1.0
                  imagePullPolicy: Always
                  command:
                    - "python"
                    - "/var/mnist.py"
                    {{- with .HyperParameters}}
                    {{- range .}}
                    - "{{.Name}}={{.Value}}"
                    {{- end}}
                    {{- end}}
kind: ConfigMap
metadata:
  labels:
    app: katib-trial-templates
    app.kubernetes.io/component: katib
    app.kubernetes.io/name: katib-controller
  name: trial-template-labeled
  namespace: kubeflow

0 comments on commit 5b7c89b

Please sign in to comment.