Skip to content

Commit

Permalink
feat: bump katib-operators version 0.15.0 -> 0.16.0-rc.1 (#123)
Browse files Browse the repository at this point in the history
- Update template files to match the `0.16.0-rc.1` release manifests
- Bump images to their `v0.16.0-rc.1` tags
See #122
  • Loading branch information
orfeas-k authored Aug 25, 2023
1 parent f862faa commit b00817e
Show file tree
Hide file tree
Showing 22 changed files with 152 additions and 150 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ __pycache__/
.coverage
.idea
.tox
.vscode
38 changes: 19 additions & 19 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,25 +72,25 @@ Katib controller comes with a set of preconfigured images that are used in Katib

```json
{
"default_trial_template": "docker.io/kubeflowkatib/mxnet-mnist:v0.15.0",
"early_stopping__medianstop": "docker.io/kubeflowkatib/earlystopping-medianstop:v0.15.0",
"enas_cpu_template": "docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:v0.15.0",
"metrics_collector_sidecar__stdout": "docker.io/kubeflowkatib/file-metrics-collector:v0.15.0",
"metrics_collector_sidecar__file": "docker.io/kubeflowkatib/file-metrics-collector:v0.15.0",
"metrics_collector_sidecar__tensorflow_event": "docker.io/kubeflowkatib/tfevent-metrics-collector:v0.15.0",
"pytorch_job_template__master": "docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.15.0",
"pytorch_job_template__worker": "docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.15.0",
"suggestion__random": "docker.io/kubeflowkatib/suggestion-hyperopt:v0.15.0",
"suggestion__tpe": "docker.io/kubeflowkatib/suggestion-hyperopt:v0.15.0",
"suggestion__grid": "docker.io/kubeflowkatib/suggestion-optuna:v0.15.0",
"suggestion__hyperband": "docker.io/kubeflowkatib/suggestion-hyperband:v0.15.0",
"suggestion__bayesianoptimization": "docker.io/kubeflowkatib/suggestion-skopt:v0.15.0",
"suggestion__cmaes": "docker.io/kubeflowkatib/suggestion-goptuna:v0.15.0",
"suggestion__sobol": "docker.io/kubeflowkatib/suggestion-goptuna:v0.15.0",
"suggestion__multivariate_tpe": "docker.io/kubeflowkatib/suggestion-optuna:v0.15.0",
"suggestion__enas": "docker.io/kubeflowkatib/suggestion-enas:v0.15.0",
"suggestion__darts": "docker.io/kubeflowkatib/suggestion-darts:v0.15.0",
"suggestion__pbt": "docker.io/kubeflowkatib/suggestion-pbt:v0.15.0",
"default_trial_template": "docker.io/kubeflowkatib/mxnet-mnist:v0.16.0-rc.1",
"early_stopping__medianstop": "docker.io/kubeflowkatib/earlystopping-medianstop:v0.16.0-rc.1",
"enas_cpu_template": "docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:v0.16.0-rc.1",
"metrics_collector_sidecar__stdout": "docker.io/kubeflowkatib/file-metrics-collector:v0.16.0-rc.1",
"metrics_collector_sidecar__file": "docker.io/kubeflowkatib/file-metrics-collector:v0.16.0-rc.1",
"metrics_collector_sidecar__tensorflow_event": "docker.io/kubeflowkatib/tfevent-metrics-collector:v0.16.0-rc.1",
"pytorch_job_template__master": "docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.16.0-rc.1",
"pytorch_job_template__worker": "docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.16.0-rc.1",
"suggestion__random": "docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0-rc.1",
"suggestion__tpe": "docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0-rc.1",
"suggestion__grid": "docker.io/kubeflowkatib/suggestion-optuna:v0.16.0-rc.1",
"suggestion__hyperband": "docker.io/kubeflowkatib/suggestion-hyperband:v0.16.0-rc.1",
"suggestion__bayesianoptimization": "docker.io/kubeflowkatib/suggestion-skopt:v0.16.0-rc.1",
"suggestion__cmaes": "docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0-rc.1",
"suggestion__sobol": "docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0-rc.1",
"suggestion__multivariate_tpe": "docker.io/kubeflowkatib/suggestion-optuna:v0.16.0-rc.1",
"suggestion__enas": "docker.io/kubeflowkatib/suggestion-enas:v0.16.0-rc.1",
"suggestion__darts": "docker.io/kubeflowkatib/suggestion-darts:v0.16.0-rc.1",
"suggestion__pbt": "docker.io/kubeflowkatib/suggestion-pbt:v0.16.0-rc.1",
}
```

Expand Down
2 changes: 1 addition & 1 deletion charms/katib-controller/metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ resources:
type: oci-image
description: Backing OCI image
auto-fetch: true
upstream-source: docker.io/kubeflowkatib/katib-controller:v0.15.0
upstream-source: docker.io/kubeflowkatib/katib-controller:v0.16.0-rc.1
provides:
metrics-endpoint:
interface: prometheus_scrape
Expand Down
80 changes: 47 additions & 33 deletions charms/katib-controller/src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,25 +19,25 @@
from ops.model import ActiveStatus, BlockedStatus, MaintenanceStatus, WaitingStatus

DEFAULT_IMAGES = {
"default_trial_template": "docker.io/kubeflowkatib/mxnet-mnist:v0.15.0",
"early_stopping__medianstop": "docker.io/kubeflowkatib/earlystopping-medianstop:v0.15.0",
"enas_cpu_template": "docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:v0.15.0",
"metrics_collector_sidecar__stdout": "docker.io/kubeflowkatib/file-metrics-collector:v0.15.0",
"metrics_collector_sidecar__file": "docker.io/kubeflowkatib/file-metrics-collector:v0.15.0",
"metrics_collector_sidecar__tensorflow_event": "docker.io/kubeflowkatib/tfevent-metrics-collector:v0.15.0", # noqa: E501
"pytorch_job_template__master": "docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.15.0",
"pytorch_job_template__worker": "docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.15.0",
"suggestion__random": "docker.io/kubeflowkatib/suggestion-hyperopt:v0.15.0",
"suggestion__tpe": "docker.io/kubeflowkatib/suggestion-hyperopt:v0.15.0",
"suggestion__grid": "docker.io/kubeflowkatib/suggestion-optuna:v0.15.0",
"suggestion__hyperband": "docker.io/kubeflowkatib/suggestion-hyperband:v0.15.0",
"suggestion__bayesianoptimization": "docker.io/kubeflowkatib/suggestion-skopt:v0.15.0",
"suggestion__cmaes": "docker.io/kubeflowkatib/suggestion-goptuna:v0.15.0",
"suggestion__sobol": "docker.io/kubeflowkatib/suggestion-goptuna:v0.15.0",
"suggestion__multivariate_tpe": "docker.io/kubeflowkatib/suggestion-optuna:v0.15.0",
"suggestion__enas": "docker.io/kubeflowkatib/suggestion-enas:v0.15.0",
"suggestion__darts": "docker.io/kubeflowkatib/suggestion-darts:v0.15.0",
"suggestion__pbt": "docker.io/kubeflowkatib/suggestion-pbt:v0.15.0",
"default_trial_template": "docker.io/kubeflowkatib/mxnet-mnist:v0.16.0-rc.1",
"early_stopping__medianstop": "docker.io/kubeflowkatib/earlystopping-medianstop:v0.16.0-rc.1",
"enas_cpu_template": "docker.io/kubeflowkatib/enas-cnn-cifar10-cpu:v0.16.0-rc.1",
"metrics_collector_sidecar__stdout": "docker.io/kubeflowkatib/file-metrics-collector:v0.16.0-rc.1", # noqa: E501
"metrics_collector_sidecar__file": "docker.io/kubeflowkatib/file-metrics-collector:v0.16.0-rc.1", # noqa: E501
"metrics_collector_sidecar__tensorflow_event": "docker.io/kubeflowkatib/tfevent-metrics-collector:v0.16.0-rc.1", # noqa: E501
"pytorch_job_template__master": "docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.16.0-rc.1",
"pytorch_job_template__worker": "docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.16.0-rc.1",
"suggestion__random": "docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0-rc.1",
"suggestion__tpe": "docker.io/kubeflowkatib/suggestion-hyperopt:v0.16.0-rc.1",
"suggestion__grid": "docker.io/kubeflowkatib/suggestion-optuna:v0.16.0-rc.1",
"suggestion__hyperband": "docker.io/kubeflowkatib/suggestion-hyperband:v0.16.0-rc.1",
"suggestion__bayesianoptimization": "docker.io/kubeflowkatib/suggestion-skopt:v0.16.0-rc.1",
"suggestion__cmaes": "docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0-rc.1",
"suggestion__sobol": "docker.io/kubeflowkatib/suggestion-goptuna:v0.16.0-rc.1",
"suggestion__multivariate_tpe": "docker.io/kubeflowkatib/suggestion-optuna:v0.16.0-rc.1",
"suggestion__enas": "docker.io/kubeflowkatib/suggestion-enas:v0.16.0-rc.1",
"suggestion__darts": "docker.io/kubeflowkatib/suggestion-darts:v0.16.0-rc.1",
"suggestion__pbt": "docker.io/kubeflowkatib/suggestion-pbt:v0.16.0-rc.1",
}

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -173,6 +173,8 @@ def set_pod_spec(self, event):
self._check_leader()
self.custom_images = parse_images_config(self.model.config["custom_images"])
self.images_context = self.get_images(DEFAULT_IMAGES, self.custom_images)
self.katib_config_context = self.images_context
self.katib_config_context["webhookPort"] = self.model.config["webhook-port"]
image_details = self._check_image_details()
except CheckFailed as check_failed:
self.model.unit.status = check_failed.status
Expand Down Expand Up @@ -201,6 +203,7 @@ def set_pod_spec(self, event):
"pods",
"pods/log",
"pods/status",
"secrets",
],
"verbs": ["*"],
},
Expand Down Expand Up @@ -239,6 +242,14 @@ def set_pod_spec(self, event):
],
"verbs": ["*"],
},
{
"apiGroups": ["admissionregistration.k8s.io"],
"resources": [
"validatingwebhookconfigurations",
"mutatingwebhookconfigurations",
],
"verbs": ["get", "watch", "list", "patch"],
},
],
}
]
Expand All @@ -248,14 +259,7 @@ def set_pod_spec(self, event):
"name": "katib-controller",
"imageDetails": image_details,
"command": ["./katib-controller"],
"args": [
f"--webhook-port={self.model.config['webhook-port']}",
"--trial-resources=Job.v1.batch",
"--trial-resources=TFJob.v1.kubeflow.org",
"--trial-resources=PyTorchJob.v1.kubeflow.org",
"--trial-resources=MPIJob.v1.kubeflow.org",
"--trial-resources=PipelineRun.v1beta1.tekton.dev",
],
"args": ["--katib-config=/katib-config/katib-config.yaml"],
"ports": [
{
"name": "webhook",
Expand All @@ -276,7 +280,20 @@ def set_pod_spec(self, event):
{"path": "tls.crt", "content": self._stored.cert},
{"path": "tls.key", "content": self._stored.key},
],
}
},
{
"name": "katib-config",
"mountPath": "/katib-config",
"files": [
{
"path": "katib-config.yaml",
"content": render_template(
"src/templates/katib-config.yaml.j2",
self.katib_config_context,
),
}
],
},
],
}
],
Expand All @@ -302,11 +319,8 @@ def set_pod_spec(self, event):
},
"configMaps": {
"katib-config": {
f: render_template(f"src/templates/{f}.json.j2", self.images_context)
for f in (
"metrics-collector-sidecar",
"suggestion",
"early-stopping",
"katib-config.yaml": render_template(
"src/templates/katib-config.yaml.j2", self.katib_config_context
)
},
"trial-template": {
Expand Down
5 changes: 0 additions & 5 deletions charms/katib-controller/src/templates/early-stopping.json.j2

This file was deleted.

59 changes: 59 additions & 0 deletions charms/katib-controller/src/templates/katib-config.yaml.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
---
# KatibConfig passed to katib-controller through its --katib-config flag.
# This file is a Jinja2 template: webhookPort is filled from the charm's
# `webhook-port` config option and every image reference is filled from the
# charm's image context (defaults merged with any custom images).
apiVersion: config.kubeflow.org/v1beta1
kind: KatibConfig
init:
  controller:
    # Rendered unquoted on purpose so the value stays an integer.
    webhookPort: {{ webhookPort }}
    # Resource kinds the controller accepts as trial workloads.
    trialResources:
      - Job.v1.batch
      - TFJob.v1.kubeflow.org
      - PyTorchJob.v1.kubeflow.org
      - MPIJob.v1.kubeflow.org
      - XGBoostJob.v1.kubeflow.org
      - MXJob.v1.kubeflow.org
runtime:
  # One entry per metrics collector kind.
  metricsCollectors:
    - kind: StdOut
      image: {{ metrics_collector_sidecar__stdout }}
    - kind: File
      image: {{ metrics_collector_sidecar__file }}
    - kind: TensorFlowEvent
      image: {{ metrics_collector_sidecar__tensorflow_event }}
      resources:
        limits:
          memory: 1Gi
  # One entry per suggestion algorithm.
  suggestions:
    - algorithmName: random
      image: {{ suggestion__random }}
    - algorithmName: tpe
      image: {{ suggestion__tpe }}
    - algorithmName: grid
      image: {{ suggestion__grid }}
    - algorithmName: hyperband
      image: {{ suggestion__hyperband }}
    - algorithmName: bayesianoptimization
      image: {{ suggestion__bayesianoptimization }}
    - algorithmName: cmaes
      image: {{ suggestion__cmaes }}
    - algorithmName: sobol
      image: {{ suggestion__sobol }}
    - algorithmName: multivariate-tpe
      image: {{ suggestion__multivariate_tpe }}
    - algorithmName: enas
      image: {{ suggestion__enas }}
      resources:
        limits:
          memory: 200Mi
    - algorithmName: darts
      image: {{ suggestion__darts }}
    - algorithmName: pbt
      image: {{ suggestion__pbt }}
      persistentVolumeClaimSpec:
        accessModes:
          - ReadWriteMany
        resources:
          requests:
            storage: 5Gi
  # One entry per early-stopping algorithm.
  earlyStoppings:
    - algorithmName: medianstop
      image: {{ early_stopping__medianstop }}

This file was deleted.

50 changes: 0 additions & 50 deletions charms/katib-controller/src/templates/suggestion.json.j2

This file was deleted.

9 changes: 6 additions & 3 deletions charms/katib-controller/src/templates/webhooks.yaml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ metadata:
webhooks:
- name: validator.experiment.katib.kubeflow.org
sideEffects: None
failurePolicy: Fail
admissionReviewVersions:
- v1
clientConfig:
Expand All @@ -33,7 +32,6 @@ metadata:
webhooks:
- name: defaulter.experiment.katib.kubeflow.org
sideEffects: None
failurePolicy: Fail
admissionReviewVersions:
- v1
clientConfig:
Expand All @@ -54,7 +52,6 @@ webhooks:
- experiments
- name: mutator.pod.katib.kubeflow.org
sideEffects: None
failurePolicy: Fail
admissionReviewVersions:
- v1
clientConfig:
Expand All @@ -66,6 +63,12 @@ webhooks:
namespaceSelector:
matchLabels:
katib.kubeflow.org/metrics-collector-injection: enabled
objectSelector:
matchExpressions:
- key: katib.kubeflow.org/metrics-collector-injection
operator: NotIn
values:
- disabled
rules:
- apiGroups:
- ""
Expand Down
Loading

0 comments on commit b00817e

Please sign in to comment.