From 0c82174e285ddfbb2d142021901d47fe0e59c2c3 Mon Sep 17 00:00:00 2001 From: Orfeas Kourkakis Date: Tue, 30 Jul 2024 14:11:38 +0300 Subject: [PATCH] fix(tests): Update experiments CRs (#222) Update CRs according to upstream examples. Closes #214 --- .../experiments/bayesian-optimization.yaml | 44 ++++++---------- tests/assets/crs/experiments/cmaes.yaml | 44 ++++++---------- tests/assets/crs/experiments/enas-cpu.yaml | 6 +-- .../assets/crs/experiments/grid-example.yaml | 49 +++++++---------- tests/assets/crs/experiments/hyperband.yaml | 45 ++++++---------- tests/assets/crs/experiments/median-stop.yaml | 33 ++++++------ tests/assets/crs/experiments/random.yaml | 52 +++++++------------ 7 files changed, 106 insertions(+), 167 deletions(-) diff --git a/tests/assets/crs/experiments/bayesian-optimization.yaml b/tests/assets/crs/experiments/bayesian-optimization.yaml index 94006ac..168b3bb 100644 --- a/tests/assets/crs/experiments/bayesian-optimization.yaml +++ b/tests/assets/crs/experiments/bayesian-optimization.yaml @@ -9,11 +9,9 @@ metadata: name: bayesian-optimization spec: objective: - type: maximize - goal: 0.99 - objectiveMetricName: Validation-accuracy - additionalMetricNames: - - Train-accuracy + type: minimize + goal: 0.001 + objectiveMetricName: loss algorithm: algorithmName: bayesianoptimization algorithmSettings: @@ -27,31 +25,21 @@ spec: parameterType: double feasibleSpace: min: "0.01" - max: "0.03" - - name: num-layers - parameterType: int - feasibleSpace: - min: "2" - max: "5" - - name: optimizer - parameterType: categorical + max: "0.05" + - name: momentum + parameterType: double feasibleSpace: - list: - - sgd - - adam - - ftrl + min: "0.5" + max: "0.9" trialTemplate: primaryContainerName: training-container trialParameters: - name: learningRate description: Learning rate for the training model reference: lr - - name: numberLayers - description: Number of training model layers - reference: num-layers - - name: optimizer - description: Training model optimizer (sdg, adam or ftrl) - reference: optimizer + - name: momentum + description: Momentum for the training model + reference: momentum trialSpec: apiVersion: batch/v1 kind: Job @@ -60,14 +48,14 @@ spec: spec: containers: - name: training-container - image: docker.io/kubeflowkatib/mxnet-mnist:v0.17.0 + image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.17.0 command: - "python3" - - "/opt/mxnet-mnist/mnist.py" - - "--batch-size=64" + - "/opt/pytorch-mnist/mnist.py" + - "--epochs=1" + - "--batch-size=16" - "--lr=${trialParameters.learningRate}" - - "--num-layers=${trialParameters.numberLayers}" - - "--optimizer=${trialParameters.optimizer}" + - "--momentum=${trialParameters.momentum}" resources: # modified limits: # modified memory: "2Gi" # modified diff --git a/tests/assets/crs/experiments/cmaes.yaml b/tests/assets/crs/experiments/cmaes.yaml index 8dfe89b..9e23d77 100644 --- a/tests/assets/crs/experiments/cmaes.yaml +++ b/tests/assets/crs/experiments/cmaes.yaml @@ -9,11 +9,9 @@ metadata: name: cmaes spec: objective: - type: maximize - goal: 0.99 - objectiveMetricName: Validation-accuracy - additionalMetricNames: - - Train-accuracy + type: minimize + goal: 0.001 + objectiveMetricName: loss algorithm: algorithmName: cmaes algorithmSettings: @@ -27,31 +25,21 @@ spec: parameterType: double feasibleSpace: min: "0.01" - max: "0.03" - - name: num-layers - parameterType: int - feasibleSpace: - min: "2" - max: "5" - - name: optimizer - parameterType: categorical + max: "0.05" + - name: momentum + parameterType: double feasibleSpace: - list: - - sgd - - adam - - ftrl + min: "0.5" + max: "0.9" trialTemplate: primaryContainerName: training-container trialParameters: - name: learningRate description: Learning rate for the training model reference: lr - - name: numberLayers - description: Number of training model layers - reference: num-layers - - name: optimizer - description: Training model optimizer (sdg, adam or ftrl) - reference: optimizer + - name: momentum + description: Momentum for the training model + reference: momentum trialSpec: apiVersion: batch/v1 kind: Job @@ -60,14 +48,14 @@ spec: spec: containers: - name: training-container - image: docker.io/kubeflowkatib/mxnet-mnist:v0.17.0 + image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.17.0 command: - "python3" - - "/opt/mxnet-mnist/mnist.py" - - "--batch-size=64" + - "/opt/pytorch-mnist/mnist.py" + - "--epochs=1" + - "--batch-size=16" - "--lr=${trialParameters.learningRate}" - - "--num-layers=${trialParameters.numberLayers}" - - "--optimizer=${trialParameters.optimizer}" + - "--momentum=${trialParameters.momentum}" resources: # modified limits: # modified memory: "2Gi" # modified diff --git a/tests/assets/crs/experiments/enas-cpu.yaml b/tests/assets/crs/experiments/enas-cpu.yaml index 54b5eb4..39b3266 100644 --- a/tests/assets/crs/experiments/enas-cpu.yaml +++ b/tests/assets/crs/experiments/enas-cpu.yaml @@ -8,9 +8,9 @@ kind: Experiment metadata: name: enas-cpu spec: - parallelTrialCount: 1 - maxTrialCount: 1 - maxFailedTrialCount: 1 + parallelTrialCount: 1 # modified + maxTrialCount: 1 # modified + maxFailedTrialCount: 1 # modified objective: type: maximize goal: 0.99 diff --git a/tests/assets/crs/experiments/grid-example.yaml b/tests/assets/crs/experiments/grid-example.yaml index 0c8a2bb..97bdf41 100644 --- a/tests/assets/crs/experiments/grid-example.yaml +++ b/tests/assets/crs/experiments/grid-example.yaml @@ -9,11 +9,9 @@ metadata: name: grid spec: objective: - type: maximize - goal: 0.99 - objectiveMetricName: Validation-accuracy - additionalMetricNames: - - Train-accuracy + type: minimize + goal: 0.001 + objectiveMetricName: loss algorithm: algorithmName: grid parallelTrialCount: 1 # modified @@ -23,33 +21,24 @@ spec: - name: lr parameterType: double feasibleSpace: - min: "0.001" - max: "0.01" - step: "0.001" - - name: num-layers - parameterType: int - feasibleSpace: - min: "2" - max: "5" - - name: optimizer - parameterType: categorical + min: "0.01" + step: "0.005" + max: "0.05" + - name: momentum + parameterType: double feasibleSpace: - list: - - sgd - - adam - - ftrl + min: "0.5" + step: "0.1" + max: "0.9" trialTemplate: primaryContainerName: training-container trialParameters: - name: learningRate description: Learning rate for the training model reference: lr - - name: numberLayers - description: Number of training model layers - reference: num-layers - - name: optimizer - description: Training model optimizer (sdg, adam or ftrl) - reference: optimizer + - name: momentum + description: Momentum for the training model + reference: momentum trialSpec: apiVersion: batch/v1 kind: Job @@ -58,14 +47,14 @@ spec: spec: containers: - name: training-container - image: docker.io/kubeflowkatib/mxnet-mnist:v0.17.0 + image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.17.0 command: - "python3" - - "/opt/mxnet-mnist/mnist.py" - - "--batch-size=64" + - "/opt/pytorch-mnist/mnist.py" + - "--epochs=1" + - "--batch-size=16" - "--lr=${trialParameters.learningRate}" - - "--num-layers=${trialParameters.numberLayers}" - - "--optimizer=${trialParameters.optimizer}" + - "--momentum=${trialParameters.momentum}" resources: # modified limits: # modified memory: "2Gi" # modified diff --git a/tests/assets/crs/experiments/hyperband.yaml b/tests/assets/crs/experiments/hyperband.yaml index fe26ed5..cfdf8d6 100644 --- a/tests/assets/crs/experiments/hyperband.yaml +++ b/tests/assets/crs/experiments/hyperband.yaml @@ -11,11 +11,9 @@ spec: parallelTrialCount: 2 maxTrialCount: 2 objective: - type: maximize - goal: 0.99 - objectiveMetricName: Validation-accuracy - additionalMetricNames: - - Train-accuracy + type: minimize + goal: 0.001 + objectiveMetricName: loss algorithm: algorithmName: hyperband algorithmSettings: @@ -31,19 +29,12 @@ spec: parameterType: double feasibleSpace: min: "0.01" - max: "0.03" - - name: num-layers - parameterType: int - feasibleSpace: - min: "2" - max: "5" - - name: optimizer - parameterType: categorical + max: "0.05" + - name: momentum + parameterType: double feasibleSpace: - list: - - sgd - - adam - - ftrl + min: "0.5" + max: "0.9" - name: num-epochs parameterType: int feasibleSpace: @@ -55,12 +46,9 @@ spec: - name: learningRate description: Learning rate for the training model reference: lr - - name: numberLayers - description: Number of training model layers - reference: num-layers - - name: optimizer - description: Training model optimizer (sdg, adam or ftrl) - reference: optimizer + - name: momentum + description: Momentum for the training model + reference: momentum - name: numberEpochs description: Number of epochs to train the model reference: num-epochs @@ -72,15 +60,14 @@ spec: spec: containers: - name: training-container - image: docker.io/kubeflowkatib/mxnet-mnist:v0.17.0 + image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.17.0 command: - "python3" - - "/opt/mxnet-mnist/mnist.py" - - "--batch-size=32" + - "/opt/pytorch-mnist/mnist.py" + - "--epochs=${trialParameters.numberEpochs}" + - "--batch-size=16" - "--lr=${trialParameters.learningRate}" - - "--num-layers=${trialParameters.numberLayers}" - - "--optimizer=${trialParameters.optimizer}" - - "--num-epochs=${trialParameters.numberEpochs}" + - "--momentum=${trialParameters.momentum}" resources: # modified limits: # modified memory: "2Gi" # modified diff --git a/tests/assets/crs/experiments/median-stop.yaml b/tests/assets/crs/experiments/median-stop.yaml index bef307b..4eb8a50 100644 --- a/tests/assets/crs/experiments/median-stop.yaml +++ b/tests/assets/crs/experiments/median-stop.yaml @@ -9,11 +9,9 @@ metadata: name: median-stop spec: objective: - type: maximize - goal: 0.99 - objectiveMetricName: Validation-accuracy - additionalMetricNames: - - Train-accuracy + type: minimize + goal: 0.001 + objectiveMetricName: loss algorithm: algorithmName: random earlyStopping: @@ -31,12 +29,12 @@ spec: parameterType: double feasibleSpace: min: "0.01" - max: "0.5" - - name: num-epochs - parameterType: int + max: "0.05" + - name: momentum + parameterType: double feasibleSpace: - min: "3" - max: "4" + min: "0.5" + max: "0.9" trialTemplate: retain: true primaryContainerName: training-container @@ -44,9 +42,9 @@ spec: - name: learningRate description: Learning rate for the training model reference: lr - - name: numberEpochs - description: Number of epochs to train the model - reference: num-epochs + - name: momentum + description: Momentum for the training model + reference: momentum trialSpec: apiVersion: batch/v1 kind: Job @@ -55,13 +53,14 @@ spec: spec: containers: - name: training-container - image: docker.io/kubeflowkatib/mxnet-mnist:v0.17.0 + image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.17.0 command: - "python3" - - "/opt/mxnet-mnist/mnist.py" - - "--batch-size=64" + - "/opt/pytorch-mnist/mnist.py" + - "--epochs=1" + - "--batch-size=16" - "--lr=${trialParameters.learningRate}" - - "--num-epochs=${trialParameters.numberEpochs}" + - "--momentum=${trialParameters.momentum}" resources: # modified limits: # modified memory: "2Gi" # modified diff --git a/tests/assets/crs/experiments/random.yaml b/tests/assets/crs/experiments/random.yaml index d6bdaaf..8182fce 100644 --- a/tests/assets/crs/experiments/random.yaml +++ b/tests/assets/crs/experiments/random.yaml @@ -9,11 +9,9 @@ metadata: name: random spec: objective: - type: maximize - goal: 0.99 - objectiveMetricName: Validation-accuracy - additionalMetricNames: - - Train-accuracy + type: minimize + goal: 0.001 + objectiveMetricName: loss algorithm: algorithmName: random parallelTrialCount: 1 # modified @@ -24,31 +22,21 @@ spec: parameterType: double feasibleSpace: min: "0.01" - max: "0.03" - - name: num-layers - parameterType: int - feasibleSpace: - min: "2" - max: "5" - - name: optimizer - parameterType: categorical + max: "0.05" + - name: momentum + parameterType: double feasibleSpace: - list: - - sgd - - adam - - ftrl + min: "0.5" + max: "0.9" trialTemplate: primaryContainerName: training-container trialParameters: - name: learningRate description: Learning rate for the training model reference: lr - - name: numberLayers - description: Number of training model layers - reference: num-layers - - name: optimizer - description: Training model optimizer (sdg, adam or ftrl) - reference: optimizer + - name: momentum + description: Momentum for the training model + reference: momentum trialSpec: apiVersion: batch/v1 kind: Job @@ -57,16 +45,16 @@ spec: spec: containers: - name: training-container - image: docker.io/kubeflowkatib/mxnet-mnist:v0.17.0 + image: docker.io/kubeflowkatib/pytorch-mnist-cpu:v0.17.0 command: - "python3" - - "/opt/mxnet-mnist/mnist.py" - - "--batch-size=64" + - "/opt/pytorch-mnist/mnist.py" + - "--epochs=1" + - "--batch-size=16" - "--lr=${trialParameters.learningRate}" - - "--num-layers=${trialParameters.numberLayers}" - - "--optimizer=${trialParameters.optimizer}" - resources: # modified - limits: # modified - memory: "2Gi" # modified - cpu: "1" # modified + - "--momentum=${trialParameters.momentum}" + resources: + limits: + memory: "1Gi" + cpu: "0.5" restartPolicy: Never