Skip to content

Commit

Permalink
Implement webhook validations for the PaddleJob (kubeflow#2057)
Browse files Browse the repository at this point in the history
Signed-off-by: Yuki Iwai <[email protected]>
  • Loading branch information
tenzen-y authored and johnugeorge committed Apr 28, 2024
1 parent 8ea1a05 commit 27da395
Show file tree
Hide file tree
Showing 9 changed files with 352 additions and 254 deletions.
20 changes: 20 additions & 0 deletions manifests/base/webhook/manifests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,26 @@ kind: ValidatingWebhookConfiguration
metadata:
name: validating-webhook-configuration
webhooks:
- admissionReviewVersions:
- v1
clientConfig:
service:
name: webhook-service
namespace: system
path: /validate-kubeflow-org-v1-paddlejob
failurePolicy: Fail
name: validator.paddlejob.training-operator.kubeflow.org
rules:
- apiGroups:
- kubeflow.org
apiVersions:
- v1
operations:
- CREATE
- UPDATE
resources:
- paddlejobs
sideEffects: None
- admissionReviewVersions:
- v1
clientConfig:
Expand Down
3 changes: 3 additions & 0 deletions manifests/base/webhook/patch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
- op: replace
path: /webhooks/2/clientConfig/service/name
value: training-operator
- op: replace
path: /webhooks/3/clientConfig/service/name
value: training-operator
- op: replace
path: /metadata/name
value: validator.training-operator.kubeflow.org
76 changes: 0 additions & 76 deletions pkg/apis/kubeflow.org/v1/paddlepaddle_validation.go

This file was deleted.

166 changes: 0 additions & 166 deletions pkg/apis/kubeflow.org/v1/paddlepaddle_validation_test.go

This file was deleted.

7 changes: 0 additions & 7 deletions pkg/controller.v1/paddlepaddle/paddlepaddle_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,13 +131,6 @@ func (r *PaddleJobReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
return ctrl.Result{}, client.IgnoreNotFound(err)
}

if err = kubeflowv1.ValidateV1PaddleJob(paddlejob); err != nil {
logger.Error(err, "PaddleJob failed validation")
r.Recorder.Eventf(paddlejob, corev1.EventTypeWarning, commonutil.NewReason(kubeflowv1.PaddleJobKind, commonutil.JobFailedValidationReason),
"PaddleJob failed validation because %s", err)
return ctrl.Result{}, err
}

// Check if reconciliation is needed
jobKey, err := common.KeyFunc(paddlejob)
if err != nil {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,18 @@ package paddle

import (
"context"
"crypto/tls"
"fmt"
"net"
"path/filepath"
"testing"
"time"

kubeflowv1 "github.com/kubeflow/training-operator/pkg/apis/kubeflow.org/v1"
"github.com/kubeflow/training-operator/pkg/controller.v1/common"
paddlewebhook "github.com/kubeflow/training-operator/pkg/webhooks/paddlepaddle"

. "github.com/onsi/ginkgo/v2"
"github.com/onsi/gomega"
. "github.com/onsi/gomega"
"k8s.io/client-go/kubernetes/scheme"
ctrl "sigs.k8s.io/controller-runtime"
Expand All @@ -32,6 +36,7 @@ import (
logf "sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
"sigs.k8s.io/controller-runtime/pkg/webhook"
"volcano.sh/apis/pkg/apis/scheduling/v1beta1"
//+kubebuilder:scaffold:imports
)
Expand Down Expand Up @@ -61,6 +66,9 @@ var _ = BeforeSuite(func() {
testEnv = &envtest.Environment{
CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "manifests", "base", "crds")},
ErrorIfCRDPathMissing: true,
WebhookInstallOptions: envtest.WebhookInstallOptions{
Paths: []string{filepath.Join("..", "..", "..", "manifests", "base", "webhook", "manifests.yaml")},
},
}

cfg, err := testEnv.Start()
Expand All @@ -82,19 +90,33 @@ var _ = BeforeSuite(func() {
Metrics: metricsserver.Options{
BindAddress: "0",
},
WebhookServer: webhook.NewServer(
webhook.Options{
Host: testEnv.WebhookInstallOptions.LocalServingHost,
Port: testEnv.WebhookInstallOptions.LocalServingPort,
CertDir: testEnv.WebhookInstallOptions.LocalServingCertDir,
}),
})
Expect(err).NotTo(gomega.HaveOccurred())
Expect(err).NotTo(HaveOccurred())

gangSchedulingSetupFunc := common.GenNonGangSchedulerSetupFunc()
r := NewReconciler(mgr, gangSchedulingSetupFunc)

Expect(r.SetupWithManager(mgr, 1)).NotTo(gomega.HaveOccurred())
Expect(r.SetupWithManager(mgr, 1)).NotTo(HaveOccurred())
Expect(paddlewebhook.SetupWebhook(mgr)).NotTo(HaveOccurred())

go func() {
defer GinkgoRecover()
err = mgr.Start(testCtx)
Expect(err).ToNot(HaveOccurred(), "failed to run manager")
}()

dialer := &net.Dialer{Timeout: time.Second}
addrPort := fmt.Sprintf("%s:%d", testEnv.WebhookInstallOptions.LocalServingHost, testEnv.WebhookInstallOptions.LocalServingPort)
Eventually(func(g Gomega) {
conn, err := tls.DialWithDialer(dialer, "tcp", addrPort, &tls.Config{InsecureSkipVerify: true})
g.Expect(err).NotTo(HaveOccurred())
g.Expect(conn.Close()).NotTo(HaveOccurred())
}).Should(Succeed())
})

var _ = AfterSuite(func() {
Expand Down
Loading

0 comments on commit 27da395

Please sign in to comment.