From 3459880cfc087b4afefa00223c43dab58e8614ea Mon Sep 17 00:00:00 2001 From: kevinliu24 Date: Fri, 3 May 2024 15:08:29 -0700 Subject: [PATCH 01/13] Add autoscaler webhook --- cmd/autoscaler/app/autoscaler.go | 24 +++++++ cmd/autoscaler/app/options/options.go | 8 +++ pkg/webhook/autoscaler/helper.go | 62 +++++++++++++++++ pkg/webhook/autoscaler/validating.go | 82 +++++++++++++++++++++++ pkg/webhook/util/validation/autoscaler.go | 28 ++++++++ pkg/webhook/util/validation/validation.go | 8 +++ 6 files changed, 212 insertions(+) create mode 100644 pkg/webhook/autoscaler/helper.go create mode 100644 pkg/webhook/autoscaler/validating.go create mode 100644 pkg/webhook/util/validation/autoscaler.go diff --git a/cmd/autoscaler/app/autoscaler.go b/cmd/autoscaler/app/autoscaler.go index 02693bc7..10c14366 100644 --- a/cmd/autoscaler/app/autoscaler.go +++ b/cmd/autoscaler/app/autoscaler.go @@ -19,6 +19,7 @@ package app import ( "context" "flag" + "net/http" "github.com/spf13/cobra" cliflag "k8s.io/component-base/cli/flag" @@ -28,6 +29,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/cache" "sigs.k8s.io/controller-runtime/pkg/config" "sigs.k8s.io/controller-runtime/pkg/healthz" + "sigs.k8s.io/controller-runtime/pkg/webhook" + "sigs.k8s.io/controller-runtime/pkg/webhook/admission" "github.com/vesoft-inc/nebula-operator/apis/autoscaling/scheme" "github.com/vesoft-inc/nebula-operator/apis/autoscaling/v1alpha1" @@ -36,6 +39,7 @@ import ( klogflag "github.com/vesoft-inc/nebula-operator/pkg/flag/klog" profileflag "github.com/vesoft-inc/nebula-operator/pkg/flag/profile" "github.com/vesoft-inc/nebula-operator/pkg/version" + nawebhook "github.com/vesoft-inc/nebula-operator/pkg/webhook/autoscaler" ) // NewAutoscalerCommand creates a *cobra.Command object with default parameters @@ -106,6 +110,17 @@ func Run(ctx context.Context, opts *options.Options) error { }, } + if opts.EnableAdmissionWebhook { + ctrlOptions.WebhookServer = webhook.NewServer(webhook.Options{ + Host: 
opts.WebhookOpts.BindAddress, + Port: opts.WebhookOpts.SecurePort, + CertDir: opts.WebhookOpts.CertDir, + CertName: opts.WebhookOpts.CertName, + KeyName: opts.WebhookOpts.KeyName, + TLSMinVersion: opts.WebhookOpts.TLSMinVersion, + }) + } + mgr, err := ctrlruntime.NewManager(cfg, ctrlOptions) if err != nil { klog.Errorf("Failed to build nebula-autoscaler: %v", err) @@ -126,6 +141,15 @@ func Run(ctx context.Context, opts *options.Options) error { return err } + if opts.EnableAdmissionWebhook { + decoder := admission.NewDecoder(mgr.GetScheme()) + klog.Info("Registering webhooks to nebula-auto-scaler") + hookServer := mgr.GetWebhookServer() + hookServer.Register("/validate-nebulaautoscaler", + &webhook.Admission{Handler: &nawebhook.ValidatingAdmission{Decoder: decoder}}) + hookServer.WebhookMux().Handle("/readyz/", http.StripPrefix("/readyz/", &healthz.Handler{})) + } + if err := mgr.AddHealthzCheck("ping", healthz.Ping); err != nil { klog.Errorf("failed to add health check endpoint: %v", err) return err diff --git a/cmd/autoscaler/app/options/options.go b/cmd/autoscaler/app/options/options.go index a92a24fc..77e2788a 100644 --- a/cmd/autoscaler/app/options/options.go +++ b/cmd/autoscaler/app/options/options.go @@ -27,6 +27,7 @@ import ( ctrlmgrconfigv1alpha1 "k8s.io/kube-controller-manager/config/v1alpha1" "github.com/vesoft-inc/nebula-operator/pkg/flag/profile" + "github.com/vesoft-inc/nebula-operator/pkg/flag/webhook" ) const ( @@ -57,6 +58,9 @@ type Options struct { // HPAOpts defines the configuration of autoscaler controller. HPAOpts ctrlmgrconfigv1alpha1.HPAControllerConfiguration + // EnableAdmissionWebhook enable admission webhook for autoscaler. + EnableAdmissionWebhook bool + // MetricsBindAddress is the TCP address that the controller should bind to // for serving prometheus metrics. // It can be set to "0" to disable the metrics serving. 
@@ -70,6 +74,7 @@ type Options struct { HealthProbeBindAddress string ProfileOpts profile.Options + WebhookOpts webhook.Options } func NewOptions() *Options { @@ -110,7 +115,10 @@ func (o *Options) AddFlags(flags *pflag.FlagSet) { flags.DurationVar(&o.HPAOpts.HorizontalPodAutoscalerInitialReadinessDelay.Duration, "autoscaler-initial-readiness-delay", defaultAutoscalerInitialReadinessDelay.Duration, "The period after pod start during which readiness changes will be treated as initial readiness.") flags.StringVar(&o.MetricsBindAddress, "metrics-bind-address", ":8080", "The TCP address that the controller should bind to for serving prometheus metrics(e.g. 127.0.0.1:8080, :8080). It can be set to \"0\" to disable the metrics serving.") + flags.BoolVar(&o.EnableAdmissionWebhook, "enable-admission-webhook", false, "If set to ture enable admission webhook for autoscaler.") flags.StringVar(&o.HealthProbeBindAddress, "health-probe-bind-address", ":8081", "The TCP address that the controller should bind to for serving health probes.(e.g. 127.0.0.1:8081, :8081). It can be set to \"0\" to disable the health probe serving.") + o.WebhookOpts.AddFlags(flags) + //flags.StringSliceVar(&o.Namespaces, "watch-namespaces", nil, "Namespaces restricts the controller watches for updates to Kubernetes objects. If empty, all namespaces are watched. Multiple namespaces seperated by comma.(e.g. ns1,ns2,ns3).") } diff --git a/pkg/webhook/autoscaler/helper.go b/pkg/webhook/autoscaler/helper.go new file mode 100644 index 00000000..3b122109 --- /dev/null +++ b/pkg/webhook/autoscaler/helper.go @@ -0,0 +1,62 @@ +/* +Copyright 2024 Vesoft Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package nebulaautoscaler
+
+import (
+	"github.com/vesoft-inc/nebula-operator/apis/autoscaling/v1alpha1"
+	"github.com/vesoft-inc/nebula-operator/pkg/webhook/util/validation"
+	admissionv1 "k8s.io/api/admission/v1"
+	"k8s.io/apimachinery/pkg/util/validation/field"
+	"k8s.io/klog/v2"
+)
+
+// validateNebulaAutoscalerCreate validates a NebulaAutoscaler on create.
+func validateNebulaAutoscalerCreate(na *v1alpha1.NebulaAutoscaler) (allErrs field.ErrorList) {
+	klog.Infof("receive admission with resource [%s/%s], GVK %s, operation %s", na.Namespace, na.Name,
+		na.GroupVersionKind().String(), admissionv1.Create)
+
+	return append(allErrs, validateNebulaAutoscalarReplica(na)...)
+}
+
+// validateNebulaAutoscalerUpdate validates a NebulaAutoscaler on update.
+// oldNA is accepted for the update path but no rule reads it yet.
+func validateNebulaAutoscalerUpdate(na, oldNA *v1alpha1.NebulaAutoscaler) (allErrs field.ErrorList) {
+	klog.Infof("receive admission with resource [%s/%s], GVK %s, operation %s", na.Namespace, na.Name,
+		na.GroupVersionKind().String(), admissionv1.Update)
+
+	return append(allErrs, validateNebulaAutoscalarReplica(na)...)
+}
+
+// validateNebulaAutoscalarReplica checks that the graphd policy's minimum replica
+// count does not exceed its maximum.
+//
+// MinReplicas is a pointer. When it is nil there is no lower bound to compare, so the
+// check is skipped rather than dereferencing nil and panicking inside the webhook.
+func validateNebulaAutoscalarReplica(na *v1alpha1.NebulaAutoscaler) (allErrs field.ErrorList) {
+	minReplicas := na.Spec.GraphdPolicy.MinReplicas
+	if minReplicas == nil {
+		return allErrs
+	}
+
+	// NOTE(review): the reported path says "graphPolicy" while the Go field is
+	// GraphdPolicy; confirm it matches the serialized field name.
+	return append(allErrs, validation.ValidateMinMaxReplica(
+		field.NewPath("spec").Child("graphPolicy").Child("minReplicas"),
+		int(*minReplicas),
+		int(na.Spec.GraphdPolicy.MaxReplicas),
+	)...)
+}
diff --git a/pkg/webhook/autoscaler/validating.go b/pkg/webhook/autoscaler/validating.go
new file mode 100644
index 00000000..3a4bd234
--- /dev/null
+++ b/pkg/webhook/autoscaler/validating.go
@@ -0,0 +1,82 @@
+/*
+Copyright 2024 Vesoft Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package nebulaautoscaler
+
+import (
+	"context"
+	"net/http"
+
+	admissionv1 "k8s.io/api/admission/v1"
+	"k8s.io/klog/v2"
+	"sigs.k8s.io/controller-runtime/pkg/webhook/admission"
+
+	"github.com/vesoft-inc/nebula-operator/apis/autoscaling/v1alpha1"
+)
+
+// ValidatingAdmission validates NebulaAutoscaler admission requests.
+type ValidatingAdmission struct {
+	// Decoder decodes objects
+	Decoder *admission.Decoder
+}
+
+var _ admission.Handler = &ValidatingAdmission{}
+
+// Handle handles admission requests.
+func (h *ValidatingAdmission) Handle(_ context.Context, req admission.Request) (resp admission.Response) {
+	klog.Infof("start validating resource %v [%s/%s] operation %s", req.Resource, req.Namespace, req.Name, req.Operation)
+
+	// resp is a named result so this deferred log reports the final response.
+	defer func() {
+		klog.Infof("end validating, allowed %v, reason %v, message %s", resp.Allowed,
+			resp.Result.Reason, resp.Result.Message)
+	}()
+
+	// CONNECT is always allowed. Answer it before decoding so that a payload which
+	// cannot be decoded as a NebulaAutoscaler does not turn the request into a 400.
+	if req.Operation == admissionv1.Connect {
+		return admission.ValidationResponse(true, "")
+	}
+
+	obj := &v1alpha1.NebulaAutoscaler{}
+	if req.Operation == admissionv1.Delete {
+		// Delete requests are decoded from the old object.
+		if err := h.Decoder.DecodeRaw(req.OldObject, obj); err != nil {
+			return admission.Errored(http.StatusBadRequest, err)
+		}
+	} else if err := h.Decoder.Decode(req, obj); err != nil {
+		return admission.Errored(http.StatusBadRequest, err)
+	}
+
+	switch req.Operation {
+	case admissionv1.Create:
+		if allErrs := validateNebulaAutoscalerCreate(obj); len(allErrs) > 0 {
+			return admission.Errored(http.StatusUnprocessableEntity, allErrs.ToAggregate())
+		}
+	case admissionv1.Update:
+		oldObj := &v1alpha1.NebulaAutoscaler{}
+
+		if err := h.Decoder.DecodeRaw(req.OldObject, oldObj); err != nil {
+			return admission.Errored(http.StatusBadRequest, err)
+		}
+
+		if allErrs := validateNebulaAutoscalerUpdate(obj, oldObj); len(allErrs) > 0 {
+			return admission.Errored(http.StatusUnprocessableEntity, allErrs.ToAggregate())
+		}
+	}
+
+	return admission.ValidationResponse(true, "")
+}
diff --git a/pkg/webhook/util/validation/autoscaler.go b/pkg/webhook/util/validation/autoscaler.go
new file mode 100644
index 00000000..b7f16acb
--- /dev/null
+++ b/pkg/webhook/util/validation/autoscaler.go
@@ -0,0 +1,28 @@
+/*
+Copyright 2024 Vesoft Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package validation
+
+import "k8s.io/apimachinery/pkg/util/validation/field"
+
+// ValidateMinMaxReplica reports an error on fldPath when minReplicas is greater than maxReplicas.
+func ValidateMinMaxReplica(fldPath *field.Path, minReplicas, maxReplicas int) (allErrs field.ErrorList) {
+	fieldErr := ValidateMinLessThanMax(fldPath, minReplicas, maxReplicas)
+	if fieldErr == nil {
+		return allErrs
+	}
+	return append(allErrs, fieldErr)
+}
diff --git a/pkg/webhook/util/validation/validation.go b/pkg/webhook/util/validation/validation.go
index 6a0dedd4..914c4a14 100644
--- a/pkg/webhook/util/validation/validation.go
+++ b/pkg/webhook/util/validation/validation.go
@@ -47,3 +47,11 @@ func ValidateOddNumber(fldPath *field.Path, value int) *field.Error {
 	}
 	return nil
 }
+
+// ValidateMinLessThanMax validates that the minimum replicas is less than or equal to the maximum replicas
+func ValidateMinLessThanMax(fldPath *field.Path, minReplicas, maxReplicas int) *field.Error {
+	if minReplicas <= maxReplicas {
+		return nil
+	}
+	return field.Invalid(fldPath, minReplicas, fmt.Sprintf("min replica %v should be less than or equal to max replicas %v", minReplicas, maxReplicas))
+}

From 86122ee8211b936a0ad866f4cd53cd7ffb2d5b3c Mon Sep 17 00:00:00 2001
From: kevinliu24
Date: Sat, 4 May 2024 06:22:40 +0800
Subject: [PATCH 02/13] Add autoscalar webhook helm charts

---
 .../admission-webhook-certificate.yaml | 2 ++
 .../admission-webhook-registration.yaml | 23 +++++++++++++++++++
 .../autoscaler-admission-webhook-service.yaml | 17 ++++++++++++++
 .../controller-manager-deployment.yaml | 20 ++++++++++++++--
charts/nebula-operator/values.yaml | 4 ++++ 5 files changed, 64 insertions(+), 2 deletions(-) create mode 100644 charts/nebula-operator/templates/autoscaler-admission-webhook-service.yaml diff --git a/charts/nebula-operator/templates/admission-webhook-certificate.yaml b/charts/nebula-operator/templates/admission-webhook-certificate.yaml index 1151b603..e1671fb5 100644 --- a/charts/nebula-operator/templates/admission-webhook-certificate.yaml +++ b/charts/nebula-operator/templates/admission-webhook-certificate.yaml @@ -11,6 +11,8 @@ spec: dnsNames: - {{ template "admission-webhook.name" . }}-service.{{ template "nebula-operator.namespace" . }}.svc - {{ template "admission-webhook.name" . }}-service.{{ template "nebula-operator.namespace" . }}.svc.{{ default "cluster.local" .Values.kubernetesClusterDomain }} + - {{ template "autoscaler-admission-webhook.name" . }}-service.{{ template "nebula-operator.namespace" . }}.svc + - {{ template "autoscaler-admission-webhook.name" . }}-service.{{ template "nebula-operator.namespace" . }}.svc.{{ default "cluster.local" .Values.kubernetesClusterDomain }} issuerRef: kind: Issuer name: {{ template "admission-webhook.name" . }}-issuer diff --git a/charts/nebula-operator/templates/admission-webhook-registration.yaml b/charts/nebula-operator/templates/admission-webhook-registration.yaml index d9421714..d6ba2dea 100644 --- a/charts/nebula-operator/templates/admission-webhook-registration.yaml +++ b/charts/nebula-operator/templates/admission-webhook-registration.yaml @@ -32,5 +32,28 @@ webhooks: scope: "*" sideEffects: None timeoutSeconds: 3 + + - name: nebulaautoscalingvalidating.nebula-graph.io + admissionReviewVersions: + - v1 + clientConfig: + service: + name: {{ template "autoscaler-admission-webhook.name" . }}-service + namespace: {{ template "nebula-operator.namespace" . 
}} + path: /validate-nebulaautoscaler + failurePolicy: Fail + rules: + - apiGroups: + - autoscaling.nebula-graph.io + apiVersions: + - v1alpha1 + operations: + - CREATE + - UPDATE + resources: + - nebulaautoscalers + scope: "*" + sideEffects: None + timeoutSeconds: 3 {{- end }} diff --git a/charts/nebula-operator/templates/autoscaler-admission-webhook-service.yaml b/charts/nebula-operator/templates/autoscaler-admission-webhook-service.yaml new file mode 100644 index 00000000..1a5e96b3 --- /dev/null +++ b/charts/nebula-operator/templates/autoscaler-admission-webhook-service.yaml @@ -0,0 +1,17 @@ +{{- if .Values.admissionWebhook.create }} +--- +apiVersion: v1 +kind: Service +metadata: + name: {{ template "autoscaler-admission-webhook.name" . }}-service + namespace: {{ template "nebula-operator.namespace" . }} + labels: + {{- include "admission-webhook.labels" . | nindent 4 }} +spec: + ports: + - port: 443 + targetPort: 9448 + selector: + {{- include "admission-webhook.matchLabels" . | nindent 4 }} +{{- end }} + diff --git a/charts/nebula-operator/templates/controller-manager-deployment.yaml b/charts/nebula-operator/templates/controller-manager-deployment.yaml index 7f7a3aad..72ff5a9e 100644 --- a/charts/nebula-operator/templates/controller-manager-deployment.yaml +++ b/charts/nebula-operator/templates/controller-manager-deployment.yaml @@ -100,6 +100,8 @@ spec: - --leader-elect - --leader-elect-resource-namespace={{ template "nebula-operator.namespace" . 
}} - --v={{ .Values.controllerManager.verbosity }} + - --enable-admission-webhook={{ .Values.admissionWebhook.create }} + - --webhook-secure-port={{ .Values.autoscalerAdmissionWebhook.webhookBindPort }} {{- if or .Values.kubernetesClusterDomain .Values.controllerManager.env }} env: {{- if .Values.kubernetesClusterDomain }} @@ -108,6 +110,12 @@ spec: {{- end }} {{- if .Values.controllerManager.env }}{{ toYaml .Values.controllerManager.env | nindent 12 }}{{- end }} {{- end }} + {{- if .Values.admissionWebhook.create }} + ports: + - containerPort: {{ .Values.autoscalerAdmissionWebhook.webhookBindPort | default 9448 }} + name: webhook-server + protocol: TCP + {{- end }} resources: {{- toYaml .Values.controllerManager.resources | nindent 12 }} livenessProbe: @@ -124,6 +132,14 @@ spec: periodSeconds: 10 securityContext: allowPrivilegeEscalation: false + {{- if .Values.admissionWebhook.create }} + volumeMounts: + {{- if .Values.admissionWebhook.create }} + - mountPath: /tmp/k8s-webhook-server/serving-certs + name: cert + readOnly: true + {{- end }} + {{- end }} {{- with .Values.controllerManager.sidecarContainers }} {{- range $name, $spec := $.Values.controllerManager.sidecarContainers }} - name: {{ $name }} @@ -153,9 +169,9 @@ spec: tolerations: {{- toYaml . | nindent 8 }} {{- end }} - {{- if or .Values.controllerManager.extraVolumes .Values.admissionWebhook.create }} + {{- if or .Values.controllerManager.extraVolumes .Values.admissionWebhook.create}} volumes: - {{- if .Values.admissionWebhook.create }} + {{- if .Values.admissionWebhook.create}} - name: cert secret: defaultMode: 420 diff --git a/charts/nebula-operator/values.yaml b/charts/nebula-operator/values.yaml index 87f2a073..eb2ee114 100644 --- a/charts/nebula-operator/values.yaml +++ b/charts/nebula-operator/values.yaml @@ -68,6 +68,10 @@ admissionWebhook: # The TCP port the Webhook server binds to. (default 9443) webhookBindPort: 9443 +autoscalerAdmissionWebhook: + # The TCP port the Webhook server binds to. 
(default 9443) + webhookBindPort: 9448 + scheduler: create: true schedulerName: nebula-scheduler From ba7229be67cc548773f4a1905a2a70bf8e9250f8 Mon Sep 17 00:00:00 2001 From: kevinliu24 Date: Thu, 6 Jun 2024 15:48:44 +0800 Subject: [PATCH 03/13] feat: Add cert generator --- Dockerfile | 1 + Makefile | 1 + charts/nebula-operator/templates/_helpers.tpl | 14 + .../admission-webhook-certificate.yaml | 6 +- .../admission-webhook-registration.yaml | 6 +- .../autoscaler-admission-webhook-service.yaml | 2 +- ...er-manager-admission-webhook-service.yaml} | 4 +- .../controller-manager-deployment.yaml | 57 +++- .../templates/controller-manager-rbac.yaml | 10 + charts/nebula-operator/values.yaml | 16 +- .../app/certificate-generator.go | 313 ++++++++++++++++++ .../app/options/options.go | 87 +++++ cmd/certificate-generator/main.go | 32 ++ 13 files changed, 520 insertions(+), 29 deletions(-) rename charts/nebula-operator/templates/{admission-webhook-service.yaml => controller-manager-admission-webhook-service.yaml} (67%) create mode 100644 cmd/certificate-generator/app/certificate-generator.go create mode 100644 cmd/certificate-generator/app/options/options.go create mode 100644 cmd/certificate-generator/main.go diff --git a/Dockerfile b/Dockerfile index d3dd15de..77a34d0e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,6 +12,7 @@ RUN if [ "$USERNAME" = "ng-user" ]; then \ ADD bin/${TARGETDIR}/controller-manager /usr/local/bin/controller-manager ADD bin/${TARGETDIR}/autoscaler /usr/local/bin/autoscaler ADD bin/${TARGETDIR}/scheduler /usr/local/bin/scheduler +ADD bin/${TARGETDIR}/certificate-generator /usr/local/bin/certificate-generator # [Optional] Set the default user. Omit if you want to keep the default as root. USER $USERNAME diff --git a/Makefile b/Makefile index c07c4213..e8797be7 100644 --- a/Makefile +++ b/Makefile @@ -79,6 +79,7 @@ build-operator: ## Build operator related binary. 
$(GO_BUILD) -ldflags '$(LDFLAGS)' -o bin/$(TARGETDIR)/controller-manager cmd/controller-manager/main.go $(GO_BUILD) -ldflags '$(LDFLAGS)' -o bin/$(TARGETDIR)/autoscaler cmd/autoscaler/main.go $(GO_BUILD) -ldflags '$(LDFLAGS)' -o bin/$(TARGETDIR)/scheduler cmd/scheduler/main.go + $(GO_BUILD) -ldflags '$(LDFLAGS)' -o bin/$(TARGETDIR)/certificate-generator cmd/certificate-generator/main.go build-provisioner: ## Build provisioner binary. $(GO_BUILD) -ldflags '$(LDFLAGS)' -o bin/$(TARGETDIR)/local-pv-provisioner cmd/provisioner/main.go diff --git a/charts/nebula-operator/templates/_helpers.tpl b/charts/nebula-operator/templates/_helpers.tpl index 5034201c..880d6f54 100644 --- a/charts/nebula-operator/templates/_helpers.tpl +++ b/charts/nebula-operator/templates/_helpers.tpl @@ -97,6 +97,20 @@ Admission webhook name of the chart. {{ include "nebula-operator.name" . }}-webhook {{- end }} +{{/* +Controller Manager Admission webhook name. +*/}} +{{- define "controller-manager-admission-webhook.name" -}} +controller-manager-{{ include "nebula-operator.name" . }}-webhook +{{- end }} + +{{/* +Autoscaler Admission webhook name. +*/}} +{{- define "autoscaler-admission-webhook.name" -}} +autoscaler-{{ include "nebula-operator.name" . 
}}-webhook +{{- end }} + {{/* Admission webhook selector labels */}} diff --git a/charts/nebula-operator/templates/admission-webhook-certificate.yaml b/charts/nebula-operator/templates/admission-webhook-certificate.yaml index e1671fb5..9ab758a6 100644 --- a/charts/nebula-operator/templates/admission-webhook-certificate.yaml +++ b/charts/nebula-operator/templates/admission-webhook-certificate.yaml @@ -1,4 +1,4 @@ -{{- if .Values.admissionWebhook.create }} +{{- if and (or .Values.admissionWebhook.contollerManagerAdmissionWebhook.create .Values.admissionWebhook.autoscalerAdmissionWebhook.create) .Values.admissionWebhook.useCertManager }} --- apiVersion: cert-manager.io/v1 kind: Certificate @@ -9,8 +9,8 @@ metadata: {{- include "admission-webhook.labels" . | nindent 4 }} spec: dnsNames: - - {{ template "admission-webhook.name" . }}-service.{{ template "nebula-operator.namespace" . }}.svc - - {{ template "admission-webhook.name" . }}-service.{{ template "nebula-operator.namespace" . }}.svc.{{ default "cluster.local" .Values.kubernetesClusterDomain }} + - {{ template "controller-manager-admission-webhook.name" . }}-service.{{ template "nebula-operator.namespace" . }}.svc + - {{ template "controller-manager-admission-webhook.name" . }}-service.{{ template "nebula-operator.namespace" . }}.svc.{{ default "cluster.local" .Values.kubernetesClusterDomain }} - {{ template "autoscaler-admission-webhook.name" . }}-service.{{ template "nebula-operator.namespace" . }}.svc - {{ template "autoscaler-admission-webhook.name" . }}-service.{{ template "nebula-operator.namespace" . 
}}.svc.{{ default "cluster.local" .Values.kubernetesClusterDomain }} issuerRef: diff --git a/charts/nebula-operator/templates/admission-webhook-registration.yaml b/charts/nebula-operator/templates/admission-webhook-registration.yaml index d6ba2dea..0bdec1a5 100644 --- a/charts/nebula-operator/templates/admission-webhook-registration.yaml +++ b/charts/nebula-operator/templates/admission-webhook-registration.yaml @@ -1,10 +1,12 @@ -{{- if .Values.admissionWebhook.create }} +{{- if or .Values.admissionWebhook.contollerManagerAdmissionWebhook.create .Values.admissionWebhook.autoscalerAdmissionWebhook.create }} --- apiVersion: admissionregistration.k8s.io/v1 kind: ValidatingWebhookConfiguration metadata: + {{- if .Values.admissionWebhook.useCertManager }} annotations: cert-manager.io/inject-ca-from: {{ template "nebula-operator.namespace" . }}/{{ template "admission-webhook.name" . }}-cert + {{- end }} name: {{ template "admission-webhook.name" . }}-validating labels: {{- include "admission-webhook.labels" . | nindent 4 }} @@ -14,7 +16,7 @@ webhooks: - v1 clientConfig: service: - name: {{ template "admission-webhook.name" . }}-service + name: {{ template "controller-manager-admission-webhook.name" . }}-service namespace: {{ template "nebula-operator.namespace" . 
}} path: /validate-nebulacluster failurePolicy: Fail diff --git a/charts/nebula-operator/templates/autoscaler-admission-webhook-service.yaml b/charts/nebula-operator/templates/autoscaler-admission-webhook-service.yaml index 1a5e96b3..dc580b3e 100644 --- a/charts/nebula-operator/templates/autoscaler-admission-webhook-service.yaml +++ b/charts/nebula-operator/templates/autoscaler-admission-webhook-service.yaml @@ -1,4 +1,4 @@ -{{- if .Values.admissionWebhook.create }} +{{- if .Values.admissionWebhook.autoscalerAdmissionWebhook.create }} --- apiVersion: v1 kind: Service diff --git a/charts/nebula-operator/templates/admission-webhook-service.yaml b/charts/nebula-operator/templates/controller-manager-admission-webhook-service.yaml similarity index 67% rename from charts/nebula-operator/templates/admission-webhook-service.yaml rename to charts/nebula-operator/templates/controller-manager-admission-webhook-service.yaml index 861c3bbb..60cfbd24 100644 --- a/charts/nebula-operator/templates/admission-webhook-service.yaml +++ b/charts/nebula-operator/templates/controller-manager-admission-webhook-service.yaml @@ -1,9 +1,9 @@ -{{- if .Values.admissionWebhook.create }} +{{- if .Values.admissionWebhook.contollerManagerAdmissionWebhook.create }} --- apiVersion: v1 kind: Service metadata: - name: {{ template "admission-webhook.name" . }}-service + name: {{ template "controller-manager-admission-webhook.name" . }}-service namespace: {{ template "nebula-operator.namespace" . }} labels: {{- include "admission-webhook.labels" . 
| nindent 4 }} diff --git a/charts/nebula-operator/templates/controller-manager-deployment.yaml b/charts/nebula-operator/templates/controller-manager-deployment.yaml index 72ff5a9e..15a13b8b 100644 --- a/charts/nebula-operator/templates/controller-manager-deployment.yaml +++ b/charts/nebula-operator/templates/controller-manager-deployment.yaml @@ -43,8 +43,8 @@ spec: - --concurrent-nebulabackup-syncs={{ .Values.concurrentNebulaBackupSyncs }} - --leader-elect - --leader-elect-resource-namespace={{ template "nebula-operator.namespace" . }} - - --enable-admission-webhook={{ .Values.admissionWebhook.create }} - - --webhook-secure-port={{ .Values.admissionWebhook.webhookBindPort }} + - --enable-admission-webhook={{ .Values.admissionWebhook.contollerManagerAdmissionWebhook.create }} + - --webhook-secure-port={{ .Values.admissionWebhook.contollerManagerAdmissionWebhook.webhookBindPort }} - --enable-kruise-scheme={{ .Values.enableKruiseScheme }} - --v={{ .Values.controllerManager.verbosity }} {{- if or .Values.kubernetesClusterDomain .Values.controllerManager.env }} @@ -55,9 +55,9 @@ spec: {{- end }} {{- if .Values.controllerManager.env }}{{ toYaml .Values.controllerManager.env | nindent 12 }}{{- end }} {{- end }} - {{- if .Values.admissionWebhook.create }} + {{- if .Values.admissionWebhook.contollerManagerAdmissionWebhook.create }} ports: - - containerPort: {{ .Values.admissionWebhook.webhookBindPort | default 9443 }} + - containerPort: {{ .Values.admissionWebhook.contollerManagerAdmissionWebhook.webhookBindPort | default 9443 }} name: webhook-server protocol: TCP {{- end }} @@ -77,9 +77,9 @@ spec: periodSeconds: 10 securityContext: allowPrivilegeEscalation: false - {{- if or .Values.controllerManager.extraVolumeMounts .Values.admissionWebhook.create }} + {{- if or .Values.controllerManager.extraVolumeMounts .Values.admissionWebhook.contollerManagerAdmissionWebhook.create }} volumeMounts: - {{- if .Values.admissionWebhook.create }} + {{- if 
.Values.admissionWebhook.contollerManagerAdmissionWebhook.create }} - mountPath: /tmp/k8s-webhook-server/serving-certs name: cert readOnly: true @@ -100,8 +100,8 @@ spec: - --leader-elect - --leader-elect-resource-namespace={{ template "nebula-operator.namespace" . }} - --v={{ .Values.controllerManager.verbosity }} - - --enable-admission-webhook={{ .Values.admissionWebhook.create }} - - --webhook-secure-port={{ .Values.autoscalerAdmissionWebhook.webhookBindPort }} + - --enable-admission-webhook={{ .Values.admissionWebhook.autoscalerAdmissionWebhook.create }} + - --webhook-secure-port={{ .Values.admissionWebhook.autoscalerAdmissionWebhook.webhookBindPort }} {{- if or .Values.kubernetesClusterDomain .Values.controllerManager.env }} env: {{- if .Values.kubernetesClusterDomain }} @@ -110,9 +110,9 @@ spec: {{- end }} {{- if .Values.controllerManager.env }}{{ toYaml .Values.controllerManager.env | nindent 12 }}{{- end }} {{- end }} - {{- if .Values.admissionWebhook.create }} + {{- if .Values.admissionWebhook.autoscalerAdmissionWebhook.create }} ports: - - containerPort: {{ .Values.autoscalerAdmissionWebhook.webhookBindPort | default 9448 }} + - containerPort: {{ .Values.admissionWebhook.autoscalerAdmissionWebhook.webhookBindPort | default 9448 }} name: webhook-server protocol: TCP {{- end }} @@ -132,14 +132,38 @@ spec: periodSeconds: 10 securityContext: allowPrivilegeEscalation: false - {{- if .Values.admissionWebhook.create }} + {{- if .Values.admissionWebhook.autoscalerAdmissionWebhook.create }} volumeMounts: - {{- if .Values.admissionWebhook.create }} + {{- if .Values.admissionWebhook.autoscalerAdmissionWebhook.create }} - mountPath: /tmp/k8s-webhook-server/serving-certs name: cert readOnly: true {{- end }} {{- end }} + {{- if and (or .Values.admissionWebhook.contollerManagerAdmissionWebhook.create .Values.admissionWebhook.autoscalerAdmissionWebhook.create) (not .Values.admissionWebhook.useCertManager) }} + - name: cert-rotation + image: {{ 
.Values.image.nebulaOperator.image }} + imagePullPolicy: {{ .Values.image.nebulaOperator.imagePullPolicy }} + command: + - /usr/local/bin/certificate-generator + args: + - --leader-elect + - --leader-elect-resource-namespace={{ template "nebula-operator.namespace" . }} + - --v={{ .Values.controllerManager.verbosity }} + - --webhook-namespace={{ template "nebula-operator.namespace" . }} + - --webhook-name={{ template "admission-webhook.name" . }}-validating + - --certificate-dir=/tmp/k8s-webhook-server/serving-certs + - --certificate-validity=1 + env: + {{- if .Values.kubernetesClusterDomain }} + - name: KUBERNETES_CLUSTER_DOMAIN + value: {{ .Values.kubernetesClusterDomain }} + {{- end }} + volumeMounts: + - mountPath: /tmp/k8s-webhook-server/serving-certs + name: cert + readOnly: false + {{- end}} {{- with .Values.controllerManager.sidecarContainers }} {{- range $name, $spec := $.Values.controllerManager.sidecarContainers }} - name: {{ $name }} @@ -169,13 +193,18 @@ spec: tolerations: {{- toYaml . | nindent 8 }} {{- end }} - {{- if or .Values.controllerManager.extraVolumes .Values.admissionWebhook.create}} + {{- if or .Values.controllerManager.extraVolumes (or .Values.admissionWebhook.contollerManagerAdmissionWebhook.create .Values.admissionWebhook.autoscalerAdmissionWebhook.create) }} volumes: - {{- if .Values.admissionWebhook.create}} + {{- if or .Values.admissionWebhook.contollerManagerAdmissionWebhook.create .Values.admissionWebhook.autoscalerAdmissionWebhook.create}} + {{- if .Values.admissionWebhook.useCertManager }} - name: cert secret: defaultMode: 420 secretName: {{ template "admission-webhook.name" . 
}}-secret + {{- else}} + - name: cert + emptyDir: {} + {{- end}} {{- end }} {{- if .Values.controllerManager.extraVolumes }} {{- toYaml .Values.controllerManager.extraVolumes | nindent 8 }} diff --git a/charts/nebula-operator/templates/controller-manager-rbac.yaml b/charts/nebula-operator/templates/controller-manager-rbac.yaml index a202d6b0..af705641 100644 --- a/charts/nebula-operator/templates/controller-manager-rbac.yaml +++ b/charts/nebula-operator/templates/controller-manager-rbac.yaml @@ -387,6 +387,16 @@ rules: verbs: - get - list + - apiGroups: + - admissionregistration.k8s.io + resources: + - validatingwebhookconfigurations + verbs: + - get + - list + - watch + - update + - patch --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding diff --git a/charts/nebula-operator/values.yaml b/charts/nebula-operator/values.yaml index eb2ee114..d4a5a5c6 100644 --- a/charts/nebula-operator/values.yaml +++ b/charts/nebula-operator/values.yaml @@ -64,13 +64,15 @@ controllerManager: # runAsNonRoot: true admissionWebhook: - create: false - # The TCP port the Webhook server binds to. (default 9443) - webhookBindPort: 9443 - -autoscalerAdmissionWebhook: - # The TCP port the Webhook server binds to. (default 9443) - webhookBindPort: 9448 + contollerManagerAdmissionWebhook: + create: false + # The TCP port the Webhook server binds to. (default 9443) + webhookBindPort: 9443 + autoscalerAdmissionWebhook: + create: true + # The TCP port the Webhook server binds to. (default 9448) + webhookBindPort: 9448 + useCertManager: false scheduler: create: true diff --git a/cmd/certificate-generator/app/certificate-generator.go b/cmd/certificate-generator/app/certificate-generator.go new file mode 100644 index 00000000..473ee334 --- /dev/null +++ b/cmd/certificate-generator/app/certificate-generator.go @@ -0,0 +1,313 @@ +/* +Copyright 2024 Vesoft Inc. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package app + +import ( + "context" + "crypto/ecdsa" + "crypto/elliptic" + "crypto/rand" + "crypto/x509" + "crypto/x509/pkix" + "encoding/base64" + "encoding/pem" + "flag" + "fmt" + "math/big" + "os" + "time" + + cron "github.com/robfig/cron/v3" + "github.com/spf13/cobra" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/leaderelection" + "k8s.io/client-go/tools/leaderelection/resourcelock" + cliflag "k8s.io/component-base/cli/flag" + "k8s.io/klog/v2" + ctrlruntime "sigs.k8s.io/controller-runtime" + + "github.com/vesoft-inc/nebula-operator/cmd/certificate-generator/app/options" +) + +func NewCertGenCommand(ctx context.Context) *cobra.Command { + opts := options.NewOptions() + cmd := &cobra.Command{ + Use: "nebula-cert-gen", + RunE: func(cmd *cobra.Command, args []string) error { + return Run(ctx, opts) + }, + } + + nfs := cliflag.NamedFlagSets{} + fs := nfs.FlagSet("generic") + fs.AddGoFlagSet(flag.CommandLine) + opts.AddFlags(fs) + logsFlagSet := nfs.FlagSet("logs") + + cmd.Flags().AddFlagSet(fs) + cmd.Flags().AddFlagSet(logsFlagSet) + + return cmd +} + +func Run(ctx context.Context, opts *options.Options) error { + klog.Info("Getting kubernetes configs") + cfg, err := ctrlruntime.GetConfig() + if err != nil { + return fmt.Errorf("loading kubernetes config: %w", err) + } + + clientset, err := kubernetes.NewForConfig(cfg) + if err != nil { + return fmt.Errorf("building kubernetes clientset: %w", err) + } + + 
opts.CertValidity = opts.CertValidity * 24 * 60 + + // rotate cert once before starting cronjob + err = rotateCertificate(ctx, clientset, opts) + if err != nil { + klog.Errorf("Error rotating certificate for webhook [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookName, err) + os.Exit(1) + } + + klog.Infof("Starting cert rotation cron job for webhook [%v/%v]", opts.WebhookNamespace, opts.WebhookName) + c := cron.New() + // rotate cert 1 hour before expiration date + c.AddFunc(fmt.Sprintf("@every %vm", opts.CertValidity-60), func() { + err := rotateCertificate(ctx, clientset, opts) + if err != nil { + klog.Errorf("Error rotating certificate for webhook [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookName, err) + os.Exit(1) + } + }) + c.Start() + klog.Infof("Cert rotation crontab started for webhook [%v/%v]. Will rotate every %v minutes", opts.WebhookNamespace, opts.WebhookName, opts.CertValidity) + + // keep the program running + select {} +} + +func rotateCertificate(ctx context.Context, clientset *kubernetes.Clientset, opts *options.Options) error { + if opts.LeaderElection.LeaderElect { + klog.Info("Doing leader election") + id, err := os.Hostname() + if err != nil { + klog.Errorf("Failed to get hostname: %v", err) + } + + rl, err := resourcelock.New(resourcelock.LeasesResourceLock, + opts.WebhookNamespace, + opts.WebhookName, + clientset.CoreV1(), + clientset.CoordinationV1(), + resourcelock.ResourceLockConfig{ + Identity: id, + }) + if err != nil { + klog.Errorf("Error creating resource lock: %v", err) + } + + leaderelection.RunOrDie(ctx, leaderelection.LeaderElectionConfig{ + Lock: rl, + LeaseDuration: opts.LeaderElection.LeaseDuration.Duration, + RenewDeadline: opts.LeaderElection.RenewDeadline.Duration, + RetryPeriod: opts.LeaderElection.RetryPeriod.Duration, + Callbacks: leaderelection.LeaderCallbacks{ + OnStartedLeading: func(ctx context.Context) { + klog.Info("Leader election successful. 
Starting certificate rotation") + err = doCertRotation(clientset, opts) + if err != nil { + klog.Errorf("Failed to rotate certificates: %v", err) + } + }, + OnStoppedLeading: func() { + klog.Info("Lost leadership, stopping") + }, + }, + }) + } else { + klog.Infof("Leader election skipped. Starting certificate rotation") + err := doCertRotation(clientset, opts) + if err != nil { + klog.Errorf("Failed to rotate certificates: %v", err) + } + } + + return nil +} + +func doCertRotation(clientset *kubernetes.Clientset, opts *options.Options) error { + klog.Infof("Start generating certificates for webhook server [%v/%v]", opts.WebhookNamespace, opts.WebhookName) + caCert, caKey, err := generateCACert(opts.WebhookName, opts.CertValidity) + if err != nil { + klog.Errorf("Error generating CA certificate for webhook server [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookName, err) + return err + } + + serverCert, serverKey, err := generateServerCert(caCert, caKey, opts.WebhookName, opts.WebhookNamespace, opts.CertValidity) + if err != nil { + klog.Errorf("Error generating server certificate for webhook server [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookName, err) + return err + } + + err = saveToFile(fmt.Sprintf("%v/ca.crt", opts.CertDir), "CERTIFICATE", caCert) + if err != nil { + klog.Errorf("Error saving CA certificate for webhook server [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookName, err) + return err + } + + err = saveToFile(fmt.Sprintf("%v/ca.key", opts.CertDir), "EC PRIVATE KEY", caKey) + if err != nil { + klog.Errorf("Error saving CA key for webhook server [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookName, err) + return err + } + + err = saveToFile(fmt.Sprintf("%v/tls.crt", opts.CertDir), "CERTIFICATE", serverCert) + if err != nil { + klog.Errorf("Error saving server certificate for webhook server [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookName, err) + return err + } + + err = saveToFile(fmt.Sprintf("%v/tls.key", opts.CertDir), "EC PRIVATE KEY", 
serverKey) + if err != nil { + klog.Errorf("Error saving server key for webhook server [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookName, err) + return err + } + + klog.Infof("Certificates generated successfully for webhook server [%v/%v]", opts.WebhookNamespace, opts.WebhookName) + + klog.Infof("Updating ca bundle for webhook server [%v/%v]", opts.WebhookNamespace, opts.WebhookName) + err = updateWebhookConfiguration(clientset, opts.WebhookName, fmt.Sprintf("%v/ca.crt", opts.CertDir)) + if err != nil { + klog.Errorf("Error updating ca bundle for webhook server [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookName, err) + return err + } + + return nil +} + +func generateCACert(webhookName string, validity int64) ([]byte, *ecdsa.PrivateKey, error) { + caKey, err := ecdsa.GenerateKey(elliptic.P384(), rand.Reader) + if err != nil { + return nil, nil, err + } + + caTemplate := x509.Certificate{ + SerialNumber: big.NewInt(1), + Subject: pkix.Name{ + Country: []string{"US"}, + CommonName: webhookName, + }, + NotBefore: time.Now(), + NotAfter: time.Now().Add(time.Duration(validity) * time.Minute), + KeyUsage: x509.KeyUsageCertSign | x509.KeyUsageCRLSign, + BasicConstraintsValid: true, + IsCA: true, + } + + caCert, err := x509.CreateCertificate(rand.Reader, &caTemplate, &caTemplate, &caKey.PublicKey, caKey) + if err != nil { + return nil, nil, err + } + + return caCert, caKey, nil +} + +func generateServerCert(caCert []byte, caKey *ecdsa.PrivateKey, webhookName, webhookNamespace string, validity int64) ([]byte, *ecdsa.PrivateKey, error) { + serverKey, err := ecdsa.GenerateKey(elliptic.P384(), rand.Reader) + if err != nil { + return nil, nil, err + } + + serverTemplate := x509.Certificate{ + SerialNumber: big.NewInt(2), + Subject: pkix.Name{ + Country: []string{"US"}, + CommonName: webhookName, + }, + NotBefore: time.Now(), + NotAfter: time.Now().Add(time.Duration(validity) * time.Minute), + KeyUsage: x509.KeyUsageDigitalSignature, + ExtKeyUsage: 
[]x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}, + DNSNames: []string{fmt.Sprintf("%v.%v.svc", webhookName, webhookNamespace), fmt.Sprintf("%v.%v.svc.cluster.local", webhookName, webhookNamespace)}, + } + + ca, err := x509.ParseCertificate(caCert) + if err != nil { + return nil, nil, err + } + + serverCert, err := x509.CreateCertificate(rand.Reader, &serverTemplate, ca, &serverKey.PublicKey, caKey) + if err != nil { + return nil, nil, err + } + + return serverCert, serverKey, nil +} + +func saveToFile(filename, blockType string, data interface{}) error { + file, err := os.Create(filename) + if err != nil { + return err + } + defer file.Close() + + var pemData []byte + switch data := data.(type) { + case []byte: + pemData = pem.EncodeToMemory(&pem.Block{Type: blockType, Bytes: data}) + case *ecdsa.PrivateKey: + der, err := x509.MarshalECPrivateKey(data) + if err != nil { + return err + } + pemData = pem.EncodeToMemory(&pem.Block{Type: blockType, Bytes: der}) + default: + return fmt.Errorf("unsupported data type") + } + + _, err = file.Write(pemData) + return err +} + +// Update the webhook configuration with the new CA bundle +func updateWebhookConfiguration(client *kubernetes.Clientset, webhookName, caCertPath string) error { + caCert, err := os.ReadFile(caCertPath) + if err != nil { + return fmt.Errorf("failed to read CA certificate: %v", err) + } + + caBundle := base64.StdEncoding.EncodeToString(caCert) + + webhook, err := client.AdmissionregistrationV1().ValidatingWebhookConfigurations().Get(context.Background(), webhookName, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get webhook configuration: %v", err) + } + + for i := range webhook.Webhooks { + webhook.Webhooks[i].ClientConfig.CABundle = []byte(caBundle) + } + + _, err = client.AdmissionregistrationV1().ValidatingWebhookConfigurations().Update(context.Background(), webhook, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to update webhook configuration: %v", 
err) + } + + return nil +} diff --git a/cmd/certificate-generator/app/options/options.go b/cmd/certificate-generator/app/options/options.go new file mode 100644 index 00000000..61ca4bcc --- /dev/null +++ b/cmd/certificate-generator/app/options/options.go @@ -0,0 +1,87 @@ +/* +Copyright 2024 Vesoft Inc. +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package options + +import ( + "time" + + "github.com/spf13/pflag" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/tools/leaderelection/resourcelock" + cbc "k8s.io/component-base/config" +) + +const ( + NamespaceNebulaSystem = "nebula-system" +) + +var ( + defaultElectionLeaseDuration = metav1.Duration{Duration: 15 * time.Second} + defaultElectionRenewDeadline = metav1.Duration{Duration: 10 * time.Second} + defaultElectionRetryPeriod = metav1.Duration{Duration: 2 * time.Second} +) + +type Options struct { + // LeaderElection defines the configuration of leader election client. + LeaderElection cbc.LeaderElectionConfiguration + + // Webhook name represents the name of the webhook server associated with the certificate. + WebhookName string + + // Webhook namespace represents the namespace of the webhook server associated with the certificate. 
+ WebhookNamespace string + + // CertDir represents the directory to save the certificates in + CertDir string + + // CertValidity represents the number of days the certificate should be valid for + CertValidity int64 +} + +func NewOptions() *Options { + return &Options{ + LeaderElection: cbc.LeaderElectionConfiguration{ + LeaderElect: true, + ResourceLock: resourcelock.LeasesResourceLock, + ResourceNamespace: NamespaceNebulaSystem, + ResourceName: "nebula-certificate-generator", + }, + } +} + +func (o *Options) AddFlags(flags *pflag.FlagSet) { + flags.BoolVar(&o.LeaderElection.LeaderElect, "leader-elect", false, "Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability.") + flags.StringVar(&o.LeaderElection.ResourceNamespace, "leader-elect-resource-namespace", NamespaceNebulaSystem, "The namespace of resource object that is used for locking during leader election.") + flags.DurationVar(&o.LeaderElection.LeaseDuration.Duration, "leader-elect-lease-duration", defaultElectionLeaseDuration.Duration, ""+ + "The duration that non-leader candidates will wait after observing a leadership "+ + "renewal until attempting to acquire leadership of a led but unrenewed leader "+ + "slot. This is effectively the maximum duration that a leader can be stopped "+ + "before it is replaced by another candidate. This is only applicable if leader "+ + "election is enabled.") + flags.DurationVar(&o.LeaderElection.RenewDeadline.Duration, "leader-elect-renew-deadline", defaultElectionRenewDeadline.Duration, ""+ + "The interval between attempts by the acting master to renew a leadership slot "+ + "before it stops leading. This must be less than or equal to the lease duration. 
"+ + "This is only applicable if leader election is enabled.") + flags.DurationVar(&o.LeaderElection.RetryPeriod.Duration, "leader-elect-retry-period", defaultElectionRetryPeriod.Duration, ""+ + "The duration the clients should wait between attempting acquisition and renewal "+ + "of a leadership. This is only applicable if leader election is enabled.") + flags.StringVar(&o.WebhookName, "webhook-name", "nebulaWebhook", "Specifies the name of the webhook to associate with the certificate") + flags.StringVar(&o.WebhookNamespace, "webhook-namespace", "default", "Specifies the namespace of the webhook to associate with the certificate") + flags.StringVar(&o.CertDir, "certificate-dir", "/etc/cert", "Specifies the directory in which to save the generated webhook certificates") + flags.Int64Var(&o.CertValidity, "certificate-validity", 365, "Specifies the number of days the certificate should be valid for") +} diff --git a/cmd/certificate-generator/main.go b/cmd/certificate-generator/main.go new file mode 100644 index 00000000..7d17f545 --- /dev/null +++ b/cmd/certificate-generator/main.go @@ -0,0 +1,32 @@ +/* +Copyright 2024 Vesoft Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package main + +import ( + "os" + + "github.com/vesoft-inc/nebula-operator/cmd/certificate-generator/app" + "k8s.io/component-base/cli" + ctrl "sigs.k8s.io/controller-runtime" +) + +func main() { + ctx := ctrl.SetupSignalHandler() + cmd := app.NewCertGenCommand(ctx) + code := cli.Run(cmd) + os.Exit(code) +} From aa3b18c9168df98e1fb7fece0b363ead9fe349ba Mon Sep 17 00:00:00 2001 From: kevinliu24 Date: Sat, 8 Jun 2024 08:11:26 +0800 Subject: [PATCH 04/13] Use secret instead --- .../controller-manager-deployment.yaml | 26 +++- .../templates/controller-manager-rbac.yaml | 15 ++ cmd/autoscaler/app/autoscaler.go | 18 +++ .../app/certificate-generator.go | 142 +++++++++--------- .../app/options/options.go | 24 ++- .../app/controller-manager.go | 18 +++ 6 files changed, 161 insertions(+), 82 deletions(-) diff --git a/charts/nebula-operator/templates/controller-manager-deployment.yaml b/charts/nebula-operator/templates/controller-manager-deployment.yaml index 15a13b8b..d9b57f25 100644 --- a/charts/nebula-operator/templates/controller-manager-deployment.yaml +++ b/charts/nebula-operator/templates/controller-manager-deployment.yaml @@ -1,4 +1,17 @@ {{- if .Values.controllerManager.create }} +{{- if and (or .Values.admissionWebhook.contollerManagerAdmissionWebhook.create .Values.admissionWebhook.autoscalerAdmissionWebhook.create) (not .Values.admissionWebhook.useCertManager) }} +--- +apiVersion: v1 +kind: Secret +metadata: + name: {{ template "admission-webhook.name" . }}-secret + namespace: {{ template "nebula-operator.namespace" . }} +type: kubernetes.io/tls +data: + tls.crt: "" + tls.key: "" + ca.crt: "" +{{- end }} --- apiVersion: apps/v1 kind: Deployment @@ -151,9 +164,13 @@ spec: - --leader-elect-resource-namespace={{ template "nebula-operator.namespace" . }} - --v={{ .Values.controllerManager.verbosity }} - --webhook-namespace={{ template "nebula-operator.namespace" . }} - - --webhook-name={{ template "admission-webhook.name" . 
}}-validating + - --webhook-server-name={{ template "admission-webhook.name" . }}-validating + - --webhook-names={{ template "controller-manager-admission-webhook.name" . }}-service,{{ template "autoscaler-admission-webhook.name" . }}-service - --certificate-dir=/tmp/k8s-webhook-server/serving-certs - --certificate-validity=1 + - --secret-namespace={{ template "nebula-operator.namespace" . }} + - --secret-name={{ template "admission-webhook.name" . }}-secret + - --kube-domain={{ default "cluster.local" .Values.kubernetesClusterDomain }} env: {{- if .Values.kubernetesClusterDomain }} - name: KUBERNETES_CLUSTER_DOMAIN @@ -162,7 +179,7 @@ spec: volumeMounts: - mountPath: /tmp/k8s-webhook-server/serving-certs name: cert - readOnly: false + readOnly: true {{- end}} {{- with .Values.controllerManager.sidecarContainers }} {{- range $name, $spec := $.Values.controllerManager.sidecarContainers }} @@ -196,15 +213,10 @@ spec: {{- if or .Values.controllerManager.extraVolumes (or .Values.admissionWebhook.contollerManagerAdmissionWebhook.create .Values.admissionWebhook.autoscalerAdmissionWebhook.create) }} volumes: {{- if or .Values.admissionWebhook.contollerManagerAdmissionWebhook.create .Values.admissionWebhook.autoscalerAdmissionWebhook.create}} - {{- if .Values.admissionWebhook.useCertManager }} - name: cert secret: defaultMode: 420 secretName: {{ template "admission-webhook.name" . 
}}-secret - {{- else}} - - name: cert - emptyDir: {} - {{- end}} {{- end }} {{- if .Values.controllerManager.extraVolumes }} {{- toYaml .Values.controllerManager.extraVolumes | nindent 8 }} diff --git a/charts/nebula-operator/templates/controller-manager-rbac.yaml b/charts/nebula-operator/templates/controller-manager-rbac.yaml index af705641..a96d6c8a 100644 --- a/charts/nebula-operator/templates/controller-manager-rbac.yaml +++ b/charts/nebula-operator/templates/controller-manager-rbac.yaml @@ -37,6 +37,19 @@ rules: verbs: - create - patch + {{- if and (or .Values.admissionWebhook.contollerManagerAdmissionWebhook.create .Values.admissionWebhook.autoscalerAdmissionWebhook.create) (not .Values.admissionWebhook.useCertManager) }} + - apiGroups: + - "" + resources: + - secrets + verbs: + - get + - list + - watch + - create + - update + - patch + {{- end}} --- apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding @@ -387,6 +400,7 @@ rules: verbs: - get - list + {{- if and (or .Values.admissionWebhook.contollerManagerAdmissionWebhook.create .Values.admissionWebhook.autoscalerAdmissionWebhook.create) (not .Values.admissionWebhook.useCertManager) }} - apiGroups: - admissionregistration.k8s.io resources: @@ -397,6 +411,7 @@ rules: - watch - update - patch + {{- end }} --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding diff --git a/cmd/autoscaler/app/autoscaler.go b/cmd/autoscaler/app/autoscaler.go index 10c14366..9fbb23c7 100644 --- a/cmd/autoscaler/app/autoscaler.go +++ b/cmd/autoscaler/app/autoscaler.go @@ -20,6 +20,9 @@ import ( "context" "flag" "net/http" + "os" + "path/filepath" + "time" "github.com/spf13/cobra" cliflag "k8s.io/component-base/cli/flag" @@ -160,6 +163,13 @@ func Run(ctx context.Context, opts *options.Options) error { return err } + if opts.EnableAdmissionWebhook { // the serving certificate is only needed when the webhook server is enabled + for !fileExists(filepath.Join(opts.WebhookOpts.CertDir, "tls.crt")) { + klog.Info("waiting for webhook certificate...") + time.Sleep(2 * time.Second) + } + } + if 
err := mgr.Start(ctx); err != nil { klog.Errorf("nebula-autoscaler exits unexpectedly: %v", err) return err @@ -167,3 +177,11 @@ func Run(ctx context.Context, opts *options.Options) error { return nil } + +func fileExists(filename string) bool { + _, err := os.Stat(filename) + if os.IsNotExist(err) { + return false + } + return err == nil +} diff --git a/cmd/certificate-generator/app/certificate-generator.go b/cmd/certificate-generator/app/certificate-generator.go index 473ee334..680783bd 100644 --- a/cmd/certificate-generator/app/certificate-generator.go +++ b/cmd/certificate-generator/app/certificate-generator.go @@ -29,10 +29,12 @@ import ( "fmt" "math/big" "os" + "path/filepath" "time" cron "github.com/robfig/cron/v3" "github.com/spf13/cobra" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" "k8s.io/client-go/tools/leaderelection" @@ -82,22 +84,22 @@ func Run(ctx context.Context, opts *options.Options) error { // rotate cert once before starting cronjob err = rotateCertificate(ctx, clientset, opts) if err != nil { - klog.Errorf("Error rotating certificate for webhook [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookName, err) + klog.Errorf("Error rotating certificate for webhook [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookServerName, err) os.Exit(1) } - klog.Infof("Starting cert rotation cron job for webhook [%v/%v]", opts.WebhookNamespace, opts.WebhookName) + klog.Infof("Starting cert rotation cron job for webhook [%v/%v]", opts.WebhookNamespace, opts.WebhookServerName) c := cron.New() // rotate cert 1 hour before expiration date c.AddFunc(fmt.Sprintf("@every %vm", opts.CertValidity-60), func() { err := rotateCertificate(ctx, clientset, opts) if err != nil { - klog.Errorf("Error rotating certificate for webhook [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookName, err) + klog.Errorf("Error rotating certificate for webhook [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookServerName, err) os.Exit(1) } 
}) c.Start() - klog.Infof("Cert rotation crontab started for webhook [%v/%v]. Will rotate every %v minutes", opts.WebhookNamespace, opts.WebhookName, opts.CertValidity) + klog.Infof("Cert rotation crontab started for webhook [%v/%v]. Will rotate every %v minutes", opts.WebhookNamespace, opts.WebhookServerName, opts.CertValidity) // keep the program running select {} @@ -111,9 +113,9 @@ func rotateCertificate(ctx context.Context, clientset *kubernetes.Clientset, opt klog.Errorf("Failed to get hostname: %v", err) } - rl, err := resourcelock.New(resourcelock.LeasesResourceLock, - opts.WebhookNamespace, - opts.WebhookName, + rl, err := resourcelock.New(opts.LeaderElection.ResourceLock, + opts.LeaderElection.ResourceNamespace, + opts.LeaderElection.ResourceName, clientset.CoreV1(), clientset.CoordinationV1(), resourcelock.ResourceLockConfig{ @@ -153,56 +155,39 @@ func rotateCertificate(ctx context.Context, clientset *kubernetes.Clientset, opt } func doCertRotation(clientset *kubernetes.Clientset, opts *options.Options) error { - klog.Infof("Start generating certificates for webhook server [%v/%v]", opts.WebhookNamespace, opts.WebhookName) - caCert, caKey, err := generateCACert(opts.WebhookName, opts.CertValidity) + klog.Infof("Start generating certificates for webhook server [%v/%v]", opts.WebhookNamespace, opts.WebhookServerName) + caCert, caKey, err := generateCACert(opts) if err != nil { - klog.Errorf("Error generating CA certificate for webhook server [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookName, err) + klog.Errorf("Error generating CA certificate for webhook server [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookServerName, err) return err } - serverCert, serverKey, err := generateServerCert(caCert, caKey, opts.WebhookName, opts.WebhookNamespace, opts.CertValidity) + serverCert, serverKey, err := generateServerCert(caCert, caKey, opts) if err != nil { - klog.Errorf("Error generating server certificate for webhook server [%v/%v]: %v", 
opts.WebhookNamespace, opts.WebhookName, err) + klog.Errorf("Error generating server certificate for webhook server [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookServerName, err) return err } - err = saveToFile(fmt.Sprintf("%v/ca.crt", opts.CertDir), "CERTIFICATE", caCert) + err = updateSecret(clientset, serverCert, serverKey, caCert, opts) if err != nil { - klog.Errorf("Error saving CA certificate for webhook server [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookName, err) + klog.Errorf("Error updating secret for webhook server [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookServerName, err) return err } - err = saveToFile(fmt.Sprintf("%v/ca.key", opts.CertDir), "EC PRIVATE KEY", caKey) - if err != nil { - klog.Errorf("Error saving CA key for webhook server [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookName, err) - return err - } - - err = saveToFile(fmt.Sprintf("%v/tls.crt", opts.CertDir), "CERTIFICATE", serverCert) - if err != nil { - klog.Errorf("Error saving server certificate for webhook server [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookName, err) - return err - } + klog.Infof("Certificates generated successfully for webhook server [%v/%v]", opts.WebhookNamespace, opts.WebhookServerName) - err = saveToFile(fmt.Sprintf("%v/tls.key", opts.CertDir), "EC PRIVATE KEY", serverKey) + klog.Infof("Updating ca bundle for webhook server [%v/%v]", opts.WebhookNamespace, opts.WebhookServerName) + err = updateWebhookConfiguration(clientset, opts) if err != nil { - klog.Errorf("Error saving server key for webhook server [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookName, err) - return err - } - - klog.Infof("Certificates generated successfully for webhook server [%v/%v]", opts.WebhookNamespace, opts.WebhookName) - - klog.Infof("Updating ca bundle for webhook server [%v/%v]", opts.WebhookNamespace, opts.WebhookName) - err = updateWebhookConfiguration(clientset, opts.WebhookName, fmt.Sprintf("%v/ca.crt", opts.CertDir)) - if err != nil { - klog.Errorf("Error 
updating ca bundle for webhook server [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookName, err) + klog.Errorf("Error updating ca bundle for webhook server [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookServerName, err) return err } + klog.Infof("Ca bundle updated successfully for webhook server [%v/%v]", opts.WebhookNamespace, opts.WebhookServerName) return nil } -func generateCACert(webhookName string, validity int64) ([]byte, *ecdsa.PrivateKey, error) { +func generateCACert(opts *options.Options) ([]byte, *ecdsa.PrivateKey, error) { caKey, err := ecdsa.GenerateKey(elliptic.P384(), rand.Reader) if err != nil { return nil, nil, err @@ -212,13 +197,14 @@ func generateCACert(webhookName string, validity int64) ([]byte, *ecdsa.PrivateK SerialNumber: big.NewInt(1), Subject: pkix.Name{ Country: []string{"US"}, - CommonName: webhookName, + CommonName: opts.WebhookServerName, }, NotBefore: time.Now(), - NotAfter: time.Now().Add(time.Duration(validity) * time.Minute), - KeyUsage: x509.KeyUsageCertSign | x509.KeyUsageCRLSign, - BasicConstraintsValid: true, + NotAfter: time.Now().Add(time.Duration(opts.CertValidity) * time.Minute), IsCA: true, + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth, x509.ExtKeyUsageServerAuth}, + KeyUsage: x509.KeyUsageCertSign | x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment, + BasicConstraintsValid: true, } caCert, err := x509.CreateCertificate(rand.Reader, &caTemplate, &caTemplate, &caKey.PublicKey, caKey) @@ -226,29 +212,38 @@ func generateCACert(webhookName string, validity int64) ([]byte, *ecdsa.PrivateK return nil, nil, err } - return caCert, caKey, nil + caPem := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: caCert}) + + return caPem, caKey, nil } -func generateServerCert(caCert []byte, caKey *ecdsa.PrivateKey, webhookName, webhookNamespace string, validity int64) ([]byte, *ecdsa.PrivateKey, error) { +func generateServerCert(caCert []byte, caKey *ecdsa.PrivateKey, opts *options.Options) ([]byte, 
[]byte, error) { serverKey, err := ecdsa.GenerateKey(elliptic.P384(), rand.Reader) if err != nil { return nil, nil, err } + dnsNames := make([]string, 2*len(*opts.WebhookNames)) + for idx, name := range *opts.WebhookNames { // each webhook service fills two consecutive SAN slots + dnsNames[2*idx] = fmt.Sprintf("%v.%v.svc", name, opts.WebhookNamespace) + dnsNames[2*idx+1] = fmt.Sprintf("%v.%v.svc.%v", name, opts.WebhookNamespace, opts.KubernetesDomain) + } + serverTemplate := x509.Certificate{ SerialNumber: big.NewInt(2), Subject: pkix.Name{ Country: []string{"US"}, - CommonName: webhookName, + CommonName: opts.WebhookServerName, }, NotBefore: time.Now(), - NotAfter: time.Now().Add(time.Duration(validity) * time.Minute), + NotAfter: time.Now().Add(time.Duration(opts.CertValidity) * time.Minute), KeyUsage: x509.KeyUsageDigitalSignature, ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}, - DNSNames: []string{fmt.Sprintf("%v.%v.svc", webhookName, webhookNamespace), fmt.Sprintf("%v.%v.svc.cluster.local", webhookName, webhookNamespace)}, + DNSNames: dnsNames, } - ca, err := x509.ParseCertificate(caCert) + caBlock, _ := pem.Decode(caCert) + ca, err := x509.ParseCertificate(caBlock.Bytes) if err != nil { return nil, nil, err } @@ -258,44 +253,49 @@ func generateServerCert(caCert []byte, caKey *ecdsa.PrivateKey, webhookName, web return nil, nil, err } - return serverCert, serverKey, nil + certPem := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: serverCert}) + marshalledKey, err := x509.MarshalECPrivateKey(serverKey) + if err != nil { + return nil, nil, fmt.Errorf("marshalling webhook server certificate key: %w", err) + } + + keyPem := pem.EncodeToMemory(&pem.Block{Type: "EC PRIVATE KEY", Bytes: marshalledKey}) + + return certPem, keyPem, nil } -func saveToFile(filename, blockType string, data interface{}) error { - file, err := os.Create(filename) - if err != nil { - return err +func updateSecret(clientset *kubernetes.Clientset, certPEM, keyPEM, caPEM []byte, 
opts *options.Options) error { + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: opts.SecretName, + Namespace: opts.SecretNamespace, + }, + Type: corev1.SecretTypeTLS, + Data: map[string][]byte{ + "tls.crt": certPEM, + "tls.key": keyPEM, + "ca.crt": caPEM, + }, } - defer file.Close() - - var pemData []byte - switch data := data.(type) { - case []byte: - pemData = pem.EncodeToMemory(&pem.Block{Type: blockType, Bytes: data}) - case *ecdsa.PrivateKey: - der, err := x509.MarshalECPrivateKey(data) - if err != nil { - return err - } - pemData = pem.EncodeToMemory(&pem.Block{Type: blockType, Bytes: der}) - default: - return fmt.Errorf("unsupported data type") + + _, err := clientset.CoreV1().Secrets(opts.SecretNamespace).Update(context.TODO(), secret, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to update secret: %v", err) } - _, err = file.Write(pemData) - return err + return nil } // Update the webhook configuration with the new CA bundle -func updateWebhookConfiguration(client *kubernetes.Clientset, webhookName, caCertPath string) error { - caCert, err := os.ReadFile(caCertPath) +func updateWebhookConfiguration(client *kubernetes.Clientset, opts *options.Options) error { + caCert, err := os.ReadFile(filepath.Join(opts.CertDir, "ca.crt")) if err != nil { return fmt.Errorf("failed to read CA certificate: %v", err) } caBundle := base64.StdEncoding.EncodeToString(caCert) - webhook, err := client.AdmissionregistrationV1().ValidatingWebhookConfigurations().Get(context.Background(), webhookName, metav1.GetOptions{}) + webhook, err := client.AdmissionregistrationV1().ValidatingWebhookConfigurations().Get(context.Background(), opts.WebhookServerName, metav1.GetOptions{}) if err != nil { return fmt.Errorf("failed to get webhook configuration: %v", err) } diff --git a/cmd/certificate-generator/app/options/options.go b/cmd/certificate-generator/app/options/options.go index 61ca4bcc..1719db2c 100644 --- 
a/cmd/certificate-generator/app/options/options.go +++ b/cmd/certificate-generator/app/options/options.go @@ -40,10 +40,13 @@ type Options struct { // LeaderElection defines the configuration of leader election client. LeaderElection cbc.LeaderElectionConfiguration - // Webhook name represents the name of the webhook server associated with the certificate. - WebhookName string + // WebhookNames represents the names of the webhooks in the webhook server (i.e. controller-manager-nebula-operator-webhook, autoscaler-nebula-operator-webhook) + WebhookNames *[]string - // Webhook namespace represents the namespace of the webhook server associated with the certificate. + // WebhookServerName represents the name of the webhook server associated with the certificate. + WebhookServerName string + + // WebhookNamespace represents the namespace of the webhook server associated with the certificate. WebhookNamespace string // CertDir represents the directory to save the certificates in @@ -51,6 +54,15 @@ type Options struct { // CertValidity represents the number of days the certificate should be valid for CertValidity int64 + + // SecretName represents the name of the secret used to store the webhook certificates + SecretName string + + // SecretNamespace represents the namespace of the secret used to store the webhook certificates + SecretNamespace string + + // KubernetesDomain represents the custom kubernetes domain needed in the certificate + KubernetesDomain string } func NewOptions() *Options { @@ -80,8 +92,12 @@ func (o *Options) AddFlags(flags *pflag.FlagSet) { flags.DurationVar(&o.LeaderElection.RetryPeriod.Duration, "leader-elect-retry-period", defaultElectionRetryPeriod.Duration, ""+ "The duration the clients should wait between attempting acquisition and renewal "+ "of a leadership. 
This is only applicable if leader election is enabled.") - flags.StringVar(&o.WebhookName, "webhook-name", "nebulaWebhook", "Specifies the name of the webhook to associate with the certificate") + o.WebhookNames = flags.StringSlice("webhook-names", []string{}, "A comma-seperated list of the names of the webhooks supported by the webhook server (i.e. controller-manager-nebula-operator-webhook, autoscaler-nebula-operator-webhook)") + flags.StringVar(&o.WebhookServerName, "webhook-server-name", "nebulaWebhook", "Specifies the name of the webhook to associate with the certificate") flags.StringVar(&o.WebhookNamespace, "webhook-namespace", "default", "Specifies the namespace of the webhook to associate with the certificate") flags.StringVar(&o.CertDir, "certificate-dir", "/etc/cert", "Specifies the directory in which to save the generated webhook certificates") flags.Int64Var(&o.CertValidity, "certificate-validity", 365, "Specifies the number of days the certificate should be valid for") + flags.StringVar(&o.SecretName, "secret-name", "nebula-operator-webhook-secret", "Specifies the name of the webhook to associate with the certificate") + flags.StringVar(&o.SecretNamespace, "secret-namespace", "default", "Specifies the namespace of the webhook to associate with the certificate") + flags.StringVar(&o.KubernetesDomain, "kube-domain", "cluster.local", "Specifies the namespace of the webhook to associate with the certificate") } diff --git a/cmd/controller-manager/app/controller-manager.go b/cmd/controller-manager/app/controller-manager.go index 10f43f96..23ada250 100644 --- a/cmd/controller-manager/app/controller-manager.go +++ b/cmd/controller-manager/app/controller-manager.go @@ -20,6 +20,9 @@ import ( "context" "flag" "net/http" + "os" + "path/filepath" + "time" kruisev1beta1 "github.com/openkruise/kruise-api/apps/v1beta1" "github.com/spf13/cobra" @@ -218,6 +221,13 @@ func Run(ctx context.Context, opts *options.Options) error { return err } + if 
opts.EnableAdmissionWebhook { + for !fileExists(filepath.Join(opts.WebhookOpts.CertDir, "tls.crt")) { + klog.Info("waiting for webhook certificate...") + time.Sleep(2 * time.Second) + } + } + if err := mgr.Start(ctx); err != nil { klog.Errorf("nebula-controller-manager exits unexpectedly: %v", err) return err @@ -225,3 +235,11 @@ func Run(ctx context.Context, opts *options.Options) error { return nil } + +func fileExists(filename string) bool { + _, err := os.Stat(filename) + if os.IsNotExist(err) { + return false + } + return err == nil +} From 8157a138d40f0e7c31f7746c7fabfad7eb4d419a Mon Sep 17 00:00:00 2001 From: kevinliu24 Date: Sat, 8 Jun 2024 15:28:35 +0800 Subject: [PATCH 05/13] Fix issues in certificate-generator --- .../admission-webhook-registration.yaml | 4 ++ .../controller-manager-deployment.yaml | 58 +++++++++---------- cmd/autoscaler/app/autoscaler.go | 31 +++++++--- .../app/certificate-generator.go | 15 +---- .../app/controller-manager.go | 27 +++++++-- 5 files changed, 79 insertions(+), 56 deletions(-) diff --git a/charts/nebula-operator/templates/admission-webhook-registration.yaml b/charts/nebula-operator/templates/admission-webhook-registration.yaml index 0bdec1a5..85bf66d3 100644 --- a/charts/nebula-operator/templates/admission-webhook-registration.yaml +++ b/charts/nebula-operator/templates/admission-webhook-registration.yaml @@ -11,6 +11,7 @@ metadata: labels: {{- include "admission-webhook.labels" . 
| nindent 4 }} webhooks: + {{- if .Values.admissionWebhook.contollerManagerAdmissionWebhook.create }} - name: nebulaclustervalidating.nebula-graph.io admissionReviewVersions: - v1 @@ -34,7 +35,9 @@ webhooks: scope: "*" sideEffects: None timeoutSeconds: 3 + {{- end }} + {{- if .Values.admissionWebhook.autoscalerAdmissionWebhook.create }} - name: nebulaautoscalingvalidating.nebula-graph.io admissionReviewVersions: - v1 @@ -57,5 +60,6 @@ webhooks: scope: "*" sideEffects: None timeoutSeconds: 3 + {{- end }} {{- end }} diff --git a/charts/nebula-operator/templates/controller-manager-deployment.yaml b/charts/nebula-operator/templates/controller-manager-deployment.yaml index d9b57f25..f534cee8 100644 --- a/charts/nebula-operator/templates/controller-manager-deployment.yaml +++ b/charts/nebula-operator/templates/controller-manager-deployment.yaml @@ -37,6 +37,34 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} containers: + {{- if and (or .Values.admissionWebhook.contollerManagerAdmissionWebhook.create .Values.admissionWebhook.autoscalerAdmissionWebhook.create) (not .Values.admissionWebhook.useCertManager) }} + - name: cert-rotation + image: {{ .Values.image.nebulaOperator.image }} + imagePullPolicy: {{ .Values.image.nebulaOperator.imagePullPolicy }} + command: + - /usr/local/bin/certificate-generator + args: + - --leader-elect + - --leader-elect-resource-namespace={{ template "nebula-operator.namespace" . }} + - --v={{ .Values.controllerManager.verbosity }} + - --webhook-namespace={{ template "nebula-operator.namespace" . }} + - --webhook-server-name={{ template "admission-webhook.name" . }}-validating + - --webhook-names={{ template "controller-manager-admission-webhook.name" . }}-service,{{ template "autoscaler-admission-webhook.name" . }}-service + - --certificate-dir=/tmp/k8s-webhook-server/serving-certs + - --certificate-validity=1 + - --secret-namespace={{ template "nebula-operator.namespace" . }} + - --secret-name={{ template "admission-webhook.name" . 
}}-secret + - --kube-domain={{ default "cluster.local" .Values.kubernetesClusterDomain }} + env: + {{- if .Values.kubernetesClusterDomain }} + - name: KUBERNETES_CLUSTER_DOMAIN + value: {{ .Values.kubernetesClusterDomain }} + {{- end }} + volumeMounts: + - mountPath: /tmp/k8s-webhook-server/serving-certs + name: cert + readOnly: true + {{- end}} - name: controller-manager image: {{ .Values.image.nebulaOperator.image }} imagePullPolicy: {{ .Values.image.nebulaOperator.imagePullPolicy }} @@ -147,40 +175,10 @@ spec: allowPrivilegeEscalation: false {{- if .Values.admissionWebhook.autoscalerAdmissionWebhook.create }} volumeMounts: - {{- if .Values.admissionWebhook.autoscalerAdmissionWebhook.create }} - mountPath: /tmp/k8s-webhook-server/serving-certs name: cert readOnly: true {{- end }} - {{- end }} - {{- if and (or .Values.admissionWebhook.contollerManagerAdmissionWebhook.create .Values.admissionWebhook.autoscalerAdmissionWebhook.create) (not .Values.admissionWebhook.useCertManager) }} - - name: cert-rotation - image: {{ .Values.image.nebulaOperator.image }} - imagePullPolicy: {{ .Values.image.nebulaOperator.imagePullPolicy }} - command: - - /usr/local/bin/certificate-generator - args: - - --leader-elect - - --leader-elect-resource-namespace={{ template "nebula-operator.namespace" . }} - - --v={{ .Values.controllerManager.verbosity }} - - --webhook-namespace={{ template "nebula-operator.namespace" . }} - - --webhook-server-name={{ template "admission-webhook.name" . }}-validating - - --webhook-names={{ template "controller-manager-admission-webhook.name" . }}-service,{{ template "autoscaler-admission-webhook.name" . }}-service - - --certificate-dir=/tmp/k8s-webhook-server/serving-certs - - --certificate-validity=1 - - --secret-namespace={{ template "nebula-operator.namespace" . }} - - --secret-name={{ template "admission-webhook.name" . 
}}-secret - - --kube-domain={{ default "cluster.local" .Values.kubernetesClusterDomain }} - env: - {{- if .Values.kubernetesClusterDomain }} - - name: KUBERNETES_CLUSTER_DOMAIN - value: {{ .Values.kubernetesClusterDomain }} - {{- end }} - volumeMounts: - - mountPath: /tmp/k8s-webhook-server/serving-certs - name: cert - readOnly: true - {{- end}} {{- with .Values.controllerManager.sidecarContainers }} {{- range $name, $spec := $.Values.controllerManager.sidecarContainers }} - name: {{ $name }} diff --git a/cmd/autoscaler/app/autoscaler.go b/cmd/autoscaler/app/autoscaler.go index 9fbb23c7..cc2b82c3 100644 --- a/cmd/autoscaler/app/autoscaler.go +++ b/cmd/autoscaler/app/autoscaler.go @@ -18,7 +18,9 @@ package app import ( "context" + "encoding/pem" "flag" + "fmt" "net/http" "os" "path/filepath" @@ -163,10 +165,15 @@ func Run(ctx context.Context, opts *options.Options) error { return err } - if !opts.EnableAdmissionWebhook { - for !fileExists(filepath.Join(opts.WebhookOpts.CertDir, "tls.crt")) { + if opts.EnableAdmissionWebhook { + pemFile, err := isPemFile(filepath.Join(opts.WebhookOpts.CertDir, "tls.crt")) + for !pemFile { + if err != nil { + klog.Errorf("Error waiting for webhook certificate: %v", err) + } klog.Info("waiting for webhook certificate...") - time.Sleep(2 * time.Second) + time.Sleep(10 * time.Second) + pemFile, err = isPemFile(filepath.Join(opts.WebhookOpts.CertDir, "tls.crt")) } } @@ -178,10 +185,18 @@ func Run(ctx context.Context, opts *options.Options) error { return nil } -func fileExists(filename string) bool { - _, err := os.Stat(filename) - if os.IsNotExist(err) { - return false +func isPemFile(filePath string) (bool, error) { + // Read file contents + fileContents, err := os.ReadFile(filePath) + if err != nil { + return false, fmt.Errorf("failed to read certificate file %v: %v", filePath, err) } - return err == nil + + // Decode PEM data + block, _ := pem.Decode(fileContents) + if block == nil { + return false, nil + } + + return true, nil } diff 
--git a/cmd/certificate-generator/app/certificate-generator.go b/cmd/certificate-generator/app/certificate-generator.go index 680783bd..e4211609 100644 --- a/cmd/certificate-generator/app/certificate-generator.go +++ b/cmd/certificate-generator/app/certificate-generator.go @@ -23,13 +23,11 @@ import ( "crypto/rand" "crypto/x509" "crypto/x509/pkix" - "encoding/base64" "encoding/pem" "flag" "fmt" "math/big" "os" - "path/filepath" "time" cron "github.com/robfig/cron/v3" @@ -177,7 +175,7 @@ func doCertRotation(clientset *kubernetes.Clientset, opts *options.Options) erro klog.Infof("Certificates generated successfully for webhook server [%v/%v]", opts.WebhookNamespace, opts.WebhookServerName) klog.Infof("Updating ca bundle for webhook server [%v/%v]", opts.WebhookNamespace, opts.WebhookServerName) - err = updateWebhookConfiguration(clientset, opts) + err = updateWebhookConfiguration(clientset, opts, caCert) if err != nil { klog.Errorf("Error updating ca bundle for webhook server [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookServerName, err) return err @@ -287,21 +285,14 @@ func updateSecret(clientset *kubernetes.Clientset, certPEM, keyPEM, caPEM []byte } // Update the webhook configuration with the new CA bundle -func updateWebhookConfiguration(client *kubernetes.Clientset, opts *options.Options) error { - caCert, err := os.ReadFile(filepath.Join(opts.CertDir, "ca.crt")) - if err != nil { - return fmt.Errorf("failed to read CA certificate: %v", err) - } - - caBundle := base64.StdEncoding.EncodeToString(caCert) - +func updateWebhookConfiguration(client *kubernetes.Clientset, opts *options.Options, caCert []byte) error { webhook, err := client.AdmissionregistrationV1().ValidatingWebhookConfigurations().Get(context.Background(), opts.WebhookServerName, metav1.GetOptions{}) if err != nil { return fmt.Errorf("failed to get webhook configuration: %v", err) } for i := range webhook.Webhooks { - webhook.Webhooks[i].ClientConfig.CABundle = []byte(caBundle) + 
webhook.Webhooks[i].ClientConfig.CABundle = caCert } _, err = client.AdmissionregistrationV1().ValidatingWebhookConfigurations().Update(context.Background(), webhook, metav1.UpdateOptions{}) diff --git a/cmd/controller-manager/app/controller-manager.go b/cmd/controller-manager/app/controller-manager.go index 23ada250..1e1e413d 100644 --- a/cmd/controller-manager/app/controller-manager.go +++ b/cmd/controller-manager/app/controller-manager.go @@ -18,7 +18,9 @@ package app import ( "context" + "encoding/pem" "flag" + "fmt" "net/http" "os" "path/filepath" @@ -222,9 +224,14 @@ func Run(ctx context.Context, opts *options.Options) error { } if opts.EnableAdmissionWebhook { - for !fileExists(filepath.Join(opts.WebhookOpts.CertDir, "tls.crt")) { + pemFile, err := isPemFile(filepath.Join(opts.WebhookOpts.CertDir, "tls.crt")) + for !pemFile { + if err != nil { + klog.Errorf("Error waiting for webhook certificate: %v", err) + } klog.Info("waiting for webhook certificate...") time.Sleep(2 * time.Second) + pemFile, err = isPemFile(filepath.Join(opts.WebhookOpts.CertDir, "tls.crt")) } } @@ -236,10 +243,18 @@ func Run(ctx context.Context, opts *options.Options) error { return nil } -func fileExists(filename string) bool { - _, err := os.Stat(filename) - if os.IsNotExist(err) { - return false +func isPemFile(filePath string) (bool, error) { + // Read file contents + fileContents, err := os.ReadFile(filePath) + if err != nil { + return false, fmt.Errorf("failed to read certificate file %v: %v", filePath, err) + } + + // Decode PEM data + block, _ := pem.Decode(fileContents) + if block == nil { + return false, nil } - return err == nil + + return true, nil } From a9d32517736863397b8f152ab95a1ff98a8224be Mon Sep 17 00:00:00 2001 From: kevinliu24 Date: Tue, 11 Jun 2024 15:02:58 +0800 Subject: [PATCH 06/13] Fix issues in webhook cert-rotation --- .../controller-manager-deployment.yaml | 52 ++++++-- cmd/autoscaler/app/autoscaler.go | 33 ----- .../app/certificate-generator.go | 121 
+++++++++--------- .../app/options/options.go | 4 + .../app/controller-manager.go | 33 ----- 5 files changed, 107 insertions(+), 136 deletions(-) diff --git a/charts/nebula-operator/templates/controller-manager-deployment.yaml b/charts/nebula-operator/templates/controller-manager-deployment.yaml index f534cee8..6ef55521 100644 --- a/charts/nebula-operator/templates/controller-manager-deployment.yaml +++ b/charts/nebula-operator/templates/controller-manager-deployment.yaml @@ -32,29 +32,24 @@ spec: spec: serviceAccountName: {{ template "controller-manager.name" . }}-sa {{- include "nebula-operator.imagePullSecrets" . | indent 6 }} - {{- with .Values.controllerManager.extraInitContainers }} + {{- if and (or .Values.admissionWebhook.contollerManagerAdmissionWebhook.create .Values.admissionWebhook.autoscalerAdmissionWebhook.create) (not .Values.admissionWebhook.useCertManager) }} initContainers: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - {{- if and (or .Values.admissionWebhook.contollerManagerAdmissionWebhook.create .Values.admissionWebhook.autoscalerAdmissionWebhook.create) (not .Values.admissionWebhook.useCertManager) }} - - name: cert-rotation + - name: cert-init image: {{ .Values.image.nebulaOperator.image }} imagePullPolicy: {{ .Values.image.nebulaOperator.imagePullPolicy }} command: - /usr/local/bin/certificate-generator args: - - --leader-elect - - --leader-elect-resource-namespace={{ template "nebula-operator.namespace" . }} - --v={{ .Values.controllerManager.verbosity }} - --webhook-namespace={{ template "nebula-operator.namespace" . }} - --webhook-server-name={{ template "admission-webhook.name" . }}-validating - --webhook-names={{ template "controller-manager-admission-webhook.name" . }}-service,{{ template "autoscaler-admission-webhook.name" . }}-service - --certificate-dir=/tmp/k8s-webhook-server/serving-certs - - --certificate-validity=1 + - --certificate-validity=5 - --secret-namespace={{ template "nebula-operator.namespace" . 
}} - --secret-name={{ template "admission-webhook.name" . }}-secret - --kube-domain={{ default "cluster.local" .Values.kubernetesClusterDomain }} + - --init-only=true env: {{- if .Values.kubernetesClusterDomain }} - name: KUBERNETES_CLUSTER_DOMAIN @@ -64,7 +59,16 @@ spec: - mountPath: /tmp/k8s-webhook-server/serving-certs name: cert readOnly: true - {{- end}} + {{- with .Values.controllerManager.extraInitContainers }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- else}} + {{- with .Values.controllerManager.extraInitContainers }} + initContainers: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- end}} + containers: - name: controller-manager image: {{ .Values.image.nebulaOperator.image }} imagePullPolicy: {{ .Values.image.nebulaOperator.imagePullPolicy }} @@ -179,6 +183,34 @@ spec: name: cert readOnly: true {{- end }} + {{- if and (or .Values.admissionWebhook.contollerManagerAdmissionWebhook.create .Values.admissionWebhook.autoscalerAdmissionWebhook.create) (not .Values.admissionWebhook.useCertManager) }} + - name: cert-rotation + image: {{ .Values.image.nebulaOperator.image }} + imagePullPolicy: {{ .Values.image.nebulaOperator.imagePullPolicy }} + command: + - /usr/local/bin/certificate-generator + args: + - --leader-elect + - --leader-elect-resource-namespace={{ template "nebula-operator.namespace" . }} + - --v={{ .Values.controllerManager.verbosity }} + - --webhook-namespace={{ template "nebula-operator.namespace" . }} + - --webhook-server-name={{ template "admission-webhook.name" . }}-validating + - --webhook-names={{ template "controller-manager-admission-webhook.name" . }}-service,{{ template "autoscaler-admission-webhook.name" . }}-service + - --certificate-dir=/tmp/k8s-webhook-server/serving-certs + - --certificate-validity=5 + - --secret-namespace={{ template "nebula-operator.namespace" . }} + - --secret-name={{ template "admission-webhook.name" . 
}}-secret + - --kube-domain={{ default "cluster.local" .Values.kubernetesClusterDomain }} + env: + {{- if .Values.kubernetesClusterDomain }} + - name: KUBERNETES_CLUSTER_DOMAIN + value: {{ .Values.kubernetesClusterDomain }} + {{- end }} + volumeMounts: + - mountPath: /tmp/k8s-webhook-server/serving-certs + name: cert + readOnly: true + {{- end}} {{- with .Values.controllerManager.sidecarContainers }} {{- range $name, $spec := $.Values.controllerManager.sidecarContainers }} - name: {{ $name }} diff --git a/cmd/autoscaler/app/autoscaler.go b/cmd/autoscaler/app/autoscaler.go index cc2b82c3..10c14366 100644 --- a/cmd/autoscaler/app/autoscaler.go +++ b/cmd/autoscaler/app/autoscaler.go @@ -18,13 +18,8 @@ package app import ( "context" - "encoding/pem" "flag" - "fmt" "net/http" - "os" - "path/filepath" - "time" "github.com/spf13/cobra" cliflag "k8s.io/component-base/cli/flag" @@ -165,18 +160,6 @@ func Run(ctx context.Context, opts *options.Options) error { return err } - if opts.EnableAdmissionWebhook { - pemFile, err := isPemFile(filepath.Join(opts.WebhookOpts.CertDir, "tls.crt")) - for !pemFile { - if err != nil { - klog.Errorf("Error waiting for webhook certificate: %v", err) - } - klog.Info("waiting for webhook certificate...") - time.Sleep(10 * time.Second) - pemFile, err = isPemFile(filepath.Join(opts.WebhookOpts.CertDir, "tls.crt")) - } - } - if err := mgr.Start(ctx); err != nil { klog.Errorf("nebula-autoscaler exits unexpectedly: %v", err) return err @@ -184,19 +167,3 @@ func Run(ctx context.Context, opts *options.Options) error { return nil } - -func isPemFile(filePath string) (bool, error) { - // Read file contents - fileContents, err := os.ReadFile(filePath) - if err != nil { - return false, fmt.Errorf("failed to read certificate file %v: %v", filePath, err) - } - - // Decode PEM data - block, _ := pem.Decode(fileContents) - if block == nil { - return false, nil - } - - return true, nil -} diff --git a/cmd/certificate-generator/app/certificate-generator.go 
b/cmd/certificate-generator/app/certificate-generator.go index e4211609..cbb2880b 100644 --- a/cmd/certificate-generator/app/certificate-generator.go +++ b/cmd/certificate-generator/app/certificate-generator.go @@ -77,77 +77,78 @@ func Run(ctx context.Context, opts *options.Options) error { klog.Errorf("Error building Kubernetes clientset: %v", err.Error()) } - opts.CertValidity = opts.CertValidity * 24 * 60 - - // rotate cert once before starting cronjob - err = rotateCertificate(ctx, clientset, opts) - if err != nil { - klog.Errorf("Error rotating certificate for webhook [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookServerName, err) - os.Exit(1) - } - - klog.Infof("Starting cert rotation cron job for webhook [%v/%v]", opts.WebhookNamespace, opts.WebhookServerName) - c := cron.New() - // rotate cert 1 hour before expiration date - c.AddFunc(fmt.Sprintf("@every %vm", opts.CertValidity-60), func() { - err := rotateCertificate(ctx, clientset, opts) + if opts.InitOnly { + klog.Infof("Init only detected. Doing cert initialization for webhook [%v/%v]", opts.WebhookNamespace, opts.WebhookServerName) + err := doCertRotation(clientset, opts) if err != nil { klog.Errorf("Error rotating certificate for webhook [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookServerName, err) os.Exit(1) } - }) - c.Start() - klog.Infof("Cert rotation crontab started for webhook [%v/%v]. 
Will rotate every %v minutes", opts.WebhookNamespace, opts.WebhookServerName, opts.CertValidity) + } else { + if opts.LeaderElection.LeaderElect { + klog.Info("Doing leader election") + id, err := os.Hostname() + if err != nil { + klog.Errorf("Failed to get hostname: %v", err) + } + + rl, err := resourcelock.New(opts.LeaderElection.ResourceLock, + opts.LeaderElection.ResourceNamespace, + opts.LeaderElection.ResourceName, + clientset.CoreV1(), + clientset.CoordinationV1(), + resourcelock.ResourceLockConfig{ + Identity: id, + }) + if err != nil { + klog.Errorf("Error creating resource lock: %v", err) + } + + leaderelection.RunOrDie(ctx, leaderelection.LeaderElectionConfig{ + Lock: rl, + LeaseDuration: opts.LeaderElection.LeaseDuration.Duration, + RenewDeadline: opts.LeaderElection.RenewDeadline.Duration, + RetryPeriod: opts.LeaderElection.RetryPeriod.Duration, + Callbacks: leaderelection.LeaderCallbacks{ + OnStartedLeading: func(ctx context.Context) { + klog.Info("Leader election successful. Starting certificate rotation") + err = rotateCertificate(clientset, opts) + if err != nil { + klog.Errorf("Failed to rotate certificates: %v", err) + } + }, + OnStoppedLeading: func() { + klog.Info("Lost leadership, stopping") + }, + }, + }) + } else { + klog.Infof("Leader election skipped. 
Starting certificate rotation") + err := rotateCertificate(clientset, opts) + if err != nil { + klog.Errorf("Failed to rotate certificates: %v", err) + } + } + } - // keep the program running - select {} + return nil } -func rotateCertificate(ctx context.Context, clientset *kubernetes.Clientset, opts *options.Options) error { - if opts.LeaderElection.LeaderElect { - klog.Info("Doing leader election") - id, err := os.Hostname() - if err != nil { - klog.Errorf("Failed to get hostname: %v", err) - } - - rl, err := resourcelock.New(opts.LeaderElection.ResourceLock, - opts.LeaderElection.ResourceNamespace, - opts.LeaderElection.ResourceName, - clientset.CoreV1(), - clientset.CoordinationV1(), - resourcelock.ResourceLockConfig{ - Identity: id, - }) - if err != nil { - klog.Errorf("Error creating resource lock: %v", err) - } +func rotateCertificate(clientset *kubernetes.Clientset, opts *options.Options) error { + //opts.CertValidity = opts.CertValidity * 24 * 60 - leaderelection.RunOrDie(ctx, leaderelection.LeaderElectionConfig{ - Lock: rl, - LeaseDuration: opts.LeaderElection.LeaseDuration.Duration, - RenewDeadline: opts.LeaderElection.RenewDeadline.Duration, - RetryPeriod: opts.LeaderElection.RetryPeriod.Duration, - Callbacks: leaderelection.LeaderCallbacks{ - OnStartedLeading: func(ctx context.Context) { - klog.Info("Leader election successful. Starting certificate rotation") - err = doCertRotation(clientset, opts) - if err != nil { - klog.Errorf("Failed to rotate certificates: %v", err) - } - }, - OnStoppedLeading: func() { - klog.Info("Lost leadership, stopping") - }, - }, - }) - } else { - klog.Infof("Leader election skipped. 
Starting certificate rotation") + klog.Infof("Starting cert rotation cron job for webhook [%v/%v]", opts.WebhookNamespace, opts.WebhookServerName) + c := cron.New() + // rotate cert 1 hour before expiration date + c.AddFunc(fmt.Sprintf("@every %vm", opts.CertValidity-1), func() { err := doCertRotation(clientset, opts) if err != nil { - klog.Errorf("Failed to rotate certificates: %v", err) + klog.Errorf("Error rotating certificate for webhook [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookServerName, err) + os.Exit(1) } - } + }) + klog.Infof("Cert rotation crontab started for webhook [%v/%v]. Will rotate every %v minutes", opts.WebhookNamespace, opts.WebhookServerName, opts.CertValidity) + c.Run() return nil } diff --git a/cmd/certificate-generator/app/options/options.go b/cmd/certificate-generator/app/options/options.go index 1719db2c..e0f12704 100644 --- a/cmd/certificate-generator/app/options/options.go +++ b/cmd/certificate-generator/app/options/options.go @@ -63,6 +63,9 @@ type Options struct { // KubernetesDomain represents the custom kubernetes domain needed in the certificate KubernetesDomain string + + // InitOnly repersent whether this script is running in an init container. 
It will not start the cronjob + InitOnly bool } func NewOptions() *Options { @@ -100,4 +103,5 @@ func (o *Options) AddFlags(flags *pflag.FlagSet) { flags.StringVar(&o.SecretName, "secret-name", "nebula-operator-webhook-secret", "Specifies the name of the webhook to associate with the certificate") flags.StringVar(&o.SecretNamespace, "secret-namespace", "default", "Specifies the namespace of the webhook to associate with the certificate") flags.StringVar(&o.KubernetesDomain, "kube-domain", "cluster.local", "Specifies the namespace of the webhook to associate with the certificate") + flags.BoolVar(&o.InitOnly, "init-only", false, "Specifies whether this script is running in a init container") } diff --git a/cmd/controller-manager/app/controller-manager.go b/cmd/controller-manager/app/controller-manager.go index 1e1e413d..10f43f96 100644 --- a/cmd/controller-manager/app/controller-manager.go +++ b/cmd/controller-manager/app/controller-manager.go @@ -18,13 +18,8 @@ package app import ( "context" - "encoding/pem" "flag" - "fmt" "net/http" - "os" - "path/filepath" - "time" kruisev1beta1 "github.com/openkruise/kruise-api/apps/v1beta1" "github.com/spf13/cobra" @@ -223,18 +218,6 @@ func Run(ctx context.Context, opts *options.Options) error { return err } - if opts.EnableAdmissionWebhook { - pemFile, err := isPemFile(filepath.Join(opts.WebhookOpts.CertDir, "tls.crt")) - for !pemFile { - if err != nil { - klog.Errorf("Error waiting for webhook certificate: %v", err) - } - klog.Info("waiting for webhook certificate...") - time.Sleep(2 * time.Second) - pemFile, err = isPemFile(filepath.Join(opts.WebhookOpts.CertDir, "tls.crt")) - } - } - if err := mgr.Start(ctx); err != nil { klog.Errorf("nebula-controller-manager exits unexpectedly: %v", err) return err @@ -242,19 +225,3 @@ func Run(ctx context.Context, opts *options.Options) error { return nil } - -func isPemFile(filePath string) (bool, error) { - // Read file contents - fileContents, err := os.ReadFile(filePath) - if err != 
nil { - return false, fmt.Errorf("failed to read certificate file %v: %v", filePath, err) - } - - // Decode PEM data - block, _ := pem.Decode(fileContents) - if block == nil { - return false, nil - } - - return true, nil -} From ca222ecf92b769fd67f3f7c4bf267d986115593d Mon Sep 17 00:00:00 2001 From: kevinliu24 Date: Tue, 11 Jun 2024 15:29:28 +0800 Subject: [PATCH 07/13] Fix default certificate validity --- .../templates/controller-manager-deployment.yaml | 2 +- cmd/certificate-generator/app/certificate-generator.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/charts/nebula-operator/templates/controller-manager-deployment.yaml b/charts/nebula-operator/templates/controller-manager-deployment.yaml index 6ef55521..3eb51568 100644 --- a/charts/nebula-operator/templates/controller-manager-deployment.yaml +++ b/charts/nebula-operator/templates/controller-manager-deployment.yaml @@ -45,7 +45,7 @@ spec: - --webhook-server-name={{ template "admission-webhook.name" . }}-validating - --webhook-names={{ template "controller-manager-admission-webhook.name" . }}-service,{{ template "autoscaler-admission-webhook.name" . }}-service - --certificate-dir=/tmp/k8s-webhook-server/serving-certs - - --certificate-validity=5 + - --certificate-validity=1 - --secret-namespace={{ template "nebula-operator.namespace" . }} - --secret-name={{ template "admission-webhook.name" . 
}}-secret - --kube-domain={{ default "cluster.local" .Values.kubernetesClusterDomain }} diff --git a/cmd/certificate-generator/app/certificate-generator.go b/cmd/certificate-generator/app/certificate-generator.go index cbb2880b..7a04d4bf 100644 --- a/cmd/certificate-generator/app/certificate-generator.go +++ b/cmd/certificate-generator/app/certificate-generator.go @@ -135,12 +135,12 @@ func Run(ctx context.Context, opts *options.Options) error { } func rotateCertificate(clientset *kubernetes.Clientset, opts *options.Options) error { - //opts.CertValidity = opts.CertValidity * 24 * 60 + opts.CertValidity = opts.CertValidity * 24 * 60 klog.Infof("Starting cert rotation cron job for webhook [%v/%v]", opts.WebhookNamespace, opts.WebhookServerName) c := cron.New() // rotate cert 1 hour before expiration date - c.AddFunc(fmt.Sprintf("@every %vm", opts.CertValidity-1), func() { + c.AddFunc(fmt.Sprintf("@every %vm", opts.CertValidity-60), func() { err := doCertRotation(clientset, opts) if err != nil { klog.Errorf("Error rotating certificate for webhook [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookServerName, err) From efc790593962eb1bd254a841ebaba9ec9075ef6e Mon Sep 17 00:00:00 2001 From: kevinliu24 Date: Wed, 12 Jun 2024 00:53:56 +0800 Subject: [PATCH 08/13] Refactor error return --- cmd/certificate-generator/app/certificate-generator.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cmd/certificate-generator/app/certificate-generator.go b/cmd/certificate-generator/app/certificate-generator.go index 7a04d4bf..a7788458 100644 --- a/cmd/certificate-generator/app/certificate-generator.go +++ b/cmd/certificate-generator/app/certificate-generator.go @@ -75,6 +75,7 @@ func Run(ctx context.Context, opts *options.Options) error { clientset, err := kubernetes.NewForConfig(cfg) if err != nil { klog.Errorf("Error building Kubernetes clientset: %v", err.Error()) + return err } if opts.InitOnly { @@ -82,7 +83,7 @@ func Run(ctx context.Context, opts 
*options.Options) error { err := doCertRotation(clientset, opts) if err != nil { klog.Errorf("Error rotating certificate for webhook [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookServerName, err) - os.Exit(1) + return err } } else { if opts.LeaderElection.LeaderElect { @@ -90,6 +91,7 @@ func Run(ctx context.Context, opts *options.Options) error { id, err := os.Hostname() if err != nil { klog.Errorf("Failed to get hostname: %v", err) + return err } rl, err := resourcelock.New(opts.LeaderElection.ResourceLock, @@ -102,6 +104,7 @@ func Run(ctx context.Context, opts *options.Options) error { }) if err != nil { klog.Errorf("Error creating resource lock: %v", err) + return err } leaderelection.RunOrDie(ctx, leaderelection.LeaderElectionConfig{ @@ -115,6 +118,7 @@ func Run(ctx context.Context, opts *options.Options) error { err = rotateCertificate(clientset, opts) if err != nil { klog.Errorf("Failed to rotate certificates: %v", err) + os.Exit(1) } }, OnStoppedLeading: func() { @@ -127,6 +131,7 @@ func Run(ctx context.Context, opts *options.Options) error { err := rotateCertificate(clientset, opts) if err != nil { klog.Errorf("Failed to rotate certificates: %v", err) + return err } } } @@ -255,7 +260,7 @@ func generateServerCert(caCert []byte, caKey *ecdsa.PrivateKey, opts *options.Op certPem := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: serverCert}) marshalledKey, err := x509.MarshalECPrivateKey(serverKey) if err != nil { - klog.Errorf("Error marshalling webhook server certificate key: %v", err) + return nil, nil, fmt.Errorf("error marshalling webhook server certificate key: %v", err) } keyPem := pem.EncodeToMemory(&pem.Block{Type: "EC PRIVATE KEY", Bytes: marshalledKey}) From 2e99a0e794e4885b13cf5e125dc7614d3ea3a4d7 Mon Sep 17 00:00:00 2001 From: kevinliu24 Date: Wed, 12 Jun 2024 01:01:12 +0800 Subject: [PATCH 09/13] Check error when starting cronjob --- .../app/certificate-generator.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) 
diff --git a/cmd/certificate-generator/app/certificate-generator.go b/cmd/certificate-generator/app/certificate-generator.go index a7788458..fa55999e 100644 --- a/cmd/certificate-generator/app/certificate-generator.go +++ b/cmd/certificate-generator/app/certificate-generator.go @@ -117,7 +117,7 @@ func Run(ctx context.Context, opts *options.Options) error { klog.Info("Leader election successful. Starting certificate rotation") err = rotateCertificate(clientset, opts) if err != nil { - klog.Errorf("Failed to rotate certificates: %v", err) + klog.Errorf("Failed to start cronjob to rotate certificates: %v", err) os.Exit(1) } }, @@ -128,9 +128,9 @@ func Run(ctx context.Context, opts *options.Options) error { }) } else { klog.Infof("Leader election skipped. Starting certificate rotation") - err := rotateCertificate(clientset, opts) + err = rotateCertificate(clientset, opts) if err != nil { - klog.Errorf("Failed to rotate certificates: %v", err) + klog.Errorf("Failed to start cronjob to rotate certificates: %v", err) return err } } @@ -145,13 +145,16 @@ func rotateCertificate(clientset *kubernetes.Clientset, opts *options.Options) e klog.Infof("Starting cert rotation cron job for webhook [%v/%v]", opts.WebhookNamespace, opts.WebhookServerName) c := cron.New() // rotate cert 1 hour before expiration date - c.AddFunc(fmt.Sprintf("@every %vm", opts.CertValidity-60), func() { + _, err := c.AddFunc(fmt.Sprintf("@every %vm", opts.CertValidity-60), func() { err := doCertRotation(clientset, opts) if err != nil { klog.Errorf("Error rotating certificate for webhook [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookServerName, err) os.Exit(1) } }) + if err != nil { + return err + } klog.Infof("Cert rotation crontab started for webhook [%v/%v]. 
Will rotate every %v minutes", opts.WebhookNamespace, opts.WebhookServerName, opts.CertValidity) c.Run() From 4479aa8c31539ccb90fe7938789e935374737de5 Mon Sep 17 00:00:00 2001 From: kevinliu24 Date: Wed, 12 Jun 2024 01:07:25 +0800 Subject: [PATCH 10/13] add parameter for cert validity --- .../templates/controller-manager-deployment.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/charts/nebula-operator/templates/controller-manager-deployment.yaml b/charts/nebula-operator/templates/controller-manager-deployment.yaml index 3eb51568..41293490 100644 --- a/charts/nebula-operator/templates/controller-manager-deployment.yaml +++ b/charts/nebula-operator/templates/controller-manager-deployment.yaml @@ -45,7 +45,7 @@ spec: - --webhook-server-name={{ template "admission-webhook.name" . }}-validating - --webhook-names={{ template "controller-manager-admission-webhook.name" . }}-service,{{ template "autoscaler-admission-webhook.name" . }}-service - --certificate-dir=/tmp/k8s-webhook-server/serving-certs - - --certificate-validity=1 + - --certificate-validity={{ .Values.admissionWebhook.certValidity }} - --secret-namespace={{ template "nebula-operator.namespace" . }} - --secret-name={{ template "admission-webhook.name" . }}-secret - --kube-domain={{ default "cluster.local" .Values.kubernetesClusterDomain }} @@ -197,7 +197,7 @@ spec: - --webhook-server-name={{ template "admission-webhook.name" . }}-validating - --webhook-names={{ template "controller-manager-admission-webhook.name" . }}-service,{{ template "autoscaler-admission-webhook.name" . }}-service - --certificate-dir=/tmp/k8s-webhook-server/serving-certs - - --certificate-validity=5 + - --certificate-validity={{ .Values.admissionWebhook.certValidity }} - --secret-namespace={{ template "nebula-operator.namespace" . }} - --secret-name={{ template "admission-webhook.name" . 
}}-secret - --kube-domain={{ default "cluster.local" .Values.kubernetesClusterDomain }} From 3097f6446dec831a730dcf98763df3e8c5fa9601 Mon Sep 17 00:00:00 2001 From: kevinliu24 Date: Wed, 12 Jun 2024 01:08:41 +0800 Subject: [PATCH 11/13] update values file --- charts/nebula-operator/values.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/charts/nebula-operator/values.yaml b/charts/nebula-operator/values.yaml index d4a5a5c6..9d70450e 100644 --- a/charts/nebula-operator/values.yaml +++ b/charts/nebula-operator/values.yaml @@ -73,6 +73,8 @@ admissionWebhook: # The TCP port the Webhook server binds to. (default 9448) webhookBindPort: 9448 useCertManager: false + # Number of days. Only needed if useCertManager is false. (default 1) + certValidity: 1 scheduler: create: true From af26c64dc27193d8a091277b6dc8e74f3f5486a6 Mon Sep 17 00:00:00 2001 From: kevinliu24 Date: Thu, 13 Jun 2024 15:24:49 +0800 Subject: [PATCH 12/13] Fix pull request according to review. --- Dockerfile | 1 - Makefile | 1 - .../controller-manager-deployment.yaml | 74 +---- cmd/autoscaler/app/autoscaler.go | 15 + .../app/certificate-generator.go | 313 ------------------ .../app/options/options.go | 107 ------ cmd/certificate-generator/main.go | 32 -- .../app/controller-manager.go | 15 + pkg/cert-rotation/certificate-generator.go | 292 ++++++++++++++++ pkg/flag/webhook/webhook.go | 28 ++ 10 files changed, 364 insertions(+), 514 deletions(-) delete mode 100644 cmd/certificate-generator/app/certificate-generator.go delete mode 100644 cmd/certificate-generator/app/options/options.go delete mode 100644 cmd/certificate-generator/main.go create mode 100644 pkg/cert-rotation/certificate-generator.go diff --git a/Dockerfile b/Dockerfile index 77a34d0e..d3dd15de 100644 --- a/Dockerfile +++ b/Dockerfile @@ -12,7 +12,6 @@ RUN if [ "$USERNAME" = "ng-user" ]; then \ ADD bin/${TARGETDIR}/controller-manager /usr/local/bin/controller-manager ADD bin/${TARGETDIR}/autoscaler /usr/local/bin/autoscaler ADD 
bin/${TARGETDIR}/scheduler /usr/local/bin/scheduler -ADD bin/${TARGETDIR}/certificate-generator /usr/local/bin/certificate-generator # [Optional] Set the default user. Omit if you want to keep the default as root. USER $USERNAME diff --git a/Makefile b/Makefile index e8797be7..c07c4213 100644 --- a/Makefile +++ b/Makefile @@ -79,7 +79,6 @@ build-operator: ## Build operator related binary. $(GO_BUILD) -ldflags '$(LDFLAGS)' -o bin/$(TARGETDIR)/controller-manager cmd/controller-manager/main.go $(GO_BUILD) -ldflags '$(LDFLAGS)' -o bin/$(TARGETDIR)/autoscaler cmd/autoscaler/main.go $(GO_BUILD) -ldflags '$(LDFLAGS)' -o bin/$(TARGETDIR)/scheduler cmd/scheduler/main.go - $(GO_BUILD) -ldflags '$(LDFLAGS)' -o bin/$(TARGETDIR)/certificate-generator cmd/certificate-generator/main.go build-provisioner: ## Build provisioner binary. $(GO_BUILD) -ldflags '$(LDFLAGS)' -o bin/$(TARGETDIR)/local-pv-provisioner cmd/provisioner/main.go diff --git a/charts/nebula-operator/templates/controller-manager-deployment.yaml b/charts/nebula-operator/templates/controller-manager-deployment.yaml index 41293490..600281fd 100644 --- a/charts/nebula-operator/templates/controller-manager-deployment.yaml +++ b/charts/nebula-operator/templates/controller-manager-deployment.yaml @@ -32,42 +32,10 @@ spec: spec: serviceAccountName: {{ template "controller-manager.name" . }}-sa {{- include "nebula-operator.imagePullSecrets" . | indent 6 }} - {{- if and (or .Values.admissionWebhook.contollerManagerAdmissionWebhook.create .Values.admissionWebhook.autoscalerAdmissionWebhook.create) (not .Values.admissionWebhook.useCertManager) }} - initContainers: - - name: cert-init - image: {{ .Values.image.nebulaOperator.image }} - imagePullPolicy: {{ .Values.image.nebulaOperator.imagePullPolicy }} - command: - - /usr/local/bin/certificate-generator - args: - - --v={{ .Values.controllerManager.verbosity }} - - --webhook-namespace={{ template "nebula-operator.namespace" . 
}} - - --webhook-server-name={{ template "admission-webhook.name" . }}-validating - - --webhook-names={{ template "controller-manager-admission-webhook.name" . }}-service,{{ template "autoscaler-admission-webhook.name" . }}-service - - --certificate-dir=/tmp/k8s-webhook-server/serving-certs - - --certificate-validity={{ .Values.admissionWebhook.certValidity }} - - --secret-namespace={{ template "nebula-operator.namespace" . }} - - --secret-name={{ template "admission-webhook.name" . }}-secret - - --kube-domain={{ default "cluster.local" .Values.kubernetesClusterDomain }} - - --init-only=true - env: - {{- if .Values.kubernetesClusterDomain }} - - name: KUBERNETES_CLUSTER_DOMAIN - value: {{ .Values.kubernetesClusterDomain }} - {{- end }} - volumeMounts: - - mountPath: /tmp/k8s-webhook-server/serving-certs - name: cert - readOnly: true - {{- with .Values.controllerManager.extraInitContainers }} - {{- toYaml . | nindent 8 }} - {{- end }} - {{- else}} {{- with .Values.controllerManager.extraInitContainers }} initContainers: {{- toYaml . | nindent 8 }} {{- end }} - {{- end}} containers: - name: controller-manager image: {{ .Values.image.nebulaOperator.image }} @@ -90,6 +58,13 @@ spec: - --leader-elect-resource-namespace={{ template "nebula-operator.namespace" . }} - --enable-admission-webhook={{ .Values.admissionWebhook.contollerManagerAdmissionWebhook.create }} - --webhook-secure-port={{ .Values.admissionWebhook.contollerManagerAdmissionWebhook.webhookBindPort }} + - --webhook-namespace={{ template "nebula-operator.namespace" . }} + - --webhook-server-name={{ template "admission-webhook.name" . }}-validating + - --webhook-names={{ template "controller-manager-admission-webhook.name" . }}-service,{{ template "autoscaler-admission-webhook.name" . }}-service + - --certificate-validity={{ .Values.admissionWebhook.certValidity }} + - --secret-namespace={{ template "nebula-operator.namespace" . }} + - --secret-name={{ template "admission-webhook.name" . 
}}-secret + - --kube-domain={{ default "cluster.local" .Values.kubernetesClusterDomain }} - --enable-kruise-scheme={{ .Values.enableKruiseScheme }} - --v={{ .Values.controllerManager.verbosity }} {{- if or .Values.kubernetesClusterDomain .Values.controllerManager.env }} @@ -147,6 +122,13 @@ spec: - --v={{ .Values.controllerManager.verbosity }} - --enable-admission-webhook={{ .Values.admissionWebhook.autoscalerAdmissionWebhook.create }} - --webhook-secure-port={{ .Values.admissionWebhook.autoscalerAdmissionWebhook.webhookBindPort }} + - --webhook-namespace={{ template "nebula-operator.namespace" . }} + - --webhook-server-name={{ template "admission-webhook.name" . }}-validating + - --webhook-names={{ template "controller-manager-admission-webhook.name" . }}-service,{{ template "autoscaler-admission-webhook.name" . }}-service + - --certificate-validity={{ .Values.admissionWebhook.certValidity }} + - --secret-namespace={{ template "nebula-operator.namespace" . }} + - --secret-name={{ template "admission-webhook.name" . }}-secret + - --kube-domain={{ default "cluster.local" .Values.kubernetesClusterDomain }} {{- if or .Values.kubernetesClusterDomain .Values.controllerManager.env }} env: {{- if .Values.kubernetesClusterDomain }} @@ -183,34 +165,6 @@ spec: name: cert readOnly: true {{- end }} - {{- if and (or .Values.admissionWebhook.contollerManagerAdmissionWebhook.create .Values.admissionWebhook.autoscalerAdmissionWebhook.create) (not .Values.admissionWebhook.useCertManager) }} - - name: cert-rotation - image: {{ .Values.image.nebulaOperator.image }} - imagePullPolicy: {{ .Values.image.nebulaOperator.imagePullPolicy }} - command: - - /usr/local/bin/certificate-generator - args: - - --leader-elect - - --leader-elect-resource-namespace={{ template "nebula-operator.namespace" . }} - - --v={{ .Values.controllerManager.verbosity }} - - --webhook-namespace={{ template "nebula-operator.namespace" . }} - - --webhook-server-name={{ template "admission-webhook.name" . 
}}-validating - - --webhook-names={{ template "controller-manager-admission-webhook.name" . }}-service,{{ template "autoscaler-admission-webhook.name" . }}-service - - --certificate-dir=/tmp/k8s-webhook-server/serving-certs - - --certificate-validity={{ .Values.admissionWebhook.certValidity }} - - --secret-namespace={{ template "nebula-operator.namespace" . }} - - --secret-name={{ template "admission-webhook.name" . }}-secret - - --kube-domain={{ default "cluster.local" .Values.kubernetesClusterDomain }} - env: - {{- if .Values.kubernetesClusterDomain }} - - name: KUBERNETES_CLUSTER_DOMAIN - value: {{ .Values.kubernetesClusterDomain }} - {{- end }} - volumeMounts: - - mountPath: /tmp/k8s-webhook-server/serving-certs - name: cert - readOnly: true - {{- end}} {{- with .Values.controllerManager.sidecarContainers }} {{- range $name, $spec := $.Values.controllerManager.sidecarContainers }} - name: {{ $name }} diff --git a/cmd/autoscaler/app/autoscaler.go b/cmd/autoscaler/app/autoscaler.go index 10c14366..cbd841e2 100644 --- a/cmd/autoscaler/app/autoscaler.go +++ b/cmd/autoscaler/app/autoscaler.go @@ -35,6 +35,7 @@ import ( "github.com/vesoft-inc/nebula-operator/apis/autoscaling/scheme" "github.com/vesoft-inc/nebula-operator/apis/autoscaling/v1alpha1" "github.com/vesoft-inc/nebula-operator/cmd/autoscaler/app/options" + certrot "github.com/vesoft-inc/nebula-operator/pkg/cert-rotation" "github.com/vesoft-inc/nebula-operator/pkg/controller/autoscaler" klogflag "github.com/vesoft-inc/nebula-operator/pkg/flag/klog" profileflag "github.com/vesoft-inc/nebula-operator/pkg/flag/profile" @@ -148,6 +149,20 @@ func Run(ctx context.Context, opts *options.Options) error { hookServer.Register("/validate-nebulaautoscaler", &webhook.Admission{Handler: &nawebhook.ValidatingAdmission{Decoder: decoder}}) hookServer.WebhookMux().Handle("/readyz/", http.StripPrefix("/readyz/", &healthz.Handler{})) + + // Start certificate rotation + certGenerator := certrot.CertGenerator{ + WebhookNames: 
opts.WebhookOpts.WebhookNames, + WebhookServerName: opts.WebhookOpts.WebhookServerName, + WebhookNamespace: opts.WebhookOpts.WebhookNamespace, + CertDir: opts.WebhookOpts.CertDir, + CertValidity: opts.WebhookOpts.CertValidity, + SecretName: opts.WebhookOpts.SecretName, + SecretNamespace: opts.WebhookOpts.SecretNamespace, + KubernetesDomain: opts.WebhookOpts.KubernetesDomain, + } + + certGenerator.Run() } if err := mgr.AddHealthzCheck("ping", healthz.Ping); err != nil { diff --git a/cmd/certificate-generator/app/certificate-generator.go b/cmd/certificate-generator/app/certificate-generator.go deleted file mode 100644 index fa55999e..00000000 --- a/cmd/certificate-generator/app/certificate-generator.go +++ /dev/null @@ -1,313 +0,0 @@ -/* -Copyright 2024 Vesoft Inc. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package app - -import ( - "context" - "crypto/ecdsa" - "crypto/elliptic" - "crypto/rand" - "crypto/x509" - "crypto/x509/pkix" - "encoding/pem" - "flag" - "fmt" - "math/big" - "os" - "time" - - cron "github.com/robfig/cron/v3" - "github.com/spf13/cobra" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/kubernetes" - "k8s.io/client-go/tools/leaderelection" - "k8s.io/client-go/tools/leaderelection/resourcelock" - cliflag "k8s.io/component-base/cli/flag" - "k8s.io/klog/v2" - ctrlruntime "sigs.k8s.io/controller-runtime" - - "github.com/vesoft-inc/nebula-operator/cmd/certificate-generator/app/options" -) - -func NewCertGenCommand(ctx context.Context) *cobra.Command { - opts := options.NewOptions() - cmd := &cobra.Command{ - Use: "nebula-cert-gen", - RunE: func(cmd *cobra.Command, args []string) error { - return Run(ctx, opts) - }, - } - - nfs := cliflag.NamedFlagSets{} - fs := nfs.FlagSet("generic") - fs.AddGoFlagSet(flag.CommandLine) - opts.AddFlags(fs) - logsFlagSet := nfs.FlagSet("logs") - - cmd.Flags().AddFlagSet(fs) - cmd.Flags().AddFlagSet(logsFlagSet) - - return cmd -} - -func Run(ctx context.Context, opts *options.Options) error { - klog.Info("Getting kubernetes configs") - cfg, err := ctrlruntime.GetConfig() - if err != nil { - panic(err) - } - - clientset, err := kubernetes.NewForConfig(cfg) - if err != nil { - klog.Errorf("Error building Kubernetes clientset: %v", err.Error()) - return err - } - - if opts.InitOnly { - klog.Infof("Init only detected. 
Doing cert initialization for webhook [%v/%v]", opts.WebhookNamespace, opts.WebhookServerName) - err := doCertRotation(clientset, opts) - if err != nil { - klog.Errorf("Error rotating certificate for webhook [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookServerName, err) - return err - } - } else { - if opts.LeaderElection.LeaderElect { - klog.Info("Doing leader election") - id, err := os.Hostname() - if err != nil { - klog.Errorf("Failed to get hostname: %v", err) - return err - } - - rl, err := resourcelock.New(opts.LeaderElection.ResourceLock, - opts.LeaderElection.ResourceNamespace, - opts.LeaderElection.ResourceName, - clientset.CoreV1(), - clientset.CoordinationV1(), - resourcelock.ResourceLockConfig{ - Identity: id, - }) - if err != nil { - klog.Errorf("Error creating resource lock: %v", err) - return err - } - - leaderelection.RunOrDie(ctx, leaderelection.LeaderElectionConfig{ - Lock: rl, - LeaseDuration: opts.LeaderElection.LeaseDuration.Duration, - RenewDeadline: opts.LeaderElection.RenewDeadline.Duration, - RetryPeriod: opts.LeaderElection.RetryPeriod.Duration, - Callbacks: leaderelection.LeaderCallbacks{ - OnStartedLeading: func(ctx context.Context) { - klog.Info("Leader election successful. Starting certificate rotation") - err = rotateCertificate(clientset, opts) - if err != nil { - klog.Errorf("Failed to start cronjob to rotate certificates: %v", err) - os.Exit(1) - } - }, - OnStoppedLeading: func() { - klog.Info("Lost leadership, stopping") - }, - }, - }) - } else { - klog.Infof("Leader election skipped. 
Starting certificate rotation") - err = rotateCertificate(clientset, opts) - if err != nil { - klog.Errorf("Failed to start cronjob to rotate certificates: %v", err) - return err - } - } - } - - return nil -} - -func rotateCertificate(clientset *kubernetes.Clientset, opts *options.Options) error { - opts.CertValidity = opts.CertValidity * 24 * 60 - - klog.Infof("Starting cert rotation cron job for webhook [%v/%v]", opts.WebhookNamespace, opts.WebhookServerName) - c := cron.New() - // rotate cert 1 hour before expiration date - _, err := c.AddFunc(fmt.Sprintf("@every %vm", opts.CertValidity-60), func() { - err := doCertRotation(clientset, opts) - if err != nil { - klog.Errorf("Error rotating certificate for webhook [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookServerName, err) - os.Exit(1) - } - }) - if err != nil { - return err - } - klog.Infof("Cert rotation crontab started for webhook [%v/%v]. Will rotate every %v minutes", opts.WebhookNamespace, opts.WebhookServerName, opts.CertValidity) - c.Run() - - return nil -} - -func doCertRotation(clientset *kubernetes.Clientset, opts *options.Options) error { - klog.Infof("Start generating certificates for webhook server [%v/%v]", opts.WebhookNamespace, opts.WebhookServerName) - caCert, caKey, err := generateCACert(opts) - if err != nil { - klog.Errorf("Error generating CA certificate for webhook server [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookServerName, err) - return err - } - - serverCert, serverKey, err := generateServerCert(caCert, caKey, opts) - if err != nil { - klog.Errorf("Error generating server certificate for webhook server [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookServerName, err) - return err - } - - err = updateSecret(clientset, serverCert, serverKey, caCert, opts) - if err != nil { - klog.Errorf("Error updating secret for webhook server [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookServerName, err) - return err - } - - klog.Infof("Certificates generated successfully for webhook server 
[%v/%v]", opts.WebhookNamespace, opts.WebhookServerName) - - klog.Infof("Updating ca bundle for webhook server [%v/%v]", opts.WebhookNamespace, opts.WebhookServerName) - err = updateWebhookConfiguration(clientset, opts, caCert) - if err != nil { - klog.Errorf("Error updating ca bundle for webhook server [%v/%v]: %v", opts.WebhookNamespace, opts.WebhookServerName, err) - return err - } - klog.Infof("Ca bundle updated successfully for webhook server [%v/%v]", opts.WebhookNamespace, opts.WebhookServerName) - - return nil -} - -func generateCACert(opts *options.Options) ([]byte, *ecdsa.PrivateKey, error) { - caKey, err := ecdsa.GenerateKey(elliptic.P384(), rand.Reader) - if err != nil { - return nil, nil, err - } - - caTemplate := x509.Certificate{ - SerialNumber: big.NewInt(1), - Subject: pkix.Name{ - Country: []string{"US"}, - CommonName: opts.WebhookServerName, - }, - NotBefore: time.Now(), - NotAfter: time.Now().Add(time.Duration(opts.CertValidity) * time.Minute), - IsCA: true, - ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth, x509.ExtKeyUsageServerAuth}, - KeyUsage: x509.KeyUsageCertSign | x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment, - BasicConstraintsValid: true, - } - - caCert, err := x509.CreateCertificate(rand.Reader, &caTemplate, &caTemplate, &caKey.PublicKey, caKey) - if err != nil { - return nil, nil, err - } - - caPem := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: caCert}) - - return caPem, caKey, nil -} - -func generateServerCert(caCert []byte, caKey *ecdsa.PrivateKey, opts *options.Options) ([]byte, []byte, error) { - serverKey, err := ecdsa.GenerateKey(elliptic.P384(), rand.Reader) - if err != nil { - return nil, nil, err - } - - dnsNames := make([]string, 2*len(*opts.WebhookNames)) - for idx := -1; idx < len(*opts.WebhookNames)-1; idx++ { - dnsNames[idx+1] = fmt.Sprintf("%v.%v.svc", (*opts.WebhookNames)[idx+1], opts.WebhookNamespace) - dnsNames[idx+2] = fmt.Sprintf("%v.%v.svc.%v", (*opts.WebhookNames)[idx+1], 
opts.WebhookNamespace, opts.KubernetesDomain) - } - - serverTemplate := x509.Certificate{ - SerialNumber: big.NewInt(2), - Subject: pkix.Name{ - Country: []string{"US"}, - CommonName: opts.WebhookServerName, - }, - NotBefore: time.Now(), - NotAfter: time.Now().Add(time.Duration(opts.CertValidity) * time.Minute), - KeyUsage: x509.KeyUsageDigitalSignature, - ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}, - DNSNames: dnsNames, - } - - caBlock, _ := pem.Decode(caCert) - ca, err := x509.ParseCertificate(caBlock.Bytes) - if err != nil { - return nil, nil, err - } - - serverCert, err := x509.CreateCertificate(rand.Reader, &serverTemplate, ca, &serverKey.PublicKey, caKey) - if err != nil { - return nil, nil, err - } - - certPem := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: serverCert}) - marshalledKey, err := x509.MarshalECPrivateKey(serverKey) - if err != nil { - return nil, nil, fmt.Errorf("error marshalling webhook server certificate key: %v", err) - } - - keyPem := pem.EncodeToMemory(&pem.Block{Type: "EC PRIVATE KEY", Bytes: marshalledKey}) - - return certPem, keyPem, nil -} - -func updateSecret(clientset *kubernetes.Clientset, certPEM, keyPEM, caPEM []byte, opts *options.Options) error { - secret := &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: opts.SecretName, - Namespace: opts.SecretNamespace, - }, - Type: corev1.SecretTypeTLS, - Data: map[string][]byte{ - "tls.crt": certPEM, - "tls.key": keyPEM, - "ca.crt": caPEM, - }, - } - - _, err := clientset.CoreV1().Secrets(opts.SecretNamespace).Update(context.TODO(), secret, metav1.UpdateOptions{}) - if err != nil { - return fmt.Errorf("failed to update secret: %v", err) - } - - return nil -} - -// Update the webhook configuration with the new CA bundle -func updateWebhookConfiguration(client *kubernetes.Clientset, opts *options.Options, caCert []byte) error { - webhook, err := client.AdmissionregistrationV1().ValidatingWebhookConfigurations().Get(context.Background(), 
opts.WebhookServerName, metav1.GetOptions{}) - if err != nil { - return fmt.Errorf("failed to get webhook configuration: %v", err) - } - - for i := range webhook.Webhooks { - webhook.Webhooks[i].ClientConfig.CABundle = caCert - } - - _, err = client.AdmissionregistrationV1().ValidatingWebhookConfigurations().Update(context.Background(), webhook, metav1.UpdateOptions{}) - if err != nil { - return fmt.Errorf("failed to update webhook configuration: %v", err) - } - - return nil -} diff --git a/cmd/certificate-generator/app/options/options.go b/cmd/certificate-generator/app/options/options.go deleted file mode 100644 index e0f12704..00000000 --- a/cmd/certificate-generator/app/options/options.go +++ /dev/null @@ -1,107 +0,0 @@ -/* -Copyright 2024 Vesoft Inc. -Copyright 2015 The Kubernetes Authors. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package options - -import ( - "time" - - "github.com/spf13/pflag" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/tools/leaderelection/resourcelock" - cbc "k8s.io/component-base/config" -) - -const ( - NamespaceNebulaSystem = "nebula-system" -) - -var ( - defaultElectionLeaseDuration = metav1.Duration{Duration: 15 * time.Second} - defaultElectionRenewDeadline = metav1.Duration{Duration: 10 * time.Second} - defaultElectionRetryPeriod = metav1.Duration{Duration: 2 * time.Second} -) - -type Options struct { - // LeaderElection defines the configuration of leader election client. 
- LeaderElection cbc.LeaderElectionConfiguration - - // WebhookNames represents the names of the webhooks in the webhook server (i.e. controller-manager-nebula-operator-webhook, autoscaler-nebula-operator-webhook) - WebhookNames *[]string - - // WebhookServerName represents the name of the webhook server associated with the certificate. - WebhookServerName string - - // WebhookNamespace represents the namespace of the webhook server associated with the certificate. - WebhookNamespace string - - // CertDir represents the directory to save the certificates in - CertDir string - - // CertValidity represents the number of days the certificate should be valid for - CertValidity int64 - - // SecretName represents the name of the secret used to store the webhook certificates - SecretName string - - // SecretNamespace represents the namespace of the secret used to store the webhook certificates - SecretNamespace string - - // KubernetesDomain represents the custom kubernetes domain needed in the certificate - KubernetesDomain string - - // InitOnly repersent whether this script is running in an init container. It will not start the cronjob - InitOnly bool -} - -func NewOptions() *Options { - return &Options{ - LeaderElection: cbc.LeaderElectionConfiguration{ - LeaderElect: true, - ResourceLock: resourcelock.LeasesResourceLock, - ResourceNamespace: NamespaceNebulaSystem, - ResourceName: "nebula-certificate-generator", - }, - } -} - -func (o *Options) AddFlags(flags *pflag.FlagSet) { - flags.BoolVar(&o.LeaderElection.LeaderElect, "leader-elect", false, "Start a leader election client and gain leadership before executing the main loop. 
Enable this when running replicated components for high availability.") - flags.StringVar(&o.LeaderElection.ResourceNamespace, "leader-elect-resource-namespace", NamespaceNebulaSystem, "The namespace of resource object that is used for locking during leader election.") - flags.DurationVar(&o.LeaderElection.LeaseDuration.Duration, "leader-elect-lease-duration", defaultElectionLeaseDuration.Duration, ""+ - "The duration that non-leader candidates will wait after observing a leadership "+ - "renewal until attempting to acquire leadership of a led but unrenewed leader "+ - "slot. This is effectively the maximum duration that a leader can be stopped "+ - "before it is replaced by another candidate. This is only applicable if leader "+ - "election is enabled.") - flags.DurationVar(&o.LeaderElection.RenewDeadline.Duration, "leader-elect-renew-deadline", defaultElectionRenewDeadline.Duration, ""+ - "The interval between attempts by the acting master to renew a leadership slot "+ - "before it stops leading. This must be less than or equal to the lease duration. "+ - "This is only applicable if leader election is enabled.") - flags.DurationVar(&o.LeaderElection.RetryPeriod.Duration, "leader-elect-retry-period", defaultElectionRetryPeriod.Duration, ""+ - "The duration the clients should wait between attempting acquisition and renewal "+ - "of a leadership. This is only applicable if leader election is enabled.") - o.WebhookNames = flags.StringSlice("webhook-names", []string{}, "A comma-seperated list of the names of the webhooks supported by the webhook server (i.e. 
controller-manager-nebula-operator-webhook, autoscaler-nebula-operator-webhook)") - flags.StringVar(&o.WebhookServerName, "webhook-server-name", "nebulaWebhook", "Specifies the name of the webhook to associate with the certificate") - flags.StringVar(&o.WebhookNamespace, "webhook-namespace", "default", "Specifies the namespace of the webhook to associate with the certificate") - flags.StringVar(&o.CertDir, "certificate-dir", "/etc/cert", "Specifies the directory in which to save the generated webhook certificates") - flags.Int64Var(&o.CertValidity, "certificate-validity", 365, "Specifies the number of days the certificate should be valid for") - flags.StringVar(&o.SecretName, "secret-name", "nebula-operator-webhook-secret", "Specifies the name of the webhook to associate with the certificate") - flags.StringVar(&o.SecretNamespace, "secret-namespace", "default", "Specifies the namespace of the webhook to associate with the certificate") - flags.StringVar(&o.KubernetesDomain, "kube-domain", "cluster.local", "Specifies the namespace of the webhook to associate with the certificate") - flags.BoolVar(&o.InitOnly, "init-only", false, "Specifies whether this script is running in a init container") -} diff --git a/cmd/certificate-generator/main.go b/cmd/certificate-generator/main.go deleted file mode 100644 index 7d17f545..00000000 --- a/cmd/certificate-generator/main.go +++ /dev/null @@ -1,32 +0,0 @@ -/* -Copyright 2024 Vesoft Inc. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -package main - -import ( - "os" - - "github.com/vesoft-inc/nebula-operator/cmd/certificate-generator/app" - "k8s.io/component-base/cli" - ctrl "sigs.k8s.io/controller-runtime" -) - -func main() { - ctx := ctrl.SetupSignalHandler() - cmd := app.NewCertGenCommand(ctx) - code := cli.Run(cmd) - os.Exit(code) -} diff --git a/cmd/controller-manager/app/controller-manager.go b/cmd/controller-manager/app/controller-manager.go index 10f43f96..38db579e 100644 --- a/cmd/controller-manager/app/controller-manager.go +++ b/cmd/controller-manager/app/controller-manager.go @@ -40,6 +40,7 @@ import ( "github.com/vesoft-inc/nebula-operator/apis/apps/v1alpha1" "github.com/vesoft-inc/nebula-operator/cmd/controller-manager/app/options" + certrot "github.com/vesoft-inc/nebula-operator/pkg/cert-rotation" "github.com/vesoft-inc/nebula-operator/pkg/controller/cronbackup" "github.com/vesoft-inc/nebula-operator/pkg/controller/nebulabackup" "github.com/vesoft-inc/nebula-operator/pkg/controller/nebulacluster" @@ -206,6 +207,20 @@ func Run(ctx context.Context, opts *options.Options) error { hookServer.Register("/validate-nebulacluster", &webhook.Admission{Handler: &ncwebhook.ValidatingAdmission{Decoder: decoder}}) hookServer.WebhookMux().Handle("/readyz/", http.StripPrefix("/readyz/", &healthz.Handler{})) + + // Start certificate rotation + certGenerator := certrot.CertGenerator{ + WebhookNames: opts.WebhookOpts.WebhookNames, + WebhookServerName: opts.WebhookOpts.WebhookServerName, + WebhookNamespace: opts.WebhookOpts.WebhookNamespace, + CertDir: opts.WebhookOpts.CertDir, + CertValidity: opts.WebhookOpts.CertValidity, + SecretName: opts.WebhookOpts.SecretName, + SecretNamespace: opts.WebhookOpts.SecretNamespace, + KubernetesDomain: opts.WebhookOpts.KubernetesDomain, + } + + certGenerator.Run() } if err := mgr.AddHealthzCheck("ping", healthz.Ping); err != nil { diff --git a/pkg/cert-rotation/certificate-generator.go b/pkg/cert-rotation/certificate-generator.go new file mode 100644 index 
00000000..d57b911c --- /dev/null +++ b/pkg/cert-rotation/certificate-generator.go @@ -0,0 +1,292 @@ +/* +Copyright 2024 Vesoft Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package app + +import ( + "context" + "crypto/ecdsa" + "crypto/elliptic" + "crypto/rand" + "crypto/x509" + "crypto/x509/pkix" + "encoding/pem" + "fmt" + "math/big" + "os" + "path/filepath" + "time" + + cron "github.com/robfig/cron/v3" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/kubernetes" + "k8s.io/klog/v2" + ctrlruntime "sigs.k8s.io/controller-runtime" +) + +type CertGenerator struct { + // WebhookNames represents the names of the webhooks in the webhook server (i.e. controller-manager-nebula-operator-webhook, autoscaler-nebula-operator-webhook) + WebhookNames *[]string + + // WebhookServerName represents the name of the webhook server associated with the certificate. + WebhookServerName string + + // WebhookNamespace represents the namespace of the webhook server associated with the certificate. 
+ WebhookNamespace string + + // CertDir represents the directory to save the certificates in + CertDir string + + // CertValidity represents the number of days the certificate should be valid for + CertValidity int64 + + // SecretName represents the name of the secret used to store the webhook certificates + SecretName string + + // SecretNamespace represents the namespace of the secret used to store the webhook certificates + SecretNamespace string + + // KubernetesDomain represents the custom kubernetes domain needed in the certificate + KubernetesDomain string +} + +func (c *CertGenerator) Run() error { + klog.Info("Getting kubernetes configs") + cfg, err := ctrlruntime.GetConfig() + if err != nil { + panic(err) + } + + clientset, err := kubernetes.NewForConfig(cfg) + if err != nil { + klog.Errorf("Error building Kubernetes clientset: %v", err.Error()) + return err + } + + certValidityDuration := (time.Duration(c.CertValidity) * 24 * 60 * time.Minute) + + // Check certificate validity + klog.Infof("Checking certificate validity for webhook [%v/%v]", c.WebhookNamespace, c.WebhookServerName) + currCertValidity, hasCert := c.getCertValidity("tls.cert") + + // Initialize certificate if needed + if !hasCert { + klog.Infof("No certificate detected. Creating certificate for webhook [%v/%v]", c.WebhookNamespace, c.WebhookServerName) + err := c.doCertRotation(clientset, certValidityDuration) + if err != nil { + klog.Errorf("Error rotating certificate for webhook [%v/%v]: %v", c.WebhookNamespace, c.WebhookServerName, err) + os.Exit(1) + } + klog.Infof("Certificate created successfully for webhook [%v/%v]", c.WebhookNamespace, c.WebhookServerName) + } else if currCertValidity <= 2*time.Minute { + klog.Infof("Certificate is within 2 min of expiration. 
Rotating certificate for webhook [%v/%v]", c.WebhookNamespace, c.WebhookServerName) + err := c.doCertRotation(clientset, currCertValidity+(certValidityDuration)) + if err != nil { + klog.Errorf("Error rotating certificate for webhook [%v/%v]: %v", c.WebhookNamespace, c.WebhookServerName, err) + os.Exit(1) + } + klog.Infof("Certificate rotated successfully for webhook [%v/%v]", c.WebhookNamespace, c.WebhookServerName) + } + + // Start background job for rotation + rotateJob := cron.New() + rotateJob.AddFunc(fmt.Sprintf("@every %v", certValidityDuration-1*time.Minute), func() { + klog.Infof("Rotating certificate for webhook [%v/%v]", c.WebhookNamespace, c.WebhookServerName) + err := c.doCertRotation(clientset, certValidityDuration+1*time.Minute) + if err != nil { + klog.Errorf("Error rotating certificate for webhook [%v/%v]: %v", c.WebhookNamespace, c.WebhookServerName, err) + os.Exit(1) + } + klog.Infof("Certifcate rotation complete for webhook [%v/%v]. Will rotate in %v", c.WebhookNamespace, c.WebhookServerName, certValidityDuration) + }) + rotateJob.Start() + + return nil +} + +func (c *CertGenerator) doCertRotation(clientset *kubernetes.Clientset, certValidity time.Duration) error { + klog.Infof("Start generating certificates for webhook server [%v/%v]", c.WebhookNamespace, c.WebhookServerName) + caCert, caKey, err := c.generateCACert(certValidity) + if err != nil { + klog.Errorf("Error generating CA certificate for webhook server [%v/%v]: %v", c.WebhookNamespace, c.WebhookServerName, err) + return err + } + + serverCert, serverKey, err := c.generateServerCert(caCert, caKey, certValidity) + if err != nil { + klog.Errorf("Error generating server certificate for webhook server [%v/%v]: %v", c.WebhookNamespace, c.WebhookServerName, err) + return err + } + + err = c.updateSecret(clientset, serverCert, serverKey, caCert) + if err != nil { + klog.Errorf("Error updating secret for webhook server [%v/%v]: %v", c.WebhookNamespace, c.WebhookServerName, err) + return err + 
} + + klog.Infof("Certificates generated successfully for webhook server [%v/%v]", c.WebhookNamespace, c.WebhookServerName) + + klog.Infof("Updating ca bundle for webhook server [%v/%v]", c.WebhookNamespace, c.WebhookServerName) + err = c.updateWebhookConfiguration(clientset, caCert) + if err != nil { + klog.Errorf("Error updating ca bundle for webhook server [%v/%v]: %v", c.WebhookNamespace, c.WebhookServerName, err) + return err + } + klog.Infof("Ca bundle updated successfully for webhook server [%v/%v]", c.WebhookNamespace, c.WebhookServerName) + + return nil +} + +func (c *CertGenerator) generateCACert(certValidity time.Duration) ([]byte, *ecdsa.PrivateKey, error) { + caKey, err := ecdsa.GenerateKey(elliptic.P384(), rand.Reader) + if err != nil { + return nil, nil, err + } + + caTemplate := x509.Certificate{ + SerialNumber: big.NewInt(1), + Subject: pkix.Name{ + Country: []string{"US"}, + CommonName: c.WebhookServerName, + }, + NotBefore: time.Now(), + NotAfter: time.Now().Add(certValidity), + IsCA: true, + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth, x509.ExtKeyUsageServerAuth}, + KeyUsage: x509.KeyUsageCertSign | x509.KeyUsageDigitalSignature | x509.KeyUsageKeyEncipherment, + BasicConstraintsValid: true, + } + + caCert, err := x509.CreateCertificate(rand.Reader, &caTemplate, &caTemplate, &caKey.PublicKey, caKey) + if err != nil { + return nil, nil, err + } + + caPem := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: caCert}) + + return caPem, caKey, nil +} + +func (c *CertGenerator) generateServerCert(caCert []byte, caKey *ecdsa.PrivateKey, certValidity time.Duration) ([]byte, []byte, error) { + serverKey, err := ecdsa.GenerateKey(elliptic.P384(), rand.Reader) + if err != nil { + return nil, nil, err + } + + dnsNames := make([]string, 2*len(*c.WebhookNames)) + for idx := -1; idx < len(*c.WebhookNames)-1; idx++ { + dnsNames[idx+1] = fmt.Sprintf("%v.%v.svc", (*c.WebhookNames)[idx+1], c.WebhookNamespace) + dnsNames[idx+2] = 
fmt.Sprintf("%v.%v.svc.%v", (*c.WebhookNames)[idx+1], c.WebhookNamespace, c.KubernetesDomain) + } + + serverTemplate := x509.Certificate{ + SerialNumber: big.NewInt(2), + Subject: pkix.Name{ + Country: []string{"US"}, + CommonName: c.WebhookServerName, + }, + NotBefore: time.Now(), + NotAfter: time.Now().Add(certValidity), + KeyUsage: x509.KeyUsageDigitalSignature, + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}, + DNSNames: dnsNames, + } + + caBlock, _ := pem.Decode(caCert) + ca, err := x509.ParseCertificate(caBlock.Bytes) + if err != nil { + return nil, nil, err + } + + serverCert, err := x509.CreateCertificate(rand.Reader, &serverTemplate, ca, &serverKey.PublicKey, caKey) + if err != nil { + return nil, nil, err + } + + certPem := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: serverCert}) + marshalledKey, err := x509.MarshalECPrivateKey(serverKey) + if err != nil { + return nil, nil, fmt.Errorf("error marshalling webhook server certificate key: %v", err) + } + + keyPem := pem.EncodeToMemory(&pem.Block{Type: "EC PRIVATE KEY", Bytes: marshalledKey}) + + return certPem, keyPem, nil +} + +func (c *CertGenerator) updateSecret(clientset *kubernetes.Clientset, certPEM, keyPEM, caPEM []byte) error { + secret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: c.SecretName, + Namespace: c.SecretNamespace, + }, + Type: corev1.SecretTypeTLS, + Data: map[string][]byte{ + "tls.crt": certPEM, + "tls.key": keyPEM, + "ca.crt": caPEM, + }, + } + + // The mounted local directory in the pod will automatically refresh regardless of the sync-interval since we're updating the data field in the secret here + _, err := clientset.CoreV1().Secrets(c.SecretNamespace).Update(context.TODO(), secret, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to update secret: %v", err) + } + + // Wait for secret to update + time.Sleep(15 * time.Second) + + return nil +} + +func (c *CertGenerator) updateWebhookConfiguration(client 
*kubernetes.Clientset, caCert []byte) error { + webhook, err := client.AdmissionregistrationV1().ValidatingWebhookConfigurations().Get(context.Background(), c.WebhookServerName, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get webhook configuration: %v", err) + } + + for i := range webhook.Webhooks { + webhook.Webhooks[i].ClientConfig.CABundle = caCert + } + + _, err = client.AdmissionregistrationV1().ValidatingWebhookConfigurations().Update(context.Background(), webhook, metav1.UpdateOptions{}) + if err != nil { + return fmt.Errorf("failed to update webhook configuration: %v", err) + } + + return nil +} + +func (c *CertGenerator) getCertValidity(certName string) (time.Duration, bool) { + certPEM, err := os.ReadFile(filepath.Join(c.CertDir, certName)) + if err != nil { + return -1, false + } + + block, _ := pem.Decode(certPEM) + if block == nil || block.Type != "CERTIFICATE" { + return -1, false + } + + cert, err := x509.ParseCertificate(block.Bytes) + if err != nil { + return -1, false + } + + return time.Until(cert.NotAfter), true +} diff --git a/pkg/flag/webhook/webhook.go b/pkg/flag/webhook/webhook.go index eaf078c9..0611c5fb 100644 --- a/pkg/flag/webhook/webhook.go +++ b/pkg/flag/webhook/webhook.go @@ -48,14 +48,35 @@ type Options struct { // CertName is the server certificate name. Defaults to tls.crt. CertName string + // CertValidity represents the number of days the certificate should be valid for + CertValidity int64 + // KeyName is the server key name. Defaults to tls.key. KeyName string + // KubernetesDomain represents the custom kubernetes domain needed in the certificate + KubernetesDomain string + + // SecretName represents the name of the secret used to store the webhook certificates + SecretName string + + // SecretNamespace represents the namespace of the secret used to store the webhook certificates + SecretNamespace string + // TLSMinVersion is the minimum version of TLS supported. Possible values: 1.0, 1.1, 1.2, 1.3. 
// Some environments have automated security scans that trigger on TLS versions or insecure cipher suites, and // setting TLS to 1.3 would solve both problems. // Defaults to 1.3. TLSMinVersion string + + // WebhookNames represents the names of the webhooks in the webhook server (i.e. controller-manager-nebula-operator-webhook, autoscaler-nebula-operator-webhook) + WebhookNames *[]string + + // WebhookServerName represents the name of the webhook server associated with the certificate. + WebhookServerName string + + // WebhookNamespace represents the namespace of the webhook server associated with the certificate. + WebhookNamespace string } func (o *Options) AddFlags(flags *pflag.FlagSet) { @@ -66,9 +87,16 @@ func (o *Options) AddFlags(flags *pflag.FlagSet) { flags.StringVar(&o.CertDir, "webhook-cert-dir", defaultCertDir, "The directory that contains the server key and certificate.") flags.StringVar(&o.CertName, "webhook-tls-cert-file-name", "tls.crt", "The name of server certificate.") + flags.Int64Var(&o.CertValidity, "certificate-validity", 365, "Specifies the number of days the certificate should be valid for") flags.StringVar(&o.KeyName, "webhook-tls-private-key-file-name", "tls.key", "The name of server key.") + flags.StringVar(&o.KubernetesDomain, "kube-domain", "cluster.local", "Specifies the namespace of the webhook to associate with the certificate") + flags.StringVar(&o.SecretName, "secret-name", "nebula-operator-webhook-secret", "Specifies the name of the webhook to associate with the certificate") + flags.StringVar(&o.SecretNamespace, "secret-namespace", "default", "Specifies the namespace of the webhook to associate with the certificate") flags.StringVar(&o.TLSMinVersion, "webhook-tls-min-version", defaultTLSMinVersion, "Minimum TLS version supported. Possible values: 1.0, 1.1, 1.2, 1.3.") + o.WebhookNames = flags.StringSlice("webhook-names", []string{}, "A comma-seperated list of the names of the webhooks supported by the webhook server (i.e. 
controller-manager-nebula-operator-webhook, autoscaler-nebula-operator-webhook)") + flags.StringVar(&o.WebhookServerName, "webhook-server-name", "nebulaWebhook", "Specifies the name of the webhook to associate with the certificate") + flags.StringVar(&o.WebhookNamespace, "webhook-namespace", "default", "Specifies the namespace of the webhook to associate with the certificate") } func (o *Options) Validate() field.ErrorList { From 3cd6040d468900a3b95ad74afee32d954dffc9a5 Mon Sep 17 00:00:00 2001 From: kevinliu24 Date: Sat, 15 Jun 2024 06:54:18 +0800 Subject: [PATCH 13/13] Detect if certificate is ready by checking cert at local path. Use leader election to avoid conflicts. --- cmd/autoscaler/app/autoscaler.go | 3 +- .../app/controller-manager.go | 3 +- pkg/cert-rotation/certificate-generator.go | 154 +++++++++++++++--- 3 files changed, 131 insertions(+), 29 deletions(-) diff --git a/cmd/autoscaler/app/autoscaler.go b/cmd/autoscaler/app/autoscaler.go index cbd841e2..35cc3be1 100644 --- a/cmd/autoscaler/app/autoscaler.go +++ b/cmd/autoscaler/app/autoscaler.go @@ -152,6 +152,7 @@ func Run(ctx context.Context, opts *options.Options) error { // Start certificate rotation certGenerator := certrot.CertGenerator{ + LeaderElection: opts.LeaderElection, WebhookNames: opts.WebhookOpts.WebhookNames, WebhookServerName: opts.WebhookOpts.WebhookServerName, WebhookNamespace: opts.WebhookOpts.WebhookNamespace, @@ -162,7 +163,7 @@ func Run(ctx context.Context, opts *options.Options) error { KubernetesDomain: opts.WebhookOpts.KubernetesDomain, } - certGenerator.Run() + certGenerator.Run(ctx) } if err := mgr.AddHealthzCheck("ping", healthz.Ping); err != nil { diff --git a/cmd/controller-manager/app/controller-manager.go b/cmd/controller-manager/app/controller-manager.go index 38db579e..d31f359b 100644 --- a/cmd/controller-manager/app/controller-manager.go +++ b/cmd/controller-manager/app/controller-manager.go @@ -210,6 +210,7 @@ func Run(ctx context.Context, opts *options.Options) 
error { // Start certificate rotation certGenerator := certrot.CertGenerator{ + LeaderElection: opts.LeaderElection, WebhookNames: opts.WebhookOpts.WebhookNames, WebhookServerName: opts.WebhookOpts.WebhookServerName, WebhookNamespace: opts.WebhookOpts.WebhookNamespace, @@ -220,7 +221,7 @@ func Run(ctx context.Context, opts *options.Options) error { KubernetesDomain: opts.WebhookOpts.KubernetesDomain, } - certGenerator.Run() + certGenerator.Run(ctx) } if err := mgr.AddHealthzCheck("ping", healthz.Ping); err != nil { diff --git a/pkg/cert-rotation/certificate-generator.go b/pkg/cert-rotation/certificate-generator.go index d57b911c..05d1b95a 100644 --- a/pkg/cert-rotation/certificate-generator.go +++ b/pkg/cert-rotation/certificate-generator.go @@ -17,6 +17,7 @@ limitations under the License. package app import ( + "bytes" "context" "crypto/ecdsa" "crypto/elliptic" @@ -34,11 +35,21 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes" + "k8s.io/client-go/tools/leaderelection" + "k8s.io/client-go/tools/leaderelection/resourcelock" + cbc "k8s.io/component-base/config" "k8s.io/klog/v2" ctrlruntime "sigs.k8s.io/controller-runtime" ) +const ( + ResourceName = "nebula-certificate-generator" +) + type CertGenerator struct { + // LeaderElection defines the configuration of leader election client. + LeaderElection cbc.LeaderElectionConfiguration + // WebhookNames represents the names of the webhooks in the webhook server (i.e. 
controller-manager-nebula-operator-webhook, autoscaler-nebula-operator-webhook) WebhookNames *[]string @@ -64,7 +75,7 @@ type CertGenerator struct { KubernetesDomain string } -func (c *CertGenerator) Run() error { +func (c *CertGenerator) Run(ctx context.Context) error { klog.Info("Getting kubernetes configs") cfg, err := ctrlruntime.GetConfig() if err != nil { @@ -77,36 +88,20 @@ func (c *CertGenerator) Run() error { return err } + // Initialize certificate certValidityDuration := (time.Duration(c.CertValidity) * 24 * 60 * time.Minute) - - // Check certificate validity - klog.Infof("Checking certificate validity for webhook [%v/%v]", c.WebhookNamespace, c.WebhookServerName) - currCertValidity, hasCert := c.getCertValidity("tls.cert") - - // Initialize certificate if needed - if !hasCert { - klog.Infof("No certificate detected. Creating certificate for webhook [%v/%v]", c.WebhookNamespace, c.WebhookServerName) - err := c.doCertRotation(clientset, certValidityDuration) - if err != nil { - klog.Errorf("Error rotating certificate for webhook [%v/%v]: %v", c.WebhookNamespace, c.WebhookServerName, err) - os.Exit(1) - } - klog.Infof("Certificate created successfully for webhook [%v/%v]", c.WebhookNamespace, c.WebhookServerName) - } else if currCertValidity <= 2*time.Minute { - klog.Infof("Certificate is within 2 min of expiration. 
Rotating certificate for webhook [%v/%v]", c.WebhookNamespace, c.WebhookServerName) - err := c.doCertRotation(clientset, currCertValidity+(certValidityDuration)) - if err != nil { - klog.Errorf("Error rotating certificate for webhook [%v/%v]: %v", c.WebhookNamespace, c.WebhookServerName, err) - os.Exit(1) - } - klog.Infof("Certificate rotated successfully for webhook [%v/%v]", c.WebhookNamespace, c.WebhookServerName) + err = c.rotateCert(ctx, clientset, certValidityDuration) + if err != nil { + klog.Errorf("Error rotating certificate for webhook [%v/%v]: %v", c.WebhookNamespace, c.WebhookServerName, err) + return err } // Start background job for rotation + klog.Infof("Starting cert rotation cronjob for webhook [%v/%v]", c.WebhookNamespace, c.WebhookServerName) rotateJob := cron.New() rotateJob.AddFunc(fmt.Sprintf("@every %v", certValidityDuration-1*time.Minute), func() { klog.Infof("Rotating certificate for webhook [%v/%v]", c.WebhookNamespace, c.WebhookServerName) - err := c.doCertRotation(clientset, certValidityDuration+1*time.Minute) + err := c.rotateCert(ctx, clientset, certValidityDuration+1*time.Minute) if err != nil { klog.Errorf("Error rotating certificate for webhook [%v/%v]: %v", c.WebhookNamespace, c.WebhookServerName, err) os.Exit(1) @@ -114,6 +109,80 @@ func (c *CertGenerator) Run() error { klog.Infof("Certifcate rotation complete for webhook [%v/%v]. 
Will rotate in %v", c.WebhookNamespace, c.WebhookServerName, certValidityDuration) }) rotateJob.Start() + klog.Infof("Cert rotation cronjob for webhook [%v/%v] started successfully", c.WebhookNamespace, c.WebhookServerName) + + return nil +} + +func (c *CertGenerator) rotateCert(ctx context.Context, clientset *kubernetes.Clientset, certValidity time.Duration) error { + klog.Info("Doing leader election") + id, err := os.Hostname() + if err != nil { + klog.Errorf("Failed to get hostname: %v", err) + return err + } + + rl, err := resourcelock.New( + c.LeaderElection.ResourceLock, + c.LeaderElection.ResourceNamespace, + ResourceName, + clientset.CoreV1(), + clientset.CoordinationV1(), + resourcelock.ResourceLockConfig{ + Identity: fmt.Sprintf("%v-%v", id, c.LeaderElection.ResourceName), + }, + ) + if err != nil { + klog.Errorf("Error creating resource lock: %v", err) + return err + } + + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + leaderelection.RunOrDie(ctx, leaderelection.LeaderElectionConfig{ + Lock: rl, + LeaseDuration: c.LeaderElection.LeaseDuration.Duration, + RenewDeadline: c.LeaderElection.RenewDeadline.Duration, + RetryPeriod: c.LeaderElection.RetryPeriod.Duration, + Callbacks: leaderelection.LeaderCallbacks{ + OnStartedLeading: func(ctx context.Context) { + klog.Info("Leader election successful. Starting certificate rotation") + + // Check certificate validity + klog.Infof("Checking certificate validity for webhook [%v/%v]", c.WebhookNamespace, c.WebhookServerName) + currCertValidity, hasCert := c.getCertValidity("tls.crt") + + // Rotate if needed + if !hasCert { + klog.Infof("No certificate detected. 
Creating certificate for webhook [%v/%v]", c.WebhookNamespace, c.WebhookServerName) + err := c.doCertRotation(clientset, certValidity) + if err != nil { + klog.Errorf("Error rotating certificate for webhook [%v/%v]: %v", c.WebhookNamespace, c.WebhookServerName, err) + os.Exit(1) + } + klog.Infof("Certificate created successfully for webhook [%v/%v]", c.WebhookNamespace, c.WebhookServerName) + } else if currCertValidity <= 2*time.Minute { + klog.Infof("Certificate is within 2 min of expiration. Rotating certificate for webhook [%v/%v]", c.WebhookNamespace, c.WebhookServerName) + err := c.doCertRotation(clientset, currCertValidity+(certValidity)) + if err != nil { + klog.Errorf("Error rotating certificate for webhook [%v/%v]: %v", c.WebhookNamespace, c.WebhookServerName, err) + os.Exit(1) + } + klog.Infof("Certificate rotated successfully for webhook [%v/%v]", c.WebhookNamespace, c.WebhookServerName) + } else { + klog.Infof("Certificate for webhook [%v/%v] is still valid for %v. Skipping cert rotation for now", c.WebhookNamespace, c.WebhookServerName, currCertValidity) + } + + klog.Info("Finish certificate rotation. 
Relinquishing leadership") + cancel() + }, + OnStoppedLeading: func() { + klog.Info("Lost leadership, stopping") + }, + }, + ReleaseOnCancel: true, + }) return nil } @@ -137,7 +206,6 @@ func (c *CertGenerator) doCertRotation(clientset *kubernetes.Clientset, certVali klog.Errorf("Error updating secret for webhook server [%v/%v]: %v", c.WebhookNamespace, c.WebhookServerName, err) return err } - klog.Infof("Certificates generated successfully for webhook server [%v/%v]", c.WebhookNamespace, c.WebhookServerName) klog.Infof("Updating ca bundle for webhook server [%v/%v]", c.WebhookNamespace, c.WebhookServerName) @@ -152,6 +220,7 @@ func (c *CertGenerator) doCertRotation(clientset *kubernetes.Clientset, certVali } func (c *CertGenerator) generateCACert(certValidity time.Duration) ([]byte, *ecdsa.PrivateKey, error) { + klog.V(4).Infof("Generating CA certificate and key for webhook server [%v/%v]", c.WebhookNamespace, c.WebhookServerName) caKey, err := ecdsa.GenerateKey(elliptic.P384(), rand.Reader) if err != nil { return nil, nil, err @@ -178,10 +247,13 @@ func (c *CertGenerator) generateCACert(certValidity time.Duration) ([]byte, *ecd caPem := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: caCert}) + klog.V(4).Infof("CA certificate and key generated successfully for webhook server [%v/%v]", c.WebhookNamespace, c.WebhookServerName) + return caPem, caKey, nil } func (c *CertGenerator) generateServerCert(caCert []byte, caKey *ecdsa.PrivateKey, certValidity time.Duration) ([]byte, []byte, error) { + klog.V(4).Infof("Generating tls certificate and key for webhook server [%v/%v]", c.WebhookNamespace, c.WebhookServerName) serverKey, err := ecdsa.GenerateKey(elliptic.P384(), rand.Reader) if err != nil { return nil, nil, err @@ -224,11 +296,14 @@ func (c *CertGenerator) generateServerCert(caCert []byte, caKey *ecdsa.PrivateKe } keyPem := pem.EncodeToMemory(&pem.Block{Type: "EC PRIVATE KEY", Bytes: marshalledKey}) + klog.V(4).Infof("TLS certificate and key generated 
successfully for webhook server [%v/%v]", c.WebhookNamespace, c.WebhookServerName) return certPem, keyPem, nil } func (c *CertGenerator) updateSecret(clientset *kubernetes.Clientset, certPEM, keyPEM, caPEM []byte) error { + klog.V(4).Infof("Updating secret [%v/%v] for webhook server [%v/%v]", c.SecretNamespace, c.SecretName, c.WebhookNamespace, c.WebhookServerName) + secret := &corev1.Secret{ ObjectMeta: metav1.ObjectMeta{ Name: c.SecretName, @@ -248,12 +323,37 @@ func (c *CertGenerator) updateSecret(clientset *kubernetes.Clientset, certPEM, k return fmt.Errorf("failed to update secret: %v", err) } - // Wait for secret to update - time.Sleep(15 * time.Second) + // Wait for certificate in local volume to update + err = c.waitForCertificateUpdate(filepath.Join(c.CertDir, "tls.crt"), certPEM) + if err != nil { + return fmt.Errorf("failed to wait for secret [%v/%v] to update: %v", c.SecretNamespace, c.SecretName, err) + } + + klog.V(4).Infof("secret [%v/%v] updated successfully for webhook server [%v/%v]", c.SecretNamespace, c.SecretName, c.WebhookNamespace, c.WebhookServerName) return nil } +func (c *CertGenerator) waitForCertificateUpdate(certPath string, expectedContent []byte) error { + checkInterval := 2 * time.Second + + for { + certBytes, err := os.ReadFile(certPath) + if err != nil { + return fmt.Errorf("unable to read certificate at path %v for webhook [%v/%v]: %v", certPath, c.WebhookNamespace, c.WebhookServerName, err) + } + + block, _ := pem.Decode(certBytes) + if block != nil && block.Type == "CERTIFICATE" && bytes.Contains(certBytes, expectedContent) { + klog.V(4).Infof("Certificate updated successfully in the local volume for webhook [%v/%v]", c.WebhookNamespace, c.WebhookServerName) + return nil + } + + klog.V(4).Infof("Waiting for certificate to be updated in the local volume for webhook [%v/%v], retrying in %v...", c.WebhookNamespace, c.WebhookServerName, checkInterval) + time.Sleep(checkInterval) + } +} + func (c *CertGenerator) 
updateWebhookConfiguration(client *kubernetes.Clientset, caCert []byte) error { webhook, err := client.AdmissionregistrationV1().ValidatingWebhookConfigurations().Get(context.Background(), c.WebhookServerName, metav1.GetOptions{}) if err != nil {