Skip to content
This repository has been archived by the owner on Apr 25, 2023. It is now read-only.

Commit

Permalink
feat: add custom kubefed metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
Hector Fernandez committed Feb 25, 2020
1 parent 9271f47 commit b68b143
Show file tree
Hide file tree
Showing 6 changed files with 138 additions and 0 deletions.
3 changes: 3 additions & 0 deletions cmd/controller-manager/app/controller-manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ import (
"sigs.k8s.io/kubefed/pkg/controller/servicedns"
"sigs.k8s.io/kubefed/pkg/controller/util"
"sigs.k8s.io/kubefed/pkg/features"
kubefedmetrics "sigs.k8s.io/kubefed/pkg/metrics"
"sigs.k8s.io/kubefed/pkg/version"
)

Expand Down Expand Up @@ -114,6 +115,8 @@ func Run(opts *options.Options, stopChan <-chan struct{}) error {

go serveHealthz(healthzAddr)
go serveMetrics(metricsAddr, stopChan)
// Register kubefed custom metrics
kubefedmetrics.RegisterAll()

var err error
opts.Config.KubeConfig, err = clientcmd.BuildConfigFromFlags(masterURL, kubeconfig)
Expand Down
4 changes: 4 additions & 0 deletions pkg/controller/kubefedcluster/clusterclient.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
fedv1b1 "sigs.k8s.io/kubefed/pkg/apis/core/v1beta1"
"sigs.k8s.io/kubefed/pkg/client/generic"
"sigs.k8s.io/kubefed/pkg/controller/util"
"sigs.k8s.io/kubefed/pkg/metrics"
)

const (
Expand Down Expand Up @@ -128,10 +129,13 @@ func (self *ClusterClient) GetClusterHealthStatus() (*fedv1b1.KubeFedClusterStat
if err != nil {
runtime.HandleError(errors.Wrapf(err, "Failed to do cluster health check for cluster %q", self.clusterName))
clusterStatus.Conditions = append(clusterStatus.Conditions, newClusterOfflineCondition)
metrics.RegisterKubefedClustersOfflineCount()
} else {
if !strings.EqualFold(string(body), "ok") {
metrics.RegisterKubefedClustersNotReadyCount()
clusterStatus.Conditions = append(clusterStatus.Conditions, newClusterNotReadyCondition, newClusterNotOfflineCondition)
} else {
metrics.RegisterKubefedClustersReadyCount()
clusterStatus.Conditions = append(clusterStatus.Conditions, newClusterReadyCondition)
}
}
Expand Down
6 changes: 6 additions & 0 deletions pkg/controller/kubefedcluster/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"fmt"
"sync"
"time"

"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"
Expand All @@ -41,6 +42,7 @@ import (
genscheme "sigs.k8s.io/kubefed/pkg/client/generic/scheme"
"sigs.k8s.io/kubefed/pkg/controller/util"
"sigs.k8s.io/kubefed/pkg/features"
"sigs.k8s.io/kubefed/pkg/metrics"
)

// ClusterData stores cluster client and previous health check probe results of individual cluster.
Expand Down Expand Up @@ -241,6 +243,7 @@ func (cc *ClusterController) updateIndividualClusterStatus(cluster *fedv1b1.Kube
storedData *ClusterData, wg *sync.WaitGroup) {
clusterClient := storedData.clusterKubeClient

clusterHealthStatusStart := time.Now()
currentClusterStatus, err := clusterClient.GetClusterHealthStatus()
if err != nil {
cc.RecordError(cluster, "RetrievingClusterHealthFailed", errors.Wrap(err, "Failed to retrieve health of the cluster"))
Expand All @@ -257,6 +260,9 @@ func (cc *ClusterController) updateIndividualClusterStatus(cluster *fedv1b1.Kube
if err := cc.client.UpdateStatus(context.TODO(), cluster); err != nil {
klog.Warningf("Failed to update the status of cluster %q: %v", cluster.Name, err)
}

metrics.UpdateDurationFromStart(metrics.ClusterHealthStatus, clusterHealthStatusStart)

wg.Done()
}

Expand Down
2 changes: 2 additions & 0 deletions pkg/controller/sync/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ import (
"sigs.k8s.io/kubefed/pkg/controller/sync/status"
"sigs.k8s.io/kubefed/pkg/controller/util"
finalizersutil "sigs.k8s.io/kubefed/pkg/controller/util/finalizers"
"sigs.k8s.io/kubefed/pkg/metrics"
)

const (
Expand Down Expand Up @@ -266,6 +267,7 @@ func (s *KubeFedSyncController) reconcile(qualifiedName util.QualifiedName) util
startTime := time.Now()
defer func() {
klog.V(4).Infof("Finished reconciling %s %q (duration: %v)", kind, key, time.Since(startTime))
metrics.UpdateDurationFromStart(metrics.ReconcileFederatedResources, startTime)
}()

if fedResource.Object().GetDeletionTimestamp() != nil {
Expand Down
5 changes: 5 additions & 0 deletions pkg/controller/util/federated_informer.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
fedcommon "sigs.k8s.io/kubefed/pkg/apis/core/common"
fedv1b1 "sigs.k8s.io/kubefed/pkg/apis/core/v1beta1"
"sigs.k8s.io/kubefed/pkg/client/generic"
"sigs.k8s.io/kubefed/pkg/metrics"
)

const (
Expand Down Expand Up @@ -324,6 +325,8 @@ func (f *federatedInformerImpl) GetClientForCluster(clusterName string) (generic
f.Lock()
defer f.Unlock()

clientConnectionStart := time.Now()

// return cached client if one exists (to prevent frequent secret retrieval and rest discovery)
if client, ok := f.clusterClients[clusterName]; ok {
return client, nil
Expand All @@ -337,6 +340,8 @@ func (f *federatedInformerImpl) GetClientForCluster(clusterName string) (generic
return client, err
}
f.clusterClients[clusterName] = client

metrics.UpdateDurationFromStart(metrics.ClusterClientConnection, clientConnectionStart)
return client, nil
}

Expand Down
118 changes: 118 additions & 0 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
/*
Copyright 2018 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

import (
"time"

"github.com/prometheus/client_golang/prometheus"
"k8s.io/klog"
"sigs.k8s.io/controller-runtime/pkg/metrics"
)

// Collectors exported by KubeFed. They are created here and attached to the
// controller-runtime registry by RegisterAll.
//
// The three kubefedclusters_*_total metrics are monotonic counters: each one is
// bumped by one every time its Register* helper is called, so the exported
// value is a running total of observations, not the number of clusters that
// are currently in that state. The Help strings say so explicitly to keep
// dashboards from treating them as gauges.
var (
	// kubefedClustersNotReadyCount is incremented by
	// RegisterKubefedClustersNotReadyCount.
	kubefedClustersNotReadyCount = prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: "kubefedclusters_not_ready_total",
			Help: "Total number of cluster health checks that reported a kubefed cluster as not ready.",
		},
	)

	// kubefedClustersReadyCount is incremented by
	// RegisterKubefedClustersReadyCount.
	kubefedClustersReadyCount = prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: "kubefedclusters_ready_total",
			Help: "Total number of cluster health checks that reported a kubefed cluster as ready.",
		},
	)

	// kubefedClustersOfflineCount is incremented by
	// RegisterKubefedClustersOfflineCount.
	kubefedClustersOfflineCount = prometheus.NewCounter(
		prometheus.CounterOpts{
			Name: "kubefedclusters_offline_total",
			Help: "Total number of cluster health checks that reported a kubefed cluster as offline.",
		},
	)

	// functionDuration is a histogram of operation durations in seconds,
	// partitioned by the "function" label (see FunctionLabel).
	functionDuration = prometheus.NewHistogramVec(
		prometheus.HistogramOpts{
			Name:    "function_duration_seconds",
			Help:    "Time taken by various parts of Kubefed main loops.",
			Buckets: []float64{0.01, 0.05, 0.1, 0.5, 1.0, 2.5, 5.0, 7.5, 10.0, 12.5, 15.0, 17.5, 20.0, 22.5, 25.0, 27.5, 30.0, 50.0, 75.0, 100.0, 1000.0},
		}, []string{"function"},
	)

	// functionDurationSummary records the same observations as
	// functionDuration, as a summary with a one-hour MaxAge.
	//
	// NOTE(review): no Objectives are set here. Depending on the vendored
	// client_golang version, a summary without explicit objectives may export
	// only _sum and _count and no quantiles - confirm against the version in
	// go.mod.
	functionDurationSummary = prometheus.NewSummaryVec(
		prometheus.SummaryOpts{
			Name:   "function_duration_quantile_seconds",
			Help:   "Quantiles of time taken by various parts of Kubefed main loops.",
			MaxAge: time.Hour,
		}, []string{"function"},
	)
)

// FunctionLabel names a KubeFed operation whose duration is measured. Its
// string form is what gets passed as the "function" label value when a
// duration is recorded.
type FunctionLabel string

// Operation labels understood by UpdateDuration, together with the threshold
// used to decide whether a slow operation is worth logging.
const (
	// Names of the KubeFed operations that are timed.
	ClusterHealthStatus         FunctionLabel = "clusterHealthStatus"
	ReconcileFederatedResources FunctionLabel = "reconcile:federatedResources"
	ClusterClientConnection     FunctionLabel = "clusterClientConnection"

	// LogLongDurationThreshold is the duration above which an operation is
	// considered long enough to be logged.
	LogLongDurationThreshold = 5 * time.Second
)

// RegisterAll registers all metrics.
func RegisterAll() {
metrics.Registry.MustRegister(kubefedClustersNotReadyCount, kubefedClustersReadyCount, kubefedClustersOfflineCount, functionDuration, functionDurationSummary)
}

// UpdateDurationFromStart records how long the operation identified by label
// has taken, measured from start until now, by delegating to UpdateDuration.
func UpdateDurationFromStart(label FunctionLabel, start time.Time) {
	// time.Since is the idiomatic spelling of time.Now().Sub(start).
	UpdateDuration(label, time.Since(start))
}

// RegisterKubefedClustersReadyCount records one observation of a KubeFed
// cluster in the Ready state by incrementing the ready counter by one.
func RegisterKubefedClustersReadyCount() {
kubefedClustersReadyCount.Inc()
}

// RegisterKubefedClustersOfflineCount records one observation of a KubeFed
// cluster in the Offline state by incrementing the offline counter by one.
func RegisterKubefedClustersOfflineCount() {
kubefedClustersOfflineCount.Inc()
}

// RegisterKubefedClustersNotReadyCount records one observation of a KubeFed
// cluster in the NotReady state by incrementing the not-ready counter by one.
func RegisterKubefedClustersNotReadyCount() {
kubefedClustersNotReadyCount.Inc()
}

// UpdateDuration feeds duration, expressed in seconds, into both the summary
// and the histogram under the "function" label value given by label. When the
// duration is above LogLongDurationThreshold it is additionally logged at
// verbosity level 4.
func UpdateDuration(label FunctionLabel, duration time.Duration) {
	// Convert once; both collectors receive the same label value and sample.
	name, seconds := string(label), duration.Seconds()

	if duration > LogLongDurationThreshold {
		klog.V(4).Infof("Function %s took %v to complete", label, duration)
	}

	functionDurationSummary.WithLabelValues(name).Observe(seconds)
	functionDuration.WithLabelValues(name).Observe(seconds)
}

0 comments on commit b68b143

Please sign in to comment.