Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expose Application labels in prometheus metric #7374

Merged
merged 4 commits into from
Oct 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ func NewCommand() *cobra.Command {
glogLevel int
metricsPort int
metricsCacheExpiration time.Duration
metricsAplicationLabels []string
kubectlParallelismLimit int64
cacheSrc func() (*appstatecache.Cache, error)
redisClient *redis.Client
Expand Down Expand Up @@ -129,6 +130,7 @@ func NewCommand() *cobra.Command {
time.Duration(selfHealTimeoutSeconds)*time.Second,
metricsPort,
metricsCacheExpiration,
metricsAplicationLabels,
kubectlParallelismLimit,
clusterFilter)
errors.CheckError(err)
Expand Down Expand Up @@ -162,6 +164,7 @@ func NewCommand() *cobra.Command {
command.Flags().Int64Var(&kubectlParallelismLimit, "kubectl-parallelism-limit", 20, "Number of allowed concurrent kubectl fork/execs. Any value less the 1 means no limit.")
command.Flags().BoolVar(&repoServerPlaintext, "repo-server-plaintext", env.ParseBoolFromEnv("ARGOCD_APPLICATION_CONTROLLER_REPO_SERVER_PLAINTEXT", false), "Disable TLS on connections to repo server")
command.Flags().BoolVar(&repoServerStrictTLS, "repo-server-strict-tls", env.ParseBoolFromEnv("ARGOCD_APPLICATION_CONTROLLER_REPO_SERVER_STRICT_TLS", false), "Whether to use strict validation of the TLS cert presented by the repo server")
command.Flags().StringSliceVar(&metricsAplicationLabels, "metrics-application-labels", []string{}, "List of Application labels that will be added to the argocd_application_labels metric")
cacheSrc = appstatecache.AddCacheFlagsToCmd(&command, func(client *redis.Client) {
redisClient = client
})
Expand Down
2 changes: 1 addition & 1 deletion cmd/argocd/commands/admin/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ func reconcileApplications(
return true
}, func(r *http.Request) error {
return nil
})
}, []string{})

if err != nil {
return nil, err
Expand Down
3 changes: 2 additions & 1 deletion controller/appcontroller.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ func NewApplicationController(
selfHealTimeout time.Duration,
metricsPort int,
metricsCacheExpiration time.Duration,
metricsApplicationLabels []string,
kubectlParallelismLimit int64,
clusterFilter func(cluster *appv1.Cluster) bool,
) (*ApplicationController, error) {
Expand Down Expand Up @@ -185,7 +186,7 @@ func NewApplicationController(
var err error
ctrl.metricsServer, err = metrics.NewMetricsServer(metricsAddr, appLister, ctrl.canProcessApp, func(r *http.Request) error {
return nil
})
}, metricsApplicationLabels)
if err != nil {
return nil, err
}
Expand Down
1 change: 1 addition & 0 deletions controller/appcontroller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ func newFakeController(data *fakeData) *ApplicationController {
time.Minute,
common.DefaultPortArgoCDMetrics,
data.metricsCacheExpiration,
[]string{},
0,
nil,
)
Expand Down
55 changes: 47 additions & 8 deletions controller/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"net/http"
"os"
"strconv"
"strings"
"time"

"github.com/argoproj/gitops-engine/pkg/health"
Expand Down Expand Up @@ -49,6 +50,8 @@ const (
var (
descAppDefaultLabels = []string{"namespace", "name", "project"}

descAppLabels *prometheus.Desc

descAppInfo = prometheus.NewDesc(
"argocd_app_info",
"Information about application.",
Expand Down Expand Up @@ -121,7 +124,7 @@ var (
redisRequestCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "argocd_redis_request_total",
Help: "Number of kubernetes requests executed during application reconciliation.",
Help: "Number of redis requests executed during application reconciliation.",
},
[]string{"hostname", "initiator", "failed"},
)
Expand All @@ -137,13 +140,24 @@ var (
)

// NewMetricsServer returns a new prometheus server which collects application metrics
func NewMetricsServer(addr string, appLister applister.ApplicationLister, appFilter func(obj interface{}) bool, healthCheck func(r *http.Request) error) (*MetricsServer, error) {
func NewMetricsServer(addr string, appLister applister.ApplicationLister, appFilter func(obj interface{}) bool, healthCheck func(r *http.Request) error, appLabels []string) (*MetricsServer, error) {
hostname, err := os.Hostname()
if err != nil {
return nil, err
}

if len(appLabels) > 0 {
normalizedLabels := normalizeLabels("label", appLabels)
descAppLabels = prometheus.NewDesc(
"argocd_app_labels",
"Argo Application labels converted to Prometheus labels",
append(descAppDefaultLabels, normalizedLabels...),
nil,
)
}

mux := http.NewServeMux()
registry := NewAppRegistry(appLister, appFilter)
registry := NewAppRegistry(appLister, appFilter, appLabels)
mux.Handle(MetricsPath, promhttp.HandlerFor(prometheus.Gatherers{
// contains app controller specific metrics
registry,
Expand Down Expand Up @@ -180,6 +194,17 @@ func NewMetricsServer(addr string, appLister applister.ApplicationLister, appFil
}, nil
}

// normalizeLabels converts Application label keys into Prometheus label names.
//
// Each returned name is "<prefix>_<key>", where every character of key that is
// not an ASCII letter, digit or underscore is replaced by '_'. Kubernetes label
// keys may legally contain '-', '.' and '/' (for example
// "app.kubernetes.io/name"), none of which Prometheus accepts in a label name,
// so replacing only dashes is not sufficient.
//
// The result has one entry per input key, in input order, and is never nil.
// Distinct keys that differ only in replaced characters (e.g. "a-b" and "a.b")
// normalize to the same name; callers must avoid configuring such pairs.
func normalizeLabels(prefix string, appLabels []string) []string {
	// Keep only the characters Prometheus allows in a label name; map the rest to '_'.
	sanitize := func(r rune) rune {
		switch {
		case r >= 'a' && r <= 'z', r >= 'A' && r <= 'Z', r >= '0' && r <= '9', r == '_':
			return r
		default:
			return '_'
		}
	}

	results := make([]string, 0, len(appLabels))
	for _, label := range appLabels {
		results = append(results, fmt.Sprintf("%s_%s", prefix, strings.Map(sanitize, label)))
	}
	return results
}

func (m *MetricsServer) RegisterClustersInfoSource(ctx context.Context, source HasClustersInfo) {
collector := &clusterCollector{infoSource: source}
go collector.Run(ctx)
Expand Down Expand Up @@ -272,25 +297,30 @@ func (m *MetricsServer) SetExpiration(cacheExpiration time.Duration) error {
type appCollector struct {
store applister.ApplicationLister
appFilter func(obj interface{}) bool
appLabels []string
}

// NewAppCollector returns a prometheus collector for application metrics
func NewAppCollector(appLister applister.ApplicationLister, appFilter func(obj interface{}) bool) prometheus.Collector {
func NewAppCollector(appLister applister.ApplicationLister, appFilter func(obj interface{}) bool, appLabels []string) prometheus.Collector {
return &appCollector{
store: appLister,
appFilter: appFilter,
appLabels: appLabels,
}
}

// NewAppRegistry creates a new prometheus registry that collects applications
func NewAppRegistry(appLister applister.ApplicationLister, appFilter func(obj interface{}) bool) *prometheus.Registry {
func NewAppRegistry(appLister applister.ApplicationLister, appFilter func(obj interface{}) bool, appLabels []string) *prometheus.Registry {
registry := prometheus.NewRegistry()
registry.MustRegister(NewAppCollector(appLister, appFilter))
registry.MustRegister(NewAppCollector(appLister, appFilter, appLabels))
return registry
}

// Describe implements the prometheus.Collector interface
func (c *appCollector) Describe(ch chan<- *prometheus.Desc) {
if len(c.appLabels) > 0 {
ch <- descAppLabels
}
ch <- descAppInfo
ch <- descAppSyncStatusCode
ch <- descAppHealthStatus
Expand All @@ -305,7 +335,7 @@ func (c *appCollector) Collect(ch chan<- prometheus.Metric) {
}
for _, app := range apps {
if c.appFilter(app) {
collectApps(ch, app)
c.collectApps(ch, app)
}
}
}
Expand All @@ -317,7 +347,7 @@ func boolFloat64(b bool) float64 {
return 0
}

func collectApps(ch chan<- prometheus.Metric, app *argoappv1.Application) {
func (c *appCollector) collectApps(ch chan<- prometheus.Metric, app *argoappv1.Application) {
addConstMetric := func(desc *prometheus.Desc, t prometheus.ValueType, v float64, lv ...string) {
project := app.Spec.GetProject()
lv = append([]string{app.Namespace, app.Name, project}, lv...)
Expand All @@ -344,6 +374,15 @@ func collectApps(ch chan<- prometheus.Metric, app *argoappv1.Application) {

addGauge(descAppInfo, 1, git.NormalizeGitURL(app.Spec.Source.RepoURL), app.Spec.Destination.Server, app.Spec.Destination.Namespace, string(syncStatus), string(healthStatus), operation)

if len(c.appLabels) > 0 {
labelValues := []string{}
for _, desiredLabel := range c.appLabels {
value := app.GetLabels()[desiredLabel]
labelValues = append(labelValues, value)
}
addGauge(descAppLabels, 1, labelValues...)
}

// Deprecated controller metrics
if os.Getenv(EnvVarLegacyControllerMetrics) == "true" {
addGauge(descAppCreated, float64(app.CreationTimestamp.Unix()))
Expand Down
76 changes: 67 additions & 9 deletions controller/metrics/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ kind: Application
metadata:
name: my-app
namespace: argocd
labels:
team-name: my-team
team-bu: bu-id
spec:
destination:
namespace: dummy-namespace
Expand All @@ -50,6 +53,9 @@ kind: Application
metadata:
name: my-app-2
namespace: argocd
labels:
team-name: my-team
team-bu: bu-id
spec:
destination:
namespace: dummy-namespace
Expand Down Expand Up @@ -77,6 +83,9 @@ metadata:
name: my-app-3
namespace: argocd
deletionTimestamp: "2020-03-16T09:17:45Z"
labels:
team-name: my-team
team-bu: bu-id
spec:
destination:
namespace: dummy-namespace
Expand Down Expand Up @@ -148,9 +157,15 @@ func newFakeLister(fakeAppYAMLs ...string) (context.CancelFunc, applister.Applic
}

// testApp runs the metrics server against the given fake Application manifests
// with no extra Application labels configured and checks the /metrics output
// against expectedResponse by delegating to testMetricServer.
func testApp(t *testing.T, fakeAppYAMLs []string, expectedResponse string) {
	t.Helper()
	noExtraLabels := []string{}
	testMetricServer(t, fakeAppYAMLs, expectedResponse, noExtraLabels)
}

func testMetricServer(t *testing.T, fakeAppYAMLs []string, expectedResponse string, appLabels []string) {
t.Helper()
cancel, appLister := newFakeLister(fakeAppYAMLs...)
defer cancel()
metricsServ, err := NewMetricsServer("localhost:8082", appLister, appFilter, noOpHealthCheck)
metricsServ, err := NewMetricsServer("localhost:8082", appLister, appFilter, noOpHealthCheck, appLabels)
assert.NoError(t, err)
req, err := http.NewRequest("GET", "/metrics", nil)
assert.NoError(t, err)
Expand All @@ -164,14 +179,14 @@ func testApp(t *testing.T, fakeAppYAMLs []string, expectedResponse string) {

type testCombination struct {
applications []string
expectedResponse string
responseContains string
}

func TestMetrics(t *testing.T) {
combinations := []testCombination{
{
applications: []string{fakeApp, fakeApp2, fakeApp3},
expectedResponse: `
responseContains: `
# HELP argocd_app_info Information about application.
# TYPE argocd_app_info gauge
argocd_app_info{dest_namespace="dummy-namespace",dest_server="https://localhost:6443",health_status="Degraded",name="my-app-3",namespace="argocd",operation="delete",project="important-project",repo="https://github.com/argoproj/argocd-example-apps",sync_status="OutOfSync"} 1
Expand All @@ -181,7 +196,7 @@ argocd_app_info{dest_namespace="dummy-namespace",dest_server="https://localhost:
},
{
applications: []string{fakeDefaultApp},
expectedResponse: `
responseContains: `
# HELP argocd_app_info Information about application.
# TYPE argocd_app_info gauge
argocd_app_info{dest_namespace="dummy-namespace",dest_server="https://localhost:6443",health_status="Healthy",name="my-app",namespace="argocd",operation="",project="default",repo="https://github.com/argoproj/argocd-example-apps",sync_status="Synced"} 1
Expand All @@ -190,7 +205,50 @@ argocd_app_info{dest_namespace="dummy-namespace",dest_server="https://localhost:
}

for _, combination := range combinations {
testApp(t, combination.applications, combination.expectedResponse)
testApp(t, combination.applications, combination.responseContains)
}
}

func TestMetricLabels(t *testing.T) {
type testCases struct {
testCombination
description string
metricLabels []string
}
cases := []testCases{
{
description: "will return the labels metrics successfully",
metricLabels: []string{"team-name", "team-bu"},
testCombination: testCombination{
applications: []string{fakeApp, fakeApp2, fakeApp3},
responseContains: `
# TYPE argocd_app_labels gauge
argocd_app_labels{label_team_bu="bu-id",label_team_name="my-team",name="my-app",namespace="argocd",project="important-project"} 1
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for the test!

argocd_app_labels{label_team_bu="bu-id",label_team_name="my-team",name="my-app-2",namespace="argocd",project="important-project"} 1
argocd_app_labels{label_team_bu="bu-id",label_team_name="my-team",name="my-app-3",namespace="argocd",project="important-project"} 1
`,
},
},
{
description: "metric will have empty label value if not present in the application",
metricLabels: []string{"non-existing"},
testCombination: testCombination{
applications: []string{fakeApp, fakeApp2, fakeApp3},
responseContains: `
# TYPE argocd_app_labels gauge
argocd_app_labels{label_non_existing="",name="my-app",namespace="argocd",project="important-project"} 1
argocd_app_labels{label_non_existing="",name="my-app-2",namespace="argocd",project="important-project"} 1
argocd_app_labels{label_non_existing="",name="my-app-3",namespace="argocd",project="important-project"} 1
`,
},
},
}

for _, c := range cases {
c := c
t.Run(c.description, func(t *testing.T) {
testMetricServer(t, c.applications, c.responseContains, c.metricLabels)
})
}
}

Expand Down Expand Up @@ -222,7 +280,7 @@ argocd_app_sync_status{name="my-app",namespace="argocd",project="important-proje
func TestMetricsSyncCounter(t *testing.T) {
cancel, appLister := newFakeLister()
defer cancel()
metricsServ, err := NewMetricsServer("localhost:8082", appLister, appFilter, noOpHealthCheck)
metricsServ, err := NewMetricsServer("localhost:8082", appLister, appFilter, noOpHealthCheck, []string{})
assert.NoError(t, err)

appSyncTotal := `
Expand Down Expand Up @@ -256,7 +314,7 @@ func assertMetricsPrinted(t *testing.T, expectedLines, body string) {
if line == "" {
continue
}
assert.Contains(t, body, line)
assert.Contains(t, body, line, "expected metrics mismatch")
}
}

Expand All @@ -273,7 +331,7 @@ func assertMetricsNotPrinted(t *testing.T, expectedLines, body string) {
func TestReconcileMetrics(t *testing.T) {
cancel, appLister := newFakeLister()
defer cancel()
metricsServ, err := NewMetricsServer("localhost:8082", appLister, appFilter, noOpHealthCheck)
metricsServ, err := NewMetricsServer("localhost:8082", appLister, appFilter, noOpHealthCheck, []string{})
assert.NoError(t, err)

appReconcileMetrics := `
Expand Down Expand Up @@ -306,7 +364,7 @@ argocd_app_reconcile_count{dest_server="https://localhost:6443",namespace="argoc
func TestMetricsReset(t *testing.T) {
cancel, appLister := newFakeLister()
defer cancel()
metricsServ, err := NewMetricsServer("localhost:8082", appLister, appFilter, noOpHealthCheck)
metricsServ, err := NewMetricsServer("localhost:8082", appLister, appFilter, noOpHealthCheck, []string{})
assert.NoError(t, err)

appSyncTotal := `
Expand Down
Loading