Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add native histogram support for histogram metrics #9971

Merged
merged 2 commits into from
Aug 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/dataplane/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ func main() {
mc := metric.NewDummyCollector()
if conf.EnableMetrics {
// TODO: Ingress class is not a part of dataplane anymore
mc, err = metric.NewCollector(conf.MetricsPerHost, conf.ReportStatusClasses, reg, conf.IngressClassConfiguration.Controller, *conf.MetricsBuckets, conf.ExcludeSocketMetrics)
mc, err = metric.NewCollector(conf.MetricsPerHost, conf.ReportStatusClasses, reg, conf.IngressClassConfiguration.Controller, *conf.MetricsBuckets, conf.MetricsBucketFactor, conf.MetricsMaxBuckets, conf.ExcludeSocketMetrics)
if err != nil {
klog.Fatalf("Error creating prometheus collector: %v", err)
}
Expand Down
2 changes: 1 addition & 1 deletion cmd/nginx/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ func main() {

mc := metric.NewDummyCollector()
if conf.EnableMetrics {
mc, err = metric.NewCollector(conf.MetricsPerHost, conf.ReportStatusClasses, reg, conf.IngressClassConfiguration.Controller, *conf.MetricsBuckets, conf.ExcludeSocketMetrics)
mc, err = metric.NewCollector(conf.MetricsPerHost, conf.ReportStatusClasses, reg, conf.IngressClassConfiguration.Controller, *conf.MetricsBuckets, conf.MetricsBucketFactor, conf.MetricsMaxBuckets, conf.ExcludeSocketMetrics)
if err != nil {
klog.Fatalf("Error creating prometheus collector: %v", err)
}
Expand Down
2 changes: 2 additions & 0 deletions docs/user-guide/cli-arguments.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ They are set in the container spec of the `ingress-nginx-controller` Deployment
|----------|-------------|
| `--annotations-prefix` | Prefix of the Ingress annotations specific to the NGINX controller. (default "nginx.ingress.kubernetes.io") |
| `--apiserver-host` | Address of the Kubernetes API server. Takes the form "protocol://address:port". If not specified, it is assumed the program runs inside a Kubernetes cluster and local discovery is attempted. |
| `--bucket-factor` | Bucket factor for native histograms. Value must be > 1 to enable native histograms. (default 0) |
| `--certificate-authority` | Path to a cert file for the certificate authority. This certificate is used only when the flag --apiserver-host is specified. |
| `--configmap` | Name of the ConfigMap containing custom global configurations for the controller. |
| `--controller-class` | Ingress Class Controller value this Ingress satisfies. The class of an Ingress object is set using the field IngressClassName in Kubernetes clusters version v1.19.0 or higher. The .spec.controller value of the IngressClass referenced in an Ingress Object should be the same value specified here to make this object be watched. |
Expand Down Expand Up @@ -40,6 +41,7 @@ They are set in the container spec of the `ingress-nginx-controller` Deployment
| `--internal-logger-address` | Address to be used when binding internal syslogger. (default 127.0.0.1:11514) |
| `--kubeconfig` | Path to a kubeconfig file containing authorization and API server information. |
| `--length-buckets` | Set of buckets which will be used for prometheus histogram metrics such as RequestLength, ResponseLength. (default `[10, 20, 30, 40, 50, 60, 70, 80, 90, 100]`) |
| `--max-buckets` | Maximum number of buckets for native histograms. (default 100) |
| `--maxmind-edition-ids` | Maxmind edition ids to download GeoLite2 Databases. (default "GeoLite2-City,GeoLite2-ASN") |
| `--maxmind-retries-timeout` | Maxmind downloading delay between 1st and 2nd attempt, 0s - do not retry to download if something went wrong. (default 0s) |
| `--maxmind-retries-count` | Number of attempts to download the GeoIP DB. (default 1) |
Expand Down
71 changes: 69 additions & 2 deletions go.work.sum

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions internal/ingress/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,8 @@ type Configuration struct {
EnableMetrics bool
MetricsPerHost bool
MetricsBuckets *collectors.HistogramBuckets
MetricsBucketFactor float64
MetricsMaxBuckets uint32
ReportStatusClasses bool
ExcludeSocketMetrics []string

Expand Down
74 changes: 43 additions & 31 deletions internal/ingress/metric/collectors/socket.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ var requestTags = []string{

// NewSocketCollector creates a new SocketCollector instance using
// the ingress watch namespace and class used by the controller
func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStatusClasses bool, buckets HistogramBuckets, excludeMetrics []string) (*SocketCollector, error) {
func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStatusClasses bool, buckets HistogramBuckets, bucketFactor float64, maxBuckets uint32, excludeMetrics []string) (*SocketCollector, error) {
socket := "/tmp/nginx/prometheus-nginx.socket"
// unix sockets must be unlink()ed before being used
//nolint:errcheck // Ignore unlink error
Expand Down Expand Up @@ -144,11 +144,13 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat

connectTime: histogramMetric(
&prometheus.HistogramOpts{
Name: "connect_duration_seconds",
Help: "The time spent on establishing a connection with the upstream server",
Namespace: PrometheusNamespace,
ConstLabels: constLabels,
Buckets: buckets.TimeBuckets,
Name: "connect_duration_seconds",
Help: "The time spent on establishing a connection with the upstream server",
Namespace: PrometheusNamespace,
ConstLabels: constLabels,
Buckets: buckets.TimeBuckets,
NativeHistogramBucketFactor: bucketFactor,
NativeHistogramMaxBucketNumber: maxBuckets,
},
requestTags,
em,
Expand All @@ -157,23 +159,27 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat

headerTime: histogramMetric(
&prometheus.HistogramOpts{
Name: "header_duration_seconds",
Help: "The time spent on receiving first header from the upstream server",
Namespace: PrometheusNamespace,
ConstLabels: constLabels,
Buckets: buckets.TimeBuckets,
Name: "header_duration_seconds",
Help: "The time spent on receiving first header from the upstream server",
Namespace: PrometheusNamespace,
ConstLabels: constLabels,
Buckets: buckets.TimeBuckets,
NativeHistogramBucketFactor: bucketFactor,
NativeHistogramMaxBucketNumber: maxBuckets,
},
requestTags,
em,
mm,
),
responseTime: histogramMetric(
&prometheus.HistogramOpts{
Name: "response_duration_seconds",
Help: "The time spent on receiving the response from the upstream server",
Namespace: PrometheusNamespace,
ConstLabels: constLabels,
Buckets: buckets.TimeBuckets,
Name: "response_duration_seconds",
Help: "The time spent on receiving the response from the upstream server",
Namespace: PrometheusNamespace,
ConstLabels: constLabels,
Buckets: buckets.TimeBuckets,
NativeHistogramBucketFactor: bucketFactor,
NativeHistogramMaxBucketNumber: maxBuckets,
},
requestTags,
em,
Expand All @@ -182,11 +188,13 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat

requestTime: histogramMetric(
&prometheus.HistogramOpts{
Name: "request_duration_seconds",
Help: "The request processing time in milliseconds",
Namespace: PrometheusNamespace,
ConstLabels: constLabels,
Buckets: buckets.TimeBuckets,
Name: "request_duration_seconds",
Help: "The request processing time in milliseconds",
Namespace: PrometheusNamespace,
ConstLabels: constLabels,
Buckets: buckets.TimeBuckets,
NativeHistogramBucketFactor: bucketFactor,
NativeHistogramMaxBucketNumber: maxBuckets,
},
requestTags,
em,
Expand All @@ -195,11 +203,13 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat

responseLength: histogramMetric(
&prometheus.HistogramOpts{
Name: "response_size",
Help: "The response length (including request line, header, and request body)",
Namespace: PrometheusNamespace,
ConstLabels: constLabels,
Buckets: buckets.LengthBuckets,
Name: "response_size",
Help: "The response length (including request line, header, and request body)",
Namespace: PrometheusNamespace,
ConstLabels: constLabels,
Buckets: buckets.LengthBuckets,
NativeHistogramBucketFactor: bucketFactor,
NativeHistogramMaxBucketNumber: maxBuckets,
},
requestTags,
em,
Expand All @@ -208,11 +218,13 @@ func NewSocketCollector(pod, namespace, class string, metricsPerHost, reportStat

requestLength: histogramMetric(
&prometheus.HistogramOpts{
Name: "request_size",
Help: "The request length (including request line, header, and request body)",
Namespace: PrometheusNamespace,
ConstLabels: constLabels,
Buckets: buckets.LengthBuckets,
Name: "request_size",
Help: "The request length (including request line, header, and request body)",
Namespace: PrometheusNamespace,
ConstLabels: constLabels,
Buckets: buckets.LengthBuckets,
NativeHistogramBucketFactor: bucketFactor,
NativeHistogramMaxBucketNumber: maxBuckets,
},
requestTags,
em,
Expand Down
5 changes: 4 additions & 1 deletion internal/ingress/metric/collectors/socket_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ func TestCollector(t *testing.T) {
prometheus.ExponentialBuckets(10, 10, 7),
}

bucketFactor := 1.1
maxBuckets := uint32(100)

cases := []struct {
name string
data []string
Expand Down Expand Up @@ -594,7 +597,7 @@ func TestCollector(t *testing.T) {
t.Run(c.name, func(t *testing.T) {
registry := prometheus.NewPedanticRegistry()

sc, err := NewSocketCollector("pod", "default", "ingress", true, c.useStatusClasses, buckets, c.excludeMetrics)
sc, err := NewSocketCollector("pod", "default", "ingress", true, c.useStatusClasses, buckets, bucketFactor, maxBuckets, c.excludeMetrics)
if err != nil {
t.Errorf("%v: unexpected error creating new SocketCollector: %v", c.name, err)
}
Expand Down
4 changes: 2 additions & 2 deletions internal/ingress/metric/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ type collector struct {
}

// NewCollector creates a new metric collector for the ingress controller
func NewCollector(metricsPerHost, reportStatusClasses bool, registry *prometheus.Registry, ingressclass string, buckets collectors.HistogramBuckets, excludedSocketMetrics []string) (Collector, error) {
func NewCollector(metricsPerHost, reportStatusClasses bool, registry *prometheus.Registry, ingressclass string, buckets collectors.HistogramBuckets, bucketFactor float64, maxBuckets uint32, excludedSocketMetrics []string) (Collector, error) {
podNamespace := os.Getenv("POD_NAMESPACE")
if podNamespace == "" {
podNamespace = "default"
Expand All @@ -89,7 +89,7 @@ func NewCollector(metricsPerHost, reportStatusClasses bool, registry *prometheus
return nil, err
}

s, err := collectors.NewSocketCollector(podName, podNamespace, ingressclass, metricsPerHost, reportStatusClasses, buckets, excludedSocketMetrics)
s, err := collectors.NewSocketCollector(podName, podNamespace, ingressclass, metricsPerHost, reportStatusClasses, buckets, bucketFactor, maxBuckets, excludedSocketMetrics)
if err != nil {
return nil, err
}
Expand Down
4 changes: 4 additions & 0 deletions pkg/flags/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,8 @@ Requires the update-status parameter.`)
timeBuckets = flags.Float64Slice("time-buckets", prometheus.DefBuckets, "Set of buckets which will be used for prometheus histogram metrics such as RequestTime, ResponseTime.")
lengthBuckets = flags.Float64Slice("length-buckets", prometheus.LinearBuckets(10, 10, 10), "Set of buckets which will be used for prometheus histogram metrics such as RequestLength, ResponseLength.")
sizeBuckets = flags.Float64Slice("size-buckets", prometheus.ExponentialBuckets(10, 10, 7), "Set of buckets which will be used for prometheus histogram metrics such as BytesSent.")
bucketFactor = flags.Float64("bucket-factor", 0, "Bucket factor for native histograms. Value must be > 1 for enabling native histograms.")
maxBuckets = flags.Uint32("max-buckets", 100, "Maximum number of buckets for native histograms.")
excludeSocketMetrics = flags.StringSlice("exclude-socket-metrics", []string{}, "et of socket request metrics to exclude which won't be exported nor being calculated. E.g. 'nginx_ingress_controller_success,nginx_ingress_controller_header_duration_seconds'.")
monitorMaxBatchSize = flags.Int("monitor-max-batch-size", 10000, "Max batch size of NGINX metrics.")

Expand Down Expand Up @@ -339,6 +341,8 @@ https://blog.maxmind.com/2019/12/18/significant-changes-to-accessing-and-using-g
EnableMetrics: *enableMetrics,
MetricsPerHost: *metricsPerHost,
MetricsBuckets: histogramBuckets,
MetricsBucketFactor: *bucketFactor,
MetricsMaxBuckets: *maxBuckets,
ReportStatusClasses: *reportStatusClasses,
ExcludeSocketMetrics: *excludeSocketMetrics,
MonitorMaxBatchSize: *monitorMaxBatchSize,
Expand Down