diff --git a/internal/ingress/controller/controller.go b/internal/ingress/controller/controller.go index 607bb5cb96..651e983a84 100644 --- a/internal/ingress/controller/controller.go +++ b/internal/ingress/controller/controller.go @@ -146,6 +146,7 @@ func (n *NGINXController) syncIngress(interface{}) error { hosts, servers, pcfg := n.getConfiguration(ings) n.metricCollector.SetSSLExpireTime(servers) + n.metricCollector.SetSSLInfo(servers) if n.runningConfig.Equal(pcfg) { klog.V(3).Infof("No configuration change detected, skipping backend reload") @@ -211,7 +212,8 @@ func (n *NGINXController) syncIngress(interface{}) error { ri := getRemovedIngresses(n.runningConfig, pcfg) re := getRemovedHosts(n.runningConfig, pcfg) - n.metricCollector.RemoveMetrics(ri, re) + rc := getRemovedCertificateSerialNumbers(n.runningConfig, pcfg) + n.metricCollector.RemoveMetrics(ri, re, rc) n.runningConfig = pcfg @@ -1625,6 +1627,37 @@ func getRemovedHosts(rucfg, newcfg *ingress.Configuration) []string { return old.Difference(new).List() } +func getRemovedCertificateSerialNumbers(rucfg, newcfg *ingress.Configuration) []string { + oldCertificates := sets.NewString() + newCertificates := sets.NewString() + + for _, server := range rucfg.Servers { + if server.SSLCert == nil { + continue + } + identifier := server.SSLCert.Identifier() + if identifier != "" { + if !oldCertificates.Has(identifier) { + oldCertificates.Insert(identifier) + } + } + } + + for _, server := range newcfg.Servers { + if server.SSLCert == nil { + continue + } + identifier := server.SSLCert.Identifier() + if identifier != "" { + if !newCertificates.Has(identifier) { + newCertificates.Insert(identifier) + } + } + } + + return oldCertificates.Difference(newCertificates).List() +} + func getRemovedIngresses(rucfg, newcfg *ingress.Configuration) []string { oldIngresses := sets.NewString() newIngresses := sets.NewString() diff --git a/internal/ingress/controller/nginx.go b/internal/ingress/controller/nginx.go index ed5590c3e7..e4037dfb05 100644 --- a/internal/ingress/controller/nginx.go +++ b/internal/ingress/controller/nginx.go @@ -275,6 +275,7 @@ func (n *NGINXController) Start() { // manually update SSL expiration metrics // (to not wait for a reload) n.metricCollector.SetSSLExpireTime(n.runningConfig.Servers) + n.metricCollector.SetSSLInfo(n.runningConfig.Servers) }, OnStoppedLeading: func() { n.metricCollector.OnStoppedLeading(electionID) diff --git a/internal/ingress/metric/collectors/controller.go b/internal/ingress/metric/collectors/controller.go index 135e74b9b8..e4e2655c80 100644 --- a/internal/ingress/metric/collectors/controller.go +++ b/internal/ingress/metric/collectors/controller.go @@ -30,7 +30,8 @@ import ( var ( operation = []string{"controller_namespace", "controller_class", "controller_pod"} ingressOperation = []string{"controller_namespace", "controller_class", "controller_pod", "namespace", "ingress"} - sslLabelHost = []string{"namespace", "class", "host"} + sslLabelHost = []string{"namespace", "class", "host", "secret_name"} + sslInfoLabels = []string{"namespace", "class", "host", "secret_name", "identifier", "issuer_organization", "issuer_common_name", "serial_number", "public_key_algorithm"} ) // Controller defines base metrics about the ingress controller @@ -46,6 +47,7 @@ type Controller struct { checkIngressOperation *prometheus.CounterVec checkIngressOperationErrors *prometheus.CounterVec sslExpireTime *prometheus.GaugeVec + sslInfo *prometheus.GaugeVec constLabels prometheus.Labels labels prometheus.Labels @@ -152,6 +154,14 @@ func NewController(pod, namespace, class string) *Controller { }, sslLabelHost, ), + sslInfo: prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: PrometheusNamespace, + Name: "ssl_certificate_info", + Help: `Hold all labels associated to a certificate`, + }, + sslInfoLabels, + ), leaderElection: prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: PrometheusNamespace, @@ -229,6 +239,7 @@ func (cm Controller) Describe(ch chan<- *prometheus.Desc) { cm.checkIngressOperation.Describe(ch) cm.checkIngressOperationErrors.Describe(ch) cm.sslExpireTime.Describe(ch) + cm.sslInfo.Describe(ch) cm.leaderElection.Describe(ch) cm.buildInfo.Describe(ch) } @@ -243,6 +254,7 @@ func (cm Controller) Collect(ch chan<- prometheus.Metric) { cm.checkIngressOperation.Collect(ch) cm.checkIngressOperationErrors.Collect(ch) cm.sslExpireTime.Collect(ch) + cm.sslInfo.Collect(ch) cm.leaderElection.Collect(ch) cm.buildInfo.Collect(ch) } @@ -256,20 +268,89 @@ func (cm *Controller) SetSSLExpireTime(servers []*ingress.Server) { labels[k] = v } labels["host"] = s.Hostname + labels["secret_name"] = s.SSLCert.Name cm.sslExpireTime.With(labels).Set(float64(s.SSLCert.ExpireTime.Unix())) } } } +// SetSSLInfo creates a metric with all certificates informations +func (cm *Controller) SetSSLInfo(servers []*ingress.Server) { + for _, s := range servers { + if s.SSLCert != nil && s.SSLCert.Certificate != nil && s.SSLCert.Certificate.SerialNumber != nil { + labels := make(prometheus.Labels, len(cm.labels)+1) + for k, v := range cm.labels { + labels[k] = v + } + labels["identifier"] = s.SSLCert.Identifier() + labels["host"] = s.Hostname + labels["secret_name"] = s.SSLCert.Name + labels["namespace"] = s.SSLCert.Namespace + labels["issuer_common_name"] = s.SSLCert.Certificate.Issuer.CommonName + labels["issuer_organization"] = "" + if len(s.SSLCert.Certificate.Issuer.Organization) > 0 { + labels["issuer_organization"] = s.SSLCert.Certificate.Issuer.Organization[0] + } + labels["serial_number"] = s.SSLCert.Certificate.SerialNumber.String() + labels["public_key_algorithm"] = s.SSLCert.Certificate.PublicKeyAlgorithm.String() + + cm.sslInfo.With(labels).Set(1) + } + } +} + // RemoveMetrics removes metrics for hostnames not available anymore -func (cm *Controller) RemoveMetrics(hosts []string, registry prometheus.Gatherer) { +func (cm *Controller) RemoveMetrics(hosts, certificates []string, registry prometheus.Gatherer) { cm.removeSSLExpireMetrics(true, hosts, registry) + cm.removeCertificatesMetrics(true, certificates, registry) } -// RemoveAllSSLExpireMetrics removes metrics for expiration of SSL Certificates -func (cm *Controller) RemoveAllSSLExpireMetrics(registry prometheus.Gatherer) { +// RemoveAllSSLMetrics removes metrics for expiration of SSL Certificates +func (cm *Controller) RemoveAllSSLMetrics(registry prometheus.Gatherer) { cm.removeSSLExpireMetrics(false, []string{}, registry) + cm.removeCertificatesMetrics(false, []string{}, registry) +} + +func (cm *Controller) removeCertificatesMetrics(onlyDefinedHosts bool, certificates []string, registry prometheus.Gatherer) { + mfs, err := registry.Gather() + if err != nil { + klog.Errorf("Error gathering metrics: %v", err) + return + } + + toRemove := sets.NewString(certificates...) + + for _, mf := range mfs { + metricName := mf.GetName() + if fmt.Sprintf("%v_ssl_certificate_info", PrometheusNamespace) != metricName { + continue + } + + for _, m := range mf.GetMetric() { + labels := make(map[string]string, len(m.GetLabel())) + for _, labelPair := range m.GetLabel() { + labels[*labelPair.Name] = *labelPair.Value + } + + // remove labels that are constant + deleteConstants(labels) + + identifier, ok := labels["identifier"] + if !ok { + continue + } + if onlyDefinedHosts && !toRemove.Has(identifier) { + continue + } + + klog.V(2).Infof("Removing prometheus metric from gauge %v for identifier %v", metricName, identifier) + removed := cm.sslInfo.Delete(labels) + if !removed { + klog.V(2).Infof("metric %v for identifier %v with labels not removed: %v", metricName, identifier, labels) + } + } + } } func (cm *Controller) removeSSLExpireMetrics(onlyDefinedHosts bool, hosts []string, registry prometheus.Gatherer) { diff --git a/internal/ingress/metric/collectors/controller_test.go b/internal/ingress/metric/collectors/controller_test.go index e10dafdfe3..bc9f21c60c 100644 --- a/internal/ingress/metric/collectors/controller_test.go +++ b/internal/ingress/metric/collectors/controller_test.go @@ -17,6 +17,9 @@ limitations under the License. package collectors import ( + "crypto/x509" + "crypto/x509/pkix" + "math/big" "testing" "time" @@ -96,10 +99,111 @@ func TestControllerCounters(t *testing.T) { want: ` # HELP nginx_ingress_controller_ssl_expire_time_seconds Number of seconds since 1970 to the SSL Certificate expire.\n An example to check if this certificate will expire in 10 days is: "nginx_ingress_controller_ssl_expire_time_seconds < (time() + (10 * 24 * 3600))" # TYPE nginx_ingress_controller_ssl_expire_time_seconds gauge - nginx_ingress_controller_ssl_expire_time_seconds{class="nginx",host="demo",namespace="default"} 1.351807721e+09 + nginx_ingress_controller_ssl_expire_time_seconds{class="nginx",host="demo",namespace="default",secret_name=""} 1.351807721e+09 `, metrics: []string{"nginx_ingress_controller_ssl_expire_time_seconds"}, }, + { + name: "should set SSL certificates infos metrics", + test: func(cm *Controller) { + + servers := []*ingress.Server{ + { + Hostname: "demo", + SSLCert: &ingress.SSLCert{ + Name: "secret-name", + Namespace: "ingress-namespace", + Certificate: &x509.Certificate{ + PublicKeyAlgorithm: x509.ECDSA, + Issuer: pkix.Name{ + CommonName: "certificate issuer", + SerialNumber: "abcd1234", + Organization: []string{"issuer org"}, + }, + SerialNumber: big.NewInt(100), + }, + }, + }, + { + Hostname: "invalid", + SSLCert: &ingress.SSLCert{ + ExpireTime: time.Unix(0, 0), + }, + }, + } + cm.SetSSLInfo(servers) + }, + want: ` + # HELP nginx_ingress_controller_ssl_certificate_info Hold all labels associated to a certificate + # TYPE nginx_ingress_controller_ssl_certificate_info gauge + nginx_ingress_controller_ssl_certificate_info{class="nginx",host="demo",identifier="abcd1234-100",issuer_common_name="certificate issuer",issuer_organization="issuer org",namespace="ingress-namespace",public_key_algorithm="ECDSA",secret_name="secret-name",serial_number="100"} 1 + `, + metrics: []string{"nginx_ingress_controller_ssl_certificate_info"}, + }, + { + name: "should ignore certificates without serial number", + test: func(cm *Controller) { + + servers := []*ingress.Server{ + { + Hostname: "demo", + SSLCert: &ingress.SSLCert{ + Name: "secret-name", + Namespace: "ingress-namespace", + Certificate: &x509.Certificate{ + PublicKeyAlgorithm: x509.ECDSA, + Issuer: pkix.Name{ + CommonName: "certificate issuer", + SerialNumber: "abcd1234", + }, + }, + }, + }, + } + cm.SetSSLInfo(servers) + }, + want: ``, + metrics: []string{"nginx_ingress_controller_ssl_certificate_info"}, + }, + { + name: "should ignore certificates with nil x509 pointer", + test: func(cm *Controller) { + + servers := []*ingress.Server{ + { + Hostname: "demo", + SSLCert: &ingress.SSLCert{ + Name: "secret-name", + Namespace: "ingress-namespace", + Certificate: &x509.Certificate{ + PublicKeyAlgorithm: x509.ECDSA, + Issuer: pkix.Name{ + CommonName: "certificate issuer", + SerialNumber: "abcd1234", + }, + }, + }, + }, + } + cm.SetSSLInfo(servers) + }, + want: ``, + metrics: []string{"nginx_ingress_controller_ssl_certificate_info"}, + }, + { + name: "should ignore servers without certificates", + test: func(cm *Controller) { + + servers := []*ingress.Server{ + { + Hostname: "demo", + }, + } + cm.SetSSLInfo(servers) + }, + want: ``, + metrics: []string{"nginx_ingress_controller_ssl_certificate_info"}, + }, } for _, c := range cases { @@ -137,6 +241,13 @@ func TestRemoveMetrics(t *testing.T) { Hostname: "demo", SSLCert: &ingress.SSLCert{ ExpireTime: t1, + Certificate: &x509.Certificate{ + Issuer: pkix.Name{ + CommonName: "certificate issuer", + SerialNumber: "abcd1234", + }, + SerialNumber: big.NewInt(100), + }, }, }, { @@ -147,12 +258,63 @@ func TestRemoveMetrics(t *testing.T) { }, } cm.SetSSLExpireTime(servers) + cm.SetSSLInfo(servers) - cm.RemoveMetrics([]string{"demo"}, reg) + cm.RemoveMetrics([]string{"demo"}, []string{"abcd1234-100"}, reg) if err := GatherAndCompare(cm, "", []string{"nginx_ingress_controller_ssl_expire_time_seconds"}, reg); err != nil { t.Errorf("unexpected collecting result:\n%s", err) } + if err := GatherAndCompare(cm, "", []string{"nginx_ingress_controller_ssl_certificate_info"}, reg); err != nil { + t.Errorf("unexpected collecting result:\n%s", err) + } + + reg.Unregister(cm) +} + +func TestRemoveAllSSLMetrics(t *testing.T) { + cm := NewController("pod", "default", "nginx") + reg := prometheus.NewPedanticRegistry() + if err := reg.Register(cm); err != nil { + t.Errorf("registering collector failed: %s", err) + } + + t1, _ := time.Parse( + time.RFC3339, + "2012-11-01T22:08:41+00:00") + + servers := []*ingress.Server{ + { + Hostname: "demo", + SSLCert: &ingress.SSLCert{ + ExpireTime: t1, + Certificate: &x509.Certificate{ + Issuer: pkix.Name{ + CommonName: "certificate issuer", + SerialNumber: "abcd1234", + }, + SerialNumber: big.NewInt(100), + }, + }, + }, + { + Hostname: "invalid", + SSLCert: &ingress.SSLCert{ + ExpireTime: time.Unix(0, 0), + }, + }, + } + cm.SetSSLExpireTime(servers) + cm.SetSSLInfo(servers) + + cm.RemoveAllSSLMetrics(reg) + + if err := GatherAndCompare(cm, "", []string{"nginx_ingress_controller_ssl_expire_time_seconds"}, reg); err != nil { + t.Errorf("unexpected collecting result:\n%s", err) + } + if err := GatherAndCompare(cm, "", []string{"nginx_ingress_controller_ssl_certificate_info"}, reg); err != nil { + t.Errorf("unexpected collecting result:\n%s", err) + } reg.Unregister(cm) } diff --git a/internal/ingress/metric/dummy.go b/internal/ingress/metric/dummy.go index 922a21604d..8360dc87db 100644 --- a/internal/ingress/metric/dummy.go +++ b/internal/ingress/metric/dummy.go @@ -48,7 +48,7 @@ func (dc DummyCollector) IncCheckCount(string, string) {} func (dc DummyCollector) IncCheckErrorCount(string, string) {} // RemoveMetrics ... -func (dc DummyCollector) RemoveMetrics(ingresses, endpoints []string) {} +func (dc DummyCollector) RemoveMetrics(ingresses, endpoints, certificates []string) {} // Start ... func (dc DummyCollector) Start(admissionStatus string) {} @@ -56,6 +56,9 @@ func (dc DummyCollector) Start(admissionStatus string) {} // Stop ... func (dc DummyCollector) Stop(admissionStatus string) {} +// SetSSLInfo ... +func (dc DummyCollector) SetSSLInfo([]*ingress.Server) {} + // SetSSLExpireTime ... func (dc DummyCollector) SetSSLExpireTime([]*ingress.Server) {} diff --git a/internal/ingress/metric/main.go b/internal/ingress/metric/main.go index 644a9b690e..97a8ca7d40 100644 --- a/internal/ingress/metric/main.go +++ b/internal/ingress/metric/main.go @@ -44,9 +44,10 @@ type Collector interface { IncCheckCount(string, string) IncCheckErrorCount(string, string) - RemoveMetrics(ingresses, endpoints []string) + RemoveMetrics(ingresses, endpoints, certificates []string) SetSSLExpireTime([]*ingress.Server) + SetSSLInfo(servers []*ingress.Server) // SetHosts sets the hostnames that are being served by the ingress controller SetHosts(sets.String) @@ -128,9 +129,9 @@ func (c *collector) IncReloadErrorCount() { c.ingressController.IncReloadErrorCount() } -func (c *collector) RemoveMetrics(ingresses, hosts []string) { +func (c *collector) RemoveMetrics(ingresses, hosts, certificates []string) { c.socket.RemoveMetrics(ingresses, c.registry) - c.ingressController.RemoveMetrics(hosts, c.registry) + c.ingressController.RemoveMetrics(hosts, certificates, c.registry) } func (c *collector) Start(admissionStatus string) { @@ -175,6 +176,11 @@ func (c *collector) SetSSLExpireTime(servers []*ingress.Server) { c.ingressController.SetSSLExpireTime(servers) } +func (c *collector) SetSSLInfo(servers []*ingress.Server) { + klog.V(2).Infof("Updating ssl certificate info metrics") + c.ingressController.SetSSLInfo(servers) +} + func (c *collector) SetHosts(hosts sets.String) { c.socket.SetHosts(hosts) } @@ -200,7 +206,7 @@ func (c *collector) OnStartedLeading(electionID string) { func (c *collector) OnStoppedLeading(electionID string) { setLeader(false) c.ingressController.OnStoppedLeading(electionID) - c.ingressController.RemoveAllSSLExpireMetrics(c.registry) + c.ingressController.RemoveAllSSLMetrics(c.registry) } var ( diff --git a/internal/ingress/sslcert.go b/internal/ingress/sslcert.go index 9265b07ac8..7dee3880d2 100644 --- a/internal/ingress/sslcert.go +++ b/internal/ingress/sslcert.go @@ -18,6 +18,7 @@ package ingress import ( "crypto/x509" + "fmt" "time" "k8s.io/apimachinery/pkg/runtime/schema" @@ -69,6 +70,16 @@ func (s SSLCert) GetObjectKind() schema.ObjectKind { return schema.EmptyObjectKind } +// Identifier returns a the couple issuer / serial number if they both exist, an empty string otherwise +func (s SSLCert) Identifier() string { + if s.Certificate != nil { + if s.Certificate.SerialNumber != nil { + return fmt.Sprintf("%s-%s", s.Certificate.Issuer.SerialNumber, s.Certificate.SerialNumber.String()) + } + } + return "" +} + // HashInclude defines if a field should be used or not to calculate the hash func (s SSLCert) HashInclude(field string, v interface{}) (bool, error) { switch field {