diff --git a/pkg/controllers/netpol/network_policy_controller.go b/pkg/controllers/netpol/network_policy_controller.go index bf7f28995e..4a0c23ed1c 100644 --- a/pkg/controllers/netpol/network_policy_controller.go +++ b/pkg/controllers/netpol/network_policy_controller.go @@ -253,8 +253,24 @@ func (npc *NetworkPolicyController) fullPolicySync() { } for ipFamily, iptablesSaveRestore := range npc.iptablesSaveRestore { + ipFamily := ipFamily npc.filterTableRules[ipFamily].Reset() - if err := iptablesSaveRestore.SaveInto("filter", npc.filterTableRules[ipFamily]); err != nil { + saveStart := time.Now() + err := iptablesSaveRestore.SaveInto("filter", npc.filterTableRules[ipFamily]) + saveEndTime := time.Since(saveStart) + defer func() { + if npc.MetricsEnabled { + switch ipFamily { + case v1core.IPv4Protocol: + metrics.ControllerIptablesV4SaveTime.Observe(saveEndTime.Seconds()) + case v1core.IPv6Protocol: + metrics.ControllerIptablesV6SaveTime.Observe(saveEndTime.Seconds()) + } + } + klog.V(2).Infof("Saving %v iptables rules took %v", ipFamily, saveEndTime) + }() + + if err != nil { klog.Errorf("Aborting sync. 
Failed to run iptables-save: %v", err.Error()) return } @@ -279,8 +295,23 @@ func (npc *NetworkPolicyController) fullPolicySync() { } for ipFamily, iptablesSaveRestore := range npc.iptablesSaveRestore { - if err := iptablesSaveRestore.Restore("filter", - npc.filterTableRules[ipFamily].Bytes()); err != nil { + ipFamily := ipFamily + restoreStart := time.Now() + err := iptablesSaveRestore.Restore("filter", npc.filterTableRules[ipFamily].Bytes()) + restoreEndTime := time.Since(restoreStart) + defer func() { + if npc.MetricsEnabled { + switch ipFamily { + case v1core.IPv4Protocol: + metrics.ControllerIptablesV4RestoreTime.Observe(restoreEndTime.Seconds()) + case v1core.IPv6Protocol: + metrics.ControllerIptablesV6RestoreTime.Observe(restoreEndTime.Seconds()) + } + } + klog.V(2).Infof("Restoring %v iptables rules took %v", ipFamily, restoreEndTime) + }() + + if err != nil { klog.Errorf("Aborting sync. Failed to run iptables-restore: %v\n%s", err.Error(), npc.filterTableRules[ipFamily].String()) return @@ -845,7 +876,15 @@ func NewNetworkPolicyController(clientset kubernetes.Interface, if config.MetricsEnabled { // Register the metrics for this controller metrics.DefaultRegisterer.MustRegister(metrics.ControllerIptablesSyncTime) + metrics.DefaultRegisterer.MustRegister(metrics.ControllerIptablesV4SaveTime) + metrics.DefaultRegisterer.MustRegister(metrics.ControllerIptablesV6SaveTime) + metrics.DefaultRegisterer.MustRegister(metrics.ControllerIptablesV4RestoreTime) + metrics.DefaultRegisterer.MustRegister(metrics.ControllerIptablesV6RestoreTime) metrics.DefaultRegisterer.MustRegister(metrics.ControllerPolicyChainsSyncTime) + metrics.DefaultRegisterer.MustRegister(metrics.ControllerPolicyIpsetV4RestoreTime) + metrics.DefaultRegisterer.MustRegister(metrics.ControllerPolicyIpsetV6RestoreTime) + metrics.DefaultRegisterer.MustRegister(metrics.ControllerPolicyChains) + metrics.DefaultRegisterer.MustRegister(metrics.ControllerPolicyIpsets) npc.MetricsEnabled = true } diff --git 
a/pkg/controllers/netpol/policy.go b/pkg/controllers/netpol/policy.go index 3ef83cb17b..63b98807ce 100644 --- a/pkg/controllers/netpol/policy.go +++ b/pkg/controllers/netpol/policy.go @@ -74,7 +74,9 @@ func (npc *NetworkPolicyController) syncNetworkPolicyChains(networkPoliciesInfo start := time.Now() defer func() { endTime := time.Since(start) - metrics.ControllerPolicyChainsSyncTime.Observe(endTime.Seconds()) + if npc.MetricsEnabled { + metrics.ControllerPolicyChainsSyncTime.Observe(endTime.Seconds()) + } klog.V(2).Infof("Syncing network policy chains took %v", endTime) }() @@ -89,9 +91,15 @@ func (npc *NetworkPolicyController) syncNetworkPolicyChains(networkPoliciesInfo activePolicyChains := make(map[string]bool) activePolicyIPSets := make(map[string]bool) + defer func() { + if npc.MetricsEnabled { + metrics.ControllerPolicyChains.Set(float64(len(activePolicyChains))) + metrics.ControllerPolicyIpsets.Set(float64(len(activePolicyIPSets))) + } + }() + // run through all network policies for _, policy := range networkPoliciesInfo { - currentPodIPs := make(map[api.IPFamily][]string) for _, pod := range policy.targetPods { for _, ip := range pod.ips { @@ -105,6 +113,7 @@ func (npc *NetworkPolicyController) syncNetworkPolicyChains(networkPoliciesInfo } for ipFamily, ipset := range npc.ipSetHandlers { + ipFamily := ipFamily // ensure there is a unique chain per network policy in filter table policyChainName := networkPolicyChainName(policy.namespace, policy.name, version, ipFamily) @@ -135,7 +144,22 @@ func (npc *NetworkPolicyController) syncNetworkPolicyChains(networkPoliciesInfo activePolicyIPSets[targetSourcePodIPSetName] = true } + restoreStart := time.Now() err := ipset.Restore() + restoreEndTime := time.Since(restoreStart) + + defer func() { + if npc.MetricsEnabled { + switch ipFamily { + case api.IPv4Protocol: + metrics.ControllerPolicyIpsetV4RestoreTime.Observe(restoreEndTime.Seconds()) + case api.IPv6Protocol: + 
metrics.ControllerPolicyIpsetV6RestoreTime.Observe(restoreEndTime.Seconds()) + } + } + klog.V(2).Infof("Restoring %v ipset took %v", ipFamily, restoreEndTime) + }() + if err != nil { return nil, nil, fmt.Errorf("failed to perform ipset restore: %w", err) } diff --git a/pkg/metrics/metrics_controller.go b/pkg/metrics/metrics_controller.go index defea6f2f5..044f0f224e 100644 --- a/pkg/metrics/metrics_controller.go +++ b/pkg/metrics/metrics_controller.go @@ -110,6 +110,30 @@ var ( Name: "controller_iptables_sync_time", Help: "Time it took for controller to sync iptables", }) + // ControllerIptablesV4SaveTime Time it took controller to save IPv4 rules + ControllerIptablesV4SaveTime = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: namespace, + Name: "controller_iptables_v4_save_time", + Help: "Time it took controller to save IPv4 rules", + }) + // ControllerIptablesV6SaveTime Time it took for controller to save IPv6 rules + ControllerIptablesV6SaveTime = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: namespace, + Name: "controller_iptables_v6_save_time", + Help: "Time to took for controller to save IPv6 rules", + }) + // ControllerIptablesV4RestoreTime Time it took for controller to restore IPv4 rules + ControllerIptablesV4RestoreTime = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: namespace, + Name: "controller_iptables_v4_restore_time", + Help: "Time it took for controller to restore IPv4 rules", + }) + // ControllerIptablesV6RestoreTime Time it took for controller to restore IPv6 rules + ControllerIptablesV6RestoreTime = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: namespace, + Name: "controller_iptables_v6_restore_time", + Help: "Time it took for controller to restore IPv6 rules", + }) // ControllerIpvsServicesSyncTime Time it took for controller to sync ipvs services ControllerIpvsServicesSyncTime = prometheus.NewHistogram(prometheus.HistogramOpts{ Namespace: namespace, @@ -161,6 +185,30 @@ var (
Name: "controller_policy_chains_sync_time", Help: "Time it took for controller to sync policy chains", }) + // ControllerPolicyIpsetV4RestoreTime Time it took for controller to restore IPv4 ipsets + ControllerPolicyIpsetV4RestoreTime = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: namespace, + Name: "controller_policy_ipset_v4_restore_time", + Help: "Time it took for controller to restore IPv4 ipsets", + }) + // ControllerPolicyIpsetV6RestoreTime Time it took for controller to restore IPv6 ipsets + ControllerPolicyIpsetV6RestoreTime = prometheus.NewHistogram(prometheus.HistogramOpts{ + Namespace: namespace, + Name: "controller_policy_ipset_v6_restore_time", + Help: "Time it took for controller to restore IPv6 ipsets", + }) + // ControllerPolicyChains Active policy chains + ControllerPolicyChains = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: namespace, + Name: "controller_policy_chains", + Help: "Active policy chains", + }) + // ControllerPolicyIpsets Active policy ipsets + ControllerPolicyIpsets = prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: namespace, + Name: "controller_policy_ipsets", + Help: "Active policy ipsets", + }) ) // Controller Holds settings for the metrics controller