diff --git a/Makefile b/Makefile index b55d6920..5221e283 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ MAKEFILE_PATH = $(dir $(realpath -s $(firstword $(MAKEFILE_LIST)))) VERSION ?= $(GIT_VERSION) IMAGE ?= $(REPO):$(VERSION) BASE_IMAGE ?= public.ecr.aws/eks-distro-build-tooling/eks-distro-minimal-base-nonroot:latest.2 -BUILD_IMAGE ?= public.ecr.aws/bitnami/golang:1.20.5 +BUILD_IMAGE ?= public.ecr.aws/bitnami/golang:1.21.3 GOARCH ?= amd64 PLATFORM ?= linux/amd64 diff --git a/pkg/aws/ec2/api/eni_cleanup.go b/pkg/aws/ec2/api/eni_cleanup.go index f51e29b2..6f3db155 100644 --- a/pkg/aws/ec2/api/eni_cleanup.go +++ b/pkg/aws/ec2/api/eni_cleanup.go @@ -20,6 +20,8 @@ import ( "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config" rcHealthz "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/healthz" + "github.com/prometheus/client_golang/prometheus" + "golang.org/x/exp/slices" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" @@ -39,6 +41,21 @@ type ENICleaner struct { ctx context.Context } +var ( + vpcCniLeakedENICleanupCnt = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "vpc_cni_created_leaked_eni_cleanup_count", + Help: "The number of leaked ENIs created by VPC-CNI that is cleaned up by the controller", + }, + ) + vpcrcLeakedENICleanupCnt = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "vpc_rc_created_leaked_eni_cleanup_count", + Help: "The number of leaked ENIs created by VPC-RC that is cleaned up by the controller", + }, + ) +) + func (e *ENICleaner) SetupWithManager(ctx context.Context, mgr ctrl.Manager, healthzHandler *rcHealthz.HealthzHandler) error { e.clusterNameTagKey = fmt.Sprintf(config.ClusterNameTagKeyFormat, e.ClusterName) e.availableENIs = make(map[string]struct{}) @@ -113,6 +130,21 @@ func (e *ENICleaner) cleanUpAvailableENIs() { for _, networkInterface := range describeNetworkInterfaceOp.NetworkInterfaces { if _, exists := e.availableENIs[*networkInterface.NetworkInterfaceId]; exists { + // 
Increment Prometheus metrics for number of leaked ENIs cleaned up + if tagIdx := slices.IndexFunc(networkInterface.TagSet, func(tag *ec2.Tag) bool { + return *tag.Key == config.NetworkInterfaceOwnerTagKey + }); tagIdx != -1 { + switch *networkInterface.TagSet[tagIdx].Value { + case config.NetworkInterfaceOwnerTagValue: + vpcrcLeakedENICleanupCnt.Inc() + case config.NetworkInterfaceOwnerVPCCNITagValue: + vpcCniLeakedENICleanupCnt.Inc() + default: + // We will not hit this case as we only filter for above two tag values, adding it for any future use cases + e.Log.Info("found available ENI not created by VPC-CNI/VPC-RC") + } + } + // The ENI in available state has been sitting for at least the eni clean up interval and it should // be removed _, err := e.EC2Wrapper.DeleteNetworkInterface(&ec2.DeleteNetworkInterfaceInput{ diff --git a/pkg/aws/ec2/api/wrapper.go b/pkg/aws/ec2/api/wrapper.go index 911a426f..bcf4cc74 100644 --- a/pkg/aws/ec2/api/wrapper.go +++ b/pkg/aws/ec2/api/wrapper.go @@ -344,7 +344,10 @@ func prometheusRegister() { ec2describeTrunkInterfaceAssociationAPIErrCnt, ec2modifyNetworkInterfaceAttributeAPICallCnt, ec2modifyNetworkInterfaceAttributeAPIErrCnt, - ec2APICallLatencies) + ec2APICallLatencies, + vpcCniLeakedENICleanupCnt, + vpcrcLeakedENICleanupCnt, + ) prometheusRegistered = true }