Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add metrics for leaked ENI cleanup routine #328

Merged
merged 2 commits into from
Nov 2, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ MAKEFILE_PATH = $(dir $(realpath -s $(firstword $(MAKEFILE_LIST))))
VERSION ?= $(GIT_VERSION)
IMAGE ?= $(REPO):$(VERSION)
BASE_IMAGE ?= public.ecr.aws/eks-distro-build-tooling/eks-distro-minimal-base-nonroot:latest.2
BUILD_IMAGE ?= public.ecr.aws/bitnami/golang:1.20.5
BUILD_IMAGE ?= public.ecr.aws/bitnami/golang:1.21.3
GOARCH ?= amd64
PLATFORM ?= linux/amd64

Expand Down
32 changes: 32 additions & 0 deletions pkg/aws/ec2/api/eni_cleanup.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ import (

"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config"
rcHealthz "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/healthz"
"github.com/prometheus/client_golang/prometheus"
"golang.org/x/exp/slices"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/ec2"
Expand All @@ -39,6 +41,21 @@ type ENICleaner struct {
ctx context.Context
}

// vpcCniLeakedENICleanupCnt is a Prometheus counter for leaked ENIs that were
// created by VPC-CNI and cleaned up by the controller.
var vpcCniLeakedENICleanupCnt = prometheus.NewCounter(prometheus.CounterOpts{
	Name: "vpc_cni_created_leaked_eni_cleanup_count",
	Help: "The number of leaked ENIs created by VPC-CNI that is cleaned up by the controller",
})

// vpcrcLeakedENICleanupCnt is a Prometheus counter for leaked ENIs that were
// created by VPC-RC and cleaned up by the controller.
var vpcrcLeakedENICleanupCnt = prometheus.NewCounter(prometheus.CounterOpts{
	Name: "vpc_rc_created_leaked_eni_cleanup_count",
	Help: "The number of leaked ENIs created by VPC-RC that is cleaned up by the controller",
})

func (e *ENICleaner) SetupWithManager(ctx context.Context, mgr ctrl.Manager, healthzHandler *rcHealthz.HealthzHandler) error {
e.clusterNameTagKey = fmt.Sprintf(config.ClusterNameTagKeyFormat, e.ClusterName)
e.availableENIs = make(map[string]struct{})
Expand Down Expand Up @@ -113,6 +130,21 @@ func (e *ENICleaner) cleanUpAvailableENIs() {

for _, networkInterface := range describeNetworkInterfaceOp.NetworkInterfaces {
if _, exists := e.availableENIs[*networkInterface.NetworkInterfaceId]; exists {
// Increment Prometheus metrics for the number of leaked ENIs cleaned up
if tagIdx := slices.IndexFunc(networkInterface.TagSet, func(tag *ec2.Tag) bool {
return *tag.Key == config.NetworkInterfaceOwnerTagKey
}); tagIdx != -1 {
switch *networkInterface.TagSet[tagIdx].Value {
case config.NetworkInterfaceOwnerTagValue:
vpcrcLeakedENICleanupCnt.Inc()
case config.NetworkInterfaceOwnerVPCCNITagValue:
vpcCniLeakedENICleanupCnt.Inc()
default:
// We should not hit this case because we only filter for the two tag values above; it is kept for any future use cases
e.Log.Info("found leaked NI not owned by VPC-CNI/VPC-RC")
sushrk marked this conversation as resolved.
Show resolved Hide resolved
}
}

// The ENI in available state has been sitting for at least the eni clean up interval and it should
// be removed
_, err := e.EC2Wrapper.DeleteNetworkInterface(&ec2.DeleteNetworkInterfaceInput{
Expand Down
5 changes: 4 additions & 1 deletion pkg/aws/ec2/api/wrapper.go
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,10 @@ func prometheusRegister() {
ec2describeTrunkInterfaceAssociationAPIErrCnt,
ec2modifyNetworkInterfaceAttributeAPICallCnt,
ec2modifyNetworkInterfaceAttributeAPIErrCnt,
ec2APICallLatencies)
ec2APICallLatencies,
vpcCniLeakedENICleanupCnt,
vpcrcLeakedENICleanupCnt,
)

prometheusRegistered = true
}
Expand Down