Skip to content

Commit

Permalink
Merge branch 'main' into fix-issue-1418
Browse files Browse the repository at this point in the history
  • Loading branch information
helen-frank authored Oct 3, 2024
2 parents 133eacd + a860293 commit 379b0c6
Show file tree
Hide file tree
Showing 35 changed files with 500 additions and 474 deletions.
2 changes: 1 addition & 1 deletion .github/actions/install-prometheus/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ description: 'Installs prometheus'
runs:
using: "composite"
steps:
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
- name: install
shell: bash
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/actions/install-pyroscope/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ description: 'Installs Pyroscope'
runs:
using: "composite"
steps:
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
- name: download pyroscope
shell: bash
run: |
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/codeql-analysis.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
actions: read # github/codeql-action/init@v2
security-events: write # github/codeql-action/init@v2
steps:
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
- uses: ./.github/actions/install-deps
- run: make vulncheck
- uses: github/codeql-action/init@df32e399139a3050671466d7d9b3cbacc1cfd034 # v2.22.8
Expand All @@ -34,7 +34,7 @@ jobs:
actions: read # github/codeql-action/init@v2
security-events: write # github/codeql-action/init@v2
steps:
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
- uses: github/codeql-action/init@df32e399139a3050671466d7d9b3cbacc1cfd034 # v2.22.8
with:
languages: javascript
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/kind-e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ jobs:
strategy:
fail-fast: false
matrix:
k8sVersion: ["1.25.x", "1.26.x", "1.27.x", "1.28.x", "1.29.x", "1.30.x"]
k8sVersion: ["1.25.x", "1.26.x", "1.27.x", "1.28.x", "1.29.x", "1.30.x", "1.31.x"]
steps:
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
- name: Set up Python 3.10
uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3 # v5.2.0
with:
Expand Down Expand Up @@ -49,7 +49,7 @@ jobs:
kubectl get nodepools
kubectl get pods -A
kubectl describe nodes
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
- uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938
with:
repository: nathangeology/karpenter_evaluate
path: ./karpenter_eval/ # Installs to a folder in the Karpenter repo for the test
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/presubmit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@ jobs:
strategy:
fail-fast: false
matrix:
k8sVersion: ["1.25.x", "1.26.x", "1.27.x", "1.28.x", "1.29.x", "1.30.x"]
k8sVersion: ["1.25.x", "1.26.x", "1.27.x", "1.28.x", "1.29.x", "1.30.x", "1.31.x"]
steps:
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
- uses: ./.github/actions/install-deps
with:
k8sVersion: ${{ matrix.k8sVersion }}
Expand All @@ -25,7 +25,7 @@ jobs:
- run: K8S_VERSION=${{ matrix.k8sVersion }} make presubmit
- name: Send coverage
# should only send coverage once https://docs.coveralls.io/parallel-builds
if: matrix.k8sVersion == '1.30.x'
if: matrix.k8sVersion == '1.31.x'
env:
COVERALLS_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: goveralls -coverprofile=coverage.out -service=github
2 changes: 1 addition & 1 deletion .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
id-token: write # Needed for cosigning build attestation files with tejolote
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7
- uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938 # v4.2.0
with:
fetch-depth: 0

Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Karpenter improves the efficiency and cost of running workloads on Kubernetes cl
Karpenter is a multi-cloud project with implementations by the following cloud providers:
- [AWS](https://github.com/aws/karpenter-provider-aws)
- [Azure](https://github.com/Azure/karpenter-provider-azure)
- [AlibabaCloud](https://github.com/cloudpilot-ai/karpenter-provider-alicloud)

## Community, discussion, contribution, and support

Expand Down
6 changes: 3 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@ go 1.23.0
require (
github.com/Pallinder/go-randomdata v1.2.0
github.com/avast/retry-go v3.0.0+incompatible
github.com/awslabs/operatorpkg v0.0.0-20240912183034-ab1e7d730cf3
github.com/docker/docker v27.2.1+incompatible
github.com/awslabs/operatorpkg v0.0.0-20240920182301-771460b3160b
github.com/docker/docker v27.3.1+incompatible
github.com/go-logr/logr v1.4.2
github.com/imdario/mergo v0.3.16
github.com/klauspost/compress v1.17.9 // indirect
github.com/mitchellh/hashstructure/v2 v2.0.2
github.com/onsi/ginkgo/v2 v2.20.2
github.com/onsi/gomega v1.34.2
github.com/patrickmn/go-cache v2.1.0+incompatible
github.com/prometheus/client_golang v1.20.3
github.com/prometheus/client_golang v1.20.4
github.com/prometheus/client_model v0.6.1
github.com/samber/lo v1.47.0
go.uber.org/multierr v1.11.0
Expand Down
12 changes: 6 additions & 6 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ github.com/Pallinder/go-randomdata v1.2.0 h1:DZ41wBchNRb/0GfsePLiSwb0PHZmT67XY00
github.com/Pallinder/go-randomdata v1.2.0/go.mod h1:yHmJgulpD2Nfrm0cR9tI/+oAgRqCQQixsA8HyRZfV9Y=
github.com/avast/retry-go v3.0.0+incompatible h1:4SOWQ7Qs+oroOTQOYnAHqelpCO0biHSxpiH9JdtuBj0=
github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY=
github.com/awslabs/operatorpkg v0.0.0-20240912183034-ab1e7d730cf3 h1:jhFYTRgOZhFMtQ2GzY2dVDAjorHpWI0rt9wDFG5jQ2o=
github.com/awslabs/operatorpkg v0.0.0-20240912183034-ab1e7d730cf3/go.mod h1:WnRtbjt0U71m4PqDBLybOcY5KkbeGh9FIsJGohVFUo4=
github.com/awslabs/operatorpkg v0.0.0-20240920182301-771460b3160b h1:aG1+YRmKIf5nLTZJNhw1NmuxvjUprWYyluqJ2jmVqiU=
github.com/awslabs/operatorpkg v0.0.0-20240920182301-771460b3160b/go.mod h1:RI+iNDn57c3WX0tsZg4rvkmM58lWsEC5cc6E4vJJld8=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
Expand All @@ -14,8 +14,8 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/docker/docker v27.2.1+incompatible h1:fQdiLfW7VLscyoeYEBz7/J8soYFDZV1u6VW6gJEjNMI=
github.com/docker/docker v27.2.1+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
github.com/docker/docker v27.3.1+incompatible h1:KttF0XoteNTicmUtBO0L2tP+J7FGRFTjaEF4k6WdhfI=
github.com/docker/docker v27.3.1+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g=
github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
github.com/evanphx/json-patch v5.6.0+incompatible h1:jBYDEEiFBPxA0v50tFdvOzQQTCvpL6mnFh5mB2/l16U=
Expand Down Expand Up @@ -100,8 +100,8 @@ github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v1.20.3 h1:oPksm4K8B+Vt35tUhw6GbSNSgVlVSBH0qELP/7u83l4=
github.com/prometheus/client_golang v1.20.3/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
github.com/prometheus/client_golang v1.20.4 h1:Tgh3Yr67PaOv/uTqloMsCEdeuFTatm5zIq5+qNN23vI=
github.com/prometheus/client_golang v1.20.4/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc=
Expand Down
2 changes: 1 addition & 1 deletion hack/toolchain.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env bash
set -euo pipefail

K8S_VERSION="${K8S_VERSION:="1.30.x"}"
K8S_VERSION="${K8S_VERSION:="1.31.x"}"
KUBEBUILDER_ASSETS="/usr/local/kubebuilder/bin"

main() {
Expand Down
2 changes: 0 additions & 2 deletions pkg/controllers/controllers.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ import (
nodeclaimgarbagecollection "sigs.k8s.io/karpenter/pkg/controllers/nodeclaim/garbagecollection"
nodeclaimlifecycle "sigs.k8s.io/karpenter/pkg/controllers/nodeclaim/lifecycle"
podevents "sigs.k8s.io/karpenter/pkg/controllers/nodeclaim/podevents"
nodeclaimtermination "sigs.k8s.io/karpenter/pkg/controllers/nodeclaim/termination"
nodepoolcounter "sigs.k8s.io/karpenter/pkg/controllers/nodepool/counter"
nodepoolhash "sigs.k8s.io/karpenter/pkg/controllers/nodepool/hash"
nodepoolreadiness "sigs.k8s.io/karpenter/pkg/controllers/nodepool/readiness"
Expand Down Expand Up @@ -85,7 +84,6 @@ func NewControllers(
nodeclaimconsistency.NewController(clock, kubeClient, recorder),
nodeclaimlifecycle.NewController(clock, kubeClient, cloudProvider, recorder),
nodeclaimgarbagecollection.NewController(clock, kubeClient, cloudProvider),
nodeclaimtermination.NewController(kubeClient, cloudProvider, recorder),
nodeclaimdisruption.NewController(clock, kubeClient, cloudProvider),
status.NewController[*v1.NodeClaim](kubeClient, mgr.GetEventRecorderFor("karpenter")),
status.NewController[*v1.NodePool](kubeClient, mgr.GetEventRecorderFor("karpenter")),
Expand Down
3 changes: 3 additions & 0 deletions pkg/controllers/node/termination/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ func (c *Controller) finalize(ctx context.Context, node *corev1.Node) (reconcile

return reconcile.Result{RequeueAfter: 1 * time.Second}, nil
}
NodesDrainedTotal.With(prometheus.Labels{
metrics.NodePoolLabel: node.Labels[v1.NodePoolLabelKey],
}).Inc()
// In order for Pods associated with PersistentVolumes to smoothly migrate from the terminating Node, we wait
// for VolumeAttachments of drain-able Pods to be cleaned up before terminating Node and removing its finalizer.
// However, if TerminationGracePeriod is configured for Node, and we are past that period, we will skip waiting.
Expand Down
12 changes: 11 additions & 1 deletion pkg/controllers/node/termination/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ import (
func init() {
crmetrics.Registry.MustRegister(
TerminationDurationSeconds,
NodeLifetimeDurationSeconds)
NodeLifetimeDurationSeconds,
NodesDrainedTotal)
}

const dayDuration = time.Hour * 24
Expand All @@ -44,6 +45,15 @@ var (
},
[]string{metrics.NodePoolLabel},
)
// NodesDrainedTotal is a counter of nodes drained by Karpenter, partitioned
// by the metrics.NodePoolLabel label.
NodesDrainedTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: metrics.Namespace,
Subsystem: metrics.NodeSubsystem,
Name: "drained_total",
Help: "The total number of nodes drained by Karpenter",
},
[]string{metrics.NodePoolLabel},
)
NodeLifetimeDurationSeconds = prometheus.NewHistogramVec(
prometheus.HistogramOpts{
Namespace: metrics.Namespace,
Expand Down
2 changes: 2 additions & 0 deletions pkg/controllers/node/termination/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ var _ = Describe("Termination", func() {
metrics.NodesTerminatedTotal.Reset()
termination.TerminationDurationSeconds.Reset()
termination.NodeLifetimeDurationSeconds.Reset()
termination.NodesDrainedTotal.Reset()
})

Context("Reconciliation", func() {
Expand Down Expand Up @@ -841,6 +842,7 @@ var _ = Describe("Termination", func() {
node = ExpectNodeExists(ctx, env.Client, node.Name)
// Reconcile twice, once to set the NodeClaim to terminating, another to check the instance termination status (and delete the node).
ExpectObjectReconciled(ctx, env.Client, terminationController, node)
ExpectMetricCounterValue(termination.NodesDrainedTotal, 1, map[string]string{"nodepool": node.Labels[v1.NodePoolLabelKey]})
ExpectObjectReconciled(ctx, env.Client, terminationController, node)

m, ok := FindMetricWithLabelValues("karpenter_nodes_terminated_total", map[string]string{"nodepool": node.Labels[v1.NodePoolLabelKey]})
Expand Down
6 changes: 6 additions & 0 deletions pkg/controllers/node/termination/terminator/eviction.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,11 @@ func (q *Queue) Evict(ctx context.Context, key QueueKey) bool {
},
},
}); err != nil {
var apiStatus apierrors.APIStatus
if errors.As(err, &apiStatus) {
code := apiStatus.Status().Code
NodesEvictionRequestsTotal.With(map[string]string{CodeLabel: fmt.Sprint(code)}).Inc()
}
// status codes for the eviction API are defined here:
// https://kubernetes.io/docs/concepts/scheduling-eviction/api-eviction/#how-api-initiated-eviction-works
if apierrors.IsNotFound(err) || apierrors.IsConflict(err) {
Expand All @@ -199,6 +204,7 @@ func (q *Queue) Evict(ctx context.Context, key QueueKey) bool {
log.FromContext(ctx).Error(err, "failed evicting pod")
return false
}
NodesEvictionRequestsTotal.With(map[string]string{CodeLabel: "200"}).Inc()
q.recorder.Publish(terminatorevents.EvictPod(&corev1.Pod{ObjectMeta: metav1.ObjectMeta{Name: key.Name, Namespace: key.Namespace}}))
return true
}
43 changes: 43 additions & 0 deletions pkg/controllers/node/termination/terminator/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
Copyright The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package terminator

import (
"github.com/prometheus/client_golang/prometheus"
crmetrics "sigs.k8s.io/controller-runtime/pkg/metrics"

"sigs.k8s.io/karpenter/pkg/metrics"
)

const (
// CodeLabel is the metric label key under which the status code of an
// eviction request is recorded on NodesEvictionRequestsTotal.
CodeLabel = "code"
)

// NodesEvictionRequestsTotal is a counter of eviction requests made by
// Karpenter, partitioned by the CodeLabel label (the status code associated
// with the request's outcome).
var NodesEvictionRequestsTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: metrics.Namespace,
Subsystem: metrics.NodeSubsystem,
Name: "eviction_requests_total",
Help: "The total number of eviction requests made by Karpenter",
},
[]string{CodeLabel},
)

// init registers the package's eviction metrics with the controller-runtime
// metrics registry so they are exported alongside the other registered metrics.
func init() {
crmetrics.Registry.MustRegister(NodesEvictionRequestsTotal)
}
4 changes: 4 additions & 0 deletions pkg/controllers/node/termination/terminator/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ var _ = Describe("Eviction/Queue", func() {
Labels: testLabels,
},
})
terminator.NodesEvictionRequestsTotal.Reset()
})

Context("Eviction API", func() {
Expand All @@ -102,11 +103,13 @@ var _ = Describe("Eviction/Queue", func() {
It("should succeed with no event when the pod UID conflicts", func() {
ExpectApplied(ctx, env.Client, pod)
Expect(queue.Evict(ctx, terminator.QueueKey{NamespacedName: client.ObjectKeyFromObject(pod), UID: uuid.NewUUID()})).To(BeTrue())
ExpectMetricCounterValue(terminator.NodesEvictionRequestsTotal, 1, map[string]string{terminator.CodeLabel: "409"})
Expect(recorder.Events()).To(HaveLen(0))
})
It("should succeed with an evicted event when there are no PDBs", func() {
ExpectApplied(ctx, env.Client, pod)
Expect(queue.Evict(ctx, terminator.NewQueueKey(pod))).To(BeTrue())
ExpectMetricCounterValue(terminator.NodesEvictionRequestsTotal, 1, map[string]string{terminator.CodeLabel: "200"})
Expect(recorder.Calls("Evicted")).To(Equal(1))
})
It("should succeed with no event when there are PDBs that allow an eviction", func() {
Expand All @@ -130,6 +133,7 @@ var _ = Describe("Eviction/Queue", func() {
})
ExpectApplied(ctx, env.Client, pdb, pdb2, pod)
Expect(queue.Evict(ctx, terminator.NewQueueKey(pod))).To(BeFalse())
ExpectMetricCounterValue(terminator.NodesEvictionRequestsTotal, 1, map[string]string{terminator.CodeLabel: "500"})
})
It("should ensure that calling Evict() is valid while making Add() calls", func() {
cancelCtx, cancel := context.WithCancel(ctx)
Expand Down
Loading

0 comments on commit 379b0c6

Please sign in to comment.