Clean up job for old resources #15312

Merged (6 commits, Jun 19, 2024)

Changes from 4 commits
79 changes: 79 additions & 0 deletions config/post-install/cleanup.yaml
@@ -0,0 +1,79 @@
# Copyright 2024 The Knative Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

apiVersion: batch/v1
kind: Job
metadata:
  generateName: cleanup-serving-
  namespace: knative-serving
  labels:
    app: cleanup-serving
    app.kubernetes.io/name: knative-serving
    app.kubernetes.io/component: cleanup-job
    app.kubernetes.io/version: devel
spec:
  ttlSecondsAfterFinished: 600
  backoffLimit: 10
  template:
    metadata:
      annotations:
        sidecar.istio.io/inject: "false"
      labels:
        app: cleanup-migration-serving
        app.kubernetes.io/name: knative-serving
        app.kubernetes.io/component: cleanup-job
        app.kubernetes.io/version: devel
    spec:
      serviceAccountName: controller
      restartPolicy: OnFailure
      containers:
        - name: cleanup
          # This is the Go import path for the binary that is containerized
          # and substituted here.
          image: ko://knative.dev/serving/pkg/cleanup/cmd/cleanup
          resources:
            requests:
              cpu: 100m
              memory: 100Mi
            limits:
              cpu: 1000m
              memory: 1000Mi
          securityContext:
            allowPrivilegeEscalation: false
            readOnlyRootFilesystem: true
            runAsNonRoot: true
            capabilities:
              drop:
                - ALL
            seccompProfile:
              type: RuntimeDefault
          env:
            - name: SYSTEM_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
---

kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: knative-serving-cleanup
  labels:
    serving.knative.dev/controller: "true"
    app.kubernetes.io/version: devel
    app.kubernetes.io/name: knative-serving
rules:
  - apiGroups: ["rbac.authorization.k8s.io"]
Member:

Should probably limit this to just the single role we need to delete:

resourceNames: ["..."]
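
For illustration only, a sketch of how the narrowed rule could look. The names are an assumption on my part: presumably the entries would be the ClusterRoles this binary deletes, which in the version shown below are knative-serving-certmanager and knative-serving-cleanup (see cleanup.go further down).

    rules:
      - apiGroups: ["rbac.authorization.k8s.io"]
        resources: ["clusterroles"]
        # Assumed names: the ClusterRoles removed by the cleanup binary.
        resourceNames: ["knative-serving-certmanager", "knative-serving-cleanup"]
        verbs: ["delete"]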

Member:

Oddly, this role becomes a lingering resource after this job runs.

I wonder if we should just move this to the serving-core role?

Contributor Author @skonto, Jun 7, 2024:

The problem is that if we move it there, people who want to run this will also need to update serving-core manually if they are targeting an old version (instead of just running a job). Anyway, I think release notes could help.

    resources: ["clusterroles"]
    verbs: ["delete"]
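
As background on why the job can use this role with serviceAccountName: controller at all: the serving.knative.dev/controller: "true" label is, as far as I understand, the selector used by Knative Serving's aggregated admin ClusterRole, so the rule above gets folded into the permissions bound to the controller service account. A rough sketch of such an aggregated role (assumed shape and name, not a manifest from this PR):

    apiVersion: rbac.authorization.k8s.io/v1
    kind: ClusterRole
    metadata:
      name: knative-serving-admin  # assumed name of the aggregated role
    aggregationRule:
      clusterRoleSelectors:
        - matchLabels:
            serving.knative.dev/controller: "true"
    rules: []  # populated automatically by the RBAC aggregation controller

This is also why the reviewers discuss where the delete permission should live: keeping it on a dedicated labeled role means that role itself becomes something to clean up, which the Go binary below does as its last step.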
5 changes: 4 additions & 1 deletion hack/generate-yamls.sh
@@ -54,11 +54,12 @@ readonly SERVING_DEFAULT_DOMAIN_YAML=${YAML_OUTPUT_DIR}/serving-default-domain.yaml
readonly SERVING_STORAGE_VERSION_MIGRATE_YAML=${YAML_OUTPUT_DIR}/serving-storage-version-migration.yaml
readonly SERVING_HPA_YAML=${YAML_OUTPUT_DIR}/serving-hpa.yaml
readonly SERVING_CRD_YAML=${YAML_OUTPUT_DIR}/serving-crds.yaml
readonly SERVING_CLEAN_UP_YAML=${YAML_OUTPUT_DIR}/cleanup.yaml
readonly SERVING_POST_INSTALL_JOBS_YAML=${YAML_OUTPUT_DIR}/serving-post-install-jobs.yaml

declare -A CONSOLIDATED_ARTIFACTS
CONSOLIDATED_ARTIFACTS=(
["${SERVING_POST_INSTALL_JOBS_YAML}"]="${SERVING_STORAGE_VERSION_MIGRATE_YAML}"
["${SERVING_POST_INSTALL_JOBS_YAML}"]="${SERVING_STORAGE_VERSION_MIGRATE_YAML} ${SERVING_CLEAN_UP_YAML}"
)
readonly CONSOLIDATED_ARTIFACTS

@@ -91,6 +92,8 @@ ko resolve ${KO_YAML_FLAGS} -f config/post-install/default-domain.yaml | "${LABEL_YAML_CMD[@]}" > "${SERVING_DEFAULT_DOMAIN_YAML}"

ko resolve ${KO_YAML_FLAGS} -f config/post-install/storage-version-migration.yaml | "${LABEL_YAML_CMD[@]}" > "${SERVING_STORAGE_VERSION_MIGRATE_YAML}"

ko resolve ${KO_YAML_FLAGS} -f config/post-install/cleanup.yaml | "${LABEL_YAML_CMD[@]}" > "${SERVING_CLEAN_UP_YAML}"

# These don't have images, but ko will concatenate them for us.
ko resolve ${KO_YAML_FLAGS} -f config/core/300-resources/ -f config/core/300-imagecache.yaml | "${LABEL_YAML_CMD[@]}" > "${SERVING_CRD_YAML}"

120 changes: 120 additions & 0 deletions pkg/cleanup/cmd/cleanup/cleanup.go
@@ -0,0 +1,120 @@
/*
Copyright 2024 The Knative Authors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
	"context"
	"flag"
	"log"
	"strings"

	"go.uber.org/zap"
	v1 "k8s.io/api/apps/v1"
	apierrs "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"

	"knative.dev/pkg/environment"
	"knative.dev/pkg/logging"
	"knative.dev/pkg/system"
)

func main() {
	logger := setupLogger()
	defer logger.Sync()

	env := environment.ClientConfig{}
	env.InitFlags(flag.CommandLine)

	flag.Parse()

	config, err := env.GetRESTConfig()
	if err != nil {
		logger.Fatalf("failed to get kubeconfig %s", err)
	}

	client := kubernetes.NewForConfigOrDie(config)

	logger.Info("Deleting old Serving resources if any")

	// Delete all deployments
	var deps *v1.DeploymentList
	if deps, err = client.AppsV1().Deployments(system.Namespace()).List(context.Background(), metav1.ListOptions{}); err != nil {
		logger.Fatal("failed to list deployments: ", err)
	}
	for _, dep := range deps.Items {
		if hasPrefix(dep.Name) {
Member:

Don't we know the exact names of the deployments we want to delete?

Contributor Author:

Probably a relic; it needs to be updated.

			if err = client.AppsV1().Deployments(system.Namespace()).Delete(context.Background(), dep.Name, metav1.DeleteOptions{}); err != nil && !apierrs.IsNotFound(err) {
				logger.Fatal("failed to delete deployment ", dep.Name, ": ", err)
			}
		}
	}

	// Delete the rest of the domain mapping resources
	if err = client.CoreV1().Services(system.Namespace()).Delete(context.Background(), "domainmapping-webhook", metav1.DeleteOptions{}); err != nil && !apierrs.IsNotFound(err) {
		logger.Fatal("failed to delete service domainmapping-webhook: ", err)
	}
	if err = client.CoreV1().Secrets(system.Namespace()).Delete(context.Background(), "domainmapping-webhook-certs", metav1.DeleteOptions{}); err != nil && !apierrs.IsNotFound(err) {
		logger.Fatal("failed to delete secret domainmapping-webhook-certs: ", err)
	}
	if err = client.AdmissionregistrationV1().MutatingWebhookConfigurations().Delete(context.Background(), "webhook.domainmapping.serving.knative.dev", metav1.DeleteOptions{}); err != nil && !apierrs.IsNotFound(err) {
		logger.Fatal("failed to delete mutating webhook configuration webhook.domainmapping.serving.knative.dev: ", err)
	}
	if err = client.AdmissionregistrationV1().ValidatingWebhookConfigurations().Delete(context.Background(), "validation.webhook.domainmapping.serving.knative.dev", metav1.DeleteOptions{}); err != nil && !apierrs.IsNotFound(err) {
		logger.Fatal("failed to delete validating webhook configuration validation.webhook.domainmapping.serving.knative.dev: ", err)
	}

	// Delete the rest of the net-certmanager resources
	if err = client.CoreV1().Services(system.Namespace()).Delete(context.Background(), "net-certmanager-controller", metav1.DeleteOptions{}); err != nil && !apierrs.IsNotFound(err) {
		logger.Fatal("failed to delete service net-certmanager-controller: ", err)
	}
	if err = client.CoreV1().Services(system.Namespace()).Delete(context.Background(), "net-certmanager-webhook", metav1.DeleteOptions{}); err != nil && !apierrs.IsNotFound(err) {
		logger.Fatal("failed to delete service net-certmanager-webhook: ", err)
	}
	if err = client.AdmissionregistrationV1().ValidatingWebhookConfigurations().Delete(context.Background(), "config.webhook.net-certmanager.networking.internal.knative.dev", metav1.DeleteOptions{}); err != nil && !apierrs.IsNotFound(err) {
		logger.Fatal("failed to delete validating webhook config.webhook.net-certmanager.networking.internal.knative.dev: ", err)
	}
	if err = client.CoreV1().Secrets(system.Namespace()).Delete(context.Background(), "net-certmanager-webhook-certs", metav1.DeleteOptions{}); err != nil && !apierrs.IsNotFound(err) {
		logger.Fatal("failed to delete secret net-certmanager-webhook-certs: ", err)
	}
	if err = client.RbacV1().ClusterRoles().Delete(context.Background(), "knative-serving-certmanager", metav1.DeleteOptions{}); err != nil && !apierrs.IsNotFound(err) {
		logger.Fatal("failed to delete clusterrole knative-serving-certmanager: ", err)
	}

	// Delete the role we created for the cleanup job
	if err = client.RbacV1().ClusterRoles().Delete(context.Background(), "knative-serving-cleanup", metav1.DeleteOptions{}); err != nil && !apierrs.IsNotFound(err) {
		logger.Fatal("failed to delete clusterrole knative-serving-cleanup: ", err)
	}
	logger.Info("Old Serving resource deletion completed successfully")
}

func hasPrefix(name string) bool {
	return strings.HasPrefix(name, "domain-mapping") || strings.HasPrefix(name, "domainmapping-webhook") ||
		strings.HasPrefix(name, "net-certmanager-controller") || strings.HasPrefix(name, "net-certmanager-webhook")
}

func setupLogger() *zap.SugaredLogger {
	const component = "old-resource-cleanup"

	config, err := logging.NewConfigFromMap(nil)
	if err != nil {
		log.Fatal("Failed to create logging config: ", err)
	}

	logger, _ := logging.NewLoggerFromConfig(config, component)
	return logger
}