Bump CAPI to v1.7.5
haijianyang committed Aug 20, 2024
1 parent d9ab098 commit 6ba7557
Showing 47 changed files with 685 additions and 918 deletions.
2 changes: 1 addition & 1 deletion .go-version
@@ -1 +1 @@
1.21.11
1.22.5
6 changes: 3 additions & 3 deletions .golangci.yml
@@ -104,9 +104,9 @@ linters-settings:
- pkg: github.com/smartxworks/cluster-api-provider-elf/api/v1beta1
alias: infrav1
staticcheck:
go: "1.21"
go: "1.22"
stylecheck:
go: "1.21"
go: "1.22"
issues:
max-same-issues: 0
max-issues-per-linter: 0
@@ -169,7 +169,7 @@ issues:

run:
timeout: 10m
go: "1.21"
go: "1.22"
skip-files:
- "zz_generated.*\\.go$"
allow-parallel-runners: true
2 changes: 1 addition & 1 deletion Dockerfile
@@ -18,7 +18,7 @@
ARG ARCH

# Build the manager binary
FROM golang:1.21.11 as builder
FROM golang:1.22.5 as builder
WORKDIR /workspace

# Run this with docker build --build_arg $(go env GOPROXY) to override the goproxy
10 changes: 5 additions & 5 deletions Makefile
@@ -25,7 +25,7 @@ VERSION ?= $(shell cat clusterctl-settings.json | jq .config.nextVersion -r)
#
# Go.
#
GO_VERSION ?= 1.21.11
GO_VERSION ?= 1.22.5

# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set)
ifeq (,$(shell go env GOBIN))
@@ -151,23 +151,23 @@ test-e2e: ginkgo kustomize kind ## Run e2e tests

KUSTOMIZE = $(shell pwd)/bin/kustomize
kustomize: ## Download kustomize locally if necessary.
$(call go-get-tool,$(KUSTOMIZE),sigs.k8s.io/kustomize/kustomize/[email protected])
$(call go-get-tool,$(KUSTOMIZE),sigs.k8s.io/kustomize/kustomize/[email protected])

CONTROLLER_GEN = $(shell pwd)/bin/controller-gen
controller-gen: ## Download controller-gen locally if necessary.
$(call go-get-tool,$(CONTROLLER_GEN),sigs.k8s.io/controller-tools/cmd/[email protected])

GINKGO := $(shell pwd)/bin/ginkgo
ginkgo: ## Download ginkgo locally if necessary.
$(call go-get-tool,$(GINKGO),github.com/onsi/ginkgo/v2/ginkgo@v2.13.1)
$(call go-get-tool,$(GINKGO),github.com/onsi/ginkgo/v2/ginkgo@v2.17.1)

KIND := $(shell pwd)/bin/kind
kind: ## Download kind locally if necessary.
$(call go-get-tool,$(KIND),sigs.k8s.io/kind@v0.20.0)
$(call go-get-tool,$(KIND),sigs.k8s.io/kind@v0.22.0)

GOLANGCI_LINT := $(shell pwd)/bin/golangci-lint
golangci-lint: ## Download golangci-lint locally if necessary.
$(call go-get-tool,$(GOLANGCI_LINT),github.com/golangci/golangci-lint/cmd/golangci-lint@v1.55.2)
$(call go-get-tool,$(GOLANGCI_LINT),github.com/golangci/golangci-lint/cmd/golangci-lint@v1.57.2)

## --------------------------------------
## Linting and fixing linter errors
2 changes: 1 addition & 1 deletion api/v1beta1/types.go
@@ -104,7 +104,7 @@ type NetworkSpec struct {
}

func (n *NetworkSpec) RequiresStaticIPs() bool {
for i := 0; i < len(n.Devices); i++ {
for i := range len(n.Devices) {
if n.Devices[i].NetworkType == NetworkTypeIPV4 && len(n.Devices[i].IPAddrs) == 0 {
return true
}
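Note: the loop rewrite above (and the similar ones later in this commit) uses Go 1.22's range-over-integer form, which iterates i from 0 through len(...)-1 exactly like the classic three-clause loop it replaces. A minimal, self-contained sketch of the two equivalent forms, using a hypothetical devices slice:

package main

import "fmt"

func main() {
	devices := []string{"eth0", "eth1"} // hypothetical sample data

	// Classic index loop (works on any Go version).
	for i := 0; i < len(devices); i++ {
		fmt.Println(i, devices[i])
	}

	// Equivalent range-over-int form, valid since Go 1.22 (hence the .go-version bump).
	for i := range len(devices) {
		fmt.Println(i, devices[i])
	}
}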
2 changes: 1 addition & 1 deletion controllers/elfcluster_controller.go
@@ -285,7 +285,7 @@ func (r *ElfClusterReconciler) cleanOrphanLabels(ctx goctx.Context, clusterCtx *
keys := []string{towerresources.GetVMLabelClusterName(), towerresources.GetVMLabelVIP(), towerresources.GetVMLabelNamespace()}
labelIDs, err := clusterCtx.VMService.CleanUnusedLabels(keys)
if err != nil {
log.Error(err, fmt.Sprintf("Warning: failed to clean orphan labels in Tower %s", clusterCtx.ElfCluster.Spec.Tower.Server))
log.Error(err, "Warning: failed to clean orphan labels in Tower "+clusterCtx.ElfCluster.Spec.Tower.Server)

return
}
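The logging changes in this file (and in several hunks below) replace fmt.Sprintf with plain string concatenation where the only formatting verb was %s; the resulting message is identical, the concatenation simply skips the formatting machinery. A small sketch of the equivalence, with a hypothetical server value:

package main

import "fmt"

func main() {
	server := "tower.example.com" // hypothetical Tower server address

	withSprintf := fmt.Sprintf("Warning: failed to clean orphan labels in Tower %s", server)
	withConcat := "Warning: failed to clean orphan labels in Tower " + server

	fmt.Println(withSprintf == withConcat) // true: both forms build the same string
}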
2 changes: 1 addition & 1 deletion controllers/elfcluster_controller_test.go
@@ -270,7 +270,7 @@ var _ = Describe("ElfClusterReconciler", func() {
mockVMService.EXPECT().CleanUnusedLabels(keys).Return(nil, unexpectedError)
reconciler := &ElfClusterReconciler{ControllerManagerContext: ctrlMgrCtx, NewVMService: mockNewVMService}
reconciler.cleanOrphanLabels(ctx, clusterCtx)
Expect(logBuffer.String()).To(ContainSubstring(fmt.Sprintf("Warning: failed to clean orphan labels in Tower %s", elfCluster.Spec.Tower.Server)))
Expect(logBuffer.String()).To(ContainSubstring("Warning: failed to clean orphan labels in Tower " + elfCluster.Spec.Tower.Server))

logBuffer.Reset()
mockVMService.EXPECT().CleanUnusedLabels(keys).Return(nil, nil)
48 changes: 25 additions & 23 deletions controllers/elfmachine_controller.go
@@ -31,9 +31,8 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
apitypes "k8s.io/apimachinery/pkg/types"
"k8s.io/klog/v2"
"k8s.io/utils/pointer"
"k8s.io/utils/ptr"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
capierrors "sigs.k8s.io/cluster-api/errors"
capiutil "sigs.k8s.io/cluster-api/util"
"sigs.k8s.io/cluster-api/util/annotations"
"sigs.k8s.io/cluster-api/util/conditions"
@@ -540,15 +539,15 @@ func (r *ElfMachineReconciler) reconcileVM(ctx goctx.Context, machineCtx *contex
if canRetry, err := canRetryVMOperation(ctx, machineCtx, r.Client); err != nil {
return nil, false, err
} else if !canRetry {
log.V(1).Info(fmt.Sprintf("%s, skip creating VM", message))
log.V(1).Info(message + ", skip creating VM")
return nil, false, nil
}

log.V(1).Info(fmt.Sprintf("%s and the retry silence period passes, will try to create the VM again", message))
log.V(1).Info(message + " and the retry silence period passes, will try to create the VM again")
}

if ok, msg := acquireTicketForCreateVM(machineCtx.ElfMachine.Name, machineutil.IsControlPlaneMachine(machineCtx.ElfMachine)); !ok {
log.V(1).Info(fmt.Sprintf("%s, skip creating VM", msg))
log.V(1).Info(msg + ", skip creating VM")
return nil, false, nil
}

@@ -644,8 +643,9 @@ func (r *ElfMachineReconciler) reconcileVM(ctx goctx.Context, machineCtx *contex
// The VM was moved to the recycle bin. Treat the VM as deleted, and will not reconganize it even if it's moved back from the recycle bin.
if service.IsVMInRecycleBin(vm) {
message := fmt.Sprintf("The VM %s was moved to the Tower recycle bin by users, so treat it as deleted.", machineCtx.ElfMachine.Status.VMRef)
machineCtx.ElfMachine.Status.FailureReason = capierrors.MachineStatusErrorPtr(capeerrors.MovedToRecycleBinError)
machineCtx.ElfMachine.Status.FailureMessage = pointer.String(message)
machineStatusError := capeerrors.MovedToRecycleBinError
machineCtx.ElfMachine.Status.FailureReason = &machineStatusError
machineCtx.ElfMachine.Status.FailureMessage = ptr.To(message)
machineCtx.ElfMachine.SetVM("")
log.Error(stderrors.New(message), "")

@@ -700,8 +700,9 @@ func (r *ElfMachineReconciler) getVM(ctx goctx.Context, machineCtx *context.Mach
}

// If the machine was not found by UUID and timed out it means that it got deleted directly
machineCtx.ElfMachine.Status.FailureReason = capierrors.MachineStatusErrorPtr(capeerrors.RemovedFromInfrastructureError)
machineCtx.ElfMachine.Status.FailureMessage = pointer.String(fmt.Sprintf("Unable to find VM by UUID %s. The VM was removed from infrastructure.", machineCtx.ElfMachine.Status.VMRef))
machineStatusError := capeerrors.RemovedFromInfrastructureError
machineCtx.ElfMachine.Status.FailureReason = &machineStatusError
machineCtx.ElfMachine.Status.FailureMessage = ptr.To(fmt.Sprintf("Unable to find VM by UUID %s. The VM was removed from infrastructure.", machineCtx.ElfMachine.Status.VMRef))
log.Error(err, fmt.Sprintf("failed to get VM by UUID %s in %s", machineCtx.ElfMachine.Status.VMRef, infrav1.VMDisconnectionTimeout.String()), "message", machineCtx.ElfMachine.Status.FailureMessage)

return nil, err
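Two related idioms recur in the hunks above: pointer helpers move from the deprecated k8s.io/utils/pointer package to its generic successor k8s.io/utils/ptr (pointer.String(x) becomes ptr.To(x)), and the dropped capierrors.MachineStatusErrorPtr call is replaced by taking the address of a local variable. A self-contained sketch of both, with the CAPI error type stubbed out for illustration:

package main

import (
	"fmt"

	"k8s.io/utils/ptr"
)

// MachineStatusError stands in for the CAPI string-based error type used by FailureReason.
type MachineStatusError string

const MovedToRecycleBinError MachineStatusError = "MovedToRecycleBin" // hypothetical value

type MachineStatus struct {
	FailureReason  *MachineStatusError
	FailureMessage *string
}

func main() {
	var status MachineStatus

	// ptr.To is the generic replacement for pointer.String, pointer.Int32, and friends.
	status.FailureMessage = ptr.To("The VM was removed from infrastructure.")

	// Taking the address of a local copy replaces the old MachineStatusErrorPtr helper.
	reason := MovedToRecycleBinError
	status.FailureReason = &reason

	fmt.Println(*status.FailureReason, *status.FailureMessage)
}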
@@ -779,7 +780,7 @@ func (r *ElfMachineReconciler) reconcileVMStatus(ctx goctx.Context, machineCtx *
// See issue http://jira.smartx.com/browse/SKS-1351 for details.
return false, r.powerOffVM(ctx, machineCtx)
default:
log.Info(fmt.Sprintf("The VM is in an unexpected status %s", string(*vm.Status)), "vmRef", machineCtx.ElfMachine.Status.VMRef)
log.Info("The VM is in an unexpected status "+string(*vm.Status), "vmRef", machineCtx.ElfMachine.Status.VMRef)

return false, nil
}
@@ -814,7 +815,7 @@ func (r *ElfMachineReconciler) powerOffVM(ctx goctx.Context, machineCtx *context
log := ctrl.LoggerFrom(ctx)

if ok := acquireTicketForUpdatingVM(machineCtx.ElfMachine.Name); !ok {
log.V(1).Info(fmt.Sprintf("The VM operation reaches rate limit, skip powering off VM %s", machineCtx.ElfMachine.Status.VMRef))
log.V(1).Info("The VM operation reaches rate limit, skip powering off VM " + machineCtx.ElfMachine.Status.VMRef)

return nil
}
@@ -849,11 +850,11 @@ func (r *ElfMachineReconciler) powerOnVM(ctx goctx.Context, machineCtx *context.
return nil
}

log.V(1).Info(fmt.Sprintf("%s and the retry silence period passes, will try to power on the VM again", message))
log.V(1).Info(message + " and the retry silence period passes, will try to power on the VM again")
}

if ok := acquireTicketForUpdatingVM(machineCtx.ElfMachine.Name); !ok {
log.V(1).Info(fmt.Sprintf("The VM operation reaches rate limit, skip power on VM %s", machineCtx.ElfMachine.Status.VMRef))
log.V(1).Info("The VM operation reaches rate limit, skip power on VM " + machineCtx.ElfMachine.Status.VMRef)

return nil
}
@@ -885,7 +886,7 @@ func (r *ElfMachineReconciler) updateVM(ctx goctx.Context, machineCtx *context.M
log := ctrl.LoggerFrom(ctx)

if ok := acquireTicketForUpdatingVM(machineCtx.ElfMachine.Name); !ok {
log.V(1).Info(fmt.Sprintf("The VM operation reaches rate limit, skip updating VM %s", machineCtx.ElfMachine.Status.VMRef))
log.V(1).Info("The VM operation reaches rate limit, skip updating VM " + machineCtx.ElfMachine.Status.VMRef)

return nil
}
@@ -1005,8 +1006,9 @@ func (r *ElfMachineReconciler) reconcileVMFailedTask(ctx goctx.Context, machineC
conditions.MarkFalse(machineCtx.ElfMachine, infrav1.VMProvisionedCondition, infrav1.TaskFailureReason, clusterv1.ConditionSeverityInfo, errorMessage)

if service.IsCloudInitConfigError(errorMessage) {
machineCtx.ElfMachine.Status.FailureReason = capierrors.MachineStatusErrorPtr(capeerrors.CloudInitConfigError)
machineCtx.ElfMachine.Status.FailureMessage = pointer.String(fmt.Sprintf("VM cloud-init config error: %s", service.FormatCloudInitError(errorMessage)))
machineStatusError := capeerrors.CloudInitConfigError
machineCtx.ElfMachine.Status.FailureReason = &machineStatusError
machineCtx.ElfMachine.Status.FailureMessage = ptr.To("VM cloud-init config error: " + service.FormatCloudInitError(errorMessage))
}

log.Error(errors.New("VM task failed"), "", "vmRef", vmRef, "taskRef", taskRef, "taskErrorMessage", errorMessage, "taskErrorCode", service.GetTowerString(task.ErrorCode), "taskDescription", service.GetTowerString(task.Description))
@@ -1038,13 +1040,13 @@ func (r *ElfMachineReconciler) reconcileVMFailedTask(ctx goctx.Context, machineC
setVMDuplicate(machineCtx.ElfMachine.Name)
case service.IsStorageInsufficientError(errorMessage):
recordElfClusterStorageInsufficient(machineCtx, true)
message := fmt.Sprintf("Insufficient storage detected for the ELF cluster %s", machineCtx.ElfCluster.Spec.Cluster)
message := "Insufficient storage detected for the ELF cluster " + machineCtx.ElfCluster.Spec.Cluster
log.Info(message)

return errors.New(message)
case service.IsMemoryInsufficientError(errorMessage):
recordElfClusterMemoryInsufficient(machineCtx, true)
message := fmt.Sprintf("Insufficient memory detected for the ELF cluster %s", machineCtx.ElfCluster.Spec.Cluster)
message := "Insufficient memory detected for the ELF cluster " + machineCtx.ElfCluster.Spec.Cluster
log.Info(message)

return errors.New(message)
@@ -1075,7 +1077,7 @@ func (r *ElfMachineReconciler) reconcileProviderID(ctx goctx.Context, machineCtx
}

if machineCtx.ElfMachine.Spec.ProviderID == nil || *machineCtx.ElfMachine.Spec.ProviderID != providerID {
machineCtx.ElfMachine.Spec.ProviderID = pointer.String(providerID)
machineCtx.ElfMachine.Spec.ProviderID = ptr.To(providerID)

log.Info("updated providerID", "providerID", providerID)
}
@@ -1185,7 +1187,7 @@ func (r *ElfMachineReconciler) reconcileNetwork(ctx goctx.Context, machineCtx *c
return false, err
}

for i := 0; i < len(nics); i++ {
for i := range len(nics) {
nic := nics[i]
ip := service.GetTowerString(nic.IPAddress)

Expand Down Expand Up @@ -1281,7 +1283,7 @@ func (r *ElfMachineReconciler) reconcileLabels(ctx goctx.Context, machineCtx *co

// If the virtual machine has been labeled with managed label,
// it is considered that all labels have been labeled.
for i := 0; i < len(vm.Labels); i++ {
for i := range len(vm.Labels) {
if *vm.Labels[i].ID == *capeManagedLabel.ID {
return true, nil
}
@@ -1429,14 +1431,14 @@ func (r *ElfMachineReconciler) deleteDuplicateVMs(ctx goctx.Context, machineCtx

if machineCtx.ElfMachine.Status.VMRef == "" {
vmIDs := make([]string, 0, len(vms))
for i := 0; i < len(vms); i++ {
for i := range len(vms) {
vmIDs = append(vmIDs, *vms[i].ID)
}
log.Info("Waiting for ElfMachine to select one of the duplicate VMs before deleting the other", "vms", vmIDs)
return reconcile.Result{RequeueAfter: config.DefaultRequeueTimeout}, nil
}

for i := 0; i < len(vms); i++ {
for i := range len(vms) {
// Do not delete already running virtual machines to avoid deleting already used virtual machines.
if *vms[i].ID == machineCtx.ElfMachine.Status.VMRef ||
*vms[i].LocalID == machineCtx.ElfMachine.Status.VMRef ||
22 changes: 11 additions & 11 deletions controllers/elfmachine_controller_gpu.go
@@ -22,7 +22,7 @@ import (
"github.com/pkg/errors"
"github.com/smartxworks/cloudtower-go-sdk/v2/models"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/utils/pointer"
"k8s.io/utils/ptr"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
"sigs.k8s.io/cluster-api/util/conditions"
ctrl "sigs.k8s.io/controller-runtime"
@@ -50,7 +50,7 @@ func (r *ElfMachineReconciler) selectHostAndGPUsForVM(ctx goctx.Context, machine
log := ctrl.LoggerFrom(ctx)

if !machineCtx.ElfMachine.RequiresGPUDevices() {
return pointer.String(""), nil, nil
return ptr.To(""), nil, nil
}

defer func() {
@@ -129,7 +129,7 @@ func (r *ElfMachineReconciler) selectHostAndGPUsForVM(ctx goctx.Context, machine
unsortedHostIDs = hostIDSet.UnsortedList()
}

for i := 0; i < len(unsortedHostIDs); i++ {
for i := range len(unsortedHostIDs) {
hostGPUVMInfos, ok := hostGPUVMInfoMap[unsortedHostIDs[i]]
if !ok {
continue
@@ -173,14 +173,14 @@ func selectGPUDevicesForVM(hostGPUVMInfos service.GPUVMInfos, requiredGPUDevices
})

var selectedGPUDeviceInfos []*service.GPUDeviceInfo
for i := 0; i < len(requiredGPUDevices); i++ {
for i := range len(requiredGPUDevices) {
gpuVMInfos, ok := modelGPUVMInfoMap[requiredGPUDevices[i].Model]
if !ok || len(gpuVMInfos) < int(requiredGPUDevices[i].Count) {
return nil
}

gpuInfos := gpuVMInfos[:int(requiredGPUDevices[i].Count)]
for j := 0; j < len(gpuInfos); j++ {
for j := range len(gpuInfos) {
selectedGPUDeviceInfos = append(selectedGPUDeviceInfos, &service.GPUDeviceInfo{ID: *gpuInfos[j].ID, AllocatedCount: 1, AvailableCount: 1})
}
}
@@ -202,15 +202,15 @@ func selectVGPUDevicesForVM(hostGPUVMInfos service.GPUVMInfos, requiredVGPUDevic
})

var selectedGPUDeviceInfos []*service.GPUDeviceInfo
for i := 0; i < len(requiredVGPUDevices); i++ {
for i := range len(requiredVGPUDevices) {
gpuVMInfos, ok := typeVGPUVMInfoMap[requiredVGPUDevices[i].Type]
if !ok {
return nil
}

var gpuInfos []*service.GPUDeviceInfo
requiredCount := requiredVGPUDevices[i].Count
for j := 0; j < len(gpuVMInfos); j++ {
for j := range len(gpuVMInfos) {
availableCount := service.GetAvailableCountFromGPUVMInfo(gpuVMInfos[j])
if availableCount <= 0 {
continue
@@ -249,7 +249,7 @@ func (r *ElfMachineReconciler) reconcileGPUDevices(ctx goctx.Context, machineCtx

// Ensure GPUStatus is set or up to date.
gpuDevices := make([]infrav1.GPUStatus, len(vm.GpuDevices))
for i := 0; i < len(vm.GpuDevices); i++ {
for i := range len(vm.GpuDevices) {
gpuDevices[i] = infrav1.GPUStatus{GPUID: *vm.GpuDevices[i].ID, Name: *vm.GpuDevices[i].Name}
}
machineCtx.ElfMachine.Status.GPUDevices = gpuDevices
@@ -272,7 +272,7 @@ func (r *ElfMachineReconciler) reconcileGPUDevices(ctx goctx.Context, machineCtx
}

gpuIDs := make([]string, len(vm.GpuDevices))
for i := 0; i < len(vm.GpuDevices); i++ {
for i := range len(vm.GpuDevices) {
gpuIDs[i] = *vm.GpuDevices[i].ID
}

@@ -338,14 +338,14 @@ func (r *ElfMachineReconciler) removeVMGPUDevices(ctx goctx.Context, machineCtx
return err
}

for i := 0; i < len(vmGPUInfo.GpuDevices); i++ {
for i := range len(vmGPUInfo.GpuDevices) {
staleGPUs = append(staleGPUs, &models.VMGpuOperationParams{
GpuID: vmGPUInfo.GpuDevices[i].ID,
Amount: vmGPUInfo.GpuDevices[i].VgpuInstanceOnVMNum,
})
}
} else {
for i := 0; i < len(vm.GpuDevices); i++ {
for i := range len(vm.GpuDevices) {
staleGPUs = append(staleGPUs, &models.VMGpuOperationParams{
GpuID: vm.GpuDevices[i].ID,
Amount: service.TowerInt32(1),
(Diffs for the remaining changed files are not shown here.)
