Skip to content

Commit

Permalink
Merge pull request #961 from zeeke/414-critical-bugs
Browse files Browse the repository at this point in the history
OCPBUGS-36753,OCPBUGS-36754,OCPBUGS-36755OCPBUGS-36756: [release-4.14] Critical Bugs
  • Loading branch information
openshift-merge-bot[bot] authored Jul 10, 2024
2 parents 0dda92f + 8f84438 commit 9e7bc3a
Show file tree
Hide file tree
Showing 102 changed files with 10,618 additions and 4,498 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,10 @@ VF groups** (when #-notation is used in pfName field) are merged, otherwise only
the highest priority policy is applied. In case of same-priority policies and
overlapping VF groups, only the last processed policy is applied.

When using #-notation to define VF group, no actions are taken on virtual functions that
are not mentioned in any policy (e.g. if a policy defines a `vfio-pci` device group for a device, when
it is deleted the VF are not reset to the default driver).

#### Externally Manage virtual functions

When `ExternallyManage` is request on a policy the operator will only skip the virtual function creation.
Expand Down
10 changes: 1 addition & 9 deletions api/v1/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ func GetEswitchModeFromStatus(ifaceStatus *InterfaceExt) string {
func NeedToUpdateSriov(ifaceSpec *Interface, ifaceStatus *InterfaceExt) bool {
if ifaceSpec.Mtu > 0 {
mtu := ifaceSpec.Mtu
if mtu != ifaceStatus.Mtu {
if mtu > ifaceStatus.Mtu {
log.V(2).Info("NeedToUpdateSriov(): MTU needs update", "desired", mtu, "current", ifaceStatus.Mtu)
return true
}
Expand All @@ -273,10 +273,8 @@ func NeedToUpdateSriov(ifaceSpec *Interface, ifaceStatus *InterfaceExt) bool {
}
if ifaceSpec.NumVfs > 0 {
for _, vfStatus := range ifaceStatus.VFs {
ingroup := false
for _, groupSpec := range ifaceSpec.VfGroups {
if IndexInRange(vfStatus.VfID, groupSpec.VfRange) {
ingroup = true
if vfStatus.Driver == "" {
log.V(2).Info("NeedToUpdateSriov(): Driver needs update - has no driver",
"desired", groupSpec.DeviceType)
Expand Down Expand Up @@ -315,12 +313,6 @@ func NeedToUpdateSriov(ifaceSpec *Interface, ifaceStatus *InterfaceExt) bool {
break
}
}
if !ingroup && (StringInArray(vfStatus.Driver, vars.DpdkDrivers) || vfStatus.VdpaType != "") {
// need to reset VF if it is not a part of a group and:
// a. has DPDK driver loaded
// b. has VDPA device
return true
}
}
}
return false
Expand Down
68 changes: 68 additions & 0 deletions api/v1/helper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -961,3 +961,71 @@ func TestSriovNetworkPoolConfig_MaxUnavailable(t *testing.T) {
})
}
}

func TestNeedToUpdateSriov(t *testing.T) {
type args struct {
ifaceSpec *v1.Interface
ifaceStatus *v1.InterfaceExt
}
tests := []struct {
name string
args args
want bool
}{
{
name: "number of VFs changed",
args: args{
ifaceSpec: &v1.Interface{NumVfs: 1},
ifaceStatus: &v1.InterfaceExt{NumVfs: 0},
},
want: true,
},
{
name: "no update",
args: args{
ifaceSpec: &v1.Interface{NumVfs: 1},
ifaceStatus: &v1.InterfaceExt{NumVfs: 1},
},
want: false,
},
{
name: "vfio-pci VF is not configured for any group",
args: args{
ifaceSpec: &v1.Interface{
NumVfs: 3,
VfGroups: []v1.VfGroup{
{
VfRange: "1-2",
DeviceType: consts.DeviceTypeNetDevice,
},
},
},
ifaceStatus: &v1.InterfaceExt{
NumVfs: 3,
VFs: []v1.VirtualFunction{
{
VfID: 0,
Driver: "vfio-pci",
},
{
VfID: 1,
Driver: "iavf",
},
{
VfID: 2,
Driver: "iavf",
},
},
},
},
want: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := v1.NeedToUpdateSriov(tt.args.ifaceSpec, tt.args.ifaceStatus); got != tt.want {
t.Errorf("NeedToUpdateSriov() = %v, want %v", got, tt.want)
}
})
}
}
3 changes: 3 additions & 0 deletions controllers/sriovnetworknodepolicy_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,9 @@ func (r *SriovNetworkNodePolicyReconciler) Reconcile(ctx context.Context, req ct
}

// Sort the policies with priority, higher priority ones is applied later
// We need to use the sort so we always get the policies in the same order
// That is needed so when we create the node Affinity for the sriov-device plugin
// it will remain in the same order and not trigger a pod recreation
sort.Sort(sriovnetworkv1.ByPriority(policyList.Items))
// Sync SriovNetworkNodeState objects
if err = r.syncAllSriovNetworkNodeStates(ctx, defaultPolicy, policyList, nodeList); err != nil {
Expand Down
6 changes: 6 additions & 0 deletions controllers/sriovoperatorconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"fmt"
"os"
"sort"
"strings"

appsv1 "k8s.io/api/apps/v1"
Expand Down Expand Up @@ -131,6 +132,11 @@ func (r *SriovOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl.
// Error reading the object - requeue the request.
return reconcile.Result{}, err
}
// Sort the policies with priority, higher priority ones is applied later
// We need to use the sort so we always get the policies in the same order
// That is needed so when we create the node Affinity for the sriov-device plugin
// it will remain in the same order and not trigger a pod recreation
sort.Sort(sriovnetworkv1.ByPriority(policyList.Items))

defaultPolicy := &sriovnetworkv1.SriovNetworkNodePolicy{}
err = r.Get(ctx, types.NamespacedName{Name: consts.DefaultPolicyName, Namespace: vars.Namespace}, defaultPolicy)
Expand Down
111 changes: 111 additions & 0 deletions controllers/sriovoperatorconfig_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,15 @@ package controllers

import (
"context"
"fmt"
"os"
"strings"
"sync"

admv1 "k8s.io/api/admissionregistration/v1"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"

"github.com/golang/mock/gomock"
Expand All @@ -19,6 +23,7 @@ import (
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/featuregate"
mock_platforms "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/mock"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/openshift"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars"
util "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util"
)

Expand Down Expand Up @@ -307,5 +312,111 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() {
return strings.Join(daemonSet.Spec.Template.Spec.Containers[0].Args, " ")
}, util.APITimeout*10, util.RetryInterval).Should(ContainSubstring("disable-plugins=mellanox"))
})

// This test verifies that the CABundle field in the webhook configuration added by third party components is not
// removed during the reconciliation loop. This is important when dealing with OpenShift certificate mangement:
// https://docs.openshift.com/container-platform/4.15/security/certificates/service-serving-certificate.html
// and when CertManager is used
It("should not remove the field Spec.ClientConfig.CABundle from webhook configuration when reconciling", func() {
validateCfg := &admv1.ValidatingWebhookConfiguration{}
err := util.WaitForNamespacedObject(validateCfg, k8sClient, testNamespace, "sriov-operator-webhook-config", util.RetryInterval, util.APITimeout*3)
Expect(err).NotTo(HaveOccurred())

By("Simulate a third party component updating the webhook CABundle")
validateCfg.Webhooks[0].ClientConfig.CABundle = []byte("some-base64-ca-bundle-value")

err = k8sClient.Update(ctx, validateCfg)
Expect(err).NotTo(HaveOccurred())

By("Trigger a controller reconciliation")
err = util.TriggerSriovOperatorConfigReconcile(k8sClient, testNamespace)
Expect(err).NotTo(HaveOccurred())

By("Verify the operator did not remove the CABundle from the webhook configuration")
Consistently(func(g Gomega) {
err = k8sClient.Get(ctx, types.NamespacedName{Namespace: testNamespace, Name: "sriov-operator-webhook-config"}, validateCfg)
g.Expect(err).NotTo(HaveOccurred())
g.Expect(validateCfg.Webhooks[0].ClientConfig.CABundle).To(Equal([]byte("some-base64-ca-bundle-value")))
}, "1s").Should(Succeed())
})

It("should update the webhook CABundle if `ADMISSION_CONTROLLERS_CERTIFICATES environment variable are set` ", func() {
DeferCleanup(os.Setenv, "ADMISSION_CONTROLLERS_CERTIFICATES_OPERATOR_CA_CRT", os.Getenv("ADMISSION_CONTROLLERS_CERTIFICATES_OPERATOR_CA_CRT"))
// echo "ca-bundle-1" | base64 -w 0
os.Setenv("ADMISSION_CONTROLLERS_CERTIFICATES_OPERATOR_CA_CRT", "Y2EtYnVuZGxlLTEK")

DeferCleanup(os.Setenv, "ADMISSION_CONTROLLERS_CERTIFICATES_INJECTOR_CA_CRT", os.Getenv("ADMISSION_CONTROLLERS_CERTIFICATES_INJECTOR_CA_CRT"))
// echo "ca-bundle-2" | base64 -w 0
os.Setenv("ADMISSION_CONTROLLERS_CERTIFICATES_INJECTOR_CA_CRT", "Y2EtYnVuZGxlLTIK")

DeferCleanup(func(old string) { vars.ClusterType = old }, vars.ClusterType)
vars.ClusterType = consts.ClusterTypeKubernetes

err := util.TriggerSriovOperatorConfigReconcile(k8sClient, testNamespace)
Expect(err).NotTo(HaveOccurred())

Eventually(func(g Gomega) {
validateCfg := &admv1.ValidatingWebhookConfiguration{}
err = k8sClient.Get(ctx, types.NamespacedName{Namespace: testNamespace, Name: "sriov-operator-webhook-config"}, validateCfg)
g.Expect(err).NotTo(HaveOccurred())
g.Expect(validateCfg.Webhooks[0].ClientConfig.CABundle).To(Equal([]byte("ca-bundle-1\n")))

mutateCfg := &admv1.MutatingWebhookConfiguration{}
err = k8sClient.Get(ctx, types.NamespacedName{Namespace: testNamespace, Name: "sriov-operator-webhook-config"}, mutateCfg)
g.Expect(err).NotTo(HaveOccurred())
g.Expect(mutateCfg.Webhooks[0].ClientConfig.CABundle).To(Equal([]byte("ca-bundle-1\n")))

injectorCfg := &admv1.MutatingWebhookConfiguration{}
err = k8sClient.Get(ctx, types.NamespacedName{Namespace: testNamespace, Name: "network-resources-injector-config"}, injectorCfg)
g.Expect(err).NotTo(HaveOccurred())
g.Expect(injectorCfg.Webhooks[0].ClientConfig.CABundle).To(Equal([]byte("ca-bundle-2\n")))
}, "1s").Should(Succeed())
})
It("should reconcile to a converging state when multiple node policies are set", func() {
By("Creating a consistent number of node policies")
for i := 0; i < 30; i++ {
p := &sriovnetworkv1.SriovNetworkNodePolicy{
ObjectMeta: metav1.ObjectMeta{Namespace: testNamespace, Name: fmt.Sprintf("p%d", i)},
Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{
Priority: 99,
NodeSelector: map[string]string{"foo": fmt.Sprintf("v%d", i)},
},
}
err := k8sClient.Create(context.Background(), p)
Expect(err).NotTo(HaveOccurred())
}

By("Triggering a the reconcile loop")
config := &sriovnetworkv1.SriovOperatorConfig{}
err := k8sClient.Get(context.Background(), types.NamespacedName{Name: "default", Namespace: testNamespace}, config)
Expect(err).NotTo(HaveOccurred())
if config.ObjectMeta.Labels == nil {
config.ObjectMeta.Labels = make(map[string]string)
}
config.ObjectMeta.Labels["trigger-test"] = "test-reconcile-daemonset"
err = k8sClient.Update(context.Background(), config)
Expect(err).NotTo(HaveOccurred())

By("Wait until device-plugin Daemonset's affinity has been calculated")
var expectedAffinity *corev1.Affinity

Eventually(func(g Gomega) {
daemonSet := &appsv1.DaemonSet{}
err = k8sClient.Get(context.Background(), types.NamespacedName{Name: "sriov-device-plugin", Namespace: testNamespace}, daemonSet)
g.Expect(err).NotTo(HaveOccurred())
// Wait until the last policy (with NodeSelector foo=v29) has been considered at least one time
g.Expect(daemonSet.Spec.Template.Spec.Affinity.String()).To(ContainSubstring("v29"))
expectedAffinity = daemonSet.Spec.Template.Spec.Affinity
}, "3s", "1s").Should(Succeed())

By("Verify device-plugin Daemonset's affinity doesn't change over time")
Consistently(func(g Gomega) {
daemonSet := &appsv1.DaemonSet{}
err = k8sClient.Get(context.Background(), types.NamespacedName{Name: "sriov-device-plugin", Namespace: testNamespace}, daemonSet)
g.Expect(err).NotTo(HaveOccurred())
g.Expect(daemonSet.Spec.Template.Spec.Affinity).
To(Equal(expectedAffinity))
}, "3s", "1s").Should(Succeed())
})
})
})
8 changes: 3 additions & 5 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ require (
github.com/go-logr/logr v1.2.4
github.com/golang/mock v1.4.4
github.com/google/go-cmp v0.6.0
github.com/hashicorp/go-retryablehttp v0.7.0
github.com/google/uuid v1.3.1
github.com/hashicorp/go-retryablehttp v0.7.7
github.com/jaypipes/ghw v0.9.0
github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.4.0
github.com/k8snetworkplumbingwg/sriov-network-device-plugin v0.0.0-20221127172732-a5a7395122e3
Expand Down Expand Up @@ -82,10 +83,8 @@ require (
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 // indirect
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
github.com/google/uuid v1.3.1 // indirect
github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 // indirect
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
github.com/hashicorp/go-hclog v1.2.0 // indirect
github.com/huandu/xstrings v1.3.2 // indirect
github.com/imdario/mergo v0.3.16 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
Expand All @@ -95,7 +94,6 @@ require (
github.com/k8snetworkplumbingwg/govdpa v0.1.4 // indirect
github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mattn/go-isatty v0.0.17 // indirect
github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 // indirect
github.com/mitchellh/copystructure v1.2.0 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
Expand Down Expand Up @@ -130,7 +128,7 @@ require (
golang.org/x/net v0.17.0 // indirect
golang.org/x/oauth2 v0.13.0 // indirect
golang.org/x/sync v0.4.0 // indirect
golang.org/x/sys v0.17.0 // indirect
golang.org/x/sys v0.20.0 // indirect
golang.org/x/term v0.15.0 // indirect
golang.org/x/text v0.14.0 // indirect
golang.org/x/tools v0.14.0 // indirect
Expand Down
Loading

0 comments on commit 9e7bc3a

Please sign in to comment.