Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OCPBUGS-36753,OCPBUGS-36754,OCPBUGS-36755OCPBUGS-36756: [release-4.14] Critical Bugs #961

Merged
merged 10 commits into from
Jul 10, 2024
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,10 @@ VF groups** (when #-notation is used in pfName field) are merged, otherwise only
the highest priority policy is applied. In case of same-priority policies and
overlapping VF groups, only the last processed policy is applied.

When using #-notation to define VF group, no actions are taken on virtual functions that
are not mentioned in any policy (e.g. if a policy defines a `vfio-pci` device group for a device, when
it is deleted the VF are not reset to the default driver).

#### Externally Manage virtual functions

When `ExternallyManage` is request on a policy the operator will only skip the virtual function creation.
Expand Down
10 changes: 1 addition & 9 deletions api/v1/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ func GetEswitchModeFromStatus(ifaceStatus *InterfaceExt) string {
func NeedToUpdateSriov(ifaceSpec *Interface, ifaceStatus *InterfaceExt) bool {
if ifaceSpec.Mtu > 0 {
mtu := ifaceSpec.Mtu
if mtu != ifaceStatus.Mtu {
if mtu > ifaceStatus.Mtu {
log.V(2).Info("NeedToUpdateSriov(): MTU needs update", "desired", mtu, "current", ifaceStatus.Mtu)
return true
}
Expand All @@ -273,10 +273,8 @@ func NeedToUpdateSriov(ifaceSpec *Interface, ifaceStatus *InterfaceExt) bool {
}
if ifaceSpec.NumVfs > 0 {
for _, vfStatus := range ifaceStatus.VFs {
ingroup := false
for _, groupSpec := range ifaceSpec.VfGroups {
if IndexInRange(vfStatus.VfID, groupSpec.VfRange) {
ingroup = true
if vfStatus.Driver == "" {
log.V(2).Info("NeedToUpdateSriov(): Driver needs update - has no driver",
"desired", groupSpec.DeviceType)
Expand Down Expand Up @@ -315,12 +313,6 @@ func NeedToUpdateSriov(ifaceSpec *Interface, ifaceStatus *InterfaceExt) bool {
break
}
}
if !ingroup && (StringInArray(vfStatus.Driver, vars.DpdkDrivers) || vfStatus.VdpaType != "") {
// need to reset VF if it is not a part of a group and:
// a. has DPDK driver loaded
// b. has VDPA device
return true
}
}
}
return false
Expand Down
68 changes: 68 additions & 0 deletions api/v1/helper_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -961,3 +961,71 @@ func TestSriovNetworkPoolConfig_MaxUnavailable(t *testing.T) {
})
}
}

func TestNeedToUpdateSriov(t *testing.T) {
type args struct {
ifaceSpec *v1.Interface
ifaceStatus *v1.InterfaceExt
}
tests := []struct {
name string
args args
want bool
}{
{
name: "number of VFs changed",
args: args{
ifaceSpec: &v1.Interface{NumVfs: 1},
ifaceStatus: &v1.InterfaceExt{NumVfs: 0},
},
want: true,
},
{
name: "no update",
args: args{
ifaceSpec: &v1.Interface{NumVfs: 1},
ifaceStatus: &v1.InterfaceExt{NumVfs: 1},
},
want: false,
},
{
name: "vfio-pci VF is not configured for any group",
args: args{
ifaceSpec: &v1.Interface{
NumVfs: 3,
VfGroups: []v1.VfGroup{
{
VfRange: "1-2",
DeviceType: consts.DeviceTypeNetDevice,
},
},
},
ifaceStatus: &v1.InterfaceExt{
NumVfs: 3,
VFs: []v1.VirtualFunction{
{
VfID: 0,
Driver: "vfio-pci",
},
{
VfID: 1,
Driver: "iavf",
},
{
VfID: 2,
Driver: "iavf",
},
},
},
},
want: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := v1.NeedToUpdateSriov(tt.args.ifaceSpec, tt.args.ifaceStatus); got != tt.want {
t.Errorf("NeedToUpdateSriov() = %v, want %v", got, tt.want)
}
})
}
}
3 changes: 3 additions & 0 deletions controllers/sriovnetworknodepolicy_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,9 @@ func (r *SriovNetworkNodePolicyReconciler) Reconcile(ctx context.Context, req ct
}

// Sort the policies with priority, higher priority ones is applied later
// We need to use the sort so we always get the policies in the same order
// That is needed so when we create the node Affinity for the sriov-device plugin
// it will remain in the same order and not trigger a pod recreation
sort.Sort(sriovnetworkv1.ByPriority(policyList.Items))
// Sync SriovNetworkNodeState objects
if err = r.syncAllSriovNetworkNodeStates(ctx, defaultPolicy, policyList, nodeList); err != nil {
Expand Down
6 changes: 6 additions & 0 deletions controllers/sriovoperatorconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"fmt"
"os"
"sort"
"strings"

appsv1 "k8s.io/api/apps/v1"
Expand Down Expand Up @@ -131,6 +132,11 @@ func (r *SriovOperatorConfigReconciler) Reconcile(ctx context.Context, req ctrl.
// Error reading the object - requeue the request.
return reconcile.Result{}, err
}
// Sort the policies with priority, higher priority ones is applied later
// We need to use the sort so we always get the policies in the same order
// That is needed so when we create the node Affinity for the sriov-device plugin
// it will remain in the same order and not trigger a pod recreation
sort.Sort(sriovnetworkv1.ByPriority(policyList.Items))

defaultPolicy := &sriovnetworkv1.SriovNetworkNodePolicy{}
err = r.Get(ctx, types.NamespacedName{Name: consts.DefaultPolicyName, Namespace: vars.Namespace}, defaultPolicy)
Expand Down
111 changes: 111 additions & 0 deletions controllers/sriovoperatorconfig_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,15 @@ package controllers

import (
"context"
"fmt"
"os"
"strings"
"sync"

admv1 "k8s.io/api/admissionregistration/v1"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"

"github.com/golang/mock/gomock"
Expand All @@ -19,6 +23,7 @@ import (
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/featuregate"
mock_platforms "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/mock"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/platforms/openshift"
"github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vars"
util "github.com/k8snetworkplumbingwg/sriov-network-operator/test/util"
)

Expand Down Expand Up @@ -307,5 +312,111 @@ var _ = Describe("SriovOperatorConfig controller", Ordered, func() {
return strings.Join(daemonSet.Spec.Template.Spec.Containers[0].Args, " ")
}, util.APITimeout*10, util.RetryInterval).Should(ContainSubstring("disable-plugins=mellanox"))
})

// This test verifies that the CABundle field in the webhook configuration added by third party components is not
// removed during the reconciliation loop. This is important when dealing with OpenShift certificate mangement:
// https://docs.openshift.com/container-platform/4.15/security/certificates/service-serving-certificate.html
// and when CertManager is used
It("should not remove the field Spec.ClientConfig.CABundle from webhook configuration when reconciling", func() {
validateCfg := &admv1.ValidatingWebhookConfiguration{}
err := util.WaitForNamespacedObject(validateCfg, k8sClient, testNamespace, "sriov-operator-webhook-config", util.RetryInterval, util.APITimeout*3)
Expect(err).NotTo(HaveOccurred())

By("Simulate a third party component updating the webhook CABundle")
validateCfg.Webhooks[0].ClientConfig.CABundle = []byte("some-base64-ca-bundle-value")

err = k8sClient.Update(ctx, validateCfg)
Expect(err).NotTo(HaveOccurred())

By("Trigger a controller reconciliation")
err = util.TriggerSriovOperatorConfigReconcile(k8sClient, testNamespace)
Expect(err).NotTo(HaveOccurred())

By("Verify the operator did not remove the CABundle from the webhook configuration")
Consistently(func(g Gomega) {
err = k8sClient.Get(ctx, types.NamespacedName{Namespace: testNamespace, Name: "sriov-operator-webhook-config"}, validateCfg)
g.Expect(err).NotTo(HaveOccurred())
g.Expect(validateCfg.Webhooks[0].ClientConfig.CABundle).To(Equal([]byte("some-base64-ca-bundle-value")))
}, "1s").Should(Succeed())
})

It("should update the webhook CABundle if `ADMISSION_CONTROLLERS_CERTIFICATES environment variable are set` ", func() {
DeferCleanup(os.Setenv, "ADMISSION_CONTROLLERS_CERTIFICATES_OPERATOR_CA_CRT", os.Getenv("ADMISSION_CONTROLLERS_CERTIFICATES_OPERATOR_CA_CRT"))
// echo "ca-bundle-1" | base64 -w 0
os.Setenv("ADMISSION_CONTROLLERS_CERTIFICATES_OPERATOR_CA_CRT", "Y2EtYnVuZGxlLTEK")

DeferCleanup(os.Setenv, "ADMISSION_CONTROLLERS_CERTIFICATES_INJECTOR_CA_CRT", os.Getenv("ADMISSION_CONTROLLERS_CERTIFICATES_INJECTOR_CA_CRT"))
// echo "ca-bundle-2" | base64 -w 0
os.Setenv("ADMISSION_CONTROLLERS_CERTIFICATES_INJECTOR_CA_CRT", "Y2EtYnVuZGxlLTIK")

DeferCleanup(func(old string) { vars.ClusterType = old }, vars.ClusterType)
vars.ClusterType = consts.ClusterTypeKubernetes

err := util.TriggerSriovOperatorConfigReconcile(k8sClient, testNamespace)
Expect(err).NotTo(HaveOccurred())

Eventually(func(g Gomega) {
validateCfg := &admv1.ValidatingWebhookConfiguration{}
err = k8sClient.Get(ctx, types.NamespacedName{Namespace: testNamespace, Name: "sriov-operator-webhook-config"}, validateCfg)
g.Expect(err).NotTo(HaveOccurred())
g.Expect(validateCfg.Webhooks[0].ClientConfig.CABundle).To(Equal([]byte("ca-bundle-1\n")))

mutateCfg := &admv1.MutatingWebhookConfiguration{}
err = k8sClient.Get(ctx, types.NamespacedName{Namespace: testNamespace, Name: "sriov-operator-webhook-config"}, mutateCfg)
g.Expect(err).NotTo(HaveOccurred())
g.Expect(mutateCfg.Webhooks[0].ClientConfig.CABundle).To(Equal([]byte("ca-bundle-1\n")))

injectorCfg := &admv1.MutatingWebhookConfiguration{}
err = k8sClient.Get(ctx, types.NamespacedName{Namespace: testNamespace, Name: "network-resources-injector-config"}, injectorCfg)
g.Expect(err).NotTo(HaveOccurred())
g.Expect(injectorCfg.Webhooks[0].ClientConfig.CABundle).To(Equal([]byte("ca-bundle-2\n")))
}, "1s").Should(Succeed())
})
It("should reconcile to a converging state when multiple node policies are set", func() {
By("Creating a consistent number of node policies")
for i := 0; i < 30; i++ {
p := &sriovnetworkv1.SriovNetworkNodePolicy{
ObjectMeta: metav1.ObjectMeta{Namespace: testNamespace, Name: fmt.Sprintf("p%d", i)},
Spec: sriovnetworkv1.SriovNetworkNodePolicySpec{
Priority: 99,
NodeSelector: map[string]string{"foo": fmt.Sprintf("v%d", i)},
},
}
err := k8sClient.Create(context.Background(), p)
Expect(err).NotTo(HaveOccurred())
}

By("Triggering a the reconcile loop")
config := &sriovnetworkv1.SriovOperatorConfig{}
err := k8sClient.Get(context.Background(), types.NamespacedName{Name: "default", Namespace: testNamespace}, config)
Expect(err).NotTo(HaveOccurred())
if config.ObjectMeta.Labels == nil {
config.ObjectMeta.Labels = make(map[string]string)
}
config.ObjectMeta.Labels["trigger-test"] = "test-reconcile-daemonset"
err = k8sClient.Update(context.Background(), config)
Expect(err).NotTo(HaveOccurred())

By("Wait until device-plugin Daemonset's affinity has been calculated")
var expectedAffinity *corev1.Affinity

Eventually(func(g Gomega) {
daemonSet := &appsv1.DaemonSet{}
err = k8sClient.Get(context.Background(), types.NamespacedName{Name: "sriov-device-plugin", Namespace: testNamespace}, daemonSet)
g.Expect(err).NotTo(HaveOccurred())
// Wait until the last policy (with NodeSelector foo=v29) has been considered at least one time
g.Expect(daemonSet.Spec.Template.Spec.Affinity.String()).To(ContainSubstring("v29"))
expectedAffinity = daemonSet.Spec.Template.Spec.Affinity
}, "3s", "1s").Should(Succeed())

By("Verify device-plugin Daemonset's affinity doesn't change over time")
Consistently(func(g Gomega) {
daemonSet := &appsv1.DaemonSet{}
err = k8sClient.Get(context.Background(), types.NamespacedName{Name: "sriov-device-plugin", Namespace: testNamespace}, daemonSet)
g.Expect(err).NotTo(HaveOccurred())
g.Expect(daemonSet.Spec.Template.Spec.Affinity).
To(Equal(expectedAffinity))
}, "3s", "1s").Should(Succeed())
})
})
})
8 changes: 3 additions & 5 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ require (
github.com/go-logr/logr v1.2.4
github.com/golang/mock v1.4.4
github.com/google/go-cmp v0.6.0
github.com/hashicorp/go-retryablehttp v0.7.0
github.com/google/uuid v1.3.1
github.com/hashicorp/go-retryablehttp v0.7.7
github.com/jaypipes/ghw v0.9.0
github.com/k8snetworkplumbingwg/network-attachment-definition-client v1.4.0
github.com/k8snetworkplumbingwg/sriov-network-device-plugin v0.0.0-20221127172732-a5a7395122e3
Expand Down Expand Up @@ -82,10 +83,8 @@ require (
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/pprof v0.0.0-20210720184732-4bb14d4b1be1 // indirect
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
github.com/google/uuid v1.3.1 // indirect
github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 // indirect
github.com/hashicorp/go-cleanhttp v0.5.2 // indirect
github.com/hashicorp/go-hclog v1.2.0 // indirect
github.com/huandu/xstrings v1.3.2 // indirect
github.com/imdario/mergo v0.3.16 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
Expand All @@ -95,7 +94,6 @@ require (
github.com/k8snetworkplumbingwg/govdpa v0.1.4 // indirect
github.com/liggitt/tabwriter v0.0.0-20181228230101-89fcab3d43de // indirect
github.com/mailru/easyjson v0.7.7 // indirect
github.com/mattn/go-isatty v0.0.17 // indirect
github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 // indirect
github.com/mitchellh/copystructure v1.2.0 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
Expand Down Expand Up @@ -130,7 +128,7 @@ require (
golang.org/x/net v0.17.0 // indirect
golang.org/x/oauth2 v0.13.0 // indirect
golang.org/x/sync v0.4.0 // indirect
golang.org/x/sys v0.17.0 // indirect
golang.org/x/sys v0.20.0 // indirect
golang.org/x/term v0.15.0 // indirect
golang.org/x/text v0.14.0 // indirect
golang.org/x/tools v0.14.0 // indirect
Expand Down
Loading