Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Delete OVS port and flows before releasing Pod IP #5788

Merged
merged 1 commit into from
Dec 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions pkg/agent/cniserver/ipam/ipam_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
var ipamDrivers map[string][]IPAMDriver

// A cache of IPAM results.
// TODO: We should get rid of using global variables to store status, which makes testing complicated.
var ipamResults = sync.Map{}

type IPAMResult struct {
Expand All @@ -59,6 +60,10 @@ func ResetIPAMDrivers(ipamType string) {
}
}

func ResetIPAMResults() {
ipamResults = sync.Map{}
}

func argsFromEnv(cniArgs *cnipb.CniCmdArgs) *invoke.Args {
return &invoke.Args{
ContainerID: cniArgs.ContainerId,
Expand Down
17 changes: 10 additions & 7 deletions pkg/agent/cniserver/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ func (s *CNIServer) incompatibleCniVersionResponse(cniVersion string) *cnipb.Cni

func (s *CNIServer) unsupportedFieldResponse(key string, value interface{}) *cnipb.CniCmdResponse {
cniErrorCode := cnipb.ErrorCode_UNSUPPORTED_FIELD
cniErrorMsg := fmt.Sprintf("Network configuration does not support key %s and value %v", key, value)
cniErrorMsg := fmt.Sprintf("Network configuration does not support %s=%v", key, value)
return s.generateCNIErrorResponse(cniErrorCode, cniErrorMsg)
}

Expand Down Expand Up @@ -537,17 +537,20 @@ func (s *CNIServer) cmdDel(_ context.Context, cniConfig *CNIConfig) (*cnipb.CniC
if s.isChaining {
return s.interceptDel(cniConfig)
}
// Release IP to IPAM driver
if err := ipam.ExecIPAMDelete(cniConfig.CniCmdArgs, cniConfig.K8sArgs, cniConfig.IPAM.Type, infraContainer); err != nil {
klog.ErrorS(err, "Failed to delete IP addresses for container", "container", cniConfig.ContainerId)
return s.ipamFailureResponse(err), nil
}
klog.InfoS("Deleted IP addresses for container", "container", cniConfig.ContainerId)

// Remove host interface and OVS configuration
if err := s.podConfigurator.removeInterfaces(cniConfig.ContainerId); err != nil {
klog.ErrorS(err, "Failed to remove interfaces for container", "container", cniConfig.ContainerId)
return s.configInterfaceFailureResponse(err), nil
}
klog.InfoS("Deleted interfaces for container", "container", cniConfig.ContainerId)

// Release IP to IPAM driver
if err := ipam.ExecIPAMDelete(cniConfig.CniCmdArgs, cniConfig.K8sArgs, cniConfig.IPAM.Type, infraContainer); err != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think one point I was trying to make earlier (in the other PR) is that the main reason we have observed (AFAIK) for a CNI DEL to fail is OVS cleanup. As long as s.podConfigurator.removeInterfaces keeps failing, we will not be releasing the Pod IP address, so it will not be available for new Pods.
However, it's probably fair to assume that if s.podConfigurator.removeInterfaces keeps failing, then s.podConfigurator.configureInterfaces cannot also succeed consistently, so Pod creation is impacted anyway. So releasing the IP address first would not help in this case.

The observation here is that the pool of IPs is much smaller than the pool of OVS resources (e.g. OF ports). But I don't know if it is that relevant. Typically you want to release things in the reverse order compared to allocation, and your point about OVS flows referencing the IP address makes sense.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, creating resource should be more demanding than releasing resources. Releasing IPs first wouldn't help much in theory. Besides:

  1. The issue didn't happen only when there is something wrong but could also happen when Pod A is being created and Pod B is being deleted, and there is no other available IP.
  2. We should avoid creating a case that multiple Pods share the same IP as IP is the identifier in datapath, it may lead to misbehavior to Service and NetworkPolicy. If there is a case cleaning up OVS resources keeps failing, releasing the IP to another Pod could make another Pod be applied this Pod's Policy and used as an irrelavant Service's endpoint wrongly.

klog.ErrorS(err, "Failed to delete IP addresses for container", "container", cniConfig.ContainerId)
return s.ipamFailureResponse(err), nil
}

klog.InfoS("CmdDel for container succeeded", "container", cniConfig.ContainerId)

return &cnipb.CniCmdResponse{CniResult: []byte("")}, nil
Expand Down
64 changes: 43 additions & 21 deletions pkg/agent/cniserver/server_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -223,16 +223,13 @@ func createCNIRequestAndInterfaceName(t *testing.T, name string, cniType string,
}

func TestCmdAdd(t *testing.T) {
controller := gomock.NewController(t)
ipamMock := ipamtest.NewMockIPAMDriver(controller)
ctx := context.TODO()

versionedIPAMResult, err := ipamResult.GetAsVersion(supportedCNIVersion)
assert.NoError(t, err)

for _, tc := range []struct {
name string
podName string
ipamType string
ipamAdd bool
ipamError error
Expand All @@ -248,7 +245,6 @@ func TestCmdAdd(t *testing.T) {
}{
{
name: "secondary-IPAM",
podName: "pod0",
ipamType: ipam.AntreaIPAMType,
cniType: "cniType",
enableSecondaryNetworkIPAM: true,
Expand All @@ -257,7 +253,6 @@ func TestCmdAdd(t *testing.T) {
response: resultToResponse(versionedIPAMResult),
}, {
name: "secondary-IPAM-failure",
podName: "pod1",
ipamType: ipam.AntreaIPAMType,
cniType: "cniType",
enableSecondaryNetworkIPAM: true,
Expand All @@ -272,7 +267,6 @@ func TestCmdAdd(t *testing.T) {
},
}, {
name: "chaining",
podName: "pod2",
ipamType: "test-cni-ipam",
enableSecondaryNetworkIPAM: false,
isChaining: true,
Expand All @@ -281,7 +275,6 @@ func TestCmdAdd(t *testing.T) {
containerIfaceExist: true,
}, {
name: "add-general-cni",
podName: "pod3",
ipamType: "test-cni-ipam",
ipamAdd: true,
enableSecondaryNetworkIPAM: false,
Expand All @@ -291,7 +284,6 @@ func TestCmdAdd(t *testing.T) {
containerIfaceExist: true,
}, {
name: "add-general-cni-failure",
podName: "pod3",
ipamType: "test-cni-ipam",
ipamAdd: true,
enableSecondaryNetworkIPAM: false,
Expand All @@ -304,9 +296,12 @@ func TestCmdAdd(t *testing.T) {
} {
t.Run(tc.name, func(t *testing.T) {
defer mockGetNSPath(nil)()
ipam.ResetIPAMResults()
controller := gomock.NewController(t)
ipamMock := ipamtest.NewMockIPAMDriver(controller)
cniserver := newMockCNIServer(t, controller, ipamMock, tc.ipamType, tc.enableSecondaryNetworkIPAM, tc.isChaining)
testIfaceConfigurator := newTestInterfaceConfigurator()
requestMsg, hostInterfaceName := createCNIRequestAndInterfaceName(t, tc.podName, tc.cniType, ipamResult, tc.ipamType, true)
requestMsg, hostInterfaceName := createCNIRequestAndInterfaceName(t, testPodNameA, tc.cniType, ipamResult, tc.ipamType, true)
testIfaceConfigurator.hostIfaceName = hostInterfaceName
cniserver.podConfigurator.ifConfigurator = testIfaceConfigurator
if tc.ipamAdd {
Expand Down Expand Up @@ -371,30 +366,27 @@ func TestCmdAdd(t *testing.T) {
}

func TestCmdDel(t *testing.T) {
controller := gomock.NewController(t)
ipamMock := ipamtest.NewMockIPAMDriver(controller)
ovsPortID := generateUUID(t)
ovsPort := int32(100)
ctx := context.TODO()

for _, tc := range []struct {
name string
podName string
ipamType string
ipamDel bool
ipamError error
cniType string
enableSecondaryNetworkIPAM bool
isChaining bool
disconnectOVS bool
disconnectOVSErr error
migrateRoute bool
delLocalIPAMRoute bool
delLocalIPAMRouteError error
response *cnipb.CniCmdResponse
}{
{
name: "secondary-IPAM",
podName: "pod1",
ipamType: ipam.AntreaIPAMType,
cniType: "cniType",
ipamDel: true,
Expand All @@ -404,7 +396,6 @@ func TestCmdDel(t *testing.T) {
},
{
name: "secondary-IPAM-failure",
podName: "pod1",
ipamType: ipam.AntreaIPAMType,
cniType: "cniType",
ipamDel: true,
Expand All @@ -418,9 +409,40 @@ func TestCmdDel(t *testing.T) {
},
},
},
{
name: "IPAM-failure",
ipamType: "host-local",
ipamDel: true,
ipamError: fmt.Errorf("failed to release IP"),
enableSecondaryNetworkIPAM: false,
isChaining: false,
disconnectOVS: true,
delLocalIPAMRoute: true,
response: &cnipb.CniCmdResponse{
Error: &cnipb.Error{
Code: cnipb.ErrorCode_IPAM_FAILURE,
Message: "failed to release IP",
},
},
},
{
name: "del-ovs-failure",
ipamType: "host-local",
enableSecondaryNetworkIPAM: false,
isChaining: false,
disconnectOVS: true,
disconnectOVSErr: ovsconfig.NewTransactionError(fmt.Errorf("failed to delete port"), true),
ipamDel: false,
delLocalIPAMRoute: false,
response: &cnipb.CniCmdResponse{
Error: &cnipb.Error{
Code: cnipb.ErrorCode_CONFIG_INTERFACE_FAILURE,
Message: fmt.Sprintf("failed to delete OVS port for container %s: failed to delete port", testPodInfraContainerID),
},
},
},
{
name: "chaining",
podName: "pod2",
ipamType: "test-delete",
enableSecondaryNetworkIPAM: false,
isChaining: true,
Expand All @@ -429,7 +451,6 @@ func TestCmdDel(t *testing.T) {
},
{
name: "del-general-cni",
podName: "pod3",
ipamType: "test-delete",
ipamDel: true,
enableSecondaryNetworkIPAM: false,
Expand All @@ -439,9 +460,8 @@ func TestCmdDel(t *testing.T) {
},
{
name: "del-general-cni-failure",
podName: "pod3",
ipamType: "test-delete",
ipamDel: true,
ipamDel: false,
enableSecondaryNetworkIPAM: false,
isChaining: false,
disconnectOVS: true,
Expand All @@ -450,10 +470,12 @@ func TestCmdDel(t *testing.T) {
},
} {
t.Run(tc.name, func(t *testing.T) {
controller := gomock.NewController(t)
ipamMock := ipamtest.NewMockIPAMDriver(controller)
cniserver := newMockCNIServer(t, controller, ipamMock, tc.ipamType, tc.enableSecondaryNetworkIPAM, tc.isChaining)
requestMsg, hostInterfaceName := createCNIRequestAndInterfaceName(t, tc.podName, tc.cniType, ipamResult, tc.ipamType, true)
requestMsg, hostInterfaceName := createCNIRequestAndInterfaceName(t, testPodNameA, tc.cniType, ipamResult, tc.ipamType, true)
containerID := requestMsg.CniArgs.ContainerId
containerIfaceConfig := interfacestore.NewContainerInterface(hostInterfaceName, containerID, tc.podName, testPodNamespace, containerVethMac, []net.IP{net.ParseIP("10.1.2.100")}, 0)
containerIfaceConfig := interfacestore.NewContainerInterface(hostInterfaceName, containerID, testPodNameA, testPodNamespace, containerVethMac, []net.IP{net.ParseIP("10.1.2.100")}, 0)
containerIfaceConfig.OVSPortConfig = &interfacestore.OVSPortConfig{PortUUID: ovsPortID, OFPort: ovsPort}
ifaceStore.AddInterface(containerIfaceConfig)
testIfaceConfigurator := newTestInterfaceConfigurator()
Expand All @@ -472,7 +494,7 @@ func TestCmdDel(t *testing.T) {
}
}
if tc.disconnectOVS {
mockOVSBridgeClient.EXPECT().DeletePort(ovsPortID).Return(nil).Times(1)
mockOVSBridgeClient.EXPECT().DeletePort(ovsPortID).Return(tc.disconnectOVSErr).Times(1)
mockOFClient.EXPECT().UninstallPodFlows(hostInterfaceName).Return(nil).Times(1)
}
if tc.migrateRoute {
Expand Down
4 changes: 2 additions & 2 deletions pkg/agent/cniserver/server_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -687,14 +687,14 @@ func generateNetworkConfiguration(name, cniVersion, cniType, ipamType string) *t
if cniType == "" {
netCfg.Type = AntreaCNIType
} else {
netCfg.Type = "cniType"
netCfg.Type = cniType
}
netCfg.IPAM = &types.IPAMConfig{Type: ipamType}
return netCfg
}

func newRequest(args string, netCfg *types.NetworkConfig, path string, t *testing.T) (*cnipb.CniCmdRequest, string) {
containerID := generateUUID(t)
_, _, containerID := cniservertest.ParseCNIArgs(args)
networkConfig, err := json.Marshal(netCfg)
if err != nil {
t.Error("Failed to generate Network configuration")
Expand Down
39 changes: 18 additions & 21 deletions pkg/agent/cniserver/server_windows_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -572,8 +572,6 @@ func TestCmdDel(t *testing.T) {

for _, tc := range []struct {
name string
podName string
containerID string
netns string
ipamDel bool
ipamError error
Expand All @@ -584,26 +582,25 @@ func TestCmdDel(t *testing.T) {
}{
{
name: "docker-infra-success",
podName: "pod0",
containerID: containerID,
netns: "none",
ipamDel: true,
disconnectOVS: true,
endpointExists: true,
ifaceExists: true,
}, {
name: "interface-not-exist",
podName: "pod1",
containerID: containerID,
netns: "none",
ipamDel: true,
}, {
name: "ipam-delete-failure",
podName: "pod2",
containerID: containerID,
netns: "none",
ipamDel: true,
ipamError: fmt.Errorf("unable to delete IP"),
},
{
name: "interface-not-exist",
netns: "none",
ipamDel: true,
},
{
name: "ipam-delete-failure",
netns: "none",
ipamDel: true,
ipamError: fmt.Errorf("unable to delete IP"),
disconnectOVS: true,
endpointExists: true,
ifaceExists: true,
errResponse: &cnipb.CniCmdResponse{
Error: &cnipb.Error{
Code: cnipb.ErrorCode_IPAM_FAILURE,
Expand All @@ -614,7 +611,7 @@ func TestCmdDel(t *testing.T) {
} {
t.Run(tc.name, func(t *testing.T) {
isDocker := isDockerContainer(tc.netns)
requestMsg, ovsPortName := prepareSetup(t, ipamType, tc.podName, tc.containerID, tc.containerID, tc.netns, nil)
requestMsg, ovsPortName := prepareSetup(t, ipamType, testPodNameA, containerID, containerID, tc.netns, nil)
hnsEndpoint := getHnsEndpoint(generateUUID(t), ovsPortName)
var existingHnsEndpoints []hcsshim.HNSEndpoint
if tc.endpointExists {
Expand All @@ -623,14 +620,14 @@ func TestCmdDel(t *testing.T) {
testUtil := newHnsTestUtil(hnsEndpoint.Id, existingHnsEndpoints, isDocker, true, nil, nil)
testUtil.setFunctions()
defer testUtil.restore()
waiter := newAsyncWaiter(tc.podName, tc.containerID)
waiter := newAsyncWaiter(testPodNameA, containerID)
server := newMockCNIServer(t, controller, waiter.notifier)
ovsPortID := generateUUID(t)
if tc.endpointExists {
server.podConfigurator.ifConfigurator.(*ifConfigurator).addEndpoint(hnsEndpoint)
}
if tc.ifaceExists {
containerIface := interfacestore.NewContainerInterface(ovsPortName, tc.containerID, tc.podName, testPodNamespace, containerMAC, []net.IP{net.ParseIP("10.1.2.100")}, 0)
containerIface := interfacestore.NewContainerInterface(ovsPortName, containerID, testPodNameA, testPodNamespace, containerMAC, []net.IP{net.ParseIP("10.1.2.100")}, 0)
containerIface.OVSPortConfig = &interfacestore.OVSPortConfig{
OFPort: 100,
PortUUID: ovsPortID,
Expand All @@ -652,7 +649,7 @@ func TestCmdDel(t *testing.T) {
} else {
assert.Equal(t, emptyResponse, resp)
}
_, exists := ifaceStore.GetContainerInterface(tc.containerID)
_, exists := ifaceStore.GetContainerInterface(containerID)
assert.False(t, exists)
if tc.endpointExists {
_, exists = server.podConfigurator.ifConfigurator.(*ifConfigurator).getEndpoint(ovsPortName)
Expand Down
25 changes: 23 additions & 2 deletions pkg/agent/cniserver/testing/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,31 @@

package testing

import "fmt"
import (
"fmt"
"strings"
)

const argsFormat = "IgnoreUnknown=1;K8S_POD_NAMESPACE=%s;K8S_POD_NAME=%s;K8S_POD_INFRA_CONTAINER_ID=%s"

func GenerateCNIArgs(podName string, podNamespace string, podInfraContainerID string) string {
func GenerateCNIArgs(podName, podNamespace, podInfraContainerID string) string {
return fmt.Sprintf(argsFormat, podNamespace, podName, podInfraContainerID)
}

func ParseCNIArgs(args string) (podName, podNamespace, podInfraContainerID string) {
strs := strings.Split(args, ";")
for _, str := range strs {
fields := strings.Split(str, "=")
if len(fields) == 2 {
switch fields[0] {
case "K8S_POD_NAMESPACE":
podNamespace = fields[1]
case "K8S_POD_NAME":
podName = fields[1]
case "K8S_POD_INFRA_CONTAINER_ID":
podInfraContainerID = fields[1]
}
}
}
return
}
Loading