From 1b1efcec6c4acef4f39512924ad5a67ca2b4e120 Mon Sep 17 00:00:00 2001 From: wenyingd Date: Mon, 3 Jul 2023 14:56:21 +0800 Subject: [PATCH] [VM Agent] Bugfix: antrea-agent failed to delete ExternalNode The issue is seen on a RHEL 8.4 VM on azure cloud, which is configured with dhclient to manage the network interface. The root cause is antrea-agent fails to recover the IP/Routes from host internal interface to uplink after ExternalNode is deleted, because the added IP/Routes is deleted or conflicted with dhclient configuration. Then in the continuous retry with ExternalNode deletion, antrea-agent is blocking at the precheck on the existentance of host internal interface always returns true as the uplink's name is already recovered. Signed-off-by: wenyingd --- .../externalnode/external_node_controller.go | 15 +++++++++-- .../external_node_controller_linux_test.go | 25 +++++++++++++++---- 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/pkg/agent/externalnode/external_node_controller.go b/pkg/agent/externalnode/external_node_controller.go index 33f7c8b485b..ae28d103c84 100644 --- a/pkg/agent/externalnode/external_node_controller.go +++ b/pkg/agent/externalnode/external_node_controller.go @@ -552,12 +552,23 @@ func (c *ExternalNodeController) updateOVSPortsData(interfaceConfig *interfacest func (c *ExternalNodeController) removeOVSPortsAndFlows(interfaceConfig *interfacestore.InterfaceConfig) error { portUUID := interfaceConfig.PortUUID portName := interfaceConfig.InterfaceName + hostIFName := interfaceConfig.InterfaceName + uplinkIfName := util.GenerateUplinkInterfaceName(portName) + + // This is for issue #5111 (https://github.com/antrea-io/antrea/issues/5111), which may happen if an error occurs + // when moving the configuration back from host internal interface to uplink. This logic is run in the second + // try after the error is returned, at this time the host internal interface is already deleted, and the uplink's + // name is recovered. So the ips and routes in "adapterConfig" are actually read from the uplink and no need to + // move the configurations back. The issue was seen on VM with RHEL 8.4 on azure cloud. + if !hostInterfaceExists(uplinkIfName) { + klog.InfoS("The interface with uplink name did not exist on the host, skipping its recovery", "uplinkIfName", uplinkIfName) + return nil + } + if err := c.ofClient.UninstallVMUplinkFlows(portName); err != nil { return fmt.Errorf("failed to uninstall uplink and host port openflow entries, portName %s, err %v", portName, err) } klog.InfoS("Removed the flows installed to forward packet between uplinkPort and hostPort", "hostInterface", portName) - hostIFName := interfaceConfig.InterfaceName - uplinkIfName := util.GenerateUplinkInterfaceName(portName) uplinkPortID := interfaceConfig.UplinkPort.PortUUID iface, addrs, routes, err := getInterfaceConfig(hostIFName) if err != nil { diff --git a/pkg/agent/externalnode/external_node_controller_linux_test.go b/pkg/agent/externalnode/external_node_controller_linux_test.go index a1d79152fe8..358b3778421 100644 --- a/pkg/agent/externalnode/external_node_controller_linux_test.go +++ b/pkg/agent/externalnode/external_node_controller_linux_test.go @@ -87,6 +87,7 @@ func TestUpdateExternalNode(t *testing.T) { curIf *interfacestore.InterfaceConfig syncedExternalNode *v1alpha1.ExternalNode linkByNameCalledTimes int + existingIfaceMap map[string]bool expectedCalls func(mockOFClient *openflowtest.MockClient, mockOVSBridgeClient *ovsconfigtest.MockOVSBridgeClient, mockIfaceStore *interfacestoretest.MockInterfaceStore, mockOVSCtlClient *ovsctltest.MockOVSCtlClient) }{ { @@ -130,6 +131,7 @@ func TestUpdateExternalNode(t *testing.T) { mockOVSBridgeClient.EXPECT().SetPortExternalIDs(intf3.InterfaceName, expectedAttachInfo).Times(1) mockIfaceStore.EXPECT().AddInterface(expectedUpdatedInterface).Times(1) }, + existingIfaceMap: map[string]bool{}, }, { name: "no change for Interface[0]", @@ -139,6 +141,7 @@ func TestUpdateExternalNode(t *testing.T) { curExternalNode: &externalNode1, expectedCalls: func(_ *openflowtest.MockClient, _ *ovsconfigtest.MockOVSBridgeClient, _ *interfacestoretest.MockInterfaceStore, _ *ovsctltest.MockOVSCtlClient) { }, + existingIfaceMap: map[string]bool{}, }, { name: "different interface name", @@ -188,6 +191,11 @@ func TestUpdateExternalNode(t *testing.T) { mockOVSCtlClient.EXPECT().DeleteDPInterface(intf1.InterfaceName).Times(1) mockIfaceStore.EXPECT().DeleteInterface(&intf1).Times(1) }, + existingIfaceMap: map[string]bool{ + ifaceName1: false, + ifaceName2: false, + fmt.Sprintf("%s~", intf1.InterfaceName): true, + }, }, } { t.Run(tt.name, func(t *testing.T) { @@ -196,7 +204,7 @@ func TestUpdateExternalNode(t *testing.T) { defer mockLinkSetUp(nil)() defer mockLinkSetMTU(nil)() defer mockRenameInterface(nil)() - defer mockHostInterfaceExists(false)() + defer mockHostInterfaceExists(tt.existingIfaceMap)() defer mockConfigureLinkAddresses(nil)() defer mockConfigureLinkRoutes(nil)() defer mockLinkByName(t, tt.linkByNameCalledTimes)() @@ -462,7 +470,10 @@ func TestDeleteExternalNode(t *testing.T) { })() defer mockLinkByName(t, 2)() defer mockRenameInterface(nil)() - defer mockHostInterfaceExists(false)() + defer mockHostInterfaceExists(map[string]bool{ + fmt.Sprintf("%s~", intf1.InterfaceName): true, + fmt.Sprintf("%s~", intf2.InterfaceName): true, + })() defer mockConfigureLinkAddresses(nil)() defer mockConfigureLinkRoutes(nil)() mockIfaceStore.EXPECT().GetInterfacesByType(interfacestore.ExternalEntityInterface).Return([]*interfacestore.InterfaceConfig{ @@ -616,10 +627,14 @@ func mockLinkByName(t *testing.T, calledTimes int) func() { } } -func mockHostInterfaceExists(exists bool) func() { +func mockHostInterfaceExists(existingIfaces map[string]bool) func() { originalHostInterfaceExists := hostInterfaceExists - hostInterfaceExists = func(IfName string) bool { - return exists + hostInterfaceExists = func(ifName string) bool { + exists, ok := existingIfaces[ifName] + if ok { + return exists + } + return false } return func() { hostInterfaceExists = originalHostInterfaceExists