Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[VM Agent] Bugfix: antrea-agent failed to delete ExternalNode #5191

Merged
merged 1 commit into from
Jul 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions pkg/agent/externalnode/external_node_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -552,12 +552,23 @@ func (c *ExternalNodeController) updateOVSPortsData(interfaceConfig *interfacest
func (c *ExternalNodeController) removeOVSPortsAndFlows(interfaceConfig *interfacestore.InterfaceConfig) error {
portUUID := interfaceConfig.PortUUID
portName := interfaceConfig.InterfaceName
hostIFName := interfaceConfig.InterfaceName
uplinkIfName := util.GenerateUplinkInterfaceName(portName)

// This is for issue #5111 (https://github.com/antrea-io/antrea/issues/5111), which may happen if an error occurs
// when moving the configuration back from host internal interface to uplink. This logic is run in the second
// try after the error is returned, at this time the host internal interface is already deleted, and the uplink's
// name is recovered. So the ips and routes in "adapterConfig" are actually read from the uplink and no need to
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I may miss some thing here. If the error occurs, the uplink's name is recovered. But ip and routes can still be wrong, right? Our current logic can not handle this case and make it right later?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For your mentioned case, we can not fix it in this change, and it may also break agent without this change.

The motivation for this change is issue #5111 , in which ip address/routes configuration are failed because dhclient is modifying the interface in the meanwhile. Then antrea-agent may fail in the first try, and it still blocks at the check on the existence host internal interface (antrea thought the interface was supposed to not exist because it was removed from OVS and the uplink was not renamed yet), but the fact is the uplink is already renamed. As for the ip address/routes are actually configured back by dhclient.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the background details. It is clear to me now.

// move the configurations back. The issue was seen on VM with RHEL 8.4 on azure cloud.
if !hostInterfaceExists(uplinkIfName) {
klog.InfoS("The interface with uplink name did not exist on the host, skipping its recovery", "uplinkIfName", uplinkIfName)
return nil
}

if err := c.ofClient.UninstallVMUplinkFlows(portName); err != nil {
return fmt.Errorf("failed to uninstall uplink and host port openflow entries, portName %s, err %v", portName, err)
}
klog.InfoS("Removed the flows installed to forward packet between uplinkPort and hostPort", "hostInterface", portName)
hostIFName := interfaceConfig.InterfaceName
uplinkIfName := util.GenerateUplinkInterfaceName(portName)
uplinkPortID := interfaceConfig.UplinkPort.PortUUID
iface, addrs, routes, err := getInterfaceConfig(hostIFName)
if err != nil {
Expand Down
25 changes: 20 additions & 5 deletions pkg/agent/externalnode/external_node_controller_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ func TestUpdateExternalNode(t *testing.T) {
curIf *interfacestore.InterfaceConfig
syncedExternalNode *v1alpha1.ExternalNode
linkByNameCalledTimes int
existingIfaceMap map[string]bool
expectedCalls func(mockOFClient *openflowtest.MockClient, mockOVSBridgeClient *ovsconfigtest.MockOVSBridgeClient, mockIfaceStore *interfacestoretest.MockInterfaceStore, mockOVSCtlClient *ovsctltest.MockOVSCtlClient)
}{
{
Expand Down Expand Up @@ -130,6 +131,7 @@ func TestUpdateExternalNode(t *testing.T) {
mockOVSBridgeClient.EXPECT().SetPortExternalIDs(intf3.InterfaceName, expectedAttachInfo).Times(1)
mockIfaceStore.EXPECT().AddInterface(expectedUpdatedInterface).Times(1)
},
existingIfaceMap: map[string]bool{},
},
{
name: "no change for Interface[0]",
Expand All @@ -139,6 +141,7 @@ func TestUpdateExternalNode(t *testing.T) {
curExternalNode: &externalNode1,
expectedCalls: func(_ *openflowtest.MockClient, _ *ovsconfigtest.MockOVSBridgeClient, _ *interfacestoretest.MockInterfaceStore, _ *ovsctltest.MockOVSCtlClient) {
},
existingIfaceMap: map[string]bool{},
},
{
name: "different interface name",
Expand Down Expand Up @@ -188,6 +191,11 @@ func TestUpdateExternalNode(t *testing.T) {
mockOVSCtlClient.EXPECT().DeleteDPInterface(intf1.InterfaceName).Times(1)
mockIfaceStore.EXPECT().DeleteInterface(&intf1).Times(1)
},
existingIfaceMap: map[string]bool{
ifaceName1: false,
ifaceName2: false,
fmt.Sprintf("%s~", intf1.InterfaceName): true,
},
},
} {
t.Run(tt.name, func(t *testing.T) {
Expand All @@ -196,7 +204,7 @@ func TestUpdateExternalNode(t *testing.T) {
defer mockLinkSetUp(nil)()
defer mockLinkSetMTU(nil)()
defer mockRenameInterface(nil)()
defer mockHostInterfaceExists(false)()
defer mockHostInterfaceExists(tt.existingIfaceMap)()
defer mockConfigureLinkAddresses(nil)()
defer mockConfigureLinkRoutes(nil)()
defer mockLinkByName(t, tt.linkByNameCalledTimes)()
Expand Down Expand Up @@ -462,7 +470,10 @@ func TestDeleteExternalNode(t *testing.T) {
})()
defer mockLinkByName(t, 2)()
defer mockRenameInterface(nil)()
defer mockHostInterfaceExists(false)()
defer mockHostInterfaceExists(map[string]bool{
fmt.Sprintf("%s~", intf1.InterfaceName): true,
fmt.Sprintf("%s~", intf2.InterfaceName): true,
})()
defer mockConfigureLinkAddresses(nil)()
defer mockConfigureLinkRoutes(nil)()
mockIfaceStore.EXPECT().GetInterfacesByType(interfacestore.ExternalEntityInterface).Return([]*interfacestore.InterfaceConfig{
Expand Down Expand Up @@ -616,10 +627,14 @@ func mockLinkByName(t *testing.T, calledTimes int) func() {
}
}

func mockHostInterfaceExists(exists bool) func() {
func mockHostInterfaceExists(existingIfaces map[string]bool) func() {
originalHostInterfaceExists := hostInterfaceExists
hostInterfaceExists = func(IfName string) bool {
return exists
hostInterfaceExists = func(ifName string) bool {
exists, ok := existingIfaces[ifName]
if ok {
return exists
}
return false
}
return func() {
hostInterfaceExists = originalHostInterfaceExists
Expand Down