From 6933039a8fdef6bfa4a7e506c582c96b0b120495 Mon Sep 17 00:00:00 2001 From: Zhecheng Li Date: Mon, 10 May 2021 18:12:20 +0800 Subject: [PATCH] [Windows] NoEncap support Co-authored-by: Zhecheng Li --- build/yamls/antrea-windows.yml | 14 ++++- .../yamls/windows/base/conf/antrea-agent.conf | 10 +++ cmd/antrea-agent/options_windows.go | 2 +- cmd/antrea-agent/options_windows_test.go | 2 +- .../noderoute/node_route_controller.go | 35 +++++++++-- .../noderoute/node_route_controller_test.go | 8 +-- pkg/agent/openflow/client.go | 39 +++++++++++- pkg/agent/openflow/client_test.go | 2 +- pkg/agent/openflow/pipeline.go | 39 +++++++++++- pkg/agent/openflow/pipeline_windows.go | 12 ++++ pkg/agent/openflow/testing/mock_openflow.go | 8 +-- pkg/agent/route/route_windows.go | 62 ++++++++++++++----- pkg/agent/route/route_windows_test.go | 1 + test/integration/agent/openflow_test.go | 2 +- 14 files changed, 198 insertions(+), 38 deletions(-) diff --git a/build/yamls/antrea-windows.yml b/build/yamls/antrea-windows.yml index 009fa3db664..3f600859e87 100644 --- a/build/yamls/antrea-windows.yml +++ b/build/yamls/antrea-windows.yml @@ -71,6 +71,16 @@ data: # the flow collector. # Flow export frequency should be greater than or equal to 1. #flowExportFrequency: 12 + + # Determines how traffic is encapsulated. It has the following options: + # encap(default): Inter-node Pod traffic is always encapsulated and Pod to external network + # traffic is SNAT'd. + # noEncap: Inter-node Pod traffic is not encapsulated; Pod to external network traffic is + # SNAT'd if noSNAT is not set to true. Underlying network must be capable of + # supporting Pod traffic across IP subnets. + # hybrid: noEncap if source and destination Nodes are on the same subnet, otherwise encap. + # + #trafficEncapMode: encap antrea-cni.conflist: | { "cniVersion":"0.3.0", @@ -89,7 +99,7 @@ kind: ConfigMap metadata: labels: app: antrea - name: antrea-windows-config-kc6bfhk4mg + name: antrea-windows-config-4kcddbh9tt namespace: kube-system --- apiVersion: apps/v1 @@ -177,7 +187,7 @@ spec: operator: Exists volumes: - configMap: - name: antrea-windows-config-kc6bfhk4mg + name: antrea-windows-config-4kcddbh9tt name: antrea-windows-config - configMap: defaultMode: 420 diff --git a/build/yamls/windows/base/conf/antrea-agent.conf b/build/yamls/windows/base/conf/antrea-agent.conf index c1fc62f0914..a4e3663346a 100644 --- a/build/yamls/windows/base/conf/antrea-agent.conf +++ b/build/yamls/windows/base/conf/antrea-agent.conf @@ -53,3 +53,13 @@ featureGates: # the flow collector. # Flow export frequency should be greater than or equal to 1. #flowExportFrequency: 12 + +# Determines how traffic is encapsulated. It has the following options: +# encap(default): Inter-node Pod traffic is always encapsulated and Pod to external network +# traffic is SNAT'd. +# noEncap: Inter-node Pod traffic is not encapsulated; Pod to external network traffic is +# SNAT'd if noSNAT is not set to true. Underlying network must be capable of +# supporting Pod traffic across IP subnets. +# hybrid: noEncap if source and destination Nodes are on the same subnet, otherwise encap. +# +#trafficEncapMode: encap diff --git a/cmd/antrea-agent/options_windows.go b/cmd/antrea-agent/options_windows.go index 483714c75b7..1e72a509201 100644 --- a/cmd/antrea-agent/options_windows.go +++ b/cmd/antrea-agent/options_windows.go @@ -42,7 +42,7 @@ func (o *Options) checkUnsupportedFeatures() error { unsupported = append(unsupported, "OVSDatapathType: "+o.config.OVSDatapathType) } _, encapMode := config.GetTrafficEncapModeFromStr(o.config.TrafficEncapMode) - if encapMode != config.TrafficEncapModeEncap { + if encapMode == config.TrafficEncapModeNetworkPolicyOnly { unsupported = append(unsupported, "TrafficEncapMode: "+encapMode.String()) } if o.config.TunnelType == ovsconfig.GRETunnel { diff --git a/cmd/antrea-agent/options_windows_test.go b/cmd/antrea-agent/options_windows_test.go index 7c5a571854c..f27632b281b 100644 --- a/cmd/antrea-agent/options_windows_test.go +++ b/cmd/antrea-agent/options_windows_test.go @@ -57,7 +57,7 @@ func TestCheckUnsupportedFeatures(t *testing.T) { { "noEncap mode", AgentConfig{TrafficEncapMode: config.TrafficEncapModeNoEncap.String()}, - false, + true, }, { "GRE tunnel", diff --git a/pkg/agent/controller/noderoute/node_route_controller.go b/pkg/agent/controller/noderoute/node_route_controller.go index 26086a7e303..d083af98d00 100644 --- a/pkg/agent/controller/noderoute/node_route_controller.go +++ b/pkg/agent/controller/noderoute/node_route_controller.go @@ -35,6 +35,7 @@ import ( "antrea.io/antrea/pkg/agent/interfacestore" "antrea.io/antrea/pkg/agent/openflow" "antrea.io/antrea/pkg/agent/route" + "antrea.io/antrea/pkg/agent/types" "antrea.io/antrea/pkg/agent/util" "antrea.io/antrea/pkg/ovs/ovsconfig" utilip "antrea.io/antrea/pkg/util/ip" @@ -135,6 +136,7 @@ type nodeRouteInfo struct { podCIDRs []*net.IPNet nodeIP net.IP gatewayIP []net.IP + nodeMAC net.HardwareAddr } // enqueueNode adds an object to the controller work queue @@ -406,8 +408,14 @@ func (c *Controller) deleteNodeRoute(nodeName string) error { } func (c *Controller) addNodeRoute(nodeName string, node *corev1.Node) error { - if _, installed, _ := c.installedNodes.GetByKey(nodeName); installed { - // Route is already added for this Node. + peerNodeMAC, err := getNodeMAC(node) + if err != nil { + klog.Errorf("Error when retrieving MAC of Node %s: %v", nodeName, err) + } + + nrInfo, installed, _ := c.installedNodes.GetByKey(nodeName) + if installed && nrInfo != nil && nrInfo.(*nodeRouteInfo).nodeMAC != nil && nrInfo.(*nodeRouteInfo).nodeMAC.String() == peerNodeMAC.String() { + // Route is already added for this Node and Node MAC isn't changed. return nil } @@ -431,7 +439,7 @@ func (c *Controller) addNodeRoute(nodeName string, node *corev1.Node) error { return nil } - nodesHaveSamePodCIDR, _ := c.installedNodes.ByIndex(nodeRouteInfoPodCIDRIndexName, podCIDR) + nodesHaveSamePodCIDR, _ := c.installedNodes.IndexKeys(nodeRouteInfoPodCIDRIndexName, podCIDR) // PodCIDRs can be released from deleted Nodes and allocated to new Nodes. For server side, it won't happen that a // PodCIDR is allocated to more than one Node at any point. However, for client side, if a resync happens to occur // when there are Node creation and deletion events, the informer will generate the events in a way that all @@ -441,9 +449,9 @@ func (c *Controller) addNodeRoute(nodeName string, node *corev1.Node) error { // stale routes, flows, and relevant cache of this podCIDR are removed appropriately, we wait for the Node deletion // event to be processed before proceeding, or the route installation and uninstallation operations may override or // conflict with each other. - if len(nodesHaveSamePodCIDR) > 0 { + if len(nodesHaveSamePodCIDR) > 0 && (len(nodesHaveSamePodCIDR) != 1 || nodesHaveSamePodCIDR[0] != nodeName) { // Return an error so that the Node will be put back to the workqueue and will be retried later. - return fmt.Errorf("skipping addNodeRoute for Node %s because podCIDR %s is duplicate with Node %s, will retry later", nodeName, podCIDR, nodesHaveSamePodCIDR[0].(*nodeRouteInfo).nodeName) + return fmt.Errorf("skipping addNodeRoute for Node %s because podCIDR %s is duplicate with Node %s, will retry later", nodeName, podCIDR, nodesHaveSamePodCIDR[0]) } peerPodCIDRAddr, peerPodCIDR, err := net.ParseCIDR(podCIDR) @@ -476,7 +484,8 @@ func (c *Controller) addNodeRoute(nodeName string, node *corev1.Node) error { nodeName, peerConfig, peerNodeIP, - uint32(ipsecTunOFPort)) + uint32(ipsecTunOFPort), + peerNodeMAC) if err != nil { return fmt.Errorf("failed to install flows to Node %s: %v", nodeName, err) } @@ -493,6 +502,7 @@ func (c *Controller) addNodeRoute(nodeName string, node *corev1.Node) error { podCIDRs: podCIDRs, nodeIP: peerNodeIP, gatewayIP: peerGatewayIPs, + nodeMAC: peerNodeMAC, }) return err } @@ -653,3 +663,16 @@ func (c *Controller) IPInPodSubnets(ip net.IP) bool { nodeInCluster, _ := c.installedNodes.ByIndex(nodeRouteInfoPodCIDRIndexName, ipCIDRStr) return len(nodeInCluster) > 0 || ipCIDRStr == curNodeCIDRStr } + +// getNodeMAC gets Node's br-int MAC from its annotation. It is for Windows Noencap mode only. +func getNodeMAC(node *corev1.Node) (net.HardwareAddr, error) { + macStr := node.Annotations[types.NodeMACAddressAnnotationKey] + if macStr == "" { + return nil, nil + } + mac, err := net.ParseMAC(macStr) + if err != nil { + return nil, err + } + return mac, nil +} diff --git a/pkg/agent/controller/noderoute/node_route_controller_test.go b/pkg/agent/controller/noderoute/node_route_controller_test.go index 66873b14a05..162055cbe7f 100644 --- a/pkg/agent/controller/noderoute/node_route_controller_test.go +++ b/pkg/agent/controller/noderoute/node_route_controller_test.go @@ -133,7 +133,7 @@ func TestControllerWithDuplicatePodCIDR(t *testing.T) { c.clientset.CoreV1().Nodes().Create(context.TODO(), node1, metav1.CreateOptions{}) // The 2nd argument is Any() because the argument is unpredictable when it uses pointer as the key of map. // The argument type is map[*net.IPNet]net.IP. - c.ofClient.EXPECT().InstallNodeFlows("node1", gomock.Any(), nodeIP1, uint32(0)).Times(1) + c.ofClient.EXPECT().InstallNodeFlows("node1", gomock.Any(), nodeIP1, uint32(0), nil).Times(1) c.routeClient.EXPECT().AddRoutes(podCIDR, "node1", nodeIP1, podCIDRGateway).Times(1) c.processNextWorkItem() @@ -150,7 +150,7 @@ func TestControllerWithDuplicatePodCIDR(t *testing.T) { // After node1 is deleted, routes and flows should be installed for node2 successfully. // The 2nd argument is Any() because the argument is unpredictable when it uses pointer as the key of map. // The argument type is map[*net.IPNet]net.IP. - c.ofClient.EXPECT().InstallNodeFlows("node2", gomock.Any(), nodeIP2, uint32(0)).Times(1) + c.ofClient.EXPECT().InstallNodeFlows("node2", gomock.Any(), nodeIP2, uint32(0), nil).Times(1) c.routeClient.EXPECT().AddRoutes(podCIDR, "node2", nodeIP2, podCIDRGateway).Times(1) c.processNextWorkItem() }() @@ -214,12 +214,12 @@ func TestIPInPodSubnets(t *testing.T) { c.clientset.CoreV1().Nodes().Create(context.TODO(), node1, metav1.CreateOptions{}) // The 2nd argument is Any() because the argument is unpredictable when it uses pointer as the key of map. // The argument type is map[*net.IPNet]net.IP. - c.ofClient.EXPECT().InstallNodeFlows("node1", gomock.Any(), nodeIP1, uint32(0)).Times(1) + c.ofClient.EXPECT().InstallNodeFlows("node1", gomock.Any(), nodeIP1, uint32(0), nil).Times(1) c.routeClient.EXPECT().AddRoutes(podCIDR, "node1", nodeIP1, podCIDRGateway).Times(1) c.processNextWorkItem() c.clientset.CoreV1().Nodes().Create(context.TODO(), node2, metav1.CreateOptions{}) - c.ofClient.EXPECT().InstallNodeFlows("node2", gomock.Any(), nodeIP2, uint32(0)).Times(1) + c.ofClient.EXPECT().InstallNodeFlows("node2", gomock.Any(), nodeIP2, uint32(0), nil).Times(1) c.routeClient.EXPECT().AddRoutes(podCIDR2, "node2", nodeIP2, podCIDR2Gateway).Times(1) c.processNextWorkItem() diff --git a/pkg/agent/openflow/client.go b/pkg/agent/openflow/client.go index d1163fadb8e..f9f657feae0 100644 --- a/pkg/agent/openflow/client.go +++ b/pkg/agent/openflow/client.go @@ -27,6 +27,7 @@ import ( "antrea.io/antrea/pkg/agent/types" "antrea.io/antrea/pkg/agent/util" binding "antrea.io/antrea/pkg/ovs/openflow" + "antrea.io/antrea/pkg/util/runtime" "antrea.io/antrea/third_party/proxy" ) @@ -68,7 +69,8 @@ type Client interface { hostname string, peerConfigs map[*net.IPNet]net.IP, tunnelPeerIP net.IP, - ipsecTunOFPort uint32) error + ipsecTunOFPort uint32, + peerNodeMAC net.HardwareAddr) error // UninstallNodeFlows removes the connection to the remote Node specified with the // hostname. UninstallNodeFlows will do nothing if no connection to the host was established. @@ -290,7 +292,8 @@ func (c *client) IsConnected() bool { // addFlows installs the flows on the OVS bridge and then add them into the flow cache. If the flow cache exists, // it will return immediately, otherwise it will use Bundle to add all flows, and then add them into the flow cache. -// If it fails to add the flows with Bundle, it will return the error and no flow cache is created. +// If it fails to add the flows with Bundle, it will return the error and no flow cache is created. If the force parameter +// is true, flows will be added regardless of flow cache. func (c *client) addFlows(cache *flowCategoryCache, flowCacheKey string, flows []binding.Flow) error { _, ok := cache.Load(flowCacheKey) // If a flow cache entry already exists for the key, return immediately. Otherwise, add the flows to the switch @@ -312,6 +315,20 @@ func (c *client) addFlows(cache *flowCategoryCache, flowCacheKey string, flows [ return nil } +func (c *client) modifyFlows(cache *flowCategoryCache, flowCacheKey string, flows []binding.Flow) error { + err := c.ofEntryOperations.ModifyAll(flows) + if err != nil { + return err + } + fCache := flowCache{} + // Modify the flows in the flow cache. + for _, flow := range flows { + fCache[flow.MatchString()] = flow + } + cache.Store(flowCacheKey, fCache) + return nil +} + // deleteFlows deletes all the flows in the flow cache indexed by the provided flowCacheKey. func (c *client) deleteFlows(cache *flowCategoryCache, flowCacheKey string) error { fCacheI, ok := cache.Load(flowCacheKey) @@ -335,13 +352,15 @@ func (c *client) deleteFlows(cache *flowCategoryCache, flowCacheKey string) erro func (c *client) InstallNodeFlows(hostname string, peerConfigs map[*net.IPNet]net.IP, tunnelPeerIP net.IP, - ipsecTunOFPort uint32) error { + ipsecTunOFPort uint32, + remoteGatewayMAC net.HardwareAddr) error { c.replayMutex.RLock() defer c.replayMutex.RUnlock() var flows []binding.Flow localGatewayMAC := c.nodeConfig.GatewayConfig.MAC + modify := false for peerPodCIDR, peerGatewayIP := range peerConfigs { if peerGatewayIP.To4() != nil { // Since broadcast is not supported in IPv6, ARP should happen only with IPv4 address, and ARP responder flows @@ -352,6 +371,9 @@ func (c *client) InstallNodeFlows(hostname string, // tunnelPeerIP is the Node Internal Address. In a dual-stack setup, whether this address is an IPv4 address or an // IPv6 one is decided by the address family of Node Internal Address. flows = append(flows, c.l3FwdFlowToRemote(localGatewayMAC, *peerPodCIDR, tunnelPeerIP, cookie.Node)) + } else if runtime.IsWindowsPlatform() && !c.encapMode.NeedsRoutingToPeer(tunnelPeerIP, c.nodeConfig.NodeIPAddr) && remoteGatewayMAC != nil { + flows = append(flows, c.l3FwdFlowToRemoteViaRouting(remoteGatewayMAC, *peerPodCIDR, cookie.Node)...) + modify = true } else { flows = append(flows, c.l3FwdFlowToRemoteViaGW(localGatewayMAC, *peerPodCIDR, cookie.Node)) } @@ -364,6 +386,9 @@ func (c *client) InstallNodeFlows(hostname string, flows = append(flows, c.tunnelClassifierFlow(ipsecTunOFPort, cookie.Node)) } + if modify { + return c.modifyFlows(c.nodeFlowCache, hostname, flows) + } return c.addFlows(c.nodeFlowCache, hostname, flows) } @@ -580,6 +605,14 @@ func (c *client) InstallGatewayFlows() error { if c.encapMode.SupportsEncap() { flows = append(flows, c.l3FwdFlowToGateway(gatewayIPs, gatewayConfig.MAC, cookie.Default)...) } + if runtime.IsWindowsPlatform() && c.encapMode.SupportsNoEncap() { + if gatewayConfig.IPv4 != nil { + flows = append(flows, c.l3FwdFlowRouteToWindowsGW(gatewayConfig.MAC, gatewayConfig.IPv4, cookie.Default)...) + } + if gatewayConfig.IPv6 != nil { + flows = append(flows, c.l3FwdFlowRouteToWindowsGW(gatewayConfig.MAC, gatewayConfig.IPv6, cookie.Default)...) + } + } if err := c.ofEntryOperations.AddAll(flows); err != nil { return err diff --git a/pkg/agent/openflow/client_test.go b/pkg/agent/openflow/client_test.go index ec17223bdf4..69e235e830e 100644 --- a/pkg/agent/openflow/client_test.go +++ b/pkg/agent/openflow/client_test.go @@ -57,7 +57,7 @@ func installNodeFlows(ofClient Client, cacheKey string) (int, error) { peerConfig := map[*net.IPNet]net.IP{ ipNet: gwIP, } - err := ofClient.InstallNodeFlows(hostName, peerConfig, peerNodeIP, 0) + err := ofClient.InstallNodeFlows(hostName, peerConfig, peerNodeIP, 0, nil) client := ofClient.(*client) fCacheI, ok := client.nodeFlowCache.Load(hostName) if ok { diff --git a/pkg/agent/openflow/pipeline.go b/pkg/agent/openflow/pipeline.go index 48230e6bab8..5d3aab97ec1 100644 --- a/pkg/agent/openflow/pipeline.go +++ b/pkg/agent/openflow/pipeline.go @@ -486,7 +486,6 @@ func (c *client) changeOFEntries(ofEntries []binding.OFEntry, action ofAction) e } else { return fmt.Errorf("OF Entries Action not exists: %s", action) } - startTime := time.Now() defer func() { d := time.Since(startTime) @@ -2165,3 +2164,41 @@ func NewClient(bridgeName, mgmtAddr string, ovsDatapathType ovsconfig.OVSDatapat c.generatePipeline() return c } + +// l3FwdFlowRouteToWindowsGW adds a flow table to forward traffic to antrea-gw0 on Windows in NoEncap mode. It is for Windows. +func (c *client) l3FwdFlowRouteToWindowsGW(localGatewayMAC net.HardwareAddr, localGatewayIP net.IP, category cookie.Category) []binding.Flow { + flows := []binding.Flow{ + c.pipeline[l3ForwardingTable].BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolIP). + MatchRegRange(int(marksReg), macRewriteMark, macRewriteMarkRange). + MatchDstIP(localGatewayIP). + Action().SetDstMAC(localGatewayMAC). + Action().GotoTable(l3DecTTLTable). + Cookie(c.cookieAllocator.Request(category).Raw()). + Done(), + } + return flows +} + +// l3FwdFlowToRemoteViaRouting enhances Windows Noencap mode performance by bypassing host network. +func (c *client) l3FwdFlowToRemoteViaRouting( + gatewayMAC net.HardwareAddr, + peerSubnet net.IPNet, + category cookie.Category) []binding.Flow { + var flows []binding.Flow + ipProto := getIPProtocol(peerSubnet.IP) + l3FwdTable := c.pipeline[l3ForwardingTable] + flows = append(flows, l3FwdTable.BuildFlow(priorityNormal).MatchProtocol(ipProto). + MatchDstIPNet(peerSubnet). + Action().SetDstMAC(gatewayMAC). + Action().GotoTable(l3FwdTable.GetNext()). + Cookie(c.cookieAllocator.Request(category).Raw()). + Done()) + flows = append(flows, c.pipeline[l2ForwardingCalcTable].BuildFlow(priorityNormal). + MatchDstMAC(gatewayMAC). + Action().LoadRegRange(int(PortCacheReg), config.UplinkOFPort, ofPortRegRange). + Action().LoadRegRange(int(marksReg), macRewriteMark, ofPortMarkRange). + Action().GotoTable(conntrackCommitTable). + Cookie(c.cookieAllocator.Request(category).Raw()). + Done()) + return flows +} diff --git a/pkg/agent/openflow/pipeline_windows.go b/pkg/agent/openflow/pipeline_windows.go index 187d84c5a0a..b65e52eb6b5 100644 --- a/pkg/agent/openflow/pipeline_windows.go +++ b/pkg/agent/openflow/pipeline_windows.go @@ -281,5 +281,17 @@ func (c *client) hostBridgeUplinkFlows(localSubnet net.IPNet, category cookie.Ca Cookie(c.cookieAllocator.Request(category).Raw()). Done(), } + if c.encapMode.SupportsNoEncap() { + // If NoEncap is enabled, the reply packets from remote Pod can be forwarded to local Pod directly. + // by explicitly resubmitting them to endpointDNATTable and marking "macRewriteMark" at same time. + flows = append(flows, c.pipeline[conntrackStateTable].BuildFlow(priorityHigh).MatchProtocol(binding.ProtocolIP). + MatchRegRange(int(marksReg), markTrafficFromUplink, binding.Range{0, 15}). + MatchDstIPNet(localSubnet). + Action().LoadRegRange(int(marksReg), macRewriteMark, macRewriteMarkRange). + Action().GotoTable(endpointDNATTable). + Cookie(c.cookieAllocator.Request(category).Raw()). + Done()) + + } return flows } diff --git a/pkg/agent/openflow/testing/mock_openflow.go b/pkg/agent/openflow/testing/mock_openflow.go index 0f68c6550e8..479e37b5f14 100644 --- a/pkg/agent/openflow/testing/mock_openflow.go +++ b/pkg/agent/openflow/testing/mock_openflow.go @@ -349,17 +349,17 @@ func (mr *MockClientMockRecorder) InstallLoadBalancerServiceFromOutsideFlows(arg } // InstallNodeFlows mocks base method -func (m *MockClient) InstallNodeFlows(arg0 string, arg1 map[*net.IPNet]net.IP, arg2 net.IP, arg3 uint32) error { +func (m *MockClient) InstallNodeFlows(arg0 string, arg1 map[*net.IPNet]net.IP, arg2 net.IP, arg3 uint32, arg4 net.HardwareAddr) error { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "InstallNodeFlows", arg0, arg1, arg2, arg3) + ret := m.ctrl.Call(m, "InstallNodeFlows", arg0, arg1, arg2, arg3, arg4) ret0, _ := ret[0].(error) return ret0 } // InstallNodeFlows indicates an expected call of InstallNodeFlows -func (mr *MockClientMockRecorder) InstallNodeFlows(arg0, arg1, arg2, arg3 interface{}) *gomock.Call { +func (mr *MockClientMockRecorder) InstallNodeFlows(arg0, arg1, arg2, arg3, arg4 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InstallNodeFlows", reflect.TypeOf((*MockClient)(nil).InstallNodeFlows), arg0, arg1, arg2, arg3) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InstallNodeFlows", reflect.TypeOf((*MockClient)(nil).InstallNodeFlows), arg0, arg1, arg2, arg3, arg4) } // InstallPodFlows mocks base method diff --git a/pkg/agent/route/route_windows.go b/pkg/agent/route/route_windows.go index b0d4a3032be..bcdf23db128 100644 --- a/pkg/agent/route/route_windows.go +++ b/pkg/agent/route/route_windows.go @@ -18,6 +18,7 @@ package route import ( "errors" + "fmt" "net" "sync" @@ -36,11 +37,13 @@ const ( ) type Client struct { - nr netroute.Interface - nodeConfig *config.NodeConfig - serviceCIDR *net.IPNet - hostRoutes *sync.Map - fwClient *winfirewall.Client + nr netroute.Interface + nodeConfig *config.NodeConfig + networkConfig *config.NetworkConfig + serviceCIDR *net.IPNet + hostRoutes *sync.Map + fwClient *winfirewall.Client + bridgeInfIndex int } // NewClient returns a route client. @@ -48,10 +51,11 @@ type Client struct { func NewClient(serviceCIDR *net.IPNet, networkConfig *config.NetworkConfig, noSNAT bool) (*Client, error) { nr := netroute.New() return &Client{ - nr: nr, - serviceCIDR: serviceCIDR, - hostRoutes: &sync.Map{}, - fwClient: winfirewall.NewClient(), + nr: nr, + networkConfig: networkConfig, + serviceCIDR: serviceCIDR, + hostRoutes: &sync.Map{}, + fwClient: winfirewall.NewClient(), }, nil } @@ -59,6 +63,11 @@ func NewClient(serviceCIDR *net.IPNet, networkConfig *config.NetworkConfig, noSN // Service LoadBalancing is provided by OpenFlow. func (c *Client) Initialize(nodeConfig *config.NodeConfig, done func()) error { c.nodeConfig = nodeConfig + bridgeInf, err := net.InterfaceByName(nodeConfig.OVSBridge) + if err != nil { + return fmt.Errorf("failed to find the interface %s: %v", nodeConfig.OVSBridge, err) + } + c.bridgeInfIndex = bridgeInf.Index if err := c.initFwRules(); err != nil { return err } @@ -88,6 +97,10 @@ func (c *Client) Reconcile(podCIDRs []string) error { c.hostRoutes.Store(dst, rt) continue } + // If the route is not for uplink interface, ignore it. + if c.nodeConfig.UplinkNetConfig != nil && rt.LinkIndex != c.nodeConfig.UplinkNetConfig.Index { + continue + } err := c.nr.RemoveNetRoute(rt.LinkIndex, rt.DestinationSubnet, rt.GatewayAddress) if err != nil { return err @@ -100,21 +113,42 @@ func (c *Client) Reconcile(podCIDRs []string) error { // It overrides the routes if they already exist, without error. func (c *Client) AddRoutes(podCIDR *net.IPNet, nodeName string, peerNodeIP, peerGwIP net.IP) error { obj, found := c.hostRoutes.Load(podCIDR.String()) + route := &netroute.Route{ + DestinationSubnet: podCIDR, + } + if c.networkConfig.TrafficEncapMode.NeedsEncapToPeer(peerNodeIP, c.nodeConfig.NodeIPAddr) { + route.LinkIndex = c.nodeConfig.GatewayConfig.LinkIndex + route.GatewayAddress = peerGwIP + } else if !c.networkConfig.TrafficEncapMode.NeedsRoutingToPeer(peerNodeIP, c.nodeConfig.NodeIPAddr) { + // NoEncap traffic to Node on the same subnet. + // Set the peerNodeIP as next hop. + route.LinkIndex = c.bridgeInfIndex + route.GatewayAddress = peerNodeIP + } + // NoEncap traffic to Node on the different subnet needs underlying routing support. + // Use host default route inside the Node. + if found { - rt := obj.(*netroute.Route) - if rt.GatewayAddress.Equal(peerGwIP) { + existingRoute := obj.(*netroute.Route) + if existingRoute.GatewayAddress.Equal(route.GatewayAddress) { klog.V(4).Infof("Route with destination %s already exists on %s (%s)", podCIDR.String(), nodeName, peerNodeIP) return nil } // Remove the existing route entry if the gateway address is not as expected. - if err := c.nr.RemoveNetRoute(rt.LinkIndex, rt.DestinationSubnet, rt.GatewayAddress); err != nil { + if err := c.nr.RemoveNetRoute(existingRoute.LinkIndex, existingRoute.DestinationSubnet, existingRoute.GatewayAddress); err != nil { klog.Errorf("Failed to delete existing route entry with destination %s gateway %s on %s (%s)", podCIDR.String(), peerGwIP.String(), nodeName, peerNodeIP) return err } } - if err := c.nr.NewNetRoute(c.nodeConfig.GatewayConfig.LinkIndex, podCIDR, peerGwIP); err != nil { + + if route.GatewayAddress == nil { + return nil + } + + if err := c.nr.NewNetRoute(route.LinkIndex, route.DestinationSubnet, route.GatewayAddress); err != nil { return err } + c.hostRoutes.Store(podCIDR.String(), &netroute.Route{ LinkIndex: c.nodeConfig.GatewayConfig.LinkIndex, DestinationSubnet: podCIDR, @@ -165,7 +199,7 @@ func (c *Client) listRoutes() (map[string]*netroute.Route, error) { rtMap := make(map[string]*netroute.Route) for idx := range routes { rt := routes[idx] - if rt.LinkIndex != c.nodeConfig.GatewayConfig.LinkIndex { + if rt.LinkIndex != c.nodeConfig.GatewayConfig.LinkIndex && rt.LinkIndex != c.bridgeInfIndex { continue } // Only process IPv4 route entries in the loop. diff --git a/pkg/agent/route/route_windows_test.go b/pkg/agent/route/route_windows_test.go index bef208b77af..7ce0d43c428 100644 --- a/pkg/agent/route/route_windows_test.go +++ b/pkg/agent/route/route_windows_test.go @@ -56,6 +56,7 @@ func TestRouteOperation(t *testing.T) { client, err := NewClient(serviceCIDR, &config.NetworkConfig{}, false) require.Nil(t, err) nodeConfig := &config.NodeConfig{ + OVSBridge: "Loopback Pseudo-Interface 1", GatewayConfig: &config.GatewayConfig{ Name: hostGateway, LinkIndex: gwLink, diff --git a/test/integration/agent/openflow_test.go b/test/integration/agent/openflow_test.go index 3b2802a15d6..2b528c1ed87 100644 --- a/test/integration/agent/openflow_test.go +++ b/test/integration/agent/openflow_test.go @@ -290,7 +290,7 @@ func testInstallNodeFlows(t *testing.T, config *testConfig) { peerConfig := map[*net.IPNet]net.IP{ &node.subnet: node.gateway, } - err := c.InstallNodeFlows(node.name, peerConfig, node.nodeAddress, 0) + err := c.InstallNodeFlows(node.name, peerConfig, node.nodeAddress, 0, nil) if err != nil { t.Fatalf("Failed to install Openflow entries for node connectivity: %v", err) }