diff --git a/build/yamls/antrea-windows.yml b/build/yamls/antrea-windows.yml index 009fa3db664..3f600859e87 100644 --- a/build/yamls/antrea-windows.yml +++ b/build/yamls/antrea-windows.yml @@ -71,6 +71,16 @@ data: # the flow collector. # Flow export frequency should be greater than or equal to 1. #flowExportFrequency: 12 + + # Determines how traffic is encapsulated. It has the following options: + # encap(default): Inter-node Pod traffic is always encapsulated and Pod to external network + # traffic is SNAT'd. + # noEncap: Inter-node Pod traffic is not encapsulated; Pod to external network traffic is + # SNAT'd if noSNAT is not set to true. Underlying network must be capable of + # supporting Pod traffic across IP subnets. + # hybrid: noEncap if source and destination Nodes are on the same subnet, otherwise encap. + # + #trafficEncapMode: encap antrea-cni.conflist: | { "cniVersion":"0.3.0", @@ -89,7 +99,7 @@ kind: ConfigMap metadata: labels: app: antrea - name: antrea-windows-config-kc6bfhk4mg + name: antrea-windows-config-4kcddbh9tt namespace: kube-system --- apiVersion: apps/v1 @@ -177,7 +187,7 @@ spec: operator: Exists volumes: - configMap: - name: antrea-windows-config-kc6bfhk4mg + name: antrea-windows-config-4kcddbh9tt name: antrea-windows-config - configMap: defaultMode: 420 diff --git a/build/yamls/windows/base/conf/antrea-agent.conf b/build/yamls/windows/base/conf/antrea-agent.conf index c1fc62f0914..a4e3663346a 100644 --- a/build/yamls/windows/base/conf/antrea-agent.conf +++ b/build/yamls/windows/base/conf/antrea-agent.conf @@ -53,3 +53,13 @@ featureGates: # the flow collector. # Flow export frequency should be greater than or equal to 1. #flowExportFrequency: 12 + +# Determines how traffic is encapsulated. It has the following options: +# encap(default): Inter-node Pod traffic is always encapsulated and Pod to external network +# traffic is SNAT'd. +# noEncap: Inter-node Pod traffic is not encapsulated; Pod to external network traffic is +# SNAT'd if noSNAT is not set to true. Underlying network must be capable of +# supporting Pod traffic across IP subnets. +# hybrid: noEncap if source and destination Nodes are on the same subnet, otherwise encap. +# +#trafficEncapMode: encap diff --git a/cmd/antrea-agent/options_windows.go b/cmd/antrea-agent/options_windows.go index 6489d286f55..a174e2a7d34 100644 --- a/cmd/antrea-agent/options_windows.go +++ b/cmd/antrea-agent/options_windows.go @@ -42,7 +42,7 @@ func (o *Options) checkUnsupportedFeatures() error { unsupported = append(unsupported, "OVSDatapathType: "+o.config.OVSDatapathType) } _, encapMode := config.GetTrafficEncapModeFromStr(o.config.TrafficEncapMode) - if encapMode != config.TrafficEncapModeEncap { + if encapMode == config.TrafficEncapModeNetworkPolicyOnly { unsupported = append(unsupported, "TrafficEncapMode: "+encapMode.String()) } if o.config.TunnelType == ovsconfig.GRETunnel { diff --git a/cmd/antrea-agent/options_windows_test.go b/cmd/antrea-agent/options_windows_test.go index e3e1c0ece25..ca8fee8af78 100644 --- a/cmd/antrea-agent/options_windows_test.go +++ b/cmd/antrea-agent/options_windows_test.go @@ -57,7 +57,7 @@ func TestCheckUnsupportedFeatures(t *testing.T) { { "noEncap mode", AgentConfig{TrafficEncapMode: config.TrafficEncapModeNoEncap.String()}, - false, + true, }, { "GRE tunnel", diff --git a/pkg/agent/controller/noderoute/node_route_controller.go b/pkg/agent/controller/noderoute/node_route_controller.go index acfd4aa1ddf..2efb1bb1d0e 100644 --- a/pkg/agent/controller/noderoute/node_route_controller.go +++ b/pkg/agent/controller/noderoute/node_route_controller.go @@ -35,6 +35,7 @@ import ( "github.com/vmware-tanzu/antrea/pkg/agent/interfacestore" "github.com/vmware-tanzu/antrea/pkg/agent/openflow" "github.com/vmware-tanzu/antrea/pkg/agent/route" + "github.com/vmware-tanzu/antrea/pkg/agent/types" "github.com/vmware-tanzu/antrea/pkg/agent/util" "github.com/vmware-tanzu/antrea/pkg/ovs/ovsconfig" utilip "github.com/vmware-tanzu/antrea/pkg/util/ip" @@ -461,6 +462,10 @@ func (c *Controller) addNodeRoute(nodeName string, node *corev1.Node) error { klog.Errorf("Failed to retrieve IP address of Node %s: %v", nodeName, err) return nil } + peerNodeMAC, err := GetNodeMAC(node) + if err != nil { + klog.Infof("Error when retrieving MAC of Node %s: %v", nodeName, err) + } ipsecTunOFPort := int32(0) if c.networkConfig.EnableIPSecTunnel { @@ -476,7 +481,8 @@ func (c *Controller) addNodeRoute(nodeName string, node *corev1.Node) error { nodeName, peerConfig, peerNodeIP, - uint32(ipsecTunOFPort)) + uint32(ipsecTunOFPort), + peerNodeMAC) if err != nil { return fmt.Errorf("failed to install flows to Node %s: %v", nodeName, err) } @@ -653,3 +659,17 @@ func (c *Controller) IPInPodSubnets(ip net.IP) bool { nodeInCluster, _ := c.installedNodes.ByIndex(nodeRouteInfoPodCIDRIndexName, ipCIDRStr) return len(nodeInCluster) > 0 || ipCIDRStr == curNodeCIDRStr } + +// GetNodeMAC gets Node's br-int MAC from its annotation. It is for Windows Noencap mode only. +func GetNodeMAC(node *corev1.Node) (net.HardwareAddr, error) { + macStr := node.Annotations[types.NodeMACAddressAnnotationKey] + if macStr == "" { + return nil, fmt.Errorf("Node MAC address annotation is empty") + } else { + mac, err := net.ParseMAC(macStr) + if err != nil { + return nil, err + } + return mac, nil + } +} diff --git a/pkg/agent/controller/noderoute/node_route_controller_test.go b/pkg/agent/controller/noderoute/node_route_controller_test.go index df175ca4db2..b73fa269882 100644 --- a/pkg/agent/controller/noderoute/node_route_controller_test.go +++ b/pkg/agent/controller/noderoute/node_route_controller_test.go @@ -133,7 +133,7 @@ func TestControllerWithDuplicatePodCIDR(t *testing.T) { c.clientset.CoreV1().Nodes().Create(context.TODO(), node1, metav1.CreateOptions{}) // The 2nd argument is Any() because the argument is unpredictable when it uses pointer as the key of map. // The argument type is map[*net.IPNet]net.IP. - c.ofClient.EXPECT().InstallNodeFlows("node1", gomock.Any(), nodeIP1, uint32(0)).Times(1) + c.ofClient.EXPECT().InstallNodeFlows("node1", gomock.Any(), nodeIP1, uint32(0), nil).Times(1) c.routeClient.EXPECT().AddRoutes(podCIDR, "node1", nodeIP1, podCIDRGateway).Times(1) c.processNextWorkItem() @@ -150,7 +150,7 @@ func TestControllerWithDuplicatePodCIDR(t *testing.T) { // After node1 is deleted, routes and flows should be installed for node2 successfully. // The 2nd argument is Any() because the argument is unpredictable when it uses pointer as the key of map. // The argument type is map[*net.IPNet]net.IP. - c.ofClient.EXPECT().InstallNodeFlows("node2", gomock.Any(), nodeIP2, uint32(0)).Times(1) + c.ofClient.EXPECT().InstallNodeFlows("node2", gomock.Any(), nodeIP2, uint32(0), nil).Times(1) c.routeClient.EXPECT().AddRoutes(podCIDR, "node2", nodeIP2, podCIDRGateway).Times(1) c.processNextWorkItem() }() diff --git a/pkg/agent/openflow/client.go b/pkg/agent/openflow/client.go index 8de20939d55..3ba169b36c3 100644 --- a/pkg/agent/openflow/client.go +++ b/pkg/agent/openflow/client.go @@ -27,6 +27,7 @@ import ( "github.com/vmware-tanzu/antrea/pkg/agent/types" "github.com/vmware-tanzu/antrea/pkg/agent/util" binding "github.com/vmware-tanzu/antrea/pkg/ovs/openflow" + "github.com/vmware-tanzu/antrea/pkg/util/runtime" "github.com/vmware-tanzu/antrea/third_party/proxy" ) @@ -68,7 +69,8 @@ type Client interface { hostname string, peerConfigs map[*net.IPNet]net.IP, tunnelPeerIP net.IP, - ipsecTunOFPort uint32) error + ipsecTunOFPort uint32, + peerNodeMAC net.HardwareAddr) error // UninstallNodeFlows removes the connection to the remote Node specified with the // hostname. UninstallNodeFlows will do nothing if no connection to the host was established. @@ -335,7 +337,8 @@ func (c *client) deleteFlows(cache *flowCategoryCache, flowCacheKey string) erro func (c *client) InstallNodeFlows(hostname string, peerConfigs map[*net.IPNet]net.IP, tunnelPeerIP net.IP, - ipsecTunOFPort uint32) error { + ipsecTunOFPort uint32, + remoteGatewayMAC net.HardwareAddr) error { c.replayMutex.RLock() defer c.replayMutex.RUnlock() @@ -352,6 +355,8 @@ func (c *client) InstallNodeFlows(hostname string, // tunnelPeerIP is the Node Internal Address. In a dual-stack setup, whether this address is an IPv4 address or an // IPv6 one is decided by the address family of Node Internal Address. flows = append(flows, c.l3FwdFlowToRemote(localGatewayMAC, *peerPodCIDR, tunnelPeerIP, cookie.Node)) + } else if runtime.IsWindowsPlatform() && !c.encapMode.NeedsRoutingToPeer(tunnelPeerIP, c.nodeConfig.NodeIPAddr) && remoteGatewayMAC != nil { + flows = append(flows, c.l3FwdFlowToRemoteViaRouting(remoteGatewayMAC, *peerPodCIDR, cookie.Node)...) } else { flows = append(flows, c.l3FwdFlowToRemoteViaGW(localGatewayMAC, *peerPodCIDR, cookie.Node)) } @@ -580,6 +585,14 @@ func (c *client) InstallGatewayFlows() error { if c.encapMode.SupportsEncap() { flows = append(flows, c.l3FwdFlowToGateway(gatewayIPs, gatewayConfig.MAC, cookie.Default)...) } + if runtime.IsWindowsPlatform() && c.encapMode.SupportsNoEncap() { + if gatewayConfig.IPv4 != nil { + flows = append(flows, c.l3FwdFlowRouteToWindowsGW(gatewayConfig.MAC, gatewayConfig.IPv4, cookie.Default)...) + } + if gatewayConfig.IPv6 != nil { + flows = append(flows, c.l3FwdFlowRouteToWindowsGW(gatewayConfig.MAC, gatewayConfig.IPv6, cookie.Default)...) + } + } if err := c.ofEntryOperations.AddAll(flows); err != nil { return err diff --git a/pkg/agent/openflow/client_test.go b/pkg/agent/openflow/client_test.go index 8528086037b..299e1eda703 100644 --- a/pkg/agent/openflow/client_test.go +++ b/pkg/agent/openflow/client_test.go @@ -57,7 +57,7 @@ func installNodeFlows(ofClient Client, cacheKey string) (int, error) { peerConfig := map[*net.IPNet]net.IP{ ipNet: gwIP, } - err := ofClient.InstallNodeFlows(hostName, peerConfig, peerNodeIP, 0) + err := ofClient.InstallNodeFlows(hostName, peerConfig, peerNodeIP, 0, nil) client := ofClient.(*client) fCacheI, ok := client.nodeFlowCache.Load(hostName) if ok { diff --git a/pkg/agent/openflow/pipeline.go b/pkg/agent/openflow/pipeline.go index 9c975d98307..b022ba0f8a3 100644 --- a/pkg/agent/openflow/pipeline.go +++ b/pkg/agent/openflow/pipeline.go @@ -234,6 +234,7 @@ const ( macRewriteMark = 0b1 // cnpDenyMark indicates the packet is denied(Drop/Reject). cnpDenyMark = 0b1 + bypassHostMark = 0b11 // gatewayCTMark is used to to mark connections initiated through the host gateway interface // (i.e. for which the first packet of the connection was received through the gateway). @@ -2155,3 +2156,41 @@ func NewClient(bridgeName, mgmtAddr string, ovsDatapathType ovsconfig.OVSDatapat c.generatePipeline() return c } + +// l3FwdFlowRouteToWindowsGW adds a flow table to forward traffic to antrea-gw0 on Windows in NoEncap mode. It is for Windows. +func (c *client) l3FwdFlowRouteToWindowsGW(localGatewayMAC net.HardwareAddr, localGatewayIP net.IP, category cookie.Category) []binding.Flow { + flows := []binding.Flow{ + c.pipeline[l3ForwardingTable].BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolIP). + MatchRegRange(int(marksReg), macRewriteMark, macRewriteMarkRange). + MatchDstIP(localGatewayIP). + Action().SetDstMAC(localGatewayMAC). + Action().GotoTable(l3DecTTLTable). + Cookie(c.cookieAllocator.Request(category).Raw()). + Done(), + } + return flows +} + +// l3FwdFlowToRemoteViaRouting enhances Windows Noencap mode performance by bypassing host network. +func (c *client) l3FwdFlowToRemoteViaRouting( + gatewayMAC net.HardwareAddr, + peerSubnet net.IPNet, + category cookie.Category) []binding.Flow { + var flows []binding.Flow + ipProto := getIPProtocol(peerSubnet.IP) + l3FwdTable := c.pipeline[l3ForwardingTable] + flows = append(flows, l3FwdTable.BuildFlow(priorityNormal).MatchProtocol(ipProto). + MatchDstIPNet(peerSubnet). + Action().SetDstMAC(gatewayMAC). + Action().GotoTable(l3FwdTable.GetNext()). + Cookie(c.cookieAllocator.Request(category).Raw()). + Done()) + flows = append(flows, c.pipeline[l2ForwardingCalcTable].BuildFlow(priorityNormal). + MatchDstMAC(gatewayMAC). + Action().LoadRegRange(int(PortCacheReg), bypassHostMark, ofPortRegRange). + Action().LoadRegRange(int(marksReg), macRewriteMark, ofPortMarkRange). + Action().GotoTable(conntrackCommitTable). + Cookie(c.cookieAllocator.Request(category).Raw()). + Done()) + return flows +} diff --git a/pkg/agent/openflow/pipeline_windows.go b/pkg/agent/openflow/pipeline_windows.go index d69bb7f19c4..4964837a544 100644 --- a/pkg/agent/openflow/pipeline_windows.go +++ b/pkg/agent/openflow/pipeline_windows.go @@ -281,5 +281,17 @@ func (c *client) hostBridgeUplinkFlows(localSubnet net.IPNet, category cookie.Ca Cookie(c.cookieAllocator.Request(category).Raw()). Done(), } + if c.encapMode.SupportsNoEncap() { + // If NoEncap is enabled, the reply packets from remote Pod can be forwarded to local Pod directly. + // by explicitly resubmitting them to endpointDNATTable and marking "macRewriteMark" at same time. + flows = append(flows, c.pipeline[conntrackStateTable].BuildFlow(priorityHigh).MatchProtocol(binding.ProtocolIP). + MatchRegRange(int(marksReg), markTrafficFromUplink, binding.Range{0, 15}). + MatchDstIPNet(localSubnet). + Action().LoadRegRange(int(marksReg), macRewriteMark, macRewriteMarkRange). + Action().GotoTable(endpointDNATTable). + Cookie(c.cookieAllocator.Request(category).Raw()). + Done()) + + } return flows } diff --git a/pkg/agent/openflow/testing/mock_openflow.go b/pkg/agent/openflow/testing/mock_openflow.go index a926b31d344..716dde07ccb 100644 --- a/pkg/agent/openflow/testing/mock_openflow.go +++ b/pkg/agent/openflow/testing/mock_openflow.go @@ -349,17 +349,17 @@ func (mr *MockClientMockRecorder) InstallLoadBalancerServiceFromOutsideFlows(arg } // InstallNodeFlows mocks base method -func (m *MockClient) InstallNodeFlows(arg0 string, arg1 map[*net.IPNet]net.IP, arg2 net.IP, arg3 uint32) error { +func (m *MockClient) InstallNodeFlows(arg0 string, arg1 map[*net.IPNet]net.IP, arg2 net.IP, arg3 uint32, arg4 net.HardwareAddr) error { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "InstallNodeFlows", arg0, arg1, arg2, arg3) + ret := m.ctrl.Call(m, "InstallNodeFlows", arg0, arg1, arg2, arg3, arg4) ret0, _ := ret[0].(error) return ret0 } // InstallNodeFlows indicates an expected call of InstallNodeFlows -func (mr *MockClientMockRecorder) InstallNodeFlows(arg0, arg1, arg2, arg3 interface{}) *gomock.Call { +func (mr *MockClientMockRecorder) InstallNodeFlows(arg0, arg1, arg2, arg3, arg4 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InstallNodeFlows", reflect.TypeOf((*MockClient)(nil).InstallNodeFlows), arg0, arg1, arg2, arg3) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InstallNodeFlows", reflect.TypeOf((*MockClient)(nil).InstallNodeFlows), arg0, arg1, arg2, arg3, arg4) } // InstallPodFlows mocks base method diff --git a/pkg/agent/route/route_windows.go b/pkg/agent/route/route_windows.go index 7eb9033d320..0a691c4dfab 100644 --- a/pkg/agent/route/route_windows.go +++ b/pkg/agent/route/route_windows.go @@ -18,6 +18,7 @@ package route import ( "errors" + "fmt" "net" "sync" @@ -36,11 +37,13 @@ const ( ) type Client struct { - nr netroute.Interface - nodeConfig *config.NodeConfig - serviceCIDR *net.IPNet - hostRoutes *sync.Map - fwClient *winfirewall.Client + nr netroute.Interface + nodeConfig *config.NodeConfig + networkConfig *config.NetworkConfig + serviceCIDR *net.IPNet + hostRoutes *sync.Map + fwClient *winfirewall.Client + bridgeInfIndex int } // NewClient returns a route client. @@ -48,10 +51,11 @@ type Client struct { func NewClient(serviceCIDR *net.IPNet, networkConfig *config.NetworkConfig, noSNAT bool) (*Client, error) { nr := netroute.New() return &Client{ - nr: nr, - serviceCIDR: serviceCIDR, - hostRoutes: &sync.Map{}, - fwClient: winfirewall.NewClient(), + nr: nr, + networkConfig: networkConfig, + serviceCIDR: serviceCIDR, + hostRoutes: &sync.Map{}, + fwClient: winfirewall.NewClient(), }, nil } @@ -59,6 +63,11 @@ func NewClient(serviceCIDR *net.IPNet, networkConfig *config.NetworkConfig, noSN // Service LoadBalancing is provided by OpenFlow. func (c *Client) Initialize(nodeConfig *config.NodeConfig, done func()) error { c.nodeConfig = nodeConfig + bridgeInf, err := net.InterfaceByName(nodeConfig.OVSBridge) + if err != nil { + return fmt.Errorf("failed to find the interface %s: %v", nodeConfig.OVSBridge, err) + } + c.bridgeInfIndex = bridgeInf.Index if err := c.initFwRules(); err != nil { return err } @@ -88,6 +97,10 @@ func (c *Client) Reconcile(podCIDRs []string) error { c.hostRoutes.Store(dst, rt) continue } + // If the route is not for uplink interface, ignore it. + if c.nodeConfig.UplinkNetConfig != nil && rt.LinkIndex != c.nodeConfig.UplinkNetConfig.Index { + continue + } err := c.nr.RemoveNetRoute(rt.LinkIndex, rt.DestinationSubnet, rt.GatewayAddress) if err != nil { return err @@ -100,21 +113,42 @@ func (c *Client) Reconcile(podCIDRs []string) error { // It overrides the routes if they already exist, without error. func (c *Client) AddRoutes(podCIDR *net.IPNet, nodeName string, peerNodeIP, peerGwIP net.IP) error { obj, found := c.hostRoutes.Load(podCIDR.String()) + route := &netroute.Route{ + DestinationSubnet: podCIDR, + } + if c.networkConfig.TrafficEncapMode.NeedsEncapToPeer(peerNodeIP, c.nodeConfig.NodeIPAddr) { + route.LinkIndex = c.nodeConfig.GatewayConfig.LinkIndex + route.GatewayAddress = peerGwIP + } else if !c.networkConfig.TrafficEncapMode.NeedsRoutingToPeer(peerNodeIP, c.nodeConfig.NodeIPAddr) { + // NoEncap traffic to Node on the same subnet. + // Set the peerNodeIP as next hop. + route.LinkIndex = c.bridgeInfIndex + route.GatewayAddress = peerNodeIP + } + // NoEncap traffic to Node on the different subnet needs underlying routing support. + // Use host default route inside the Node. + if found { - rt := obj.(*netroute.Route) - if rt.GatewayAddress.Equal(peerGwIP) { + existingRoute := obj.(*netroute.Route) + if existingRoute.GatewayAddress.Equal(route.GatewayAddress) { klog.V(4).Infof("Route with destination %s already exists on %s (%s)", podCIDR.String(), nodeName, peerNodeIP) return nil } // Remove the existing route entry if the gateway address is not as expected. - if err := c.nr.RemoveNetRoute(rt.LinkIndex, rt.DestinationSubnet, rt.GatewayAddress); err != nil { + if err := c.nr.RemoveNetRoute(existingRoute.LinkIndex, existingRoute.DestinationSubnet, existingRoute.GatewayAddress); err != nil { klog.Errorf("Failed to delete existing route entry with destination %s gateway %s on %s (%s)", podCIDR.String(), peerGwIP.String(), nodeName, peerNodeIP) return err } } - if err := c.nr.NewNetRoute(c.nodeConfig.GatewayConfig.LinkIndex, podCIDR, peerGwIP); err != nil { + + if route.GatewayAddress == nil { + return nil + } + + if err := c.nr.NewNetRoute(route.LinkIndex, route.DestinationSubnet, route.GatewayAddress); err != nil { return err } + c.hostRoutes.Store(podCIDR.String(), &netroute.Route{ LinkIndex: c.nodeConfig.GatewayConfig.LinkIndex, DestinationSubnet: podCIDR, @@ -165,7 +199,7 @@ func (c *Client) listRoutes() (map[string]*netroute.Route, error) { rtMap := make(map[string]*netroute.Route) for idx := range routes { rt := routes[idx] - if rt.LinkIndex != c.nodeConfig.GatewayConfig.LinkIndex { + if rt.LinkIndex != c.nodeConfig.GatewayConfig.LinkIndex && rt.LinkIndex != c.bridgeInfIndex { continue } // Only process IPv4 route entries in the loop. diff --git a/pkg/agent/route/route_windows_test.go b/pkg/agent/route/route_windows_test.go index 68e02a34646..ce8a9a098c9 100644 --- a/pkg/agent/route/route_windows_test.go +++ b/pkg/agent/route/route_windows_test.go @@ -56,6 +56,7 @@ func TestRouteOperation(t *testing.T) { client, err := NewClient(serviceCIDR, &config.NetworkConfig{}, false) require.Nil(t, err) nodeConfig := &config.NodeConfig{ + OVSBridge: "Loopback Pseudo-Interface 1", GatewayConfig: &config.GatewayConfig{ Name: hostGateway, LinkIndex: gwLink, diff --git a/test/integration/agent/openflow_test.go b/test/integration/agent/openflow_test.go index 36276752f07..ffe0d4ae7d3 100644 --- a/test/integration/agent/openflow_test.go +++ b/test/integration/agent/openflow_test.go @@ -290,7 +290,7 @@ func testInstallNodeFlows(t *testing.T, config *testConfig) { peerConfig := map[*net.IPNet]net.IP{ &node.subnet: node.gateway, } - err := c.InstallNodeFlows(node.name, peerConfig, node.nodeAddress, 0) + err := c.InstallNodeFlows(node.name, peerConfig, node.nodeAddress, 0, nil) if err != nil { t.Fatalf("Failed to install Openflow entries for node connectivity: %v", err) }