From 70b6c4755e08f1205768f9f7173e1d34203576d8 Mon Sep 17 00:00:00 2001 From: Wenying Dong Date: Wed, 26 Aug 2020 18:03:10 -0700 Subject: [PATCH] [IPv6] Implement L3 connectivity for IPv6 traffic (#1011) 1. Use IPv6 in iptables and ipset configuration. 2. Identifiy IPv6 address and configure in OpenFlow. 3. Use Node Internal address for tunnel. --- Makefile | 1 + .../noderoute/node_route_controller.go | 74 +++-- pkg/agent/openflow/client.go | 29 +- pkg/agent/openflow/client_test.go | 7 +- pkg/agent/openflow/pipeline.go | 6 +- pkg/agent/openflow/testing/mock_openflow.go | 8 +- pkg/agent/route/interfaces.go | 3 +- pkg/agent/route/route_linux.go | 281 ++++++++++++++---- pkg/agent/util/ipset/ipset.go | 9 +- pkg/agent/util/iptables/iptables.go | 82 +++-- pkg/agent/util/net_linux.go | 15 +- pkg/agent/util/net_windows.go | 5 + test/integration/agent/openflow_test.go | 117 ++++++-- test/integration/agent/route_test.go | 79 +++++ test/integration/ovs/ofctrl_test.go | 12 + 15 files changed, 569 insertions(+), 159 deletions(-) diff --git a/Makefile b/Makefile index a9aafb0d8fa..85f3ea0e201 100644 --- a/Makefile +++ b/Makefile @@ -118,6 +118,7 @@ docker-test-integration: .coverage -v $(DOCKER_CACHE)/gocache:/tmp/gocache \ -v $(CURDIR)/.coverage:/usr/src/github.com/vmware-tanzu/antrea/.coverage \ -v $(CURDIR):/usr/src/github.com/vmware-tanzu/antrea:ro \ + -v /lib/modules:/lib/modules \ antrea/test test-integration $(USERID) $(GRPID) .PHONY: docker-tidy diff --git a/pkg/agent/controller/noderoute/node_route_controller.go b/pkg/agent/controller/noderoute/node_route_controller.go index 07f8a9762ba..89ead75295b 100644 --- a/pkg/agent/controller/noderoute/node_route_controller.go +++ b/pkg/agent/controller/noderoute/node_route_controller.go @@ -150,12 +150,11 @@ func (c *Controller) removeStaleGatewayRoutes() error { // running, so the route to local Pods will be desired as well. var desiredPodCIDRs []string for _, node := range nodes { - // PodCIDR is allocated by K8s NodeIpamController asynchronously so it's possible we see a Node - // with no PodCIDR set when it just joins the cluster. - if node.Spec.PodCIDR == "" { + podCIDRs := getPodCIDRsOnNode(node) + if len(podCIDRs) == 0 { continue } - desiredPodCIDRs = append(desiredPodCIDRs, node.Spec.PodCIDR) + desiredPodCIDRs = append(desiredPodCIDRs, podCIDRs...) } // routeClient will remove orphaned routes whose destinations are not in desiredPodCIDRs. @@ -353,16 +352,17 @@ func (c *Controller) syncNodeRoute(nodeName string) error { func (c *Controller) deleteNodeRoute(nodeName string) error { klog.Infof("Deleting routes and flows to Node %s", nodeName) - podCIDR, installed := c.installedNodes.Load(nodeName) + podCIDRs, installed := c.installedNodes.Load(nodeName) if !installed { // Route is not added for this Node. return nil } - if err := c.routeClient.DeleteRoutes(podCIDR.(*net.IPNet)); err != nil { - return fmt.Errorf("failed to delete the route to Node %s: %v", nodeName, err) + for _, podCIDR := range podCIDRs.([]*net.IPNet) { + if err := c.routeClient.DeleteRoutes(podCIDR); err != nil { + return fmt.Errorf("failed to delete the route to Node %s: %v", nodeName, err) + } } - if err := c.ofClient.UninstallNodeFlows(nodeName); err != nil { return fmt.Errorf("failed to uninstall flows to Node %s: %v", nodeName, err) } @@ -390,25 +390,20 @@ func (c *Controller) addNodeRoute(nodeName string, node *corev1.Node) error { return nil } - klog.Infof("Adding routes and flows to Node %s, podCIDR: %s, addresses: %v", - nodeName, node.Spec.PodCIDR, node.Status.Addresses) - - if node.Spec.PodCIDR == "" { - klog.Errorf("PodCIDR is empty for Node %s", nodeName) - // Does not help to return an error and trigger controller retries. - return nil - } - peerPodCIDRAddr, peerPodCIDR, err := net.ParseCIDR(node.Spec.PodCIDR) - if err != nil { - klog.Errorf("Failed to parse PodCIDR %s for Node %s", node.Spec.PodCIDR, nodeName) + podCIDRStrs := getPodCIDRsOnNode(node) + if len(podCIDRStrs) == 0 { + // If no valid PodCIDR is configured in Node.Spec, return immediately. return nil } + + klog.Infof("Adding routes and flows to Node %s, podCIDRs: %v, addresses: %v", + nodeName, podCIDRStrs, node.Status.Addresses) + peerNodeIP, err := GetNodeAddr(node) if err != nil { klog.Errorf("Failed to retrieve IP address of Node %s: %v", nodeName, err) return nil } - peerGatewayIP := ip.NextIP(peerPodCIDRAddr) ipsecTunOFPort := int32(0) if c.networkConfig.EnableIPSecTunnel { @@ -420,11 +415,22 @@ func (c *Controller) addNodeRoute(nodeName string, node *corev1.Node) error { } } + var podCIDRs []*net.IPNet + peerConfig := make(map[*net.IPNet]net.IP, len(podCIDRStrs)) + for _, podCIDR := range podCIDRStrs { + peerPodCIDRAddr, peerPodCIDR, err := net.ParseCIDR(podCIDR) + if err != nil { + klog.Errorf("Failed to parse PodCIDR %s for Node %s", podCIDR, nodeName) + return nil + } + peerGatewayIP := ip.NextIP(peerPodCIDRAddr) + peerConfig[peerPodCIDR] = peerGatewayIP + podCIDRs = append(podCIDRs, peerPodCIDR) + } err = c.ofClient.InstallNodeFlows( nodeName, c.nodeConfig.GatewayConfig.MAC, - *peerPodCIDR, - peerGatewayIP, + peerConfig, peerNodeIP, config.DefaultTunOFPort, uint32(ipsecTunOFPort)) @@ -432,13 +438,28 @@ func (c *Controller) addNodeRoute(nodeName string, node *corev1.Node) error { return fmt.Errorf("failed to install flows to Node %s: %v", nodeName, err) } - if err := c.routeClient.AddRoutes(peerPodCIDR, peerNodeIP, peerGatewayIP); err != nil { - return err + for peerPodCIDR, peerGatewayIP := range peerConfig { + if err := c.routeClient.AddRoutes(peerPodCIDR, peerNodeIP, peerGatewayIP); err != nil { + return err + } } - c.installedNodes.Store(nodeName, peerPodCIDR) + c.installedNodes.Store(nodeName, podCIDRs) return err } +func getPodCIDRsOnNode(node *corev1.Node) []string { + if node.Spec.PodCIDRs != nil { + return node.Spec.PodCIDRs + } + + if node.Spec.PodCIDR == "" { + klog.Errorf("PodCIDR is empty for Node %s", node.Name) + // Does not help to return an error and trigger controller retries. + return nil + } + return []string{node.Spec.PodCIDR} +} + // createIPSecTunnelPort creates an IPSec tunnel port for the remote Node if the // tunnel does not exist, and returns the ofport number. func (c *Controller) createIPSecTunnelPort(nodeName string, nodeIP net.IP) (int32, error) { @@ -524,6 +545,9 @@ func ParseTunnelInterfaceConfig( // GetNodeAddr gets the available IP address of a Node. GetNodeAddr will first try to get the // NodeInternalIP, then try to get the NodeExternalIP. +// Note: Although K8s supports dual-stack, there is only a single Internal address per Node because of issue ( +// kubernetes/kubernetes#91940 ). The Node might have multiple addresses after the issue is fixed, and one per address +// family. And we should change the return type at that time. func GetNodeAddr(node *corev1.Node) (net.IP, error) { addresses := make(map[corev1.NodeAddressType]string) for _, addr := range node.Status.Addresses { diff --git a/pkg/agent/openflow/client.go b/pkg/agent/openflow/client.go index 19854d7b5b8..a403757eeae 100644 --- a/pkg/agent/openflow/client.go +++ b/pkg/agent/openflow/client.go @@ -72,8 +72,8 @@ type Client interface { InstallNodeFlows( hostname string, localGatewayMAC net.HardwareAddr, - peerPodCIDR net.IPNet, - peerGatewayIP, tunnelPeerIP net.IP, + peerConfigs map[*net.IPNet]net.IP, + tunnelPeerIP net.IP, tunOFPort, ipsecTunOFPort uint32) error // UninstallNodeFlows removes the connection to the remote Node specified with the @@ -286,20 +286,27 @@ func (c *client) deleteFlows(cache *flowCategoryCache, flowCacheKey string) erro func (c *client) InstallNodeFlows(hostname string, localGatewayMAC net.HardwareAddr, - peerPodCIDR net.IPNet, - peerGatewayIP, tunnelPeerIP net.IP, + peerConfigs map[*net.IPNet]net.IP, + tunnelPeerIP net.IP, tunOFPort, ipsecTunOFPort uint32) error { c.replayMutex.RLock() defer c.replayMutex.RUnlock() - flows := []binding.Flow{ - c.arpResponderFlow(peerGatewayIP, cookie.Node), - } - if c.encapMode.NeedsEncapToPeer(tunnelPeerIP, c.nodeConfig.NodeIPAddr) { - flows = append(flows, c.l3FwdFlowToRemote(localGatewayMAC, peerPodCIDR, tunnelPeerIP, tunOFPort, cookie.Node)) - } else { - flows = append(flows, c.l3FwdFlowToRemoteViaGW(localGatewayMAC, peerPodCIDR, cookie.Node)) + var flows []binding.Flow + + for peerPodCIDR, peerGatewayIP := range peerConfigs { + if peerGatewayIP.To4() != nil { + // Since broadcast is not supported in IPv6, ARP should happen only with IPv4 address, and ARP responder flows + // only work for IPv4 addresses. + flows = append(flows, c.arpResponderFlow(peerGatewayIP, cookie.Node)) + } + if c.encapMode.NeedsEncapToPeer(tunnelPeerIP, c.nodeConfig.NodeIPAddr) { + flows = append(flows, c.l3FwdFlowToRemote(localGatewayMAC, *peerPodCIDR, tunnelPeerIP, tunOFPort, cookie.Node)) + } else { + flows = append(flows, c.l3FwdFlowToRemoteViaGW(localGatewayMAC, *peerPodCIDR, cookie.Node)) + } } + if ipsecTunOFPort != 0 { // When IPSec tunnel is enabled, packets received from the remote Node are // input from the Node's IPSec tunnel port, not the default tunnel port. So, diff --git a/pkg/agent/openflow/client_test.go b/pkg/agent/openflow/client_test.go index 1dc33b3c38c..6bd403f9437 100644 --- a/pkg/agent/openflow/client_test.go +++ b/pkg/agent/openflow/client_test.go @@ -41,9 +41,12 @@ var bridgeMgmtAddr = ofconfig.GetMgmtAddress(ovsconfig.DefaultOVSRunDir, bridgeN func installNodeFlows(ofClient Client, cacheKey string) (int, error) { hostName := cacheKey gwMAC, _ := net.ParseMAC("AA:BB:CC:DD:EE:FF") - gwIP, IPNet, _ := net.ParseCIDR("10.0.1.1/24") + gwIP, ipNet, _ := net.ParseCIDR("10.0.1.1/24") peerNodeIP := net.ParseIP("192.168.1.1") - err := ofClient.InstallNodeFlows(hostName, gwMAC, *IPNet, gwIP, peerNodeIP, config.DefaultTunOFPort, 0) + peerConfig := map[*net.IPNet]net.IP{ + ipNet: gwIP, + } + err := ofClient.InstallNodeFlows(hostName, gwMAC, peerConfig, peerNodeIP, config.DefaultTunOFPort, 0) client := ofClient.(*client) fCacheI, ok := client.nodeFlowCache.Load(hostName) if ok { diff --git a/pkg/agent/openflow/pipeline.go b/pkg/agent/openflow/pipeline.go index 42b6b8bb1ba..b94a1be704f 100644 --- a/pkg/agent/openflow/pipeline.go +++ b/pkg/agent/openflow/pipeline.go @@ -772,7 +772,8 @@ func (c *client) l3FwdFlowToRemote( tunnelPeer net.IP, tunOFPort uint32, category cookie.Category) binding.Flow { - return c.pipeline[l3ForwardingTable].BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolIP). + ipProto := parseIPProtocol(peerSubnet.IP) + return c.pipeline[l3ForwardingTable].BuildFlow(priorityNormal).MatchProtocol(ipProto). MatchDstIPNet(peerSubnet). Action().DecTTL(). // Rewrite src MAC to local gateway MAC and rewrite dst MAC to virtual MAC. @@ -796,8 +797,9 @@ func (c *client) l3FwdFlowToRemoteViaGW( localGatewayMAC net.HardwareAddr, peerSubnet net.IPNet, category cookie.Category) binding.Flow { + ipProto := parseIPProtocol(peerSubnet.IP) l3FwdTable := c.pipeline[l3ForwardingTable] - return l3FwdTable.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolIP). + return l3FwdTable.BuildFlow(priorityNormal).MatchProtocol(ipProto). MatchDstIPNet(peerSubnet). Action().DecTTL(). Action().SetDstMAC(localGatewayMAC). diff --git a/pkg/agent/openflow/testing/mock_openflow.go b/pkg/agent/openflow/testing/mock_openflow.go index 13d0358bb6e..458770b9b37 100644 --- a/pkg/agent/openflow/testing/mock_openflow.go +++ b/pkg/agent/openflow/testing/mock_openflow.go @@ -336,17 +336,17 @@ func (mr *MockClientMockRecorder) InstallLoadBalancerServiceFromOutsideFlows(arg } // InstallNodeFlows mocks base method -func (m *MockClient) InstallNodeFlows(arg0 string, arg1 net.HardwareAddr, arg2 net.IPNet, arg3, arg4 net.IP, arg5, arg6 uint32) error { +func (m *MockClient) InstallNodeFlows(arg0 string, arg1 net.HardwareAddr, arg2 map[*net.IPNet]net.IP, arg3 net.IP, arg4, arg5 uint32) error { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "InstallNodeFlows", arg0, arg1, arg2, arg3, arg4, arg5, arg6) + ret := m.ctrl.Call(m, "InstallNodeFlows", arg0, arg1, arg2, arg3, arg4, arg5) ret0, _ := ret[0].(error) return ret0 } // InstallNodeFlows indicates an expected call of InstallNodeFlows -func (mr *MockClientMockRecorder) InstallNodeFlows(arg0, arg1, arg2, arg3, arg4, arg5, arg6 interface{}) *gomock.Call { +func (mr *MockClientMockRecorder) InstallNodeFlows(arg0, arg1, arg2, arg3, arg4, arg5 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InstallNodeFlows", reflect.TypeOf((*MockClient)(nil).InstallNodeFlows), arg0, arg1, arg2, arg3, arg4, arg5, arg6) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "InstallNodeFlows", reflect.TypeOf((*MockClient)(nil).InstallNodeFlows), arg0, arg1, arg2, arg3, arg4, arg5) } // InstallPodFlows mocks base method diff --git a/pkg/agent/route/interfaces.go b/pkg/agent/route/interfaces.go index 677b07fbd87..7d3912b992b 100644 --- a/pkg/agent/route/interfaces.go +++ b/pkg/agent/route/interfaces.go @@ -26,7 +26,8 @@ type Interface interface { // It should be idempotent and can be safely called on every startup. Initialize(nodeConfig *config.NodeConfig) error - // Reconcile should remove orphaned routes and related configuration based on the desired podCIDRs. + // Reconcile should remove orphaned routes and related configuration based on the desired podCIDRs. If IPv6 is enabled + // in the cluster, Reconcile should also remove the orphaned IPv6 neighbors. Reconcile(podCIDRs []string) error // AddRoutes should add routes to the provided podCIDR. diff --git a/pkg/agent/route/route_linux.go b/pkg/agent/route/route_linux.go index 70efb5fe94f..c61b0a479b4 100644 --- a/pkg/agent/route/route_linux.go +++ b/pkg/agent/route/route_linux.go @@ -22,6 +22,7 @@ import ( "reflect" "sync" + "github.com/containernetworking/plugins/pkg/ip" "github.com/vishvananda/netlink" "golang.org/x/sys/unix" "k8s.io/apimachinery/pkg/util/sets" @@ -38,6 +39,8 @@ const ( // Antrea managed ipset. // antreaPodIPSet contains all Pod CIDRs of this cluster. antreaPodIPSet = "ANTREA-POD-IP" + // antreaPodIP6Set contains all IPv6 Pod CIDRs of this cluster. + antreaPodIP6Set = "ANTREA-POD-IP6" // Antrea managed iptables chains. antreaForwardChain = "ANTREA-FORWARD" @@ -48,6 +51,12 @@ const ( // Client implements Interface. var _ Interface = &Client{} +var ( + // globalVMAC is used in the IPv6 neighbor configuration to advertise ND solicitation for the IPv6 address of the + // host gateway interface on other Nodes. + globalVMAC, _ = net.ParseMAC("aa:bb:cc:dd:ee:ff") +) + // Client takes care of routing container packets in host network, coordinating ip route, ip rule, iptables and ipset. type Client struct { nodeConfig *config.NodeConfig @@ -56,6 +65,8 @@ type Client struct { ipt *iptables.Client // nodeRoutes caches ip routes to remote Pods. It's a map of podCIDR to routes. nodeRoutes sync.Map + // nodeNeighbors caches IPv6 Neighbors to remote host gateway + nodeNeighbors sync.Map } // NewClient returns a route client. @@ -115,16 +126,32 @@ func (c *Client) initIPSet() error { if c.encapMode.IsNetworkPolicyOnly() { return nil } - if err := ipset.CreateIPSet(antreaPodIPSet, ipset.HashNet); err != nil { + if err := ipset.CreateIPSet(antreaPodIPSet, ipset.HashNet, false); err != nil { return err } - // Ensure its own PodCIDR is in it. - if err := ipset.AddEntry(antreaPodIPSet, c.nodeConfig.PodIPv4CIDR.String()); err != nil { + if err := ipset.CreateIPSet(antreaPodIP6Set, ipset.HashNet, true); err != nil { return err } + + // Loop all valid PodCIDR and add into the corresponding ipset. + for _, podCIDR := range []*net.IPNet{c.nodeConfig.PodIPv4CIDR, c.nodeConfig.PodIPv6CIDR} { + if podCIDR != nil { + ipsetName := getIPSetName(podCIDR.IP) + if err := ipset.AddEntry(ipsetName, podCIDR.String()); err != nil { + return err + } + } + } return nil } +func getIPSetName(ip net.IP) string { + if ip.To4() == nil { + return antreaPodIP6Set + } + return antreaPodIPSet +} + // writeEKSMangleRule writes an additional iptables mangle rule to the // iptablesData buffer, which is required to ensure that the reverse path for // NodePort Service traffic is correct on EKS. @@ -151,6 +178,9 @@ func (c *Client) writeEKSMangleRule(iptablesData *bytes.Buffer) { // initIPTables ensure that the iptables infrastructure we use is set up. // It's idempotent and can safely be called on every startup. func (c *Client) initIPTables() error { + if c.nodeConfig.PodIPv6CIDR != nil { + c.ipt.SetIPv6Supported(true) + } // Create the antrea managed chains and link them to built-in chains. // We cannot use iptables-restore for these jump rules because there // are non antrea managed rules in built-in chains. @@ -169,6 +199,27 @@ func (c *Client) initIPTables() error { } } + // Use iptables-restore to configure IPv4 settings. + if c.nodeConfig.PodIPv4CIDR != nil { + iptablesData := c.restoreIptablesData(c.nodeConfig.PodIPv4CIDR, antreaPodIPSet) + // Setting --noflush to keep the previous contents (i.e. non antrea managed chains) of the tables. + if err := c.ipt.Restore(iptablesData.Bytes(), false, false); err != nil { + return err + } + } + + // Use ip6tables-restore to configure IPv6 settings. + if c.nodeConfig.PodIPv6CIDR != nil { + iptablesData := c.restoreIptablesData(c.nodeConfig.PodIPv6CIDR, antreaPodIP6Set) + // Setting --noflush to keep the previous contents (i.e. non antrea managed chains) of the tables. + if err := c.ipt.Restore(iptablesData.Bytes(), false, true); err != nil { + return err + } + } + return nil +} + +func (c *Client) restoreIptablesData(podCIDR *net.IPNet, podIPSet string) *bytes.Buffer { // Create required rules in the antrea chains. // Use iptables-restore as it flushes the involved chains and creates the desired rules // with a single call, instead of string matching to clean up stale rules. @@ -208,17 +259,12 @@ func (c *Client) initIPTables() error { writeLine(iptablesData, []string{ "-A", antreaPostRoutingChain, "-m", "comment", "--comment", `"Antrea: masquerade pod to external packets"`, - "-s", c.nodeConfig.PodIPv4CIDR.String(), "-m", "set", "!", "--match-set", antreaPodIPSet, "dst", + "-s", podCIDR.String(), "-m", "set", "!", "--match-set", podIPSet, "dst", "-j", iptables.MasqueradeTarget, }...) } writeLine(iptablesData, "COMMIT") - - // Setting --noflush to keep the previous contents (i.e. non antrea managed chains) of the tables. - if err := c.ipt.Restore(iptablesData.Bytes(), false); err != nil { - return err - } - return nil + return iptablesData } func (c *Client) initIPRoutes() error { @@ -237,26 +283,28 @@ func (c *Client) initIPRoutes() error { func (c *Client) Reconcile(podCIDRs []string) error { desiredPodCIDRs := sets.NewString(podCIDRs...) - // Remove orphaned podCIDRs from antreaPodIPSet. - entries, err := ipset.ListEntries(antreaPodIPSet) - if err != nil { - return err - } - for _, entry := range entries { - if desiredPodCIDRs.Has(entry) { - continue - } - klog.Infof("Deleting orphaned PodIP %s from ipset and route table", entry) - if err := ipset.DelEntry(antreaPodIPSet, entry); err != nil { - return err - } - _, cidr, err := net.ParseCIDR(entry) + // Remove orphaned podCIDRs from ipset. + for _, ipsetName := range []string{antreaPodIPSet, antreaPodIP6Set} { + entries, err := ipset.ListEntries(ipsetName) if err != nil { return err } - route := &netlink.Route{Dst: cidr} - if err := netlink.RouteDel(route); err != nil && err != unix.ESRCH { - return err + for _, entry := range entries { + if desiredPodCIDRs.Has(entry) { + continue + } + klog.Infof("Deleting orphaned PodIP %s from ipset and route table", entry) + if err := ipset.DelEntry(ipsetName, entry); err != nil { + return err + } + _, cidr, err := net.ParseCIDR(entry) + if err != nil { + return err + } + route := &netlink.Route{Dst: cidr} + if err := netlink.RouteDel(route); err != nil && err != unix.ESRCH { + return err + } } } @@ -277,6 +325,27 @@ func (c *Client) Reconcile(podCIDRs []string) error { return err } } + + // Remove any unknown IPv6 neighbors on antrea-gw0. + desiredGWs := getIPv6Gateways(podCIDRs) + // Return immediately if there is no IPv6 gateway address configured on the Nodes. + if desiredGWs.Len() == 0 { + return nil + } + // Remove orphaned IPv6 Neighbors from host network. + actualNeighbors, err := c.listIPv6NeighborsOnGateway() + if err != nil { + return err + } + for neighIP, actualNeigh := range actualNeighbors { + if desiredGWs.Has(neighIP) { + continue + } + klog.V(4).Infof("Deleting orphaned IPv6 neighbor %v", actualNeigh) + if err := netlink.NeighDel(actualNeigh); err != nil { + return err + } + } return nil } @@ -284,22 +353,72 @@ func (c *Client) Reconcile(podCIDRs []string) error { func (c *Client) listIPRoutesOnGW() ([]netlink.Route, error) { filter := &netlink.Route{ LinkIndex: c.nodeConfig.GatewayConfig.LinkIndex} - return netlink.RouteListFiltered(netlink.FAMILY_V4, filter, netlink.RT_FILTER_OIF) + routes, err := netlink.RouteListFiltered(netlink.FAMILY_V4, filter, netlink.RT_FILTER_OIF) + if err != nil { + return nil, err + } + ipv6Routes, err := netlink.RouteListFiltered(netlink.FAMILY_V6, filter, netlink.RT_FILTER_OIF) + if err != nil { + return nil, err + } + routes = append(routes, ipv6Routes...) + return routes, nil +} + +// getIPv6Gateways returns the IPv6 gateway addresses of the given CIDRs. +func getIPv6Gateways(podCIDRs []string) sets.String { + ipv6GWs := sets.NewString() + for _, podCIDR := range podCIDRs { + peerPodCIDRAddr, _, _ := net.ParseCIDR(podCIDR) + if peerPodCIDRAddr.To4() != nil { + continue + } + peerGatewayIP := ip.NextIP(peerPodCIDRAddr) + ipv6GWs.Insert(peerGatewayIP.String()) + } + return ipv6GWs +} + +func (c *Client) listIPv6NeighborsOnGateway() (map[string]*netlink.Neigh, error) { + neighs, err := netlink.NeighList(c.nodeConfig.GatewayConfig.LinkIndex, netlink.FAMILY_V6) + if err != nil { + return nil, err + } + neighMap := make(map[string]*netlink.Neigh) + for i := range neighs { + if neighs[i].IP == nil { + continue + } + neighMap[neighs[i].IP.String()] = &neighs[i] + } + return neighMap, nil } // AddRoutes adds routes to a new podCIDR. It overrides the routes if they already exist. func (c *Client) AddRoutes(podCIDR *net.IPNet, nodeIP, nodeGwIP net.IP) error { podCIDRStr := podCIDR.String() + ipsetName := getIPSetName(podCIDR.IP) // Add this podCIDR to antreaPodIPSet so that packets to them won't be masqueraded when they leave the host. - if err := ipset.AddEntry(antreaPodIPSet, podCIDRStr); err != nil { + if err := ipset.AddEntry(ipsetName, podCIDRStr); err != nil { return err } // Install routes to this Node. route := &netlink.Route{ Dst: podCIDR, } + var routes []*netlink.Route if c.encapMode.NeedsEncapToPeer(nodeIP, c.nodeConfig.NodeIPAddr) { - route.Flags = int(netlink.FLAG_ONLINK) + if podCIDR.IP.To4() == nil { + // "on-link" is not identified in IPv6 route entries, so split the configuration into 2 entries. + routes = []*netlink.Route{ + { + Dst: &net.IPNet{IP: nodeGwIP, Mask: net.IPMask{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}, + LinkIndex: c.nodeConfig.GatewayConfig.LinkIndex, + }, + } + } else { + route.Flags = int(netlink.FLAG_ONLINK) + } route.LinkIndex = c.nodeConfig.GatewayConfig.LinkIndex route.Gw = nodeGwIP } else if !c.encapMode.NeedsRoutingToPeer(nodeIP, c.nodeConfig.NodeIPAddr) { @@ -309,31 +428,61 @@ func (c *Client) AddRoutes(podCIDR *net.IPNet, nodeIP, nodeGwIP net.IP) error { // NoEncap traffic to Node on the same subnet. It is handled by host default route. return nil } - if err := netlink.RouteReplace(route); err != nil { - return fmt.Errorf("failed to install route to peer %s with netlink: %v", nodeIP, err) + routes = append(routes, route) + + for _, route := range routes { + if err := netlink.RouteReplace(route); err != nil { + return fmt.Errorf("failed to install route to peer %s with netlink: %v", nodeIP, err) + } } - c.nodeRoutes.Store(podCIDRStr, route) + + if podCIDR.IP.To4() == nil { + // Add IPv6 neighbor if the given podCIDR is using IPv6 address. + neigh := &netlink.Neigh{ + LinkIndex: c.nodeConfig.GatewayConfig.LinkIndex, + Family: netlink.FAMILY_V6, + State: netlink.NUD_PERMANENT, + IP: nodeGwIP, + HardwareAddr: globalVMAC, + } + if err := netlink.NeighSet(neigh); err != nil { + return fmt.Errorf("failed to add neigh %v to gw %s: %v", neigh, c.nodeConfig.GatewayConfig.Name, err) + } + c.nodeNeighbors.Store(podCIDRStr, neigh) + } + + c.nodeRoutes.Store(podCIDRStr, routes) return nil } // DeleteRoutes deletes routes to a PodCIDR. It does nothing if the routes doesn't exist. func (c *Client) DeleteRoutes(podCIDR *net.IPNet) error { podCIDRStr := podCIDR.String() + ipsetName := getIPSetName(podCIDR.IP) // Delete this podCIDR from antreaPodIPSet as the CIDR is no longer for Pods. - if err := ipset.DelEntry(antreaPodIPSet, podCIDRStr); err != nil { + if err := ipset.DelEntry(ipsetName, podCIDRStr); err != nil { return err } - i, exists := c.nodeRoutes.Load(podCIDRStr) - if !exists { - return nil + routes, exists := c.nodeRoutes.Load(podCIDRStr) + if exists { + for _, r := range routes.([]*netlink.Route) { + klog.V(4).Infof("Deleting route %v", r) + if err := netlink.RouteDel(r); err != nil && err != unix.ESRCH { + return err + } + } + c.nodeRoutes.Delete(podCIDRStr) } - r := i.(*netlink.Route) - klog.V(4).Infof("Deleting route %v", r) - if err := netlink.RouteDel(r); err != nil && err != unix.ESRCH { - return err + if podCIDR.IP.To4() == nil { + neigh, exists := c.nodeNeighbors.Load(podCIDRStr) + if exists { + if err := netlink.NeighDel(neigh.(*netlink.Neigh)); err != nil { + return err + } + c.nodeNeighbors.Delete(podCIDRStr) + } } - c.nodeRoutes.Delete(podCIDRStr) return nil } @@ -369,31 +518,37 @@ func (c *Client) MigrateRoutesToGw(linkName string) error { return fmt.Errorf("failed to get link %s: %w", linkName, err) } - // Swap route first then address, otherwise route gets removed when address is removed. - routes, err := netlink.RouteList(link, netlink.FAMILY_V4) - if err != nil { - return fmt.Errorf("failed to get routes for link %s: %w", linkName, err) - } - for _, route := range routes { - route.LinkIndex = gwLink.Attrs().Index - if err = netlink.RouteReplace(&route); err != nil { - return fmt.Errorf("failed to add route %v to link %s: %w", &route, gwLink.Attrs().Name, err) + for _, family := range []int{netlink.FAMILY_V4, netlink.FAMILY_V6} { + // Swap route first then address, otherwise route gets removed when address is removed. + routes, err := netlink.RouteList(link, family) + if err != nil { + return fmt.Errorf("failed to get routes for link %s: %w", linkName, err) + } + for _, route := range routes { + route.LinkIndex = gwLink.Attrs().Index + if err = netlink.RouteReplace(&route); err != nil { + return fmt.Errorf("failed to add route %v to link %s: %w", &route, gwLink.Attrs().Name, err) + } } - } - // Swap address if any. - addrs, err := netlink.AddrList(link, netlink.FAMILY_V4) - if err != nil { - return fmt.Errorf("failed to get addresses for %s: %w", linkName, err) - } - for _, addr := range addrs { - if err = netlink.AddrDel(link, &addr); err != nil { - klog.Errorf("failed to delete addr %v from %s: %v", addr, link, err) + // Swap address if any. + addrs, err := netlink.AddrList(link, family) + if err != nil { + return fmt.Errorf("failed to get addresses for %s: %w", linkName, err) } - tmpAddr := &netlink.Addr{IPNet: addr.IPNet} - if err = netlink.AddrReplace(gwLink, tmpAddr); err != nil { - return fmt.Errorf("failed to add addr %v to gw %s: %w", addr, gwLink.Attrs().Name, err) + for _, addr := range addrs { + if addr.IP.IsLinkLocalMulticast() || addr.IP.IsLinkLocalUnicast() { + continue + } + if err = netlink.AddrDel(link, &addr); err != nil { + klog.Errorf("failed to delete addr %v from %s: %v", addr, link, err) + } + tmpAddr := &netlink.Addr{IPNet: addr.IPNet} + if err = netlink.AddrReplace(gwLink, tmpAddr); err != nil { + return fmt.Errorf("failed to add addr %v to gw %s: %w", addr, gwLink.Attrs().Name, err) + } } + } return nil } diff --git a/pkg/agent/util/ipset/ipset.go b/pkg/agent/util/ipset/ipset.go index ae701587cbe..1055bed0d66 100644 --- a/pkg/agent/util/ipset/ipset.go +++ b/pkg/agent/util/ipset/ipset.go @@ -33,8 +33,13 @@ const ( var memberPattern = regexp.MustCompile("(?m)^(.*\n)*Members:\n") // CreateIPSet creates a new set, it will ignore error when the set already exists. -func CreateIPSet(name string, setType SetType) error { - cmd := exec.Command("ipset", "create", name, string(setType), "-exist") +func CreateIPSet(name string, setType SetType, isIPv6 bool) error { + var cmd *exec.Cmd + if isIPv6 { + cmd = exec.Command("ipset", "create", name, string(setType), "family", "inet6", "-exist") + } else { + cmd = exec.Command("ipset", "create", name, string(setType), "-exist") + } if err := cmd.Run(); err != nil { return fmt.Errorf("error creating ipset %s: %v", name, err) } diff --git a/pkg/agent/util/iptables/iptables.go b/pkg/agent/util/iptables/iptables.go index b8d3f95d339..65195ee4333 100644 --- a/pkg/agent/util/iptables/iptables.go +++ b/pkg/agent/util/iptables/iptables.go @@ -50,9 +50,11 @@ const ( var restoreWaitSupportedMinVersion = semver.Version{Major: 1, Minor: 6, Patch: 2} type Client struct { - ipt *iptables.IPTables + ipt *iptables.IPTables + ip6t *iptables.IPTables // restoreWaitSupported indicates whether iptables-restore supports --wait flag. restoreWaitSupported bool + enableIPv6 bool } func New() (*Client, error) { @@ -60,7 +62,11 @@ func New() (*Client, error) { if err != nil { return nil, fmt.Errorf("error creating IPTables instance: %v", err) } - return &Client{ipt: ipt, restoreWaitSupported: isRestoreWaitSupported(ipt)}, nil + ip6t, err := iptables.NewWithProtocol(iptables.ProtocolIPv6) + if err != nil { + return nil, fmt.Errorf("error creating IPTables instance for IPv6: %v", err) + } + return &Client{ipt: ipt, ip6t: ip6t, restoreWaitSupported: isRestoreWaitSupported(ipt)}, nil } func isRestoreWaitSupported(ipt *iptables.IPTables) bool { @@ -71,45 +77,63 @@ func isRestoreWaitSupported(ipt *iptables.IPTables) bool { // ensureChain checks if target chain already exists, creates it if not. func (c *Client) EnsureChain(table string, chain string) error { - oriChains, err := c.ipt.ListChains(table) - if err != nil { - return fmt.Errorf("error listing existing chains in table %s: %v", table, err) - } - if contains(oriChains, chain) { - return nil - } - if err := c.ipt.NewChain(table, chain); err != nil { - return fmt.Errorf("error creating chain %s in table %s: %v", chain, table, err) + ipts := c.getIptablesInstances() + for _, ipt := range ipts { + oriChains, err := ipt.ListChains(table) + if err != nil { + return fmt.Errorf("error listing existing chains in table %s: %v", table, err) + } + if contains(oriChains, chain) { + return nil + } + if err := ipt.NewChain(table, chain); err != nil { + return fmt.Errorf("error creating chain %s in table %s: %v", chain, table, err) + } + klog.V(2).Infof("Created chain %s in table %s", chain, table) } - klog.V(2).Infof("Created chain %s in table %s", chain, table) return nil } +func (c *Client) getIptablesInstances() []*iptables.IPTables { + ipts := []*iptables.IPTables{c.ipt} + if c.enableIPv6 { + ipts = append(ipts, c.ip6t) + } + return ipts +} + // ensureRule checks if target rule already exists, appends it if not. func (c *Client) EnsureRule(table string, chain string, ruleSpec []string) error { - exist, err := c.ipt.Exists(table, chain, ruleSpec...) - if err != nil { - return fmt.Errorf("error checking if rule %v exists in table %s chain %s: %v", ruleSpec, table, chain, err) - } - if exist { - return nil - } - if err := c.ipt.Append(table, chain, ruleSpec...); err != nil { - return fmt.Errorf("error appending rule %v to table %s chain %s: %v", ruleSpec, table, chain, err) + ipts := c.getIptablesInstances() + for _, ipt := range ipts { + exist, err := ipt.Exists(table, chain, ruleSpec...) + if err != nil { + return fmt.Errorf("error checking if rule %v exists in table %s chain %s: %v", ruleSpec, table, chain, err) + } + if exist { + return nil + } + if err := ipt.Append(table, chain, ruleSpec...); err != nil { + return fmt.Errorf("error appending rule %v to table %s chain %s: %v", ruleSpec, table, chain, err) + } } klog.V(2).Infof("Appended rule %v to table %s chain %s", ruleSpec, table, chain) return nil } -// Restore calls iptable-restore to restore iptables with the provided content. +// Restore calls iptables-restore to restore iptables with the provided content. // If flush is true, all previous contents of the respective tables will be flushed. -// Otherwise only involved chains will be flushed. -func (c *Client) Restore(data []byte, flush bool) error { +// Otherwise only involved chains will be flushed. Restore supports "ip6tables-restore" for IPv6. +func (c *Client) Restore(data []byte, flush bool, useIPv6 bool) error { var args []string if !flush { args = append(args, "--noflush") } - cmd := exec.Command("iptables-restore", args...) + iptablesCmd := "iptables-restore" + if useIPv6 { + iptablesCmd = "ip6tables-restore" + } + cmd := exec.Command(iptablesCmd, args...) cmd.Stdin = bytes.NewBuffer(data) stderr := &bytes.Buffer{} cmd.Stderr = stderr @@ -131,8 +155,8 @@ func (c *Client) Restore(data []byte, flush bool) error { defer unlockFunc() } if err := cmd.Run(); err != nil { - klog.Errorf("Failed to execute iptables-restore: %v\nstdin:\n%s\nstderr:\n%s", err, data, stderr) - return fmt.Errorf("error executing iptables-restore: %v", err) + klog.Errorf("Failed to execute %s: %v\nstdin:\n%s\nstderr:\n%s", iptablesCmd, err, data, stderr) + return fmt.Errorf("error executing %s: %v", iptablesCmd, err) } return nil } @@ -142,6 +166,10 @@ func (c *Client) Save() ([]byte, error) { return exec.Command("iptables-save", "-c").CombinedOutput() } +func (c *Client) SetIPv6Supported(val bool) { + c.enableIPv6 = val +} + func contains(chains []string, targetChain string) bool { for _, val := range chains { if val == targetChain { diff --git a/pkg/agent/util/net_linux.go b/pkg/agent/util/net_linux.go index 66aacbe88e6..7c14a741343 100644 --- a/pkg/agent/util/net_linux.go +++ b/pkg/agent/util/net_linux.go @@ -135,14 +135,21 @@ func ConfigureLinkAddress(idx int, gwIPNet *net.IPNet) error { link, _ := netlink.LinkByIndex(idx) gwAddr := &netlink.Addr{IPNet: gwIPNet, Label: ""} - if addrs, err := netlink.AddrList(link, netlink.FAMILY_V4); err != nil { - klog.Errorf("Failed to query IPv4 address list for interface %s: %v", link.Attrs().Name, err) + var addrFamily int + if gwIPNet.IP.To4() != nil { + addrFamily = netlink.FAMILY_V4 + } else { + addrFamily = netlink.FAMILY_V6 + } + + if addrs, err := netlink.AddrList(link, addrFamily); err != nil { + klog.Errorf("Failed to query address list for interface %s: %v", link.Attrs().Name, err) return err } else if addrs != nil { for _, addr := range addrs { - klog.V(4).Infof("Found IPv4 address %s for interface %s", addr.IP.String(), link.Attrs().Name) + klog.V(4).Infof("Found address %s for interface %s", addr.IP.String(), link.Attrs().Name) if addr.IP.Equal(gwAddr.IPNet.IP) { - klog.V(2).Infof("IPv4 address %s already assigned to interface %s", addr.IP.String(), link.Attrs().Name) + klog.V(2).Infof("Address %s already assigned to interface %s", addr.IP.String(), link.Attrs().Name) return nil } } diff --git a/pkg/agent/util/net_windows.go b/pkg/agent/util/net_windows.go index 046f974f2ee..bcdb101dc64 100644 --- a/pkg/agent/util/net_windows.go +++ b/pkg/agent/util/net_windows.go @@ -225,6 +225,11 @@ func SetLinkUp(name string) (net.HardwareAddr, int, error) { } func ConfigureLinkAddress(idx int, gwIPNet *net.IPNet) error { + if gwIPNet.IP.To4() == nil { + klog.Warningf("Windows only supports IPv4 addresses. Skip this address %s", gwIPNet.String()) + return nil + } + iface, _ := net.InterfaceByIndex(idx) gwIP := gwIPNet.IP name := iface.Name diff --git a/test/integration/agent/openflow_test.go b/test/integration/agent/openflow_test.go index 69f191c516c..9c17d5b6a7d 100644 --- a/test/integration/agent/openflow_test.go +++ b/test/integration/agent/openflow_test.go @@ -263,7 +263,10 @@ func testInstallServiceFlows(t *testing.T, config *testConfig) { func testInstallNodeFlows(t *testing.T, config *testConfig) { for _, node := range config.peers { - err := c.InstallNodeFlows(node.name, config.localGateway.mac, node.subnet, node.gateway, node.nodeAddress, config.tunnelOFPort, 0) + peerConfig := map[*net.IPNet]net.IP{ + &node.subnet: node.gateway, + } + err := c.InstallNodeFlows(node.name, config.localGateway.mac, peerConfig, node.nodeAddress, config.tunnelOFPort, 0) if err != nil { t.Fatalf("Failed to install Openflow entries for node connectivity: %v", err) } @@ -422,6 +425,32 @@ func TestNetworkPolicyFlows(t *testing.T) { checkOVSFlowMetrics(t, c) } +func TestIPv6ConnectivityFlows(t *testing.T) { + // Initialize ovs metrics (Prometheus) to test them + metrics.InitializeOVSMetrics() + + c = ofClient.NewClient(br, bridgeMgmtAddr, true, false) + err := ofTestUtils.PrepareOVSBridge(br) + require.Nil(t, err, fmt.Sprintf("Failed to prepare OVS bridge: %v", err)) + defer func() { + err = c.Disconnect() + assert.Nil(t, err, fmt.Sprintf("Error while disconnecting from OVS bridge: %v", err)) + err = ofTestUtils.DeleteOVSBridge(br) + assert.Nil(t, err, fmt.Sprintf("Error while deleting OVS bridge: %v", err)) + }() + + config := prepareIPv6Configuration() + for _, f := range []func(t *testing.T, config *testConfig){ + testInitialize, + testInstallNodeFlows, + testInstallPodFlows, + testUninstallPodFlows, + testUninstallNodeFlows, + } { + f(t, config) + } +} + func checkDefaultDropFlows(t *testing.T, table uint8, priority int, addrType types.AddressType, addresses []types.Address, add bool) { // dump flows flowList, err := ofTestUtils.OfctlDumpTableFlows(ovsCtlClient, table) @@ -619,6 +648,42 @@ func prepareConfiguration() *testConfig { } } +func prepareIPv6Configuration() *testConfig { + podMAC, _ := net.ParseMAC("aa:aa:aa:aa:aa:13") + gwMAC, _ := net.ParseMAC("aa:aa:aa:aa:aa:11") + podCfg := &testLocalPodConfig{ + name: "container-1", + testPortConfig: &testPortConfig{ + ips: []net.IP{net.ParseIP("fd74:ca9b:172:19::3")}, + mac: podMAC, + ofPort: uint32(3), + }, + } + gwCfg := &testPortConfig{ + ips: []net.IP{net.ParseIP("fd74:ca9b:172:19::1")}, + mac: gwMAC, + ofPort: uint32(1), + } + _, serviceCIDR, _ := net.ParseCIDR("ee74:ca9b:2345:a33::/64") + _, peerSubnet, _ := net.ParseCIDR("fd74:ca9b:172:20::/64") + peerNode := &testPeerConfig{ + name: "n2", + nodeAddress: net.ParseIP("10.1.1.2"), + subnet: *peerSubnet, + gateway: net.ParseIP("fd74:ca9b:172:20::1"), + } + vMAC, _ := net.ParseMAC("aa:bb:cc:dd:ee:ff") + return &testConfig{ + bridge: br, + localGateway: gwCfg, + localPods: []*testLocalPodConfig{podCfg}, + peers: []*testPeerConfig{peerNode}, + tunnelOFPort: uint32(2), + serviceCIDR: serviceCIDR, + globalMAC: vMAC, + } +} + func preparePodFlows(podIPs []net.IP, podMAC net.HardwareAddr, podOFPort uint32, gwMAC, vMAC net.HardwareAddr) []expectTableFlows { flows := []expectTableFlows{ { @@ -642,9 +707,12 @@ func preparePodFlows(podIPs []net.IP, podMAC net.HardwareAddr, podOFPort uint32, } for _, podIP := range podIPs { - var ipProto string + var ipProto, nwSrcField, nwDstField string + var nextTableForSpoofguard uint8 if podIP.To4() != nil { ipProto = "ip" + nwSrcField = "nw_src" + nwDstField = "nw_dst" flows = append(flows, expectTableFlows{ uint8(10), @@ -655,16 +723,20 @@ func preparePodFlows(podIPs []net.IP, podMAC net.HardwareAddr, podOFPort uint32, }, }, }) + nextTableForSpoofguard = 29 } else { ipProto = "ipv6" + nwSrcField = "ipv6_src" + nwDstField = "ipv6_dst" + nextTableForSpoofguard = 21 } flows = append(flows, expectTableFlows{ uint8(10), []*ofTestUtils.ExpectFlow{ { - MatchStr: fmt.Sprintf("priority=200,%s,in_port=%d,dl_src=%s,nw_src=%s", ipProto, podOFPort, podMAC.String(), podIP.String()), - ActStr: "goto_table:29", + MatchStr: fmt.Sprintf("priority=200,%s,in_port=%d,dl_src=%s,%s=%s", ipProto, podOFPort, podMAC.String(), nwSrcField, podIP.String()), + ActStr: fmt.Sprintf("goto_table:%d", nextTableForSpoofguard), }, }, }, @@ -672,8 +744,9 @@ func preparePodFlows(podIPs []net.IP, podMAC net.HardwareAddr, podOFPort uint32, uint8(70), []*ofTestUtils.ExpectFlow{ { - MatchStr: fmt.Sprintf("priority=200,ip,reg0=0x80000/0x80000,nw_dst=%s", podIP.String()), - ActStr: fmt.Sprintf("set_field:%s->eth_src,set_field:%s->eth_dst,dec_ttl,goto_table:80", gwMAC.String(), podMAC.String())}, + MatchStr: fmt.Sprintf("priority=200,%s,reg0=0x80000/0x80000,%s=%s", ipProto, nwDstField, podIP.String()), + ActStr: fmt.Sprintf("set_field:%s->eth_src,set_field:%s->eth_dst,dec_ttl,goto_table:80", gwMAC.String(), podMAC.String()), + }, }, }, ) @@ -774,8 +847,12 @@ func prepareTunnelFlows(tunnelPort uint32, vMAC net.HardwareAddr) []expectTableF } func prepareNodeFlows(tunnelPort uint32, peerSubnet net.IPNet, peerGwIP, peerNodeIP net.IP, vMAC, localGwMAC net.HardwareAddr) []expectTableFlows { - return []expectTableFlows{ - { + var expFlows []expectTableFlows + var ipProtoStr, nwDstFieldName string + if peerGwIP.To4() != nil { + ipProtoStr = "ip" + nwDstFieldName = "nw_dst" + expFlows = append(expFlows, expectTableFlows{ uint8(20), []*ofTestUtils.ExpectFlow{ { @@ -783,17 +860,21 @@ func prepareNodeFlows(tunnelPort uint32, peerSubnet net.IPNet, peerGwIP, peerNod ActStr: fmt.Sprintf("move:NXM_OF_ETH_SRC[]->NXM_OF_ETH_DST[],set_field:%s->eth_src,load:0x2->NXM_OF_ARP_OP[],move:NXM_NX_ARP_SHA[]->NXM_NX_ARP_THA[],set_field:%s->arp_sha,move:NXM_OF_ARP_SPA[]->NXM_OF_ARP_TPA[],set_field:%s->arp_spa,IN_PORT", vMAC.String(), vMAC.String(), peerGwIP.String()), }, }, - }, - { - uint8(70), - []*ofTestUtils.ExpectFlow{ - { - MatchStr: fmt.Sprintf("priority=200,ip,nw_dst=%s", peerSubnet.String()), - ActStr: fmt.Sprintf("dec_ttl,set_field:%s->eth_src,set_field:%s->eth_dst,load:0x%x->NXM_NX_REG1[],load:0x1->NXM_NX_REG0[16],set_field:%s->tun_dst,goto_table:105", localGwMAC.String(), vMAC.String(), tunnelPort, peerNodeIP.String()), - }, - }, - }, + }) + } else { + ipProtoStr = "ipv6" + nwDstFieldName = "ipv6_dst" } + expFlows = append(expFlows, expectTableFlows{ + uint8(70), + []*ofTestUtils.ExpectFlow{ + { + MatchStr: fmt.Sprintf("priority=200,%s,%s=%s", ipProtoStr, nwDstFieldName, peerSubnet.String()), + ActStr: fmt.Sprintf("dec_ttl,set_field:%s->eth_src,set_field:%s->eth_dst,load:0x%x->NXM_NX_REG1[],load:0x1->NXM_NX_REG0[16],set_field:%s->tun_dst,goto_table:105", localGwMAC.String(), vMAC.String(), tunnelPort, peerNodeIP.String())}, + }, + }) + + return expFlows } func prepareServiceHelperFlows() []expectTableFlows { diff --git a/test/integration/agent/route_test.go b/test/integration/agent/route_test.go index 9d74ec82340..7669bbb1338 100644 --- a/test/integration/agent/route_test.go +++ b/test/integration/agent/route_test.go @@ -392,3 +392,82 @@ func TestRouteTablePolicyOnly(t *testing.T) { assert.Containsf(t, output, ipAddr.String(), output) _ = netlink.LinkDel(gwLink) } + +func TestIPv6RoutesAndNeighbors(t *testing.T) { + if _, incontainer := os.LookupEnv("INCONTAINER"); !incontainer { + // test changes file system, routing table. Run in contain only + t.Skipf("Skip test runs only in container") + } + + gwLink := createDummyGW(t) + defer netlink.LinkDel(gwLink) + + routeClient, err := route.NewClient(serviceCIDR, config.TrafficEncapModeEncap) + assert.Nil(t, err) + _, ipv6Subnet, _ := net.ParseCIDR("fd74:ca9b:172:19::/64") + gwIPv6 := net.ParseIP("fd74:ca9b:172:19::1") + dualGWConfig := &config.GatewayConfig{IPv4: gwIP, IPv6: gwIPv6, MAC: gwMAC, Name: gwName, LinkIndex: gwLink.Attrs().Index} + dualNodeConfig := &config.NodeConfig{ + Name: "test", + PodIPv4CIDR: podCIDR, + PodIPv6CIDR: ipv6Subnet, + NodeIPAddr: nodeIP, + GatewayConfig: dualGWConfig, + } + err = routeClient.Initialize(dualNodeConfig) + assert.Nil(t, err) + + tcs := []struct { + // variations + peerCIDR string + // expectations + uplink netlink.Link + }{ + {peerCIDR: "10.10.20.0/24", uplink: gwLink}, + {peerCIDR: "fd74:ca9b:172:18::/64", uplink: gwLink}, + } + + for _, tc := range tcs { + _, peerCIDR, _ := net.ParseCIDR(tc.peerCIDR) + nhCIDRIP := ip.NextIP(peerCIDR.IP) + if err := routeClient.AddRoutes(peerCIDR, localPeerIP, nhCIDRIP); err != nil { + t.Errorf("route add failed with err %v", err) + } + + link := tc.uplink + nhIP := nhCIDRIP + var expRouteStr, ipRoute, expNeighStr, ipNeigh string + if nhIP.To4() != nil { + onlink := "onlink" + expRouteStr = fmt.Sprintf("%s via %s dev %s %s", peerCIDR, nhIP, link.Attrs().Name, onlink) + ipRoute, _ = ExecOutputTrim(fmt.Sprintf("ip route show | grep %s", tc.peerCIDR)) + } else { + expRouteStr = fmt.Sprintf("%s via %s dev %s", peerCIDR, nhIP, link.Attrs().Name) + ipRoute, _ = ExecOutputTrim(fmt.Sprintf("ip -6 route show | grep %s", tc.peerCIDR)) + expNeighStr = fmt.Sprintf("%s dev %s lladdr aa:bb:cc:dd:ee:ff PERMANENT", nhIP, link.Attrs().Name) + ipNeigh, _ = ExecOutputTrim(fmt.Sprintf("ip -6 neighbor show | grep %s", nhIP)) + } + expRouteStr = strings.Join(strings.Fields(expRouteStr), "") + if len(ipRoute) > len(expRouteStr) { + ipRoute = ipRoute[:len(expRouteStr)] + } + if !assert.Equal(t, expRouteStr, ipRoute) { + t.Errorf("mismatch route") + } + if expNeighStr != "" { + expNeighStr = strings.Join(strings.Fields(expNeighStr), "") + if len(ipNeigh) > len(expNeighStr) { + ipNeigh = ipNeigh[:len(expNeighStr)] + } + if !assert.Equal(t, expNeighStr, ipNeigh) { + t.Errorf("mismatch IPv6 Neighbor") + } + } + if err := routeClient.DeleteRoutes(peerCIDR); err != nil { + t.Errorf("route delete failed with err %v", err) + } + output, err := ExecOutputTrim(fmt.Sprintf("ip route show table 0 exact %s", peerCIDR)) + assert.NoError(t, err) + assert.Equal(t, "", output, "expected no routes to %s", peerCIDR) + } +} diff --git a/test/integration/ovs/ofctrl_test.go b/test/integration/ovs/ofctrl_test.go index df47a8690e8..e270636e7fe 100644 --- a/test/integration/ovs/ofctrl_test.go +++ b/test/integration/ovs/ofctrl_test.go @@ -832,6 +832,8 @@ func prepareFlows(table binding.Table) ([]binding.Flow, []*ExpectFlow) { _, AllIPs, _ := net.ParseCIDR("0.0.0.0/0") _, conjSrcIPNet, _ := net.ParseCIDR("192.168.3.0/24") gwMACData, _ := strconv.ParseUint(strings.Replace(gwMAC.String(), ":", "", -1), 16, 64) + _, peerSubnetIPv6, _ := net.ParseCIDR("fd74:ca9b:172:21::/64") + tunnelPeerIPv6 := net.ParseIP("20:ca9b:172:35::3") flows = append(flows, table.BuildFlow(priorityNormal-10). Cookie(getCookieID()). @@ -936,6 +938,15 @@ func prepareFlows(table binding.Table) ([]binding.Flow, []*ExpectFlow) { Action().SetTunnelDst(tunnelPeer). Action().GotoTable(table.GetNext()). Done(), + table.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolIPv6). + Cookie(getCookieID()). + MatchDstIPNet(*peerSubnetIPv6). + Action().DecTTL(). + Action().SetSrcMAC(gwMAC). + Action().SetDstMAC(vMAC). + Action().SetTunnelDst(tunnelPeerIPv6). + Action().GotoTable(table.GetNext()). + Done(), table.BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolIP). Cookie(getCookieID()). MatchDstIP(gwIP). @@ -999,6 +1010,7 @@ func prepareFlows(table binding.Table) ([]binding.Flow, []*ExpectFlow) { &ExpectFlow{"priority=190,ct_state=+new+trk,ip", fmt.Sprintf("ct(commit,table=%d,zone=65520)", table.GetNext())}, &ExpectFlow{"priority=200,ip,dl_dst=aa:bb:cc:dd:ee:ff,nw_dst=192.168.1.3", fmt.Sprintf("set_field:aa:aa:aa:aa:aa:11->eth_src,set_field:aa:aa:aa:aa:aa:13->eth_dst,dec_ttl,%s", gotoTableAction)}, &ExpectFlow{"priority=200,ip,nw_dst=192.168.2.0/24", fmt.Sprintf("dec_ttl,set_field:aa:aa:aa:aa:aa:11->eth_src,set_field:aa:bb:cc:dd:ee:ff->eth_dst,set_field:10.1.1.2->tun_dst,%s", gotoTableAction)}, + &ExpectFlow{"priority=200,ipv6,ipv6_dst=fd74:ca9b:172:21::/64", fmt.Sprintf("dec_ttl,set_field:aa:aa:aa:aa:aa:11->eth_src,set_field:aa:bb:cc:dd:ee:ff->eth_dst,set_field:20:ca9b:172:35::3->tun_ipv6_dst,%s", gotoTableAction)}, &ExpectFlow{"priority=200,ip,nw_dst=192.168.1.1", fmt.Sprintf("set_field:aa:aa:aa:aa:aa:11->eth_dst,%s", gotoTableAction)}, &ExpectFlow{"priority=200,dl_dst=aa:aa:aa:aa:aa:13", fmt.Sprintf("load:0x3->NXM_NX_REG1[],load:0x1->NXM_NX_REG0[16],%s", gotoTableAction)}, &ExpectFlow{"priority=200,ip,reg0=0x10000/0x10000", "output:NXM_NX_REG1[]"},