diff --git a/Documentation/README.md b/Documentation/README.md index e0a2893e25..d05b395932 100644 --- a/Documentation/README.md +++ b/Documentation/README.md @@ -49,7 +49,7 @@ However BGP can be leveraged to other use cases like advertising the cluster ip, ### Try Kube-router with cluster installers -Best way to get started is to deploy Kubernetes with Kube-router is through cluster installer. +The best way to get started is to deploy Kubernetes with Kube-router using a cluster installer. #### kops Please see the [steps](https://github.com/cloudnativelabs/kube-router/blob/master/Documentation/kops.md) to deploy Kubernetes cluster with Kube-router using [Kops](https://github.com/kubernetes/kops) @@ -84,6 +84,7 @@ Also you can choose to run kube-router as agent running on each cluster node. Al --peer-router The ip address of the external router to which all nodes will peer and advertise the cluster ip and pod cidr's --nodes-full-mesh When enabled each node in the cluster will setup BGP peer with rest of the nodes. True by default --hostname-override If non-empty, this string will be used as identification of node name instead of the actual hostname. + --hairpin-mode Adds iptable rules for every ClusterIP Service Endpoint to support hairpin traffic. False by default ``` ### requirements @@ -139,6 +140,42 @@ and if you want to move back to kube-proxy then clean up config done by kube-rou and run kube-proxy with the configuration you have. - [General Setup](/README.md#getting-started) +### Hairpin Mode + +Communication from a Pod that is behind a Service to its own ClusterIP:Port is +not supported by default. However, it can be enabled per-service by adding the +`kube-router.io/hairpin-mode=` annotation, or for all Services in a cluster by +passing the flag `--hairpin-mode=true` to kube-router. + +Additionally, the `hairpin_mode` sysctl option must be set to `1` for all veth +interfaces on each node. 
This can be done by adding the `"hairpinMode": true` +option to your CNI configuration and rebooting all cluster nodes if they are +already running kubernetes. + +Hairpin traffic will be seen by the pod it originated from as coming from the +Service ClusterIP if it is logging the source IP. + +#### Hairpin Mode Example + +10-kuberouter.conf +```json +{ + "name":"mynet", + "type":"bridge", + "bridge":"kube-bridge", + "isDefaultGateway":true, + "hairpinMode":true, + "ipam": { + "type":"host-local" + } +} +``` + +To enable hairpin traffic for Service `my-service`: +``` +kubectl annotate service my-service 'kube-router.io/hairpin-mode=' +``` + ## Develope Guide **Go version 1.7 or above is required to build kube-router** diff --git a/app/controllers/network_services_controller.go b/app/controllers/network_services_controller.go index 795b788f2f..69ed8239de 100644 --- a/app/controllers/network_services_controller.go +++ b/app/controllers/network_services_controller.go @@ -49,6 +49,7 @@ type NetworkServicesController struct { endpointsMap endpointsInfoMap podCidr string masqueradeAll bool + globalHairpin bool client *kubernetes.Clientset } @@ -59,6 +60,7 @@ type serviceInfo struct { protocol string nodePort int sessionAffinity bool + hairpin bool } // map of all services, with unique service id(namespace name, service name, port) as key @@ -125,6 +127,10 @@ func (nsc *NetworkServicesController) sync() { nsc.serviceMap = buildServicesInfo() nsc.endpointsMap = buildEndpointsInfo() + err := nsc.syncHairpinIptablesRules() + if err != nil { + glog.Errorf("Error syncing hairpin iptable rules: %s", err.Error()) + } nsc.syncIpvsServices(nsc.serviceMap, nsc.endpointsMap) } @@ -237,17 +243,23 @@ func (nsc *NetworkServicesController) syncIpvsServices(serviceInfoMap serviceInf Port: uint16(endpoint.port), Weight: 1, } + err := ipvsAddServer(ipvs_cluster_vip_svc, &dst) if err != nil { glog.Errorf(err.Error()) } - activeServiceEndpointMap[clusterServiceId] = 
append(activeServiceEndpointMap[clusterServiceId], endpoint.ip) + + activeServiceEndpointMap[clusterServiceId] = + append(activeServiceEndpointMap[clusterServiceId], endpoint.ip) + if svc.nodePort != 0 { err := ipvsAddServer(ipvs_nodeport_svc, &dst) - activeServiceEndpointMap[nodeServiceId] = append(activeServiceEndpointMap[clusterServiceId], endpoint.ip) if err != nil { glog.Errorf(err.Error()) } + + activeServiceEndpointMap[nodeServiceId] = + append(activeServiceEndpointMap[nodeServiceId], endpoint.ip) } } } @@ -317,7 +329,10 @@ func buildServicesInfo() serviceInfoMap { protocol: strings.ToLower(string(port.Protocol)), nodePort: int(port.NodePort), } + svcInfo.sessionAffinity = (svc.Spec.SessionAffinity == "ClientIP") + _, svcInfo.hairpin = svc.ObjectMeta.Annotations["kube-router.io/hairpin-mode"] + svcId := generateServiceId(svc.Namespace, svc.Name, port.Name) serviceMap[svcId] = &svcInfo } @@ -370,6 +385,201 @@ func ensureMasqueradeIptablesRule(masqueradeAll bool, podCidr string) error { return nil } +// syncHairpinIptablesRules adds/removes iptables rules pertaining to traffic +// from an Endpoint (Pod) to its own service VIP. Rules are only applied if +// enabled globally via CLI argument or a service has an annotation requesting +// it. +func (nsc *NetworkServicesController) syncHairpinIptablesRules() error { + //TODO: Use ipset? 
+ //TODO: Log a warning that this will not work without hairpin sysctl set on veth + + // Key is a string that will match iptables.List() rules + // Value is a string[] with arguments that iptables transaction functions expect + rulesNeeded := make(map[string][]string, 0) + + // Generate the rules that we need + for svcName, svcInfo := range nsc.serviceMap { + if nsc.globalHairpin || svcInfo.hairpin { + for _, ep := range nsc.endpointsMap[svcName] { + // Handle ClusterIP Service + rule, ruleArgs := hairpinRuleFrom(svcInfo.clusterIP.String(), ep.ip, svcInfo.port) + rulesNeeded[rule] = ruleArgs + + // Handle NodePort Service + if svcInfo.nodePort != 0 { + rule, ruleArgs := hairpinRuleFrom(nsc.nodeIP.String(), ep.ip, svcInfo.nodePort) + rulesNeeded[rule] = ruleArgs + } + } + } + } + + // Cleanup (if needed) and return if there's no hairpin-mode Services + if len(rulesNeeded) == 0 { + glog.Infof("No hairpin-mode enabled services found -- no hairpin rules created") + err := deleteHairpinIptablesRules() + if err != nil { + return errors.New("Error deleting hairpin rules: " + err.Error()) + } + return nil + } + + iptablesCmdHandler, err := iptables.New() + if err != nil { + return errors.New("Failed to initialize iptables executor: " + err.Error()) + } + + // TODO: Factor these variables out + hairpinChain := "KUBE-ROUTER-HAIRPIN" + hasHairpinChain := false + + // TODO: Factor out this code + chains, err := iptablesCmdHandler.ListChains("nat") + if err != nil { + return errors.New("Failed to list iptables chains: " + err.Error()) + } + + // TODO: Factor out this code + for _, chain := range chains { + if chain == hairpinChain { + hasHairpinChain = true + } + } + + // Create a chain for hairpin rules, if needed + if !hasHairpinChain { + err = iptablesCmdHandler.NewChain("nat", hairpinChain) + if err != nil { + return errors.New("Failed to create iptables chain \"" + hairpinChain + + "\": " + err.Error()) + } + } + + // Create a rule that targets our hairpin chain, if 
needed + // TODO: Factor this static rule out + jumpArgs := []string{"-m", "ipvs", "--vdir", "ORIGINAL", "-j", hairpinChain} + err = iptablesCmdHandler.AppendUnique("nat", "POSTROUTING", jumpArgs...) + if err != nil { + return errors.New("Failed to add hairpin iptables jump rule: " + err.Error()) + } + + // Apply the rules we need + for _, ruleArgs := range rulesNeeded { + err = iptablesCmdHandler.AppendUnique("nat", hairpinChain, ruleArgs...) + if err != nil { + return errors.New("Failed to apply hairpin iptables rule: " + err.Error()) + } + } + + rulesFromNode, err := iptablesCmdHandler.List("nat", hairpinChain) + if err != nil { + return errors.New("Failed to get rules from iptables chain \"" + + hairpinChain + "\": " + err.Error()) + } + + // Delete invalid/outdated rules + for _, ruleFromNode := range rulesFromNode { + _, ruleIsNeeded := rulesNeeded[ruleFromNode] + if !ruleIsNeeded { + args := strings.Fields(ruleFromNode) + if len(args) > 2 { + args = args[2:] // Strip "-A CHAIN_NAME" + + err = iptablesCmdHandler.Delete("nat", hairpinChain, args...) 
+ if err != nil { + glog.Errorf("Unable to delete hairpin rule \"%s\" from chain %s: %s", ruleFromNode, hairpinChain, err.Error()) + } else { + glog.Infof("Deleted invalid/outdated hairpin rule \"%s\" from chain %s", ruleFromNode, hairpinChain) + } + } else { + // Ignore the chain creation rule + if ruleFromNode == "-N "+hairpinChain { + continue + } + glog.Infof("Not removing invalid hairpin rule \"%s\" from chain %s", ruleFromNode, hairpinChain) + } + } + } + + return nil +} + +func hairpinRuleFrom(serviceIP string, endpointIP string, servicePort int) (string, []string) { + // TODO: Factor hairpinChain out + hairpinChain := "KUBE-ROUTER-HAIRPIN" + + ruleArgs := []string{"-s", endpointIP + "/32", "-d", endpointIP + "/32", + "-m", "ipvs", "--vaddr", serviceIP, "--vport", strconv.Itoa(servicePort), + "-j", "SNAT", "--to-source", serviceIP} + + // Must match the rule text iptables.List() returns so stale rules can be detected + ruleString := "-A " + hairpinChain + " -s " + endpointIP + "/32" + " -d " + + endpointIP + "/32" + " -m ipvs" + " --vaddr " + serviceIP + " --vport " + + strconv.Itoa(servicePort) + " -j SNAT" + " --to-source " + serviceIP + + return ruleString, ruleArgs +} + +func deleteHairpinIptablesRules() error { + iptablesCmdHandler, err := iptables.New() + if err != nil { + return errors.New("Failed to initialize iptables executor: " + err.Error()) + } + + // TODO: Factor out this code + chains, err := iptablesCmdHandler.ListChains("nat") + if err != nil { + return errors.New("Failed to list iptables chains: " + err.Error()) + } + + // TODO: Factor these variables out + hairpinChain := "KUBE-ROUTER-HAIRPIN" + hasHairpinChain := false + + // TODO: Factor out this code + for _, chain := range chains { + if chain == hairpinChain { + hasHairpinChain = true + break + } + } + + // Nothing left to do if hairpin chain doesn't exist + if !hasHairpinChain { + return nil + } + + // TODO: Factor this static jump rule out + jumpArgs := []string{"-m", "ipvs", "--vdir", "ORIGINAL", "-j", hairpinChain} + 
hasHairpinJumpRule, err := iptablesCmdHandler.Exists("nat", "POSTROUTING", jumpArgs...) + if err != nil { + return errors.New("Failed to search POSTROUTING iptable rules: " + err.Error()) + } + + // Delete the jump rule to the hairpin chain + if hasHairpinJumpRule { + err = iptablesCmdHandler.Delete("nat", "POSTROUTING", jumpArgs...) + if err != nil { + glog.Errorf("Unable to delete hairpin jump rule from chain \"POSTROUTING\": %s", err.Error()) + } else { + glog.Info("Deleted hairpin jump rule from chain \"POSTROUTING\"") + } + } + + // Flush and delete the chain for hairpin rules + err = iptablesCmdHandler.ClearChain("nat", hairpinChain) + if err != nil { + return errors.New("Failed to flush iptables chain \"" + hairpinChain + + "\": " + err.Error()) + } + err = iptablesCmdHandler.DeleteChain("nat", hairpinChain) + if err != nil { + return errors.New("Failed to delete iptables chain \"" + hairpinChain + + "\": " + err.Error()) + } + return nil +} + func ensureIpvsConntrack() error { return ioutil.WriteFile("/proc/sys/net/ipv4/vs/conntrack", []byte(strconv.Itoa(1)), 0640) } @@ -479,7 +689,6 @@ func getKubeDummyInterface() (netlink.Link, error) { // clean up all the configurations (IPVS, iptables, links) func (nsc *NetworkServicesController) Cleanup() { - // cleanup ipvs rules by flush glog.Infof("Cleaning up IPVS configuration permanently") err := h.Flush() @@ -495,6 +704,13 @@ func (nsc *NetworkServicesController) Cleanup() { return } + // cleanup iptable hairpin rules + err = deleteHairpinIptablesRules() + if err != nil { + glog.Errorf("Failed to cleanup iptable hairpin rules: %s", err.Error()) + return + } + // delete dummy interface used to assign cluster IP's dummyVipInterface, err := netlink.LinkByName(KUBE_DUMMY_IF) if err != nil { diff --git a/app/options/options.go b/app/options/options.go index f37ea67ebe..22a98b5321 100755 --- a/app/options/options.go +++ b/app/options/options.go @@ -26,6 +26,7 @@ type KubeRouterConfig struct { ClusterAsn string PeerAsn string 
FullMeshMode bool + GlobalHairpinMode bool } func NewKubeRouterConfig() *KubeRouterConfig { @@ -38,7 +39,8 @@ func NewKubeRouterConfig() *KubeRouterConfig { RunFirewall: true, RunRouter: true, FullMeshMode: true, - AdvertiseClusterIp: false} + AdvertiseClusterIp: false, + GlobalHairpinMode: false} } func (s *KubeRouterConfig) AddFlags(fs *pflag.FlagSet) { @@ -61,4 +63,5 @@ func (s *KubeRouterConfig) AddFlags(fs *pflag.FlagSet) { fs.StringVar(&s.PeerAsn, "peer-asn", s.PeerAsn, "ASN number of the BGP peer to which cluster nodes will advertise cluster ip and node's pod cidr") fs.BoolVar(&s.FullMeshMode, "nodes-full-mesh", s.FullMeshMode, "When enabled each node in the cluster will setup BGP peer with rest of the nodes. True by default") fs.StringVar(&s.HostnameOverride, "hostname-override", s.HostnameOverride, "If non-empty, will use this string as identification instead of the actual hostname.") + fs.BoolVar(&s.GlobalHairpinMode, "hairpin-mode", s.GlobalHairpinMode, "Adds iptable rules for every ClusterIP Service Endpoint to support hairpin traffic. False by default") }