Skip to content

Commit

Permalink
[Windows] Fix Pod cannot access k8s API server service issue
Browse files Browse the repository at this point in the history
When a Pod accesses a ClusterIP Service and the IP of the selected
endpoint is not in "cluster-cidr", the request packets need to be
SNAT'd after having been DNAT'd. For example, the endpoint Pod may
run in hostNetwork mode, in which case the IP of the endpoint is the
current Node IP. Currently, on a Windows Node, Antrea applies both
DNAT and SNAT in the same ct_zone, which is not supported by OVS.

In this patch, we introduce a new, separate ct_zone that is used to
track this kind of SNAT'd connection.

Fixes: antrea-io#1759

Signed-off-by: Rui Cao <[email protected]>
  • Loading branch information
ruicao93 committed Feb 8, 2021
1 parent efc9f5c commit 245fada
Show file tree
Hide file tree
Showing 6 changed files with 54 additions and 51 deletions.
59 changes: 31 additions & 28 deletions pkg/agent/openflow/pipeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,9 @@ import (
"github.com/vmware-tanzu/antrea/pkg/agent/metrics"
"github.com/vmware-tanzu/antrea/pkg/agent/openflow/cookie"
"github.com/vmware-tanzu/antrea/pkg/agent/types"
"github.com/vmware-tanzu/antrea/pkg/agent/util"
binding "github.com/vmware-tanzu/antrea/pkg/ovs/openflow"
"github.com/vmware-tanzu/antrea/pkg/ovs/ovsctl"
"github.com/vmware-tanzu/antrea/pkg/util/env"
"github.com/vmware-tanzu/antrea/third_party/proxy"
)

Expand Down Expand Up @@ -222,16 +222,19 @@ const (

CtZone = 0xfff0
CtZoneV6 = 0xffe6
// CtZoneSnat is only used on Windows and only when AntreaProxy is enabled.
// When a Pod accesses a cluster Service and the selected endpoint uses the Node IP (hostNetwork mode),
// the request packets need to be SNATed after having been DNATed. We use a different
// ct_zone to track the SNATed connection because OVS does not support doing both
// DNAT and SNAT in the same zone.
// CtZoneSNAT is only used on Windows and only when AntreaProxy is enabled.
// When a Pod accesses a ClusterIP Service and the IP of the selected endpoint
// is not in "cluster-cidr", the request packets need to be SNAT'd (src IP set to the
// local Node IP) after having been DNAT'd (dst IP set to the endpoint IP).
// For example, the endpoint Pod may run in hostNetwork mode, in which case the IP
// of the endpoint is the current Node IP.
// We need to use a different ct_zone to track the SNAT'd connection because OVS
// does not support doing both DNAT and SNAT in the same ct_zone.
//
// An example of such a connection is a Pod accessing the Kubernetes API Service:
// Pod --> DNAT(CtZone) --> SNAT(CtZoneSnat) --> Endpoint(API server NodeIP)
// Pod <-- unDNAT(CtZone) <-- unSNAT(CtZoneSnat) <-- Endpoint(API server NodeIP)
CtZoneSnat = 0xffdc
// An example of such a connection is a Pod accessing the Kubernetes API Service:
// Pod --> DNAT(CtZone) --> SNAT(CtZoneSNAT) --> Endpoint(API server NodeIP)
// Pod <-- unDNAT(CtZone) <-- unSNAT(CtZoneSNAT) <-- Endpoint(API server NodeIP)
CtZoneSNAT = 0xffdc

portFoundMark = 0b1
snatRequiredMark = 0b1
Expand Down Expand Up @@ -722,14 +725,14 @@ func (c *client) serviceLBBypassFlows(ipProtocol binding.Protocol) []binding.Flo
Done(),
}

if util.IsWindowsPlatform() && ipProtocol == binding.ProtocolIP {
if env.IsWindowsPlatform() && ipProtocol == binding.ProtocolIP {
// Handle the reply packets of connections to which both DNAT and SNAT have been applied.
// The packets has following characteristics:
// The packets have the following characteristics:
// - Received from uplink
// - ct_state is "-new+trk"
// - ct_mark is set to 0x21
// This flow resubmit the packets to the following table to avoid being forwarded
// to br-int which is default.
// - ct_mark is set to 0x21(ServiceCTMark)
// This flow resubmits the packets to the following table to avoid being forwarded
// to the bridge port by default.
flows = append(flows, c.pipeline[conntrackStateTable].BuildFlow(priorityHigh).
MatchProtocol(ipProtocol).
MatchCTStateNew(false).MatchCTStateTrk(true).
Expand Down Expand Up @@ -1570,14 +1573,14 @@ func (c *client) uplinkSNATFlows(category cookie.Category) []binding.Flow {
// Pod subnet to the external network. Non-SNAT packets will be
// output to the bridge port in conntrackStateTable.
if c.enableProxy {
// Put the packets into CtZoneSnat first for the connection which is both
// Put the packets into CtZoneSNAT first for connections to which both
// DNAT and SNAT are applied:
// Pod --> DNAT(CtZone) --> SNAT(CtZoneSnat) --> ExternalServer
// Pod <-- unDNAT(CtZone) <-- unSNAT(CtZoneSnat) <-- ExternalServer
// Pod --> DNAT(CtZone) --> SNAT(CtZoneSNAT) --> ExternalServer
// Pod <-- unDNAT(CtZone) <-- unSNAT(CtZoneSNAT) <-- ExternalServer
flows = append(flows, c.pipeline[uplinkTable].BuildFlow(priorityNormal).
MatchProtocol(binding.ProtocolIP).
MatchRegRange(int(marksReg), markTrafficFromUplink, binding.Range{0, 15}).
Action().CT(false, conntrackTable, CtZoneSnat).NAT().CTDone().
Action().CT(false, conntrackTable, CtZoneSNAT).NAT().CTDone().
Cookie(c.cookieAllocator.Request(category).Raw()).
Done())
} else {
Expand Down Expand Up @@ -1654,7 +1657,7 @@ func (c *client) snatFlows(nodeIP net.IP, localSubnet net.IPNet, category cookie
// source IP in NAT action, 4) ct_mark is set to 0x40 in the conn_track context.
c.pipeline[conntrackCommitTable].BuildFlow(priorityNormal).
MatchProtocol(binding.ProtocolIP).
MatchCTStateNew(true).MatchCTStateTrk(true).MatchCTStateDnat(false).
MatchCTStateNew(true).MatchCTStateTrk(true).MatchCTStateDNAT(false).
MatchRegRange(int(marksReg), snatRequiredMark, snatMarkRange).
Action().CT(true, L2ForwardingOutTable, CtZone).
SNAT(snatIPRange, nil).
Expand All @@ -1666,32 +1669,32 @@ func (c *client) snatFlows(nodeIP net.IP, localSubnet net.IPNet, category cookie
// If AntreaProxy is disabled, no DNAT happens in OVS pipeline.
if c.enableProxy {
// If SNAT is needed after DNAT, set the snatRequiredMark even if the connection is not new.
// Because this kind of packets need to enter CtZoneSnat make sure the SNAT can be applied before
// Because this kind of packet needs to enter CtZoneSNAT to make sure the SNAT can be applied
// before leaving the pipeline.
flows = append(flows, l3FwdTable.BuildFlow(priorityLow).
MatchProtocol(binding.ProtocolIP).
MatchCTStateNew(false).MatchCTStateTrk(true).MatchCTStateDnat(true).
MatchCTStateNew(false).MatchCTStateTrk(true).MatchCTStateDNAT(true).
Action().LoadRegRange(int(marksReg), snatRequiredMark, snatMarkRange).
Action().GotoTable(nextTable).
Cookie(c.cookieAllocator.Request(category).Raw()).
Done())
// If SNAT is needed after DNAT:
// - For new connection: commit to CtZoneSnat
// - For existing connection: enter CtZoneSnat to apply SNAT
// - For new connection: commit to CtZoneSNAT
// - For existing connection: enter CtZoneSNAT to apply SNAT
flows = append(flows, c.pipeline[conntrackCommitTable].BuildFlow(priorityNormal).
MatchProtocol(binding.ProtocolIP).
MatchCTStateNew(true).MatchCTStateTrk(true).MatchCTStateDnat(true).
MatchCTStateNew(true).MatchCTStateTrk(true).MatchCTStateDNAT(true).
MatchRegRange(int(marksReg), snatRequiredMark, snatMarkRange).
Action().CT(true, L2ForwardingOutTable, CtZoneSnat).
Action().CT(true, L2ForwardingOutTable, CtZoneSNAT).
SNAT(snatIPRange, nil).
LoadToMark(snatCTMark).CTDone().
Cookie(c.cookieAllocator.Request(category).Raw()).
Done())
flows = append(flows, c.pipeline[conntrackCommitTable].BuildFlow(priorityNormal).
MatchProtocol(binding.ProtocolIP).
MatchCTStateNew(false).MatchCTStateTrk(true).MatchCTStateDnat(true).
MatchCTStateNew(false).MatchCTStateTrk(true).MatchCTStateDNAT(true).
MatchRegRange(int(marksReg), snatRequiredMark, snatMarkRange).
Action().CT(false, L2ForwardingOutTable, CtZoneSnat).NAT().CTDone().
Action().CT(false, L2ForwardingOutTable, CtZoneSNAT).NAT().CTDone().
Cookie(c.cookieAllocator.Request(category).Raw()).
Done())
}
Expand Down
7 changes: 0 additions & 7 deletions pkg/agent/util/net.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import (
"fmt"
"io"
"net"
"runtime"
)

const (
Expand All @@ -31,8 +30,6 @@ const (

FamilyIPv4 uint8 = 4
FamilyIPv6 uint8 = 6

windowsPlatform = "windows"
)

func generateInterfaceName(key string, name string, useHead bool) string {
Expand Down Expand Up @@ -152,7 +149,3 @@ func GetIPWithFamily(ips []net.IP, addrFamily uint8) (net.IP, error) {
return nil, errors.New("no IP found with IPv4 AddressFamily")
}
}

// IsWindowsPlatform reports whether runtime.GOOS matches the windowsPlatform
// identifier ("windows"), i.e. whether this binary was built for Windows.
func IsWindowsPlatform() bool {
	goos := runtime.GOOS
	return goos == windowsPlatform
}
4 changes: 2 additions & 2 deletions pkg/ovs/openflow/interfaces.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,8 +225,8 @@ type FlowBuilder interface {
MatchCTStateEst(isSet bool) FlowBuilder
MatchCTStateTrk(isSet bool) FlowBuilder
MatchCTStateInv(isSet bool) FlowBuilder
MatchCTStateDnat(isSet bool) FlowBuilder
MatchCTStateSnat(isSet bool) FlowBuilder
MatchCTStateDNAT(isSet bool) FlowBuilder
MatchCTStateSNAT(isSet bool) FlowBuilder
MatchCTMark(value uint32, mask *uint32) FlowBuilder
MatchCTLabelRange(high, low uint64, bitRange Range) FlowBuilder
MatchConjID(value uint32) FlowBuilder
Expand Down
4 changes: 2 additions & 2 deletions pkg/ovs/openflow/ofctrl_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ func (b *ofFlowBuilder) MatchCTStateInv(set bool) FlowBuilder {
return b
}

func (b *ofFlowBuilder) MatchCTStateDnat(set bool) FlowBuilder {
func (b *ofFlowBuilder) MatchCTStateDNAT(set bool) FlowBuilder {
if b.ctStates == nil {
b.ctStates = openflow13.NewCTStates()
}
Expand All @@ -203,7 +203,7 @@ func (b *ofFlowBuilder) MatchCTStateDnat(set bool) FlowBuilder {
return b
}

func (b *ofFlowBuilder) MatchCTStateSnat(set bool) FlowBuilder {
func (b *ofFlowBuilder) MatchCTStateSNAT(set bool) FlowBuilder {
if b.ctStates == nil {
b.ctStates = openflow13.NewCTStates()
}
Expand Down
24 changes: 12 additions & 12 deletions pkg/ovs/openflow/testing/mock_openflow.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions pkg/util/env/env.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package env

import (
"os"
"runtime"
"strconv"

"k8s.io/klog"
Expand All @@ -29,6 +30,8 @@ const (
svcAcctNameEnvKey = "SERVICEACCOUNT_NAME"

antreaCloudEKSEnvKey = "ANTREA_CLOUD_EKS"

windowsOS = "windows"
)

// GetNodeName returns the node's name used in Kubernetes, based on the priority:
Expand Down Expand Up @@ -93,3 +96,7 @@ func getBoolEnvVar(name string, defaultValue bool) bool {
func IsCloudEKS() bool {
return getBoolEnvVar(antreaCloudEKSEnvKey, false)
}

// IsWindowsPlatform reports whether runtime.GOOS matches the windowsOS
// identifier ("windows"), i.e. whether this binary was built for Windows.
func IsWindowsPlatform() bool {
	goos := runtime.GOOS
	return goos == windowsOS
}

0 comments on commit 245fada

Please sign in to comment.