Skip to content

Commit

Permalink
[Windows] Fix Pod cannot access endpoints with external IP through Cl…
Browse files Browse the repository at this point in the history
…usterIP Service (antrea-io#1824)

When a Pod accesses a ClusterIP Service and the IP of the selected
endpoint is not in "cluster-cidr". The request packets need to be
SNAT'd after have been DNAT'd. For example, the endpoint Pod may
run in hostNetwork and the IP of the endpoint is the current
Node IP. Currently, on Windows Node antrea applies both DNAT
and SNAT in the same ct_zone. That's not supported by OVS.

In this patch, we introduce a new ct_zone to track this kind of
SNATed connection in a different ct_zone.

Fixes: antrea-io#1759

Signed-off-by: Rui Cao <[email protected]>
  • Loading branch information
ruicao93 authored and antoninbas committed Feb 10, 2021
1 parent b1ecbc8 commit 0ccd644
Show file tree
Hide file tree
Showing 5 changed files with 161 additions and 16 deletions.
105 changes: 92 additions & 13 deletions pkg/agent/openflow/pipeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,19 @@ const (

CtZone = 0xfff0
CtZoneV6 = 0xffe6
// CtZoneSNAT is only used on Windows and only when AntreaProxy is enabled.
// When a Pod access a ClusterIP Service, and the IP of the selected endpoint
// is not in "cluster-cidr". The request packets need to be SNAT'd(set src IP to local Node IP)
// after have been DNAT'd(set dst IP to endpoint IP).
// For example, the endpoint Pod may run in hostNetwork mode and the IP of the endpoint
// will is the current Node IP.
// We need to use a different ct_zone to track the SNAT'd connection because OVS
// does not support doing both DNAT and SNAT in the same ct_zone.
//
// An example of the connection is a Pod accesses kubernetes API service:
// Pod --> DNAT(CtZone) --> SNAT(CtZoneSNAT) --> Endpoint(API server NodeIP)
// Pod <-- unDNAT(CtZone) <-- unSNAT(CtZoneSNAT) <-- Endpoint(API server NodeIP)
CtZoneSNAT = 0xffdc

portFoundMark = 0b1
snatRequiredMark = 0b1
Expand Down Expand Up @@ -682,7 +695,7 @@ func (c *client) ctRewriteDstMACFlows(gatewayMAC net.HardwareAddr, category cook
// service LB tables and enter egressRuleTable directly.
func (c *client) serviceLBBypassFlows(ipProtocol binding.Protocol) []binding.Flow {
connectionTrackStateTable := c.pipeline[conntrackStateTable]
return []binding.Flow{
flows := []binding.Flow{
// Tracked connections with the ServiceCTMark (load-balanced by AntreaProxy) receive
// the macRewriteMark and are sent to egressRuleTable.
connectionTrackStateTable.BuildFlow(priorityNormal).MatchProtocol(ipProtocol).
Expand All @@ -701,6 +714,26 @@ func (c *client) serviceLBBypassFlows(ipProtocol binding.Protocol) []binding.Flo
Cookie(c.cookieAllocator.Request(cookie.Service).Raw()).
Done(),
}

if runtime.IsWindowsPlatform() && ipProtocol == binding.ProtocolIP {
// Handle the reply packets of the connection which are applied both DNAT and SNAT.
// The packets have following characteristics:
// - Received from uplink
// - ct_state is "-new+trk"
// - ct_mark is set to 0x21(ServiceCTMark)
// This flow resubmits the packets to the following table to avoid being forwarded
// to the bridge port by default.
flows = append(flows, c.pipeline[conntrackStateTable].BuildFlow(priorityHigh).
MatchProtocol(ipProtocol).
MatchCTStateNew(false).MatchCTStateTrk(true).
MatchCTMark(ServiceCTMark, nil).
MatchRegRange(int(marksReg), markTrafficFromUplink, binding.Range{0, 15}).
Action().LoadRegRange(int(marksReg), macRewriteMark, macRewriteMarkRange).
Action().GotoTable(EgressRuleTable).
Cookie(c.cookieAllocator.Request(cookie.Service).Raw()).
Done())
}
return flows
}

// l2ForwardCalcFlow generates the flow that matches dst MAC and loads ofPort to reg.
Expand Down Expand Up @@ -1483,16 +1516,6 @@ func (c *client) uplinkSNATFlows(category cookie.Category) []binding.Flow {
}
bridgeOFPort := uint32(config.BridgeOFPort)
flows := []binding.Flow{
// Forward the IP packets from the uplink interface to
// conntrackTable. This is for unSNAT the traffic from the local
// Pod subnet to the external network. Non-SNAT packets will be
// output to the bridge port in conntrackStateTable.
c.pipeline[uplinkTable].BuildFlow(priorityNormal).
MatchProtocol(binding.ProtocolIP).
MatchRegRange(int(marksReg), markTrafficFromUplink, binding.Range{0, 15}).
Action().GotoTable(conntrackTable).
Cookie(c.cookieAllocator.Request(category).Raw()).
Done(),
// Mark the packet to indicate its destination MAC should be rewritten to the real MAC in the L3Forwarding
// table, if the packet is a reply to a Pod from an external address.
c.pipeline[conntrackStateTable].BuildFlow(priorityHigh).
Expand All @@ -1512,6 +1535,29 @@ func (c *client) uplinkSNATFlows(category cookie.Category) []binding.Flow {
Cookie(c.cookieAllocator.Request(category).Raw()).
Done(),
}
// Forward the IP packets from the uplink interface to
// conntrackTable. This is for unSNAT the traffic from the local
// Pod subnet to the external network. Non-SNAT packets will be
// output to the bridge port in conntrackStateTable.
if c.enableProxy {
// For the connection which is both applied DNAT and SNAT, the reply packtets
// are received from uplink and need to enter CTZoneSNAT first to do unSNAT.
// Pod --> DNAT(CtZone) --> SNAT(CtZoneSNAT) --> ExternalServer
// Pod <-- unDNAT(CtZone) <-- unSNAT(CtZoneSNAT) <-- ExternalServer
flows = append(flows, c.pipeline[uplinkTable].BuildFlow(priorityNormal).
MatchProtocol(binding.ProtocolIP).
MatchRegRange(int(marksReg), markTrafficFromUplink, binding.Range{0, 15}).
Action().CT(false, conntrackTable, CtZoneSNAT).NAT().CTDone().
Cookie(c.cookieAllocator.Request(category).Raw()).
Done())
} else {
flows = append(flows, c.pipeline[uplinkTable].BuildFlow(priorityNormal).
MatchProtocol(binding.ProtocolIP).
MatchRegRange(int(marksReg), markTrafficFromUplink, binding.Range{0, 15}).
Action().GotoTable(conntrackTable).
Cookie(c.cookieAllocator.Request(category).Raw()).
Done())
}
return flows
}

Expand Down Expand Up @@ -1567,7 +1613,6 @@ func (c *client) snatFlows(nodeIP net.IP, localSubnet net.IPNet, category cookie
Action().GotoTable(nextTable).
Cookie(c.cookieAllocator.Request(category).Raw()).
Done(),

// Force IP packet into the conntrack zone with SNAT. If the connection is SNATed, the reply packet should use
// Pod IP as the destination, and then is forwarded to conntrackStateTable.
c.pipeline[conntrackTable].BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolIP).
Expand All @@ -1579,14 +1624,48 @@ func (c *client) snatFlows(nodeIP net.IP, localSubnet net.IPNet, category cookie
// source IP in NAT action, 4) ct_mark is set to 0x40 in the conn_track context.
c.pipeline[conntrackCommitTable].BuildFlow(priorityNormal).
MatchProtocol(binding.ProtocolIP).
MatchCTStateNew(true).MatchCTStateTrk(true).
MatchCTStateNew(true).MatchCTStateTrk(true).MatchCTStateDNAT(false).
MatchRegRange(int(marksReg), snatRequiredMark, snatMarkRange).
Action().CT(true, L2ForwardingOutTable, CtZone).
SNAT(snatIPRange, nil).
LoadToMark(snatCTMark).CTDone().
Cookie(c.cookieAllocator.Request(category).Raw()).
Done(),
}
// The following flows are for both apply DNAT + SNAT for packets.
// If AntreaProxy is disabled, no DNAT happens in OVS pipeline.
if c.enableProxy {
// If the SNAT is needed after DNAT, mark the snatRequiredMark even the connection is not new.
// Because this kind of packets need to enter CtZoneSNAT to make sure the SNAT can be applied
// before leaving the pipeline.
flows = append(flows, l3FwdTable.BuildFlow(priorityLow).
MatchProtocol(binding.ProtocolIP).
MatchCTStateNew(false).MatchCTStateTrk(true).MatchCTStateDNAT(true).
MatchRegRange(int(marksReg), markTrafficFromLocal, binding.Range{0, 15}).
Action().LoadRegRange(int(marksReg), snatRequiredMark, snatMarkRange).
Action().GotoTable(nextTable).
Cookie(c.cookieAllocator.Request(category).Raw()).
Done())
// If SNAT is needed after DNAT:
// - For new connection: commit to CtZoneSNAT
// - For existing connection: enter CtZoneSNAT to apply SNAT
flows = append(flows, c.pipeline[conntrackCommitTable].BuildFlow(priorityNormal).
MatchProtocol(binding.ProtocolIP).
MatchCTStateNew(true).MatchCTStateTrk(true).MatchCTStateDNAT(true).
MatchRegRange(int(marksReg), snatRequiredMark, snatMarkRange).
Action().CT(true, L2ForwardingOutTable, CtZoneSNAT).
SNAT(snatIPRange, nil).
LoadToMark(snatCTMark).CTDone().
Cookie(c.cookieAllocator.Request(category).Raw()).
Done())
flows = append(flows, c.pipeline[conntrackCommitTable].BuildFlow(priorityNormal).
MatchProtocol(binding.ProtocolIP).
MatchCTStateNew(false).MatchCTStateTrk(true).MatchCTStateDNAT(true).
MatchRegRange(int(marksReg), snatRequiredMark, snatMarkRange).
Action().CT(false, L2ForwardingOutTable, CtZoneSNAT).NAT().CTDone().
Cookie(c.cookieAllocator.Request(category).Raw()).
Done())
}
return flows
}

Expand Down
2 changes: 2 additions & 0 deletions pkg/ovs/openflow/interfaces.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,8 @@ type FlowBuilder interface {
MatchCTStateEst(isSet bool) FlowBuilder
MatchCTStateTrk(isSet bool) FlowBuilder
MatchCTStateInv(isSet bool) FlowBuilder
MatchCTStateDNAT(isSet bool) FlowBuilder
MatchCTStateSNAT(isSet bool) FlowBuilder
MatchCTMark(value uint32, mask *uint32) FlowBuilder
MatchCTLabelRange(high, low uint64, bitRange Range) FlowBuilder
MatchConjID(value uint32) FlowBuilder
Expand Down
28 changes: 28 additions & 0 deletions pkg/ovs/openflow/ofctrl_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,34 @@ func (b *ofFlowBuilder) MatchCTStateInv(set bool) FlowBuilder {
return b
}

func (b *ofFlowBuilder) MatchCTStateDNAT(set bool) FlowBuilder {
if b.ctStates == nil {
b.ctStates = openflow13.NewCTStates()
}
if set {
b.ctStates.SetDNAT()
b.addCTStateString("+dnat")
} else {
b.ctStates.UnsetDNAT()
b.addCTStateString("-dnat")
}
return b
}

func (b *ofFlowBuilder) MatchCTStateSNAT(set bool) FlowBuilder {
if b.ctStates == nil {
b.ctStates = openflow13.NewCTStates()
}
if set {
b.ctStates.SetSNAT()
b.addCTStateString("+snat")
} else {
b.ctStates.UnsetSNAT()
b.addCTStateString("-snat")
}
return b
}

// MatchCTMark adds match condition for matching ct_mark. If mask is nil, the mask should be not set in the OpenFlow
// message which is sent to OVS, and OVS should match the value exactly.
func (b *ofFlowBuilder) MatchCTMark(value uint32, mask *uint32) FlowBuilder {
Expand Down
30 changes: 29 additions & 1 deletion pkg/ovs/openflow/testing/mock_openflow.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 10 additions & 2 deletions test/integration/agent/openflow_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1272,7 +1272,7 @@ func prepareExternalFlows(nodeIP net.IP, localSubnet *net.IPNet, vMAC net.Hardwa
[]*ofTestUtils.ExpectFlow{
{
MatchStr: fmt.Sprintf("priority=200,ip,reg0=0x4/0xffff"),
ActStr: "goto_table:30",
ActStr: "ct(table=30,zone=65500,nat)",
},
},
},
Expand Down Expand Up @@ -1321,9 +1321,17 @@ func prepareExternalFlows(nodeIP net.IP, localSubnet *net.IPNet, vMAC net.Hardwa
uint8(105),
[]*ofTestUtils.ExpectFlow{
{
MatchStr: "priority=200,ct_state=+new+trk,ip,reg0=0x20000/0x20000",
MatchStr: "priority=200,ct_state=+new+trk-dnat,ip,reg0=0x20000/0x20000",
ActStr: fmt.Sprintf("ct(commit,table=110,zone=65520,nat(src=%s),exec(load:0x40->NXM_NX_CT_MARK[]))", nodeIP.String()),
},
{
MatchStr: "priority=200,ct_state=+new+trk+dnat,ip,reg0=0x20000/0x20000",
ActStr: fmt.Sprintf("ct(commit,table=110,zone=65500,nat(src=%s),exec(load:0x40->NXM_NX_CT_MARK[]))", nodeIP.String()),
},
{
MatchStr: "priority=200,ct_state=-new+trk+dnat,ip,reg0=0x20000/0x20000",
ActStr: "ct(table=110,zone=65500,nat)",
},
},
},
}
Expand Down

0 comments on commit 0ccd644

Please sign in to comment.