-
Notifications
You must be signed in to change notification settings - Fork 370
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Windows] Fix Pod cannot access endpoints with external IP through ClusterIP Service #1824
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -222,6 +222,19 @@ const ( | |
|
||
CtZone = 0xfff0 | ||
CtZoneV6 = 0xffe6 | ||
// CtZoneSNAT is only used on Windows and only when AntreaProxy is enabled. | ||
// When a Pod access a ClusterIP Service, and the IP of the selected endpoint | ||
// is not in "cluster-cidr". The request packets need to be SNAT'd(set src IP to local Node IP) | ||
// after have been DNAT'd(set dst IP to endpoint IP). | ||
// For example, the endpoint Pod may run in hostNetwork mode and the IP of the endpoint | ||
// will is the current Node IP. | ||
// We need to use a different ct_zone to track the SNAT'd connection because OVS | ||
// does not support doing both DNAT and SNAT in the same ct_zone. | ||
// | ||
// An example of the connection is a Pod accesses kubernetes API service: | ||
// Pod --> DNAT(CtZone) --> SNAT(CtZoneSNAT) --> Endpoint(API server NodeIP) | ||
// Pod <-- unDNAT(CtZone) <-- unSNAT(CtZoneSNAT) <-- Endpoint(API server NodeIP) | ||
CtZoneSNAT = 0xffdc | ||
|
||
portFoundMark = 0b1 | ||
snatRequiredMark = 0b1 | ||
|
@@ -692,7 +705,7 @@ func (c *client) ctRewriteDstMACFlows(gatewayMAC net.HardwareAddr, category cook | |
// service LB tables and enter egressRuleTable directly. | ||
func (c *client) serviceLBBypassFlows(ipProtocol binding.Protocol) []binding.Flow { | ||
connectionTrackStateTable := c.pipeline[conntrackStateTable] | ||
return []binding.Flow{ | ||
flows := []binding.Flow{ | ||
// Tracked connections with the ServiceCTMark (load-balanced by AntreaProxy) receive | ||
// the macRewriteMark and are sent to egressRuleTable. | ||
connectionTrackStateTable.BuildFlow(priorityNormal).MatchProtocol(ipProtocol). | ||
|
@@ -711,6 +724,26 @@ func (c *client) serviceLBBypassFlows(ipProtocol binding.Protocol) []binding.Flo | |
Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). | ||
Done(), | ||
} | ||
|
||
if runtime.IsWindowsPlatform() && ipProtocol == binding.ProtocolIP { | ||
// Handle the reply packets of the connection which are applied both DNAT and SNAT. | ||
// The packets have following characteristics: | ||
// - Received from uplink | ||
// - ct_state is "-new+trk" | ||
// - ct_mark is set to 0x21(ServiceCTMark) | ||
// This flow resubmits the packets to the following table to avoid being forwarded | ||
// to the bridge port by default. | ||
flows = append(flows, c.pipeline[conntrackStateTable].BuildFlow(priorityHigh). | ||
MatchProtocol(ipProtocol). | ||
MatchCTStateNew(false).MatchCTStateTrk(true). | ||
MatchCTMark(ServiceCTMark, nil). | ||
MatchRegRange(int(marksReg), markTrafficFromUplink, binding.Range{0, 15}). | ||
Action().LoadRegRange(int(marksReg), macRewriteMark, macRewriteMarkRange). | ||
Action().GotoTable(EgressRuleTable). | ||
Cookie(c.cookieAllocator.Request(cookie.Service).Raw()). | ||
Done()) | ||
} | ||
return flows | ||
} | ||
|
||
// l2ForwardCalcFlow generates the flow that matches dst MAC and loads ofPort to reg. | ||
|
@@ -1516,16 +1549,6 @@ func (c *client) uplinkSNATFlows(category cookie.Category) []binding.Flow { | |
} | ||
bridgeOFPort := uint32(config.BridgeOFPort) | ||
flows := []binding.Flow{ | ||
// Forward the IP packets from the uplink interface to | ||
// conntrackTable. This is for unSNAT the traffic from the local | ||
// Pod subnet to the external network. Non-SNAT packets will be | ||
// output to the bridge port in conntrackStateTable. | ||
c.pipeline[uplinkTable].BuildFlow(priorityNormal). | ||
MatchProtocol(binding.ProtocolIP). | ||
MatchRegRange(int(marksReg), markTrafficFromUplink, binding.Range{0, 15}). | ||
Action().GotoTable(conntrackTable). | ||
Cookie(c.cookieAllocator.Request(category).Raw()). | ||
Done(), | ||
// Mark the packet to indicate its destination MAC should be rewritten to the real MAC in the L3Forwarding | ||
// table, if the packet is a reply to a Pod from an external address. | ||
c.pipeline[conntrackStateTable].BuildFlow(priorityHigh). | ||
|
@@ -1545,6 +1568,29 @@ func (c *client) uplinkSNATFlows(category cookie.Category) []binding.Flow { | |
Cookie(c.cookieAllocator.Request(category).Raw()). | ||
Done(), | ||
} | ||
// Forward the IP packets from the uplink interface to | ||
// conntrackTable. This is for unSNAT the traffic from the local | ||
// Pod subnet to the external network. Non-SNAT packets will be | ||
// output to the bridge port in conntrackStateTable. | ||
if c.enableProxy { | ||
// For the connection which is both applied DNAT and SNAT, the reply packtets | ||
// are received from uplink and need to enter CTZoneSNAT first to do unSNAT. | ||
// Pod --> DNAT(CtZone) --> SNAT(CtZoneSNAT) --> ExternalServer | ||
// Pod <-- unDNAT(CtZone) <-- unSNAT(CtZoneSNAT) <-- ExternalServer | ||
flows = append(flows, c.pipeline[uplinkTable].BuildFlow(priorityNormal). | ||
MatchProtocol(binding.ProtocolIP). | ||
MatchRegRange(int(marksReg), markTrafficFromUplink, binding.Range{0, 15}). | ||
Action().CT(false, conntrackTable, CtZoneSNAT).NAT().CTDone(). | ||
Cookie(c.cookieAllocator.Request(category).Raw()). | ||
Done()) | ||
} else { | ||
flows = append(flows, c.pipeline[uplinkTable].BuildFlow(priorityNormal). | ||
MatchProtocol(binding.ProtocolIP). | ||
MatchRegRange(int(marksReg), markTrafficFromUplink, binding.Range{0, 15}). | ||
Action().GotoTable(conntrackTable). | ||
Cookie(c.cookieAllocator.Request(category).Raw()). | ||
Done()) | ||
} | ||
return flows | ||
} | ||
|
||
|
@@ -1600,7 +1646,6 @@ func (c *client) snatFlows(nodeIP net.IP, localSubnet net.IPNet, category cookie | |
Action().GotoTable(nextTable). | ||
Cookie(c.cookieAllocator.Request(category).Raw()). | ||
Done(), | ||
|
||
// Force IP packet into the conntrack zone with SNAT. If the connection is SNATed, the reply packet should use | ||
// Pod IP as the destination, and then is forwarded to conntrackStateTable. | ||
c.pipeline[conntrackTable].BuildFlow(priorityNormal).MatchProtocol(binding.ProtocolIP). | ||
|
@@ -1612,14 +1657,48 @@ func (c *client) snatFlows(nodeIP net.IP, localSubnet net.IPNet, category cookie | |
// source IP in NAT action, 4) ct_mark is set to 0x40 in the conn_track context. | ||
c.pipeline[conntrackCommitTable].BuildFlow(priorityNormal). | ||
MatchProtocol(binding.ProtocolIP). | ||
MatchCTStateNew(true).MatchCTStateTrk(true). | ||
MatchCTStateNew(true).MatchCTStateTrk(true).MatchCTStateDNAT(false). | ||
MatchRegRange(int(marksReg), snatRequiredMark, snatMarkRange). | ||
Action().CT(true, L2ForwardingOutTable, CtZone). | ||
SNAT(snatIPRange, nil). | ||
LoadToMark(snatCTMark).CTDone(). | ||
Cookie(c.cookieAllocator.Request(category).Raw()). | ||
Done(), | ||
} | ||
// The following flows are for both apply DNAT + SNAT for packets. | ||
// If AntreaProxy is disabled, no DNAT happens in OVS pipeline. | ||
if c.enableProxy { | ||
// If the SNAT is needed after DNAT, mark the snatRequiredMark even the connection is not new. | ||
// Because this kind of packets need to enter CtZoneSNAT to make sure the SNAT can be applied | ||
// before leaving the pipeline. | ||
flows = append(flows, l3FwdTable.BuildFlow(priorityLow). | ||
MatchProtocol(binding.ProtocolIP). | ||
MatchCTStateNew(false).MatchCTStateTrk(true).MatchCTStateDNAT(true). | ||
ruicao93 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
MatchRegRange(int(marksReg), markTrafficFromLocal, binding.Range{0, 15}). | ||
Action().LoadRegRange(int(marksReg), snatRequiredMark, snatMarkRange). | ||
Action().GotoTable(nextTable). | ||
Cookie(c.cookieAllocator.Request(category).Raw()). | ||
Done()) | ||
// If SNAT is needed after DNAT: | ||
// - For new connection: commit to CtZoneSNAT | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we consider doing all SNAT in this zone later? It seems currently SNAT is performed in CtZone when it's not DNATed and in CtZoneSNAT otherwise, which seems a little difficult complex. Or you plan to unify them when moving to two bridges? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think unify them when moving to two bridges. This PR only handle the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would agree we should handle all SNAT in a single way. Need to understand the two bridge proposal better. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Once you have something, could you share? I hope to understand how we are going to organize flows with two bridges, as I am designing flows for SNAT policy, which might be impacted by the two-bridge change. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sure @jianjuns. Actually the Agree with you and Quan, But consider the v0.13.0 is near to release, do you think if we could merge current change first and make further step(all SNAT in single ct_zone or use other ways) after v0.13.0? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can unify SNAT flows in the next release. |
||
// - For existing connection: enter CtZoneSNAT to apply SNAT | ||
flows = append(flows, c.pipeline[conntrackCommitTable].BuildFlow(priorityNormal). | ||
MatchProtocol(binding.ProtocolIP). | ||
MatchCTStateNew(true).MatchCTStateTrk(true).MatchCTStateDNAT(true). | ||
MatchRegRange(int(marksReg), snatRequiredMark, snatMarkRange). | ||
Action().CT(true, L2ForwardingOutTable, CtZoneSNAT). | ||
SNAT(snatIPRange, nil). | ||
LoadToMark(snatCTMark).CTDone(). | ||
Cookie(c.cookieAllocator.Request(category).Raw()). | ||
Done()) | ||
flows = append(flows, c.pipeline[conntrackCommitTable].BuildFlow(priorityNormal). | ||
MatchProtocol(binding.ProtocolIP). | ||
MatchCTStateNew(false).MatchCTStateTrk(true).MatchCTStateDNAT(true). | ||
MatchRegRange(int(marksReg), snatRequiredMark, snatMarkRange). | ||
Action().CT(false, L2ForwardingOutTable, CtZoneSNAT).NAT().CTDone(). | ||
Cookie(c.cookieAllocator.Request(category).Raw()). | ||
Done()) | ||
} | ||
return flows | ||
} | ||
|
||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How is it different from the first flow created in L711? seems duplicate
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Here we add a new match field
markTrafficFromUplink
. Or the traffic recieved from uplink will hit L1563 and be forwarded to br-int directly.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For simplicity, could we make L1563 low priority, L1552 flow can be normal priority too?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it's reasonable by analyzing the flows. Will have a try.