Skip to content

Commit

Permalink
Make tunnel csum option configurable and default to false
Browse files Browse the repository at this point in the history
For Linux kernel before Mar 2021, UDP checksum must be present to
trigger GRO on the receiver for better performance of Geneve and VXLAN
tunnels. The issue has been fixed in Linux kernel [1], thus computing
UDP checksum is no longer necessary.

This patch exposes a configuration parameter TunnelCsum which
determines whether to compute UDP encapsulation header (Geneve or
VXLAN) checksums on outgoing packets and makes it default to false. It
should only be set to true when Nodes run an unpatched Linux kernel and
poor transfer performance is observed.

[1] torvalds/linux@89e5c58

Signed-off-by: Quan Tian <[email protected]>
  • Loading branch information
tnqn committed Sep 29, 2022
1 parent 0f77529 commit 0f8891b
Show file tree
Hide file tree
Showing 16 changed files with 228 additions and 30 deletions.
1 change: 1 addition & 0 deletions build/charts/antrea/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ Kubernetes: `>= 1.16.0-0`
| trafficEncryptionMode | string | `"none"` | Determines how tunnel traffic is encrypted. Currently encryption only works with encap mode. It must be one of "none", "ipsec", "wireGuard". |
| transportInterface | string | `""` | Name of the interface on Node which is used for tunneling or routing the traffic across Nodes. |
| transportInterfaceCIDRs | list | `[]` | Network CIDRs of the interface on Node which is used for tunneling or routing the traffic across Nodes. |
| tunnelCsum | bool | `false` | TunnelCsum determines whether to compute UDP encapsulation header (Geneve or VXLAN) checksums on outgoing packets. For Linux kernel before Mar 2021, UDP checksum must be present to trigger GRO on the receiver for better performance of Geneve and VXLAN tunnels. The issue has been fixed by https://github.com/torvalds/linux/commit/89e5c58fc1e2857ccdaae506fb8bc5fed57ee063, thus computing UDP checksum is no longer necessary. It should only be set to true when you are using an unpatched Linux kernel and observing poor transfer performance. |
| tunnelPort | int | `0` | TunnelPort is the destination port for UDP and TCP based tunnel protocols (Geneve, VXLAN, and STT). If zero, it will use the assigned IANA port for the protocol, i.e. 6081 for Geneve, 4789 for VXLAN, and 7471 for STT. |
| tunnelType | string | `"geneve"` | Tunnel protocol used for encapsulating traffic across Nodes. It must be one of "geneve", "vxlan", "gre", "stt". |
| webhooks.labelsMutator.enable | bool | `false` | |
Expand Down
8 changes: 8 additions & 0 deletions build/charts/antrea/conf/antrea-agent.conf
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,14 @@ tunnelType: {{ .Values.tunnelType | quote }}
# and 7471 for STT.
tunnelPort: {{ .Values.tunnelPort }}

# TunnelCsum determines whether to compute UDP encapsulation header (Geneve or VXLAN) checksums on outgoing
# packets. For Linux kernel before Mar 2021, UDP checksum must be present to trigger GRO on the receiver for better
# performance of Geneve and VXLAN tunnels. The issue has been fixed by
# https://github.com/torvalds/linux/commit/89e5c58fc1e2857ccdaae506fb8bc5fed57ee063, thus computing UDP checksum is
# no longer necessary.
# It should only be set to true when you are using an unpatched Linux kernel and observing poor transfer performance.
tunnelCsum: {{ .Values.tunnelCsum }}

# Determines how tunnel traffic is encrypted. Currently encryption only works with encap mode.
# It has the following options:
# - none (default): Inter-node Pod traffic will not be encrypted.
Expand Down
9 changes: 9 additions & 0 deletions build/charts/antrea/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,15 @@ tunnelType: "geneve"
# (Geneve, VXLAN, and STT). If zero, it will use the assigned IANA port for the
# protocol, i.e. 6081 for Geneve, 4789 for VXLAN, and 7471 for STT.
tunnelPort: 0
# -- TunnelCsum determines whether to compute UDP encapsulation header (Geneve
# or VXLAN) checksums on outgoing packets. For Linux kernel before Mar 2021, UDP
# checksum must be present to trigger GRO on the receiver for better performance
# of Geneve and VXLAN tunnels. The issue has been fixed by
# https://github.com/torvalds/linux/commit/89e5c58fc1e2857ccdaae506fb8bc5fed57ee063,
# thus computing UDP checksum is no longer necessary.
# It should only be set to true when you are using an unpatched Linux kernel and
# observing poor transfer performance.
tunnelCsum: false
# -- Determines how tunnel traffic is encrypted. Currently encryption only works
# with encap mode. It must be one of "none", "ipsec", "wireGuard".
trafficEncryptionMode: "none"
Expand Down
12 changes: 10 additions & 2 deletions build/yamls/antrea-aks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2952,6 +2952,14 @@ data:
# and 7471 for STT.
tunnelPort: 0
# TunnelCsum determines whether to compute UDP encapsulation header (Geneve or VXLAN) checksums on outgoing
# packets. For Linux kernel before Mar 2021, UDP checksum must be present to trigger GRO on the receiver for better
# performance of Geneve and VXLAN tunnels. The issue has been fixed by
# https://github.com/torvalds/linux/commit/89e5c58fc1e2857ccdaae506fb8bc5fed57ee063, thus computing UDP checksum is
# no longer necessary.
# It should only be set to true when you are using an unpatched Linux kernel and observing poor transfer performance.
tunnelCsum: false
# Determines how tunnel traffic is encrypted. Currently encryption only works with encap mode.
# It has the following options:
# - none (default): Inter-node Pod traffic will not be encrypted.
Expand Down Expand Up @@ -4140,7 +4148,7 @@ spec:
kubectl.kubernetes.io/default-container: antrea-agent
# Automatically restart Pods with a RollingUpdate if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: 030faeb129b70e3abd5720d2262d122abea2428e342f02836846a3af26f20999
checksum/config: ec59529f4b774af0f12a30c31eb48e5494ac6371ce624e872d1e6b76b797325e
labels:
app: antrea
component: antrea-agent
Expand Down Expand Up @@ -4381,7 +4389,7 @@ spec:
annotations:
# Automatically restart Pod if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: 030faeb129b70e3abd5720d2262d122abea2428e342f02836846a3af26f20999
checksum/config: ec59529f4b774af0f12a30c31eb48e5494ac6371ce624e872d1e6b76b797325e
labels:
app: antrea
component: antrea-controller
Expand Down
12 changes: 10 additions & 2 deletions build/yamls/antrea-eks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2952,6 +2952,14 @@ data:
# and 7471 for STT.
tunnelPort: 0
# TunnelCsum determines whether to compute UDP encapsulation header (Geneve or VXLAN) checksums on outgoing
# packets. For Linux kernel before Mar 2021, UDP checksum must be present to trigger GRO on the receiver for better
# performance of Geneve and VXLAN tunnels. The issue has been fixed by
# https://github.com/torvalds/linux/commit/89e5c58fc1e2857ccdaae506fb8bc5fed57ee063, thus computing UDP checksum is
# no longer necessary.
# It should only be set to true when you are using an unpatched Linux kernel and observing poor transfer performance.
tunnelCsum: false
# Determines how tunnel traffic is encrypted. Currently encryption only works with encap mode.
# It has the following options:
# - none (default): Inter-node Pod traffic will not be encrypted.
Expand Down Expand Up @@ -4140,7 +4148,7 @@ spec:
kubectl.kubernetes.io/default-container: antrea-agent
# Automatically restart Pods with a RollingUpdate if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: 030faeb129b70e3abd5720d2262d122abea2428e342f02836846a3af26f20999
checksum/config: ec59529f4b774af0f12a30c31eb48e5494ac6371ce624e872d1e6b76b797325e
labels:
app: antrea
component: antrea-agent
Expand Down Expand Up @@ -4383,7 +4391,7 @@ spec:
annotations:
# Automatically restart Pod if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: 030faeb129b70e3abd5720d2262d122abea2428e342f02836846a3af26f20999
checksum/config: ec59529f4b774af0f12a30c31eb48e5494ac6371ce624e872d1e6b76b797325e
labels:
app: antrea
component: antrea-controller
Expand Down
12 changes: 10 additions & 2 deletions build/yamls/antrea-gke.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2952,6 +2952,14 @@ data:
# and 7471 for STT.
tunnelPort: 0
# TunnelCsum determines whether to compute UDP encapsulation header (Geneve or VXLAN) checksums on outgoing
# packets. For Linux kernel before Mar 2021, UDP checksum must be present to trigger GRO on the receiver for better
# performance of Geneve and VXLAN tunnels. The issue has been fixed by
# https://github.com/torvalds/linux/commit/89e5c58fc1e2857ccdaae506fb8bc5fed57ee063, thus computing UDP checksum is
# no longer necessary.
# It should only be set to true when you are using an unpatched Linux kernel and observing poor transfer performance.
tunnelCsum: false
# Determines how tunnel traffic is encrypted. Currently encryption only works with encap mode.
# It has the following options:
# - none (default): Inter-node Pod traffic will not be encrypted.
Expand Down Expand Up @@ -4140,7 +4148,7 @@ spec:
kubectl.kubernetes.io/default-container: antrea-agent
# Automatically restart Pods with a RollingUpdate if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: 9f057043633784b60338fda16b8a439e85a763cbabdf377c1318c56a0e0abc26
checksum/config: ae585cead46246e4d15fa336e081caaa51d432909dfd21a22720a1c37b2e37e9
labels:
app: antrea
component: antrea-agent
Expand Down Expand Up @@ -4380,7 +4388,7 @@ spec:
annotations:
# Automatically restart Pod if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: 9f057043633784b60338fda16b8a439e85a763cbabdf377c1318c56a0e0abc26
checksum/config: ae585cead46246e4d15fa336e081caaa51d432909dfd21a22720a1c37b2e37e9
labels:
app: antrea
component: antrea-controller
Expand Down
12 changes: 10 additions & 2 deletions build/yamls/antrea-ipsec.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2965,6 +2965,14 @@ data:
# and 7471 for STT.
tunnelPort: 0
# TunnelCsum determines whether to compute UDP encapsulation header (Geneve or VXLAN) checksums on outgoing
# packets. For Linux kernel before Mar 2021, UDP checksum must be present to trigger GRO on the receiver for better
# performance of Geneve and VXLAN tunnels. The issue has been fixed by
# https://github.com/torvalds/linux/commit/89e5c58fc1e2857ccdaae506fb8bc5fed57ee063, thus computing UDP checksum is
# no longer necessary.
# It should only be set to true when you are using an unpatched Linux kernel and observing poor transfer performance.
tunnelCsum: false
# Determines how tunnel traffic is encrypted. Currently encryption only works with encap mode.
# It has the following options:
# - none (default): Inter-node Pod traffic will not be encrypted.
Expand Down Expand Up @@ -4153,7 +4161,7 @@ spec:
kubectl.kubernetes.io/default-container: antrea-agent
# Automatically restart Pods with a RollingUpdate if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: 0d23a6a2c5f2f815ef77b338ae72534ce3715e5f0a84de4c2910ddf2fd013f5b
checksum/config: 8dc962ec6575509a540500f9cc1bd399118256fc0485c02705b129922b981c4e
checksum/ipsec-secret: d0eb9c52d0cd4311b6d252a951126bf9bea27ec05590bed8a394f0f792dcb2a4
labels:
app: antrea
Expand Down Expand Up @@ -4439,7 +4447,7 @@ spec:
annotations:
# Automatically restart Pod if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: 0d23a6a2c5f2f815ef77b338ae72534ce3715e5f0a84de4c2910ddf2fd013f5b
checksum/config: 8dc962ec6575509a540500f9cc1bd399118256fc0485c02705b129922b981c4e
labels:
app: antrea
component: antrea-controller
Expand Down
12 changes: 10 additions & 2 deletions build/yamls/antrea.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2952,6 +2952,14 @@ data:
# and 7471 for STT.
tunnelPort: 0
# TunnelCsum determines whether to compute UDP encapsulation header (Geneve or VXLAN) checksums on outgoing
# packets. For Linux kernel before Mar 2021, UDP checksum must be present to trigger GRO on the receiver for better
# performance of Geneve and VXLAN tunnels. The issue has been fixed by
# https://github.com/torvalds/linux/commit/89e5c58fc1e2857ccdaae506fb8bc5fed57ee063, thus computing UDP checksum is
# no longer necessary.
# It should only be set to true when you are using an unpatched Linux kernel and observing poor transfer performance.
tunnelCsum: false
# Determines how tunnel traffic is encrypted. Currently encryption only works with encap mode.
# It has the following options:
# - none (default): Inter-node Pod traffic will not be encrypted.
Expand Down Expand Up @@ -4140,7 +4148,7 @@ spec:
kubectl.kubernetes.io/default-container: antrea-agent
# Automatically restart Pods with a RollingUpdate if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: cda023525442c38c869663e4fd703a76cafa307220ae85ec0a8bd393587fa3ed
checksum/config: b609bc3b220de28b3fe935093fc7d99dfdc533916597e906a6c627c6233fedbc
labels:
app: antrea
component: antrea-agent
Expand Down Expand Up @@ -4380,7 +4388,7 @@ spec:
annotations:
# Automatically restart Pod if the ConfigMap changes
# See https://helm.sh/docs/howto/charts_tips_and_tricks/#automatically-roll-deployments
checksum/config: cda023525442c38c869663e4fd703a76cafa307220ae85ec0a8bd393587fa3ed
checksum/config: b609bc3b220de28b3fe935093fc7d99dfdc533916597e906a6c627c6233fedbc
labels:
app: antrea
component: antrea-controller
Expand Down
1 change: 1 addition & 0 deletions cmd/antrea-agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ func run(o *Options) error {
networkConfig := &config.NetworkConfig{
TunnelType: ovsconfig.TunnelType(o.config.TunnelType),
TunnelPort: o.config.TunnelPort,
TunnelCsum: o.config.TunnelCsum,
TrafficEncapMode: encapMode,
TrafficEncryptionMode: encryptionMode,
TransportIface: o.config.TransportInterface,
Expand Down
24 changes: 13 additions & 11 deletions pkg/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -747,8 +747,10 @@ func (i *Initializer) setupDefaultTunnelInterface() error {
}

// Enabling UDP checksum can greatly improve the performance for Geneve and
// VXLAN tunnels by triggering GRO on the receiver.
shouldEnableCsum := i.networkConfig.TunnelType == ovsconfig.GeneveTunnel || i.networkConfig.TunnelType == ovsconfig.VXLANTunnel
// VXLAN tunnels by triggering GRO on the receiver for old Linux kernel versions.
// It's not necessary for new Linux kernel versions with the following patch:
// https://github.com/torvalds/linux/commit/89e5c58fc1e2857ccdaae506fb8bc5fed57ee063.
shouldEnableCsum := i.networkConfig.TunnelCsum && (i.networkConfig.TunnelType == ovsconfig.GeneveTunnel || i.networkConfig.TunnelType == ovsconfig.VXLANTunnel)

// Check the default tunnel port.
if portExists {
Expand All @@ -757,12 +759,12 @@ func (i *Initializer) setupDefaultTunnelInterface() error {
tunnelIface.TunnelInterfaceConfig.DestinationPort == i.networkConfig.TunnelPort &&
tunnelIface.TunnelInterfaceConfig.LocalIP.Equal(localIP) {
klog.V(2).Infof("Tunnel port %s already exists on OVS bridge", tunnelPortName)
// This could happen when upgrading from previous versions that didn't set it.
if shouldEnableCsum && !tunnelIface.TunnelInterfaceConfig.Csum {
if err := i.enableTunnelCsum(tunnelPortName); err != nil {
return fmt.Errorf("failed to enable csum for tunnel port %s: %v", tunnelPortName, err)
if shouldEnableCsum != tunnelIface.TunnelInterfaceConfig.Csum {
klog.InfoS("Updating csum for tunnel port", "port", tunnelPortName, "csum", shouldEnableCsum)
if err := i.setTunnelCsum(tunnelPortName, shouldEnableCsum); err != nil {
return fmt.Errorf("failed to update csum for tunnel port %s to %v: %v", tunnelPortName, shouldEnableCsum, err)
}
tunnelIface.TunnelInterfaceConfig.Csum = true
tunnelIface.TunnelInterfaceConfig.Csum = shouldEnableCsum
}
i.nodeConfig.TunnelOFPort = uint32(tunnelIface.OFPort)
return nil
Expand Down Expand Up @@ -806,15 +808,15 @@ func (i *Initializer) setupDefaultTunnelInterface() error {
return err
}
klog.InfoS("Allocated OpenFlow port for tunnel interface", "port", tunnelPortName, "ofPort", tunPort)
tunnelIface = interfacestore.NewTunnelInterface(tunnelPortName, i.networkConfig.TunnelType, i.networkConfig.TunnelPort, localIP, shouldEnableCsum)
tunnelIface.OVSPortConfig = &interfacestore.OVSPortConfig{PortUUID: tunnelPortUUID, OFPort: tunPort}
ovsPortConfig := &interfacestore.OVSPortConfig{PortUUID: tunnelPortUUID, OFPort: tunPort}
tunnelIface = interfacestore.NewTunnelInterface(tunnelPortName, i.networkConfig.TunnelType, i.networkConfig.TunnelPort, localIP, shouldEnableCsum, ovsPortConfig)
i.ifaceStore.AddInterface(tunnelIface)
i.nodeConfig.TunnelOFPort = uint32(tunPort)
}
return nil
}

func (i *Initializer) enableTunnelCsum(tunnelPortName string) error {
func (i *Initializer) setTunnelCsum(tunnelPortName string, enable bool) error {
options, err := i.ovsBridgeClient.GetInterfaceOptions(tunnelPortName)
if err != nil {
return fmt.Errorf("error getting interface options: %w", err)
Expand All @@ -824,7 +826,7 @@ func (i *Initializer) enableTunnelCsum(tunnelPortName string) error {
for k, v := range options {
updatedOptions[k] = v
}
updatedOptions["csum"] = "true"
updatedOptions["csum"] = strconv.FormatBool(enable)
return i.ovsBridgeClient.SetInterfaceOptions(tunnelPortName, updatedOptions)
}

Expand Down
Loading

0 comments on commit 0f8891b

Please sign in to comment.