Skip to content

Commit

Permalink
Resolve reconnect issue between Agent and OVS
Browse files Browse the repository at this point in the history
1. Bump up ofnet version
2. Delete existing OpenFlow groups before replaying groups on reconnection.

Signed-off-by: wenyingd <[email protected]>
  • Loading branch information
wenyingd committed Aug 16, 2022
1 parent d1c6a43 commit 2ac2636
Show file tree
Hide file tree
Showing 7 changed files with 38 additions and 8 deletions.
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -189,3 +189,5 @@ require (
sigs.k8s.io/json v0.0.0-20211208200746-9f7c6b3444d2 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.2.1 // indirect
)

replace antrea.io/ofnet v0.6.0 => github.com/wenyingd/ofnet v0.0.0-20220809071156-3efb2b08804b
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
antrea.io/libOpenflow v0.8.0 h1:Xm6mlSqdXtDD418nf1lndoDvMi8scqUan8pkEUZ2oas=
antrea.io/libOpenflow v0.8.0/go.mod h1:CzEJZxDNAupiGxeL5VOw92PsxfyvehEAvE3PiC6gr8o=
antrea.io/ofnet v0.6.0 h1:XuZ1WjOd0R8nIdRG7bfCF1AryLLlaroFjrgnIDfR1Vg=
antrea.io/ofnet v0.6.0/go.mod h1:qWqi11pI3kBYcS9SYWm92ZOiOPBx04Jx21cDmJlJhOg=
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU=
Expand Down Expand Up @@ -879,6 +877,8 @@ github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae h1:4hwBBUfQCFe3C
github.com/vishvananda/netns v0.0.0-20200728191858-db3c7e526aae/go.mod h1:DD4vA1DwXk04H54A1oHXtwZmA0grkVMdPxx/VGLCah0=
github.com/vmware/go-ipfix v0.5.12 h1:mqQknlvnvDY25apPNy9c27ri3FMDFIhzvO68Kk5Qp58=
github.com/vmware/go-ipfix v0.5.12/go.mod h1:yzbG1rv+yJ8GeMrRm+MDhOV3akygNZUHLhC1pDoD2AY=
github.com/wenyingd/ofnet v0.0.0-20220809071156-3efb2b08804b h1:P4/do/PT/qZc+NtXINm6TPGyflJZVmQQE5bW07H3SUo=
github.com/wenyingd/ofnet v0.0.0-20220809071156-3efb2b08804b/go.mod h1:qWqi11pI3kBYcS9SYWm92ZOiOPBx04Jx21cDmJlJhOg=
github.com/xdg/scram v0.0.0-20180814205039-7eeb5667e42c/go.mod h1:lB8K/P019DLNhemzwFU4jHLhdvlE6uDZjXFejJXr49I=
github.com/xdg/stringprep v1.0.0/go.mod h1:Jhud4/sHMO4oL310DaZAKk9ZaJ08SJfe+sJh0HrGL1Y=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8=
Expand Down
6 changes: 0 additions & 6 deletions pkg/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -488,12 +488,6 @@ func persistRoundNum(num uint64, bridgeClient ovsconfig.OVSBridgeClient, interva
// agent restarts (with the agent crashing before step 4 can be completed). With the sequence
// described above, we guarantee that at most two rounds of flows exist in the switch at any given
// time.
// Note that at the moment we assume that all OpenFlow groups are deleted every time there is an
// Antrea Agent restart. This allows us to add the necessary groups without having to worry about
// the operation failing because a (stale) group with the same ID already exists in OVS. This
// assumption is currently guaranteed by the ofnet implementation:
// https://github.com/wenyingd/ofnet/blob/14a78b27ef8762e45a0cfc858c4d07a4572a99d5/ofctrl/fgraphSwitch.go#L57-L62
// All previous groups have been deleted by the time the call to i.ofClient.Initialize returns.
func (i *Initializer) initOpenFlowPipeline() error {
roundInfo := getRoundInfo(i.ovsBridgeClient)

Expand Down
5 changes: 5 additions & 0 deletions pkg/agent/openflow/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -931,6 +931,11 @@ func (c *client) ReplayFlows() {
c.replayMutex.Lock()
defer c.replayMutex.Unlock()

// Delete the existing groups to avoid unexpected error "OFPGMFC_GROUP_EXISTS" when replaying groups.
if err := c.bridge.DeleteAllGroups(); err != nil {
klog.Errorf("Error when deleting all existing groups: %v", err)
}

if err := c.initialize(); err != nil {
klog.Errorf("Error during flow replay: %v", err)
}
Expand Down
3 changes: 3 additions & 0 deletions pkg/ovs/openflow/interfaces.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,9 @@ type Bridge interface {
SendPacketOut(packetOut *ofctrl.PacketOut) error
// BuildPacketOut returns a new PacketOutBuilder.
BuildPacketOut() PacketOutBuilder
// DeleteAllGroups deletes all groups which exist on the OVS bridge. It should be called
// by the agent when reconnecting to OVS, before the groups are replayed.
DeleteAllGroups() error
}

// TableStatus represents the status of a specific flow table. The status is useful for debugging.
Expand Down
12 changes: 12 additions & 0 deletions pkg/ovs/openflow/ofctrl_bridge.go
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,18 @@ func (b *OFBridge) RetryInterval() time.Duration {
return b.retryInterval
}

// DeleteAllGroups sends a single OpenFlow group modification message asking the
// switch to delete every group installed on the bridge. It returns an error if
// the message cannot be sent.
func (b *OFBridge) DeleteAllGroups() error {
	groupMod := openflow15.NewGroupMod()
	// OFPG_ALL is the reserved group ID that stands for "all groups" in a group
	// delete command. Note that GT_ALL is a group *type* (numerically 0), so using
	// it as the group ID would only target group 0 instead of every group.
	groupMod.GroupId = openflow15.OFPG_ALL
	groupMod.Command = openflow15.OFPGC_DELETE
	if err := b.ofSwitch.Send(groupMod); err != nil {
		return fmt.Errorf("failed to delete all groups: %w", err)
	}
	return nil
}

func (b *OFBridge) queryTableFeatures() {
mpartRequest := &openflow15.MultipartRequest{
Header: openflow15.NewOfp15Header(),
Expand Down
14 changes: 14 additions & 0 deletions pkg/ovs/openflow/testing/mock_openflow.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 2ac2636

Please sign in to comment.