-
Notifications
You must be signed in to change notification settings - Fork 370
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Extends the Endpoints support from 500 to 800, extra ones will be dropped in AntreaProxy #2101
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,7 @@ package proxy | |
import ( | ||
"fmt" | ||
"net" | ||
"sort" | ||
"strings" | ||
"sync" | ||
"time" | ||
|
@@ -25,6 +26,7 @@ import ( | |
"k8s.io/api/discovery/v1beta1" | ||
"k8s.io/apimachinery/pkg/runtime" | ||
k8sapitypes "k8s.io/apimachinery/pkg/types" | ||
"k8s.io/apimachinery/pkg/util/sets" | ||
"k8s.io/client-go/informers" | ||
"k8s.io/client-go/tools/record" | ||
"k8s.io/klog" | ||
|
@@ -42,6 +44,10 @@ import ( | |
const ( | ||
resyncPeriod = time.Minute | ||
componentName = "antrea-agent-proxy" | ||
// Due to the maximum message size in Openflow 1.3 and the implementation of Services in Antrea, the maximum number | ||
// of Endpoints that Antrea can support at the moment is 800. If the number of Endpoints for a given Service exceeds | ||
// 800, extra Endpoints will be dropped. | ||
maxEndpoints = 800 | ||
) | ||
|
||
// Proxier wraps proxy.Provider and adds extra methods. It is introduced for | ||
|
@@ -90,6 +96,8 @@ type proxier struct { | |
serviceStringMap map[string]k8sproxy.ServicePortName | ||
// serviceStringMapMutex protects serviceStringMap object. | ||
serviceStringMapMutex sync.Mutex | ||
// oversizeServiceSet records the Services that have more than 800 Endpoints. | ||
oversizeServiceSet sets.String | ||
|
||
runner *k8sproxy.BoundedFrequencyRunner | ||
stopChan <-chan struct{} | ||
|
@@ -117,6 +125,9 @@ func (p *proxier) removeStaleServices() { | |
} | ||
svcInfo := svcPort.(*types.ServiceInfo) | ||
klog.V(2).Infof("Removing stale Service: %s %s", svcPortName.Name, svcInfo.String()) | ||
if p.oversizeServiceSet.Has(svcPortName.String()) { | ||
p.oversizeServiceSet.Delete(svcPortName.String()) | ||
} | ||
if err := p.ofClient.UninstallServiceFlows(svcInfo.ClusterIP(), uint16(svcInfo.Port()), svcInfo.OFProtocol); err != nil { | ||
klog.Errorf("Failed to remove flows of Service %v: %v", svcPortName, err) | ||
continue | ||
|
@@ -264,6 +275,34 @@ func (p *proxier) installServices() { | |
} | ||
endpointUpdateList = append(endpointUpdateList, endpoint) | ||
} | ||
|
||
// If the length of endpointUpdateList > maxEndpoints, endpointUpdateList should be cut. However, the iteration | ||
// of map in Golang is random, so endpointUpdateList are not always in the same order. If endpointUpdateList is | ||
// cut directly without any sorting, some Endpoints may not be installed on the cut endpointUpdateList(Last sync, | ||
// these Endpoints may be cut off from endpointUpdateList, they are of course not installed.) So cutting | ||
// endpointUpdateList after sorting can avoid this situation in some degree. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it hasn't addressed the concern: #2101 (comment). The extra endpoints won't be in "endpointsInstalled" and "needUpdateEndpoints" will always be true regardless of how many times it has been reconciled, unless I misunderstand the code. Could you test it with one oversized service and one normal size service and check whether updating the latter one will cause the first one resynced? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll try to add some test code to verify this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't mean to add test code to verify it as it will have to check logs and really create a service with 800+ endpoints. Could you just change maxEndpoints to 10 and verify it manually if you haven't done it before? From the code, I think it has the issue. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK, I'll verify this manually. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. any update? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have changed
|
||
if len(endpointUpdateList) > maxEndpoints { | ||
if !p.oversizeServiceSet.Has(svcPortName.String()) { | ||
klog.Warningf("Since Endpoints of Service %s exceeds %d, extra Endpoints will be dropped", svcPortName.String(), maxEndpoints) | ||
p.oversizeServiceSet.Insert(svcPortName.String()) | ||
} | ||
|
||
needUpdateEndpoints = false | ||
sort.Sort(byEndpoint(endpointUpdateList)) | ||
endpointUpdateList = endpointUpdateList[:maxEndpoints] | ||
// Check if the cut endpointUpdateList are all installed. | ||
for _, endpoint := range endpointUpdateList { | ||
if _, ok := endpointsInstalled[endpoint.String()]; !ok { | ||
needUpdateEndpoints = true | ||
break | ||
} | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Sorting More detailed, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think L274 will set There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If a Service is oversize, L274 will always set There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I missed L290. However, it still seems wrong to reset There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Variable There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It needs to convert it to a list sooner or later, no harm to do it earlier? It can save the repeated calculation of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated. |
||
} else { | ||
if p.oversizeServiceSet.Has(svcPortName.String()) { | ||
p.oversizeServiceSet.Delete(svcPortName.String()) | ||
} | ||
} | ||
|
||
if len(endpoints) < len(endpointsInstalled) { // There are Endpoints which expired. | ||
klog.V(2).Infof("Some Endpoints of Service %s removed, updating Endpoints", svcInfo.String()) | ||
needUpdateEndpoints = true | ||
|
@@ -605,6 +644,7 @@ func NewProxier( | |
endpointsMap: types.EndpointsMap{}, | ||
endpointReferenceCounter: map[string]int{}, | ||
serviceStringMap: map[string]k8sproxy.ServicePortName{}, | ||
oversizeServiceSet: sets.NewString(), | ||
groupCounter: types.NewGroupCounter(), | ||
ofClient: ofClient, | ||
isIPv6: isIPv6, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How about:
Please note that in the current AntreaProxy implementation there is a restriction that the maximum number...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unless you feel strongly about it, I suggest merging as it is. We have been postponing v1.0.1 for a while now, and this is pretty much the last change we are waiting for.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That works for me.