etcdutil, leadership: make more high availability #6577
Changes from 11 commits
@@ -18,6 +18,7 @@ import (
    "context"
    "sync"
    "sync/atomic"
    "time"

    "github.com/pingcap/failpoint"
    "github.com/pingcap/kvproto/pkg/pdpb"
@@ -30,6 +31,11 @@ import (
    "go.uber.org/zap"
)

const (
    watchLoopUnhealthyTimeout = 60 * time.Second
    detectHealthyInterval     = 10 * time.Second
)

// GetLeader gets the corresponding leader from etcd by given leaderPath (as the key).
func GetLeader(c *clientv3.Client, leaderPath string) (*pdpb.Member, int64, error) {
    leader := &pdpb.Member{}
@@ -182,26 +188,81 @@ func (ls *Leadership) Watch(serverCtx context.Context, revision int64) {
    if ls == nil {
        return
    }

    interval := detectHealthyInterval
    unhealthyTimeout := watchLoopUnhealthyTimeout
    failpoint.Inject("fastTick", func() {
        unhealthyTimeout = 5 * time.Second
        interval = 1 * time.Second
    })
    ticker := time.NewTicker(interval)
    defer ticker.Stop()
    lastHealthyTime := time.Now()

    watcher := clientv3.NewWatcher(ls.client)
    defer watcher.Close()
    ctx, cancel := context.WithCancel(serverCtx)
    defer cancel()
    // The revision is the revision of last modification on this key.
    // If the revision is compacted, will meet required revision has been compacted error.
    // In this case, use the compact revision to re-watch the key.
    var watchChanCancel *context.CancelFunc
Review thread on var watchChanCancel *context.CancelFunc:

- Why use a pointer?
- How about just skipping the lint if you are sure about it? It seems to be a known problem of the Go analysis tool.
- I think a pointer is acceptable.
- Agree with rleungx. Probably just use something like //nolint to avoid the pointer.
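For context on the //nolint suggestion, a minimal, self-contained sketch of the non-pointer alternative the reviewers have in mind follows. The names watchLoop and cancelWatch and the loop body are illustrative, not the PR's code, and the lint to suppress is assumed to be the "value never used" style false positive on reassignment in a loop.

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// watchLoop keeps the cancel function by value instead of *context.CancelFunc:
// the previous watch context is cancelled before a new one is created, and the
// last one is released by the deferred call when the loop exits.
func watchLoop(serverCtx context.Context) {
	var cancelWatch context.CancelFunc
	defer func() {
		if cancelWatch != nil {
			cancelWatch()
		}
	}()
	for i := 0; i < 3; i++ {
		if cancelWatch != nil {
			cancelWatch() // release the previous watch context
		}
		var watchCtx context.Context
		watchCtx, cancelWatch = context.WithCancel(serverCtx)
		// A real watcher would be opened with watchCtx here.
		fmt.Println("iteration", i, "watch context alive:", watchCtx.Err() == nil)
		time.Sleep(10 * time.Millisecond)
	}
}

func main() {
	watchLoop(context.Background())
}
```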

    defer func() {
        if watchChanCancel != nil {
            (*watchChanCancel)()
        }
    }()
    for {
        failpoint.Inject("delayWatcher", nil)
        rch := watcher.Watch(ctx, ls.leaderKey, clientv3.WithRev(revision))
        for wresp := range rch {
        if watchChanCancel != nil {
            (*watchChanCancel)()
        }
        // In order to prevent a watch stream being stuck in a partitioned node,
        // make sure to wrap context with "WithRequireLeader".
        watchChanCtx, cancel := context.WithCancel(clientv3.WithRequireLeader(serverCtx))
        watchChanCancel = &cancel

        // When etcd is not available, the watcher.Watch will block,
        // so we check the etcd availability first.
        if !etcdutil.IsHealthy(serverCtx, ls.client) {
            if time.Since(lastHealthyTime) > unhealthyTimeout {
                log.Error("the connect of leadership watcher is unhealthy",
                    zap.Int64("revision", revision),
                    zap.String("leader-key", ls.leaderKey),
                    zap.String("purpose", ls.purpose))
                return
            }
            select {
            case <-serverCtx.Done():
                // server closed, return
                return
            case <-ticker.C:
                // continue to check the etcd availability
                continue
            }
        }

        watchChan := watcher.Watch(watchChanCtx, ls.leaderKey, clientv3.WithRev(revision))
Review thread on the re-created watch channel:

- Regarding #6554 ("TSO primary election took 14 minutes after PD(API) Pods being deleted at the same time and PD(API) leader being re-elected"), I have two questions:
- I do think the current PR can help to mitigate the situation even without a perfect solution or answers to questions 1 and 2, because with this PR it seems that a secondary can be elected as the new primary, which addresses question 1 above, and question 2 is a parameter-tuning problem.
- It's not certain that the fifteen-minute re-election happened because the watch loop couldn't exit; there could be several causes: being stuck on a gRPC retry, stuck on a TCP retry, or the watch being stuck on the etcd watch.
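On the question of a watch being stuck on a partitioned node: the diff wraps the watch context with clientv3.WithRequireLeader, which makes the server cancel the watch stream when the member the client is connected to has no leader, instead of letting it hang. A minimal, self-contained sketch of that behavior follows; the endpoint, key, and import path (go.etcd.io/etcd/clientv3, i.e. the v3.4-era client) are assumptions for the example, not part of this PR.

```go
package main

import (
	"context"
	"fmt"
	"time"

	"go.etcd.io/etcd/clientv3"
)

func main() {
	// Placeholder endpoint; a local etcd is assumed to be listening here.
	cli, err := clientv3.New(clientv3.Config{
		Endpoints:   []string{"127.0.0.1:2379"},
		DialTimeout: 3 * time.Second,
	})
	if err != nil {
		panic(err)
	}
	defer cli.Close()

	// WithRequireLeader makes the watch fail fast if the connected member
	// loses its leader, rather than blocking on a partitioned node.
	ctx, cancel := context.WithCancel(clientv3.WithRequireLeader(context.Background()))
	defer cancel()

	for resp := range cli.Watch(ctx, "/example/leader") { // placeholder key
		if err := resp.Err(); err != nil {
			fmt.Println("watch canceled:", err)
			return
		}
		fmt.Println("received", len(resp.Events), "event(s)")
	}
}
```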

    WatchChan:
        select {
        case <-serverCtx.Done():
            // server closed, return
            return
        case <-ticker.C:
            if !etcdutil.IsHealthy(serverCtx, ls.client) {
Review thread on the health check in the ticker case:

- To avoid temporary failures and false alarms, we'd better check the etcd health status multiple times: if it stays unhealthy for a defined period (e.g., check health 5 times, once every second, and all checks return an unhealthy status), then we cancel the watch channel and re-watch.
- OK, I have added it at Line 247 in 5b33784.
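The change referenced as "Line 247 in 5b33784" is not shown in this hunk, so the following is only a rough, self-contained sketch of the suggested "confirm unhealthy over several probes" idea; confirmUnhealthy, probeCount, and probeInterval are illustrative names, not the code that was actually added.

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// confirmUnhealthy reports true only when every probe within the window fails,
// so a single transient failure does not cancel and re-create the watch channel.
func confirmUnhealthy(ctx context.Context, isHealthy func(context.Context) bool, probeCount int, probeInterval time.Duration) bool {
	for i := 0; i < probeCount; i++ {
		if isHealthy(ctx) {
			return false // one healthy probe is enough to keep the current watch
		}
		select {
		case <-ctx.Done():
			return false
		case <-time.After(probeInterval):
		}
	}
	return true
}

func main() {
	// Toy health function that always fails, to show the confirmation path.
	alwaysDown := func(context.Context) bool { return false }
	fmt.Println(confirmUnhealthy(context.Background(), alwaysDown, 5, 100*time.Millisecond))
}
```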
                if time.Since(lastHealthyTime) > unhealthyTimeout {
                    log.Error("the connect of leadership watcher is unhealthy",
                        zap.Int64("revision", revision),
                        zap.String("leader-key", ls.leaderKey),
                        zap.String("purpose", ls.purpose))
                    return
                }
                goto WatchChan
            }
        case wresp := <-watchChan:
            // meet compacted error, use the compact revision.
            if wresp.CompactRevision != 0 {
                log.Warn("required revision has been compacted, use the compact revision",
                    zap.Int64("required-revision", revision),
                    zap.Int64("compact-revision", wresp.CompactRevision))
                revision = wresp.CompactRevision
                break
            }
            if wresp.Canceled {
                continue
Review thread on this continue:

- 'continue' goes to the beginning of the for loop, correct? Do we need to reset lastHealthyTime here?
- Yes, we need to re-create the watch channel with the new revision.
            } else if wresp.Err() != nil { // wresp.Err() contains CompactRevision not equal to 0
                log.Error("leadership watcher is canceled with",
                    zap.Int64("revision", revision),
                    zap.String("leader-key", ls.leaderKey),

@@ -213,19 +274,16 @@ func (ls *Leadership) Watch(serverCtx context.Context, revision int64) {
            for _, ev := range wresp.Events {
                if ev.Type == mvccpb.DELETE {
                    log.Info("current leadership is deleted",
                        zap.Int64("revision", wresp.Header.Revision),
                        zap.String("leader-key", ls.leaderKey),
                        zap.String("purpose", ls.purpose))
                    return
                }
            }
            revision = wresp.Header.Revision + 1
        }

            select {
            case <-ctx.Done():
                // server closed, return
                return
            default:
            }
            lastHealthyTime = time.Now()
            goto WatchChan // use goto to avoid to create a new watchChan
        }
    }
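The WatchChan label plus goto lets the loop come back to the same select, and therefore the same watch channel, after a ticker-driven health check or after handling a response, instead of re-creating the channel each time. A small, self-contained illustration of this select-and-goto pattern follows; the channel contents and names are made up for the example and are not the PR's code.

```go
package main

import (
	"fmt"
	"time"
)

// Illustrative only: re-enter the select on the same channel via goto instead
// of re-creating the channel on every iteration, mirroring the WatchChan label
// in the diff above.
func main() {
	events := make(chan string, 3)
	events <- "a"
	events <- "b"
	events <- "c"
	close(events)

	ticker := time.NewTicker(50 * time.Millisecond)
	defer ticker.Stop()

receiveLoop:
	select {
	case <-ticker.C:
		// Periodic work (a health check in the diff) without touching the channel.
		goto receiveLoop
	case ev, ok := <-events:
		if !ok {
			fmt.Println("channel closed, exit")
			return
		}
		fmt.Println("got event:", ev)
		goto receiveLoop
	}
}
```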
Review comment on the end of the watch loop:

- How about adding a log when we exit the get leader loop?
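A possible shape for that suggestion, sketched as a self-contained snippet: the logger setup, function name, and message are illustrative rather than what the PR merged, though the field names mirror the ones used in the diff above.

```go
package main

import (
	"go.uber.org/zap"
)

// Illustrative only: emit a log line when the watch/get-leader loop exits, so
// operators can distinguish a normal server shutdown from an unhealthy etcd
// connection that forced the loop to return.
func watchLoop(logger *zap.Logger, leaderKey, purpose string) {
	defer logger.Info("leader watch loop exits",
		zap.String("leader-key", leaderKey),
		zap.String("purpose", purpose))

	// ... watch loop body elided ...
}

func main() {
	logger, _ := zap.NewDevelopment()
	defer func() { _ = logger.Sync() }()
	watchLoop(logger, "/example/leader", "leader election")
}
```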