From bab16f6a307dc5be06c25d1a3ea45cd615638d25 Mon Sep 17 00:00:00 2001 From: vacheli Date: Wed, 15 May 2024 16:47:53 +0800 Subject: [PATCH] fixed a bug where a group fix is attempted repeatedly (#2650) Co-authored-by: yangwenhao --- codis/pkg/topom/topom_group.go | 10 ++++++---- codis/pkg/topom/topom_sentinel.go | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/codis/pkg/topom/topom_group.go b/codis/pkg/topom/topom_group.go index 517fb2da4c..9f7ed5568b 100644 --- a/codis/pkg/topom/topom_group.go +++ b/codis/pkg/topom/topom_group.go @@ -330,7 +330,7 @@ func (s *Topom) GroupPromoteServer(gid int, addr string) error { } } -func (s *Topom) tryFixReplicationRelationships(ctx *context, recoveredGroupServers []*redis.ReplicationState) { +func (s *Topom) tryFixReplicationRelationships(ctx *context, recoveredGroupServers []*redis.ReplicationState, masterOffGroupLen int) { for _, state := range recoveredGroupServers { log.Infof("group-[%d] try to fix server[%v-%v] replication relationship", state.GroupID, state.Index, state.Addr) group, err := ctx.getGroup(state.GroupID) @@ -346,7 +346,7 @@ func (s *Topom) tryFixReplicationRelationships(ctx *context, recoveredGroupServe continue } - err = s.tryFixReplicationRelationship(group, state.Server, state) + err = s.tryFixReplicationRelationship(group, state.Server, state, masterOffGroupLen) if err != nil { log.Warnf("group-[%d] fix server[%v] replication relationship failed, err: %v", group.Id, state.Addr, err) continue @@ -371,12 +371,14 @@ func (s *Topom) tryFixReplicationRelationships(ctx *context, recoveredGroupServe // only fix which the old state of GroupServer is GroupServerStateOffline. // It will only update the state of GroupServer to GroupServerStateNormal, If the GroupServer have right // master-slave replication relationship. 
-func (s *Topom) tryFixReplicationRelationship(group *models.Group, groupServer *models.GroupServer, state *redis.ReplicationState) (err error) { +func (s *Topom) tryFixReplicationRelationship(group *models.Group, groupServer *models.GroupServer, state *redis.ReplicationState, masterOffGroupLen int) (err error) { curMasterAddr := group.Servers[0].Addr if isGroupMaster(state, group) { // current server is master, if models.GroupServerRole(state.Replication.Role) == models.RoleMaster { - return nil + if masterOffGroupLen > 0 { + return nil + } } // execute the command `slaveof no one` diff --git a/codis/pkg/topom/topom_sentinel.go b/codis/pkg/topom/topom_sentinel.go index 3ea8b3cd9f..9d4299583e 100644 --- a/codis/pkg/topom/topom_sentinel.go +++ b/codis/pkg/topom/topom_sentinel.go @@ -48,7 +48,7 @@ func (s *Topom) CheckStateAndSwitchSlavesAndMasters(filter func(index int, g *mo if len(recoveredGroupServersState) > 0 { // offline GroupServer's service has recovered, check and fix it's master-slave replication relationship - s.tryFixReplicationRelationships(ctx, recoveredGroupServersState) + s.tryFixReplicationRelationships(ctx, recoveredGroupServersState,len(masterOfflineGroups)) } return nil