Skip to content

Commit

Permalink
fixed a bug where fixing a group was attempted repeatedly (#2650)
Browse files Browse the repository at this point in the history
Co-authored-by: yangwenhao <[email protected]>
  • Loading branch information
vacheli and yangwenhao authored May 15, 2024
1 parent a0fc8a1 commit bab16f6
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 5 deletions.
10 changes: 6 additions & 4 deletions codis/pkg/topom/topom_group.go
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ func (s *Topom) GroupPromoteServer(gid int, addr string) error {
}
}

func (s *Topom) tryFixReplicationRelationships(ctx *context, recoveredGroupServers []*redis.ReplicationState) {
func (s *Topom) tryFixReplicationRelationships(ctx *context, recoveredGroupServers []*redis.ReplicationState, masterOffGroupLen int) {
for _, state := range recoveredGroupServers {
log.Infof("group-[%d] try to fix server[%v-%v] replication relationship", state.GroupID, state.Index, state.Addr)
group, err := ctx.getGroup(state.GroupID)
Expand All @@ -346,7 +346,7 @@ func (s *Topom) tryFixReplicationRelationships(ctx *context, recoveredGroupServe
continue
}

err = s.tryFixReplicationRelationship(group, state.Server, state)
err = s.tryFixReplicationRelationship(group, state.Server, state, masterOffGroupLen)
if err != nil {
log.Warnf("group-[%d] fix server[%v] replication relationship failed, err: %v", group.Id, state.Addr, err)
continue
Expand All @@ -371,12 +371,14 @@ func (s *Topom) tryFixReplicationRelationships(ctx *context, recoveredGroupServe
// only a GroupServer whose old state is GroupServerStateOffline is fixed.
// The state of the GroupServer is updated to GroupServerStateNormal only if the GroupServer has the right
// master-slave replication relationship.
func (s *Topom) tryFixReplicationRelationship(group *models.Group, groupServer *models.GroupServer, state *redis.ReplicationState) (err error) {
func (s *Topom) tryFixReplicationRelationship(group *models.Group, groupServer *models.GroupServer, state *redis.ReplicationState, masterOffGroupLen int) (err error) {
curMasterAddr := group.Servers[0].Addr
if isGroupMaster(state, group) {
// current server is master,
if models.GroupServerRole(state.Replication.Role) == models.RoleMaster {
return nil
if masterOffGroupLen > 0 {
return nil
}
}

// execute the command `slaveof no one`
Expand Down
2 changes: 1 addition & 1 deletion codis/pkg/topom/topom_sentinel.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ func (s *Topom) CheckStateAndSwitchSlavesAndMasters(filter func(index int, g *mo

if len(recoveredGroupServersState) > 0 {
// offline GroupServer's service has recovered; check and fix its master-slave replication relationship
s.tryFixReplicationRelationships(ctx, recoveredGroupServersState)
s.tryFixReplicationRelationships(ctx, recoveredGroupServersState,len(masterOfflineGroups))
}

return nil
Expand Down

0 comments on commit bab16f6

Please sign in to comment.