Skip to content

Commit

Permalink
Merge pull request #8070 from heyitsanthony/etcdctl-cluster-health
Browse files: browse the repository at this point in the history
ctlv2: report unhealthy in cluster-health if any node is unavailable
  • Loading branch information
Anthony Romano authored Jun 9, 2017
2 parents b194276 + 3fcb833 commit 933aa09
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 12 deletions.
22 changes: 18 additions & 4 deletions e2e/ctl_v2_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -321,17 +321,31 @@ func TestCtlV2ClusterHealth(t *testing.T) {
}
}()

// has quorum
// all members available
if err := etcdctlClusterHealth(epc, "cluster is healthy"); err != nil {
t.Fatalf("cluster-health expected to be healthy (%v)", err)
}

// cut quorum
// missing members, has quorum
epc.procs[0].Stop()

for i := 0; i < 3; i++ {
err := etcdctlClusterHealth(epc, "cluster is degraded")
if err == nil {
break
} else if i == 2 {
t.Fatalf("cluster-health expected to be degraded (%v)", err)
}
// possibly no leader yet; retry
time.Sleep(time.Second)
}

// no quorum
epc.procs[1].Stop()
if err := etcdctlClusterHealth(epc, "cluster is unhealthy"); err != nil {
t.Fatalf("cluster-health expected to be unhealthy (%v)", err)
if err := etcdctlClusterHealth(epc, "cluster is unavailable"); err != nil {
t.Fatalf("cluster-health expected to be unavailable (%v)", err)
}

epc.procs[0], epc.procs[1] = nil, nil
}

Expand Down
17 changes: 9 additions & 8 deletions etcdctl/ctlv2/command/cluster_health.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ func handleClusterHealth(c *cli.Context) error {
}

for {
health := false
healthyMembers := 0
for _, m := range ms {
if len(m.ClientURLs) == 0 {
fmt.Printf("member %s is unreachable: no available published client urls\n", m.ID)
Expand Down Expand Up @@ -105,8 +105,8 @@ func handleClusterHealth(c *cli.Context) error {

checked = true
if result.Health == "true" || nresult.Health {
health = true
fmt.Printf("member %s is healthy: got healthy result from %s\n", m.ID, url)
healthyMembers++
} else {
fmt.Printf("member %s is unhealthy: got unhealthy result from %s\n", m.ID, url)
}
Expand All @@ -116,19 +116,20 @@ func handleClusterHealth(c *cli.Context) error {
fmt.Printf("member %s is unreachable: %v are all unreachable\n", m.ID, m.ClientURLs)
}
}
if health {
switch healthyMembers {
case len(ms):
fmt.Println("cluster is healthy")
} else {
fmt.Println("cluster is unhealthy")
case 0:
fmt.Println("cluster is unavailable")
default:
fmt.Println("cluster is degraded")
}

if !forever {
if health {
if healthyMembers == len(ms) {
os.Exit(ExitSuccess)
return nil
}
os.Exit(ExitClusterNotHealthy)
return nil
}

fmt.Printf("\nnext check after 10 second...\n\n")
Expand Down

0 comments on commit 933aa09

Please sign in to comment.