Skip to content

Commit

Permalink
server: try re-enable TestAdminDecommissionedOperations
Browse files Browse the repository at this point in the history
The test is flaky; sadly, the error does not reproduce locally and the
precise cause of the flakiness is obscured by cockroachdb#100796.

This commit re-enables the test with the fix for cockroachdb#100796, so that the
next CI failure can reveal more details about what went wrong.

Release note: None
  • Loading branch information
knz committed Apr 8, 2023
1 parent 8e6f530 commit 3cbd40c
Showing 1 changed file with 42 additions and 33 deletions.
75 changes: 42 additions & 33 deletions pkg/server/admin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3008,7 +3008,7 @@ func TestAdminDecommissionedOperations(t *testing.T) {
defer leaktest.AfterTest(t)()
defer log.Scope(t).Close(t)

skip.WithIssue(t, 100789, "flaky test")
skip.UnderRace(t, "test uses timeouts, and race builds cause the timeouts to be exceeded")

ctx := context.Background()
tc := serverutils.StartNewTestCluster(t, 2, base.TestClusterArgs{
Expand Down Expand Up @@ -3036,11 +3036,19 @@ func TestAdminDecommissionedOperations(t *testing.T) {
require.NoError(t, srv.Decommission(ctx, status, []roachpb.NodeID{decomSrv.NodeID()}))
}

require.Eventually(t, func() bool {
_, err := decomSrv.DB().Scan(ctx, keys.MinKey, keys.MaxKey, 0)
testutils.SucceedsWithin(t, func() error {
timeoutCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
defer cancel()
_, err := decomSrv.DB().Scan(timeoutCtx, keys.MinKey, keys.MaxKey, 0)
if err == nil {
return errors.New("expected error")
}
s, ok := status.FromError(errors.UnwrapAll(err))
return ok && s.Code() == codes.PermissionDenied
}, 10*time.Second, 100*time.Millisecond, "timed out waiting for server to lose cluster access")
if ok && s.Code() == codes.PermissionDenied {
return nil
}
return err
}, 10*time.Second)

// Set up an admin client.
//lint:ignore SA1019 grpc.WithInsecure is deprecated
Expand All @@ -3058,94 +3066,94 @@ func TestAdminDecommissionedOperations(t *testing.T) {
testcases := []struct {
name string
expectCode codes.Code
op func(serverpb.AdminClient) error
op func(context.Context, serverpb.AdminClient) error
}{
{"Cluster", codes.OK, func(c serverpb.AdminClient) error {
{"Cluster", codes.OK, func(ctx context.Context, c serverpb.AdminClient) error {
_, err := c.Cluster(ctx, &serverpb.ClusterRequest{})
return err
}},
{"Databases", codes.Internal, func(c serverpb.AdminClient) error {
{"Databases", codes.Internal, func(ctx context.Context, c serverpb.AdminClient) error {
_, err := c.Databases(ctx, &serverpb.DatabasesRequest{})
return err
}},
{"DatabaseDetails", codes.Internal, func(c serverpb.AdminClient) error {
{"DatabaseDetails", codes.Internal, func(ctx context.Context, c serverpb.AdminClient) error {
_, err := c.DatabaseDetails(ctx, &serverpb.DatabaseDetailsRequest{Database: "foo"})
return err
}},
{"DataDistribution", codes.Internal, func(c serverpb.AdminClient) error {
{"DataDistribution", codes.Internal, func(ctx context.Context, c serverpb.AdminClient) error {
_, err := c.DataDistribution(ctx, &serverpb.DataDistributionRequest{})
return err
}},
{"Decommission", codes.Internal, func(c serverpb.AdminClient) error {
{"Decommission", codes.Internal, func(ctx context.Context, c serverpb.AdminClient) error {
_, err := c.Decommission(ctx, &serverpb.DecommissionRequest{
NodeIDs: []roachpb.NodeID{srv.NodeID(), decomSrv.NodeID()},
TargetMembership: livenesspb.MembershipStatus_DECOMMISSIONED,
})
return err
}},
{"DecommissionStatus", codes.Internal, func(c serverpb.AdminClient) error {
{"DecommissionStatus", codes.Internal, func(ctx context.Context, c serverpb.AdminClient) error {
_, err := c.DecommissionStatus(ctx, &serverpb.DecommissionStatusRequest{
NodeIDs: []roachpb.NodeID{srv.NodeID(), decomSrv.NodeID()},
})
return err
}},
{"EnqueueRange", codes.Internal, func(c serverpb.AdminClient) error {
{"EnqueueRange", codes.Internal, func(ctx context.Context, c serverpb.AdminClient) error {
_, err := c.EnqueueRange(ctx, &serverpb.EnqueueRangeRequest{
RangeID: scratchRange.RangeID,
Queue: "replicaGC",
})
return err
}},
{"Events", codes.Internal, func(c serverpb.AdminClient) error {
{"Events", codes.Internal, func(ctx context.Context, c serverpb.AdminClient) error {
_, err := c.Events(ctx, &serverpb.EventsRequest{})
return err
}},
{"Health", codes.OK, func(c serverpb.AdminClient) error {
{"Health", codes.OK, func(ctx context.Context, c serverpb.AdminClient) error {
_, err := c.Health(ctx, &serverpb.HealthRequest{})
return err
}},
{"Jobs", codes.Internal, func(c serverpb.AdminClient) error {
{"Jobs", codes.Internal, func(ctx context.Context, c serverpb.AdminClient) error {
_, err := c.Jobs(ctx, &serverpb.JobsRequest{})
return err
}},
{"Liveness", codes.Internal, func(c serverpb.AdminClient) error {
{"Liveness", codes.Internal, func(ctx context.Context, c serverpb.AdminClient) error {
_, err := c.Liveness(ctx, &serverpb.LivenessRequest{})
return err
}},
{"Locations", codes.Internal, func(c serverpb.AdminClient) error {
{"Locations", codes.Internal, func(ctx context.Context, c serverpb.AdminClient) error {
_, err := c.Locations(ctx, &serverpb.LocationsRequest{})
return err
}},
{"NonTableStats", codes.Internal, func(c serverpb.AdminClient) error {
{"NonTableStats", codes.Internal, func(ctx context.Context, c serverpb.AdminClient) error {
_, err := c.NonTableStats(ctx, &serverpb.NonTableStatsRequest{})
return err
}},
{"QueryPlan", codes.OK, func(c serverpb.AdminClient) error {
{"QueryPlan", codes.OK, func(ctx context.Context, c serverpb.AdminClient) error {
_, err := c.QueryPlan(ctx, &serverpb.QueryPlanRequest{Query: "SELECT 1"})
return err
}},
{"RangeLog", codes.Internal, func(c serverpb.AdminClient) error {
{"RangeLog", codes.Internal, func(ctx context.Context, c serverpb.AdminClient) error {
_, err := c.RangeLog(ctx, &serverpb.RangeLogRequest{})
return err
}},
{"Settings", codes.OK, func(c serverpb.AdminClient) error {
{"Settings", codes.OK, func(ctx context.Context, c serverpb.AdminClient) error {
_, err := c.Settings(ctx, &serverpb.SettingsRequest{})
return err
}},
{"TableStats", codes.Internal, func(c serverpb.AdminClient) error {
{"TableStats", codes.Internal, func(ctx context.Context, c serverpb.AdminClient) error {
_, err := c.TableStats(ctx, &serverpb.TableStatsRequest{Database: "foo", Table: "bar"})
return err
}},
{"TableDetails", codes.Internal, func(c serverpb.AdminClient) error {
{"TableDetails", codes.Internal, func(ctx context.Context, c serverpb.AdminClient) error {
_, err := c.TableDetails(ctx, &serverpb.TableDetailsRequest{Database: "foo", Table: "bar"})
return err
}},
{"Users", codes.Internal, func(c serverpb.AdminClient) error {
{"Users", codes.Internal, func(ctx context.Context, c serverpb.AdminClient) error {
_, err := c.Users(ctx, &serverpb.UsersRequest{})
return err
}},
// We drain at the end, since it may evict us.
{"Drain", codes.Unknown, func(c serverpb.AdminClient) error {
{"Drain", codes.Unknown, func(ctx context.Context, c serverpb.AdminClient) error {
stream, err := c.Drain(ctx, &serverpb.DrainRequest{DoDrain: true})
if err != nil {
return err
Expand All @@ -3158,20 +3166,21 @@ func TestAdminDecommissionedOperations(t *testing.T) {
for _, tc := range testcases {
tc := tc
t.Run(tc.name, func(t *testing.T) {
var err error
require.Eventually(t, func() bool {
err = tc.op(adminClient)
testutils.SucceedsWithin(t, func() error {
timeoutCtx, cancel := context.WithTimeout(ctx, 10*time.Second)
defer cancel()
err := tc.op(timeoutCtx, adminClient)
if tc.expectCode == codes.OK {
require.NoError(t, err)
return true
return nil
}
s, ok := status.FromError(errors.UnwrapAll(err))
if s == nil || !ok {
return false
return err
}
require.Equal(t, tc.expectCode, s.Code())
return true
}, 10*time.Second, 100*time.Millisecond, "timed out waiting for gRPC error, got %s", err)
return nil
}, 10*time.Second)
})
}
}
Expand Down

0 comments on commit 3cbd40c

Please sign in to comment.