From 8b37d97c4e5313c6d8997a031f35b0a3fd24ce53 Mon Sep 17 00:00:00 2001 From: Antonin Bas Date: Fri, 17 May 2024 11:46:17 -0700 Subject: [PATCH] Add "-run" filter for antctl check installation command (#6333) To support running a subset only of tests, based on which test names match the provided regex. We also log stderr when `/agnhost connect` fails, to assist in troubleshooting. I have seen the `antctl check installation` command fail in CI, and at the moment it is impossible to troubleshoot. Signed-off-by: Antonin Bas --- pkg/antctl/raw/check/installation/command.go | 101 +++++++++++++---- .../raw/check/installation/command_test.go | 106 ++++++++++++++++++ .../check/installation/test_podtointernet.go | 5 +- .../installation/test_podtopodinternode.go | 5 +- .../installation/test_podtopodintranode.go | 5 +- .../test_podtoserviceinternode.go | 5 +- .../test_podtoserviceintranode.go | 5 +- 7 files changed, 188 insertions(+), 44 deletions(-) create mode 100644 pkg/antctl/raw/check/installation/command_test.go diff --git a/pkg/antctl/raw/check/installation/command.go b/pkg/antctl/raw/check/installation/command.go index 82b550ac1cc..902da8e4d59 100644 --- a/pkg/antctl/raw/check/installation/command.go +++ b/pkg/antctl/raw/check/installation/command.go @@ -20,6 +20,7 @@ import ( "fmt" "net" "os" + "regexp" "time" "github.com/fatih/color" @@ -41,12 +42,14 @@ func Command() *cobra.Command { return Run(o) }, } - command.Flags().StringVarP(&o.antreaNamespace, "Namespace", "n", o.antreaNamespace, "Configure Namespace in which Antrea is running") + command.Flags().StringVarP(&o.antreaNamespace, "namespace", "n", o.antreaNamespace, "Configure Namespace in which Antrea is running") + command.Flags().StringVar(&o.runFilter, "run", o.runFilter, "Run only the tests that match the provided regex") return command } type options struct { antreaNamespace string + runFilter string } func newOptions() *options { @@ -100,38 +103,48 @@ type testContext struct { echoSameNodePod *corev1.Pod 
echoOtherNodePod *corev1.Pod namespace string + // A nil regex indicates that all the tests should be run. + runFilterRegex *regexp.Regexp +} + +type testStats struct { + numSuccess int + numFailure int + numSkipped int +} + +func compileRunFilter(runFilter string) (*regexp.Regexp, error) { + if runFilter == "" { + return nil, nil + } + re, err := regexp.Compile(runFilter) + if err != nil { + return nil, fmt.Errorf("invalid regex for run filter: %w", err) + } + return re, nil } func Run(o *options) error { + runFilterRegex, err := compileRunFilter(o.runFilter) + if err != nil { + return err + } + client, config, clusterName, err := check.NewClient() if err != nil { - return fmt.Errorf("unable to create Kubernetes client: %s", err) + return fmt.Errorf("unable to create Kubernetes client: %w", err) } ctx := context.Background() - testContext := NewTestContext(client, config, clusterName, o) + testContext := NewTestContext(client, config, clusterName, o.antreaNamespace, runFilterRegex) if err := testContext.setup(ctx); err != nil { return err } - var numSuccess, numFailure, numSkipped int - for name, test := range testsRegistry { - testContext.Header("Running test: %s", name) - if err := test.Run(ctx, testContext); err != nil { - if errors.As(err, new(notRunnableError)) { - testContext.Warning("Test %s was skipped: %v", name, err) - numSkipped++ - } else { - testContext.Fail("Test %s failed: %v", name, err) - numFailure++ - } - } else { - testContext.Success("Test %s passed", name) - numSuccess++ - } - } - testContext.Log("Test finished: %v tests succeeded, %v tests failed, %v tests were skipped", numSuccess, numFailure, numSkipped) + stats := testContext.runTests(ctx) + + testContext.Log("Test finished: %v tests succeeded, %v tests failed, %v tests were skipped", stats.numSuccess, stats.numFailure, stats.numSkipped) check.Teardown(ctx, testContext.client, testContext.clusterName, testContext.namespace) - if numFailure > 0 { - return fmt.Errorf("%v/%v tests failed", 
numFailure, len(testsRegistry)) + if stats.numFailure > 0 { + return fmt.Errorf("%v/%v tests failed", stats.numFailure, len(testsRegistry)) } return nil } @@ -156,13 +169,20 @@ func newService(name string, selector map[string]string, port int) *corev1.Servi } } -func NewTestContext(client kubernetes.Interface, config *rest.Config, clusterName string, o *options) *testContext { +func NewTestContext( + client kubernetes.Interface, + config *rest.Config, + clusterName string, + antreaNamespace string, + runFilterRegex *regexp.Regexp, +) *testContext { return &testContext{ client: client, config: config, clusterName: clusterName, - antreaNamespace: o.antreaNamespace, + antreaNamespace: antreaNamespace, namespace: check.GenerateRandomNamespace(testNamespacePrefix), + runFilterRegex: runFilterRegex, } } @@ -305,6 +325,39 @@ func (t *testContext) setup(ctx context.Context) error { return nil } +func (t *testContext) runTests(ctx context.Context) testStats { + var stats testStats + for name, test := range testsRegistry { + if t.runFilterRegex != nil && !t.runFilterRegex.MatchString(name) { + continue + } + t.Header("Running test: %s", name) + if err := test.Run(ctx, t); err != nil { + if errors.As(err, new(notRunnableError)) { + t.Warning("Test %s was skipped: %v", name, err) + stats.numSkipped++ + } else { + t.Fail("Test %s failed: %v", name, err) + stats.numFailure++ + } + } else { + t.Success("Test %s passed", name) + stats.numSuccess++ + } + } + return stats +} + +func (t *testContext) runAgnhostConnect(ctx context.Context, clientPodName string, container string, target string, targetPort int) error { + cmd := agnhostConnectCommand(target, fmt.Sprint(targetPort)) + _, stderr, err := check.ExecInPod(ctx, t.client, t.config, t.namespace, clientPodName, container, cmd) + if err != nil { + // We log the contents of stderr here for troubleshooting purposes. 
+ t.Log("/agnhost command failed - stderr: %s", stderr) + } + return err +} + func (t *testContext) Log(format string, a ...interface{}) { fmt.Fprintf(os.Stdout, fmt.Sprintf("[%s] ", t.clusterName)+format+"\n", a...) } diff --git a/pkg/antctl/raw/check/installation/command_test.go b/pkg/antctl/raw/check/installation/command_test.go new file mode 100644 index 00000000000..923d30ce781 --- /dev/null +++ b/pkg/antctl/raw/check/installation/command_test.go @@ -0,0 +1,106 @@ +// Copyright 2024 Antrea Authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package installation + +import ( + "context" + "fmt" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func overrideTestsRegistry(t *testing.T, registry map[string]Test) { + oldRegistry := testsRegistry + testsRegistry = registry + t.Cleanup(func() { + testsRegistry = oldRegistry + }) +} + +type notRunnableTest struct{} + +func (t *notRunnableTest) Run(ctx context.Context, testContext *testContext) error { + return newNotRunnableError("not runnable") +} + +type failedTest struct{} + +func (t *failedTest) Run(ctx context.Context, testContext *testContext) error { + return fmt.Errorf("failed") +} + +type successfulTest struct{} + +func (t *successfulTest) Run(ctx context.Context, testContext *testContext) error { + return nil +} + +func TestRun(t *testing.T) { + ctx := context.Background() + + registry := map[string]Test{ + "not-runnable": &notRunnableTest{}, + "failure": &failedTest{}, + "success": &successfulTest{}, + } + + testCases := []struct { + name string + registry map[string]Test + runFilter string + expectedStats testStats + }{ + { + name: "no test in registry", + expectedStats: testStats{}, + }, + { + name: "run all tests", + registry: registry, + expectedStats: testStats{ + numSuccess: 1, + numFailure: 1, + numSkipped: 1, + }, + }, + { + name: "run single test", + registry: registry, + runFilter: "success", + expectedStats: testStats{ + numSuccess: 1, + }, + }, + { + name: "no matching test", + registry: registry, + runFilter: "my-test", + expectedStats: testStats{}, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + overrideTestsRegistry(t, tc.registry) + runFilterRegex, err := compileRunFilter(tc.runFilter) + require.NoError(t, err) + testContext := NewTestContext(nil, nil, "test-cluster", "kube-system", runFilterRegex) + stats := testContext.runTests(ctx) + assert.Equal(t, tc.expectedStats, stats) + }) + } +} diff --git
a/pkg/antctl/raw/check/installation/test_podtointernet.go b/pkg/antctl/raw/check/installation/test_podtointernet.go index 50bc7978d54..c5be54b252a 100644 --- a/pkg/antctl/raw/check/installation/test_podtointernet.go +++ b/pkg/antctl/raw/check/installation/test_podtointernet.go @@ -17,8 +17,6 @@ package installation import ( "context" "fmt" - - "antrea.io/antrea/pkg/antctl/raw/check" ) type PodToInternetConnectivityTest struct{} @@ -31,8 +29,7 @@ func (t *PodToInternetConnectivityTest) Run(ctx context.Context, testContext *te for _, clientPod := range testContext.clientPods { srcPod := testContext.namespace + "/" + clientPod.Name testContext.Log("Validating connectivity from Pod %s to the world (google.com)...", srcPod) - _, _, err := check.ExecInPod(ctx, testContext.client, testContext.config, testContext.namespace, clientPod.Name, clientDeploymentName, agnhostConnectCommand("google.com", "80")) - if err != nil { + if err := testContext.runAgnhostConnect(ctx, clientPod.Name, "", "google.com", 80); err != nil { return fmt.Errorf("Pod %s was not able to connect to google.com: %w", srcPod, err) } testContext.Log("Pod %s was able to connect to google.com", srcPod) diff --git a/pkg/antctl/raw/check/installation/test_podtopodinternode.go b/pkg/antctl/raw/check/installation/test_podtopodinternode.go index 556e72faf06..feb72d15c06 100644 --- a/pkg/antctl/raw/check/installation/test_podtopodinternode.go +++ b/pkg/antctl/raw/check/installation/test_podtopodinternode.go @@ -17,8 +17,6 @@ package installation import ( "context" "fmt" - - "antrea.io/antrea/pkg/antctl/raw/check" ) type PodToPodInterNodeConnectivityTest struct{} @@ -37,8 +35,7 @@ func (t *PodToPodInterNodeConnectivityTest) Run(ctx context.Context, testContext for _, podIP := range testContext.echoOtherNodePod.Status.PodIPs { echoIP := podIP.IP testContext.Log("Validating from Pod %s to Pod %s at IP %s...", srcPod, dstPod, echoIP) - _, _, err := check.ExecInPod(ctx, testContext.client, testContext.config, 
testContext.namespace, clientPod.Name, "", agnhostConnectCommand(echoIP, "80")) - if err != nil { + if err := testContext.runAgnhostConnect(ctx, clientPod.Name, "", echoIP, 80); err != nil { return fmt.Errorf("client Pod %s was not able to communicate with echo Pod %s (%s): %w", clientPod.Name, testContext.echoOtherNodePod.Name, echoIP, err) } testContext.Log("client Pod %s was able to communicate with echo Pod %s (%s)", clientPod.Name, testContext.echoOtherNodePod.Name, echoIP) diff --git a/pkg/antctl/raw/check/installation/test_podtopodintranode.go b/pkg/antctl/raw/check/installation/test_podtopodintranode.go index 5fae4cbe947..0762e542b89 100644 --- a/pkg/antctl/raw/check/installation/test_podtopodintranode.go +++ b/pkg/antctl/raw/check/installation/test_podtopodintranode.go @@ -17,8 +17,6 @@ package installation import ( "context" "fmt" - - "antrea.io/antrea/pkg/antctl/raw/check" ) type PodToPodIntraNodeConnectivityTest struct{} @@ -34,8 +32,7 @@ func (t *PodToPodIntraNodeConnectivityTest) Run(ctx context.Context, testContext for _, podIP := range testContext.echoSameNodePod.Status.PodIPs { echoIP := podIP.IP testContext.Log("Validating from Pod %s to Pod %s at IP %s...", srcPod, dstPod, echoIP) - _, _, err := check.ExecInPod(ctx, testContext.client, testContext.config, testContext.namespace, clientPod.Name, "", agnhostConnectCommand(echoIP, "80")) - if err != nil { + if err := testContext.runAgnhostConnect(ctx, clientPod.Name, "", echoIP, 80); err != nil { return fmt.Errorf("client Pod %s was not able to communicate with echo Pod %s (%s): %w", clientPod.Name, testContext.echoSameNodePod.Name, echoIP, err) } testContext.Log("client Pod %s was able to communicate with echo Pod %s (%s)", clientPod.Name, testContext.echoSameNodePod.Name, echoIP) diff --git a/pkg/antctl/raw/check/installation/test_podtoserviceinternode.go b/pkg/antctl/raw/check/installation/test_podtoserviceinternode.go index 19ec6a76f56..29e2ff59fbe 100644 --- 
a/pkg/antctl/raw/check/installation/test_podtoserviceinternode.go +++ b/pkg/antctl/raw/check/installation/test_podtoserviceinternode.go @@ -17,8 +17,6 @@ package installation import ( "context" "fmt" - - "antrea.io/antrea/pkg/antctl/raw/check" ) type PodToServiceInterNodeConnectivityTest struct{} @@ -34,8 +32,7 @@ func (t *PodToServiceInterNodeConnectivityTest) Run(ctx context.Context, testCon service := echoOtherNodeDeploymentName for _, clientPod := range testContext.clientPods { testContext.Log("Validating from Pod %s to Service %s in Namespace %s...", clientPod.Name, service, testContext.namespace) - _, _, err := check.ExecInPod(ctx, testContext.client, testContext.config, testContext.namespace, clientPod.Name, "", agnhostConnectCommand(service, "80")) - if err != nil { + if err := testContext.runAgnhostConnect(ctx, clientPod.Name, "", service, 80); err != nil { return fmt.Errorf("client Pod %s was not able to communicate with Service %s", clientPod.Name, service) } testContext.Log("client Pod %s was able to communicate with Service %s", clientPod.Name, service) diff --git a/pkg/antctl/raw/check/installation/test_podtoserviceintranode.go b/pkg/antctl/raw/check/installation/test_podtoserviceintranode.go index 30d2cb28838..3a377c0c220 100644 --- a/pkg/antctl/raw/check/installation/test_podtoserviceintranode.go +++ b/pkg/antctl/raw/check/installation/test_podtoserviceintranode.go @@ -17,8 +17,6 @@ package installation import ( "context" "fmt" - - "antrea.io/antrea/pkg/antctl/raw/check" ) type PodToServiceIntraNodeConnectivityTest struct{} @@ -31,8 +29,7 @@ func (t *PodToServiceIntraNodeConnectivityTest) Run(ctx context.Context, testCon service := echoSameNodeDeploymentName for _, clientPod := range testContext.clientPods { testContext.Log("Validating from Pod %s to Service %s in Namespace %s...", clientPod.Name, service, testContext.namespace) - _, _, err := check.ExecInPod(ctx, testContext.client, testContext.config, testContext.namespace, clientPod.Name, "", 
agnhostConnectCommand(service, "80")) - if err != nil { + if err := testContext.runAgnhostConnect(ctx, clientPod.Name, "", service, 80); err != nil { return fmt.Errorf("client Pod %s was not able to communicate with Service %s", clientPod.Name, service) } testContext.Log("client Pod %s was able to communicate with Service %s", clientPod.Name, service)