From 8b37d97c4e5313c6d8997a031f35b0a3fd24ce53 Mon Sep 17 00:00:00 2001
From: Antonin Bas <antonin.bas@broadcom.com>
Date: Fri, 17 May 2024 11:46:17 -0700
Subject: [PATCH] Add "-run" filter for antctl check installation command
 (#6333)

Support running only a subset of the tests: those whose names match the
provided regex.

We also log stderr when `/agnhost connect` fails, to assist in
troubleshooting. I have seen the `antctl check installation` command
fail in CI, and at the moment it is impossible to troubleshoot.

Signed-off-by: Antonin Bas <antonin.bas@broadcom.com>
---
 pkg/antctl/raw/check/installation/command.go  | 101 +++++++++++++----
 .../raw/check/installation/command_test.go    | 106 ++++++++++++++++++
 .../check/installation/test_podtointernet.go  |   5 +-
 .../installation/test_podtopodinternode.go    |   5 +-
 .../installation/test_podtopodintranode.go    |   5 +-
 .../test_podtoserviceinternode.go             |   5 +-
 .../test_podtoserviceintranode.go             |   5 +-
 7 files changed, 188 insertions(+), 44 deletions(-)
 create mode 100644 pkg/antctl/raw/check/installation/command_test.go

diff --git a/pkg/antctl/raw/check/installation/command.go b/pkg/antctl/raw/check/installation/command.go
index 82b550ac1cc..902da8e4d59 100644
--- a/pkg/antctl/raw/check/installation/command.go
+++ b/pkg/antctl/raw/check/installation/command.go
@@ -20,6 +20,7 @@ import (
 	"fmt"
 	"net"
 	"os"
+	"regexp"
 	"time"
 
 	"github.com/fatih/color"
@@ -41,12 +42,14 @@ func Command() *cobra.Command {
 			return Run(o)
 		},
 	}
-	command.Flags().StringVarP(&o.antreaNamespace, "Namespace", "n", o.antreaNamespace, "Configure Namespace in which Antrea is running")
+	command.Flags().StringVarP(&o.antreaNamespace, "namespace", "n", o.antreaNamespace, "Configure Namespace in which Antrea is running")
+	command.Flags().StringVar(&o.runFilter, "run", o.runFilter, "Run only the tests that match the provided regex")
 	return command
 }
 
 type options struct {
 	antreaNamespace string
+	runFilter       string // regex from the "run" flag; empty means run all tests
 }
 
 func newOptions() *options {
@@ -100,38 +103,48 @@ type testContext struct {
 	echoSameNodePod  *corev1.Pod
 	echoOtherNodePod *corev1.Pod
 	namespace        string
+	// A nil regex indicates that all the tests should be run.
+	runFilterRegex *regexp.Regexp
+}
+
+type testStats struct {
+	numSuccess int // tests whose Run returned a nil error
+	numFailure int // tests whose Run returned an error that is not a notRunnableError
+	numSkipped int // tests whose Run returned a notRunnableError
+}
+
+func compileRunFilter(runFilter string) (*regexp.Regexp, error) {
+	if runFilter == "" {
+		return nil, nil
+	}
+	re, err := regexp.Compile(runFilter)
+	if err != nil {
+		return nil, fmt.Errorf("invalid regex for run filter: %w", err)
+	}
+	return re, nil
 }
 
 func Run(o *options) error {
+	runFilterRegex, err := compileRunFilter(o.runFilter)
+	if err != nil {
+		return err
+	}
+
 	client, config, clusterName, err := check.NewClient()
 	if err != nil {
-		return fmt.Errorf("unable to create Kubernetes client: %s", err)
+		return fmt.Errorf("unable to create Kubernetes client: %w", err)
 	}
 	ctx := context.Background()
-	testContext := NewTestContext(client, config, clusterName, o)
+	testContext := NewTestContext(client, config, clusterName, o.antreaNamespace, runFilterRegex)
 	if err := testContext.setup(ctx); err != nil {
 		return err
 	}
-	var numSuccess, numFailure, numSkipped int
-	for name, test := range testsRegistry {
-		testContext.Header("Running test: %s", name)
-		if err := test.Run(ctx, testContext); err != nil {
-			if errors.As(err, new(notRunnableError)) {
-				testContext.Warning("Test %s was skipped: %v", name, err)
-				numSkipped++
-			} else {
-				testContext.Fail("Test %s failed: %v", name, err)
-				numFailure++
-			}
-		} else {
-			testContext.Success("Test %s passed", name)
-			numSuccess++
-		}
-	}
-	testContext.Log("Test finished: %v tests succeeded, %v tests failed, %v tests were skipped", numSuccess, numFailure, numSkipped)
+	stats := testContext.runTests(ctx)
+
+	testContext.Log("Test finished: %v tests succeeded, %v tests failed, %v tests were skipped", stats.numSuccess, stats.numFailure, stats.numSkipped)
 	check.Teardown(ctx, testContext.client, testContext.clusterName, testContext.namespace)
-	if numFailure > 0 {
-		return fmt.Errorf("%v/%v tests failed", numFailure, len(testsRegistry))
+	if stats.numFailure > 0 {
+		return fmt.Errorf("%v/%v tests failed", stats.numFailure, len(testsRegistry))
 	}
 	return nil
 }
@@ -156,13 +169,20 @@ func newService(name string, selector map[string]string, port int) *corev1.Servi
 	}
 }
 
-func NewTestContext(client kubernetes.Interface, config *rest.Config, clusterName string, o *options) *testContext {
+func NewTestContext(
+	client kubernetes.Interface,
+	config *rest.Config,
+	clusterName string,
+	antreaNamespace string,
+	runFilterRegex *regexp.Regexp, // nil means that all the tests are run
+) *testContext {
 	return &testContext{
 		client:          client,
 		config:          config,
 		clusterName:     clusterName,
-		antreaNamespace: o.antreaNamespace,
+		antreaNamespace: antreaNamespace,
 		namespace:       check.GenerateRandomNamespace(testNamespacePrefix),
+		runFilterRegex:  runFilterRegex,
 	}
 }
 
@@ -305,6 +325,39 @@ func (t *testContext) setup(ctx context.Context) error {
 	return nil
 }
 
+// runTests runs every registered test whose name matches the run filter and tallies the outcomes.
+func (t *testContext) runTests(ctx context.Context) testStats {
+	var stats testStats
+	for testName, testCase := range testsRegistry {
+		if re := t.runFilterRegex; re != nil && !re.MatchString(testName) {
+			continue // filtered out: neither run nor counted in any stat
+		}
+		t.Header("Running test: %s", testName)
+		switch err := testCase.Run(ctx, t); {
+		case err == nil:
+			t.Success("Test %s passed", testName)
+			stats.numSuccess++
+		case errors.As(err, new(notRunnableError)):
+			t.Warning("Test %s was skipped: %v", testName, err)
+			stats.numSkipped++
+		default:
+			t.Fail("Test %s failed: %v", testName, err)
+			stats.numFailure++
+		}
+	}
+	return stats
+}
+
+// runAgnhostConnect execs the agnhostConnectCommand for target and targetPort in the given Pod and returns the exec error.
+func (t *testContext) runAgnhostConnect(ctx context.Context, clientPodName string, container string, target string, targetPort int) error {
+	_, stderr, err := check.ExecInPod(ctx, t.client, t.config, t.namespace, clientPodName, container, agnhostConnectCommand(target, fmt.Sprint(targetPort)))
+	if err == nil {
+		return nil
+	}
+	t.Log("/agnhost command failed - stderr: %s", stderr) // stderr is surfaced for troubleshooting purposes
+	return err
+}
+
 func (t *testContext) Log(format string, a ...interface{}) {
 	fmt.Fprintf(os.Stdout, fmt.Sprintf("[%s] ", t.clusterName)+format+"\n", a...)
 }
diff --git a/pkg/antctl/raw/check/installation/command_test.go b/pkg/antctl/raw/check/installation/command_test.go
new file mode 100644
index 00000000000..923d30ce781
--- /dev/null
+++ b/pkg/antctl/raw/check/installation/command_test.go
@@ -0,0 +1,106 @@
+// Copyright 2024 Antrea Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package installation
+
+import (
+	"context"
+	"fmt"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func overrideTestsRegistry(t *testing.T, registry map[string]Test) {
+	oldRegistry := testsRegistry
+	testsRegistry = registry // swaps the package-level registry used by runTests
+	t.Cleanup(func() {
+		testsRegistry = oldRegistry // put the original registry back once t completes
+	})
+}
+
+type notRunnableTest struct{} // stub Test whose Run always returns the error built by newNotRunnableError
+
+func (t *notRunnableTest) Run(ctx context.Context, testContext *testContext) error {
+	return newNotRunnableError("not runnable")
+}
+
+type failedTest struct{} // stub Test whose Run always returns a plain error
+
+func (t *failedTest) Run(ctx context.Context, testContext *testContext) error {
+	return fmt.Errorf("failed")
+}
+
+type successfulTest struct{} // stub Test whose Run always returns nil
+
+func (t *successfulTest) Run(ctx context.Context, testContext *testContext) error {
+	return nil
+}
+
+func TestRun(t *testing.T) {
+	ctx := context.Background()
+
+	registry := map[string]Test{ // one stub per outcome that runTests tallies
+		"not-runnable": &notRunnableTest{},
+		"failure":      &failedTest{},
+		"success":      &successfulTest{},
+	}
+
+	testCases := []struct {
+		name          string
+		registry      map[string]Test
+		runFilter     string
+		expectedStats testStats
+	}{
+		{
+			name:          "no test in registry", // registry is left nil for this case
+			expectedStats: testStats{},
+		},
+		{
+			name:     "run all tests",
+			registry: registry,
+			expectedStats: testStats{
+				numSuccess: 1,
+				numFailure: 1,
+				numSkipped: 1,
+			},
+		},
+		{
+			name:      "run single test",
+			registry:  registry,
+			runFilter: "success", // of the three registry keys, only "success" matches this regex
+			expectedStats: testStats{
+				numSuccess: 1,
+			},
+		},
+		{
+			name:          "no matching test",
+			registry:      registry,
+			runFilter:     "my-test", // matches none of the registry keys
+			expectedStats: testStats{},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			overrideTestsRegistry(t, tc.registry) // the previous registry is restored by a t.Cleanup callback
+			runFilterRegex, err := compileRunFilter(tc.runFilter)
+			require.NoError(t, err)
+			testContext := NewTestContext(nil, nil, "test-cluster", "kube-system", runFilterRegex)
+			stats := testContext.runTests(ctx)
+			assert.Equal(t, tc.expectedStats, stats)
+		})
+	}
+}
diff --git a/pkg/antctl/raw/check/installation/test_podtointernet.go b/pkg/antctl/raw/check/installation/test_podtointernet.go
index 50bc7978d54..c5be54b252a 100644
--- a/pkg/antctl/raw/check/installation/test_podtointernet.go
+++ b/pkg/antctl/raw/check/installation/test_podtointernet.go
@@ -17,8 +17,6 @@ package installation
 import (
 	"context"
 	"fmt"
-
-	"antrea.io/antrea/pkg/antctl/raw/check"
 )
 
 type PodToInternetConnectivityTest struct{}
@@ -31,8 +29,7 @@ func (t *PodToInternetConnectivityTest) Run(ctx context.Context, testContext *te
 	for _, clientPod := range testContext.clientPods {
 		srcPod := testContext.namespace + "/" + clientPod.Name
 		testContext.Log("Validating connectivity from Pod %s to the world (google.com)...", srcPod)
-		_, _, err := check.ExecInPod(ctx, testContext.client, testContext.config, testContext.namespace, clientPod.Name, clientDeploymentName, agnhostConnectCommand("google.com", "80"))
-		if err != nil {
+		if err := testContext.runAgnhostConnect(ctx, clientPod.Name, "", "google.com", 80); err != nil {
 			return fmt.Errorf("Pod %s was not able to connect to google.com: %w", srcPod, err)
 		}
 		testContext.Log("Pod %s was able to connect to google.com", srcPod)
diff --git a/pkg/antctl/raw/check/installation/test_podtopodinternode.go b/pkg/antctl/raw/check/installation/test_podtopodinternode.go
index 556e72faf06..feb72d15c06 100644
--- a/pkg/antctl/raw/check/installation/test_podtopodinternode.go
+++ b/pkg/antctl/raw/check/installation/test_podtopodinternode.go
@@ -17,8 +17,6 @@ package installation
 import (
 	"context"
 	"fmt"
-
-	"antrea.io/antrea/pkg/antctl/raw/check"
 )
 
 type PodToPodInterNodeConnectivityTest struct{}
@@ -37,8 +35,7 @@ func (t *PodToPodInterNodeConnectivityTest) Run(ctx context.Context, testContext
 		for _, podIP := range testContext.echoOtherNodePod.Status.PodIPs {
 			echoIP := podIP.IP
 			testContext.Log("Validating from Pod %s to Pod %s at IP %s...", srcPod, dstPod, echoIP)
-			_, _, err := check.ExecInPod(ctx, testContext.client, testContext.config, testContext.namespace, clientPod.Name, "", agnhostConnectCommand(echoIP, "80"))
-			if err != nil {
+			if err := testContext.runAgnhostConnect(ctx, clientPod.Name, "", echoIP, 80); err != nil {
 				return fmt.Errorf("client Pod %s was not able to communicate with echo Pod %s (%s): %w", clientPod.Name, testContext.echoOtherNodePod.Name, echoIP, err)
 			}
 			testContext.Log("client Pod %s was able to communicate with echo Pod %s (%s)", clientPod.Name, testContext.echoOtherNodePod.Name, echoIP)
diff --git a/pkg/antctl/raw/check/installation/test_podtopodintranode.go b/pkg/antctl/raw/check/installation/test_podtopodintranode.go
index 5fae4cbe947..0762e542b89 100644
--- a/pkg/antctl/raw/check/installation/test_podtopodintranode.go
+++ b/pkg/antctl/raw/check/installation/test_podtopodintranode.go
@@ -17,8 +17,6 @@ package installation
 import (
 	"context"
 	"fmt"
-
-	"antrea.io/antrea/pkg/antctl/raw/check"
 )
 
 type PodToPodIntraNodeConnectivityTest struct{}
@@ -34,8 +32,7 @@ func (t *PodToPodIntraNodeConnectivityTest) Run(ctx context.Context, testContext
 		for _, podIP := range testContext.echoSameNodePod.Status.PodIPs {
 			echoIP := podIP.IP
 			testContext.Log("Validating from Pod %s to Pod %s at IP %s...", srcPod, dstPod, echoIP)
-			_, _, err := check.ExecInPod(ctx, testContext.client, testContext.config, testContext.namespace, clientPod.Name, "", agnhostConnectCommand(echoIP, "80"))
-			if err != nil {
+			if err := testContext.runAgnhostConnect(ctx, clientPod.Name, "", echoIP, 80); err != nil {
 				return fmt.Errorf("client Pod %s was not able to communicate with echo Pod %s (%s): %w", clientPod.Name, testContext.echoSameNodePod.Name, echoIP, err)
 			}
 			testContext.Log("client Pod %s was able to communicate with echo Pod %s (%s)", clientPod.Name, testContext.echoSameNodePod.Name, echoIP)
diff --git a/pkg/antctl/raw/check/installation/test_podtoserviceinternode.go b/pkg/antctl/raw/check/installation/test_podtoserviceinternode.go
index 19ec6a76f56..29e2ff59fbe 100644
--- a/pkg/antctl/raw/check/installation/test_podtoserviceinternode.go
+++ b/pkg/antctl/raw/check/installation/test_podtoserviceinternode.go
@@ -17,8 +17,6 @@ package installation
 import (
 	"context"
 	"fmt"
-
-	"antrea.io/antrea/pkg/antctl/raw/check"
 )
 
 type PodToServiceInterNodeConnectivityTest struct{}
@@ -34,8 +32,7 @@ func (t *PodToServiceInterNodeConnectivityTest) Run(ctx context.Context, testCon
 	service := echoOtherNodeDeploymentName
 	for _, clientPod := range testContext.clientPods {
 		testContext.Log("Validating from Pod %s to Service %s in Namespace %s...", clientPod.Name, service, testContext.namespace)
-		_, _, err := check.ExecInPod(ctx, testContext.client, testContext.config, testContext.namespace, clientPod.Name, "", agnhostConnectCommand(service, "80"))
-		if err != nil {
+		if err := testContext.runAgnhostConnect(ctx, clientPod.Name, "", service, 80); err != nil {
 			return fmt.Errorf("client Pod %s was not able to communicate with Service %s", clientPod.Name, service)
 		}
 		testContext.Log("client Pod %s was able to communicate with Service %s", clientPod.Name, service)
diff --git a/pkg/antctl/raw/check/installation/test_podtoserviceintranode.go b/pkg/antctl/raw/check/installation/test_podtoserviceintranode.go
index 30d2cb28838..3a377c0c220 100644
--- a/pkg/antctl/raw/check/installation/test_podtoserviceintranode.go
+++ b/pkg/antctl/raw/check/installation/test_podtoserviceintranode.go
@@ -17,8 +17,6 @@ package installation
 import (
 	"context"
 	"fmt"
-
-	"antrea.io/antrea/pkg/antctl/raw/check"
 )
 
 type PodToServiceIntraNodeConnectivityTest struct{}
@@ -31,8 +29,7 @@ func (t *PodToServiceIntraNodeConnectivityTest) Run(ctx context.Context, testCon
 	service := echoSameNodeDeploymentName
 	for _, clientPod := range testContext.clientPods {
 		testContext.Log("Validating from Pod %s to Service %s in Namespace %s...", clientPod.Name, service, testContext.namespace)
-		_, _, err := check.ExecInPod(ctx, testContext.client, testContext.config, testContext.namespace, clientPod.Name, "", agnhostConnectCommand(service, "80"))
-		if err != nil {
+		if err := testContext.runAgnhostConnect(ctx, clientPod.Name, "", service, 80); err != nil {
 			return fmt.Errorf("client Pod %s was not able to communicate with Service %s", clientPod.Name, service)
 		}
 		testContext.Log("client Pod %s was able to communicate with Service %s", clientPod.Name, service)