Fix Flow Aggregator e2e test issues
- deploy ch operator in a separate step from in-repo yml bundle
- define antrea side exporter configs from helm values file
- extract ch operator logs as well in case of failure
- update ch related image with flow-visibility prefix

Signed-off-by: Shawn Wang <[email protected]>
wsquan171 committed May 2, 2022
1 parent 6a7a389 commit a76de68
Showing 8 changed files with 59 additions and 105 deletions.
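Taken together, the bullets in the commit message re-wire the flow-visibility e2e setup: the exporter timing overrides move from hard-coded dev-mode values into a Helm values file, and the ClickHouse operator is applied as its own step from the in-repo bundle. A minimal sketch of the resulting flow, using only paths touched by this commit (the exact CI wiring is assumed):

# Antrea manifest: Flow Exporter enabled, e2e timings overridden from the values file.
./hack/generate-manifest.sh --flow-exporter \
    --extra-helm-values-file ci/kind/values-flow-exporter.yml > antrea.yml

# ClickHouse operator: deployed in a separate step from the in-repo bundle.
kubectl apply -f build/yamls/clickhouse-operator-install-bundle.yml

# Flow visibility manifest (ClickHouse instance and monitor), without the operator base.
./hack/generate-manifest-flow-visibility.sh --mode e2e > flow-visibility.yml
kubectl apply -f flow-visibility.yml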

This file was deleted.

12 changes: 8 additions & 4 deletions ci/kind/test-e2e-kind.sh
@@ -47,6 +47,8 @@ TESTBED_CMD=$(dirname $0)"/kind-setup.sh"
YML_CMD=$(dirname $0)"/../../hack/generate-manifest.sh"
FLOWAGGREGATOR_YML_CMD=$(dirname $0)"/../../hack/generate-manifest-flow-aggregator.sh"
FLOW_VISIBILITY_CMD=$(dirname $0)"/../../hack/generate-manifest-flow-visibility.sh --mode e2e"
+ FLOW_VISIBILITY_HELM_VALUES=$(dirname $0)"/values-flow-exporter.yml"
+ CH_OPERATOR_YML=$(dirname $0)"/../../build/yamls/clickhouse-operator-install-bundle.yml"

function quit {
result=$?
@@ -158,7 +160,7 @@ if ! $np; then
manifest_args="$manifest_args --no-np"
fi
if $flow_visibility; then
- manifest_args="$manifest_args --flow-exporter"
+ manifest_args="$manifest_args --flow-exporter --extra-helm-values-file $FLOW_VISIBILITY_HELM_VALUES"
fi

COMMON_IMAGES_LIST=("k8s.gcr.io/e2e-test-images/agnhost:2.29" \
@@ -167,9 +169,10 @@ COMMON_IMAGES_LIST=("k8s.gcr.io/e2e-test-images/agnhost:2.29" \
"projects.registry.vmware.com/antrea/perftool")

FLOW_VISIBILITY_IMAGE_LIST=("projects.registry.vmware.com/antrea/ipfix-collector:v0.5.12" \
"projects.registry.vmware.com/antrea/clickhouse-operator:0.18.2" \
"projects.registry.vmware.com/antrea/metrics-exporter:0.18.2" \
"projects.registry.vmware.com/antrea/clickhouse-server:21.11")
"projects.registry.vmware.com/antrea/flow-visibility-clickhouse-operator:0.18.2" \
"projects.registry.vmware.com/antrea/flow-visibility-metrics-exporter:0.18.2" \
"projects.registry.vmware.com/antrea/flow-visibility-clickhouse-server:21.11" \
"projects.registry.vmware.com/antrea/flow-visibility-clickhouse-monitor:latest")
if $coverage; then
manifest_args="$manifest_args --coverage"
COMMON_IMAGES_LIST+=("antrea/antrea-ubuntu-coverage:latest")
@@ -240,6 +243,7 @@ function run_test {
$FLOWAGGREGATOR_YML_CMD | docker exec -i kind-control-plane dd of=/root/flow-aggregator.yml
fi
$FLOW_VISIBILITY_CMD | docker exec -i kind-control-plane dd of=/root/flow-visibility.yml
+ cat $CH_OPERATOR_YML | docker exec -i kind-control-plane dd of=/root/clickhouse-operator-install-bundle.yml
fi

if $proxy_all; then
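The renamed images in FLOW_VISIBILITY_IMAGE_LIST still have to reach the kind nodes; the loading helper itself is outside these hunks, but a typical side-loading loop for such a list looks like this (a sketch only; kind-setup.sh may handle it differently):

# Assumed side-loading loop for a kind cluster.
for img in "${FLOW_VISIBILITY_IMAGE_LIST[@]}"; do
    docker pull "$img"
    kind load docker-image "$img"
done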
4 changes: 4 additions & 0 deletions ci/kind/values-flow-exporter.yml
@@ -0,0 +1,4 @@
+ flowCollector:
+   flowPollInterval: "1s"
+   activeFlowExportTimeout: "2s"
+   idleFlowExportTimeout: "1s"
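These three overrides shorten the Agent's poll and export intervals so flow records surface within a couple of seconds during e2e runs, instead of the much longer production defaults. The file reaches Helm through the new --extra-helm-values-file flag; rendering it by hand would look roughly like this (chart location assumed):

# Equivalent manual render; the chart path is an assumption.
helm template antrea ./build/charts/antrea \
    --set featureGates.FlowExporter=true \
    -f ci/kind/values-flow-exporter.yml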
3 changes: 0 additions & 3 deletions hack/generate-manifest-flow-visibility.sh
@@ -122,9 +122,6 @@ if [ "$MODE" == "e2e" ]; then
- cp $KUSTOMIZATION_DIR/../clickhouse-operator-install-bundle.yml clickhouse-operator-install-bundle.yml

$KUSTOMIZE edit add base base
- $KUSTOMIZE edit add base clickhouse-operator-install-bundle.yml
- $KUSTOMIZE edit add patch --path imagePullPolicyOperator.yml

$KUSTOMIZE edit set image flow-visibility-clickhouse-monitor=projects.registry.vmware.com/antrea/flow-visibility-clickhouse-monitor:latest
$KUSTOMIZE edit add patch --path imagePullPolicyClickhouse.yml --group clickhouse.altinity.com --version v1 --kind ClickHouseInstallation --name clickhouse
else
14 changes: 10 additions & 4 deletions hack/generate-manifest.sh
@@ -47,6 +47,7 @@ Generate a YAML manifest for Antrea using Helm and print it to stdout.
--help, -h Print this message and exit
--multicast Generates a manifest for multicast.
--multicast-interfaces Multicast interface names (default is empty)
+ --extra-helm-values-file Optional extra helm values file to override the default config values
In 'release' mode, environment variables IMG_NAME and IMG_TAG must be set.
@@ -89,6 +90,7 @@ WHEREABOUTS=false
FLEXIBLE_IPAM=false
MULTICAST=false
MULTICAST_INTERFACES=""
+ HELM_VALUES_FILES=()

while [[ $# -gt 0 ]]
do
@@ -199,6 +201,14 @@ case $key in
MULTICAST_INTERFACES="$2"
shift 2
;;
+ --extra-helm-values-file)
+     if [[ ! -f "$2" ]]; then
+         echoerr "Helm values file $2 does not exist."
+         exit 1
+     fi
+     HELM_VALUES_FILES=("$2")
+     shift 2
+     ;;
-h|--help)
print_usage
exit 0
Expand Down Expand Up @@ -287,7 +297,6 @@ fi

TMP_DIR=$(mktemp -d $THIS_DIR/../build/yamls/chart-values.XXXXXXXX)
HELM_VALUES=()
- HELM_VALUES_FILES=()

if $IPSEC; then
HELM_VALUES+=("trafficEncryptionMode=ipsec" "tunnelType=gre")
@@ -319,9 +328,6 @@ fi

if $FLOW_EXPORTER; then
HELM_VALUES+=("featureGates.FlowExporter=true")
if [ "$MODE" == "dev" ]; then
HELM_VALUES+=("flowCollector.flowPollInterval=1s" "flowCollector.activeFlowExportTimeout=2s" "flowCollector.idleFlowExportTimeout=1s")
fi
fi

if ! $NP; then
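The new flag only records the file in HELM_VALUES_FILES; the expansion into actual Helm arguments happens further down the script, outside these hunks. A sketch of how the two arrays presumably combine, assuming standard Helm semantics in which --set entries take precedence over -f files:

# Hypothetical expansion; only HELM_VALUES and HELM_VALUES_FILES come from the script.
HELM_ARGS=()
for f in "${HELM_VALUES_FILES[@]}"; do HELM_ARGS+=(-f "$f"); done
for v in "${HELM_VALUES[@]}"; do HELM_ARGS+=(--set "$v"); done
helm template antrea ./build/charts/antrea "${HELM_ARGS[@]}"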
8 changes: 6 additions & 2 deletions test/e2e/fixtures.go
@@ -248,7 +248,8 @@ func setupTestForFlowAggregator(tb testing.TB) (*TestData, bool, bool, error) {
if err != nil {
return testData, v4Enabled, v6Enabled, err
}
tb.Logf("ClickHouse Pod running on address: %s", chPodIPs.String())
tb.Logf("ClickHouse Pod running on address: %v", chPodIPs.ipStrings[0])

var clickHouseIP string
if v6Enabled && chPodIPs.ipv6 != nil {
clickHouseIP = chPodIPs.ipv6.String()
@@ -338,6 +339,9 @@ func exportLogs(tb testing.TB, data *TestData, logsSubDir string, writeNodeLogs
// dump the logs for flow-visibility Pods to disk.
data.forAllMatchingPodsInNamespace("", flowVisibilityNamespace, writePodLogs)

+ // dump the logs for clickhouse operator Pods to disk.
+ data.forAllMatchingPodsInNamespace("app=clickhouse-operator", kubeNamespace, writePodLogs)

// dump the output of "kubectl describe" for Antrea pods to disk.
data.forAllMatchingPodsInNamespace("app=antrea", antreaNamespace, func(nodeName, podName, nsName string) error {
w := getPodWriter(nodeName, podName, "describe")
@@ -408,7 +412,7 @@ func teardownFlowAggregator(tb testing.TB, data *TestData) {

tb.Logf("Deleting '%s' K8s Namespace and ClickHouse Operator", flowVisibilityNamespace)
if err := data.DeleteNamespace(flowVisibilityNamespace, defaultTimeout); err != nil {
tb.Logf("Error when tearing down flow aggregator: %v", err)
tb.Logf("Error when tearing down flow visibility: %v", err)
}
if err := data.deleteClickHouseOperator(); err != nil {
tb.Logf("Error when removing ClickHouse Operator: %v", err)
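For reference, the new operator log dump selects Pods by the app=clickhouse-operator label in kube-system; a rough kubectl equivalent when debugging by hand (the Go helper additionally writes one file per Pod):

kubectl logs -n kube-system -l app=clickhouse-operator --all-containers --prefix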
98 changes: 29 additions & 69 deletions test/e2e/framework.go
@@ -100,6 +100,7 @@ const (
flowAggregatorYML string = "flow-aggregator.yml"
flowAggregatorCovYML string = "flow-aggregator-coverage.yml"
flowVisibilityYML string = "flow-visibility.yml"
+ chOperatorYML string = "clickhouse-operator-install-bundle.yml"
flowVisibilityCHPodName string = "chi-clickhouse-clickhouse-0-0-0"
flowVisibilityNamespace string = "flow-visibility"
defaultBridgeName string = "br-int"
@@ -126,6 +127,7 @@ const (
perftoolImage = "projects.registry.vmware.com/antrea/perftool"
ipfixCollectorImage = "projects.registry.vmware.com/antrea/ipfix-collector:v0.5.12"
ipfixCollectorPort = "4739"
+ clickHousePort = "9000"

nginxLBService = "nginx-loadbalancer"

@@ -137,9 +139,6 @@
aggregatorClickHouseCommitInterval = 1 * time.Second

statefulSetRestartAnnotationKey = "antrea-e2e/restartedAt"

clickHousePort = "9000"
clickHouseOperatorYMLUrl = "https://raw.githubusercontent.com/Altinity/clickhouse-operator/0.18.2/deploy/operator/clickhouse-operator-install-bundle.yaml"
)

type ClusterNode struct {
@@ -720,6 +719,10 @@ func (data *TestData) deployFlowVisibilityClickHouse() (*PodIPs, error) {
return nil, err
}

+ rc, _, _, err := data.provider.RunCommandOnNode(controlPlaneNodeName(), fmt.Sprintf("kubectl apply -f %s", chOperatorYML))
+ if err != nil || rc != 0 {
+     return nil, fmt.Errorf("error when deploying the ClickHouse Operator YML; %s not available on the control-plane Node", chOperatorYML)
+ }
if err := wait.Poll(2*time.Second, 10*time.Second, func() (bool, error) {
rc, stdout, stderr, err := data.provider.RunCommandOnNode(controlPlaneNodeName(), fmt.Sprintf("kubectl apply -f %s", flowVisibilityYML))
if err != nil || rc != 0 {
@@ -736,33 +739,8 @@
return nil, err
}

- // check for clickhouse operator running
- chOperator, err := data.getClickHouseOperator()
- if err != nil {
-     return nil, fmt.Errorf("error when getting clickhouse-operator Pod: %v", err)
- }
- podName := chOperator.Name
- _, err = data.PodWaitFor(defaultTimeout, podName, kubeNamespace, func(p *corev1.Pod) (bool, error) {
-     for _, condition := range p.Status.Conditions {
-         if condition.Type == corev1.PodReady {
-             return condition.Status == corev1.ConditionTrue, nil
-         }
-     }
-     return false, nil
- })
- if err != nil {
-     return nil, err
- }
- // check for clickhouse pod running
- _, err = data.PodWaitFor(defaultTimeout, flowVisibilityCHPodName, flowVisibilityNamespace, func(p *corev1.Pod) (bool, error) {
-     for _, condition := range p.Status.Conditions {
-         if condition.Type == corev1.PodReady {
-             return condition.Status == corev1.ConditionTrue, nil
-         }
-     }
-     return false, nil
- })
- if err != nil {
+ // check for clickhouse pod Ready. Wait for 2x timeout as ch operator needs to be running first to handle chi
+ if err = data.podWaitForReady(2*defaultTimeout, flowVisibilityCHPodName, flowVisibilityNamespace); err != nil {
return nil, err
}
podIPs, err := data.podWaitForIPs(defaultTimeout, flowVisibilityCHPodName, flowVisibilityNamespace)
@@ -773,15 +751,15 @@
}

func (data *TestData) deleteClickHouseOperator() error {
- rc, _, _, err := data.provider.RunCommandOnNode(controlPlaneNodeName(), fmt.Sprintf("kubectl delete -f %s -n kube-system", clickHouseOperatorYMLUrl))
+ rc, _, _, err := data.provider.RunCommandOnNode(controlPlaneNodeName(), fmt.Sprintf("kubectl delete -f %s -n kube-system", chOperatorYML))
if err != nil || rc != 0 {
return fmt.Errorf("error when deleting ClickHouse operator: %v", err)
}
return nil
}

// deployFlowAggregator deploys the Flow Aggregator with ipfix collector and clickHouse address.
- func (data *TestData) deployFlowAggregator(ipfixCollector, clickHouse string) error {
+ func (data *TestData) deployFlowAggregator(ipfixCollector, clickHouseAddr string) error {
flowAggYaml := flowAggregatorYML
if testOptions.enableCoverage {
flowAggYaml = flowAggregatorCovYML
@@ -790,7 +768,7 @@ func (data *TestData) deployFlowAggregator(ipfixCollector, clickHouse string) er
if err != nil || rc != 0 {
return fmt.Errorf("error when deploying the Flow Aggregator; %s not available on the control-plane Node", flowAggYaml)
}
- if err = data.mutateFlowAggregatorConfigMap(ipfixCollector, clickHouse); err != nil {
+ if err = data.mutateFlowAggregatorConfigMap(ipfixCollector, clickHouseAddr); err != nil {
return err
}
if rc, _, _, err = data.provider.RunCommandOnNode(controlPlaneNodeName(), fmt.Sprintf("kubectl -n %s rollout status deployment/%s --timeout=%v", flowAggregatorNamespace, flowAggregatorDeployment, 2*defaultTimeout)); err != nil || rc != 0 {
@@ -803,22 +781,13 @@
if err != nil {
return fmt.Errorf("error when getting flow-aggregator Pod: %v", err)
}
- podName := flowAggPod.Name
- _, err = data.PodWaitFor(defaultTimeout*2, podName, flowAggregatorNamespace, func(p *corev1.Pod) (bool, error) {
-     for _, condition := range p.Status.Conditions {
-         if condition.Type == corev1.PodReady {
-             return condition.Status == corev1.ConditionTrue, nil
-         }
-     }
-     return false, nil
- })
- if err != nil {
+ if err = data.podWaitForReady(2*defaultTimeout, flowAggPod.Name, flowAggregatorNamespace); err != nil {
return err
}
return nil
}

- func (data *TestData) mutateFlowAggregatorConfigMap(ipfixCollectorAddr, clickHouseURL string) error {
+ func (data *TestData) mutateFlowAggregatorConfigMap(ipfixCollectorAddr, clickHouseAddr string) error {
configMap, err := data.GetFlowAggregatorConfigMap()
if err != nil {
return err
@@ -835,8 +804,8 @@ func (data *TestData) mutateFlowAggregatorConfigMap(ipfixCollectorAddr, clickHou
}
flowAggregatorConf.ClickHouse = flowaggregatorconfig.ClickHouseConfig{
Enable: true,
- DatabaseURL: clickHouseURL,
- CommitInterval: "1s",
+ DatabaseURL: clickHouseAddr,
+ CommitInterval: aggregatorClickHouseCommitInterval.String(),
}
flowAggregatorConf.ActiveFlowRecordTimeout = aggregatorActiveFlowRecordTimeout.String()
flowAggregatorConf.InactiveFlowRecordTimeout = aggregatorInactiveFlowRecordTimeout.String()
@@ -1287,6 +1256,20 @@ func (data *TestData) podWaitForRunning(timeout time.Duration, name, namespace s
return err
}

+ // podWaitForReady polls the k8s apiserver until the specified Pod is in the "Ready" status (or
+ // until the provided timeout expires).
+ func (data *TestData) podWaitForReady(timeout time.Duration, name, namespace string) error {
+     _, err := data.PodWaitFor(timeout, name, namespace, func(p *corev1.Pod) (bool, error) {
+         for _, condition := range p.Status.Conditions {
+             if condition.Type == corev1.PodReady {
+                 return condition.Status == corev1.ConditionTrue, nil
+             }
+         }
+         return false, nil
+     })
+     return err
+ }

// podWaitForIPs polls the K8s apiserver until the specified Pod is in the "running" state (or until
// the provided timeout expires). The function then returns the IP addresses assigned to the Pod. If the
// Pod is not using "hostNetwork", the function also checks that an IP address exists in each required
@@ -1473,29 +1456,6 @@ func (data *TestData) getAntreaController() (*corev1.Pod, error) {
return &pods.Items[0], nil
}

- // getClickHouseOperator retrieves the name of the clickhouse operator Pod (clickhouse-operator-*).
- func (data *TestData) getClickHouseOperator() (*corev1.Pod, error) {
-     listOptions := metav1.ListOptions{
-         LabelSelector: "app=clickhouse-operator",
-     }
-     var pod *corev1.Pod
-     if err := wait.Poll(defaultInterval, defaultTimeout, func() (bool, error) {
-         pods, err := data.clientset.CoreV1().Pods(kubeNamespace).List(context.TODO(), listOptions)
-         if err != nil {
-             return false, fmt.Errorf("failed to list ClickHouse Operator Pod: %v", err)
-         }
-         if len(pods.Items) == 0 {
-             return false, nil
-         }
-         pod = &pods.Items[0]
-         return true, nil
-     }); err != nil {
-         return nil, err
-     }
-
-     return pod, nil
- }

// restartAntreaControllerPod deletes the antrea-controller Pod to force it to be re-scheduled. It then waits
// for the new Pod to become available, and returns it.
func (data *TestData) restartAntreaControllerPod(timeout time.Duration) (*corev1.Pod, error) {
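The podWaitForReady helper added above keys on the PodReady condition, the same signal kubectl wait uses, so a stuck ClickHouse Pod can be checked by hand with something like this (names taken from the constants above; the timeout is illustrative):

kubectl wait --for=condition=Ready pod/chi-clickhouse-clickhouse-0-0-0 \
    -n flow-visibility --timeout=240s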
10 changes: 2 additions & 8 deletions test/e2e/infra/vagrant/push_antrea.sh
@@ -168,8 +168,8 @@ FLOW_VIS_YML="/tmp/flow-visibility.yml"
# If a flow collector address is also provided, we update the Antrea
# manifest (to enable all features)
if [[ $FLOW_COLLECTOR != "" ]]; then
echo "Generating manifest with all features enabled along with FlowExporter feature"
$THIS_DIR/../../../../hack/generate-manifest.sh --mode dev --all-features > "${ANTREA_YML}"
echo "Generating manifest with flow exporter enabled"
$THIS_DIR/../../../../hack/generate-manifest.sh --mode dev --flow-exporter > "${ANTREA_YML}"
fi

# Push Antrea image and related manifest.
@@ -229,16 +229,10 @@ if [ "$FLOW_AGGREGATOR" == "true" ]; then
$THIS_DIR/../../../../hack/generate-manifest-flow-aggregator.sh --mode dev -fc $FLOW_COLLECTOR > "${FLOW_AGG_YML}"
fi
else
- $THIS_DIR/../../../../hack/generate-manifest.sh --mode dev --flow-exporter > "${ANTREA_YML}"
$THIS_DIR/../../../../hack/generate-manifest-flow-aggregator.sh --mode dev > "${FLOW_AGG_YML}"
fi

copyManifestToNodes "$FLOW_AGG_YML"

- FLOW_VISIBILITY_CH_YML="/tmp/flow-visibility.yml"
- echo "Generating manifest for flow visibility with only clickhouse operator and db"
- $THIS_DIR/../../../../hack/generate-manifest-flow-visibility.sh --mode e2e > "${FLOW_VISIBILITY_CH_YML}"
- copyManifestToNodes "$FLOW_VISIBILITY_CH_YML"
if [[ $FLOW_COLLECTOR != "" ]]; then
echo "Restarting Flow Aggregator deployment"
ssh -F ssh-config k8s-node-control-plane kubectl -n flow-aggregator delete pod --all
