Skip to content

Commit

Permalink
fix: Optimize CI fails sporadically.
Browse files Browse the repository at this point in the history
Signed-off-by: tao.yang <[email protected]>
  • Loading branch information
ty-dc committed Oct 22, 2024
1 parent 1b50453 commit aafc910
Show file tree
Hide file tree
Showing 27 changed files with 1,803 additions and 145 deletions.
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ require github.com/google/go-cmp v0.6.0 // indirect
require k8s.io/component-base v0.29.4 // indirect

require (
github.com/hashicorp/go-multierror v1.1.1
go.uber.org/automaxprocs v1.5.3
k8s.io/kubectl v0.26.3
)
Expand Down Expand Up @@ -113,6 +114,7 @@ require (
github.com/google/uuid v1.3.0 // indirect
github.com/gorilla/handlers v1.5.1 // indirect
github.com/grafana/pyroscope-go/godeltaprof v0.1.3 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect
github.com/hashicorp/hcl v1.0.0 // indirect
github.com/huandu/xstrings v1.3.3 // indirect
github.com/imdario/mergo v0.3.13 // indirect
Expand Down
5 changes: 5 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,11 @@ github.com/grafana/pyroscope-go v1.0.2/go.mod h1:bShDKsVZdzxq+Ol6no0JKigU9y5FTWU
github.com/grafana/pyroscope-go/godeltaprof v0.1.3 h1:eunWpv1B3Z7ZK9o4499EmQGlY+CsDmSZ4FbxjRx37uk=
github.com/grafana/pyroscope-go/godeltaprof v0.1.3/go.mod h1:1HSPtjU8vLG0jE9JrTdzjgFqdJ/VgN7fvxBNq3luJko=
github.com/gregjones/httpcache v0.0.0-20180305231024-9cad4c3443a7/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I=
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
Expand Down
2 changes: 2 additions & 0 deletions test/e2e/common/constant.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ const (
BatchCreateTimeout = time.Minute * 5
KdoctorCheckTime = time.Minute * 10
SpiderSyncMultusTime = time.Minute * 2
InformerSyncStatusTime = time.Second * 30
KDoctorRunTimeout = time.Minute * 10
)

// ForcedWaitingTime is the pause applied between successive polling attempts,
// e.g. passed to time.Sleep while waiting for a resource to be removed.
var ForcedWaitingTime = time.Second
Expand Down
34 changes: 34 additions & 0 deletions test/e2e/common/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"fmt"
"os/exec"

"github.com/hashicorp/go-multierror"
. "github.com/onsi/ginkgo/v2"
e2e "github.com/spidernet-io/e2eframework/framework"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -55,3 +56,36 @@ func RestartNodeUntilClusterReady(ctx context.Context, frame *e2e.Framework, nod
GinkgoWriter.Println("Check that the status of all Pods in the cluster is running")
return nil
}

// GetNodeNetworkInfo runs a fixed set of `ip` diagnostic commands for every
// entry of nodeList through frame.DockerExecCommand and writes the captured
// output of each command to GinkgoWriter.
//
// A failing command does not abort the collection: its error, together with
// whatever output was captured, is accumulated and the remaining commands and
// nodes are still processed.
//
// It returns nil when every command succeeded (including when nodeList is
// empty), otherwise a multierror aggregating one entry per failed command.
func GetNodeNetworkInfo(ctx context.Context, frame *e2e.Framework, nodeList []string) error {
	// The command set is the same for every node, so build it once instead of
	// once per loop iteration.
	commands := []string{
		"ip a",
		"ip link show",
		"ip n",
		"ip -6 n",
		"ip rule",
		"ip -6 rule",
		"ip route",
		"ip route show table 100",
		"ip route show table 101",
		"ip route show table 500",
		"ip -6 route",
		"ip -6 route show table 100",
		"ip -6 route show table 101",
		"ip -6 route show table 500",
	}

	var jobResult *multierror.Error
	for _, node := range nodeList {
		GinkgoWriter.Printf("=============== Check the network information of the node %v ============== \n", node)

		for _, command := range commands {
			GinkgoWriter.Printf("--------------- execute %v in node: %v ------------ \n", command, node)
			out, err := frame.DockerExecCommand(ctx, node, command)
			if err != nil {
				jobResult = multierror.Append(jobResult, fmt.Errorf("node %v: command '%v' failed with error: %w, output: %s", node, command, err, out))
				continue
			}
			// Emit the collected information under its header; otherwise the
			// output would only ever surface inside the error message above.
			GinkgoWriter.Printf("%s\n", out)
		}
	}

	// ErrorOrNil yields an untyped nil when nothing was appended, so callers
	// comparing against nil are not tripped by a typed nil pointer.
	return jobResult.ErrorOrNil()
}
47 changes: 46 additions & 1 deletion test/e2e/common/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,10 @@ import (
"time"

"github.com/spidernet-io/spiderpool/pkg/constant"
"github.com/spidernet-io/spiderpool/pkg/utils/retry"
"sigs.k8s.io/controller-runtime/pkg/client"

"github.com/hashicorp/go-multierror"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
e2e "github.com/spidernet-io/e2eframework/framework"
Expand Down Expand Up @@ -48,7 +51,14 @@ func GenerateExamplePodYaml(podName, namespace string) *corev1.Pod {
func CreatePodUntilReady(frame *e2e.Framework, podYaml *corev1.Pod, podName, namespace string, waitPodStartTimeout time.Duration) (pod *corev1.Pod, podIPv4, podIPv6 string) {
// create pod
GinkgoWriter.Printf("create pod %v/%v \n", namespace, podName)
err := frame.CreatePod(podYaml)
err := retry.RetryOnConflictWithContext(context.Background(), retry.DefaultBackoff, func(ctx context.Context) error {
err := frame.CreatePod(podYaml)
if err != nil {
GinkgoLogr.Error(fmt.Errorf("failed to create pod %v/%v, error: %v", namespace, podName, err), "Failed")
return err
}
return nil
})
Expect(err).NotTo(HaveOccurred(), "failed to create pod")

// wait for pod ip
Expand Down Expand Up @@ -125,6 +135,10 @@ func CheckPodIpReadyByLabel(frame *e2e.Framework, label map[string]string, v4Poo
return podList
}

// DeletePods issues a single frame.KClient.DeleteAllOf call for corev1.Pod
// objects, forwarding opts unchanged so the caller decides which Pods are
// targeted (presumably via namespace/label options — see the
// controller-runtime client documentation).
//
// The request is made with context.TODO(), so it cannot be cancelled by the
// caller. The error from DeleteAllOf is returned as-is.
func DeletePods(frame *e2e.Framework, opts ...client.DeleteAllOfOption) error {
	// The empty Pod value only tells the client which resource kind to delete.
	podKind := &corev1.Pod{}
	reqCtx := context.TODO()

	return frame.KClient.DeleteAllOf(reqCtx, podKind, opts...)
}

func ValidatePodIPConflict(podList *corev1.PodList) error {
isIPConflictMap := make(map[string]string)
for _, pod := range podList.Items {
Expand All @@ -138,3 +152,34 @@ func ValidatePodIPConflict(podList *corev1.PodList) error {
}
return nil
}

// GetPodNetworkInfo runs a fixed set of `ip` diagnostic commands inside every
// Pod of podList through frame.ExecCommandInPod and writes the captured output
// of each command to GinkgoWriter.
//
// A failing command does not abort the collection: its error, together with
// whatever output was captured, is accumulated and the remaining commands and
// Pods are still processed.
//
// It returns nil when podList is nil or empty, or when every command
// succeeded; otherwise a multierror aggregating one entry per failed command.
func GetPodNetworkInfo(ctx context.Context, frame *e2e.Framework, podList *corev1.PodList) error {
	// Guard against a nil list so a failed lookup upstream cannot turn this
	// diagnostic helper into a nil-pointer panic.
	if podList == nil {
		return nil
	}

	// The command set is the same for every Pod, so build it once instead of
	// once per loop iteration.
	commands := []string{
		"ip a",
		"ip link show",
		"ip n",
		"ip -6 n",
		"ip rule",
		"ip -6 rule",
		"ip route",
		"ip route show table 100",
		"ip route show table 101",
		"ip -6 route",
		"ip -6 route show table 100",
		"ip -6 route show table 101",
	}

	var errResult *multierror.Error
	for i := range podList.Items {
		// Index into the slice rather than ranging by value: corev1.Pod is a
		// large struct and only a few fields are read here.
		pod := &podList.Items[i]
		GinkgoWriter.Printf("=============== Check the network information of the pod %v/%v ============== \n", pod.Namespace, pod.Name)

		for _, command := range commands {
			GinkgoWriter.Printf("--------------- execute %v in pod: %v/%v on node: %v ------------ \n", command, pod.Namespace, pod.Name, pod.Spec.NodeName)
			out, err := frame.ExecCommandInPod(pod.Name, pod.Namespace, command, ctx)
			if err != nil {
				errResult = multierror.Append(errResult, fmt.Errorf("pod %v/%v: command '%v' failed with error: %w, output: %s", pod.Namespace, pod.Name, command, err, out))
				continue
			}
			// Emit the collected information under its header; otherwise the
			// output would only ever surface inside the error message above.
			GinkgoWriter.Printf("%s\n", out)
		}
	}

	// ErrorOrNil yields an untyped nil when nothing was appended, so callers
	// comparing against nil are not tripped by a typed nil pointer.
	return errResult.ErrorOrNil()
}
77 changes: 53 additions & 24 deletions test/e2e/common/spiderpool.go
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,7 @@ func DeleteIPPoolUntilFinish(f *frame.Framework, poolName string, ctx context.Co
default:
_, err := GetIppoolByName(f, poolName)
if err != nil {
GinkgoWriter.Printf("IPPool '%s' has been removederror: %v", poolName, err)
GinkgoWriter.Printf("IPPool '%s' has been removed, error: %v", poolName, err)
return nil
}
time.Sleep(ForcedWaitingTime)
Expand Down Expand Up @@ -573,7 +573,7 @@ func WaitWorkloadDeleteUntilFinish(ctx context.Context, f *frame.Framework, name
_, err := GetWorkloadByName(f, namespace, name)
if err != nil {
if api_errors.IsNotFound(err) {
GinkgoWriter.Printf("workload '%s/%s' has been removederror: %v", namespace, name, err)
GinkgoWriter.Printf("workload '%s/%s' has been removed, error: %v", namespace, name, err)
return nil
}
return err
Expand Down Expand Up @@ -888,14 +888,15 @@ func CheckIppoolSanity(f *frame.Framework, poolName string) error {
podYaml, err := f.GetPod(podName, podNS)
if err != nil {
if api_errors.IsNotFound(err) {
GinkgoLogr.Error(fmt.Errorf("pod %s/%s does not exist", podNS, podName), "Failed")
GinkgoLogr.Error(fmt.Errorf("the pod %s/%s in ippool %s, but pod does not exist in kubernetes", podNS, podName, poolName), "Failed")
isSanity = false
continue
} else {
return fmt.Errorf("failed to get pod %s/%s, error: %v", podNS, podName, err)
}
}

podNetworkIPs, err := ParsePodNetworkAnnotation(f, podYaml)
if nil != err {
if err != nil {
return fmt.Errorf("failed to parse pod %s/%s network annotation \n pod yaml %v, \n error: %v ", podNS, podName, podYaml, err)
}

Expand Down Expand Up @@ -928,9 +929,11 @@ func CheckIppoolSanity(f *frame.Framework, poolName string) error {
wep, err := GetWorkloadByName(f, podYaml.Namespace, podYaml.Name)
if err != nil {
if api_errors.IsNotFound(err) {
GinkgoLogr.Error(fmt.Errorf("endpoint %s/%s dose not exist", podYaml.Namespace, podYaml.Name), "Failed")
GinkgoLogr.Error(fmt.Errorf("pod %s/%s exists in ippool %s, but endpoint does not exist", podYaml.Namespace, podYaml.Name, poolName), "Failed")
isSanity = false
continue
}
return fmt.Errorf("failed to get endpoint %s/%s, error %v", podYaml.Namespace, podYaml.Name, err)
return fmt.Errorf("pod %s/%s exists in ippool %s, but failed to get endpoint, error %v", podYaml.Namespace, podYaml.Name, poolName, err)
}

podUsedIPs := convert.GroupIPAllocationDetails(wep.Status.Current.UID, wep.Status.Current.IPs)
Expand All @@ -952,24 +955,50 @@ func CheckIppoolSanity(f *frame.Framework, poolName string) error {
}
}

if *ippool.Status.AllocatedIPCount > *ippool.Status.TotalIPCount {
GinkgoWriter.Printf(
"allocated IP count (%v) exceeds total IP count (%v) \n",
*ippool.Status.AllocatedIPCount, *ippool.Status.TotalIPCount,
)
isSanity = false
}
// The status of IPPool is automatically synchronized by the IPPool informer based on the events it receives.
// In the CI environment, the creation of IPPools happens very quickly, and their health checks are performed promptly.
// When checking the TotalIPCount status, if the spiderpool-controller undergoes a leader election or the informer has not yet completed synchronization,
// the IPPool status TotalIPCount may be nil. This can lead to a panic.
// In such cases, try waiting for the informer to complete status synchronization before checking the robustness of the IPPool.
ctx, cancel := context.WithTimeout(context.Background(), InformerSyncStatusTime)
defer cancel()
for {
select {
case <-ctx.Done():
return fmt.Errorf("waiting for informer to synchronize IPPool %s status timed out", poolName)
default:
if ippool.Status.AllocatedIPCount == nil || ippool.Status.TotalIPCount == nil {
GinkgoLogr.Error(fmt.Errorf("IPPool %s has nil status fields, retrying", poolName), "Failed")
ippool, err = GetIppoolByName(f, poolName)
if err != nil {
if api_errors.IsNotFound(err) {
return fmt.Errorf("ippool %s does not exist", poolName)
}
return fmt.Errorf("failed to get ippool %s, error %v", poolName, err)
}
time.Sleep(ForcedWaitingTime)
continue
}

// Ensure that the IP pool's reported usage matches the actual usage
if actualIPUsageCount != int(*ippool.Status.AllocatedIPCount) {
GinkgoWriter.Printf("IPPool %s usage count mismatch: expected %d, got %d \n", poolName, actualIPUsageCount, *ippool.Status.AllocatedIPCount)
isSanity = false
}
if *ippool.Status.AllocatedIPCount > *ippool.Status.TotalIPCount {
GinkgoWriter.Printf(
"allocated IP count (%v) exceeds total IP count (%v) \n",
*ippool.Status.AllocatedIPCount, *ippool.Status.TotalIPCount,
)
isSanity = false
}
// Ensure that the IP pool's reported usage matches the actual usage
if actualIPUsageCount != int(*ippool.Status.AllocatedIPCount) {
GinkgoWriter.Printf("IPPool %s usage count mismatch: expected %d, got %d \n", poolName, actualIPUsageCount, *ippool.Status.AllocatedIPCount)
isSanity = false
}

if !isSanity {
return fmt.Errorf("IPPool %s sanity check failed", poolName)
}
if !isSanity {
return fmt.Errorf("IPPool %s sanity check failed", poolName)
}

GinkgoWriter.Printf("Successfully checked IPPool %s sanity, IPPool record information is correct \n", poolName)
return nil
GinkgoWriter.Printf("Successfully checked IPPool %s sanity, IPPool record information is correct \n", poolName)
return nil
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ var (
request *kdoctorV1beta1.NetHttpRequest
condition *kdoctorV1beta1.NetSuccessCondition
schedule *kdoctorV1beta1.SchedulePlan
run = true
)

var _ = BeforeSuite(func() {
Expand Down
Loading

0 comments on commit aafc910

Please sign in to comment.