diff --git a/go.mod b/go.mod index 991d9cbe575..724fc5cf20c 100644 --- a/go.mod +++ b/go.mod @@ -37,7 +37,7 @@ require ( github.com/stretchr/testify v1.9.0 github.com/vishvananda/netlink v1.2.1-beta.2.0.20240713210050-d13535d71ed3 go.uber.org/mock v0.4.0 - golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa + golang.org/x/exp v0.0.0-20240823005443-9b4947da3948 golang.org/x/mod v0.20.0 golang.org/x/sys v0.24.0 golang.org/x/time v0.6.0 @@ -186,7 +186,7 @@ require ( replace ( github.com/mdlayher/arp => github.com/kubeovn/arp v0.0.0-20240218024213-d9612a263f68 github.com/openshift/client-go => github.com/openshift/client-go v0.0.1 - github.com/ovn-org/libovsdb => github.com/kubeovn/libovsdb v0.0.0-20240218023647-f0bc3ce57fcd + github.com/ovn-org/libovsdb => github.com/kubeovn/libovsdb v0.0.0-20240814054845-978196448fb2 k8s.io/api => k8s.io/api v0.30.4 k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.30.4 k8s.io/apimachinery => k8s.io/apimachinery v0.30.4 diff --git a/go.sum b/go.sum index 3359022283a..3c62a82eab9 100644 --- a/go.sum +++ b/go.sum @@ -1015,8 +1015,8 @@ github.com/kubeovn/go-iptables v0.0.0-20230322103850-8619a8ab3dca h1:fTMjoho2et9 github.com/kubeovn/go-iptables v0.0.0-20230322103850-8619a8ab3dca/go.mod h1:jY1XeGzkx8ASNJ+SqQSxTESNXARkjvt+I6IJOTnzIjw= github.com/kubeovn/gonetworkmanager/v2 v2.0.0-20230905082151-e28c4d73a589 h1:y9exo1hjCsq7jsGUzt11kxhTiEGrGSQ0ZqibAiZk2PQ= github.com/kubeovn/gonetworkmanager/v2 v2.0.0-20230905082151-e28c4d73a589/go.mod h1:49upX+/hUyppWIqu58cumojyIwXdkA8k6reA/mQlKuI= -github.com/kubeovn/libovsdb v0.0.0-20240218023647-f0bc3ce57fcd h1:GhgvSBFKEkVNgDq8IslC04NVuoznreZH/Imz/cr6bhs= -github.com/kubeovn/libovsdb v0.0.0-20240218023647-f0bc3ce57fcd/go.mod h1:pTnlGt1JZrncr6pJn/Fhnp3FFTMQRaTVxiSKBLVGa5s= +github.com/kubeovn/libovsdb v0.0.0-20240814054845-978196448fb2 h1:jH4yKIJLu2ZBy6fLMrlVa27ccgjzc53rsGDzNvddh0E= +github.com/kubeovn/libovsdb v0.0.0-20240814054845-978196448fb2/go.mod h1:od3agzU0e50RPBxap7mMvBWZ+u37kqX0W849BYufdHI= github.com/kubeovn/ovsdb v0.0.0-20240410091831-5dd26006c475 h1:KZba2Kj9TXCUdUSqOR3eiy4VvkkIyhDVImYmYs6GQWU= github.com/kubeovn/ovsdb v0.0.0-20240410091831-5dd26006c475/go.mod h1:LAd0qoeAAm/QyZcpxN2BnpndM2/dhZt+/kokPvcxKcE= github.com/kubernetes-csi/external-snapshotter/client/v4 v4.2.0 h1:nHHjmvjitIiyPlUHk/ofpgvBcNcawJLtf4PYHORLjAA= @@ -1332,8 +1332,8 @@ golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= golang.org/x/exp v0.0.0-20220827204233-334a2380cb91/go.mod h1:cyybsKvd6eL0RnXn6p/Grxp8F5bW7iYuBgsNCOHpMYE= -golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa h1:ELnwvuAXPNtPk1TJRuGkI9fDTwym6AYBu0qzT8AcHdI= -golang.org/x/exp v0.0.0-20240808152545-0cdaa3abc0fa/go.mod h1:akd2r19cwCdwSwWeIdzYQGa/EZZyqcOdwWiwj5L5eKQ= +golang.org/x/exp v0.0.0-20240823005443-9b4947da3948 h1:kx6Ds3MlpiUHKj7syVnbp57++8WpuKPcR5yjLBjvLEA= +golang.org/x/exp v0.0.0-20240823005443-9b4947da3948/go.mod h1:akd2r19cwCdwSwWeIdzYQGa/EZZyqcOdwWiwj5L5eKQ= golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= diff --git a/pkg/controller/config.go b/pkg/controller/config.go index 2e42770c33f..6f0953d264a 100644 --- a/pkg/controller/config.go +++ b/pkg/controller/config.go @@ -20,13 +20,15 @@ import ( // Configuration is the controller conf type Configuration struct { - OvnNbAddr string - OvnSbAddr string - OvnTimeout int - CustCrdRetryMaxDelay int - CustCrdRetryMinDelay int - KubeConfigFile string - KubeRestConfig *rest.Config + OvnNbAddr string + OvnSbAddr string + OvnTimeout int + OvsDbConnectTimeout int + OvsDbInactivityTimeout int + CustCrdRetryMaxDelay int + CustCrdRetryMinDelay int + KubeConfigFile string + KubeRestConfig *rest.Config KubeClient kubernetes.Interface KubeOvnClient clientset.Interface @@ -106,12 +108,14 @@ type Configuration struct { // TODO: validate configuration func ParseFlags() (*Configuration, error) { var ( - argOvnNbAddr = pflag.String("ovn-nb-addr", "", "ovn-nb address") - argOvnSbAddr = pflag.String("ovn-sb-addr", "", "ovn-sb address") - argOvnTimeout = pflag.Int("ovn-timeout", 60, "") - argCustCrdRetryMinDelay = pflag.Int("cust-crd-retry-min-delay", 1, "The min delay seconds between custom crd two retries") - argCustCrdRetryMaxDelay = pflag.Int("cust-crd-retry-max-delay", 20, "The max delay seconds between custom crd two retries") - argKubeConfigFile = pflag.String("kubeconfig", "", "Path to kubeconfig file with authorization and master location information. If not set use the inCluster token.") + argOvnNbAddr = pflag.String("ovn-nb-addr", "", "ovn-nb address") + argOvnSbAddr = pflag.String("ovn-sb-addr", "", "ovn-sb address") + argOvnTimeout = pflag.Int("ovn-timeout", 60, "The seconds to wait ovn command timeout") + argOvsDbConTimeout = pflag.Int("ovsdb-con-timeout", 3, "The seconds to wait ovsdb connect timeout") + argOvsDbInactivityTimeout = pflag.Int("ovsdb-inactivity-timeout", 10, "The seconds to wait ovsdb inactivity check timeout") + argCustCrdRetryMinDelay = pflag.Int("cust-crd-retry-min-delay", 1, "The min delay seconds between custom crd two retries") + argCustCrdRetryMaxDelay = pflag.Int("cust-crd-retry-max-delay", 20, "The max delay seconds between custom crd two retries") + argKubeConfigFile = pflag.String("kubeconfig", "", "Path to kubeconfig file with authorization and master location information. If not set use the inCluster token.") argDefaultLogicalSwitch = pflag.String("default-ls", util.DefaultSubnet, "The default logical switch name") argDefaultCIDR = pflag.String("default-cidr", "10.16.0.0/16", "Default CIDR for namespace with no logical switch annotation") @@ -196,6 +200,8 @@ func ParseFlags() (*Configuration, error) { OvnNbAddr: *argOvnNbAddr, OvnSbAddr: *argOvnSbAddr, OvnTimeout: *argOvnTimeout, + OvsDbConnectTimeout: *argOvsDbConTimeout, + OvsDbInactivityTimeout: *argOvsDbInactivityTimeout, CustCrdRetryMinDelay: *argCustCrdRetryMinDelay, CustCrdRetryMaxDelay: *argCustCrdRetryMaxDelay, KubeConfigFile: *argKubeConfigFile, diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 03ee4cbc42f..5da3ce3f71c 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -466,10 +466,19 @@ func Run(ctx context.Context, config *Configuration) { } var err error - if controller.OVNNbClient, err = ovs.NewOvnNbClient(config.OvnNbAddr, config.OvnTimeout); err != nil { + if controller.OVNNbClient, err = ovs.NewOvnNbClient( + config.OvnNbAddr, + config.OvnTimeout, + config.OvsDbConnectTimeout, + config.OvsDbInactivityTimeout); err != nil { util.LogFatalAndExit(err, "failed to create ovn nb client") } - if controller.OVNSbClient, err = ovs.NewOvnSbClient(config.OvnSbAddr, config.OvnTimeout); err != nil { + if controller.OVNSbClient, err = ovs.NewOvnSbClient( + config.OvnSbAddr, + config.OvnTimeout, + config.OvsDbConnectTimeout, + config.OvsDbInactivityTimeout, + ); err != nil { util.LogFatalAndExit(err, "failed to create ovn sb client") } if config.EnableLb { diff --git a/pkg/ovn_ic_controller/config.go b/pkg/ovn_ic_controller/config.go index a02d7c1d97c..0346fe8b5f0 100644 --- a/pkg/ovn_ic_controller/config.go +++ b/pkg/ovn_ic_controller/config.go @@ -21,10 +21,12 @@ type Configuration struct { KubeClient kubernetes.Interface KubeOvnClient clientset.Interface - PodNamespace string - OvnNbAddr string - OvnSbAddr string - OvnTimeout int + PodNamespace string + OvnNbAddr string + OvnSbAddr string + OvnTimeout int + OvsDbConnectTimeout int + OvsDbInactivityTimeout int NodeSwitch string ClusterRouter string @@ -35,9 +37,11 @@ func ParseFlags() (*Configuration, error) { var ( argKubeConfigFile = pflag.String("kubeconfig", "", "Path to kubeconfig file with authorization and master location information. If not set use the inCluster token.") - argOvnNbAddr = pflag.String("ovn-nb-addr", "", "ovn-nb address") - argOvnSbAddr = pflag.String("ovn-sb-addr", "", "ovn-sb address") - argOvnTimeout = pflag.Int("ovn-timeout", 60, "") + argOvnNbAddr = pflag.String("ovn-nb-addr", "", "ovn-nb address") + argOvnSbAddr = pflag.String("ovn-sb-addr", "", "ovn-sb address") + argOvnTimeout = pflag.Int("ovn-timeout", 60, "") + argOvsDbConTimeout = pflag.Int("ovsdb-con-timeout", 3, "") + argOvsDbInactivityTimeout = pflag.Int("ovsdb-inactivity-timeout", 10, "") argClusterRouter = pflag.String("cluster-router", util.DefaultVpc, "The router name for cluster router") argNodeSwitch = pflag.String("node-switch", "join", "The name of node gateway switch which help node to access pod network") @@ -71,10 +75,12 @@ func ParseFlags() (*Configuration, error) { config := &Configuration{ KubeConfigFile: *argKubeConfigFile, - PodNamespace: os.Getenv("POD_NAMESPACE"), - OvnNbAddr: *argOvnNbAddr, - OvnSbAddr: *argOvnSbAddr, - OvnTimeout: *argOvnTimeout, + PodNamespace: os.Getenv("POD_NAMESPACE"), + OvnNbAddr: *argOvnNbAddr, + OvnSbAddr: *argOvnSbAddr, + OvnTimeout: *argOvnTimeout, + OvsDbConnectTimeout: *argOvsDbConTimeout, + OvsDbInactivityTimeout: *argOvsDbInactivityTimeout, ClusterRouter: *argClusterRouter, NodeSwitch: *argNodeSwitch, diff --git a/pkg/ovn_ic_controller/controller.go b/pkg/ovn_ic_controller/controller.go index 95067a7e817..19104814ee2 100644 --- a/pkg/ovn_ic_controller/controller.go +++ b/pkg/ovn_ic_controller/controller.go @@ -87,10 +87,20 @@ func NewController(config *Configuration) *Controller { } var err error - if controller.OVNNbClient, err = ovs.NewOvnNbClient(config.OvnNbAddr, config.OvnTimeout); err != nil { + if controller.OVNNbClient, err = ovs.NewOvnNbClient( + config.OvnNbAddr, + config.OvnTimeout, + config.OvsDbConnectTimeout, + config.OvsDbInactivityTimeout, + ); err != nil { util.LogFatalAndExit(err, "failed to create ovn nb client") } - if controller.OVNSbClient, err = ovs.NewOvnSbClient(config.OvnSbAddr, config.OvnTimeout); err != nil { + if controller.OVNSbClient, err = ovs.NewOvnSbClient( + config.OvnSbAddr, + config.OvnTimeout, + config.OvsDbConnectTimeout, + config.OvsDbInactivityTimeout, + ); err != nil { util.LogFatalAndExit(err, "failed to create ovn sb client") } diff --git a/pkg/ovs/ovn.go b/pkg/ovs/ovn.go index cad3a8e71af..b351bc1cc76 100644 --- a/pkg/ovs/ovn.go +++ b/pkg/ovs/ovn.go @@ -53,7 +53,7 @@ func NewLegacyClient(timeout int) *LegacyClient { } } -func NewOvnNbClient(ovnNbAddr string, ovnNbTimeout int) (*OVNNbClient, error) { +func NewOvnNbClient(ovnNbAddr string, ovnNbTimeout, ovsDbConTimeout, ovsDbInactivityTimeout int) (*OVNNbClient, error) { dbModel, err := ovnnb.FullDatabaseModel() if err != nil { klog.Error(err) @@ -78,7 +78,7 @@ func NewOvnNbClient(ovnNbAddr string, ovnNbTimeout int) (*OVNNbClient, error) { client.WithTable(&ovnnb.NBGlobal{}), client.WithTable(&ovnnb.PortGroup{}), } - nbClient, err := ovsclient.NewOvsDbClient(ovsclient.NBDB, ovnNbAddr, dbModel, monitors) + nbClient, err := ovsclient.NewOvsDbClient(ovsclient.NBDB, ovnNbAddr, dbModel, monitors, ovsDbConTimeout, ovsDbInactivityTimeout) if err != nil { klog.Errorf("failed to create OVN NB client: %v", err) return nil, err @@ -93,7 +93,7 @@ func NewOvnNbClient(ovnNbAddr string, ovnNbTimeout int) (*OVNNbClient, error) { return c, nil } -func NewOvnSbClient(ovnSbAddr string, ovnSbTimeout int) (*OVNSbClient, error) { +func NewOvnSbClient(ovnSbAddr string, ovnSbTimeout, ovsDbConTimeout, ovsDbInactivityTimeout int) (*OVNSbClient, error) { dbModel, err := ovnsb.FullDatabaseModel() if err != nil { klog.Error(err) @@ -104,7 +104,7 @@ func NewOvnSbClient(ovnSbAddr string, ovnSbTimeout int) (*OVNSbClient, error) { client.WithTable(&ovnsb.Chassis{}), // TODO:// monitor other necessary tables in ovsdb/ovnsb/model.go } - sbClient, err := ovsclient.NewOvsDbClient(ovsclient.SBDB, ovnSbAddr, dbModel, monitors) + sbClient, err := ovsclient.NewOvsDbClient(ovsclient.SBDB, ovnSbAddr, dbModel, monitors, ovsDbConTimeout, ovsDbInactivityTimeout) if err != nil { klog.Errorf("failed to create OVN SB client: %v", err) return nil, err diff --git a/pkg/ovsdb/client/client.go b/pkg/ovsdb/client/client.go index 38371fa204c..f0baf859a03 100644 --- a/pkg/ovsdb/client/client.go +++ b/pkg/ovsdb/client/client.go @@ -25,7 +25,6 @@ const ( ICNBDB = "icnbdb" ICSBDB = "icsbdb" ) -const timeout = 3 * time.Second var namedUUIDCounter uint32 @@ -42,10 +41,24 @@ func NamedUUID() string { } // NewOvsDbClient creates a new ovsdb client -func NewOvsDbClient(db, addr string, dbModel model.ClientDBModel, monitors []client.MonitorOption) (client.Client, error) { +func NewOvsDbClient( + db string, + addr string, + dbModel model.ClientDBModel, + monitors []client.MonitorOption, + ovsDbConTimeout int, + ovsDbInactivityTimeout int, +) (client.Client, error) { logger := klog.NewKlogr().WithName("libovsdb").WithValues("db", db) + connectTimeout := time.Duration(ovsDbConTimeout) * time.Second + inactivityTimeout := time.Duration(ovsDbInactivityTimeout) * time.Second options := []client.Option{ - client.WithReconnect(timeout, &backoff.ConstantBackOff{Interval: time.Second}), + // Reading and parsing the DB after reconnect at scale can (unsurprisingly) + // take longer than a normal ovsdb operation. Give it a bit more time so + // we don't time out and enter a reconnect loop. In addition it also enables + // inactivity check on the ovsdb connection. + client.WithInactivityCheck(inactivityTimeout, connectTimeout, &backoff.ZeroBackOff{}), + client.WithLeaderOnly(true), client.WithLogger(&logger), } @@ -84,7 +97,7 @@ func NewOvsDbClient(db, addr string, dbModel model.ClientDBModel, monitors []cli klog.Error(err) return nil, err } - ctx, cancel := context.WithTimeout(context.Background(), time.Duration(len(endpoints)+1)*timeout) + ctx, cancel := context.WithTimeout(context.Background(), connectTimeout) defer cancel() if err = c.Connect(ctx); err != nil { klog.Errorf("failed to connect to OVN NB server %s: %v", addr, err)