Skip to content

Commit

Permalink
Wait for control plane
Browse files Browse the repository at this point in the history
- add `cfg.WaitForControlPlane`
- deprecate `--aws-api-timeout`
- add generalised `--timeout`
  • Loading branch information
errordeveloper committed Jul 13, 2018
1 parent 5ad2351 commit 1033a01
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 11 deletions.
9 changes: 7 additions & 2 deletions cmd/eksctl/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package main
import (
"fmt"
"os"
"time"

"github.com/pkg/errors"
"github.com/spf13/cobra"
Expand Down Expand Up @@ -82,7 +81,9 @@ func createClusterCmd() *cobra.Command {
fs.StringVar(&kubeconfigPath, "kubeconfig", kubeconfig.DefaultPath, "path to write kubeconfig (incompatible with --auto-kubeconfig)")
fs.BoolVar(&setContext, "set-kubeconfig-context", true, "if true then current-context will be set in kubeconfig; if a context is already set then it will be overwritten")

fs.DurationVar(&cfg.AWSOperationTimeout, "aws-api-timeout", 20*time.Minute, "number of seconds after which to timeout AWS API operations")
fs.DurationVar(&cfg.WaitTimeout, "aws-api-timeout", eks.DefaultWaitTimeout, "")
fs.MarkHidden("aws-api-timeout") // TODO deprecate in 0.2.0
fs.DurationVar(&cfg.WaitTimeout, "timeout", eks.DefaultWaitTimeout, "how long to wait for any of the timely operations")

fs.BoolVar(&cfg.Addons.WithIAM.PolicyAmazonEC2ContainerRegistryPowerUser, "full-ecr-access", false, "enable full access to ECR")

Expand Down Expand Up @@ -162,6 +163,10 @@ func doCreateCluster(cfg *eks.ClusterConfig, name string) error {
return err
}

if err := cfg.WaitForControlPlane(clientSet); err != nil {
return err
}

// authorise nodes to join
if err := cfg.CreateDefaultNodeGroupAuthConfigMap(clientSet); err != nil {
return err
Expand Down
3 changes: 2 additions & 1 deletion cmd/eksctl/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ func waitNodesCmd() *cobra.Command {
fs := cmd.Flags()

fs.StringVar(&utilsKubeconfigInputPath, "kubeconfig", "kubeconfig", "path to read kubeconfig")
fs.IntVarP(&cfg.MinNodes, "nodes-min", "m", DEFAULT_NODE_COUNT, "minimum nodes to wait for")
fs.IntVarP(&cfg.MinNodes, "nodes-min", "m", DEFAULT_NODE_COUNT, "minimum number of nodes to wait for")
fs.DurationVar(&cfg.WaitTimeout, "timeout", eks.DefaultWaitTimeout, "how long to wait")

return cmd
}
Expand Down
4 changes: 3 additions & 1 deletion pkg/eks/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ const (
AWSDebugLevel = 5
)

var DefaultWaitTimeout = 20 * time.Minute

type ClusterProvider struct {
// core fields used for config and AWS APIs
cfg *ClusterConfig
Expand Down Expand Up @@ -61,7 +63,7 @@ type ClusterConfig struct {
SSHPublicKeyPath string
SSHPublicKey []byte

AWSOperationTimeout time.Duration
WaitTimeout time.Duration

keyName string
clusterRoleARN string
Expand Down
4 changes: 2 additions & 2 deletions pkg/eks/cfn.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,14 @@ func (c *ClusterProvider) CreateStack(name string, templateBody []byte, paramete
ticker := time.NewTicker(20 * time.Second)
defer ticker.Stop()

timer := time.NewTimer(c.cfg.AWSOperationTimeout)
timer := time.NewTimer(c.cfg.WaitTimeout)
defer timer.Stop()

defer close(errs)
for {
select {
case <-timer.C:
errs <- fmt.Errorf("timed out creating CloudFormation stack %q after %d", name, c.cfg.AWSOperationTimeout)
errs <- fmt.Errorf("timed out creating CloudFormation stack %q after %d", name, c.cfg.WaitTimeout)
return

case <-ticker.C:
Expand Down
30 changes: 28 additions & 2 deletions pkg/eks/eks.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"time"

"github.com/pkg/errors"
"k8s.io/client-go/kubernetes"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/eks"
Expand Down Expand Up @@ -72,7 +73,7 @@ func (c *ClusterProvider) createControlPlane(errs chan error) error {
ticker := time.NewTicker(20 * time.Second)
defer ticker.Stop()

timer := time.NewTimer(c.cfg.AWSOperationTimeout)
timer := time.NewTimer(c.cfg.WaitTimeout)
defer timer.Stop()

defer close(taskErrs)
Expand All @@ -81,7 +82,7 @@ func (c *ClusterProvider) createControlPlane(errs chan error) error {
for {
select {
case <-timer.C:
taskErrs <- fmt.Errorf("timed out creating control plane %q after %s", c.cfg.ClusterName, c.cfg.AWSOperationTimeout)
taskErrs <- fmt.Errorf("timed out creating control plane %q after %s", c.cfg.ClusterName, c.cfg.WaitTimeout)
return

case <-ticker.C:
Expand Down Expand Up @@ -190,3 +191,28 @@ func (c *ClusterProvider) ListAllTaggedResources() error {
// TODO: https://github.com/weaveworks/eksctl/issues/26
return nil
}

func (c *ClusterConfig) WaitForControlPlane(clientSet *kubernetes.Clientset) error {
if _, err := clientSet.ServerVersion(); err == nil {
return nil
}

ticker := time.NewTicker(20 * time.Second)
defer ticker.Stop()

timer := time.NewTimer(c.WaitTimeout)
defer timer.Stop()

for {
select {
case <-ticker.C:
_, err := clientSet.ServerVersion()
if err == nil {
return nil
}
logger.Debug("control plane not ready yet – %s", err.Error())
case <-timer.C:
return fmt.Errorf("timed out waiting for control plane %q after %s", c.ClusterName, c.WaitTimeout)
}
}
}
5 changes: 2 additions & 3 deletions pkg/eks/nodegroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,7 @@ func getNodes(clientSet *clientset.Clientset) (int, error) {
}

func (c *ClusterConfig) WaitForNodes(clientSet *clientset.Clientset) error {
timeoutAfter := 20 * time.Minute
timer := time.After(timeoutAfter)
timer := time.After(c.WaitTimeout)
timeout := false
watcher, err := clientSet.Core().Nodes().Watch(metav1.ListOptions{})
if err != nil {
Expand Down Expand Up @@ -115,7 +114,7 @@ func (c *ClusterConfig) WaitForNodes(clientSet *clientset.Clientset) error {
}
}
if timeout {
return fmt.Errorf("timed out (after %s) waitiing for at least %d nodes to join the cluster and become ready", timeoutAfter, c.MinNodes)
return fmt.Errorf("timed out (after %s) waitiing for at least %d nodes to join the cluster and become ready", c.WaitTimeout, c.MinNodes)
}

if _, err = getNodes(clientSet); err != nil {
Expand Down

0 comments on commit 1033a01

Please sign in to comment.