Skip to content

Commit

Permalink
Add AssumeRole error cache
Browse files Browse the repository at this point in the history
When AssumeRole succeeds, the result is cached for 15 minutes by default.
On failure, nothing is cached. In cases of misconfiguration, this can
result in a large number of AssumeRole calls against the AWS API, which,
in extreme cases, can trigger API rate limiting and cause other
applications in the AWS account to experience failures.

This adds a negative cache for AssumeRole errors, with a TTL set by the new
--iam-role-error-ttl flag, to prevent error cases from spamming the API.
  • Loading branch information
schleyfox committed Feb 26, 2019
1 parent 80efbb1 commit 46bce9d
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 10 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -555,12 +555,13 @@ Usage of kube2iam:
--backoff-max-elapsed-time duration Max elapsed time for backoff when querying for role. (default 2s)
--backoff-max-interval duration Max interval for backoff when querying for role. (default 1s)
--base-role-arn string Base role ARN
--iam-role-session-ttl Length of session when assuming the roles (default 15m)
--debug Enable debug features
--default-role string Fallback role to use when annotation is not set
--host-interface string Host interface for proxying AWS metadata (default "docker0")
--host-ip string IP address of host
--iam-role-error-ttl duration TTL for caching assume role errors
--iam-role-key string Pod annotation key used to retrieve the IAM role (default "iam.amazonaws.com/role")
--iam-role-session-ttl duration TTL for the assume role session (default 15m0s)
--insecure Kubernetes server should be accessed without verifying the TLS. Testing only
--iptables Add iptables rule (also requires --host-ip)
--log-format string Log format (text/json) (default "text")
Expand All @@ -574,6 +575,7 @@ Usage of kube2iam:
--use-regional-sts-endpoint use the regional sts endpoint if AWS_REGION is set
--verbose Verbose
--version Print the version and exits
```

## Development loop
Expand Down
1 change: 1 addition & 0 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ func addFlags(s *server.Server, fs *pflag.FlagSet) {
fs.StringVar(&s.DefaultIAMRole, "default-role", s.DefaultIAMRole, "Fallback role to use when annotation is not set")
fs.StringVar(&s.IAMRoleKey, "iam-role-key", s.IAMRoleKey, "Pod annotation key used to retrieve the IAM role")
fs.DurationVar(&s.IAMRoleSessionTTL, "iam-role-session-ttl", s.IAMRoleSessionTTL, "TTL for the assume role session")
fs.DurationVar(&s.IAMRoleErrorTTL, "iam-role-error-ttl", s.IAMRoleErrorTTL, "TTL for caching assume role errors")
fs.BoolVar(&s.Insecure, "insecure", false, "Kubernetes server should be accessed without verifying the TLS. Testing only")
fs.StringVar(&s.MetadataAddress, "metadata-addr", s.MetadataAddress, "Address for the ec2 metadata")
fs.BoolVar(&s.AddIPTablesRule, "iptables", false, "Add iptables rule (also requires --host-ip)")
Expand Down
11 changes: 10 additions & 1 deletion iam/iam.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ import (

var cache = ccache.New(ccache.Configure())

var errorCache = ccache.New(ccache.Configure())

const (
maxSessNameLength = 64
)
Expand Down Expand Up @@ -126,9 +128,14 @@ func (iam *Client) EndpointFor(service, region string, optFns ...func(*endpoints
}

// AssumeRole returns IAM role credentials obtained from AWS STS, caching results per role ARN.
func (iam *Client) AssumeRole(roleARN, remoteIP string, sessionTTL time.Duration) (*Credentials, error) {
func (iam *Client) AssumeRole(roleARN, remoteIP string, sessionTTL time.Duration, errorTTL time.Duration) (*Credentials, error) {
hitCache := true
item, err := cache.Fetch(roleARN, sessionTTL, func() (interface{}, error) {
errItem := errorCache.Get(roleARN)
if errItem != nil && !errItem.Expired() {
return nil, errItem.Value().(error)
}

hitCache = false

// Set up a prometheus timer to track the AWS request duration. It stores the timer value when
Expand All @@ -142,6 +149,7 @@ func (iam *Client) AssumeRole(roleARN, remoteIP string, sessionTTL time.Duration

sess, err := session.NewSession()
if err != nil {
errorCache.Set(roleARN, err, errorTTL)
return nil, err
}
config := aws.NewConfig().WithLogLevel(2)
Expand All @@ -155,6 +163,7 @@ func (iam *Client) AssumeRole(roleARN, remoteIP string, sessionTTL time.Duration
RoleSessionName: aws.String(sessionName(roleARN, remoteIP)),
})
if err != nil {
errorCache.Set(roleARN, err, errorTTL)
return nil, err
}

Expand Down
14 changes: 7 additions & 7 deletions mappings/mapper.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ func (r *RoleMapper) checkRoleForNamespace(roleArn string, namespace string) boo

ns, err := r.store.NamespaceByName(namespace)
if err != nil {
log.Debug("Unable to find an indexed namespace of %s", namespace)
log.Debugf("Unable to find an indexed namespace of %s", namespace)
return false
}

Expand Down Expand Up @@ -149,12 +149,12 @@ func (r *RoleMapper) DumpDebugInfo() map[string]interface{} {
// NewRoleMapper returns a new RoleMapper for use.
func NewRoleMapper(roleKey string, defaultRole string, namespaceRestriction bool, namespaceKey string, iamInstance *iam.Client, kubeStore store, namespaceRestrictionFormat string) *RoleMapper {
return &RoleMapper{
defaultRoleARN: iamInstance.RoleARN(defaultRole),
iamRoleKey: roleKey,
namespaceKey: namespaceKey,
namespaceRestriction: namespaceRestriction,
iam: iamInstance,
store: kubeStore,
defaultRoleARN: iamInstance.RoleARN(defaultRole),
iamRoleKey: roleKey,
namespaceKey: namespaceKey,
namespaceRestriction: namespaceRestriction,
iam: iamInstance,
store: kubeStore,
namespaceRestrictionFormat: namespaceRestrictionFormat,
}
}
5 changes: 4 additions & 1 deletion server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ const (
defaultLogFormat = "text"
defaultMaxElapsedTime = 2 * time.Second
defaultIAMRoleSessionTTL = 15 * time.Minute
defaultIAMRoleErrorTTL = 0
defaultMaxInterval = 1 * time.Second
defaultMetadataAddress = "169.254.169.254"
defaultNamespaceKey = "iam.amazonaws.com/allowed-roles"
Expand All @@ -53,6 +54,7 @@ type Server struct {
DefaultIAMRole string
IAMRoleKey string
IAMRoleSessionTTL time.Duration
IAMRoleErrorTTL time.Duration
MetadataAddress string
HostInterface string
HostIP string
Expand Down Expand Up @@ -311,7 +313,7 @@ func (s *Server) roleHandler(logger *log.Entry, w http.ResponseWriter, r *http.R
return
}

credentials, err := s.iam.AssumeRole(wantedRoleARN, remoteIP, s.IAMRoleSessionTTL)
credentials, err := s.iam.AssumeRole(wantedRoleARN, remoteIP, s.IAMRoleSessionTTL, s.IAMRoleErrorTTL)
if err != nil {
roleLogger.Errorf("Error assuming role %+v", err)
http.Error(w, err.Error(), http.StatusInternalServerError)
Expand Down Expand Up @@ -409,5 +411,6 @@ func NewServer() *Server {
NamespaceRestrictionFormat: defaultNamespaceRestrictionFormat,
HealthcheckFailReason: "Healthcheck not yet performed",
IAMRoleSessionTTL: defaultIAMRoleSessionTTL,
IAMRoleErrorTTL: defaultIAMRoleErrorTTL,
}
}

0 comments on commit 46bce9d

Please sign in to comment.