Skip to content

Commit

Permalink
Add Config Knob to allow Pod to use different VPC subnet
Browse files Browse the repository at this point in the history
  • Loading branch information
liwenwu-amazon committed Sep 3, 2018
1 parent 5751423 commit 8bb4c71
Show file tree
Hide file tree
Showing 17 changed files with 924 additions and 35 deletions.
130 changes: 130 additions & 0 deletions config/v1.2/aws-k8s-cni.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
---
apiVersion: rbac.authorization.k8s.io/v1
# kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1
kind: ClusterRole
metadata:
name: aws-node
rules:
- apiGroups:
- crd.k8s.amazonaws.com
resources:
- "*"
- namespaecs
verbs:
- "*"
- apiGroups: [""]
resources:
- pods
- nodes
- namespaces
verbs: ["list", "watch", "get"]
- apiGroups: ["extensions"]
resources:
- daemonsets
verbs: ["list", "watch"]
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: aws-node
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
# kubernetes versions before 1.8.0 should use rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
name: aws-node
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: aws-node
subjects:
- kind: ServiceAccount
name: aws-node
namespace: kube-system
---
kind: DaemonSet
apiVersion: extensions/v1beta1
metadata:
name: aws-node
namespace: kube-system
labels:
k8s-app: aws-node
spec:
updateStrategy:
type: RollingUpdate
selector:
matchLabels:
k8s-app: aws-node
template:
metadata:
labels:
k8s-app: aws-node
annotations:
scheduler.alpha.kubernetes.io/critical-pod: ''
spec:
serviceAccountName: aws-node
hostNetwork: true
tolerations:
- operator: Exists
containers:
- image: 602401143452.dkr.ecr.us-west-2.amazonaws.com/amazon-k8s-cni:1.2.0
imagePullPolicy: Always
ports:
- containerPort: 60000
name: metrics
name: aws-node
env:
- name: AWS_VPC_K8S_CNI_LOGLEVEL
value: DEBUG
- name: MY_NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
- name: WATCH_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
resources:
requests:
cpu: 10m
securityContext:
privileged: true
volumeMounts:
- mountPath: /host/opt/cni/bin
name: cni-bin-dir
- mountPath: /host/etc/cni/net.d
name: cni-net-dir
- mountPath: /host/var/log
name: log-dir
- mountPath: /var/run/docker.sock
name: dockersock
volumes:
- name: cni-bin-dir
hostPath:
path: /opt/cni/bin
- name: cni-net-dir
hostPath:
path: /etc/cni/net.d
- name: log-dir
hostPath:
path: /var/log
- name: dockersock
hostPath:
path: /var/run/docker.sock
---
apiVersion: apiextensions.k8s.io/v1beta1
kind: CustomResourceDefinition
metadata:
name: eniconfigs.crd.k8s.amazonaws.com
spec:
scope: Cluster
group: crd.k8s.amazonaws.com
version: v1alpha1
names:
scope: Cluster
plural: eniconfigs
singuar: eniconfig
kind: ENIConfig


6 changes: 5 additions & 1 deletion ipamd/datastore/data_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -351,8 +351,12 @@ func (ds *DataStore) getDeletableENI() *ENIIPPool {
}

// GetENINeedsIP finds out the eni in datastore which failed to get secondary IP address
func (ds *DataStore) GetENINeedsIP(maxIPperENI int64) *ENIIPPool {
func (ds *DataStore) GetENINeedsIP(maxIPperENI int64, skipPrimary bool) *ENIIPPool {
for _, eni := range ds.eniIPPools {
if skipPrimary && eni.IsPrimary {
log.Debugf("Skip the primary ENI for need IP check")
continue
}
if int64(len(eni.IPv4Addresses)) < maxIPperENI {
log.Debugf("Found eni %s that have less IP address allocated: cur=%d, max=%d",
eni.ID, len(eni.IPv4Addresses), maxIPperENI)
Expand Down
34 changes: 30 additions & 4 deletions ipamd/introspect.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,11 @@ func (c *IPAMContext) SetupHTTP() {

func (c *IPAMContext) setupServer() *http.Server {
serverFunctions := map[string]func(w http.ResponseWriter, r *http.Request){
"/v1/enis": eniV1RequestHandler(c),
"/v1/pods": podV1RequestHandler(c),
"/v1/env-settings": envV1RequestHandler(c),
"/v1/enis": eniV1RequestHandler(c),
"/v1/pods": podV1RequestHandler(c),
"/v1/networkutils-env-settings": networkEnvV1RequestHandler(c),
"/v1/ipamd-env-settings": ipamdEnvV1RequestHandler(c),
"/v1/eni-configs": eniConfigRequestHandler(c),
}
paths := make([]string, 0, len(serverFunctions))
for path := range serverFunctions {
Expand Down Expand Up @@ -134,7 +136,19 @@ func podV1RequestHandler(ipam *IPAMContext) func(http.ResponseWriter, *http.Requ
}
}

func envV1RequestHandler(ipam *IPAMContext) func(http.ResponseWriter, *http.Request) {
func eniConfigRequestHandler(ipam *IPAMContext) func(http.ResponseWriter, *http.Request) {
return func(w http.ResponseWriter, r *http.Request) {
responseJSON, err := json.Marshal(ipam.eniConfig.Getter())
if err != nil {
log.Error("Failed to marshal pod data: %v", err)
http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
return
}
w.Write(responseJSON)
}
}

func networkEnvV1RequestHandler(ipam *IPAMContext) func(http.ResponseWriter, *http.Request) {
return func(w http.ResponseWriter, r *http.Request) {
responseJSON, err := json.Marshal(networkutils.GetConfigForDebug())
if err != nil {
Expand All @@ -146,6 +160,18 @@ func envV1RequestHandler(ipam *IPAMContext) func(http.ResponseWriter, *http.Requ
}
}

func ipamdEnvV1RequestHandler(ipam *IPAMContext) func(http.ResponseWriter, *http.Request) {
return func(w http.ResponseWriter, r *http.Request) {
responseJSON, err := json.Marshal(GetConfigForDebug())
if err != nil {
log.Error("Failed to marshal env var data: %v", err)
http.Error(w, http.StatusText(http.StatusInternalServerError), http.StatusInternalServerError)
return
}
w.Write(responseJSON)
}
}

func metricsHandler(ipam *IPAMContext) func(http.ResponseWriter, *http.Request) {
return func(w http.ResponseWriter, r *http.Request) {
promhttp.Handler()
Expand Down
85 changes: 79 additions & 6 deletions ipamd/ipamd.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
package ipamd

import (
"fmt"
"net"
"os"
"strconv"
Expand All @@ -31,6 +32,7 @@ import (
"github.com/aws/amazon-vpc-cni-k8s/ipamd/datastore"
"github.com/aws/amazon-vpc-cni-k8s/pkg/awsutils"
"github.com/aws/amazon-vpc-cni-k8s/pkg/docker"
"github.com/aws/amazon-vpc-cni-k8s/pkg/eniconfig"
"github.com/aws/amazon-vpc-cni-k8s/pkg/k8sapi"
"github.com/aws/amazon-vpc-cni-k8s/pkg/networkutils"
)
Expand All @@ -43,11 +45,40 @@ const (
ipPoolMonitorInterval = 5 * time.Second
maxRetryCheckENI = 5
eniAttachTime = 10 * time.Second
defaultWarmENITarget = 1
nodeIPPoolReconcileInterval = 60 * time.Second
maxK8SRetries = 12
retryK8SInterval = 5 * time.Second
noWarmIPTarget = 0

// This environment is used to specify the desired number of free IPs always available in "warm pool"
// When it is not set, ipamD defaut to use the number IPs per ENI for that instance.
// For example, for a m4.4xlarge node,
// if WARM-IP-TARGET is set to 1, and there are 9 pods running on the node, ipamD will try
// to make "warm pool" to have 10 IP address with 9 being assigned to Pod and 1 free IP.
//
// if "WARM-IP-TARGET is not set, it will be defaulted to 30 (which the number of IPs per ENI). If there are 9 pods
// running on the node, ipamD will try to make "warm pool" to have 39 IPs with 9 being assigned to Pod and 30 free IPs.
envWarmIPTarget = "WARM_IP_TARGET"

// This environment is used to specify the desired number of free ENIs along with all of its IP addresses
// always available in "warm pool".
// When it is not set, it is default to 1.
//
// when "WARM-IP-TARGET" is defined, ipamD will use behavior defined for "WARM-IP-TARGET".
//
// For example, for a m4.4xlarget node
// if WARM_ENI_TARGET is set to 2, and there are 9 pods running on the node, ipamD will try to
// make "warm pool" to have 2 extra ENIs and its IP addresses, in another word, 90 IP addresses with 9 IPs assigne to Pod
// and 81 free IPs.
//
// if "WARM_ENI_TARGET" is not set, it is default to 1, if there 9 pods running on the node, ipamD will try to
// make "warm pool" to have 1 extra ENI, in aother word 60 IPs with 9 being assigned to Pod and 51 free IPs.
envWarmENITarget = "WARM_ENI_TARGET"
defaultWarmENITarget = 1

// This environment is used to specify whether Pods need to use securitygroup and subnet defined in ENIConfig CRD
// When it is NOT set or set to false, ipamD will use primary interface security group and subnet for Pod network.
envCustomNetworkCfg = "AWS_VPC_K8S_CNI_CUSTOM_NETWORK_CFG"
)

var (
Expand Down Expand Up @@ -105,6 +136,7 @@ type IPAMContext struct {
awsClient awsutils.APIs
dataStore *datastore.DataStore
k8sClient k8sapi.K8SAPIs
eniConfig eniconfig.ENIConfig
dockerClient docker.APIs
networkClient networkutils.NetworkAPIs

Expand Down Expand Up @@ -133,13 +165,14 @@ func prometheusRegister() {

// New retrieves IP address usage information from Instance MetaData service and Kubelet
// then initializes IP address pool data store
func New(k8sapiClient k8sapi.K8SAPIs) (*IPAMContext, error) {
func New(k8sapiClient k8sapi.K8SAPIs, eniConfig *eniconfig.ENIConfigController) (*IPAMContext, error) {
prometheusRegister()
c := &IPAMContext{}

c.k8sClient = k8sapiClient
c.networkClient = networkutils.New()
c.dockerClient = docker.New()
c.eniConfig = eniConfig

client, err := awsutils.New()
if err != nil {
Expand Down Expand Up @@ -314,7 +347,7 @@ func (c *IPAMContext) retryAllocENIIP() {
log.Infof("Failed to retrieve ENI IP limit: %v", err)
return
}
eni := c.dataStore.GetENINeedsIP(maxIPLimit)
eni := c.dataStore.GetENINeedsIP(maxIPLimit, useCustomNetworkCfg())
if eni != nil {
log.Debugf("Attempt again to allocate IP address for eni :%s", eni.ID)
var err error
Expand Down Expand Up @@ -392,7 +425,25 @@ func (c *IPAMContext) increaseIPPool() {
log.Debugf("Skipping increase IPPOOL due to max ENI already attached to the instance : %d", c.maxENI)
return
}
eni, err := c.awsClient.AllocENI()

var securityGroups []string
var subnet string
customNetworkCfg := useCustomNetworkCfg()

if customNetworkCfg {
eniCfg, err := c.eniConfig.MyENIConfig()

if err != nil {
log.Errorf("Failed to get pod ENI config")
return
}

log.Infof("ipamd: using custom network config: %v, %s", eniCfg.SecurityGroups, eniCfg.Subnet)
securityGroups = eniCfg.SecurityGroups
subnet = eniCfg.Subnet
}

eni, err := c.awsClient.AllocENI(customNetworkCfg, securityGroups, subnet)
if err != nil {
log.Errorf("Failed to increase pool size due to not able to allocate ENI %v", err)

Expand Down Expand Up @@ -546,7 +597,7 @@ func (c *IPAMContext) waitENIAttached(eni string) (awsutils.ENIMetadata, error)
}

func getWarmENITarget() int {
inputStr, found := os.LookupEnv("WARM_ENI_TARGET")
inputStr, found := os.LookupEnv(envWarmENITarget)

if !found {
return defaultWarmENITarget
Expand Down Expand Up @@ -722,8 +773,21 @@ func (c *IPAMContext) eniIPPoolReconcile(ipPool map[string]*datastore.AddressInf

}

func useCustomNetworkCfg() bool {
defaultValue := false
if strValue := os.Getenv(envCustomNetworkCfg); strValue != "" {
parsedValue, err := strconv.ParseBool(strValue)
if err != nil {
log.Error("Failed to parse "+envCustomNetworkCfg+"; using default: "+fmt.Sprint(defaultValue), err.Error())
return defaultValue
}
return parsedValue
}
return defaultValue
}

func getWarmIPTarget() int {
inputStr, found := os.LookupEnv("WARM_IP_TARGET")
inputStr, found := os.LookupEnv(envWarmIPTarget)

if !found {
return noWarmIPTarget
Expand Down Expand Up @@ -753,3 +817,12 @@ func (c *IPAMContext) getCurWarmIPTarget() (int, bool) {

return curTarget, true
}

// GetConfigForDebug returns the active values of the configuration env vars (for debugging purposes).
func GetConfigForDebug() map[string]interface{} {
return map[string]interface{}{
envWarmIPTarget: getWarmIPTarget(),
envWarmENITarget: getWarmENITarget(),
envCustomNetworkCfg: useCustomNetworkCfg(),
}
}
Loading

0 comments on commit 8bb4c71

Please sign in to comment.