Skip to content

Commit

Permalink
Fix device number and update table name the device index
Browse files Browse the repository at this point in the history
  • Loading branch information
Claes Mogren authored and SaranBalaji90 committed Sep 1, 2020
1 parent b1a774f commit 6eb28fa
Show file tree
Hide file tree
Showing 7 changed files with 65 additions and 62 deletions.
35 changes: 19 additions & 16 deletions cmd/routed-eni-cni-plugin/driver/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ const (

// NetworkAPIs defines network API calls
type NetworkAPIs interface {
SetupNS(hostVethName string, contVethName string, netnsPath string, addr *net.IPNet, table int, vpcCIDRs []string, useExternalSNAT bool, mtu int, log logger.Logger) error
TeardownNS(addr *net.IPNet, table int, log logger.Logger) error
SetupNS(hostVethName string, contVethName string, netnsPath string, addr *net.IPNet, deviceNumber int, vpcCIDRs []string, useExternalSNAT bool, mtu int, log logger.Logger) error
TeardownNS(addr *net.IPNet, deviceNumber int, log logger.Logger) error
SetupPodENINetwork(hostVethName string, contVethName string, netnsPath string, addr *net.IPNet, vlanID int, eniMAC string,
subnetGW string, parentIfIndex int, mtu int, log logger.Logger) error
TeardownPodENINetwork(vlanID int, log logger.Logger) error
Expand Down Expand Up @@ -173,12 +173,12 @@ func (createVethContext *createVethPairContext) run(hostNS ns.NetNS) error {
}

// SetupNS wires up linux networking for a pod's network
func (os *linuxNetwork) SetupNS(hostVethName string, contVethName string, netnsPath string, addr *net.IPNet, table int, vpcCIDRs []string, useExternalSNAT bool, mtu int, log logger.Logger) error {
log.Debugf("SetupNS: hostVethName=%s, contVethName=%s, netnsPath=%s, table=%d, mtu=%d", hostVethName, contVethName, netnsPath, table, mtu)
return setupNS(hostVethName, contVethName, netnsPath, addr, table, vpcCIDRs, useExternalSNAT, os.netLink, os.ns, mtu, log, os.procSys)
func (os *linuxNetwork) SetupNS(hostVethName string, contVethName string, netnsPath string, addr *net.IPNet, deviceNumber int, vpcCIDRs []string, useExternalSNAT bool, mtu int, log logger.Logger) error {
log.Debugf("SetupNS: hostVethName=%s, contVethName=%s, netnsPath=%s, deviceNumber=%d, mtu=%d", hostVethName, contVethName, netnsPath, deviceNumber, mtu)
return setupNS(hostVethName, contVethName, netnsPath, addr, deviceNumber, vpcCIDRs, useExternalSNAT, os.netLink, os.ns, mtu, log, os.procSys)
}

func setupNS(hostVethName string, contVethName string, netnsPath string, addr *net.IPNet, table int, vpcCIDRs []string, useExternalSNAT bool,
func setupNS(hostVethName string, contVethName string, netnsPath string, addr *net.IPNet, deviceNumber int, vpcCIDRs []string, useExternalSNAT bool,
netLink netlinkwrapper.NetLink, ns nswrapper.NS, mtu int, log logger.Logger, procSys procsyswrapper.ProcSys) error {

hostVeth, err := setupVeth(hostVethName, contVethName, netnsPath, addr, netLink, ns, mtu, procSys, log)
Expand Down Expand Up @@ -213,22 +213,24 @@ func setupNS(hostVethName string, contVethName string, netnsPath string, addr *n
log.Infof("Added toContainer rule for %s", addr.String())

// add from-pod rule, only need it when it is not primary ENI
if table > 0 {
if deviceNumber > 0 {
// To be backwards compatible, we will have to keep this off-by one setting
tableNumber := deviceNumber + 1
if useExternalSNAT {
// add rule: 1536: from <podIP> use table <table>
err = addContainerRule(netLink, false, addr, table)
err = addContainerRule(netLink, false, addr, tableNumber)
if err != nil {
log.Errorf("Failed to add fromContainer rule for %s err: %v", addr.String(), err)
return errors.Wrap(err, "add NS network: failed to add fromContainer rule")
}
log.Infof("Added rule priority %d from %s table %d", fromContainerRulePriority, addr.String(), table)
log.Infof("Added rule priority %d from %s table %d", fromContainerRulePriority, addr.String(), tableNumber)
} else {
// add rule: 1536: list of from <podIP> to <vpcCIDR> use table <table>
for _, cidr := range vpcCIDRs {
podRule := netLink.NewRule()
_, podRule.Dst, _ = net.ParseCIDR(cidr)
podRule.Src = addr
podRule.Table = table
podRule.Table = tableNumber
podRule.Priority = fromContainerRulePriority

err = netLink.RuleAdd(podRule)
Expand Down Expand Up @@ -426,12 +428,12 @@ func addContainerRule(netLink netlinkwrapper.NetLink, isToContainer bool, addr *
}

// TeardownPodNetwork cleanup ip rules
func (os *linuxNetwork) TeardownNS(addr *net.IPNet, table int, log logger.Logger) error {
log.Debugf("TeardownNS: addr %s, table %d", addr.String(), table)
return tearDownNS(addr, table, os.netLink, log)
func (os *linuxNetwork) TeardownNS(addr *net.IPNet, deviceNumber int, log logger.Logger) error {
log.Debugf("TeardownNS: addr %s, deviceNumber %d", addr.String(), deviceNumber)
return tearDownNS(addr, deviceNumber, os.netLink, log)
}

func tearDownNS(addr *net.IPNet, table int, netLink netlinkwrapper.NetLink, log logger.Logger) error {
func tearDownNS(addr *net.IPNet, deviceNumber int, netLink netlinkwrapper.NetLink, log logger.Logger) error {
if addr == nil {
return errors.New("can't tear down network namespace with no IP address")
}
Expand All @@ -447,14 +449,15 @@ func tearDownNS(addr *net.IPNet, table int, netLink netlinkwrapper.NetLink, log
log.Infof("Delete toContainer rule for %s ", addr.String())
}

if table > 0 {
if deviceNumber > 0 {
// remove from-pod rule only for non main table
err := deleteRuleListBySrc(*addr)
if err != nil {
log.Errorf("Failed to delete fromContainer for %s %v", addr.String(), err)
return errors.Wrapf(err, "delete NS network: failed to delete fromContainer rule for %s", addr.String())
}
log.Infof("Delete fromContainer rule for %s in table %d", addr.String(), table)
tableNumber := deviceNumber + 1
log.Infof("Delete fromContainer rule for %s in table %d", addr.String(), tableNumber)
}

addrHostAddr := &net.IPNet{
Expand Down
35 changes: 20 additions & 15 deletions pkg/awsutils/awsutils.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ const (
eniDescriptionPrefix = "aws-K8S-"

// AllocENI need to choose a first free device number between 0 and maxENI
maxENIs = 128
// 100 is a hard limit because we use vlanID + 100 for pod networking table names
maxENIs = 100
clusterNameEnvVar = "CLUSTER_NAME"
eniNodeTagKey = "node.k8s.amazonaws.com/instance_id"
eniCreatedAtTagKey = "node.k8s.amazonaws.com/createdAt"
Expand Down Expand Up @@ -629,7 +630,7 @@ func (cache *EC2InstanceMetadataCache) getIPsAndCIDR(eniMAC string) ([]*ec2.Netw
// There may be multiple IPv4 addresses on an instance. https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-categories.html
isFirst := true
for _, ipv4 := range ipv4Strs {
// TODO: Verify that the first IP is always the primary
// The first IP in the list is always the primary IP of that ENI
primary := isFirst
ip := ipv4
ipv4s = append(ipv4s, &ec2.NetworkInterfacePrivateIpAddress{PrivateIpAddress: &ip, Primary: &primary})
Expand All @@ -639,38 +640,42 @@ func (cache *EC2InstanceMetadataCache) getIPsAndCIDR(eniMAC string) ([]*ec2.Netw
}

// getENIDeviceNumber returns ENI ID, device number, error
func (cache *EC2InstanceMetadataCache) getENIDeviceNumber(eniMAC string) (string, int, error) {
func (cache *EC2InstanceMetadataCache) getENIDeviceNumber(eniMAC string) (eniID string, deviceNumber int, err error) {
// get device-number
start := time.Now()
device, err := cache.ec2Metadata.GetMetadata(metadataMACPath + eniMAC + metadataDeviceNum)
awsAPILatency.WithLabelValues("GetMetadata", fmt.Sprint(err != nil)).Observe(msSince(start))
if err != nil {
awsAPIErrInc("GetMetadata", err)
log.Errorf("Failed to retrieve the device-number of ENI %s, %v", eniMAC, err)
return "", 0, errors.Wrapf(err, "failed to retrieve device-number for ENI %s",
eniMAC)
return "", -1, errors.Wrapf(err, "failed to retrieve device-number for ENI %s", eniMAC)
}

deviceNum, err := strconv.ParseInt(device, 0, 32)
deviceNumber, err = strconv.Atoi(device)
if err != nil {
return "", 0, errors.Wrapf(err, "invalid device %s for ENI %s", device, eniMAC)
return "", -1, errors.Wrapf(err, "invalid device %s for ENI %s", device, eniMAC)
}

start = time.Now()
eni, err := cache.ec2Metadata.GetMetadata(metadataMACPath + eniMAC + metadataInterface)
eniID, err = cache.ec2Metadata.GetMetadata(metadataMACPath + eniMAC + metadataInterface)
awsAPILatency.WithLabelValues("GetMetadata", fmt.Sprint(err != nil)).Observe(msSince(start))
if err != nil {
awsAPIErrInc("GetMetadata", err)
log.Errorf("Failed to retrieve the interface-id data from instance metadata service, %v", err)
return "", 0, errors.Wrapf(err, "get attached ENIs: failed to retrieve interface-id for ENI %s", eniMAC)
return "", -1, errors.Wrapf(err, "get attached ENIs: failed to retrieve interface-id for ENI %s", eniMAC)
}

if cache.primaryENI == eni {
log.Debugf("Using device number 0 for primary eni: %s", eni)
return eni, 0, nil
if cache.primaryENI == eniID {
log.Debugf("Using device number 0 for primary ENI: %s", eniID)
if deviceNumber != 0 {
// Can this even happen? To be backwards compatible, we will always return 0 here and log an error.
log.Errorf("Device number of primary ENI is %d! It was expected to be 0", deviceNumber)
return eniID, 0, nil
}
return eniID, deviceNumber, nil
}
// 0 is reserved for primary ENI, the rest of them has to +1 to avoid collision at 0
return eni, int(deviceNum + 1), nil
// 0 is reserved for primary ENI
return eniID, deviceNumber, nil
}

// awsGetFreeDeviceNumber calls EC2 API DescribeInstances to get the next free device index
Expand Down Expand Up @@ -1285,7 +1290,7 @@ func (cache *EC2InstanceMetadataCache) GetENILimit() (int, error) {
return 0, errors.New(fmt.Sprintf("%s: %s", UnknownInstanceType, cache.instanceType))
}
}
return int(eniLimits.ENILimit), nil
return eniLimits.ENILimit, nil
}

// AllocIPAddresses allocates numIPs of IP address on an ENI
Expand Down
2 changes: 1 addition & 1 deletion pkg/awsutils/awsutils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -822,7 +822,7 @@ func TestEC2InstanceMetadataCache_waitForENIAndIPsAttached(t *testing.T) {
eni2Metadata := ENIMetadata{
ENIID: eni2ID,
MAC: eni2MAC,
DeviceNumber: 2,
DeviceNumber: 1,
SubnetIPv4CIDR: subnetCIDR,
IPv4Addresses: []*ec2.NetworkInterfacePrivateIpAddress{
{
Expand Down
6 changes: 3 additions & 3 deletions pkg/ipamd/datastore/data_store.go
Original file line number Diff line number Diff line change
Expand Up @@ -483,7 +483,7 @@ func (ds *DataStore) DelIPv4AddressFromStore(eniID string, ipv4 string, force bo

// AssignPodIPv4Address assigns an IPv4 address to pod
// It returns the assigned IPv4 address, device number, error
func (ds *DataStore) AssignPodIPv4Address(ipamKey IPAMKey) (string, int, error) {
func (ds *DataStore) AssignPodIPv4Address(ipamKey IPAMKey) (ipv4address string, deviceNumber int, err error) {
ds.lock.Lock()
defer ds.lock.Unlock()

Expand All @@ -501,7 +501,7 @@ func (ds *DataStore) AssignPodIPv4Address(ipamKey IPAMKey) (string, int, error)
ds.log.Warnf("Failed to update backing store: %v", err)
// Important! Unwind assignment
ds.unassignPodIPv4AddressUnsafe(eni, addr)
return "", 0, err
return "", -1, err
}

return addr.Address, eni.DeviceNumber, nil
Expand All @@ -510,7 +510,7 @@ func (ds *DataStore) AssignPodIPv4Address(ipamKey IPAMKey) (string, int, error)
ds.log.Debugf("AssignPodIPv4Address: ENI %s does not have available addresses", eni.ID)
}
ds.log.Errorf("DataStore has no available IP addresses")
return "", 0, errors.New("assignPodIPv4AddressUnsafe: no available IP addresses")
return "", -1, errors.New("assignPodIPv4AddressUnsafe: no available IP addresses")
}

// It returns the assigned IPv4 address, device number
Expand Down
39 changes: 17 additions & 22 deletions pkg/networkutils/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,8 @@ var log = logger.Get()
type NetworkAPIs interface {
// SetupNodeNetwork performs node level network configuration
SetupHostNetwork(vpcCIDRs []string, primaryMAC string, primaryAddr *net.IP, enablePodENI bool) error
// SetupENINetwork performs eni level network configuration
SetupENINetwork(eniIP string, mac string, table int, subnetCIDR string) error
// SetupENINetwork performs ENI level network configuration. Not needed on the primary ENI
SetupENINetwork(eniIP string, mac string, deviceNumber int, subnetCIDR string) error
UseExternalSNAT() bool
GetExcludeSNATCIDRs() []string
GetRuleList() ([]netlink.Rule, error)
Expand Down Expand Up @@ -698,21 +698,19 @@ func linkByMac(mac string, netLink netlinkwrapper.NetLink, retryInterval time.Du
}
}

// SetupENINetwork adds default route to route table (eni-<eni_table>)
func (n *linuxNetwork) SetupENINetwork(eniIP string, eniMAC string, eniTable int, eniSubnetCIDR string) error {
return setupENINetwork(eniIP, eniMAC, eniTable, eniSubnetCIDR, n.netLink, retryLinkByMacInterval, retryRouteAddInterval, n.mtu)
// SetupENINetwork adds default route to route table (eni-<eni_table>), so it does not need to be called on the primary ENI
func (n *linuxNetwork) SetupENINetwork(eniIP string, eniMAC string, deviceNumber int, eniSubnetCIDR string) error {
return setupENINetwork(eniIP, eniMAC, deviceNumber, eniSubnetCIDR, n.netLink, retryLinkByMacInterval, retryRouteAddInterval, n.mtu)
}

func setupENINetwork(eniIP string, eniMAC string, eniTable int, eniSubnetCIDR string, netLink netlinkwrapper.NetLink,
func setupENINetwork(eniIP string, eniMAC string, deviceNumber int, eniSubnetCIDR string, netLink netlinkwrapper.NetLink,
retryLinkByMacInterval time.Duration, retryRouteAddInterval time.Duration, mtu int) error {

if eniTable == 0 {
log.Debugf("Skipping set up ENI network for primary interface")
return nil
if deviceNumber == 0 {
return errors.New("setupENINetwork should never be called on the primary ENI")
}

tableNumber := deviceNumber + 1
log.Infof("Setting up network for an ENI with IP address %s, MAC address %s, CIDR %s and route table %d",
eniIP, eniMAC, eniSubnetCIDR, eniTable)
eniIP, eniMAC, eniSubnetCIDR, tableNumber)
link, err := linkByMac(eniMAC, netLink, retryLinkByMacInterval)
if err != nil {
return errors.Wrapf(err, "setupENINetwork: failed to find the link which uses MAC address %s", eniMAC)
Expand All @@ -726,8 +724,6 @@ func setupENINetwork(eniIP string, eniMAC string, eniTable int, eniSubnetCIDR st
return errors.Wrapf(err, "setupENINetwork: failed to bring up ENI %s", eniIP)
}

deviceNumber := link.Attrs().Index

_, ipnet, err := net.ParseCIDR(eniSubnetCIDR)
if err != nil {
return errors.Wrapf(err, "setupENINetwork: invalid IPv4 CIDR block %s", eniSubnetCIDR)
Expand Down Expand Up @@ -764,22 +760,23 @@ func setupENINetwork(eniIP string, eniMAC string, eniTable int, eniSubnetCIDR st
return errors.Wrap(err, "setupENINetwork: failed to add IP addr to ENI")
}

log.Debugf("Setting up ENI's default gateway %v", gw)
linkIndex := link.Attrs().Index
log.Debugf("Setting up ENI's default gateway %v, table %d, linkIndex %d", gw, tableNumber, linkIndex)
routes := []netlink.Route{
// Add a direct link route for the host's ENI IP only
{
LinkIndex: deviceNumber,
LinkIndex: linkIndex,
Dst: &net.IPNet{IP: gw, Mask: net.CIDRMask(32, 32)},
Scope: netlink.SCOPE_LINK,
Table: eniTable,
Table: tableNumber,
},
// Route all other traffic via the host's ENI IP
{
LinkIndex: deviceNumber,
LinkIndex: linkIndex,
Dst: &net.IPNet{IP: net.IPv4zero, Mask: net.CIDRMask(0, 32)},
Scope: netlink.SCOPE_UNIVERSE,
Gw: gw,
Table: eniTable,
Table: tableNumber,
},
}
for _, r := range routes {
Expand All @@ -790,11 +787,9 @@ func setupENINetwork(eniIP string, eniMAC string, eniTable int, eniSubnetCIDR st

err = retry.RetryNWithBackoff(retry.NewSimpleBackoff(500*time.Millisecond, retryRouteAddInterval, 0.15, 2.0), maxRetryRouteAdd, func() error {
if err := netLink.RouteReplace(&r); err != nil {
log.Debugf("Not able to set route %s/0 via %s table %d",
r.Dst.IP.String(), gw.String(), eniTable)
log.Debugf("Not able to set route %s/0 via %s table %d", r.Dst.IP.String(), gw.String(), tableNumber)
return errors.Wrapf(err, "setupENINetwork: unable to replace route entry %s", r.Dst.IP.String())
}

log.Debugf("Successfully added/replaced route to be %s/0", r.Dst.IP.String())
return nil
})
Expand Down
8 changes: 4 additions & 4 deletions pkg/networkutils/network_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,12 +132,12 @@ func TestSetupENINetworkMACFail(t *testing.T) {
assert.Errorf(t, err, "simulated failure")
}

func TestSetupENINetworkPrimary(t *testing.T) {
func TestSetupENINetowrkErrorOnPrimaryENI(t *testing.T) {
ctrl, mockNetLink, _, _, _, _ := setup(t)
defer ctrl.Finish()

err := setupENINetwork(testeniIP, testMAC2, 0, testeniSubnet, mockNetLink, 0*time.Second, 0*time.Second, testMTU)
assert.NoError(t, err)
deviceNumber := 0
err := setupENINetwork(testeniIP, testMAC2, deviceNumber, testeniSubnet, mockNetLink, 0*time.Second, 0*time.Second, testMTU)
assert.Error(t, err)
}

func TestSetupHostNetworkNodePortDisabled(t *testing.T) {
Expand Down
2 changes: 1 addition & 1 deletion scripts/gen_vpc_ip_limits.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ func main() {
for {
output, err := svc.DescribeInstanceTypes(describeInstanceTypesInput)
if err != nil {
log.Fatalf("Failed to call EC2 DescibeInstanceTypes: %v", err)
log.Fatalf("Failed to call EC2 DescribeInstanceTypes: %v", err)
}
// We just want the type name, ENI and IP limits
for _, info := range output.InstanceTypes {
Expand Down

0 comments on commit 6eb28fa

Please sign in to comment.