Skip to content

Commit

Permalink
Add DHCP IP Retries in PrepareHNSNetwork
Browse files Browse the repository at this point in the history
To address the potential race condition issue where acquiring a DHCP IP address may fail after CreateHNSNetwork,
we added a retry mechanism to wait for an available IP. If the DHCP IP cannot be acquired within six seconds,
a warning message will be returned.

Signed-off-by: Shuyang Xin <[email protected]>
  • Loading branch information
XinShuYang committed Dec 28, 2023
1 parent 0c6d471 commit f2f6c7a
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 61 deletions.
1 change: 0 additions & 1 deletion ci/jenkins/test-rancher.sh
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,6 @@ function deliver_antrea {
scp -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no -i "${WORKDIR}/.ssh/id_rsa" build/yamls/*.yml ubuntu@[${control_plane_ip}]:~/

echo "====== Delivering Antrea to all the Nodes ======"
docker save -o antrea-ubuntu.tar antrea/antrea-ubuntu:latest
docker save -o flow-aggregator.tar antrea/flow-aggregator:latest
kubectl get nodes -o wide --no-headers=true | awk '{print $6}' | while read IP; do
rsync -avr --progress --inplace -e "ssh -o StrictHostKeyChecking=no" antrea-ubuntu.tar ubuntu@[${IP}]:~/antrea-ubuntu.tar
Expand Down
19 changes: 19 additions & 0 deletions ci/jenkins/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ IP_MODE=""
K8S_VERSION="1.28.2-00"
WINDOWS_YAML_SUFFIX="windows"
WIN_IMAGE_NODE=""
WIN_DHCP=""
GOLANG_RELEASE_DIR=${WORKDIR}/golang-releases

WINDOWS_CONFORMANCE_FOCUS="\[sig-network\].+\[Conformance\]|\[sig-windows\]"
Expand Down Expand Up @@ -256,6 +257,18 @@ function collect_windows_network_info_and_logs {
tar zcf debug_logs.tar.gz "${DEBUG_LOG_PATH}"
}

function check_dhcp_status {
echo "===== Check Interface DHCP Status ====="
WINIP=$(kubectl get nodes -o wide --no-headers=true | awk '$1 ~ /win-0/ {print $6}')
WIN_BRINT_DHCP=$(ssh -o StrictHostKeyChecking=no -n administrator@${WINIP} 'powershell.exe "(Get-NetIPInterface -InterfaceAlias br-int -AddressFamily IPv4).Dhcp"')
if [[ ${WIN_DHCP} == ${WIN_BRINT_DHCP} ]]; then
echo "Newly created uplink DHCP status is consistent with the original adapter."
else
echo "Newly created uplink DHCP status is different from the original adapter. Original DHCP: $WIN_DHCP, br-int DHCP: $WIN_BRINT_DHCP"
exit 1
fi
}

function wait_for_antrea_windows_pods_ready {
kubectl apply -f "${WORKDIR}/antrea.yml"
if [[ "${PROXY_ALL}" == false && ${TESTCASE} =~ "windows-e2e" ]]; then
Expand All @@ -277,6 +290,7 @@ function wait_for_antrea_windows_pods_ready {
done
sleep 10
done
check_dhcp_status
}

function wait_for_antrea_windows_processes_ready {
Expand All @@ -295,6 +309,7 @@ function wait_for_antrea_windows_processes_ready {
done
sleep 10
done
check_dhcp_status
}

function clean_ns {
Expand Down Expand Up @@ -652,6 +667,10 @@ function deliver_antrea_windows_containerd {
ssh -o StrictHostKeyChecking=no -n Administrator@${IP} "gzip -d antrea-windows.tar.gz && ctr -n k8s.io images import antrea-windows.tar"
fi
done
# Add Windows interface DHCP check using CI script to obtain the original interface status.
WINIP=$(kubectl get nodes -o wide --no-headers=true | awk '$1 ~ /win-0/ {print $6}')
declare -g WIN_DHCP=$(ssh -o StrictHostKeyChecking=no -n administrator@${WINIP} 'powershell.exe "(Get-NetIPInterface -InterfaceAlias \"Ethernet0 2\" -AddressFamily IPv4).Dhcp"')
echo "Original adapter DHCP status: $WIN_DHCP"
rm -f antrea-windows.tar
echo "==== Finish building and delivering Windows containerd images ===="
}
Expand Down
41 changes: 38 additions & 3 deletions pkg/agent/util/net_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ const (
RT_FILTER_METRIC
RT_FILTER_DST
RT_FILTER_GW

// IP_ADAPTER_DHCP_ENABLED is defined in the Win32 API document.
// https://learn.microsoft.com/en-us/windows/win32/api/iptypes/ns-iptypes-ip_adapter_addresses_lh
IP_ADAPTER_DHCP_ENABLED = 0x00000004
)

var (
Expand Down Expand Up @@ -460,10 +464,29 @@ func PrepareHNSNetwork(subnetCIDR *net.IPNet, nodeIPNet *net.IPNet, uplinkAdapte
hnsNetworkDelete(hnsNet)
}
}()

adapter, ipFound, err := adapterIPExists(nodeIPNet.IP, uplinkAdapter.HardwareAddr, ContainerVNICPrefix)
var adapter *net.Interface
var ipFound bool
// On the current Windows testbed, it takes a maximum of 1.8 seconds to obtain a valid IP.
// Therefore, we set the timeout limit to triple of that value, allowing a maximum wait of 6 seconds here.
err = wait.PollImmediate(1*time.Second, 6*time.Second, func() (bool, error) {
var checkErr error
adapter, ipFound, checkErr = adapterIPExists(nodeIPNet.IP, uplinkAdapter.HardwareAddr, ContainerVNICPrefix)
if checkErr != nil {
return false, checkErr
}
return ipFound, nil
})
if err != nil {
return err
if err == wait.ErrWaitTimeout {
dhcpStatus, err := InterfaceIPv4DhcpEnabled(uplinkAdapter.Name)
if err != nil {
klog.ErrorS(err, "Failed to get Ipv4 DHCP status on the network adapter", "adapter", uplinkAdapter.Name)
} else {
klog.ErrorS(err, "Timeout acquiring IP for the adapter", "dhcpStatus", dhcpStatus)
}
} else {
return err
}
}
vNicName, index := adapter.Name, adapter.Index
// By default, "ipFound" should be true after Windows creates the HNSNetwork. The following check is for some corner
Expand Down Expand Up @@ -647,6 +670,16 @@ func HostInterfaceExists(ifaceName string) bool {
return true
}

// InterfaceIPv4DhcpEnabled returns the Ipv4 DHCP status on the specified interface.
func InterfaceIPv4DhcpEnabled(ifaceName string) (bool, error) {
adapter, err := getAdapterInAllCompartmentsByName(ifaceName)
if err != nil {
return false, err
}
ipv4Dhcp := (adapter.flags&IP_ADAPTER_DHCP_ENABLED != 0)
return ipv4Dhcp, nil
}

// SetInterfaceMTU configures interface MTU on host for Pods. MTU change cannot be realized with HNSEndpoint because
// there's no MTU field in HNSEndpoint:
// https://github.com/Microsoft/hcsshim/blob/4a468a6f7ae547974bc32911395c51fb1862b7df/internal/hns/hnsendpoint.go#L12
Expand Down Expand Up @@ -1085,6 +1118,7 @@ type updateIPInterfaceFunc func(entry *antreasyscall.MibIPInterfaceRow) *antreas
type adapter struct {
net.Interface
compartmentID uint32
flags uint32
}

func (a *adapter) setMTU(mtu int, family uint16) error {
Expand Down Expand Up @@ -1250,6 +1284,7 @@ func getAdaptersByName(name string) ([]adapter, error) {
adapter := adapter{
Interface: ifi,
compartmentID: aa.CompartmentId,
flags: aa.Flags,
}
adapters = append(adapters, adapter)
}
Expand Down
129 changes: 72 additions & 57 deletions pkg/agent/util/net_windows_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ func TestPrepareHNSNetwork(t *testing.T) {
setVMCmd := fmt.Sprintf("Set-VMSwitch -ComputerName $(hostname) -Name %s -EnableSoftwareRsc $True", LocalHNSNetwork)
tests := []struct {
name string
testAdapterAddresses *windows.IpAdapterAddresses
nodeIPNet *net.IPNet
dnsServers string
newName string
Expand All @@ -320,89 +321,99 @@ func TestPrepareHNSNetwork(t *testing.T) {
wantErr error
}{
{
name: "Prepare Success",
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
newName: testNewName,
ipFound: true,
name: "Prepare Success",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
newName: testNewName,
ipFound: true,
wantCmds: []string{getAdapterCmd, renameAdapterCmd, renameNetCmd,
getVMCmd, setVMCmd},
},
{
name: "Create Error",
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
ipFound: true,
hnsNetworkCreateErr: testInvalidErr,
wantErr: fmt.Errorf("error creating HNSNetwork: invalid"),
name: "Create Error",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
ipFound: true,
hnsNetworkCreateErr: testInvalidErr,
wantErr: fmt.Errorf("error creating HNSNetwork: invalid"),
},
{
name: "adapter Err",
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
ipFound: true,
testNetInterfaceErr: testInvalidErr,
wantErr: testInvalidErr,
name: "adapter Err",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
ipFound: true,
testNetInterfaceErr: testInvalidErr,
wantErr: testInvalidErr,
},
{
name: "Rename Err",
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
newName: testNewName,
ipFound: true,
commandErr: testInvalidErr,
wantCmds: []string{getAdapterCmd},
wantErr: testInvalidErr,
name: "Rename Err",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
newName: testNewName,
ipFound: true,
commandErr: testInvalidErr,
wantCmds: []string{getAdapterCmd},
wantErr: testInvalidErr,
},
{
name: "Enable HNS Err",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
ipFound: true,
hnsNetworkRequestError: testInvalidErr,
wantErr: testInvalidErr,
},
{
name: "Enable RSC Err",
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
ipFound: true,
commandErr: testInvalidErr,
wantCmds: []string{getVMCmd},
wantErr: testInvalidErr,
name: "Enable RSC Err",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
ipFound: true,
commandErr: testInvalidErr,
wantCmds: []string{getVMCmd},
wantErr: testInvalidErr,
},
{
name: "IP Not Found",
nodeIPNet: &ipv4ZeroIPNet,
dnsServers: testDNSServer,
ipFound: false,
wantCmds: []string{newIPCmd, setServerCmd, getVMCmd, setVMCmd},
name: "IP Not Found",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4ZeroIPNet,
dnsServers: testDNSServer,
ipFound: false,
wantCmds: []string{newIPCmd, setServerCmd, getVMCmd, setVMCmd},
},
{
name: "IP Not Found Configure Default Err",
nodeIPNet: &ipv4ZeroIPNet,
dnsServers: testDNSServer,
ipFound: false,
commandErr: testInvalidErr,
wantCmds: []string{newIPCmd},
wantErr: testInvalidErr,
name: "IP Not Found Configure Default Err",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4ZeroIPNet,
dnsServers: testDNSServer,
ipFound: false,
commandErr: testInvalidErr,
wantCmds: []string{newIPCmd},
wantErr: testInvalidErr,
},
{
name: "IP Not Found Set adapter Err",
nodeIPNet: &ipv4ZeroIPNet,
dnsServers: testDNSServer,
ipFound: false,
commandErr: alreadyExistsErr,
wantCmds: []string{newIPCmd, setServerCmd},
wantErr: alreadyExistsErr,
name: "IP Not Found Set adapter Err",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4ZeroIPNet,
dnsServers: testDNSServer,
ipFound: false,
commandErr: alreadyExistsErr,
wantCmds: []string{newIPCmd, setServerCmd},
wantErr: alreadyExistsErr,
},
{
name: "IP Not Found New Net Route Err",
nodeIPNet: &ipv4ZeroIPNet,
ipFound: false,
createRowErr: fmt.Errorf("ip route not found"),
wantCmds: []string{newIPCmd},
wantErr: fmt.Errorf("failed to create new IPForward row: ip route not found"),
name: "IP Not Found New Net Route Err",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4ZeroIPNet,
ipFound: false,
createRowErr: fmt.Errorf("ip route not found"),
wantCmds: []string{newIPCmd},
wantErr: fmt.Errorf("failed to create new IPForward row: ip route not found"),
},
}

Expand All @@ -415,6 +426,7 @@ func TestPrepareHNSNetwork(t *testing.T) {
defer mockHNSNetworkCreate(tc.hnsNetworkCreateErr)()
defer mockHNSNetworkDelete(nil)()
defer mockAntreaNetIO(&antreasyscalltest.MockNetIO{CreateIPForwardEntryErr: tc.createRowErr})()
defer mockGetAdaptersAddresses(tc.testAdapterAddresses, nil)()
gotErr := PrepareHNSNetwork(testSubnetCIDR, tc.nodeIPNet, testUplinkAdapter, "testGateway", tc.dnsServers, testRoutes, tc.newName)
assert.Equal(t, tc.wantErr, gotErr)
})
Expand Down Expand Up @@ -1076,6 +1088,7 @@ func TestGetAdapterInAllCompartmentsByName(t *testing.T) {
HardwareAddr: testMACAddr,
},
compartmentID: 1,
flags: IP_ADAPTER_DHCP_ENABLED,
}
tests := []struct {
name string
Expand Down Expand Up @@ -1139,6 +1152,7 @@ func createTestAdapterAddresses(name string) *windows.IpAdapterAddresses {
PhysicalAddressLength: 6,
PhysicalAddress: testPhysicalAddress,
CompartmentId: 1,
Flags: IP_ADAPTER_DHCP_ENABLED,
}
}

Expand Down Expand Up @@ -1171,6 +1185,7 @@ func mockGetAdaptersAddresses(testAdaptersAddresses *windows.IpAdapterAddresses,
adapterAddresses.PhysicalAddressLength = testAdaptersAddresses.PhysicalAddressLength
adapterAddresses.PhysicalAddress = testAdaptersAddresses.PhysicalAddress
adapterAddresses.CompartmentId = testAdaptersAddresses.CompartmentId
adapterAddresses.Flags = testAdaptersAddresses.Flags
}
return err
}
Expand Down

0 comments on commit f2f6c7a

Please sign in to comment.