Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DHCP IP Retries in PrepareHNSNetwork #5819

Merged
merged 1 commit into from
Jan 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions ci/jenkins/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ IP_MODE=""
K8S_VERSION="1.28.2-00"
WINDOWS_YAML_SUFFIX="windows"
WIN_IMAGE_NODE=""
echo "" > WIN_DHCP
GOLANG_RELEASE_DIR=${WORKDIR}/golang-releases

WINDOWS_CONFORMANCE_FOCUS="\[sig-network\].+\[Conformance\]|\[sig-windows\]"
Expand Down Expand Up @@ -256,6 +257,19 @@ function collect_windows_network_info_and_logs {
tar zcf debug_logs.tar.gz "${DEBUG_LOG_PATH}"
}

function check_dhcp_status {
echo "===== Check Interface DHCP Status ====="
WINIP=$(kubectl get nodes -o wide --no-headers=true | awk '$1 ~ /win-0/ {print $6}')
WIN_BRINT_DHCP=$(ssh -o StrictHostKeyChecking=no -n administrator@${WINIP} 'powershell.exe "(Get-NetIPInterface -InterfaceAlias br-int -AddressFamily IPv4).Dhcp"')
WIN_DHCP=$(<WIN_DHCP)
if [[ ${WIN_DHCP} == ${WIN_BRINT_DHCP} ]]; then
echo "Newly created uplink DHCP status is consistent with the original adapter."
else
echo "Newly created uplink DHCP status is different from the original adapter. Original DHCP: $WIN_DHCP, br-int DHCP: $WIN_BRINT_DHCP"
exit 1
fi
}

function wait_for_antrea_windows_pods_ready {
kubectl apply -f "${WORKDIR}/antrea.yml"
if [[ "${PROXY_ALL}" == false && ${TESTCASE} =~ "windows-e2e" ]]; then
Expand All @@ -277,6 +291,7 @@ function wait_for_antrea_windows_pods_ready {
done
sleep 10
done
check_dhcp_status
}

function wait_for_antrea_windows_processes_ready {
Expand All @@ -295,6 +310,7 @@ function wait_for_antrea_windows_processes_ready {
done
sleep 10
done
check_dhcp_status
}

function clean_ns {
Expand Down Expand Up @@ -652,6 +668,11 @@ function deliver_antrea_windows_containerd {
ssh -o StrictHostKeyChecking=no -n Administrator@${IP} "gzip -d antrea-windows.tar.gz && ctr -n k8s.io images import antrea-windows.tar"
fi
done
# Add Windows interface DHCP check using CI script to obtain the original interface status.
WINIP=$(kubectl get nodes -o wide --no-headers=true | awk '$1 ~ /win-0/ {print $6}')
WIN_DHCP=$(ssh -o StrictHostKeyChecking=no -n administrator@${WINIP} 'powershell.exe "(Get-NetIPInterface -InterfaceAlias \"Ethernet0 2\" -AddressFamily IPv4).Dhcp"')
echo "Original adapter DHCP status: $WIN_DHCP"
echo $WIN_DHCP > WIN_DHCP
rm -f antrea-windows.tar
echo "==== Finish building and delivering Windows containerd images ===="
}
Expand Down
41 changes: 38 additions & 3 deletions pkg/agent/util/net_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,10 @@ const (
RT_FILTER_METRIC
RT_FILTER_DST
RT_FILTER_GW

// IP_ADAPTER_DHCP_ENABLED is defined in the Win32 API document.
// https://learn.microsoft.com/en-us/windows/win32/api/iptypes/ns-iptypes-ip_adapter_addresses_lh
IP_ADAPTER_DHCP_ENABLED = 0x00000004
)

var (
Expand Down Expand Up @@ -460,10 +464,29 @@ func PrepareHNSNetwork(subnetCIDR *net.IPNet, nodeIPNet *net.IPNet, uplinkAdapte
hnsNetworkDelete(hnsNet)
}
}()

adapter, ipFound, err := adapterIPExists(nodeIPNet.IP, uplinkAdapter.HardwareAddr, ContainerVNICPrefix)
var adapter *net.Interface
var ipFound bool
// On the current Windows testbed, it takes a maximum of 1.8 seconds to obtain a valid IP.
// Therefore, we set the timeout limit to triple of that value, allowing a maximum wait of 6 seconds here.
err = wait.PollImmediate(1*time.Second, 6*time.Second, func() (bool, error) {
tnqn marked this conversation as resolved.
Show resolved Hide resolved
var checkErr error
adapter, ipFound, checkErr = adapterIPExists(nodeIPNet.IP, uplinkAdapter.HardwareAddr, ContainerVNICPrefix)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does Windows support both DHCP and static case? If yes, these retries would add unnecessary initialization delay for static IP case.
I think it should first check if this is DHCP case, and only expects it to get IP from DHCP server when DHCP is enabled?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In my understanding, the "IP delay available issue" occurs during the creation of a new uplink interface, and this delay is consistent regardless of whether DHCP is enabled on the interface. Regarding the static IP case, we still expect to get available IP from adapter timely. Otherwise it may indicate an issue with the adapter itself. @wenyingd please correct me if I am wrong.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For static IP configurations, we still expect Windows OS can automatically migrate the IP address from pnic to vnic. We would like to use this check to ensure that Windows OS has performed expected behavior, and gives warning logs if not.

if checkErr != nil {
return false, checkErr
}
return ipFound, nil
})
if err != nil {
return err
if err == wait.ErrWaitTimeout {
dhcpStatus, err := InterfaceIPv4DhcpEnabled(uplinkAdapter.Name)
if err != nil {
klog.ErrorS(err, "Failed to get IPv4 DHCP status on the network adapter", "adapter", uplinkAdapter.Name)
} else {
klog.ErrorS(err, "Timeout acquiring IP for the adapter", "dhcpStatus", dhcpStatus)
}
} else {
return err
}
}
vNicName, index := adapter.Name, adapter.Index
// By default, "ipFound" should be true after Windows creates the HNSNetwork. The following check is for some corner
Expand Down Expand Up @@ -647,6 +670,16 @@ func HostInterfaceExists(ifaceName string) bool {
return true
}

// InterfaceIPv4DhcpEnabled returns the IPv4 DHCP status on the specified interface.
func InterfaceIPv4DhcpEnabled(ifaceName string) (bool, error) {
adapter, err := getAdapterInAllCompartmentsByName(ifaceName)
if err != nil {
return false, err
}
ipv4Dhcp := (adapter.flags&IP_ADAPTER_DHCP_ENABLED != 0)
return ipv4Dhcp, nil
}

// SetInterfaceMTU configures interface MTU on host for Pods. MTU change cannot be realized with HNSEndpoint because
// there's no MTU field in HNSEndpoint:
// https://github.com/Microsoft/hcsshim/blob/4a468a6f7ae547974bc32911395c51fb1862b7df/internal/hns/hnsendpoint.go#L12
Expand Down Expand Up @@ -1085,6 +1118,7 @@ type updateIPInterfaceFunc func(entry *antreasyscall.MibIPInterfaceRow) *antreas
type adapter struct {
net.Interface
compartmentID uint32
flags uint32
}

func (a *adapter) setMTU(mtu int, family uint16) error {
Expand Down Expand Up @@ -1250,6 +1284,7 @@ func getAdaptersByName(name string) ([]adapter, error) {
adapter := adapter{
Interface: ifi,
compartmentID: aa.CompartmentId,
flags: aa.Flags,
}
adapters = append(adapters, adapter)
}
Expand Down
129 changes: 72 additions & 57 deletions pkg/agent/util/net_windows_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ func TestPrepareHNSNetwork(t *testing.T) {
setVMCmd := fmt.Sprintf("Set-VMSwitch -ComputerName $(hostname) -Name %s -EnableSoftwareRsc $True", LocalHNSNetwork)
tests := []struct {
name string
testAdapterAddresses *windows.IpAdapterAddresses
nodeIPNet *net.IPNet
dnsServers string
newName string
Expand All @@ -320,89 +321,99 @@ func TestPrepareHNSNetwork(t *testing.T) {
wantErr error
}{
{
name: "Prepare Success",
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
newName: testNewName,
ipFound: true,
name: "Prepare Success",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
newName: testNewName,
ipFound: true,
wantCmds: []string{getAdapterCmd, renameAdapterCmd, renameNetCmd,
getVMCmd, setVMCmd},
},
{
name: "Create Error",
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
ipFound: true,
hnsNetworkCreateErr: testInvalidErr,
wantErr: fmt.Errorf("error creating HNSNetwork: invalid"),
name: "Create Error",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
ipFound: true,
hnsNetworkCreateErr: testInvalidErr,
wantErr: fmt.Errorf("error creating HNSNetwork: invalid"),
},
{
name: "adapter Err",
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
ipFound: true,
testNetInterfaceErr: testInvalidErr,
wantErr: testInvalidErr,
name: "adapter Err",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
ipFound: true,
testNetInterfaceErr: testInvalidErr,
wantErr: testInvalidErr,
},
{
name: "Rename Err",
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
newName: testNewName,
ipFound: true,
commandErr: testInvalidErr,
wantCmds: []string{getAdapterCmd},
wantErr: testInvalidErr,
name: "Rename Err",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
newName: testNewName,
ipFound: true,
commandErr: testInvalidErr,
wantCmds: []string{getAdapterCmd},
wantErr: testInvalidErr,
},
{
name: "Enable HNS Err",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
ipFound: true,
hnsNetworkRequestError: testInvalidErr,
wantErr: testInvalidErr,
},
{
name: "Enable RSC Err",
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
ipFound: true,
commandErr: testInvalidErr,
wantCmds: []string{getVMCmd},
wantErr: testInvalidErr,
name: "Enable RSC Err",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
ipFound: true,
commandErr: testInvalidErr,
wantCmds: []string{getVMCmd},
wantErr: testInvalidErr,
},
{
name: "IP Not Found",
nodeIPNet: &ipv4ZeroIPNet,
dnsServers: testDNSServer,
ipFound: false,
wantCmds: []string{newIPCmd, setServerCmd, getVMCmd, setVMCmd},
name: "IP Not Found",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4ZeroIPNet,
dnsServers: testDNSServer,
ipFound: false,
wantCmds: []string{newIPCmd, setServerCmd, getVMCmd, setVMCmd},
},
{
name: "IP Not Found Configure Default Err",
nodeIPNet: &ipv4ZeroIPNet,
dnsServers: testDNSServer,
ipFound: false,
commandErr: testInvalidErr,
wantCmds: []string{newIPCmd},
wantErr: testInvalidErr,
name: "IP Not Found Configure Default Err",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4ZeroIPNet,
dnsServers: testDNSServer,
ipFound: false,
commandErr: testInvalidErr,
wantCmds: []string{newIPCmd},
wantErr: testInvalidErr,
},
{
name: "IP Not Found Set adapter Err",
nodeIPNet: &ipv4ZeroIPNet,
dnsServers: testDNSServer,
ipFound: false,
commandErr: alreadyExistsErr,
wantCmds: []string{newIPCmd, setServerCmd},
wantErr: alreadyExistsErr,
name: "IP Not Found Set adapter Err",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4ZeroIPNet,
dnsServers: testDNSServer,
ipFound: false,
commandErr: alreadyExistsErr,
wantCmds: []string{newIPCmd, setServerCmd},
wantErr: alreadyExistsErr,
},
{
name: "IP Not Found New Net Route Err",
nodeIPNet: &ipv4ZeroIPNet,
ipFound: false,
createRowErr: fmt.Errorf("ip route not found"),
wantCmds: []string{newIPCmd},
wantErr: fmt.Errorf("failed to create new IPForward row: ip route not found"),
name: "IP Not Found New Net Route Err",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4ZeroIPNet,
ipFound: false,
createRowErr: fmt.Errorf("ip route not found"),
wantCmds: []string{newIPCmd},
wantErr: fmt.Errorf("failed to create new IPForward row: ip route not found"),
},
}

Expand All @@ -415,6 +426,7 @@ func TestPrepareHNSNetwork(t *testing.T) {
defer mockHNSNetworkCreate(tc.hnsNetworkCreateErr)()
defer mockHNSNetworkDelete(nil)()
defer mockAntreaNetIO(&antreasyscalltest.MockNetIO{CreateIPForwardEntryErr: tc.createRowErr})()
defer mockGetAdaptersAddresses(tc.testAdapterAddresses, nil)()
gotErr := PrepareHNSNetwork(testSubnetCIDR, tc.nodeIPNet, testUplinkAdapter, "testGateway", tc.dnsServers, testRoutes, tc.newName)
assert.Equal(t, tc.wantErr, gotErr)
})
Expand Down Expand Up @@ -1076,6 +1088,7 @@ func TestGetAdapterInAllCompartmentsByName(t *testing.T) {
HardwareAddr: testMACAddr,
},
compartmentID: 1,
flags: IP_ADAPTER_DHCP_ENABLED,
}
tests := []struct {
name string
Expand Down Expand Up @@ -1139,6 +1152,7 @@ func createTestAdapterAddresses(name string) *windows.IpAdapterAddresses {
PhysicalAddressLength: 6,
PhysicalAddress: testPhysicalAddress,
CompartmentId: 1,
Flags: IP_ADAPTER_DHCP_ENABLED,
}
}

Expand Down Expand Up @@ -1171,6 +1185,7 @@ func mockGetAdaptersAddresses(testAdaptersAddresses *windows.IpAdapterAddresses,
adapterAddresses.PhysicalAddressLength = testAdaptersAddresses.PhysicalAddressLength
adapterAddresses.PhysicalAddress = testAdaptersAddresses.PhysicalAddress
adapterAddresses.CompartmentId = testAdaptersAddresses.CompartmentId
adapterAddresses.Flags = testAdaptersAddresses.Flags
}
return err
}
Expand Down
Loading