From 1510905c3a5910f34583824e083e6717db825f67 Mon Sep 17 00:00:00 2001 From: oilbeater Date: Tue, 30 Aug 2022 16:04:47 +0800 Subject: [PATCH] feat: reduce downtime by increasing arp cache timeout When upgrading ovs-ovn, if the ARP cache times out, new ARP requests cannot be processed, which leads to network unreachable errors. Increase base_reachable_time_ms and gc_stale_time to 3 minutes to prevent ARP cache timeout during the upgrade. (cherry picked from commit 0adecb0cb7217e77210b6afcf8b7bab530ae98b5) --- dist/images/install.sh | 12 ++++++++++++ dist/images/start-ovs.sh | 22 +++++++++++++++++++--- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/dist/images/install.sh b/dist/images/install.sh index aae124cdeae..0eb2ef36cb6 100755 --- a/dist/images/install.sh +++ b/dist/images/install.sh @@ -1766,6 +1766,9 @@ spec: - name: OVN_DB_IPS value: $addresses volumeMounts: + - mountPath: /var/run/netns + name: host-ns + mountPropagation: HostToContainer - mountPath: /lib/modules name: host-modules readOnly: true @@ -1834,6 +1837,9 @@ spec: - name: host-sys hostPath: path: /sys + - name: host-ns + hostPath: + path: /var/run/netns - name: cni-conf hostPath: path: /etc/cni/net.d @@ -2251,6 +2257,9 @@ spec: - name: OVN_DB_IPS value: $addresses volumeMounts: + - mountPath: /var/run/netns + name: host-ns + mountPropagation: HostToContainer - mountPath: /lib/modules name: host-modules readOnly: true @@ -2314,6 +2323,9 @@ spec: - name: host-sys hostPath: path: /sys + - name: host-ns + hostPath: + path: /var/run/netns - name: cni-conf hostPath: path: /etc/cni/net.d diff --git a/dist/images/start-ovs.sh b/dist/images/start-ovs.sh index 33836a15d2b..149043db2ac 100755 --- a/dist/images/start-ovs.sh +++ b/dist/images/start-ovs.sh @@ -35,9 +35,18 @@ cat /proc/cmdline" fi function quit { - /usr/share/ovn/scripts/grace_stop_ovn_controller - /usr/share/openvswitch/scripts/ovs-ctl stop - exit 0 + set +e + for netns in /var/run/netns/*; do + nsenter --net="$netns" sysctl -w 
net.ipv4.neigh.eth0.base_reachable_time_ms=180000; + nsenter --net="$netns" sysctl -w net.ipv4.neigh.eth0.gc_stale_time=180; + done + # If an ARP entry is in stale or delay state, stopping vswitchd will cause the probe to fail. + # Wait a while for the probe to be ready. + # As the timeout has been increased, existing entries will not change to stale or delay at the moment. + sleep 5 + /usr/share/ovn/scripts/grace_stop_ovn_controller + /usr/share/openvswitch/scripts/ovs-ctl stop + exit 0 } trap quit EXIT @@ -253,5 +262,12 @@ set -e ovs-vsctl --no-wait set open_vswitch . other_config:flow-restore-wait="false" +set +e +for netns in /var/run/netns/*; do + nsenter --net="$netns" sysctl -w net.ipv4.neigh.eth0.base_reachable_time_ms=30000; + nsenter --net="$netns" sysctl -w net.ipv4.neigh.eth0.gc_stale_time=60; +done +set -e + chmod 600 /etc/openvswitch/* tail --follow=name --retry /var/log/ovn/ovn-controller.log