diff --git a/neighsyncd/neighsync.h b/neighsyncd/neighsync.h index 9360a18713e0..66fd1c2645b1 100644 --- a/neighsyncd/neighsync.h +++ b/neighsyncd/neighsync.h @@ -11,10 +11,10 @@ /* * This is the timer value (in seconds) that the neighsyncd waits for restore_neighbors - * service to finish, should be longer than the restore_neighbors timeout value (60) + * service to finish, should be longer than the restore_neighbors timeout value (110) * This should not happen, if happens, system is in a unknown state, we should exit. */ -#define RESTORE_NEIGH_WAIT_TIME_OUT 70 +#define RESTORE_NEIGH_WAIT_TIME_OUT 120 namespace swss { diff --git a/neighsyncd/restore_neighbors.py b/neighsyncd/restore_neighbors.py index 387723dfe9e5..e0a0eea9434e 100755 --- a/neighsyncd/restore_neighbors.py +++ b/neighsyncd/restore_neighbors.py @@ -30,11 +30,12 @@ logger.setLevel(logging.WARNING) logger.addHandler(logging.NullHandler()) -# timeout the restore process in 1 min if not finished +# timeout the restore process in 110 seconds if not finished # This is mostly to wait for interfaces to be created and up after system warm-reboot # and this process is started by supervisord in swss docker. -# It would be good to keep that time below routing reconciliation time-out. -TIME_OUT = 60 +# There had been devices taking close to 70 seconds to complete restoration, setting +# default timeout to 110 seconds. +DEF_TIME_OUT = 110 # every 5 seconds to check interfaces states CHECK_INTERVAL = 5 @@ -189,13 +190,13 @@ def set_statedb_neigh_restore_done(): # Once all the entries are restored, this function is returned. # The interfaces' states were checked in a loop with an interval (CHECK_INTERVAL) # The function will timeout in case interfaces' states never meet the condition -# after some time (TIME_OUT). -def restore_update_kernel_neighbors(intf_neigh_map): +# after some time (DEF_TIME_OUT). +def restore_update_kernel_neighbors(intf_neigh_map, timeout=DEF_TIME_OUT): # create object for netlink calls to kernel ipclass = IPRoute() mtime = monotonic.time.time start_time = mtime() - while (mtime() - start_time) < TIME_OUT: + while (mtime() - start_time) < timeout: for intf, family_neigh_map in intf_neigh_map.items(): # only try to restore to kernel when link is up if is_intf_oper_state_up(intf):