From a5b51eb9a5133194bb4d4770114b39f7119e2ebd Mon Sep 17 00:00:00 2001
From: Ying Xie
Date: Thu, 29 Nov 2018 15:00:37 -0800
Subject: [PATCH] [warm reboot] add 2-stage SAI shutdown support for system level warm reboot (#394)

* [warm reboot] pre-shutdown syncd during warm reboot

Signed-off-by: Ying Xie

* [warm reboot] backup database before killing teamd

* [warm-reboot] wait up to 60 seconds for pre-shutdown to complete

Signed-off-by: Ying Xie

* fix typo
---
 scripts/fast-reboot | 96 +++++++++++++++++++++++++++++++++------------
 1 file changed, 70 insertions(+), 26 deletions(-)

diff --git a/scripts/fast-reboot b/scripts/fast-reboot
index 36dee8ebfbb6..8e2a94b95289 100755
--- a/scripts/fast-reboot
+++ b/scripts/fast-reboot
@@ -25,6 +25,60 @@ function clear_warm_boot()
     fi
 }
 
+function initialize_pre_shutdown()
+{
+    TABLE="WARM_RESTART_TABLE|warm-shutdown"
+    RESTORE_COUNT=$(/usr/bin/redis-cli -n 6 hget "${TABLE}" restore_count)
+    if [[ -z "$RESTORE_COUNT" ]]; then
+        /usr/bin/redis-cli -n 6 hset "${TABLE}" restore_count 0
+    fi
+    /usr/bin/redis-cli -n 6 hset "${TABLE}" state requesting
+}
+
+function request_pre_shutdown()
+{
+    /usr/bin/docker exec -i syncd /usr/bin/syncd_request_shutdown --pre
+}
+
+function wait_for_pre_shutdown_complete_or_fail()
+{
+    TABLE="WARM_RESTART_TABLE|warm-shutdown"
+    STATE="requesting"
+    declare -i waitcount
+    waitcount=0
+    # Poll every 0.1 seconds, at most 600 times (about 60 seconds), for pre-shutdown to complete
+    while [[ ${waitcount} -lt 600 ]]; do
+        STATE=$(/usr/bin/redis-cli -n 6 hget "${TABLE}" state)
+        if [[ "${STATE}" != "requesting" ]]; then
+            break
+        fi
+        sleep 0.1
+        waitcount+=1
+    done
+
+    if [[ "${STATE}" != "pre-shutdown-succeeded" ]]; then
+        echo "Syncd pre-shutdown failed: ${STATE} ..." >&2
+        exit 10
+    fi
+}
+
+function backup_datebase()
+{
+    # Dump redis content to a file 'dump.rdb' in warmboot directory
+    mkdir -p "$WARM_DIR"
+    # Delete keys in stateDB except FDB_TABLE|* and WARM_RESTART_TABLE|*
+    redis-cli -n 6 eval "
+        for _, k in ipairs(redis.call('keys', '*')) do
+            if not string.match(k, 'FDB_TABLE|') and not string.match(k, 'WARM_RESTART_TABLE|') then
+                redis.call('del', k)
+            end
+        end
+    " 0
+    redis-cli save
+    docker cp "database:/var/lib/redis/$REDIS_FILE" "$WARM_DIR"
+    docker exec -i database rm "/var/lib/redis/$REDIS_FILE"
+}
+
 # Check reboot type supported
 BOOT_TYPE_ARG="cold"
 case "$REBOOT_TYPE" in
@@ -116,14 +170,6 @@ docker exec -i bgp pkill -9 bgpd
 # Kill lldp, otherwise it sends informotion about reboot
 docker kill lldp > /dev/null
 
-# Stop teamd gracefully
-if [[ "$REBOOT_TYPE" = "warm-reboot" ]]; then
-    # Send USR1 signal to all teamd instances to stop them
-    # It will prepare teamd for warm-reboot
-    # Note: We must send USR1 signal before syncd, because it will send the last packet through CPU port
-    docker exec -i teamd pkill -USR1 teamd > /dev/null
-fi
-
 if [[ "$REBOOT_TYPE" = "fast-reboot" ]]; then
     # Kill teamd, otherwise it gets down all LAGs
     # Note: teamd must be killed before syncd, because it will send the last packet through CPU port
@@ -134,27 +180,25 @@ fi
 # Kill swss dockers
 docker kill swss
 
-# syncd service stop is capable of handling both warm/fast/cold shutdown
-systemctl stop syncd
-
-# Warm reboot: dump state to host disk
-# Note: even if syncd changed ASIC_DB before killed, we don't care
+# Pre-shutdown syncd and stop teamd gracefully
 if [[ "$REBOOT_TYPE" = "warm-reboot" ]]; then
-    # Dump redis content to a file 'dump.rdb' in warmboot directory
-    mkdir -p $WARM_DIR
-    # Delete keys in stateDB except FDB_TABLE|* and WARM_RESTA*
-    redis-cli -n 6 eval "
-        for _, k in ipairs(redis.call('keys', '*')) do
-            if not string.match(k, 'FDB_TABLE|') and not string.match(k, 'WARM_RESTART_TABLE|') then
-                redis.call('del', k)
-            end
-        end
-    " 0
-    redis-cli save
-    docker cp database:/var/lib/redis/$REDIS_FILE $WARM_DIR
-    docker exec -i database rm /var/lib/redis/$REDIS_FILE
+    initialize_pre_shutdown
+
+    request_pre_shutdown
+
+    wait_for_pre_shutdown_complete_or_fail
+
+    backup_datebase
+
+    # Send USR1 signal to all teamd instances to stop them
+    # It will prepare teamd for warm-reboot
+    # Note: We must send USR1 signal before syncd, because it will send the last packet through CPU port
+    docker exec -i teamd pkill -USR1 teamd > /dev/null
 fi
 
+# syncd service stop is capable of handling both warm/fast/cold shutdown
+systemctl stop syncd
+
 # Kill other containers to make the reboot faster
 docker ps -q | xargs docker kill > /dev/null
 