Skip to content

Commit

Permalink
[warm reboot] add 2-stage SAI shutdown support for system level warm …
Browse files Browse the repository at this point in the history
…reboot (sonic-net#394)

* [warm reboot] pre-shutdown syncd during warm reboot

Signed-off-by: Ying Xie <[email protected]>

* [warm reboot] backup database before killing teamd

* [warm-reboot] wait up to 60 seconds for pre-shutdown to complete

Signed-off-by: Ying Xie <[email protected]>

* fix typo
  • Loading branch information
yxieca authored Nov 29, 2018
1 parent 815bb8a commit a5b51eb
Showing 1 changed file with 70 additions and 26 deletions.
96 changes: 70 additions & 26 deletions scripts/fast-reboot
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,60 @@ function clear_warm_boot()
fi
}

function initialize_pre_shutdown()
{
    # Prepare the warm-shutdown entry in redis database 6 before asking
    # syncd to pre-shutdown:
    #   * make sure the restore_count field exists (seed it with 0 if the
    #     lookup returns nothing),
    #   * set the state field to "requesting".
    # TABLE and RESTORE_COUNT are deliberately left as globals, exactly as
    # before, in case code elsewhere in the script reads them.
    TABLE="WARM_RESTART_TABLE|warm-shutdown"
    RESTORE_COUNT=$(/usr/bin/redis-cli -n 6 hget "${TABLE}" restore_count)

    # Only seed the counter when no value came back.
    [[ -n "${RESTORE_COUNT}" ]] || /usr/bin/redis-cli -n 6 hset "${TABLE}" restore_count 0

    /usr/bin/redis-cli -n 6 hset "${TABLE}" state requesting
}

function request_pre_shutdown()
{
    # Run syncd_request_shutdown with the --pre flag inside the syncd
    # container. The function's exit status is that of the docker command.
    /usr/bin/docker exec -i syncd \
        /usr/bin/syncd_request_shutdown --pre
}

function wait_for_pre_shutdown_complete_or_fail()
{
    # Poll the warm-shutdown state field in redis database 6 until it is no
    # longer "requesting", or until 600 polls (0.1s apart, i.e. roughly
    # 60 seconds of sleeping) have been made.
    # Returns 0 when the final state is "pre-shutdown-succeeded"; otherwise
    # prints the last observed state and exits the script with status 10.
    # TABLE and STATE stay global, as before; waitcount is function-local
    # because it is declared inside the function.
    TABLE="WARM_RESTART_TABLE|warm-shutdown"
    STATE="requesting"
    declare -i waitcount=0

    # Wait up to 60 seconds for pre-shutdown to complete
    until (( waitcount >= 600 )); do
        STATE=$(/usr/bin/redis-cli -n 6 hget "${TABLE}" state)
        # Any state other than "requesting" ends the wait.
        [[ "${STATE}" == "requesting" ]] || break
        sleep 0.1
        waitcount=$(( waitcount + 1 ))
    done

    if [[ "${STATE}" == "pre-shutdown-succeeded" ]]; then
        return 0
    fi

    echo "Syncd pre-shutdown failed: ${STATE} ..."
    exit 10
}

function backup_datebase()
{
    # Save the redis contents for warm reboot.
    # Globals read:
    #   WARM_DIR   - host directory that receives the dump file
    #   REDIS_FILE - name of the dump file under /var/lib/redis in the
    #                database container
    # Steps: create WARM_DIR, prune redis database 6, force a redis save,
    # copy the dump file out of the database container, then remove it
    # from the container.
    #
    # All expansions of WARM_DIR / REDIS_FILE are quoted so that a value
    # containing whitespace or glob characters is passed as one argument.

    # Dump redis content to a file 'dump.rdb' in warmboot directory
    mkdir -p "${WARM_DIR}"

    # Delete keys in stateDB except FDB_TABLE|* and WARM_RESTART_TABLE|*
    # (string.match is unanchored, so a key is kept when the text appears
    # anywhere in its name).
    redis-cli -n 6 eval "
for _, k in ipairs(redis.call('keys', '*')) do
if not string.match(k, 'FDB_TABLE|') and not string.match(k, 'WARM_RESTART_TABLE|') then
redis.call('del', k)
end
end
" 0

    redis-cli save
    docker cp "database:/var/lib/redis/${REDIS_FILE}" "${WARM_DIR}"
    docker exec -i database rm "/var/lib/redis/${REDIS_FILE}"
}

# Check reboot type supported
BOOT_TYPE_ARG="cold"
case "$REBOOT_TYPE" in
Expand Down Expand Up @@ -116,14 +170,6 @@ docker exec -i bgp pkill -9 bgpd
# Kill lldp, otherwise it sends information about reboot
docker kill lldp > /dev/null

# Stop teamd gracefully
if [[ "$REBOOT_TYPE" = "warm-reboot" ]]; then
# Send USR1 signal to all teamd instances to stop them
# It will prepare teamd for warm-reboot
# Note: We must send USR1 signal before syncd, because it will send the last packet through CPU port
docker exec -i teamd pkill -USR1 teamd > /dev/null
fi

if [[ "$REBOOT_TYPE" = "fast-reboot" ]]; then
# Kill teamd, otherwise it gets down all LAGs
# Note: teamd must be killed before syncd, because it will send the last packet through CPU port
Expand All @@ -134,27 +180,25 @@ fi
# Kill swss dockers
docker kill swss

# syncd service stop is capable of handling both warm/fast/cold shutdown
systemctl stop syncd

# Warm reboot: dump state to host disk
# Note: even if syncd changed ASIC_DB before killed, we don't care
# Pre-shutdown syncd and stop teamd gracefully
if [[ "$REBOOT_TYPE" = "warm-reboot" ]]; then
# Dump redis content to a file 'dump.rdb' in warmboot directory
mkdir -p $WARM_DIR
# Delete keys in stateDB except FDB_TABLE|* and WARM_RESTA*
redis-cli -n 6 eval "
for _, k in ipairs(redis.call('keys', '*')) do
if not string.match(k, 'FDB_TABLE|') and not string.match(k, 'WARM_RESTART_TABLE|') then
redis.call('del', k)
end
end
" 0
redis-cli save
docker cp database:/var/lib/redis/$REDIS_FILE $WARM_DIR
docker exec -i database rm /var/lib/redis/$REDIS_FILE
initialize_pre_shutdown

request_pre_shutdown

wait_for_pre_shutdown_complete_or_fail

backup_datebase

# Send USR1 signal to all teamd instances to stop them
# It will prepare teamd for warm-reboot
# Note: We must send USR1 signal before syncd, because it will send the last packet through CPU port
docker exec -i teamd pkill -USR1 teamd > /dev/null
fi

# syncd service stop is capable of handling both warm/fast/cold shutdown
systemctl stop syncd

# Kill other containers to make the reboot faster
docker ps -q | xargs docker kill > /dev/null

Expand Down

0 comments on commit a5b51eb

Please sign in to comment.