diff --git a/resources/playbook/roles/bibigrid/files/slurm/fail.sh b/resources/playbook/roles/bibigrid/files/slurm/fail.sh
index 38d723b4..af7904e9 100644
--- a/resources/playbook/roles/bibigrid/files/slurm/fail.sh
+++ b/resources/playbook/roles/bibigrid/files/slurm/fail.sh
@@ -21,6 +21,7 @@ process_string() {
 }
 
 mkdir -p worker_logs
+mkdir -p worker_logs/fail
 mkdir -p worker_logs/fail/out
 mkdir -p worker_logs/fail/err
 
@@ -34,16 +35,13 @@ function log {
 
 log "Fail-Script started"
 
-# $1 is in slurm node format for example: bibigrid-worker0-cid-[0-1],bibigrid-worker1-cid-0 and needs no converting
-scontrol update NodeName="$1" state=RESUME reason=FailedStartup # no sudo needed cause executed by slurm user
-
 hosts=$(scontrol show hostnames "$1")
 
-echo "Hosts $hosts used"
+log "Hosts $hosts used"
 
-# delete servers
-python3 /usr/local/bin/delete_server.py "${hosts}"
+# $1 is in slurm node format for example: bibigrid-worker0-cid-[0-1],bibigrid-worker1-cid-0 and needs no converting
+scontrol update NodeName="$1" state=POWER_DOWN reason=FailedStartup # no sudo needed cause executed by slurm user
 
-echo "Finished delete_server.py execution."
+log "Nodes $1 set to POWER_DOWN."
 
 exit $?