diff --git a/parts/k8s/kubernetesagentcustomdata.yml b/parts/k8s/kubernetesagentcustomdata.yml index 8c018a7343..ff8adabec2 100644 --- a/parts/k8s/kubernetesagentcustomdata.yml +++ b/parts/k8s/kubernetesagentcustomdata.yml @@ -165,24 +165,24 @@ coreos: ExecStart=/opt/azure/containers/provision-setup.sh {{else}} runcmd: +- retrycmd_if_failure() { for i in $(seq 1 36); do $@; [ $? -eq 0 ] && break || sleep 5; done ; } - echo `date`,`hostname`, startruncmd>>/opt/m - apt-mark hold walinuxagent{{GetKubernetesAgentPreprovisionYaml .}} - echo `date`,`hostname`, preaptupdate>>/opt/m -- apt-get update +- retrycmd_if_failure apt-get update - echo `date`,`hostname`, postaptupdate>>/opt/m -- apt-get install -y apt-transport-https ca-certificates nfs-common +- retrycmd_if_failure apt-get install -y apt-transport-https ca-certificates nfs-common - echo `date`,`hostname`, aptinstall>>/opt/m - systemctl enable rpcbind - systemctl enable rpc-statd - systemctl start rpcbind - systemctl start rpc-statd - echo `date`,`hostname`, predockerinstall>>/opt/m -- for i in 1 2 3 4 5; do curl --max-time 60 -fsSL https://aptdocker.azureedge.net/gpg | apt-key add -; [ $? -eq 0 ] && break || sleep 5; done +- retrycmd_if_failure curl --retry 5 --retry-delay 10 --retry-max-time 30 --max-time 60 -fsSL https://aptdocker.azureedge.net/gpg | apt-key add - - echo "deb {{WrapAsVariable "dockerEngineDownloadRepo"}} ubuntu-xenial main" | sudo tee /etc/apt/sources.list.d/docker.list - "echo \"Package: docker-engine\nPin: version {{WrapAsVariable "dockerEngineVersion"}}\nPin-Priority: 550\n\" > /etc/apt/preferences.d/docker.pref" -- apt-get update -- apt-get install -y ebtables -- apt-get install -y docker-engine +- retrycmd_if_failure apt-get update +- retrycmd_if_failure apt-get install -y ebtables docker-engine - echo "ExecStartPost=/sbin/iptables -P FORWARD ACCEPT" >> /etc/systemd/system/docker.service.d/exec_start.conf - systemctl daemon-reload - echo `date`,`hostname`, postdockerinstall>>/opt/m diff --git a/parts/k8s/kubernetesmastercustomdata.yml b/parts/k8s/kubernetesmastercustomdata.yml index 29ceeb89fe..bba92de2f8 100644 --- a/parts/k8s/kubernetesmastercustomdata.yml +++ b/parts/k8s/kubernetesmastercustomdata.yml @@ -292,22 +292,17 @@ MASTER_ARTIFACTS_CONFIG_PLACEHOLDER content: | #!/bin/bash set -x + retrycmd_if_failure() { for i in $(seq 1 36); do $@; [ $? -eq 0 ] && break || sleep 5; done ; } ETCD_VER=v{{WrapAsVariable "etcdVersion"}} DOWNLOAD_URL={{WrapAsVariable "etcdDownloadURLBase"}} mkdir -p /tmp/etcd-download - curl -L ${DOWNLOAD_URL}/etcd-${ETCD_VER}-linux-amd64.tar.gz -o /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz + retrycmd_if_failure curl --retry 5 --retry-delay 10 --retry-max-time 30 -L ${DOWNLOAD_URL}/etcd-${ETCD_VER}-linux-amd64.tar.gz -o /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz tar xzvf /tmp/etcd-${ETCD_VER}-linux-amd64.tar.gz -C /usr/bin/ --strip-components=1 - useradd -U "etcd" + useradd -U "etcd" > /dev/null 2>&1 usermod -p "$(head -c 32 /dev/urandom | base64)" "etcd" - passwd -u "$etcd" > /dev/null + passwd -u "etcd" > /dev/null systemctl daemon-reload systemctl enable etcd.service - for i in {1..60}; do - if [ -e /opt/azure/containers/etcdcerts.complete ]; then - break - fi - sleep 1 - done sudo sed -i "1iETCDCTL_ENDPOINTS=https://127.0.0.1:2379" /etc/environment sudo sed -i "1iETCDCTL_CA_FILE={{WrapAsVariable "etcdCaFilepath"}}" /etc/environment sudo sed -i "1iETCDCTL_KEY_FILE={{WrapAsVariable "etcdClientKeyFilepath"}}" /etc/environment @@ -319,6 +314,7 @@ MASTER_ARTIFACTS_CONFIG_PLACEHOLDER owner: "root" content: | #!/bin/bash + retrycmd_if_failure() { for i in $(seq 1 36); do $@; [ $? -eq 0 ] && break || sleep 5; done ; } /bin/echo DAEMON_ARGS=--name "{{WrapAsVerbatim "variables('masterVMNames')[copyIndex(variables('masterOffset'))]"}}" --initial-advertise-peer-urls "{{WrapAsVerbatim "variables('masterEtcdPeerURLs')[copyIndex(variables('masterOffset'))]"}}" --listen-peer-urls "{{WrapAsVerbatim "variables('masterEtcdPeerURLs')[copyIndex(variables('masterOffset'))]"}}" --advertise-client-urls "{{WrapAsVerbatim "variables('masterEtcdClientURLs')[copyIndex(variables('masterOffset'))]"}}" --listen-client-urls "{{WrapAsVerbatim "concat(variables('masterEtcdClientURLs')[copyIndex(variables('masterOffset'))], ',http://127.0.0.1:', variables('masterEtcdClientPort'))"}}" --initial-cluster-token "k8s-etcd-cluster" --initial-cluster "{{WrapAsVerbatim "variables('masterEtcdClusterStates')[div(variables('masterCount'), 2)]"}} --data-dir "/var/lib/etcddisk"" --initial-cluster-state "new" | tee -a /etc/default/etcd sudo /bin/chown -R etcd:etcd /var/lib/etcd/default /opt/azure/containers/mountetcd.sh @@ -328,7 +324,7 @@ MASTER_ARTIFACTS_CONFIG_PLACEHOLDER sudo /bin/sed -i s/Restart=on-failure/Restart=always/g /lib/systemd/system/etcd-member.service systemctl daemon-reload systemctl restart etcd-member - for i in $(seq 1 20); do curl --max-time 60 http://127.0.0.1:2379/v2/machines; [ $? -eq 0 ] && break || sleep 5; done + retrycmd_if_failure curl --retry 5 --retry-delay 10 --retry-max-time 30 --max-time 60 http://127.0.0.1:2379/v2/machines mkdir -p /etc/kubernetes/manifests usermod -aG docker {{WrapAsVariable "username"}} @@ -351,8 +347,8 @@ coreos: ExecStart=/opt/azure/containers/provision-setup.sh {{else}} runcmd: -- retrycmd_if_failure() { for i in {1..5}; do $@; [ $? -eq 0 ] && break || sleep 5; done ; } -- /opt/azure/containers/setup-etcd.sh +- retrycmd_if_failure() { for i in $(seq 1 36); do $@; [ $? -eq 0 ] && break || sleep 5; done ; } +- /opt/azure/containers/setup-etcd.sh > /opt/azure/containers/setup-etcd.log 2>&1 - apt-mark hold walinuxagent {{GetKubernetesMasterPreprovisionYaml}} - /bin/echo DAEMON_ARGS=--name "{{WrapAsVerbatim "variables('masterVMNames')[copyIndex(variables('masterOffset'))]"}}" --peer-client-cert-auth --peer-trusted-ca-file={{WrapAsVariable "etcdCaFilepath"}} --peer-cert-file={{WrapAsVerbatim "variables('etcdPeerCertFilepath')[copyIndex(variables('masterOffset'))]"}} --peer-key-file={{WrapAsVerbatim "variables('etcdPeerKeyFilepath')[copyIndex(variables('masterOffset'))]"}} --initial-advertise-peer-urls "{{WrapAsVerbatim "variables('masterEtcdPeerURLs')[copyIndex(variables('masterOffset'))]"}}" --listen-peer-urls "{{WrapAsVerbatim "variables('masterEtcdPeerURLs')[copyIndex(variables('masterOffset'))]"}}" --client-cert-auth --trusted-ca-file={{WrapAsVariable "etcdCaFilepath"}} --cert-file={{WrapAsVariable "etcdServerCertFilepath"}} --key-file={{WrapAsVariable "etcdServerKeyFilepath"}} --advertise-client-urls "{{WrapAsVerbatim "variables('masterEtcdClientURLs')[copyIndex(variables('masterOffset'))]"}}" --listen-client-urls "{{WrapAsVerbatim "concat(variables('masterEtcdClientURLs')[copyIndex(variables('masterOffset'))], ',https://127.0.0.1:', variables('masterEtcdClientPort'))"}}" --initial-cluster-token "k8s-etcd-cluster" --initial-cluster "{{WrapAsVerbatim "variables('masterEtcdClusterStates')[div(variables('masterCount'), 2)]"}} --data-dir "/var/lib/etcddisk"" --initial-cluster-state "new" | tee -a /etc/default/etcd - /opt/azure/containers/mountetcd.sh @@ -360,15 +356,14 @@ runcmd: - systemctl stop etcd - sudo -u etcd rm -rf /var/lib/etcd/default - systemctl daemon-reload -- sudo /bin/chown etcd:etcd {{WrapAsVariable "etcdServerKeyFilepath"}} -- sudo /bin/chown etcd:etcd {{WrapAsVerbatim "variables('etcdPeerKeyFilepath')[copyIndex(variables('masterOffset'))]"}} +- retrycmd_if_failure cat {{WrapAsVerbatim "variables('etcdPeerCertFilepath')[copyIndex(variables('masterOffset'))]"}} - systemctl restart etcd - MEMBER="$(sudo etcdctl member list | grep -E {{WrapAsVerbatim "variables('masterVMNames')[copyIndex(variables('masterOffset'))]"}} | cut -d{{WrapAsVariable "singleQuote"}}:{{WrapAsVariable "singleQuote"}} -f 1)" - sudo etcdctl member update ${MEMBER} {{WrapAsVerbatim "variables('masterEtcdPeerURLs')[copyIndex(variables('masterOffset'))]"}} -- for i in $(seq 1 20); do curl --cacert /etc/kubernetes/certs/ca.crt --cert /etc/kubernetes/certs/etcdclient.crt --key /etc/kubernetes/certs/etcdclient.key --max-time 60 "{{WrapAsVerbatim "variables('masterEtcdClientURLs')[copyIndex(variables('masterOffset'))]"}}"/v2/machines; [ $? -eq 0 ] && break || sleep 5; done +- retrycmd_if_failure curl --cacert /etc/kubernetes/certs/ca.crt --cert /etc/kubernetes/certs/etcdclient.crt --key /etc/kubernetes/certs/etcdclient.key --retry 5 --retry-delay 10 --retry-max-time 30 --max-time 60 "{{WrapAsVerbatim "variables('masterEtcdClientURLs')[copyIndex(variables('masterOffset'))]"}}"/v2/machines - retrycmd_if_failure apt-get update - retrycmd_if_failure apt-get install -y apt-transport-https ca-certificates -- retrycmd_if_failure curl --max-time 60 -fsSL https://aptdocker.azureedge.net/gpg | apt-key add - +- retrycmd_if_failure curl --retry 5 --retry-delay 10 --retry-max-time 30 --max-time 60 -fsSL https://aptdocker.azureedge.net/gpg | apt-key add - - echo "deb {{WrapAsVariable "dockerEngineDownloadRepo"}} ubuntu-xenial main" | sudo tee /etc/apt/sources.list.d/docker.list - "echo \"Package: docker-engine\nPin: version {{WrapAsVariable "dockerEngineVersion"}}\nPin-Priority: 550\n\" > /etc/apt/preferences.d/docker.pref" - retrycmd_if_failure apt-get update diff --git a/parts/k8s/kubernetesmastercustomscript.sh b/parts/k8s/kubernetesmastercustomscript.sh index 71e29e78c5..986cc8770f 100644 --- a/parts/k8s/kubernetesmastercustomscript.sh +++ b/parts/k8s/kubernetesmastercustomscript.sh @@ -69,7 +69,9 @@ fi if [[ ! -z "${MASTER_NODE}" ]]; then echo "executing master node provision operations" - + + useradd -U "etcd" > /dev/null 2>&1 + APISERVER_PRIVATE_KEY_PATH="/etc/kubernetes/certs/apiserver.key" touch "${APISERVER_PRIVATE_KEY_PATH}" chmod 0600 "${APISERVER_PRIVATE_KEY_PATH}" @@ -85,7 +87,7 @@ if [[ ! -z "${MASTER_NODE}" ]]; then ETCD_SERVER_PRIVATE_KEY_PATH="/etc/kubernetes/certs/etcdserver.key" touch "${ETCD_SERVER_PRIVATE_KEY_PATH}" chmod 0600 "${ETCD_SERVER_PRIVATE_KEY_PATH}" - chown root:root "${ETCD_SERVER_PRIVATE_KEY_PATH}" + chown etcd:etcd "${ETCD_SERVER_PRIVATE_KEY_PATH}" echo "${ETCD_SERVER_PRIVATE_KEY}" | base64 --decode > "${ETCD_SERVER_PRIVATE_KEY_PATH}" ETCD_CLIENT_PRIVATE_KEY_PATH="/etc/kubernetes/certs/etcdclient.key" @@ -97,7 +99,7 @@ if [[ ! -z "${MASTER_NODE}" ]]; then ETCD_PEER_PRIVATE_KEY_PATH="/etc/kubernetes/certs/etcdpeer${MASTER_INDEX}.key" touch "${ETCD_PEER_PRIVATE_KEY_PATH}" chmod 0600 "${ETCD_PEER_PRIVATE_KEY_PATH}" - chown root:root "${ETCD_PEER_PRIVATE_KEY_PATH}" + chown etcd:etcd "${ETCD_PEER_PRIVATE_KEY_PATH}" echo "${ETCD_PEER_KEY}" | base64 --decode > "${ETCD_PEER_PRIVATE_KEY_PATH}" ETCD_SERVER_CERTIFICATE_PATH="/etc/kubernetes/certs/etcdserver.crt" @@ -119,7 +121,6 @@ if [[ ! -z "${MASTER_NODE}" ]]; then echo "${ETCD_PEER_CERT}" | base64 --decode > "${ETCD_PEER_CERTIFICATE_PATH}" echo `date`,`hostname`, finishedGettingEtcdCerts>>/opt/m - mkdir -p /opt/azure/containers && touch /opt/azure/containers/etcdcerts.complete else echo "skipping master node provision operations, this is an agent node" fi @@ -272,7 +273,7 @@ function configNetworkPolicy() { function installClearContainersRuntime() { # Add Clear Containers repository key echo "Adding Clear Containers repository key..." - curl -sSL "https://download.opensuse.org/repositories/home:clearcontainers:clear-containers-3/xUbuntu_16.04/Release.key" | apt-key add - + curl -sSL --retry 5 --retry-delay 10 --retry-max-time 30 "https://download.opensuse.org/repositories/home:clearcontainers:clear-containers-3/xUbuntu_16.04/Release.key" | apt-key add - # Add Clear Container repository echo "Adding Clear Containers repository..." @@ -321,13 +322,13 @@ function installGo() { fi # Get the latest Go version - GO_VERSION=$(curl -sSL "https://golang.org/VERSION?m=text") + GO_VERSION=$(curl --retry 5 --retry-delay 10 --retry-max-time 30 -sSL "https://golang.org/VERSION?m=text") echo "Installing Go version $GO_VERSION..." # subshell ( - curl -sSL "https://storage.googleapis.com/golang/${GO_VERSION}.linux-amd64.tar.gz" | sudo tar -v -C /usr/local -xz + curl --retry 5 --retry-delay 10 --retry-max-time 30 -sSL "https://storage.googleapis.com/golang/${GO_VERSION}.linux-amd64.tar.gz" | sudo tar -v -C /usr/local -xz ) # Set GOPATH and update PATH diff --git a/pkg/acsengine/engine.go b/pkg/acsengine/engine.go index 76a0650880..faced27983 100644 --- a/pkg/acsengine/engine.go +++ b/pkg/acsengine/engine.go @@ -1629,7 +1629,7 @@ func makeExtensionScriptCommands(extension *api.Extension, extensionProfiles []* extensionsParameterReference := fmt.Sprintf("parameters('%sParameters')", extensionProfile.Name) scriptURL := getExtensionURL(extensionProfile.RootURL, extensionProfile.Name, extensionProfile.Version, extensionProfile.Script, extensionProfile.URLQuery) scriptFilePath := fmt.Sprintf("/opt/azure/containers/extensions/%s/%s", extensionProfile.Name, extensionProfile.Script) - return fmt.Sprintf("- sudo /usr/bin/curl -o %s --create-dirs \"%s\" \n- sudo /bin/chmod 744 %s \n- sudo %s ',%s,' > /var/log/%s-output.log", + return fmt.Sprintf("- sudo /usr/bin/curl --retry 5 --retry-delay 10 --retry-max-time 30 -o %s --create-dirs \"%s\" \n- sudo /bin/chmod 744 %s \n- sudo %s ',%s,' > /var/log/%s-output.log", scriptFilePath, scriptURL, scriptFilePath, scriptFilePath, extensionsParameterReference, extensionProfile.Name) }