Fix control plane node remediation #808
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Pre Submit | |
on: | |
push: | |
branches: | |
- main | |
- release-* | |
pull_request: | |
branches: | |
- main | |
- release-* | |
jobs: | |
build-and-unit-test: | |
runs-on: ubuntu-22.04 | |
steps: | |
- name: Checkout code | |
uses: actions/checkout@v3 | |
with: | |
fetch-depth: 0 | |
- name: Set up Go | |
uses: actions/setup-go@v4 | |
with: | |
go-version-file: go.mod | |
- name: Build | |
run: make manager | |
- name: Test | |
run: make test | |
- name: Test container build | |
run: make container-build | |
- name: TestMutations | |
run: make test-mutation-ci | |
e2e-k8s: | |
runs-on: ubuntu-22.04 | |
timeout-minutes: 30 | |
env: | |
IMAGE_REGISTRY: kind-registry:5000 | |
VERSION: 9.9.9-ci | |
DEPLOY_NAMESPACE: k8s-test | |
# see https://github.com/kubernetes-sigs/kind/tags | |
KIND_VERSION: v0.18.0 | |
# see https://hub.docker.com/r/kindest/node/tags for available versions! | |
K8S_VERSION: v1.26.3 | |
steps: | |
- name: Checkout code | |
uses: actions/checkout@v3 | |
with: | |
fetch-depth: 0 | |
- name: Set up Go | |
uses: actions/setup-go@v4 | |
with: | |
go-version-file: go.mod | |
- name: Configure insecure registry | |
run: | | |
#sudo cat /etc/docker/daemon.json | |
# allow insecure registry but keep original config! | |
sudo bash -c "cat <<EOF >/etc/docker/daemon.json | |
{ | |
\"exec-opts\": [\"native.cgroupdriver=cgroupfs\"], | |
\"cgroup-parent\": \"/actions_job\", | |
\"insecure-registries\" : [\"${IMAGE_REGISTRY}\"] | |
} | |
EOF" | |
#sudo cat /etc/docker/daemon.json | |
sudo systemctl restart docker | |
# same for podman | |
sudo bash -c "cat <<EOF >/etc/containers/registries.conf | |
[[registry]] | |
location=\"${IMAGE_REGISTRY}\" | |
insecure=true | |
EOF" | |
#sudo cat /etc/containers/registries.conf | |
- name: Start kind cluster | |
uses: container-tools/kind-action@v2 | |
with: | |
version: ${{env.KIND_VERSION}} | |
config: ./hack/kind-config.yaml | |
node_image: kindest/node:${{env.K8S_VERSION}} | |
kubectl_version: ${{env.K8S_VERSION}} | |
registry: true | |
- name: Cluster info | |
run: | | |
kubectl version -o=yaml | |
kubectl cluster-info | |
kubectl get nodes -o=wide | |
- name: Install OLM | |
run: | | |
curl -sL https://github.com/operator-framework/operator-lifecycle-manager/releases/download/v0.22.0/install.sh | bash -s v0.22.0 | |
- name: Build NHC | |
run: | | |
export NHC_SKIP_TEST=true | |
make container-build-k8s container-push | |
# make operator-sdk findable... | |
cp ./bin/operator-sdk /usr/local/bin/ | |
- name: Deploy NHC + SNR | |
run: | | |
kubectl create ns ${DEPLOY_NAMESPACE} | |
# deploy SNR first | |
operator-sdk run bundle -n ${DEPLOY_NAMESPACE} quay.io/medik8s/self-node-remediation-operator-bundle:latest | |
# deploy NHC | |
operator-sdk run bundle -n ${DEPLOY_NAMESPACE} --use-http ${IMAGE_REGISTRY}/node-healthcheck-operator-bundle:v${VERSION} | |
# wait a bit for OLM creating CSV etc | |
sleep 1m | |
- name: OLM status | |
if: ${{ always() }} | |
run: | | |
kubectl get -A OperatorGroup -o wide | |
kubectl get -A CatalogSource -o wide | |
kubectl get -A Subscription -o wide | |
kubectl get -A ClusterServiceVersion -o wide | |
kubectl get -A InstallPlan -o wide | |
- name: Wait for deployments | |
run: | | |
kubectl wait deployment -n ${DEPLOY_NAMESPACE} self-node-remediation-controller-manager --for condition=Available=True --timeout=300s | |
# give SNR some time to create the DS | |
sleep 30s | |
kubectl rollout status daemonset -n ${DEPLOY_NAMESPACE} self-node-remediation-ds --timeout 300s | |
kubectl wait deployment -n ${DEPLOY_NAMESPACE} node-healthcheck-controller-manager --for condition=Available=True --timeout=300s | |
- name: Deployment status | |
if: ${{ always() }} | |
run: | | |
kubectl -n ${DEPLOY_NAMESPACE} get deployments,daemonsets,pods -o=wide | |
- name: Run NHC e2e | |
run: | | |
echo "running e2e test" | |
OPERATOR_NS=${DEPLOY_NAMESPACE} TEST_OPTS='-ginkgo.label-filter="!OCP-ONLY"' make test-e2e | |
echo "finished e2e test" | |
- name: Debug | |
if: ${{ failure() }} | |
run: | | |
# debug NHC | |
echo "Debugging NHC" | |
kubectl describe deployment -n ${DEPLOY_NAMESPACE} node-healthcheck-controller-manager | |
echo "\n\n" | |
kubectl describe pod -n ${DEPLOY_NAMESPACE} --selector=app.kubernetes.io/name=node-healthcheck-operator,app.kubernetes.io/component=controller-manager | |
echo "\n\n" | |
kubectl logs -n ${DEPLOY_NAMESPACE} -c manager --selector=app.kubernetes.io/name=node-healthcheck-operator,app.kubernetes.io/component=controller-manager --tail -1 | |
echo "\n\n" | |
echo "Debugging SNR operator" | |
kubectl describe deployment -n ${DEPLOY_NAMESPACE} self-node-remediation-controller-manager | |
echo "\n\n" | |
kubectl describe pod -n ${DEPLOY_NAMESPACE} --selector=self-node-remediation-operator=,control-plane=controller-manager | |
echo "\n\n" | |
kubectl logs -n ${DEPLOY_NAMESPACE} -c manager --selector=self-node-remediation-operator=,control-plane=controller-manager --tail -1 | |
echo "\n\n" | |
echo "Debugging SNR agents" | |
kubectl describe daemonset -n ${DEPLOY_NAMESPACE} self-node-remediation-ds | |
echo "\n\n" | |
kubectl describe pod -n ${DEPLOY_NAMESPACE} --selector=app=self-node-remediation-agent,control-plane=controller-manager | |
echo "\n\n" | |
kubectl logs -n ${DEPLOY_NAMESPACE} --selector=app=self-node-remediation-agent,control-plane=controller-manager --tail -1 |