From ff5273d5575e6ffb53a8e36b335a1c245639b76b Mon Sep 17 00:00:00 2001 From: Zarquan Date: Mon, 19 Feb 2024 19:18:57 +0000 Subject: [PATCH] Notes on Somerville deploy issues --- notes/zrq/20240219-02-jade-test.txt | 2365 ++++++++++++++++++++++ notes/zrq/20240219-03-jade-reconnect.txt | 114 ++ 2 files changed, 2479 insertions(+) create mode 100644 notes/zrq/20240219-02-jade-test.txt create mode 100644 notes/zrq/20240219-03-jade-reconnect.txt diff --git a/notes/zrq/20240219-02-jade-test.txt b/notes/zrq/20240219-02-jade-test.txt new file mode 100644 index 00000000..53a0081d --- /dev/null +++ b/notes/zrq/20240219-02-jade-test.txt @@ -0,0 +1,2365 @@ +# +# +# +# Copyright (c) 2024, ROE (http://www.roe.ac.uk/) +# +# This information is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This information is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# +# +#zrq-notes-time +#zrq-notes-indent +#zrq-notes-crypto +#zrq-notes-ansible +#zrq-notes-osformat +#zrq-notes-zeppelin +# +# AIMetrics: [] +# + + Target: + + Test to see if the platform is working today. + + Result: + + Work in progress .... + + Nope, same issue as before. + https://github.com/lsst-uk/somerville-operations/issues/144 + + Common theme between the Pods, liveness and readiness probes failing. + (see list at the end of the notes) + + +# ----------------------------------------------------- +# Run our local client. +#[user@desktop] + + source "${HOME:?}/aglais.env" + export PATH=${PATH}:${AGLAIS_CODE}/bin + + kube-client jade + + > .... + > .... + + +# ----------------------------------------------------- +# Delete and create everything. +#[root@ansibler] + + export cloudsite=somerville-jade + + /deployments/openstack/bin/delete-all.sh \ + "${cloudname:?}" + + ansible-playbook \ + --inventory 'bootstrap,' \ + '/deployments/cluster-api/ansible/00-create-all.yml' + + > .... + > .... + + +# ----------------------------------------------------- +# Check the deployment configuration. 
+#[root@ansibler] + + cat /opt/aglais/aglais-status.yml + + > aglais: + > ansibler: + > external: + > ipv4: 90.155.51.57 + > deployment: + > date: 20240219 + > name: somerville-jade-20240219 + > type: cluster-api + > kubernetes: + > cluster: + > kind: + > conf: /opt/aglais/somerville-jade-20240219-kind.yml + > name: somerville-jade-20240219-kind + > work: + > conf: /opt/aglais/somerville-jade-20240219-work.yml + > name: somerville-jade-20240219-work + > version: 1.26.7 + > openstack: + > cloud: + > name: somerville-jade + > site: somerville-jade + > keypair: + > fingerprint: 2e:84:98:98:df:70:06:0e:4c:ed:bd:d4:d6:6b:eb:16 + > id: somerville-jade-20240219-keypair + > name: somerville-jade-20240219-keypair + > networks: + > bootstrap: + > network: + > id: 66a06ed8-caa4-4b70-a4a8-6b629749bdfd + > name: somerville-jade-20240219-bootstrap-network + > router: + > id: 8e9e30e2-fad1-4faf-8456-293b845efffb + > name: somerville-jade-20240219-bootstrap-network-router + > subnet: + > cidr: 10.10.0.0/16 + > id: 3bacd876-66b2-4bc7-9f6b-05e31eb08c96 + > name: somerville-jade-20240219-bootstrap-network-subnet + > external: + > network: + > id: 1875828a-ccc3-419b-87fd-856aaa781492 + > name: external + > project: + > id: be227fe0300b4ce5b03f44264df615df, + > name: Somerville-Gaia-Jade + > servers: + > bootstrap: + > float: + > external: 192.41.122.174 + > id: fa010a48-0dd2-4030-8101-8eb90f0acdd0 + > internal: 10.10.0.211 + > server: + > address: + > ipv4: 10.10.0.211 + > flavor: + > name: gaia.vm.2vcpu + > hostname: bootstrap + > id: d8d63532-0ca9-4a0c-9e84-93644df8af49 + > image: + > id: ce533fcf-75a6-4267-a622-d0227e6940b0 + > name: gaia-dmp-fedora-cloud-38-1.6 + > name: somerville-jade-20240219-bootstrap-node + > user: + > id: c4aad146ab7acaf44819e90e3e67a4d0490c164fbb02d388823c1ac9f0ae2e13, + > name: Dave Morris + + +# ----------------------------------------------------- +# Check the cluster status. +#[root@ansibler] + + ssh bootstrap -t \ + ' + source loadconfig + watch \ + clusterctl \ + --kubeconfig "${kindclusterconf:?}" \ + describe cluster \ + "${workclustername:?}" + ' + + > NAME READY SEVERITY REASON SINCE MESSAGE + > Cluster/somerville-jade-20240219-work True 2m10s + > ├─ClusterInfrastructure - OpenStackCluster/somerville-jade-20240219-work + > ├─ControlPlane - KubeadmControlPlane/somerville-jade-20240219-work-control-plane True 2m13s + > │ └─3 Machines... True 9m36s See somerville-jade-20240219-work-control-plane-d7m9n, somerville-jade-20240219-work-control-plane-pnmlk, ... + > └─Workers + > └─MachineDeployment/somerville-jade-20240219-work-md-0 False Warning WaitingForAvailableMachines 7m7s Minimum availability requires 5 replicas, current 2 available + > ├─4 Machines... True 8m10s See somerville-jade-20240219-work-md-0-8pvbc-54gfk, somerville-jade-20240219-work-md-0-8pvbc-8zlmq, ... + > └─2 Machines... False Warning UnhealthyNode 93s See somerville-jade-20240219-work-md-0-8pvbc-jtqcg, somerville-jade-20240219-work-md-0-8pvbc-tq6ft + > Connection to bootstrap closed. + + +# ----------------------------------------------------- +# List our machines in Openstack. 
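+#
+# Not run here, but when ClusterAPI is recycling unhealthy nodes it can be
+# quicker to list only the servers that are not ACTIVE. This assumes the
+# 'Name' and 'Status' keys used by the openstack client's JSON output.
+#
+#   openstack \
+#       --os-cloud "${cloudname:?}" \
+#       server list \
+#       --format json \
+#       | jq -r '.[] | select(.Status != "ACTIVE") | [.Name, .Status] | @tsv'
+#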
+#[root@ansibler] + + openstack \ + --os-cloud "${cloudname:?}" \ + server list + + > +--------------------------------------+------------------------------------------------------------+--------+----------------------------------------------------------------------------+-----------------------------------+----------------+ + > | ID | Name | Status | Networks | Image | Flavor | + > +--------------------------------------+------------------------------------------------------------+--------+----------------------------------------------------------------------------+-----------------------------------+----------------+ + > | f380cceb-6385-48e5-bf70-030ace07b8e7 | somerville-jade-20240219-work-control-plane-ac9af912-v42dq | ACTIVE | k8s-clusterapi-cluster-default-somerville-jade-20240219-work=192.168.3.171 | gaia-dmp-ubuntu-2204-kube-v1.26.7 | gaia.vm.2vcpu | + > | 2ff8c8d9-34a1-444d-b32c-8db1a806e833 | somerville-jade-20240219-work-control-plane-ac9af912-m4vdt | ACTIVE | k8s-clusterapi-cluster-default-somerville-jade-20240219-work=192.168.3.47 | gaia-dmp-ubuntu-2204-kube-v1.26.7 | gaia.vm.2vcpu | + > | d64cc19f-f0eb-4044-b28d-476fc39208f9 | somerville-jade-20240219-work-md-0-fb50a5e8-ntqbd | ACTIVE | k8s-clusterapi-cluster-default-somerville-jade-20240219-work=192.168.3.129 | gaia-dmp-ubuntu-2204-kube-v1.26.7 | gaia.vm.26vcpu | + > | 0fb1ec3c-3ec8-42e9-baed-ea4303799be3 | somerville-jade-20240219-work-md-0-fb50a5e8-5vphm | ACTIVE | k8s-clusterapi-cluster-default-somerville-jade-20240219-work=192.168.3.109 | gaia-dmp-ubuntu-2204-kube-v1.26.7 | gaia.vm.26vcpu | + > | 1b09660a-3b0f-48fe-8a0a-13866cb0a9f7 | somerville-jade-20240219-work-md-0-fb50a5e8-8vtjk | ACTIVE | k8s-clusterapi-cluster-default-somerville-jade-20240219-work=192.168.3.34 | gaia-dmp-ubuntu-2204-kube-v1.26.7 | gaia.vm.26vcpu | + > | 9f6802fc-8601-4f76-b8e1-87ace6433717 | somerville-jade-20240219-work-md-0-fb50a5e8-vd5gr | ACTIVE | k8s-clusterapi-cluster-default-somerville-jade-20240219-work=192.168.3.77 | gaia-dmp-ubuntu-2204-kube-v1.26.7 | gaia.vm.26vcpu | + > | d05a0082-33f7-4d8d-bc57-b33757c67cd2 | somerville-jade-20240219-work-md-0-fb50a5e8-whft4 | ACTIVE | k8s-clusterapi-cluster-default-somerville-jade-20240219-work=192.168.3.113 | gaia-dmp-ubuntu-2204-kube-v1.26.7 | gaia.vm.26vcpu | + > | 3eca33f6-b2f0-414a-b7ca-c2a35541022e | somerville-jade-20240219-work-control-plane-ac9af912-gjv45 | ACTIVE | k8s-clusterapi-cluster-default-somerville-jade-20240219-work=192.168.3.186 | gaia-dmp-ubuntu-2204-kube-v1.26.7 | gaia.vm.2vcpu | + > | d8d63532-0ca9-4a0c-9e84-93644df8af49 | somerville-jade-20240219-bootstrap-node | ACTIVE | somerville-jade-20240219-bootstrap-network=10.10.0.211, 192.41.122.174 | gaia-dmp-fedora-cloud-38-1.6 | gaia.vm.2vcpu | + > +--------------------------------------+------------------------------------------------------------+--------+----------------------------------------------------------------------------+-----------------------------------+----------------+ + + +# ----------------------------------------------------- +# List our machines in Kubernetes. 
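+#
+# Not run here, but a quick way to summarise just the Machine phases,
+# assuming the Cluster API Machine objects expose 'status.phase' as shown
+# in the PHASE column below.
+#
+#   ssh bootstrap -t \
+#       '
+#       source loadconfig
+#       kubectl \
+#           --kubeconfig "${kindclusterconf:?}" \
+#           get machines \
+#           --all-namespaces \
+#           --output json \
+#           | jq -r ".items[] | [.metadata.name, .status.phase] | @tsv"
+#       '
+#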
+#[root@ansibler] + + ssh bootstrap -t \ + ' + source loadconfig + kubectl \ + --kubeconfig "${kindclusterconf:?}" \ + get machines \ + --all-namespaces + ' + + > NAMESPACE NAME CLUSTER NODENAME PROVIDERID PHASE AGE VERSION + > default somerville-jade-20240219-work-control-plane-d7m9n somerville-jade-20240219-work somerville-jade-20240219-work-control-plane-ac9af912-gjv45 openstack:///3eca33f6-b2f0-414a-b7ca-c2a35541022e Running 10m v1.26.7 + > default somerville-jade-20240219-work-control-plane-pnmlk somerville-jade-20240219-work somerville-jade-20240219-work-control-plane-ac9af912-m4vdt openstack:///2ff8c8d9-34a1-444d-b32c-8db1a806e833 Running 8m7s v1.26.7 + > default somerville-jade-20240219-work-control-plane-xx6ps somerville-jade-20240219-work somerville-jade-20240219-work-control-plane-ac9af912-v42dq openstack:///f380cceb-6385-48e5-bf70-030ace07b8e7 Running 5m10s v1.26.7 + > default somerville-jade-20240219-work-md-0-8pvbc-54gfk somerville-jade-20240219-work somerville-jade-20240219-work-md-0-fb50a5e8-5vphm openstack:///0fb1ec3c-3ec8-42e9-baed-ea4303799be3 Deleting 12m v1.26.7 + > default somerville-jade-20240219-work-md-0-8pvbc-8zlmq somerville-jade-20240219-work somerville-jade-20240219-work-md-0-fb50a5e8-ntqbd openstack:///d64cc19f-f0eb-4044-b28d-476fc39208f9 Running 12m v1.26.7 + > default somerville-jade-20240219-work-md-0-8pvbc-cgbm4 somerville-jade-20240219-work Provisioning 8s v1.26.7 + > default somerville-jade-20240219-work-md-0-8pvbc-gwtbf somerville-jade-20240219-work somerville-jade-20240219-work-md-0-fb50a5e8-8vtjk openstack:///1b09660a-3b0f-48fe-8a0a-13866cb0a9f7 Deleting 12m v1.26.7 + > default somerville-jade-20240219-work-md-0-8pvbc-n2jrx somerville-jade-20240219-work somerville-jade-20240219-work-md-0-fb50a5e8-whft4 openstack:///d05a0082-33f7-4d8d-bc57-b33757c67cd2 Running 12m v1.26.7 + > default somerville-jade-20240219-work-md-0-8pvbc-tq6ft somerville-jade-20240219-work somerville-jade-20240219-work-md-0-fb50a5e8-vd5gr openstack:///9f6802fc-8601-4f76-b8e1-87ace6433717 Deleting 12m v1.26.7 + + +# ----------------------------------------------------- +# List our nodes in Kubernetes. 
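+#
+# Not run here, but if one of the nodes reports NotReady (as the third
+# control plane node does below) then describing it should show the kubelet
+# Conditions and recent Events explaining why.
+#
+#   ssh bootstrap -t \
+#       '
+#       source loadconfig
+#       kubectl \
+#           --kubeconfig "${workclusterconf:?}" \
+#           describe node \
+#           "somerville-jade-20240219-work-control-plane-ac9af912-v42dq"
+#       '
+#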
+#[root@ansibler] + + ssh bootstrap -t \ + ' + source loadconfig + kubectl \ + --kubeconfig "${workclusterconf:?}" \ + get nodes \ + --all-namespaces + ' + + > E0219 17:12:36.613657 13961 memcache.go:287] couldn't get resource list for metrics.k8s.io/v1beta1: the server is currently unable to handle the request + > E0219 17:12:36.625355 13961 memcache.go:287] couldn't get resource list for projectcalico.org/v3: the server is currently unable to handle the request + > E0219 17:12:41.630669 13961 memcache.go:121] couldn't get resource list for metrics.k8s.io/v1beta1: the server is currently unable to handle the request + > E0219 17:12:46.646248 13961 memcache.go:121] couldn't get resource list for projectcalico.org/v3: the server is currently unable to handle the request + > E0219 17:12:51.652165 13961 memcache.go:121] couldn't get resource list for metrics.k8s.io/v1beta1: the server is currently unable to handle the request + > E0219 17:12:51.656390 13961 memcache.go:121] couldn't get resource list for projectcalico.org/v3: the server is currently unable to handle the request + > E0219 17:12:56.662389 13961 memcache.go:121] couldn't get resource list for metrics.k8s.io/v1beta1: the server is currently unable to handle the request + > E0219 17:12:56.667093 13961 memcache.go:121] couldn't get resource list for projectcalico.org/v3: the server is currently unable to handle the request + > NAME STATUS ROLES AGE VERSION + > somerville-jade-20240219-work-control-plane-ac9af912-gjv45 Ready control-plane 10m v1.26.7 + > somerville-jade-20240219-work-control-plane-ac9af912-m4vdt Ready control-plane 7m50s v1.26.7 + > somerville-jade-20240219-work-control-plane-ac9af912-v42dq NotReady control-plane 3m14s v1.26.7 + > somerville-jade-20240219-work-md-0-fb50a5e8-ntqbd Ready 9m24s v1.26.7 + > somerville-jade-20240219-work-md-0-fb50a5e8-whft4 Ready 9m24s v1.26.7 + + # + # I've seen these kind of errors before but not recently. + # They may be because we are in the middle of a delete/create cycle + # where ClusterAPI gives up on an unhealthy VM, deletes it and creates a replacement. + # + + +# ----------------------------------------------------- +# Get the details of our Openstack LoadBalancer. +#[root@ansibler] + + openstack \ + --os-cloud "${cloudname:?}" \ + loadbalancer list + + > +--------------------------------------+----------------------------------------------------------------------+----------------------------------+---------------+---------------------+------------------+----------+ + > | id | name | project_id | vip_address | provisioning_status | operating_status | provider | + > +--------------------------------------+----------------------------------------------------------------------+----------------------------------+---------------+---------------------+------------------+----------+ + > | 3e72d1d5-1175-4639-b14d-ff10d44b973d | k8s-clusterapi-cluster-default-somerville-jade-20240219-work-kubeapi | be227fe0300b4ce5b03f44264df615df | 192.168.3.207 | ACTIVE | ONLINE | amphora | + > +--------------------------------------+----------------------------------------------------------------------+----------------------------------+---------------+---------------------+------------------+----------+ + + + balancerid=$( + openstack \ + --os-cloud "${cloudname:?}" \ + loadbalancer list \ + --format json \ + | jq -r '.[0].id' + ) + + openstack \ + --os-cloud "${cloudname:?}" \ + loadbalancer show \ + --format json \ + "${balancerid:?}" \ + | jq '.' 
+ + > { + > "admin_state_up": true, + > "availability_zone": null, + > "created_at": "2024-02-19T17:00:02", + > "description": "Created by cluster-api-provider-openstack cluster default-somerville-jade-20240219-work", + > "flavor_id": null, + > "id": "3e72d1d5-1175-4639-b14d-ff10d44b973d", + > "listeners": "1f9ad6df-39a6-4344-ba35-ef85a75dad57", + > "name": "k8s-clusterapi-cluster-default-somerville-jade-20240219-work-kubeapi", + > "operating_status": "ONLINE", + > "pools": "c64bf91b-d71c-4cc6-aff6-c4f8e8afb55c", + > "project_id": "be227fe0300b4ce5b03f44264df615df", + > "provider": "amphora", + > "provisioning_status": "ACTIVE", + > "updated_at": "2024-02-19T17:11:05", + > "vip_address": "192.168.3.207", + > "vip_network_id": "f4690e78-0658-408a-8eef-de41061d0a65", + > "vip_port_id": "2ae9edc7-ac51-4fbf-ad7a-c0115b12c3e9", + > "vip_qos_policy_id": null, + > "vip_subnet_id": "52ea968f-c137-4db8-80f7-b3f86b8a547a", + > "tags": "" + > } + + + openstack \ + --os-cloud "${cloudname:?}" \ + loadbalancer listener \ + list + + > +--------------------------------------+--------------------------------------+---------------------------------------------------------------------------+----------------------------------+----------+---------------+----------------+ + > | id | default_pool_id | name | project_id | protocol | protocol_port | admin_state_up | + > +--------------------------------------+--------------------------------------+---------------------------------------------------------------------------+----------------------------------+----------+---------------+----------------+ + > | 1f9ad6df-39a6-4344-ba35-ef85a75dad57 | c64bf91b-d71c-4cc6-aff6-c4f8e8afb55c | k8s-clusterapi-cluster-default-somerville-jade-20240219-work-kubeapi-6443 | be227fe0300b4ce5b03f44264df615df | TCP | 6443 | True | + > +--------------------------------------+--------------------------------------+---------------------------------------------------------------------------+----------------------------------+----------+---------------+----------------+ + + + listenerid=$( + openstack \ + --os-cloud "${cloudname:?}" \ + loadbalancer listener \ + list \ + --format json \ + | jq -r '.[0].id' + ) + + openstack \ + --os-cloud "${cloudname:?}" \ + loadbalancer listener \ + show \ + --format json \ + "${listenerid}" \ + | jq '.' 
+ + > { + > "admin_state_up": true, + > "connection_limit": -1, + > "created_at": "2024-02-19T17:01:02", + > "default_pool_id": "c64bf91b-d71c-4cc6-aff6-c4f8e8afb55c", + > "default_tls_container_ref": null, + > "description": "", + > "id": "1f9ad6df-39a6-4344-ba35-ef85a75dad57", + > "insert_headers": null, + > "l7policies": "", + > "loadbalancers": "3e72d1d5-1175-4639-b14d-ff10d44b973d", + > "name": "k8s-clusterapi-cluster-default-somerville-jade-20240219-work-kubeapi-6443", + > "operating_status": "ONLINE", + > "project_id": "be227fe0300b4ce5b03f44264df615df", + > "protocol": "TCP", + > "protocol_port": 6443, + > "provisioning_status": "ACTIVE", + > "sni_container_refs": [], + > "timeout_client_data": 50000, + > "timeout_member_connect": 5000, + > "timeout_member_data": 50000, + > "timeout_tcp_inspect": 0, + > "updated_at": "2024-02-19T17:11:05", + > "client_ca_tls_container_ref": null, + > "client_authentication": "NONE", + > "client_crl_container_ref": null, + > "allowed_cidrs": "192.168.3.0/24\n192.41.122.133/32\n192.41.122.174/32\n90.155.51.57/32", + > "tls_ciphers": null, + > "tls_versions": null, + > "alpn_protocols": null, + > "tags": "" + > } + + + openstack \ + --os-cloud "${cloudname:?}" \ + loadbalancer pool \ + list + + > +--------------------------------------+---------------------------------------------------------------------------+----------------------------------+---------------------+----------+--------------+----------------+ + > | id | name | project_id | provisioning_status | protocol | lb_algorithm | admin_state_up | + > +--------------------------------------+---------------------------------------------------------------------------+----------------------------------+---------------------+----------+--------------+----------------+ + > | c64bf91b-d71c-4cc6-aff6-c4f8e8afb55c | k8s-clusterapi-cluster-default-somerville-jade-20240219-work-kubeapi-6443 | be227fe0300b4ce5b03f44264df615df | ACTIVE | TCP | ROUND_ROBIN | True | + > +--------------------------------------+---------------------------------------------------------------------------+----------------------------------+---------------------+----------+--------------+----------------+ + + + poolid=$( + openstack \ + --os-cloud "${cloudname:?}" \ + loadbalancer pool \ + list \ + --format json \ + | jq -r '.[0].id' + ) + + openstack \ + --os-cloud "${cloudname:?}" \ + loadbalancer pool \ + show \ + --format json \ + "${poolid}" \ + | jq '.' 
+ + > { + > "admin_state_up": true, + > "created_at": "2024-02-19T17:01:08", + > "description": "", + > "healthmonitor_id": "dea8bd22-56bb-4adb-b1d3-616fdec8b7a0", + > "id": "c64bf91b-d71c-4cc6-aff6-c4f8e8afb55c", + > "lb_algorithm": "ROUND_ROBIN", + > "listeners": "1f9ad6df-39a6-4344-ba35-ef85a75dad57", + > "loadbalancers": "3e72d1d5-1175-4639-b14d-ff10d44b973d", + > "members": "d3b895f8-ee56-4c1e-8f44-4b3bf8d66f38\nd517ee7c-6b7f-41a5-bcb6-00ba6aa8b0df\ne887d363-c63c-48b2-86a3-89ab1a5e62d2", + > "name": "k8s-clusterapi-cluster-default-somerville-jade-20240219-work-kubeapi-6443", + > "operating_status": "ONLINE", + > "project_id": "be227fe0300b4ce5b03f44264df615df", + > "protocol": "TCP", + > "provisioning_status": "ACTIVE", + > "session_persistence": null, + > "updated_at": "2024-02-19T17:11:02", + > "tls_container_ref": null, + > "ca_tls_container_ref": null, + > "crl_container_ref": null, + > "tls_enabled": false, + > "tls_ciphers": null, + > "tls_versions": null, + > "tags": "", + > "alpn_protocols": null + > } + + + openstack \ + --os-cloud "${cloudname:?}" \ + loadbalancer member \ + list \ + "${poolid}" + + > +--------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+----------------------------------+---------------------+---------------+---------------+------------------+--------+ + > | id | name | project_id | provisioning_status | address | protocol_port | operating_status | weight | + > +--------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+----------------------------------+---------------------+---------------+---------------+------------------+--------+ + > | d517ee7c-6b7f-41a5-bcb6-00ba6aa8b0df | k8s-clusterapi-cluster-default-somerville-jade-20240219-work-kubeapi-6443-somerville-jade-20240219-work-control-plane-ac9af912-gjv45 | be227fe0300b4ce5b03f44264df615df | ACTIVE | 192.168.3.186 | 6443 | ONLINE | 1 | + > | e887d363-c63c-48b2-86a3-89ab1a5e62d2 | k8s-clusterapi-cluster-default-somerville-jade-20240219-work-kubeapi-6443-somerville-jade-20240219-work-control-plane-ac9af912-m4vdt | be227fe0300b4ce5b03f44264df615df | ACTIVE | 192.168.3.47 | 6443 | ONLINE | 1 | + > | d3b895f8-ee56-4c1e-8f44-4b3bf8d66f38 | k8s-clusterapi-cluster-default-somerville-jade-20240219-work-kubeapi-6443-somerville-jade-20240219-work-control-plane-ac9af912-v42dq | be227fe0300b4ce5b03f44264df615df | ACTIVE | 192.168.3.171 | 6443 | ONLINE | 1 | + > +--------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------+----------------------------------+---------------------+---------------+---------------+------------------+--------+ + + + for memberid in $( + openstack \ + --os-cloud "${cloudname:?}" \ + loadbalancer member \ + list \ + "${poolid}" \ + --format json \ + | jq -r '.[].id' + ) + do + echo "" + echo "Member [${memberid}]" + openstack \ + --os-cloud "${cloudname:?}" \ + loadbalancer member \ + show \ + "${poolid}" \ + "${memberid}" \ + --format json \ + | jq '.' 
+ done + + > Member [d517ee7c-6b7f-41a5-bcb6-00ba6aa8b0df] + > { + > "address": "192.168.3.186", + > "admin_state_up": true, + > "created_at": "2024-02-19T17:01:30", + > "id": "d517ee7c-6b7f-41a5-bcb6-00ba6aa8b0df", + > "name": "k8s-clusterapi-cluster-default-somerville-jade-20240219-work-kubeapi-6443-somerville-jade-20240219-work-control-plane-ac9af912-gjv45", + > "operating_status": "ONLINE", + > "project_id": "be227fe0300b4ce5b03f44264df615df", + > "protocol_port": 6443, + > "provisioning_status": "ACTIVE", + > "subnet_id": null, + > "updated_at": "2024-02-19T17:02:40", + > "weight": 1, + > "monitor_port": null, + > "monitor_address": null, + > "backup": false, + > "tags": "" + > } + > + > Member [e887d363-c63c-48b2-86a3-89ab1a5e62d2] + > { + > "address": "192.168.3.47", + > "admin_state_up": true, + > "created_at": "2024-02-19T17:04:14", + > "id": "e887d363-c63c-48b2-86a3-89ab1a5e62d2", + > "name": "k8s-clusterapi-cluster-default-somerville-jade-20240219-work-kubeapi-6443-somerville-jade-20240219-work-control-plane-ac9af912-m4vdt", + > "operating_status": "ONLINE", + > "project_id": "be227fe0300b4ce5b03f44264df615df", + > "protocol_port": 6443, + > "provisioning_status": "ACTIVE", + > "subnet_id": null, + > "updated_at": "2024-02-19T17:06:01", + > "weight": 1, + > "monitor_port": null, + > "monitor_address": null, + > "backup": false, + > "tags": "" + > } + > + > Member [d3b895f8-ee56-4c1e-8f44-4b3bf8d66f38] + > { + > "address": "192.168.3.171", + > "admin_state_up": true, + > "created_at": "2024-02-19T17:07:34", + > "id": "d3b895f8-ee56-4c1e-8f44-4b3bf8d66f38", + > "name": "k8s-clusterapi-cluster-default-somerville-jade-20240219-work-kubeapi-6443-somerville-jade-20240219-work-control-plane-ac9af912-v42dq", + > "operating_status": "ONLINE", + > "project_id": "be227fe0300b4ce5b03f44264df615df", + > "protocol_port": 6443, + > "provisioning_status": "ACTIVE", + > "subnet_id": null, + > "updated_at": "2024-02-19T17:11:02", + > "weight": 1, + > "monitor_port": null, + > "monitor_address": null, + > "backup": false, + > "tags": "" + > } + + +# ----------------------------------------------------- +# Check the Helm releases on the KinD managment cluster. 
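+#
+# Not run here, but a quick triage step is to list only the HelmReleases
+# that are not in the 'Deployed' phase, assuming the addon operator exposes
+# 'status.phase' as shown in the PHASE column below.
+#
+#   ssh bootstrap -t \
+#       '
+#       source loadconfig
+#       kubectl \
+#           --kubeconfig "${kindclusterconf:?}" \
+#           get helmrelease \
+#           --all-namespaces \
+#           --output json \
+#           | jq -r ".items[] | select(.status.phase != \"Deployed\") | .metadata.name"
+#       '
+#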
+# https://github.com/lsst-uk/somerville-operations/issues/144#issuecomment-1905783041 +#[root@ansibler] + + ssh bootstrap -t \ + ' + source loadconfig + kubectl \ + --kubeconfig "${kindclusterconf:?}" \ + get helmrelease -A + ' + + > NAMESPACE NAME CLUSTER BOOTSTRAP TARGET NAMESPACE RELEASE NAME PHASE REVISION CHART NAME CHART VERSION AGE + > default somerville-jade-20240219-work-ccm-openstack somerville-jade-20240219-work true openstack-system ccm-openstack Deployed 1 openstack-cloud-controller-manager 1.3.0 17m + > default somerville-jade-20240219-work-cni-calico somerville-jade-20240219-work true tigera-operator cni-calico Deployed 1 tigera-operator v3.26.0 17m + > default somerville-jade-20240219-work-csi-cinder somerville-jade-20240219-work true openstack-system csi-cinder Deployed 1 openstack-cinder-csi 2.2.0 17m + > default somerville-jade-20240219-work-kubernetes-dashboard somerville-jade-20240219-work true kubernetes-dashboard kubernetes-dashboard Deployed 1 kubernetes-dashboard 5.10.0 17m + > default somerville-jade-20240219-work-mellanox-network-operator somerville-jade-20240219-work true network-operator mellanox-network-operator Deployed 1 network-operator 1.3.0 17m + > default somerville-jade-20240219-work-metrics-server somerville-jade-20240219-work true kube-system metrics-server Deployed 1 metrics-server 3.8.2 17m + > default somerville-jade-20240219-work-node-feature-discovery somerville-jade-20240219-work true node-feature-discovery node-feature-discovery Deployed 1 node-feature-discovery 0.11.2 17m + > default somerville-jade-20240219-work-nvidia-gpu-operator somerville-jade-20240219-work true gpu-operator nvidia-gpu-operator Deployed 1 gpu-operator v1.11.1 17m + + + ssh bootstrap -t \ + ' + source loadconfig + kubectl \ + --kubeconfig "${kindclusterconf:?}" \ + describe helmrelease \ + "somerville-jade-20240219-work-ccm-openstack" + ' + + > Name: somerville-jade-20240219-work-ccm-openstack + > Namespace: default + > Labels: addons.stackhpc.com/cluster=somerville-jade-20240219-work + > addons.stackhpc.com/release-name=ccm-openstack + > addons.stackhpc.com/release-namespace=openstack-system + > app.kubernetes.io/managed-by=Helm + > capi.stackhpc.com/cluster=somerville-jade-20240219-work + > capi.stackhpc.com/component=ccm-openstack + > capi.stackhpc.com/managed-by=Helm + > helm.sh/chart=addons-0.1.0 + > Annotations: addons.stackhpc.com/kopf-managed: yes + > addons.stackhpc.com/last-handled-configuration: + > {"spec":{"bootstrap":true,"chart":{"name":"openstack-cloud-controller-manager","repo":"https://kubernetes.github.io/cloud-provider-opensta... 
+ > meta.helm.sh/release-name: somerville-jade-20240219-work + > meta.helm.sh/release-namespace: default + > API Version: addons.stackhpc.com/v1alpha1 + > Kind: HelmRelease + > Metadata: + > Creation Timestamp: 2024-02-19T16:59:49Z + > Finalizers: + > addons.stackhpc.com/finalizer + > Generation: 1 + > Owner References: + > API Version: cluster.x-k8s.io/v1beta1 + > Block Owner Deletion: true + > Controller: false + > Kind: Cluster + > Name: somerville-jade-20240219-work + > UID: e185eb3f-f4d6-41e8-838a-027dfdf749f7 + > Resource Version: 2938 + > UID: 210026d6-097e-4ce4-b3d1-6f01a6384cf5 + > Spec: + > Bootstrap: true + > Chart: + > Name: openstack-cloud-controller-manager + > Repo: https://kubernetes.github.io/cloud-provider-openstack + > Version: 1.3.0 + > Cluster Name: somerville-jade-20240219-work + > Release Name: ccm-openstack + > Target Namespace: openstack-system + > Values Sources: + > Secret: + > Key: defaults + > Name: somerville-jade-20240219-work-ccm-openstack-config + > Secret: + > Key: overrides + > Name: somerville-jade-20240219-work-ccm-openstack-config + > Status: + > Phase: Deployed + > Resources: + > API Version: v1 + > Kind: ServiceAccount + > Name: openstack-cloud-controller-manager + > API Version: rbac.authorization.k8s.io/v1 + > Kind: ClusterRole + > Name: system:openstack-cloud-controller-manager + > API Version: rbac.authorization.k8s.io/v1 + > Kind: ClusterRoleBinding + > Name: system:openstack-cloud-controller-manager + > API Version: apps/v1 + > Kind: DaemonSet + > Name: openstack-cloud-controller-manager + > Revision: 1 + > Events: + > Type Reason Age From Message + > ---- ------ ---- ---- ------- + > Error Logging 18m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 18m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 18m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 17m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 17m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 17m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 17m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 16m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 16m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 16m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 16m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Normal Logging 15m kopf Creation is processed: 1 succeeded; 0 failed. + > Normal Logging 15m kopf Handler 'handle_addon_updated' succeeded. 
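+
+    #
+    # The kopf 'cluster not ready' events above clear once the addon deploys.
+    # Not run here, but the same events could be watched across all of the
+    # HelmReleases at once instead of describing each release in turn;
+    # 'involvedObject.kind' and '.lastTimestamp' are standard Event fields.
+    #
+    #   ssh bootstrap -t \
+    #       '
+    #       source loadconfig
+    #       kubectl \
+    #           --kubeconfig "${kindclusterconf:?}" \
+    #           get events \
+    #           --namespace default \
+    #           --field-selector involvedObject.kind=HelmRelease \
+    #           --sort-by .lastTimestamp
+    #       '
+    #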
+ + + ssh bootstrap -t \ + ' + source loadconfig + kubectl \ + --kubeconfig "${kindclusterconf:?}" \ + describe helmrelease \ + "somerville-jade-20240219-work-cni-calico" + ' + + > Name: somerville-jade-20240219-work-cni-calico + > Namespace: default + > Labels: addons.stackhpc.com/cluster=somerville-jade-20240219-work + > addons.stackhpc.com/release-name=cni-calico + > addons.stackhpc.com/release-namespace=tigera-operator + > app.kubernetes.io/managed-by=Helm + > capi.stackhpc.com/cluster=somerville-jade-20240219-work + > capi.stackhpc.com/component=cni-calico + > capi.stackhpc.com/managed-by=Helm + > helm.sh/chart=addons-0.1.0 + > Annotations: addons.stackhpc.com/kopf-managed: yes + > addons.stackhpc.com/last-handled-configuration: + > {"spec":{"bootstrap":true,"chart":{"name":"tigera-operator","repo":"https://projectcalico.docs.tigera.io/charts","version":"v3.26.0"},"clu... + > meta.helm.sh/release-name: somerville-jade-20240219-work + > meta.helm.sh/release-namespace: default + > API Version: addons.stackhpc.com/v1alpha1 + > Kind: HelmRelease + > Metadata: + > Creation Timestamp: 2024-02-19T16:59:49Z + > Finalizers: + > addons.stackhpc.com/finalizer + > Generation: 1 + > Owner References: + > API Version: cluster.x-k8s.io/v1beta1 + > Block Owner Deletion: true + > Controller: false + > Kind: Cluster + > Name: somerville-jade-20240219-work + > UID: e185eb3f-f4d6-41e8-838a-027dfdf749f7 + > Resource Version: 3100 + > UID: 2295d73c-894f-45da-b1fe-edd03baab5ab + > Spec: + > Bootstrap: true + > Chart: + > Name: tigera-operator + > Repo: https://projectcalico.docs.tigera.io/charts + > Version: v3.26.0 + > Cluster Name: somerville-jade-20240219-work + > Release Name: cni-calico + > Target Namespace: tigera-operator + > Values Sources: + > Secret: + > Key: defaults + > Name: somerville-jade-20240219-work-cni-calico-config + > Secret: + > Key: overrides + > Name: somerville-jade-20240219-work-cni-calico-config + > Status: + > Phase: Deployed + > Resources: + > API Version: v1 + > Kind: ServiceAccount + > Name: tigera-operator + > Namespace: tigera-operator + > API Version: rbac.authorization.k8s.io/v1 + > Kind: ClusterRole + > Name: tigera-operator + > API Version: rbac.authorization.k8s.io/v1 + > Kind: ClusterRoleBinding + > Name: tigera-operator + > API Version: apps/v1 + > Kind: Deployment + > Name: tigera-operator + > Namespace: tigera-operator + > API Version: operator.tigera.io/v1 + > Kind: APIServer + > Name: default + > API Version: operator.tigera.io/v1 + > Kind: Installation + > Name: default + > Revision: 1 + > Events: + > Type Reason Age From Message + > ---- ------ ---- ---- ------- + > Error Logging 19m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 18m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 18m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 18m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 18m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 17m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 17m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready 
+ > Error Logging 17m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 17m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 16m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 16m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Normal Logging 15m kopf Handler 'handle_addon_updated' succeeded. + > Normal Logging 15m kopf Creation is processed: 1 succeeded; 0 failed. + + + ssh bootstrap -t \ + ' + source loadconfig + kubectl \ + --kubeconfig "${kindclusterconf:?}" \ + describe helmrelease \ + "somerville-jade-20240219-work-csi-cinder" + ' + + > Name: somerville-jade-20240219-work-csi-cinder + > Namespace: default + > Labels: addons.stackhpc.com/cluster=somerville-jade-20240219-work + > addons.stackhpc.com/release-name=csi-cinder + > addons.stackhpc.com/release-namespace=openstack-system + > app.kubernetes.io/managed-by=Helm + > capi.stackhpc.com/cluster=somerville-jade-20240219-work + > capi.stackhpc.com/component=csi-cinder + > capi.stackhpc.com/managed-by=Helm + > helm.sh/chart=addons-0.1.0 + > Annotations: addons.stackhpc.com/kopf-managed: yes + > addons.stackhpc.com/last-handled-configuration: + > {"spec":{"bootstrap":true,"chart":{"name":"openstack-cinder-csi","repo":"https://kubernetes.github.io/cloud-provider-openstack","version":... + > meta.helm.sh/release-name: somerville-jade-20240219-work + > meta.helm.sh/release-namespace: default + > API Version: addons.stackhpc.com/v1alpha1 + > Kind: HelmRelease + > Metadata: + > Creation Timestamp: 2024-02-19T16:59:49Z + > Finalizers: + > addons.stackhpc.com/finalizer + > Generation: 1 + > Owner References: + > API Version: cluster.x-k8s.io/v1beta1 + > Block Owner Deletion: true + > Controller: false + > Kind: Cluster + > Name: somerville-jade-20240219-work + > UID: e185eb3f-f4d6-41e8-838a-027dfdf749f7 + > Resource Version: 4944 + > UID: 20fe5f5b-4ebf-4116-a847-e079e788631e + > Spec: + > Bootstrap: true + > Chart: + > Name: openstack-cinder-csi + > Repo: https://kubernetes.github.io/cloud-provider-openstack + > Version: 2.2.0 + > Cluster Name: somerville-jade-20240219-work + > Release Name: csi-cinder + > Target Namespace: openstack-system + > Values Sources: + > Secret: + > Key: defaults + > Name: somerville-jade-20240219-work-csi-cinder-config + > Secret: + > Key: overrides + > Name: somerville-jade-20240219-work-csi-cinder-config + > Status: + > Notes: Use the following storageClass csi-cinder-sc-retain and csi-cinder-sc-delete only for RWO volumes. 
+ > Phase: Deployed + > Resources: + > API Version: v1 + > Kind: ServiceAccount + > Name: csi-cinder-controller-sa + > Namespace: openstack-system + > API Version: v1 + > Kind: ServiceAccount + > Name: csi-cinder-node-sa + > Namespace: openstack-system + > API Version: rbac.authorization.k8s.io/v1 + > Kind: ClusterRole + > Name: csi-attacher-role + > API Version: rbac.authorization.k8s.io/v1 + > Kind: ClusterRole + > Name: csi-provisioner-role + > API Version: rbac.authorization.k8s.io/v1 + > Kind: ClusterRole + > Name: csi-snapshotter-role + > API Version: rbac.authorization.k8s.io/v1 + > Kind: ClusterRole + > Name: csi-resizer-role + > API Version: rbac.authorization.k8s.io/v1 + > Kind: ClusterRole + > Name: csi-nodeplugin-role + > API Version: rbac.authorization.k8s.io/v1 + > Kind: ClusterRoleBinding + > Name: csi-attacher-binding + > API Version: rbac.authorization.k8s.io/v1 + > Kind: ClusterRoleBinding + > Name: csi-provisioner-binding + > API Version: rbac.authorization.k8s.io/v1 + > Kind: ClusterRoleBinding + > Name: csi-snapshotter-binding + > API Version: rbac.authorization.k8s.io/v1 + > Kind: ClusterRoleBinding + > Name: csi-resizer-binding + > API Version: rbac.authorization.k8s.io/v1 + > Kind: ClusterRoleBinding + > Name: csi-nodeplugin-binding + > API Version: apps/v1 + > Kind: DaemonSet + > Name: openstack-cinder-csi-nodeplugin + > Namespace: openstack-system + > API Version: apps/v1 + > Kind: Deployment + > Name: openstack-cinder-csi-controllerplugin + > Namespace: openstack-system + > API Version: storage.k8s.io/v1 + > Kind: CSIDriver + > Name: cinder.csi.openstack.org + > Revision: 1 + > Events: + > Type Reason Age From Message + > ---- ------ ---- ---- ------- + > Error Logging 19m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 19m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 19m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 18m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 18m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 18m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 17m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 17m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 17m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 17m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Error Logging 16m kopf Handler 'handle_addon_updated' failed temporarily: cluster 'somerville-jade-20240219-work' is not ready + > Normal Logging 6m19s kopf Handler 'handle_addon_updated' succeeded. + > Normal Logging 6m19s kopf Creation is processed: 1 succeeded; 0 failed. + + +# ----------------------------------------------------- +# List the Pods in the tenant (work) cluster. 
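+#
+# Not run here, but a way to pick out only the problem Pods: anything that
+# is not reporting all of its containers as ready. This uses the standard
+# 'status.containerStatuses[].ready' fields rather than a field selector,
+# since CrashLoopBackOff Pods can still report phase=Running.
+#
+#   ssh bootstrap -t \
+#       '
+#       source loadconfig
+#       kubectl \
+#           --kubeconfig "${workclusterconf:?}" \
+#           get pods \
+#           --all-namespaces \
+#           --output json \
+#           | jq -r ".items[] | select([.status.containerStatuses[]?.ready] | all | not) | .metadata.namespace + \"/\" + .metadata.name"
+#       '
+#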
+#[root@ansibler] + + ssh bootstrap -t \ + ' + source loadconfig + kubectl \ + --kubeconfig "${workclusterconf:?}" \ + get pods \ + --all-namespaces + ' + + > NAMESPACE NAME READY STATUS RESTARTS AGE + > calico-apiserver calico-apiserver-6fdc7b8d5c-l7l2n 1/1 Running 0 15m + > calico-apiserver calico-apiserver-6fdc7b8d5c-rhpkn 1/1 Running 0 15m + > calico-system calico-kube-controllers-6b6ff777c8-znd2l 1/1 Running 0 16m + > calico-system calico-node-6h82p 1/1 Running 0 16m + > calico-system calico-node-d27fv 1/1 Running 0 16m + > calico-system calico-node-kms42 1/1 Running 2 (2m50s ago) 10m + > calico-system calico-node-kxbnx 0/1 Init:1/2 0 3m51s + > calico-system calico-node-pvczv 1/1 Running 1 (3m29s ago) 5m29s + > calico-system calico-node-rxlzl 1/1 Running 0 16m + > calico-system calico-node-szj2r 1/1 Running 3 (2m14s ago) 5m24s + > calico-system calico-node-xhw4p 1/1 Running 1 (11m ago) 14m + > calico-system calico-node-zws67 1/1 Running 0 5m23s + > calico-system calico-typha-695c6f4d98-8h879 1/1 Running 0 8m30s + > calico-system calico-typha-695c6f4d98-fssbf 1/1 Running 1 (15m ago) 16m + > calico-system calico-typha-695c6f4d98-vfkvc 1/1 Running 0 16m + > calico-system csi-node-driver-29phg 2/2 Running 0 5m29s + > calico-system csi-node-driver-2g7fq 0/2 ContainerCreating 0 3m50s + > calico-system csi-node-driver-j8qj4 2/2 Running 0 16m + > calico-system csi-node-driver-jhggk 2/2 Running 0 5m23s + > calico-system csi-node-driver-jkzrl 2/2 Running 0 5m26s + > calico-system csi-node-driver-kv6cq 2/2 Running 0 10m + > calico-system csi-node-driver-lhb8k 2/2 Running 0 14m + > calico-system csi-node-driver-m4wll 2/2 Running 0 16m + > calico-system csi-node-driver-mbnd8 2/2 Running 0 16m + > gpu-operator gpu-operator-6c8649c88c-cdbr7 1/1 Running 5 (4m39s ago) 17m + > kube-system coredns-787d4945fb-dv6px 1/1 Running 0 17m + > kube-system coredns-787d4945fb-svl9q 1/1 Running 0 17m + > kube-system etcd-somerville-jade-20240219-work-control-plane-ac9af912-gjv45 1/1 Running 0 17m + > kube-system etcd-somerville-jade-20240219-work-control-plane-ac9af912-m4vdt 1/1 Running 0 14m + > kube-system etcd-somerville-jade-20240219-work-control-plane-ac9af912-v42dq 1/1 Running 0 10m + > kube-system kube-apiserver-somerville-jade-20240219-work-control-plane-ac9af912-gjv45 1/1 Running 0 17m + > kube-system kube-apiserver-somerville-jade-20240219-work-control-plane-ac9af912-m4vdt 1/1 Running 0 14m + > kube-system kube-apiserver-somerville-jade-20240219-work-control-plane-ac9af912-v42dq 1/1 Running 0 9m57s + > kube-system kube-controller-manager-somerville-jade-20240219-work-control-plane-ac9af912-gjv45 1/1 Running 4 (4m55s ago) 17m + > kube-system kube-controller-manager-somerville-jade-20240219-work-control-plane-ac9af912-m4vdt 1/1 Running 2 (6m16s ago) 14m + > kube-system kube-controller-manager-somerville-jade-20240219-work-control-plane-ac9af912-v42dq 1/1 Running 0 9m57s + > kube-system kube-proxy-6ccvh 1/1 Running 0 16m + > kube-system kube-proxy-6vr7b 1/1 Running 0 5m34s + > kube-system kube-proxy-7qwtb 1/1 Running 0 5m29s + > kube-system kube-proxy-8pn9v 1/1 Running 0 10m + > kube-system kube-proxy-dpzg8 1/1 Running 0 16m + > kube-system kube-proxy-ppr9v 1/1 Running 0 5m24s + > kube-system kube-proxy-qn22t 1/1 Running 0 3m52s + > kube-system kube-proxy-rj9qh 1/1 Running 0 14m + > kube-system kube-proxy-vpskm 1/1 Running 0 17m + > kube-system kube-scheduler-somerville-jade-20240219-work-control-plane-ac9af912-gjv45 1/1 Running 4 (4m52s ago) 17m + > kube-system 
kube-scheduler-somerville-jade-20240219-work-control-plane-ac9af912-m4vdt 1/1 Running 2 (6m17s ago) 14m + > kube-system kube-scheduler-somerville-jade-20240219-work-control-plane-ac9af912-v42dq 1/1 Running 0 9m57s + > kube-system metrics-server-65cccfc7bb-k594p 1/1 Running 0 17m + > kubernetes-dashboard kubernetes-dashboard-85d67585b8-cblnp 2/2 Running 0 17m + > network-operator mellanox-network-operator-5f7b6b766c-nc5b2 0/1 CrashLoopBackOff 7 (4m14s ago) 16m + > node-feature-discovery node-feature-discovery-master-75c9d78d5f-brf4f 1/1 Running 0 17m + > node-feature-discovery node-feature-discovery-worker-54bht 1/1 Running 0 14m + > node-feature-discovery node-feature-discovery-worker-5t2nr 1/1 Running 0 10m + > node-feature-discovery node-feature-discovery-worker-bqwhq 1/1 Running 0 5m29s + > node-feature-discovery node-feature-discovery-worker-dwhft 1/1 Running 0 5m34s + > node-feature-discovery node-feature-discovery-worker-g95b5 0/1 ContainerCreating 0 3m52s + > node-feature-discovery node-feature-discovery-worker-ngxmc 1/1 Running 1 (14m ago) 16m + > node-feature-discovery node-feature-discovery-worker-nhzvn 1/1 Running 1 (4m53s ago) 16m + > node-feature-discovery node-feature-discovery-worker-sm5qd 1/1 Running 0 17m + > node-feature-discovery node-feature-discovery-worker-xkcwp 1/1 Running 0 5m23s + > openstack-system openstack-cinder-csi-controllerplugin-7f768f855d-65hnm 6/6 Running 16 (9m44s ago) 17m + > openstack-system openstack-cinder-csi-nodeplugin-frh8x 3/3 Running 0 5m31s + > openstack-system openstack-cinder-csi-nodeplugin-g7rs5 3/3 Running 0 5m23s + > openstack-system openstack-cinder-csi-nodeplugin-hl62v 3/3 Running 0 14m + > openstack-system openstack-cinder-csi-nodeplugin-plr2r 3/3 Running 0 10m + > openstack-system openstack-cinder-csi-nodeplugin-pndz8 3/3 Running 0 16m + > openstack-system openstack-cinder-csi-nodeplugin-qfq8f 3/3 Running 0 17m + > openstack-system openstack-cinder-csi-nodeplugin-r462n 3/3 Running 0 16m + > openstack-system openstack-cinder-csi-nodeplugin-tglw6 3/3 Running 0 5m26s + > openstack-system openstack-cinder-csi-nodeplugin-xjnf9 0/3 ContainerCreating 0 3m51s + > openstack-system openstack-cloud-controller-manager-b574z 1/1 Running 0 5m31s + > openstack-system openstack-cloud-controller-manager-lqhdf 1/1 Running 1 (4m54s ago) 12m + > openstack-system openstack-cloud-controller-manager-whndz 1/1 Running 3 (9m45s ago) 16m + > tigera-operator tigera-operator-7d4cfffc6-6lsxs 1/1 Running 3 (9m45s ago) 16m + + +# ----------------------------------------------------- +# Check the Pods in the 'calico-apiserver' namespace. 
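+#
+# Not run here, but since failing liveness and readiness probes are the
+# common theme, it can help to dump just the probe settings for these Pods
+# using the standard 'livenessProbe' and 'readinessProbe' fields.
+#
+#   ssh bootstrap -t \
+#       '
+#       source loadconfig
+#       kubectl \
+#           --kubeconfig "${workclusterconf:?}" \
+#           get pods \
+#           --namespace calico-apiserver \
+#           --output json \
+#           | jq ".items[].spec.containers[] | {name: .name, livenessProbe: .livenessProbe, readinessProbe: .readinessProbe}"
+#       '
+#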
+# https://github.com/lsst-uk/somerville-operations/issues/144#issuecomment-1910276921 +#[root@ansibler] + + ssh bootstrap -t \ + ' + source loadconfig + kubectl \ + --kubeconfig "${workclusterconf:?}" \ + get pods \ + --namespace calico-apiserver \ + --output json + ' \ + | jq -r '.items[].metadata.name' + + > calico-apiserver-6fdc7b8d5c-l7l2n + > calico-apiserver-6fdc7b8d5c-rhpkn + + ssh bootstrap -t \ + ' + source loadconfig + for podname in $( + kubectl \ + --kubeconfig "${workclusterconf:?}" \ + get pods \ + --namespace calico-apiserver \ + --output json \ + | jq -r ".items[].metadata.name" + ) + do + echo "" + echo "---- ---- ---- ----" + echo "Podname [${podname}]" + kubectl \ + --kubeconfig "${workclusterconf:?}" \ + describe pod \ + --namespace calico-apiserver \ + "${podname}" + done + ' + + > ---- ---- ---- ---- + > Podname [calico-apiserver-6fdc7b8d5c-l7l2n] + > Name: calico-apiserver-6fdc7b8d5c-l7l2n + > Namespace: calico-apiserver + > Priority: 0 + > Service Account: calico-apiserver + > Node: somerville-jade-20240219-work-md-0-fb50a5e8-ntqbd/192.168.3.129 + > Start Time: Mon, 19 Feb 2024 17:04:22 +0000 + > Labels: apiserver=true + > app.kubernetes.io/name=calico-apiserver + > k8s-app=calico-apiserver + > pod-template-hash=6fdc7b8d5c + > Annotations: cni.projectcalico.org/containerID: 756a921aa43dfa8f88da8dbf23dfc73d627977b2a9c504b498ec6f403ae409aa + > cni.projectcalico.org/podIP: 172.16.25.66/32 + > cni.projectcalico.org/podIPs: 172.16.25.66/32 + > hash.operator.tigera.io/calico-apiserver-certs: 631758c34fff686de27ac5a15c25b047cbb0305f + > Status: Running + > IP: 172.16.25.66 + > IPs: + > IP: 172.16.25.66 + > Controlled By: ReplicaSet/calico-apiserver-6fdc7b8d5c + > Containers: + > calico-apiserver: + > Container ID: containerd://6d8168d249f0d93807c1c01f6f85767407dedb6dc1bb2c07e243ffd63a8f04ba + > Image: docker.io/calico/apiserver:v3.26.0 + > Image ID: docker.io/calico/apiserver@sha256:7cb5f719499163c172de25d55f1b3fc9bb1b6ea7ad8a1c8259e3eb3ac74890fc + > Port: + > Host Port: + > SeccompProfile: RuntimeDefault + > Args: + > --secure-port=5443 + > --tls-private-key-file=/calico-apiserver-certs/tls.key + > --tls-cert-file=/calico-apiserver-certs/tls.crt + > State: Running + > Started: Mon, 19 Feb 2024 17:04:28 +0000 + > Ready: True + > Restart Count: 0 + > Liveness: http-get https://:5443/version delay=90s timeout=1s period=10s #success=1 #failure=3 + > Readiness: exec [/code/filecheck] delay=5s timeout=1s period=10s #success=1 #failure=5 + > Environment: + > DATASTORE_TYPE: kubernetes + > KUBERNETES_SERVICE_HOST: 172.24.0.1 + > KUBERNETES_SERVICE_PORT: 443 + > MULTI_INTERFACE_MODE: none + > Mounts: + > /calico-apiserver-certs from calico-apiserver-certs (ro) + > /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-s8khn (ro) + > Conditions: + > Type Status + > Initialized True + > Ready True + > ContainersReady True + > PodScheduled True + > Volumes: + > calico-apiserver-certs: + > Type: Secret (a volume populated by a Secret) + > SecretName: calico-apiserver-certs + > Optional: false + > kube-api-access-s8khn: + > Type: Projected (a volume that contains injected data from multiple sources) + > TokenExpirationSeconds: 3607 + > ConfigMapName: kube-root-ca.crt + > ConfigMapOptional: + > DownwardAPI: true + > QoS Class: BestEffort + > Node-Selectors: kubernetes.io/os=linux + > Tolerations: node-role.kubernetes.io/control-plane:NoSchedule + > node-role.kubernetes.io/master:NoSchedule + > node.kubernetes.io/not-ready:NoExecute op=Exists for 300s + > 
node.kubernetes.io/unreachable:NoExecute op=Exists for 300s + > Events: + > Type Reason Age From Message + > ---- ------ ---- ---- ------- + > Normal Scheduled 28m default-scheduler Successfully assigned calico-apiserver/calico-apiserver-6fdc7b8d5c-l7l2n to somerville-jade-20240219-work-md-0-fb50a5e8-ntqbd + > Normal Pulling 28m kubelet Pulling image "docker.io/calico/apiserver:v3.26.0" + > Normal Pulled 28m kubelet Successfully pulled image "docker.io/calico/apiserver:v3.26.0" in 4.700072553s (4.70017801s including waiting) + > Normal Created 28m kubelet Created container calico-apiserver + > Normal Started 28m kubelet Started container calico-apiserver + > + > ---- ---- ---- ---- + > Podname [calico-apiserver-6fdc7b8d5c-rhpkn] + > Name: calico-apiserver-6fdc7b8d5c-rhpkn + > Namespace: calico-apiserver + > Priority: 0 + > Service Account: calico-apiserver + > Node: somerville-jade-20240219-work-md-0-fb50a5e8-whft4/192.168.3.113 + > Start Time: Mon, 19 Feb 2024 17:04:22 +0000 + > Labels: apiserver=true + > app.kubernetes.io/name=calico-apiserver + > k8s-app=calico-apiserver + > pod-template-hash=6fdc7b8d5c + > Annotations: cni.projectcalico.org/containerID: 96f37000f7731a1ed2dfa1be5c0797d99b3e7a14b0055c1e570edc200d77695c + > cni.projectcalico.org/podIP: 172.21.168.199/32 + > cni.projectcalico.org/podIPs: 172.21.168.199/32 + > hash.operator.tigera.io/calico-apiserver-certs: 631758c34fff686de27ac5a15c25b047cbb0305f + > Status: Running + > IP: 172.21.168.199 + > IPs: + > IP: 172.21.168.199 + > Controlled By: ReplicaSet/calico-apiserver-6fdc7b8d5c + > Containers: + > calico-apiserver: + > Container ID: containerd://5d9e2658df6044035e38f579f481bf21c0ec194c7523b59110e94c1f8b270703 + > Image: docker.io/calico/apiserver:v3.26.0 + > Image ID: docker.io/calico/apiserver@sha256:7cb5f719499163c172de25d55f1b3fc9bb1b6ea7ad8a1c8259e3eb3ac74890fc + > Port: + > Host Port: + > SeccompProfile: RuntimeDefault + > Args: + > --secure-port=5443 + > --tls-private-key-file=/calico-apiserver-certs/tls.key + > --tls-cert-file=/calico-apiserver-certs/tls.crt + > State: Running + > Started: Mon, 19 Feb 2024 17:04:51 +0000 + > Ready: True + > Restart Count: 0 + > Liveness: http-get https://:5443/version delay=90s timeout=1s period=10s #success=1 #failure=3 + > Readiness: exec [/code/filecheck] delay=5s timeout=1s period=10s #success=1 #failure=5 + > Environment: + > DATASTORE_TYPE: kubernetes + > KUBERNETES_SERVICE_HOST: 172.24.0.1 + > KUBERNETES_SERVICE_PORT: 443 + > MULTI_INTERFACE_MODE: none + > Mounts: + > /calico-apiserver-certs from calico-apiserver-certs (ro) + > /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-j6csd (ro) + > Conditions: + > Type Status + > Initialized True + > Ready True + > ContainersReady True + > PodScheduled True + > Volumes: + > calico-apiserver-certs: + > Type: Secret (a volume populated by a Secret) + > SecretName: calico-apiserver-certs + > Optional: false + > kube-api-access-j6csd: + > Type: Projected (a volume that contains injected data from multiple sources) + > TokenExpirationSeconds: 3607 + > ConfigMapName: kube-root-ca.crt + > ConfigMapOptional: + > DownwardAPI: true + > QoS Class: BestEffort + > Node-Selectors: kubernetes.io/os=linux + > Tolerations: node-role.kubernetes.io/control-plane:NoSchedule + > node-role.kubernetes.io/master:NoSchedule + > node.kubernetes.io/not-ready:NoExecute op=Exists for 300s + > node.kubernetes.io/unreachable:NoExecute op=Exists for 300s + > Events: + > Type Reason Age From Message + > ---- ------ ---- ---- ------- + > Normal 
Scheduled 28m default-scheduler Successfully assigned calico-apiserver/calico-apiserver-6fdc7b8d5c-rhpkn to somerville-jade-20240219-work-md-0-fb50a5e8-whft4 + > Normal Pulling 28m kubelet Pulling image "docker.io/calico/apiserver:v3.26.0" + > Normal Pulled 28m kubelet Successfully pulled image "docker.io/calico/apiserver:v3.26.0" in 4.147833587s (27.685598929s including waiting) + > Normal Created 28m kubelet Created container calico-apiserver + > Normal Started 28m kubelet Started container calico-apiserver + > Connection to bootstrap closed. + + +# ----------------------------------------------------- +# Check the 'calico-kube-controllers' Pod in the 'calico-system' namespace. +# https://github.com/lsst-uk/somerville-operations/issues/144#issuecomment-1910276921 +#[root@ansibler] + + ssh bootstrap -t \ + ' + source loadconfig + kubectl \ + --kubeconfig "${workclusterconf:?}" \ + get pods \ + --namespace calico-system \ + --output json \ + | jq -r ".items[].metadata.name | select(. | startswith(\"calico-kube\"))" + ' + + > calico-kube-controllers-6b6ff777c8-znd2l + + + ssh bootstrap -t \ + ' + source loadconfig + for podname in $( + kubectl \ + --kubeconfig "${workclusterconf:?}" \ + get pods \ + --namespace calico-system \ + --output json \ + | jq -r ".items[].metadata.name | select(. | startswith(\"calico-kube\"))" + ) + do + echo "" + echo "---- ---- ---- ----" + echo "Podname [${podname}]" + kubectl \ + --kubeconfig "${workclusterconf:?}" \ + describe pod \ + --namespace calico-system \ + "${podname}" + done + ' + + > ---- ---- ---- ---- + > Podname [calico-kube-controllers-6b6ff777c8-znd2l] + > Name: calico-kube-controllers-6b6ff777c8-znd2l + > Namespace: calico-system + > Priority: 2000000000 + > Priority Class Name: system-cluster-critical + > Service Account: calico-kube-controllers + > Node: somerville-jade-20240219-work-control-plane-ac9af912-gjv45/192.168.3.186 + > Start Time: Mon, 19 Feb 2024 17:03:44 +0000 + > Labels: app.kubernetes.io/name=calico-kube-controllers + > k8s-app=calico-kube-controllers + > pod-template-hash=6b6ff777c8 + > Annotations: cni.projectcalico.org/containerID: fe09123ab529ed895aaf1f1dcd87de8e0a17b9c04a4716c4ba0eff1ba1941b0d + > cni.projectcalico.org/podIP: 172.22.18.5/32 + > cni.projectcalico.org/podIPs: 172.22.18.5/32 + > hash.operator.tigera.io/system: bb4746872201725da2dea19756c475aa67d9c1e9 + > hash.operator.tigera.io/tigera-ca-private: 98097acfbf24fd9cbb280f444442d222bb399518 + > Status: Running + > IP: 172.22.18.5 + > IPs: + > IP: 172.22.18.5 + > Controlled By: ReplicaSet/calico-kube-controllers-6b6ff777c8 + > Containers: + > calico-kube-controllers: + > Container ID: containerd://c42bef4b8557e0f7a24a81cd7ea0efcd15d3a4c682da46e65fa65ccd52994231 + > Image: docker.io/calico/kube-controllers:v3.26.0 + > Image ID: docker.io/calico/kube-controllers@sha256:a097a09cd7523e6a2ad5b6f7566e68ece5827a77fcbc631d6c34d6d092db4aaa + > Port: + > Host Port: + > SeccompProfile: RuntimeDefault + > State: Running + > Started: Mon, 19 Feb 2024 17:04:07 +0000 + > Ready: True + > Restart Count: 0 + > Liveness: exec [/usr/bin/check-status -l] delay=10s timeout=10s period=10s #success=1 #failure=6 + > Readiness: exec [/usr/bin/check-status -r] delay=0s timeout=10s period=10s #success=1 #failure=3 + > Environment: + > KUBE_CONTROLLERS_CONFIG_NAME: default + > DATASTORE_TYPE: kubernetes + > ENABLED_CONTROLLERS: node + > FIPS_MODE_ENABLED: false + > KUBERNETES_SERVICE_HOST: 172.24.0.1 + > KUBERNETES_SERVICE_PORT: 443 + > CA_CRT_PATH: /etc/pki/tls/certs/tigera-ca-bundle.crt + 
> Mounts: + > /etc/pki/tls/cert.pem from tigera-ca-bundle (ro,path="ca-bundle.crt") + > /etc/pki/tls/certs from tigera-ca-bundle (ro) + > /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-fq7cr (ro) + > Conditions: + > Type Status + > Initialized True + > Ready True + > ContainersReady True + > PodScheduled True + > Volumes: + > tigera-ca-bundle: + > Type: ConfigMap (a volume populated by a ConfigMap) + > Name: tigera-ca-bundle + > Optional: false + > kube-api-access-fq7cr: + > Type: Projected (a volume that contains injected data from multiple sources) + > TokenExpirationSeconds: 3607 + > ConfigMapName: kube-root-ca.crt + > ConfigMapOptional: + > DownwardAPI: true + > QoS Class: BestEffort + > Node-Selectors: kubernetes.io/os=linux + > Tolerations: CriticalAddonsOnly op=Exists + > node-role.kubernetes.io/control-plane:NoSchedule + > node-role.kubernetes.io/master:NoSchedule + > node.kubernetes.io/not-ready:NoExecute op=Exists for 300s + > node.kubernetes.io/unreachable:NoExecute op=Exists for 300s + > Events: + > Type Reason Age From Message + > ---- ------ ---- ---- ------- + > Warning FailedScheduling 87m default-scheduler 0/1 nodes are available: 1 node(s) had untolerated taint {node.cloudprovider.kubernetes.io/uninitialized: true}. preemption: 0/1 nodes are available: 1 Preemption is not helpful for scheduling.. + > Normal Scheduled 86m default-scheduler Successfully assigned calico-system/calico-kube-controllers-6b6ff777c8-znd2l to somerville-jade-20240219-work-control-plane-ac9af912-gjv45 + > Warning FailedCreatePodSandBox 86m kubelet Failed to create pod sandbox: rpc error: code = Unknown desc = failed to setup network for sandbox "f678969fe336c93ad34796b0db47adabf3bb6158770dc3bf8c3a30931026df19": plugin type="calico" failed (add): stat /var/lib/calico/nodename: no such file or directory: check that the calico/node container is running and has mounted /var/lib/calico/ + > Normal SandboxChanged 86m (x2 over 86m) kubelet Pod sandbox changed, it will be killed and re-created. 
+ > Normal Pulling 86m kubelet Pulling image "docker.io/calico/kube-controllers:v3.26.0" + > Normal Created 86m kubelet Created container calico-kube-controllers + > Normal Pulled 86m kubelet Successfully pulled image "docker.io/calico/kube-controllers:v3.26.0" in 5.062029507s (8.800845877s including waiting) + > Normal Started 86m kubelet Started container calico-kube-controllers + > Warning Unhealthy 82m kubelet Liveness probe failed: Error reaching apiserver: Get "https://172.24.0.1:443/apis/crd.projectcalico.org/v1/clusterinformations/default": context deadline exceeded with http status code: 500; Error verifying datastore: Get "https://172.24.0.1:443/apis/crd.projectcalico.org/v1/clusterinformations/default": context deadline exceeded + > Warning Unhealthy 76m (x6 over 85m) kubelet Readiness probe failed: Error verifying datastore: Get "https://172.24.0.1:443/apis/crd.projectcalico.org/v1/clusterinformations/default": context deadline exceeded; Error reaching apiserver: Get "https://172.24.0.1:443/apis/crd.projectcalico.org/v1/clusterinformations/default": context deadline exceeded with http status code: 500 + > Warning Unhealthy 76m (x5 over 85m) kubelet Liveness probe failed: Error verifying datastore: Get "https://172.24.0.1:443/apis/crd.projectcalico.org/v1/clusterinformations/default": context deadline exceeded; Error reaching apiserver: Get "https://172.24.0.1:443/apis/crd.projectcalico.org/v1/clusterinformations/default": context deadline exceeded with http status code: 500 + > Warning Unhealthy 75m (x2 over 78m) kubelet Liveness probe failed: Error verifying datastore: Get "https://172.24.0.1:443/apis/crd.projectcalico.org/v1/clusterinformations/default": context deadline exceeded + > Warning Unhealthy 75m (x2 over 78m) kubelet Readiness probe failed: Error verifying datastore: Get "https://172.24.0.1:443/apis/crd.projectcalico.org/v1/clusterinformations/default": context deadline exceeded + > Warning Unhealthy 75m (x2 over 75m) kubelet Liveness probe failed: Error reaching apiserver: with http status code: 500 + > Warning Unhealthy 75m (x2 over 75m) kubelet Readiness probe failed: Error reaching apiserver: with http status code: 500 + + +# ----------------------------------------------------- +# Check the first 3 'calico-node' Pods in the 'calico-system' namespace. +# https://github.com/lsst-uk/somerville-operations/issues/144#issuecomment-1910276921 +#[root@ansibler] + + ssh bootstrap -t \ + ' + source loadconfig + kubectl \ + --kubeconfig "${workclusterconf:?}" \ + get pods \ + --namespace calico-system \ + --output json \ + | jq -r "limit(3; .items[].metadata.name | select(. | startswith(\"calico-node\")))" + ' + + > calico-node-6h82p + > calico-node-d27fv + > calico-node-kms42 + + + ssh bootstrap -t \ + ' + source loadconfig + for podname in $( + kubectl \ + --kubeconfig "${workclusterconf:?}" \ + get pods \ + --namespace calico-system \ + --output json \ + | jq -r "limit(3; .items[].metadata.name | select(. 
| startswith(\"calico-node\")))" + ) + do + echo "" + echo "---- ---- ---- ----" + echo "Podname [${podname}]" + kubectl \ + --kubeconfig "${workclusterconf:?}" \ + describe pod \ + --namespace calico-system \ + "${podname}" + done + ' + + > ---- ---- ---- ---- + > Podname [calico-node-6h82p] + > Name: calico-node-6h82p + > Namespace: calico-system + > Priority: 2000001000 + > Priority Class Name: system-node-critical + > Service Account: calico-node + > Node: somerville-jade-20240219-work-md-0-fb50a5e8-ntqbd/192.168.3.129 + > Start Time: Mon, 19 Feb 2024 17:03:34 +0000 + > Labels: app.kubernetes.io/name=calico-node + > controller-revision-hash=85d8f76fdd + > k8s-app=calico-node + > pod-template-generation=1 + > Annotations: hash.operator.tigera.io/cni-config: 7c1526fa50c76a4b3650efe703ed353846f576d4 + > hash.operator.tigera.io/system: bb4746872201725da2dea19756c475aa67d9c1e9 + > hash.operator.tigera.io/tigera-ca-private: 98097acfbf24fd9cbb280f444442d222bb399518 + > Status: Running + > IP: 192.168.3.129 + > IPs: + > IP: 192.168.3.129 + > Controlled By: DaemonSet/calico-node + > Init Containers: + > flexvol-driver: + > Container ID: containerd://9b9d4f0c6db836745c00f5edbf3d379f2faff2b5a0c4401dff18642d113a0b96 + > Image: docker.io/calico/pod2daemon-flexvol:v3.26.0 + > Image ID: docker.io/calico/pod2daemon-flexvol@sha256:f8a737c47bfef9f070c50f02332d931cb3d19d544f7b1867fee0a99cb0626356 + > Port: + > Host Port: + > SeccompProfile: RuntimeDefault + > State: Terminated + > Reason: Completed + > Exit Code: 0 + > Started: Mon, 19 Feb 2024 17:03:52 +0000 + > Finished: Mon, 19 Feb 2024 17:03:52 +0000 + > Ready: True + > Restart Count: 0 + > Environment: + > Mounts: + > /host/driver from flexvol-driver-host (rw) + > /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-vzwhp (ro) + > install-cni: + > Container ID: containerd://eee97c347d78ee76f1c1c79f14be7f9ef2c11504fd3ea00d2b3826418e64eb42 + > Image: docker.io/calico/cni:v3.26.0 + > Image ID: docker.io/calico/cni@sha256:c7c80d82dc4f85ac4d7f2345c940bc7f818bbea03c2043df89923c032d8ee7fc + > Port: + > Host Port: + > SeccompProfile: RuntimeDefault + > Command: + > /opt/cni/bin/install + > State: Terminated + > Reason: Completed + > Exit Code: 0 + > Started: Mon, 19 Feb 2024 17:03:59 +0000 + > Finished: Mon, 19 Feb 2024 17:04:00 +0000 + > Ready: True + > Restart Count: 0 + > Environment: + > CNI_CONF_NAME: 10-calico.conflist + > SLEEP: false + > CNI_NET_DIR: /etc/cni/net.d + > CNI_NETWORK_CONFIG: Optional: false + > KUBERNETES_SERVICE_HOST: 172.24.0.1 + > KUBERNETES_SERVICE_PORT: 443 + > Mounts: + > /host/etc/cni/net.d from cni-net-dir (rw) + > /host/opt/cni/bin from cni-bin-dir (rw) + > /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-vzwhp (ro) + > Containers: + > calico-node: + > Container ID: containerd://a9f88eeade97fe0f7b6d3c31dfa51f7696f5d8dfc4e01f901eaf7ae8bf04ad64 + > Image: docker.io/calico/node:v3.26.0 + > Image ID: docker.io/calico/node@sha256:5086f1ef0287886811fea4d545a4bbb75d9345367b1b1ad1aa4447af2ecbc4ea + > Port: + > Host Port: + > SeccompProfile: RuntimeDefault + > State: Running + > Started: Mon, 19 Feb 2024 17:04:09 +0000 + > Ready: True + > Restart Count: 0 + > Liveness: http-get http://localhost:9099/liveness delay=0s timeout=10s period=10s #success=1 #failure=3 + > Readiness: exec [/bin/calico-node -felix-ready] delay=0s timeout=5s period=10s #success=1 #failure=3 + > Environment: + > DATASTORE_TYPE: kubernetes + > WAIT_FOR_DATASTORE: true + > CLUSTER_TYPE: k8s,operator + > CALICO_DISABLE_FILE_LOGGING: 
false + > FELIX_DEFAULTENDPOINTTOHOSTACTION: ACCEPT + > FELIX_HEALTHENABLED: true + > FELIX_HEALTHPORT: 9099 + > NODENAME: (v1:spec.nodeName) + > NAMESPACE: calico-system (v1:metadata.namespace) + > FELIX_TYPHAK8SNAMESPACE: calico-system + > FELIX_TYPHAK8SSERVICENAME: calico-typha + > FELIX_TYPHACAFILE: /etc/pki/tls/certs/tigera-ca-bundle.crt + > FELIX_TYPHACERTFILE: /node-certs/tls.crt + > FELIX_TYPHAKEYFILE: /node-certs/tls.key + > FIPS_MODE_ENABLED: false + > FELIX_TYPHACN: typha-server + > CALICO_MANAGE_CNI: true + > CALICO_IPV4POOL_CIDR: 172.16.0.0/13 + > CALICO_IPV4POOL_VXLAN: Always + > CALICO_IPV4POOL_BLOCK_SIZE: 26 + > CALICO_IPV4POOL_NODE_SELECTOR: all() + > CALICO_IPV4POOL_DISABLE_BGP_EXPORT: false + > CALICO_NETWORKING_BACKEND: vxlan + > IP: autodetect + > IP_AUTODETECTION_METHOD: kubernetes-internal-ip + > IP6: none + > FELIX_IPV6SUPPORT: false + > KUBERNETES_SERVICE_HOST: 172.24.0.1 + > KUBERNETES_SERVICE_PORT: 443 + > Mounts: + > /etc/pki/tls/cert.pem from tigera-ca-bundle (ro,path="ca-bundle.crt") + > /etc/pki/tls/certs from tigera-ca-bundle (ro) + > /host/etc/cni/net.d from cni-net-dir (rw) + > /lib/modules from lib-modules (ro) + > /node-certs from node-certs (ro) + > /run/xtables.lock from xtables-lock (rw) + > /var/lib/calico from var-lib-calico (rw) + > /var/log/calico/cni from cni-log-dir (rw) + > /var/run/calico from var-run-calico (rw) + > /var/run/nodeagent from policysync (rw) + > /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-vzwhp (ro) + > Conditions: + > Type Status + > Initialized True + > Ready True + > ContainersReady True + > PodScheduled True + > Volumes: + > lib-modules: + > Type: HostPath (bare host directory volume) + > Path: /lib/modules + > HostPathType: + > xtables-lock: + > Type: HostPath (bare host directory volume) + > Path: /run/xtables.lock + > HostPathType: FileOrCreate + > policysync: + > Type: HostPath (bare host directory volume) + > Path: /var/run/nodeagent + > HostPathType: DirectoryOrCreate + > tigera-ca-bundle: + > Type: ConfigMap (a volume populated by a ConfigMap) + > Name: tigera-ca-bundle + > Optional: false + > node-certs: + > Type: Secret (a volume populated by a Secret) + > SecretName: node-certs + > Optional: false + > var-run-calico: + > Type: HostPath (bare host directory volume) + > Path: /var/run/calico + > HostPathType: + > var-lib-calico: + > Type: HostPath (bare host directory volume) + > Path: /var/lib/calico + > HostPathType: + > cni-bin-dir: + > Type: HostPath (bare host directory volume) + > Path: /opt/cni/bin + > HostPathType: + > cni-net-dir: + > Type: HostPath (bare host directory volume) + > Path: /etc/cni/net.d + > HostPathType: + > cni-log-dir: + > Type: HostPath (bare host directory volume) + > Path: /var/log/calico/cni + > HostPathType: + > flexvol-driver-host: + > Type: HostPath (bare host directory volume) + > Path: /usr/libexec/kubernetes/kubelet-plugins/volume/exec/nodeagent~uds + > HostPathType: DirectoryOrCreate + > kube-api-access-vzwhp: + > Type: Projected (a volume that contains injected data from multiple sources) + > TokenExpirationSeconds: 3607 + > ConfigMapName: kube-root-ca.crt + > ConfigMapOptional: + > DownwardAPI: true + > QoS Class: BestEffort + > Node-Selectors: kubernetes.io/os=linux + > Tolerations: :NoSchedule op=Exists + > :NoExecute op=Exists + > CriticalAddonsOnly op=Exists + > node.kubernetes.io/disk-pressure:NoSchedule op=Exists + > node.kubernetes.io/memory-pressure:NoSchedule op=Exists + > node.kubernetes.io/network-unavailable:NoSchedule op=Exists + > 
node.kubernetes.io/not-ready:NoExecute op=Exists + > node.kubernetes.io/pid-pressure:NoSchedule op=Exists + > node.kubernetes.io/unreachable:NoExecute op=Exists + > node.kubernetes.io/unschedulable:NoSchedule op=Exists + > Events: + > Type Reason Age From Message + > ---- ------ ---- ---- ------- + > Normal Scheduled 94m default-scheduler Successfully assigned calico-system/calico-node-6h82p to somerville-jade-20240219-work-md-0-fb50a5e8-ntqbd + > Normal Pulling 94m kubelet Pulling image "docker.io/calico/pod2daemon-flexvol:v3.26.0" + > Normal Pulled 94m kubelet Successfully pulled image "docker.io/calico/pod2daemon-flexvol:v3.26.0" in 3.365999016s (3.366097822s including waiting) + > Warning Failed 94m (x2 over 94m) kubelet Error: services have not yet been read at least once, cannot construct envvars + > Normal Pulled 94m (x2 over 94m) kubelet Container image "docker.io/calico/pod2daemon-flexvol:v3.26.0" already present on machine + > Normal Created 94m kubelet Created container flexvol-driver + > Normal Started 94m kubelet Started container flexvol-driver + > Normal Pulling 94m kubelet Pulling image "docker.io/calico/cni:v3.26.0" + > Normal Pulled 94m kubelet Successfully pulled image "docker.io/calico/cni:v3.26.0" in 6.59046412s (6.590470262s including waiting) + > Normal Created 94m kubelet Created container install-cni + > Normal Started 94m kubelet Started container install-cni + > Normal Pulling 94m kubelet Pulling image "docker.io/calico/node:v3.26.0" + > Normal Pulled 93m kubelet Successfully pulled image "docker.io/calico/node:v3.26.0" in 7.928166573s (7.928173578s including waiting) + > Normal Created 93m kubelet Created container calico-node + > Normal Started 93m kubelet Started container calico-node + > Warning Unhealthy 93m kubelet Readiness probe failed: calico/node is not ready: felix is not ready: Get "http://localhost:9099/readiness": dial tcp [::1]:9099: connect: connection refused + > W0219 17:04:10.259145 55 feature_gate.go:241] Setting GA feature gate ServiceInternalTrafficPolicy=true. It will be removed in a future release. + > Warning Unhealthy 93m kubelet Readiness probe failed: calico/node is not ready: felix is not ready: readiness probe reporting 503 + > W0219 17:04:11.262556 208 feature_gate.go:241] Setting GA feature gate ServiceInternalTrafficPolicy=true. It will be removed in a future release. 
+ > + > ---- ---- ---- ---- + > Podname [calico-node-d27fv] + > Name: calico-node-d27fv + > Namespace: calico-system + > Priority: 2000001000 + > Priority Class Name: system-node-critical + > Service Account: calico-node + > Node: somerville-jade-20240219-work-md-0-fb50a5e8-whft4/192.168.3.113 + > Start Time: Mon, 19 Feb 2024 17:03:38 +0000 + > Labels: app.kubernetes.io/name=calico-node + > controller-revision-hash=85d8f76fdd + > k8s-app=calico-node + > pod-template-generation=1 + > Annotations: hash.operator.tigera.io/cni-config: 7c1526fa50c76a4b3650efe703ed353846f576d4 + > hash.operator.tigera.io/system: bb4746872201725da2dea19756c475aa67d9c1e9 + > hash.operator.tigera.io/tigera-ca-private: 98097acfbf24fd9cbb280f444442d222bb399518 + > Status: Running + > IP: 192.168.3.113 + > IPs: + > IP: 192.168.3.113 + > Controlled By: DaemonSet/calico-node + > Init Containers: + > flexvol-driver: + > Container ID: containerd://38abe01f88925af39f78b8795158233f917a5fd8d1fdb7aae139f91f1235c31c + > Image: docker.io/calico/pod2daemon-flexvol:v3.26.0 + > Image ID: docker.io/calico/pod2daemon-flexvol@sha256:f8a737c47bfef9f070c50f02332d931cb3d19d544f7b1867fee0a99cb0626356 + > Port: + > Host Port: + > SeccompProfile: RuntimeDefault + > State: Terminated + > Reason: Completed + > Exit Code: 0 + > Started: Mon, 19 Feb 2024 17:03:44 +0000 + > Finished: Mon, 19 Feb 2024 17:03:44 +0000 + > Ready: True + > Restart Count: 0 + > Environment: + > Mounts: + > /host/driver from flexvol-driver-host (rw) + > /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-fbxw5 (ro) + > install-cni: + > Container ID: containerd://b9337dd0bcd13efffb5720994ed7b57fad8a8dbeeb6ba2cf1eff0a002befc03c + > Image: docker.io/calico/cni:v3.26.0 + > Image ID: docker.io/calico/cni@sha256:c7c80d82dc4f85ac4d7f2345c940bc7f818bbea03c2043df89923c032d8ee7fc + > Port: + > Host Port: + > SeccompProfile: RuntimeDefault + > Command: + > /opt/cni/bin/install + > State: Terminated + > Reason: Completed + > Exit Code: 0 + > Started: Mon, 19 Feb 2024 17:03:55 +0000 + > Finished: Mon, 19 Feb 2024 17:03:56 +0000 + > Ready: True + > Restart Count: 0 + > Environment: + > CNI_CONF_NAME: 10-calico.conflist + > SLEEP: false + > CNI_NET_DIR: /etc/cni/net.d + > CNI_NETWORK_CONFIG: Optional: false + > KUBERNETES_SERVICE_HOST: 172.24.0.1 + > KUBERNETES_SERVICE_PORT: 443 + > Mounts: + > /host/etc/cni/net.d from cni-net-dir (rw) + > /host/opt/cni/bin from cni-bin-dir (rw) + > /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-fbxw5 (ro) + > Containers: + > calico-node: + > Container ID: containerd://d17b7b1e962a0927a64f8a5e5a4ae55bf3b96fde9148c36cb252a81cf93495f9 + > Image: docker.io/calico/node:v3.26.0 + > Image ID: docker.io/calico/node@sha256:5086f1ef0287886811fea4d545a4bbb75d9345367b1b1ad1aa4447af2ecbc4ea + > Port: + > Host Port: + > SeccompProfile: RuntimeDefault + > State: Running + > Started: Mon, 19 Feb 2024 17:04:07 +0000 + > Ready: True + > Restart Count: 0 + > Liveness: http-get http://localhost:9099/liveness delay=0s timeout=10s period=10s #success=1 #failure=3 + > Readiness: exec [/bin/calico-node -felix-ready] delay=0s timeout=5s period=10s #success=1 #failure=3 + > Environment: + > DATASTORE_TYPE: kubernetes + > WAIT_FOR_DATASTORE: true + > CLUSTER_TYPE: k8s,operator + > CALICO_DISABLE_FILE_LOGGING: false + > FELIX_DEFAULTENDPOINTTOHOSTACTION: ACCEPT + > FELIX_HEALTHENABLED: true + > FELIX_HEALTHPORT: 9099 + > NODENAME: (v1:spec.nodeName) + > NAMESPACE: calico-system (v1:metadata.namespace) + > FELIX_TYPHAK8SNAMESPACE: 
calico-system + > FELIX_TYPHAK8SSERVICENAME: calico-typha + > FELIX_TYPHACAFILE: /etc/pki/tls/certs/tigera-ca-bundle.crt + > FELIX_TYPHACERTFILE: /node-certs/tls.crt + > FELIX_TYPHAKEYFILE: /node-certs/tls.key + > FIPS_MODE_ENABLED: false + > FELIX_TYPHACN: typha-server + > CALICO_MANAGE_CNI: true + > CALICO_IPV4POOL_CIDR: 172.16.0.0/13 + > CALICO_IPV4POOL_VXLAN: Always + > CALICO_IPV4POOL_BLOCK_SIZE: 26 + > CALICO_IPV4POOL_NODE_SELECTOR: all() + > CALICO_IPV4POOL_DISABLE_BGP_EXPORT: false + > CALICO_NETWORKING_BACKEND: vxlan + > IP: autodetect + > IP_AUTODETECTION_METHOD: kubernetes-internal-ip + > IP6: none + > FELIX_IPV6SUPPORT: false + > KUBERNETES_SERVICE_HOST: 172.24.0.1 + > KUBERNETES_SERVICE_PORT: 443 + > Mounts: + > /etc/pki/tls/cert.pem from tigera-ca-bundle (ro,path="ca-bundle.crt") + > /etc/pki/tls/certs from tigera-ca-bundle (ro) + > /host/etc/cni/net.d from cni-net-dir (rw) + > /lib/modules from lib-modules (ro) + > /node-certs from node-certs (ro) + > /run/xtables.lock from xtables-lock (rw) + > /var/lib/calico from var-lib-calico (rw) + > /var/log/calico/cni from cni-log-dir (rw) + > /var/run/calico from var-run-calico (rw) + > /var/run/nodeagent from policysync (rw) + > /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-fbxw5 (ro) + > Conditions: + > Type Status + > Initialized True + > Ready True + > ContainersReady True + > PodScheduled True + > Volumes: + > lib-modules: + > Type: HostPath (bare host directory volume) + > Path: /lib/modules + > HostPathType: + > xtables-lock: + > Type: HostPath (bare host directory volume) + > Path: /run/xtables.lock + > HostPathType: FileOrCreate + > policysync: + > Type: HostPath (bare host directory volume) + > Path: /var/run/nodeagent + > HostPathType: DirectoryOrCreate + > tigera-ca-bundle: + > Type: ConfigMap (a volume populated by a ConfigMap) + > Name: tigera-ca-bundle + > Optional: false + > node-certs: + > Type: Secret (a volume populated by a Secret) + > SecretName: node-certs + > Optional: false + > var-run-calico: + > Type: HostPath (bare host directory volume) + > Path: /var/run/calico + > HostPathType: + > var-lib-calico: + > Type: HostPath (bare host directory volume) + > Path: /var/lib/calico + > HostPathType: + > cni-bin-dir: + > Type: HostPath (bare host directory volume) + > Path: /opt/cni/bin + > HostPathType: + > cni-net-dir: + > Type: HostPath (bare host directory volume) + > Path: /etc/cni/net.d + > HostPathType: + > cni-log-dir: + > Type: HostPath (bare host directory volume) + > Path: /var/log/calico/cni + > HostPathType: + > flexvol-driver-host: + > Type: HostPath (bare host directory volume) + > Path: /usr/libexec/kubernetes/kubelet-plugins/volume/exec/nodeagent~uds + > HostPathType: DirectoryOrCreate + > kube-api-access-fbxw5: + > Type: Projected (a volume that contains injected data from multiple sources) + > TokenExpirationSeconds: 3607 + > ConfigMapName: kube-root-ca.crt + > ConfigMapOptional: + > DownwardAPI: true + > QoS Class: BestEffort + > Node-Selectors: kubernetes.io/os=linux + > Tolerations: :NoSchedule op=Exists + > :NoExecute op=Exists + > CriticalAddonsOnly op=Exists + > node.kubernetes.io/disk-pressure:NoSchedule op=Exists + > node.kubernetes.io/memory-pressure:NoSchedule op=Exists + > node.kubernetes.io/network-unavailable:NoSchedule op=Exists + > node.kubernetes.io/not-ready:NoExecute op=Exists + > node.kubernetes.io/pid-pressure:NoSchedule op=Exists + > node.kubernetes.io/unreachable:NoExecute op=Exists + > node.kubernetes.io/unschedulable:NoSchedule op=Exists + > Events: + > 
Type Reason Age From Message + > ---- ------ ---- ---- ------- + > Normal Scheduled 94m default-scheduler Successfully assigned calico-system/calico-node-d27fv to somerville-jade-20240219-work-md-0-fb50a5e8-whft4 + > Normal Pulling 94m kubelet Pulling image "docker.io/calico/pod2daemon-flexvol:v3.26.0" + > Normal Pulled 94m kubelet Successfully pulled image "docker.io/calico/pod2daemon-flexvol:v3.26.0" in 3.283466337s (4.492166592s including waiting) + > Normal Created 94m kubelet Created container flexvol-driver + > Normal Started 94m kubelet Started container flexvol-driver + > Normal Pulling 94m kubelet Pulling image "docker.io/calico/cni:v3.26.0" + > Normal Pulled 94m kubelet Successfully pulled image "docker.io/calico/cni:v3.26.0" in 6.580640403s (10.949116566s including waiting) + > Normal Created 94m kubelet Created container install-cni + > Normal Started 94m kubelet Started container install-cni + > Normal Pulling 94m kubelet Pulling image "docker.io/calico/node:v3.26.0" + > Normal Pulled 94m kubelet Successfully pulled image "docker.io/calico/node:v3.26.0" in 6.86913986s (9.778872808s including waiting) + > Normal Created 94m kubelet Created container calico-node + > Normal Started 94m kubelet Started container calico-node + > Warning Unhealthy 94m kubelet Readiness probe failed: calico/node is not ready: felix is not ready: Get "http://localhost:9099/readiness": dial tcp [::1]:9099: connect: connection refused + > W0219 17:04:07.799771 27 feature_gate.go:241] Setting GA feature gate ServiceInternalTrafficPolicy=true. It will be removed in a future release. + > Warning Unhealthy 94m kubelet Readiness probe failed: calico/node is not ready: felix is not ready: Get "http://localhost:9099/readiness": dial tcp [::1]:9099: connect: connection refused + > W0219 17:04:08.804513 79 feature_gate.go:241] Setting GA feature gate ServiceInternalTrafficPolicy=true. It will be removed in a future release. + > Warning Unhealthy 94m kubelet Readiness probe failed: calico/node is not ready: felix is not ready: readiness probe reporting 503 + > W0219 17:04:09.553516 268 feature_gate.go:241] Setting GA feature gate ServiceInternalTrafficPolicy=true. It will be removed in a future release. 
+ > + > ---- ---- ---- ---- + > Podname [calico-node-kms42] + > Name: calico-node-kms42 + > Namespace: calico-system + > Priority: 2000001000 + > Priority Class Name: system-node-critical + > Service Account: calico-node + > Node: somerville-jade-20240219-work-control-plane-ac9af912-v42dq/192.168.3.171 + > Start Time: Mon, 19 Feb 2024 17:09:49 +0000 + > Labels: app.kubernetes.io/name=calico-node + > controller-revision-hash=85d8f76fdd + > k8s-app=calico-node + > pod-template-generation=1 + > Annotations: hash.operator.tigera.io/cni-config: 7c1526fa50c76a4b3650efe703ed353846f576d4 + > hash.operator.tigera.io/system: bb4746872201725da2dea19756c475aa67d9c1e9 + > hash.operator.tigera.io/tigera-ca-private: 98097acfbf24fd9cbb280f444442d222bb399518 + > Status: Running + > IP: 192.168.3.171 + > IPs: + > IP: 192.168.3.171 + > Controlled By: DaemonSet/calico-node + > Init Containers: + > flexvol-driver: + > Container ID: containerd://ebe7ea230ba167e42ac00a608503829cd25f824b244c98295e93e44328e588b5 + > Image: docker.io/calico/pod2daemon-flexvol:v3.26.0 + > Image ID: docker.io/calico/pod2daemon-flexvol@sha256:f8a737c47bfef9f070c50f02332d931cb3d19d544f7b1867fee0a99cb0626356 + > Port: + > Host Port: + > SeccompProfile: RuntimeDefault + > State: Terminated + > Reason: Completed + > Exit Code: 0 + > Started: Mon, 19 Feb 2024 17:12:09 +0000 + > Finished: Mon, 19 Feb 2024 17:12:09 +0000 + > Ready: True + > Restart Count: 0 + > Environment: + > Mounts: + > /host/driver from flexvol-driver-host (rw) + > /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-zffw2 (ro) + > install-cni: + > Container ID: containerd://519625e1989b43b2435ef3a01a9f99d6ddef09b41b187fe9124f21ee39486a20 + > Image: docker.io/calico/cni:v3.26.0 + > Image ID: docker.io/calico/cni@sha256:c7c80d82dc4f85ac4d7f2345c940bc7f818bbea03c2043df89923c032d8ee7fc + > Port: + > Host Port: + > SeccompProfile: RuntimeDefault + > Command: + > /opt/cni/bin/install + > State: Terminated + > Reason: Completed + > Exit Code: 0 + > Started: Mon, 19 Feb 2024 17:13:44 +0000 + > Finished: Mon, 19 Feb 2024 17:14:13 +0000 + > Ready: True + > Restart Count: 0 + > Environment: + > CNI_CONF_NAME: 10-calico.conflist + > SLEEP: false + > CNI_NET_DIR: /etc/cni/net.d + > CNI_NETWORK_CONFIG: Optional: false + > KUBERNETES_SERVICE_HOST: 172.24.0.1 + > KUBERNETES_SERVICE_PORT: 443 + > Mounts: + > /host/etc/cni/net.d from cni-net-dir (rw) + > /host/opt/cni/bin from cni-bin-dir (rw) + > /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-zffw2 (ro) + > Containers: + > calico-node: + > Container ID: containerd://d203d4a8470ddf43acd5fa9c85937efa30ab55c62a688467f54739fab061b3cb + > Image: docker.io/calico/node:v3.26.0 + > Image ID: docker.io/calico/node@sha256:5086f1ef0287886811fea4d545a4bbb75d9345367b1b1ad1aa4447af2ecbc4ea + > Port: + > Host Port: + > SeccompProfile: RuntimeDefault + > State: Running + > Started: Mon, 19 Feb 2024 17:17:13 +0000 + > Last State: Terminated + > Reason: Error + > Exit Code: 1 + > Started: Mon, 19 Feb 2024 17:16:58 +0000 + > Finished: Mon, 19 Feb 2024 17:17:03 +0000 + > Ready: True + > Restart Count: 2 + > Liveness: http-get http://localhost:9099/liveness delay=0s timeout=10s period=10s #success=1 #failure=3 + > Readiness: exec [/bin/calico-node -felix-ready] delay=0s timeout=5s period=10s #success=1 #failure=3 + > Environment: + > DATASTORE_TYPE: kubernetes + > WAIT_FOR_DATASTORE: true + > CLUSTER_TYPE: k8s,operator + > CALICO_DISABLE_FILE_LOGGING: false + > FELIX_DEFAULTENDPOINTTOHOSTACTION: ACCEPT + > 
FELIX_HEALTHENABLED: true + > FELIX_HEALTHPORT: 9099 + > NODENAME: (v1:spec.nodeName) + > NAMESPACE: calico-system (v1:metadata.namespace) + > FELIX_TYPHAK8SNAMESPACE: calico-system + > FELIX_TYPHAK8SSERVICENAME: calico-typha + > FELIX_TYPHACAFILE: /etc/pki/tls/certs/tigera-ca-bundle.crt + > FELIX_TYPHACERTFILE: /node-certs/tls.crt + > FELIX_TYPHAKEYFILE: /node-certs/tls.key + > FIPS_MODE_ENABLED: false + > FELIX_TYPHACN: typha-server + > CALICO_MANAGE_CNI: true + > CALICO_IPV4POOL_CIDR: 172.16.0.0/13 + > CALICO_IPV4POOL_VXLAN: Always + > CALICO_IPV4POOL_BLOCK_SIZE: 26 + > CALICO_IPV4POOL_NODE_SELECTOR: all() + > CALICO_IPV4POOL_DISABLE_BGP_EXPORT: false + > CALICO_NETWORKING_BACKEND: vxlan + > IP: autodetect + > IP_AUTODETECTION_METHOD: kubernetes-internal-ip + > IP6: none + > FELIX_IPV6SUPPORT: false + > KUBERNETES_SERVICE_HOST: 172.24.0.1 + > KUBERNETES_SERVICE_PORT: 443 + > Mounts: + > /etc/pki/tls/cert.pem from tigera-ca-bundle (ro,path="ca-bundle.crt") + > /etc/pki/tls/certs from tigera-ca-bundle (ro) + > /host/etc/cni/net.d from cni-net-dir (rw) + > /lib/modules from lib-modules (ro) + > /node-certs from node-certs (ro) + > /run/xtables.lock from xtables-lock (rw) + > /var/lib/calico from var-lib-calico (rw) + > /var/log/calico/cni from cni-log-dir (rw) + > /var/run/calico from var-run-calico (rw) + > /var/run/nodeagent from policysync (rw) + > /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-zffw2 (ro) + > Conditions: + > Type Status + > Initialized True + > Ready True + > ContainersReady True + > PodScheduled True + > Volumes: + > lib-modules: + > Type: HostPath (bare host directory volume) + > Path: /lib/modules + > HostPathType: + > xtables-lock: + > Type: HostPath (bare host directory volume) + > Path: /run/xtables.lock + > HostPathType: FileOrCreate + > policysync: + > Type: HostPath (bare host directory volume) + > Path: /var/run/nodeagent + > HostPathType: DirectoryOrCreate + > tigera-ca-bundle: + > Type: ConfigMap (a volume populated by a ConfigMap) + > Name: tigera-ca-bundle + > Optional: false + > node-certs: + > Type: Secret (a volume populated by a Secret) + > SecretName: node-certs + > Optional: false + > var-run-calico: + > Type: HostPath (bare host directory volume) + > Path: /var/run/calico + > HostPathType: + > var-lib-calico: + > Type: HostPath (bare host directory volume) + > Path: /var/lib/calico + > HostPathType: + > cni-bin-dir: + > Type: HostPath (bare host directory volume) + > Path: /opt/cni/bin + > HostPathType: + > cni-net-dir: + > Type: HostPath (bare host directory volume) + > Path: /etc/cni/net.d + > HostPathType: + > cni-log-dir: + > Type: HostPath (bare host directory volume) + > Path: /var/log/calico/cni + > HostPathType: + > flexvol-driver-host: + > Type: HostPath (bare host directory volume) + > Path: /usr/libexec/kubernetes/kubelet-plugins/volume/exec/nodeagent~uds + > HostPathType: DirectoryOrCreate + > kube-api-access-zffw2: + > Type: Projected (a volume that contains injected data from multiple sources) + > TokenExpirationSeconds: 3607 + > ConfigMapName: kube-root-ca.crt + > ConfigMapOptional: + > DownwardAPI: true + > QoS Class: BestEffort + > Node-Selectors: kubernetes.io/os=linux + > Tolerations: :NoSchedule op=Exists + > :NoExecute op=Exists + > CriticalAddonsOnly op=Exists + > node.kubernetes.io/disk-pressure:NoSchedule op=Exists + > node.kubernetes.io/memory-pressure:NoSchedule op=Exists + > node.kubernetes.io/network-unavailable:NoSchedule op=Exists + > node.kubernetes.io/not-ready:NoExecute op=Exists + > 
node.kubernetes.io/pid-pressure:NoSchedule op=Exists + > node.kubernetes.io/unreachable:NoExecute op=Exists + > node.kubernetes.io/unschedulable:NoSchedule op=Exists + > Events: + > Type Reason Age From Message + > ---- ------ ---- ---- ------- + > Normal Scheduled 88m default-scheduler Successfully assigned calico-system/calico-node-kms42 to somerville-jade-20240219-work-control-plane-ac9af912-v42dq + > Normal Pulling 88m kubelet Pulling image "docker.io/calico/pod2daemon-flexvol:v3.26.0" + > Normal Pulled 86m kubelet Successfully pulled image "docker.io/calico/pod2daemon-flexvol:v3.26.0" in 55.328793338s (2m11.025799765s including waiting) + > Normal Created 86m kubelet Created container flexvol-driver + > Normal Started 85m kubelet Started container flexvol-driver + > Normal Pulling 85m kubelet Pulling image "docker.io/calico/cni:v3.26.0" + > Normal Pulled 84m kubelet Successfully pulled image "docker.io/calico/cni:v3.26.0" in 1m15.602397012s (1m23.063931937s including waiting) + > Normal Created 84m kubelet Created container install-cni + > Normal Started 84m kubelet Started container install-cni + > Normal Pulling 83m kubelet Pulling image "docker.io/calico/node:v3.26.0" + > Normal Pulled 81m kubelet Successfully pulled image "docker.io/calico/node:v3.26.0" in 39.009670808s (1m59.559883559s including waiting) + > Warning Unhealthy 81m kubelet Readiness probe failed: calico/node is not ready: felix is not ready: Get "http://localhost:9099/readiness": dial tcp [::1]:9099: connect: connection refused + > W0219 17:16:27.755952 17 feature_gate.go:241] Setting GA feature gate ServiceInternalTrafficPolicy=true. It will be removed in a future release. + > Warning Unhealthy 81m kubelet Readiness probe failed: calico/node is not ready: felix is not ready: Get "http://localhost:9099/readiness": dial tcp [::1]:9099: connect: connection refused + > W0219 17:16:28.879073 29 feature_gate.go:241] Setting GA feature gate ServiceInternalTrafficPolicy=true. It will be removed in a future release. + > Warning Unhealthy 81m kubelet Readiness probe failed: calico/node is not ready: felix is not ready: Get "http://localhost:9099/readiness": dial tcp [::1]:9099: connect: connection refused + > W0219 17:16:32.049500 41 feature_gate.go:241] Setting GA feature gate ServiceInternalTrafficPolicy=true. It will be removed in a future release. + > Warning Unhealthy 81m kubelet Readiness probe failed: calico/node is not ready: felix is not ready: Get "http://localhost:9099/readiness": dial tcp [::1]:9099: connect: connection refused + > W0219 17:16:41.906753 53 feature_gate.go:241] Setting GA feature gate ServiceInternalTrafficPolicy=true. It will be removed in a future release. 
+ > Warning Unhealthy 81m (x3 over 81m) kubelet Liveness probe failed: Get "http://localhost:9099/liveness": dial tcp [::1]:9099: connect: connection refused + > Normal Killing 81m kubelet Container calico-node failed liveness probe, will be restarted + > Warning FailedPreStopHook 81m kubelet PreStopHook failed + > Warning Unhealthy 81m (x2 over 81m) kubelet Readiness probe errored: rpc error: code = NotFound desc = failed to exec in container: failed to load task: no running task found: task ddec81771b5c481c265f68c42f82169c0ae43a7ee6b80fdf9a583e2304e1fe91 not found: not found + > Normal Pulled 81m kubelet Container image "docker.io/calico/node:v3.26.0" already present on machine + > Normal Created 81m (x2 over 81m) kubelet Created container calico-node + > Normal Started 81m (x2 over 81m) kubelet Started container calico-node + + +# ----------------------------------------------------- +# Check the first 3 'calico-typha' Pods in the 'calico-system' namespace. +#[root@ansibler] + + ssh bootstrap -t \ + ' + source loadconfig + kubectl \ + --kubeconfig "${workclusterconf:?}" \ + get pods \ + --namespace calico-system \ + --output json \ + | jq -r "limit(3; .items[].metadata.name | select(. | startswith(\"calico-typha\")))" + ' + + > calico-typha-695c6f4d98-8h879 + > calico-typha-695c6f4d98-fssbf + > calico-typha-695c6f4d98-vfkvc + + + ssh bootstrap -t \ + ' + source loadconfig + for podname in $( + kubectl \ + --kubeconfig "${workclusterconf:?}" \ + get pods \ + --namespace calico-system \ + --output json \ + | jq -r "limit(3; .items[].metadata.name | select(. | startswith(\"calico-typha\")))" + ) + do + echo "" + echo "---- ---- ---- ----" + echo "Podname [${podname}]" + kubectl \ + --kubeconfig "${workclusterconf:?}" \ + describe pod \ + --namespace calico-system \ + "${podname}" + done + ' + + > ---- ---- ---- ---- + > Podname [calico-typha-695c6f4d98-8h879] + > Name: calico-typha-695c6f4d98-8h879 + > Namespace: calico-system + > Priority: 2000000000 + > Priority Class Name: system-cluster-critical + > Service Account: calico-typha + > Node: somerville-jade-20240219-work-md-0-fb50a5e8-ntqbd/192.168.3.129 + > Start Time: Mon, 19 Feb 2024 17:11:23 +0000 + > Labels: app.kubernetes.io/name=calico-typha + > k8s-app=calico-typha + > pod-template-hash=695c6f4d98 + > Annotations: hash.operator.tigera.io/system: bb4746872201725da2dea19756c475aa67d9c1e9 + > hash.operator.tigera.io/tigera-ca-private: 98097acfbf24fd9cbb280f444442d222bb399518 + > hash.operator.tigera.io/typha-certs: 0e7796d1d5162a8217cd23869362204a9ebc35e3 + > Status: Running + > IP: 192.168.3.129 + > IPs: + > IP: 192.168.3.129 + > Controlled By: ReplicaSet/calico-typha-695c6f4d98 + > Containers: + > calico-typha: + > Container ID: containerd://a6aaefde775db0a74fa0f9d3d428e404ea3775ebcd7c0aa655df5ae66047bcff + > Image: docker.io/calico/typha:v3.26.0 + > Image ID: docker.io/calico/typha@sha256:32328a796f86ee18660e77b22a494369b07abe749a0f184b65d922e17beaeee1 + > Port: 5473/TCP + > Host Port: 5473/TCP + > SeccompProfile: RuntimeDefault + > State: Running + > Started: Mon, 19 Feb 2024 17:11:28 +0000 + > Ready: True + > Restart Count: 0 + > Liveness: http-get http://localhost:9098/liveness delay=0s timeout=10s period=10s #success=1 #failure=3 + > Readiness: http-get http://localhost:9098/readiness delay=0s timeout=10s period=10s #success=1 #failure=3 + > Environment: + > TYPHA_LOGSEVERITYSCREEN: info + > TYPHA_LOGFILEPATH: none + > TYPHA_LOGSEVERITYSYS: none + > TYPHA_CONNECTIONREBALANCINGMODE: kubernetes + > TYPHA_DATASTORETYPE: 
kubernetes + > TYPHA_HEALTHENABLED: true + > TYPHA_HEALTHPORT: 9098 + > TYPHA_K8SNAMESPACE: calico-system + > TYPHA_CAFILE: /etc/pki/tls/certs/tigera-ca-bundle.crt + > TYPHA_SERVERCERTFILE: /typha-certs/tls.crt + > TYPHA_SERVERKEYFILE: /typha-certs/tls.key + > TYPHA_FIPSMODEENABLED: false + > TYPHA_SHUTDOWNTIMEOUTSECS: 300 + > TYPHA_CLIENTCN: typha-client + > KUBERNETES_SERVICE_HOST: 172.24.0.1 + > KUBERNETES_SERVICE_PORT: 443 + > Mounts: + > /etc/pki/tls/cert.pem from tigera-ca-bundle (ro,path="ca-bundle.crt") + > /etc/pki/tls/certs from tigera-ca-bundle (ro) + > /typha-certs from typha-certs (ro) + > /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-gsbnn (ro) + > Conditions: + > Type Status + > Initialized True + > Ready True + > ContainersReady True + > PodScheduled True + > Volumes: + > tigera-ca-bundle: + > Type: ConfigMap (a volume populated by a ConfigMap) + > Name: tigera-ca-bundle + > Optional: false + > typha-certs: + > Type: Secret (a volume populated by a Secret) + > SecretName: typha-certs + > Optional: false + > kube-api-access-gsbnn: + > Type: Projected (a volume that contains injected data from multiple sources) + > TokenExpirationSeconds: 3607 + > ConfigMapName: kube-root-ca.crt + > ConfigMapOptional: + > DownwardAPI: true + > QoS Class: BestEffort + > Node-Selectors: kubernetes.io/os=linux + > Tolerations: :NoSchedule op=Exists + > :NoExecute op=Exists + > CriticalAddonsOnly op=Exists + > Events: + > Type Reason Age From Message + > ---- ------ ---- ---- ------- + > Normal Scheduled 89m default-scheduler Successfully assigned calico-system/calico-typha-695c6f4d98-8h879 to somerville-jade-20240219-work-md-0-fb50a5e8-ntqbd + > Normal Pulling 89m kubelet Pulling image "docker.io/calico/typha:v3.26.0" + > Normal Pulled 89m kubelet Successfully pulled image "docker.io/calico/typha:v3.26.0" in 3.918294313s (3.918301096s including waiting) + > Normal Created 89m kubelet Created container calico-typha + > Normal Started 89m kubelet Started container calico-typha + > Warning Unhealthy 89m kubelet Readiness probe failed: HTTP probe failed with statuscode: 503 + > + > ---- ---- ---- ---- + > Podname [calico-typha-695c6f4d98-fssbf] + > Name: calico-typha-695c6f4d98-fssbf + > Namespace: calico-system + > Priority: 2000000000 + > Priority Class Name: system-cluster-critical + > Service Account: calico-typha + > Node: somerville-jade-20240219-work-md-0-fb50a5e8-whft4/192.168.3.113 + > Start Time: Mon, 19 Feb 2024 17:03:42 +0000 + > Labels: app.kubernetes.io/name=calico-typha + > k8s-app=calico-typha + > pod-template-hash=695c6f4d98 + > Annotations: hash.operator.tigera.io/system: bb4746872201725da2dea19756c475aa67d9c1e9 + > hash.operator.tigera.io/tigera-ca-private: 98097acfbf24fd9cbb280f444442d222bb399518 + > hash.operator.tigera.io/typha-certs: 0e7796d1d5162a8217cd23869362204a9ebc35e3 + > Status: Running + > IP: 192.168.3.113 + > IPs: + > IP: 192.168.3.113 + > Controlled By: ReplicaSet/calico-typha-695c6f4d98 + > Containers: + > calico-typha: + > Container ID: containerd://0d8f5447fdf54c6a46dd980d90d898707f232023f9383e28d60a98b16381ec0f + > Image: docker.io/calico/typha:v3.26.0 + > Image ID: docker.io/calico/typha@sha256:32328a796f86ee18660e77b22a494369b07abe749a0f184b65d922e17beaeee1 + > Port: 5473/TCP + > Host Port: 5473/TCP + > SeccompProfile: RuntimeDefault + > State: Running + > Started: Mon, 19 Feb 2024 17:04:14 +0000 + > Last State: Terminated + > Reason: Error + > Exit Code: 143 + > Started: Mon, 19 Feb 2024 17:03:49 +0000 + > Finished: Mon, 19 Feb 2024 
17:04:13 +0000 + > Ready: True + > Restart Count: 1 + > Liveness: http-get http://localhost:9098/liveness delay=0s timeout=10s period=10s #success=1 #failure=3 + > Readiness: http-get http://localhost:9098/readiness delay=0s timeout=10s period=10s #success=1 #failure=3 + > Environment: + > TYPHA_LOGSEVERITYSCREEN: info + > TYPHA_LOGFILEPATH: none + > TYPHA_LOGSEVERITYSYS: none + > TYPHA_CONNECTIONREBALANCINGMODE: kubernetes + > TYPHA_DATASTORETYPE: kubernetes + > TYPHA_HEALTHENABLED: true + > TYPHA_HEALTHPORT: 9098 + > TYPHA_K8SNAMESPACE: calico-system + > TYPHA_CAFILE: /etc/pki/tls/certs/tigera-ca-bundle.crt + > TYPHA_SERVERCERTFILE: /typha-certs/tls.crt + > TYPHA_SERVERKEYFILE: /typha-certs/tls.key + > TYPHA_FIPSMODEENABLED: false + > TYPHA_SHUTDOWNTIMEOUTSECS: 300 + > TYPHA_CLIENTCN: typha-client + > KUBERNETES_SERVICE_HOST: 172.24.0.1 + > KUBERNETES_SERVICE_PORT: 443 + > Mounts: + > /etc/pki/tls/cert.pem from tigera-ca-bundle (ro,path="ca-bundle.crt") + > /etc/pki/tls/certs from tigera-ca-bundle (ro) + > /typha-certs from typha-certs (ro) + > /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-zlq6r (ro) + > Conditions: + > Type Status + > Initialized True + > Ready True + > ContainersReady True + > PodScheduled True + > Volumes: + > tigera-ca-bundle: + > Type: ConfigMap (a volume populated by a ConfigMap) + > Name: tigera-ca-bundle + > Optional: false + > typha-certs: + > Type: Secret (a volume populated by a Secret) + > SecretName: typha-certs + > Optional: false + > kube-api-access-zlq6r: + > Type: Projected (a volume that contains injected data from multiple sources) + > TokenExpirationSeconds: 3607 + > ConfigMapName: kube-root-ca.crt + > ConfigMapOptional: + > DownwardAPI: true + > QoS Class: BestEffort + > Node-Selectors: kubernetes.io/os=linux + > Tolerations: :NoSchedule op=Exists + > :NoExecute op=Exists + > CriticalAddonsOnly op=Exists + > Events: + > Type Reason Age From Message + > ---- ------ ---- ---- ------- + > Normal Scheduled 97m default-scheduler Successfully assigned calico-system/calico-typha-695c6f4d98-fssbf to somerville-jade-20240219-work-md-0-fb50a5e8-whft4 + > Normal Pulling 97m kubelet Pulling image "docker.io/calico/typha:v3.26.0" + > Normal Pulled 96m kubelet Successfully pulled image "docker.io/calico/typha:v3.26.0" in 3.720552915s (5.68228993s including waiting) + > Warning Unhealthy 96m (x6 over 96m) kubelet Readiness probe failed: Get "http://localhost:9098/readiness": dial tcp [::1]:9098: connect: connection refused + > Warning Unhealthy 96m (x3 over 96m) kubelet Liveness probe failed: Get "http://localhost:9098/liveness": dial tcp [::1]:9098: connect: connection refused + > Normal Killing 96m kubelet Container calico-typha failed liveness probe, will be restarted + > Normal Created 96m (x2 over 96m) kubelet Created container calico-typha + > Normal Started 96m (x2 over 96m) kubelet Started container calico-typha + > Normal Pulled 96m kubelet Container image "docker.io/calico/typha:v3.26.0" already present on machine + > + > ---- ---- ---- ---- + > Podname [calico-typha-695c6f4d98-vfkvc] + > Name: calico-typha-695c6f4d98-vfkvc + > Namespace: calico-system + > Priority: 2000000000 + > Priority Class Name: system-cluster-critical + > Service Account: calico-typha + > Node: somerville-jade-20240219-work-control-plane-ac9af912-gjv45/192.168.3.186 + > Start Time: Mon, 19 Feb 2024 17:03:13 +0000 + > Labels: app.kubernetes.io/name=calico-typha + > k8s-app=calico-typha + > pod-template-hash=695c6f4d98 + > Annotations: 
hash.operator.tigera.io/system: bb4746872201725da2dea19756c475aa67d9c1e9 + > hash.operator.tigera.io/tigera-ca-private: 98097acfbf24fd9cbb280f444442d222bb399518 + > hash.operator.tigera.io/typha-certs: 0e7796d1d5162a8217cd23869362204a9ebc35e3 + > Status: Running + > IP: 192.168.3.186 + > IPs: + > IP: 192.168.3.186 + > Controlled By: ReplicaSet/calico-typha-695c6f4d98 + > Containers: + > calico-typha: + > Container ID: containerd://d83059fa41462cccc5e935973f14d41b9e7194f1c0b3ea2d0bcbf1266d30650f + > Image: docker.io/calico/typha:v3.26.0 + > Image ID: docker.io/calico/typha@sha256:32328a796f86ee18660e77b22a494369b07abe749a0f184b65d922e17beaeee1 + > Port: 5473/TCP + > Host Port: 5473/TCP + > SeccompProfile: RuntimeDefault + > State: Running + > Started: Mon, 19 Feb 2024 17:03:18 +0000 + > Ready: True + > Restart Count: 0 + > Liveness: http-get http://localhost:9098/liveness delay=0s timeout=10s period=10s #success=1 #failure=3 + > Readiness: http-get http://localhost:9098/readiness delay=0s timeout=10s period=10s #success=1 #failure=3 + > Environment: + > TYPHA_LOGSEVERITYSCREEN: info + > TYPHA_LOGFILEPATH: none + > TYPHA_LOGSEVERITYSYS: none + > TYPHA_CONNECTIONREBALANCINGMODE: kubernetes + > TYPHA_DATASTORETYPE: kubernetes + > TYPHA_HEALTHENABLED: true + > TYPHA_HEALTHPORT: 9098 + > TYPHA_K8SNAMESPACE: calico-system + > TYPHA_CAFILE: /etc/pki/tls/certs/tigera-ca-bundle.crt + > TYPHA_SERVERCERTFILE: /typha-certs/tls.crt + > TYPHA_SERVERKEYFILE: /typha-certs/tls.key + > TYPHA_FIPSMODEENABLED: false + > TYPHA_SHUTDOWNTIMEOUTSECS: 300 + > TYPHA_CLIENTCN: typha-client + > KUBERNETES_SERVICE_HOST: 172.24.0.1 + > KUBERNETES_SERVICE_PORT: 443 + > Mounts: + > /etc/pki/tls/cert.pem from tigera-ca-bundle (ro,path="ca-bundle.crt") + > /etc/pki/tls/certs from tigera-ca-bundle (ro) + > /typha-certs from typha-certs (ro) + > /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-9772f (ro) + > Conditions: + > Type Status + > Initialized True + > Ready True + > ContainersReady True + > PodScheduled True + > Volumes: + > tigera-ca-bundle: + > Type: ConfigMap (a volume populated by a ConfigMap) + > Name: tigera-ca-bundle + > Optional: false + > typha-certs: + > Type: Secret (a volume populated by a Secret) + > SecretName: typha-certs + > Optional: false + > kube-api-access-9772f: + > Type: Projected (a volume that contains injected data from multiple sources) + > TokenExpirationSeconds: 3607 + > ConfigMapName: kube-root-ca.crt + > ConfigMapOptional: + > DownwardAPI: true + > QoS Class: BestEffort + > Node-Selectors: kubernetes.io/os=linux + > Tolerations: :NoSchedule op=Exists + > :NoExecute op=Exists + > CriticalAddonsOnly op=Exists + > Events: + > Type Reason Age From Message + > ---- ------ ---- ---- ------- + > Normal Scheduled 97m default-scheduler Successfully assigned calico-system/calico-typha-695c6f4d98-vfkvc to somerville-jade-20240219-work-control-plane-ac9af912-gjv45 + > Normal Pulling 97m kubelet Pulling image "docker.io/calico/typha:v3.26.0" + > Normal Pulled 97m kubelet Successfully pulled image "docker.io/calico/typha:v3.26.0" in 4.104362668s (4.104461474s including waiting) + > Normal Created 97m kubelet Created container calico-typha + > Normal Started 97m kubelet Started container calico-typha + + + # + # Common theme between the Pods, liveness and readiness probes failing. + # + + > .... 
+ > Warning Unhealthy 82m kubelet Liveness probe failed: Error reaching apiserver: Get "https://172.24.0.1:443/apis/crd.projectcalico.org/v1/clusterinformations/default": context deadline exceeded with http status code: 500; Error verifying datastore: Get "https://172.24.0.1:443/apis/crd.projectcalico.org/v1/clusterinformations/default": context deadline exceeded + > Warning Unhealthy 76m (x6 over 85m) kubelet Readiness probe failed: Error verifying datastore: Get "https://172.24.0.1:443/apis/crd.projectcalico.org/v1/clusterinformations/default": context deadline exceeded; Error reaching apiserver: Get "https://172.24.0.1:443/apis/crd.projectcalico.org/v1/clusterinformations/default": context deadline exceeded with http status code: 500 + > Warning Unhealthy 76m (x5 over 85m) kubelet Liveness probe failed: Error verifying datastore: Get "https://172.24.0.1:443/apis/crd.projectcalico.org/v1/clusterinformations/default": context deadline exceeded; Error reaching apiserver: Get "https://172.24.0.1:443/apis/crd.projectcalico.org/v1/clusterinformations/default": context deadline exceeded with http status code: 500 + > Warning Unhealthy 75m (x2 over 78m) kubelet Liveness probe failed: Error verifying datastore: Get "https://172.24.0.1:443/apis/crd.projectcalico.org/v1/clusterinformations/default": context deadline exceeded + > Warning Unhealthy 75m (x2 over 78m) kubelet Readiness probe failed: Error verifying datastore: Get "https://172.24.0.1:443/apis/crd.projectcalico.org/v1/clusterinformations/default": context deadline exceeded + > Warning Unhealthy 75m (x2 over 75m) kubelet Liveness probe failed: Error reaching apiserver: with http status code: 500 + > Warning Unhealthy 75m (x2 over 75m) kubelet Readiness probe failed: Error reaching apiserver: with http status code: 500 + > .... + + > .... + > Warning Unhealthy 93m kubelet Readiness probe failed: calico/node is not ready: felix is not ready: Get "http://localhost:9099/readiness": dial tcp [::1]:9099: connect: connection refused + > Warning Unhealthy 93m kubelet Readiness probe failed: calico/node is not ready: felix is not ready: readiness probe reporting 503 + > .... + + > .... + > Warning Unhealthy 94m kubelet Readiness probe failed: calico/node is not ready: felix is not ready: Get "http://localhost:9099/readiness": dial tcp [::1]:9099: connect: connection refused + > Warning Unhealthy 94m kubelet Readiness probe failed: calico/node is not ready: felix is not ready: Get "http://localhost:9099/readiness": dial tcp [::1]:9099: connect: connection refused + > Warning Unhealthy 94m kubelet Readiness probe failed: calico/node is not ready: felix is not ready: readiness probe reporting 503 + > .... + + > .... 
+    > Warning  Unhealthy  81m  kubelet  Readiness probe failed: calico/node is not ready: felix is not ready: Get "http://localhost:9099/readiness": dial tcp [::1]:9099: connect: connection refused
+    > Warning  Unhealthy  81m  kubelet  Readiness probe failed: calico/node is not ready: felix is not ready: Get "http://localhost:9099/readiness": dial tcp [::1]:9099: connect: connection refused
+    > Warning  Unhealthy  81m  kubelet  Readiness probe failed: calico/node is not ready: felix is not ready: Get "http://localhost:9099/readiness": dial tcp [::1]:9099: connect: connection refused
+    > Warning  Unhealthy  81m  kubelet  Readiness probe failed: calico/node is not ready: felix is not ready: Get "http://localhost:9099/readiness": dial tcp [::1]:9099: connect: connection refused
+    > Warning  Unhealthy  81m (x3 over 81m)  kubelet  Liveness probe failed: Get "http://localhost:9099/liveness": dial tcp [::1]:9099: connect: connection refused
+    > Normal   Killing    81m  kubelet  Container calico-node failed liveness probe, will be restarted
+    > Warning  FailedPreStopHook  81m  kubelet  PreStopHook failed
+    > Warning  Unhealthy  81m (x2 over 81m)  kubelet  Readiness probe errored: rpc error: code = NotFound desc = failed to exec in container: failed to load task: no running task found: task ddec81771b5c481c265f68c42f82169c0ae43a7ee6b80fdf9a583e2304e1fe91 not found: not found
+    > ....
+
+    > ....
+    > Warning  Unhealthy  96m (x6 over 96m)  kubelet  Readiness probe failed: Get "http://localhost:9098/readiness": dial tcp [::1]:9098: connect: connection refused
+    > Warning  Unhealthy  96m (x3 over 96m)  kubelet  Liveness probe failed: Get "http://localhost:9098/liveness": dial tcp [::1]:9098: connect: connection refused
+    > ....
+
diff --git a/notes/zrq/20240219-03-jade-reconnect.txt b/notes/zrq/20240219-03-jade-reconnect.txt
new file mode 100644
index 00000000..8fbce528
--- /dev/null
+++ b/notes/zrq/20240219-03-jade-reconnect.txt
@@ -0,0 +1,114 @@
+#
+#
+#
+# Copyright (c) 2024, ROE (http://www.roe.ac.uk/)
+#
+# This information is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This information is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#
+#
+#zrq-notes-time
+#zrq-notes-indent
+#zrq-notes-crypto
+#zrq-notes-ansible
+#zrq-notes-osformat
+#zrq-notes-zeppelin
+#
+# AIMetrics: []
+#
+
+    Target:
+
+        If we need to re-connect to a deployment on Jade,
+        all we need to do is fetch the 'aglais-status.yml' file and run an Ansible playbook
+        to re-configure the client.
+
+        Assumes we know the IP address of the bootstrap node,
+        which we can get from the Horizon GUI.
+
+    Result:
+
+        Work in progress ...
+
+# -----------------------------------------------------
+# Run our local client.
+#[user@desktop]
+
+    source "${HOME:?}/aglais.env"
+    export PATH=${PATH}:${AGLAIS_CODE}/bin
+
+    kube-client jade
+
+
+# -----------------------------------------------------
+# Check we can login via ssh.
+#[root@ansibler] + + ipaddress=192.41.122.174 + + ssh "fedora@${ipaddress}" \ + ' + date + hostname + ls -al /opt/aglais/aglais-status.yml + ' + + > Mon Feb 19 05:58:30 PM UTC 2024 + > somerville-jade-20240219-bootstrap-node.novalocal + > -rw-r--r--. 1 root root 1970 Feb 19 17:02 /opt/aglais/aglais-status.yml + + +# ----------------------------------------------------- +# Fetch the 'aglais-status' file. +#[root@ansibler] + + mkdir /opt/aglais + scp "fedora@${ipaddress}:/opt/aglais/aglais-status.yml" \ + /opt/aglais/aglais-status.yml + + > aglais-status.yml 100% 1970 54.1KB/s 00:00 + + +# ----------------------------------------------------- +# Run the 'config-ansible' playbook. +#[root@ansibler] + + export cloudsite=somerville-jade + ansible-playbook \ + --inventory 'bootstrap,' \ + '/deployments/cluster-api/ansible/04-config-ansible.yml' + + > .... + > .... + > PLAY RECAP **************************************************************************************** + > localhost : ok=5 changed=2 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 + + +# ----------------------------------------------------- +# Check we can login using the hostname. +#[root@ansibler] + + ssh bootstrap \ + ' + date + hostname + ' + + > Mon Feb 19 06:01:55 PM UTC 2024 + > somerville-jade-20240219-bootstrap-node.novalocal + + # + # Client configured :-D + # +
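+
+# -----------------------------------------------------
+# Sketch: find the bootstrap address without the Horizon GUI.
+# Untested sketch of a possible alternative using the openstack CLI.
+# Assumes the 'somerville-jade' entry in clouds.yaml and the
+# '*-bootstrap-node' naming used by the deploy scripts.
+#[root@ansibler]
+
+    openstack \
+        --os-cloud 'somerville-jade' \
+        server list \
+        --name 'bootstrap-node'
+
+    #
+    # The floating IP shown in the 'Networks' column should be the
+    # address to use for the 'ipaddress' variable above.
+    #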