diff --git a/bin/kube-client b/bin/kube-client
index af12a19b..24731175 100755
--- a/bin/kube-client
+++ b/bin/kube-client
@@ -65,6 +65,7 @@
containerfull=ghcr.io/wfau/atolmis/${containername:?}
kubectlproxy=127.0.0.1::8001
+ monitorproxy=127.0.0.1::3001
podman run \
--rm \
@@ -73,6 +74,7 @@
--name "${clientname:?}" \
--hostname "${clientname:?}" \
--publish "${kubectlproxy:?}" \
+ --publish "${monitorproxy:?}" \
--env "cloudname=${cloudname:?}" \
--env "cloudsite=${cloudsite:?}" \
--env "SSH_AUTH_SOCK=/mnt/ssh_auth_sock" \
diff --git a/deployments/cluster-api/ansible/00-create-all.yml b/deployments/cluster-api/ansible/00-create-all.yml
index e4739d83..273cbb43 100644
--- a/deployments/cluster-api/ansible/00-create-all.yml
+++ b/deployments/cluster-api/ansible/00-create-all.yml
@@ -29,7 +29,7 @@
- import_playbook: 22-install-capi-provider.yml
- import_playbook: 23-install-capi-helm-charts.yml
- import_playbook: 25-create-work-cluster.yml
-- import_playbook: 26-secure-work-cluster.yml
+# import_playbook: 26-secure-work-cluster.yml
- import_playbook: 30-install-aglais.yml
diff --git a/deployments/cluster-api/ansible/21-create-kind-cluster.yml b/deployments/cluster-api/ansible/21-create-kind-cluster.yml
index a20c1bf8..98709fc4 100644
--- a/deployments/cluster-api/ansible/21-create-kind-cluster.yml
+++ b/deployments/cluster-api/ansible/21-create-kind-cluster.yml
@@ -46,7 +46,7 @@
dest: "{{ aglais.kubernetes.cluster.kind.conf }}"
flat: yes
-- name: "Set local file permissions"
+- name: "Update localhost"
gather_facts: false
hosts: localhost
vars_files:
@@ -61,3 +61,11 @@
path: "{{ aglais.kubernetes.cluster.kind.conf }}"
mode: "u=rw,g=,o="
+ - name: "Update timestamp in [{{ agstatusfile }}]"
+ yedit:
+ src: "{{ agstatuspath }}"
+ key: aglais.kubernetes.cluster.kind.debug
+ value:
+ created: "{{ now('%Y-%m-%dT%H:%M:%S%:z') }}"
+
+
diff --git a/deployments/cluster-api/ansible/25-create-work-cluster.yml b/deployments/cluster-api/ansible/25-create-work-cluster.yml
index cb11e0f0..5dc1a39c 100644
--- a/deployments/cluster-api/ansible/25-create-work-cluster.yml
+++ b/deployments/cluster-api/ansible/25-create-work-cluster.yml
@@ -77,7 +77,7 @@
dest: "{{ aglais.kubernetes.cluster.work.conf }}"
flat: yes
-- name: "Set local file permissions"
+- name: "Update localhost"
gather_facts: false
hosts: localhost
vars_files:
@@ -92,4 +92,11 @@
path: "{{ aglais.kubernetes.cluster.work.conf }}"
mode: "u=rw,g=,o="
+ - name: "Update timestamp in [{{ agstatusfile }}]"
+ yedit:
+ src: "{{ agstatuspath }}"
+ key: aglais.kubernetes.cluster.work.debug
+ value:
+ created: "{{ now('%Y-%m-%dT%H:%M:%S%:z') }}"
+
diff --git a/deployments/cluster-api/ansible/templates/clusterapi-config.j2 b/deployments/cluster-api/ansible/templates/clusterapi-config.j2
index f35c16c2..639fe998 100644
--- a/deployments/cluster-api/ansible/templates/clusterapi-config.j2
+++ b/deployments/cluster-api/ansible/templates/clusterapi-config.j2
@@ -93,8 +93,7 @@ addons:
# Settings for the CNI addon
cni:
-
- # Indicates if a CNI should be deployed
+ # Indicates if a CNI should be deployed (default true)
enabled: true
# The CNI to deploy - supported values are calico or cilium
@@ -108,8 +107,53 @@ addons:
name: tigera-operator
version: v3.26.0
- # Include the Kubernetes dashboard
+ # Settings for the OpenStack integrations
+ openstack:
+ # Indicates if the OpenStack integrations should be enabled (default false)
+ enabled: false
+
+ # Settings for the metrics server
+ # https://github.com/kubernetes-sigs/metrics-server#helm-chart
+ metricsServer:
+ # Indicates if the metrics server should be deployed (default true)
+ enabled: true
+
+ # Settings for the Kubernetes dashboard
+ # https://github.com/kubernetes/dashboard/tree/master/charts/helm-chart/kubernetes-dashboard
kubernetesDashboard:
+ # Indicates if the Kubernetes dashboard should be enabled (default false)
+ enabled: true
+
+ # Settings for ingress controllers
+ ingress:
+ # Indicates if ingress controllers should be enabled (default false)
+ enabled: false
+
+ # Settings for cluster monitoring
+ monitoring:
+ # Indicates if the cluster monitoring should be enabled (default false)
+ enabled: true
+
+ # Settings for node feature discovery
+ # https://github.com/kubernetes-sigs/node-feature-discovery/tree/master/deployment/helm/node-feature-discovery
+ nodeFeatureDiscovery:
+ # Indicates if node feature discovery should be enabled (default true)
+ enabled: true
+
+ # Settings for the NVIDIA GPU operator
+ nvidiaGPUOperator:
+ # Indicates if the NVIDIA GPU operator should be enabled (default true)
+ # Note that because it uses node feature discovery to run only on nodes
+ # with an NVIDIA GPU available, the overhead of enabling this on clusters
+ # that do not need it now but may need it in the future is low
+ enabled: true
+
+ # Settings for the Mellanox network operator
+ mellanoxNetworkOperator:
+ # Indicates if the network operator should be enabled (default true)
+ # Note that because it uses node feature discovery to run only on nodes
+ # with a Mellanox NIC available, the overhead of enabling this on clusters
+ # that do not need it now but may need it in the future is low
enabled: true
diff --git a/deployments/cluster-api/ansible/templates/init-status.j2 b/deployments/cluster-api/ansible/templates/init-status.j2
index 72b83643..edd4adb7 100644
--- a/deployments/cluster-api/ansible/templates/init-status.j2
+++ b/deployments/cluster-api/ansible/templates/init-status.j2
@@ -26,6 +26,8 @@ aglais:
type: cluster-api
name: {{ deployname }}
date: {{ deploydate }}
+ debug:
+ started: "{{ now('%Y-%m-%dT%H:%M:%S%:z') }}"
openstack:
cloud:
name: {{ cloudname }}
diff --git a/notes/zrq/20240219-03-jade-reconnect.txt b/notes/zrq/20240219-03-jade-reconnect.txt
index 8fbce528..0af2db41 100644
--- a/notes/zrq/20240219-03-jade-reconnect.txt
+++ b/notes/zrq/20240219-03-jade-reconnect.txt
@@ -61,12 +61,10 @@
'
date
hostname
- ls -al /opt/aglais/aglais-status.yml
'
> Mon Feb 19 05:58:30 PM UTC 2024
> somerville-jade-20240219-bootstrap-node.novalocal
- > -rw-r--r--. 1 root root 1970 Feb 19 17:02 /opt/aglais/aglais-status.yml
# -----------------------------------------------------
@@ -112,3 +110,42 @@
# Client configured :-D
#
+
+# -----------------------------------------------------
+# Fetch the kubectl configuration files.
+#[root@ansibler]
+
+ yq '.aglais.kubernetes.cluster.kind.conf' \
+ '/opt/aglais/aglais-status.yml'
+
+ > /opt/aglais/somerville-jade-20240221-kind.yml
+
+
+ yq '.aglais.kubernetes.cluster.work.conf' \
+ '/opt/aglais/aglais-status.yml'
+
+ > /opt/aglais/somerville-jade-20240221-work.yml
+
+
+ kindclusterconf=$(
+ yq '.aglais.kubernetes.cluster.kind.conf' \
+ '/opt/aglais/aglais-status.yml'
+ )
+
+ workclusterconf=$(
+ yq '.aglais.kubernetes.cluster.work.conf' \
+ '/opt/aglais/aglais-status.yml'
+ )
+
+ scp "root@bootstrap:${kindclusterconf}" \
+ "${kindclusterconf}"
+
+ scp "root@bootstrap:${workclusterconf}" \
+ "${workclusterconf}"
+
+ #
+ # This doesn't work because the kubectl API is blocked by Somerville firewall.
+ # Only ssh is allowed to public IP addresses ?
+ #
+
+
diff --git a/notes/zrq/20240220-01-jade-debug.txt b/notes/zrq/20240220-01-jade-debug.txt
new file mode 100644
index 00000000..b65aec98
--- /dev/null
+++ b/notes/zrq/20240220-01-jade-debug.txt
@@ -0,0 +1,725 @@
+#
+#
+#
+# Copyright (c) 2024, ROE (http://www.roe.ac.uk/)
+#
+# This information is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This information is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#
+#
+#zrq-notes-time
+#zrq-notes-indent
+#zrq-notes-crypto
+#zrq-notes-ansible
+#zrq-notes-osformat
+#zrq-notes-zeppelin
+#
+# AIMetrics: []
+#
+
+ Target:
+
+ Collecting more details about the failed deployment from yesterday.
+
+ Result:
+
+    Embarrassing/irritating outcome.
+    While I was collecting all of this information, I discovered that the deployment was working.
+    Some time in the 12hrs between when I did the initial deploy and when I was collecting the data
+    the cluster finally managed to fix itself.
+
+    Discovered some more things to check if/when we get the same problem again.
+
+    We need to come up with a way of measuring how long it takes for a cluster to resolve itself.
+ Something like `watch`, but one that periodically checks the cluster status and can detect
+ if/when it becomes healthy.
+
+
+# -----------------------------------------------------
+
+ Message from Scott Davidson on Slack
+ Hi
+ @Dave Morris
+ I hope you don’t mind me jumping in here but I was just looking at your notes from the Kubernetes issues
+ you were having yesterday and these lines stand out to me.
+ It looks like 172.24.0.1 might be the internal IP address of the Kubernetes API server (I think you can
+ check with kubectl get svc -n default ) in which case a HTTP 500 from the Kubernetes API server itself
+ looks pretty suspicious.
+ Do you (or anyone else here) happen to know if the OpenStack VMs that make up the cluster are volume-backed?
+ The reason I ask is that there are known issues around running etcd on slow(er) storage devices such as
+ network-attached storage and sometimes even with HDD local disks.
+ If the storage backing the VMs is too slow (or is somehow sensitive to other workloads happening on the cloud)
+ then that might explain the intermittent issues you have been seeing.
+
+# -----------------------------------------------------
+# -----------------------------------------------------
+#[user@desktop]
+
+ #
+ # Re-connect a client using notes.
+ # notes/zrq/20240219-03-jade-reconnect.txt
+ #
+
+# -----------------------------------------------------
+# List the nodes, flavors and images.
+#[root@ansibler]
+
+ openstack \
+ --os-cloud "${cloudname:?}" \
+ server list
+
+ > +--------------------------------------+------------------------------------------------------------+--------+----------------------------------------------------------------------------+-----------------------------------+----------------+
+ > | ID | Name | Status | Networks | Image | Flavor |
+ > +--------------------------------------+------------------------------------------------------------+--------+----------------------------------------------------------------------------+-----------------------------------+----------------+
+ > | 32bdadee-9ab1-4f7e-ade4-463908234aa5 | somerville-jade-20240219-work-md-0-fb50a5e8-fhxtc | ACTIVE | k8s-clusterapi-cluster-default-somerville-jade-20240219-work=192.168.3.157 | gaia-dmp-ubuntu-2204-kube-v1.26.7 | gaia.vm.26vcpu |
+ > | 7f6c01ab-059f-4709-8149-36ce4864570b | somerville-jade-20240219-work-md-0-fb50a5e8-bh6d5 | ACTIVE | k8s-clusterapi-cluster-default-somerville-jade-20240219-work=192.168.3.223 | gaia-dmp-ubuntu-2204-kube-v1.26.7 | gaia.vm.26vcpu |
+ > | ed9a635d-ed0a-4b2e-a054-41160b3feb80 | somerville-jade-20240219-work-md-0-fb50a5e8-9bsxs | ACTIVE | k8s-clusterapi-cluster-default-somerville-jade-20240219-work=192.168.3.225 | gaia-dmp-ubuntu-2204-kube-v1.26.7 | gaia.vm.26vcpu |
+ > | d7c145c8-b699-4555-bde6-4ec7973a5ba7 | somerville-jade-20240219-work-md-0-fb50a5e8-zjwtj | ACTIVE | k8s-clusterapi-cluster-default-somerville-jade-20240219-work=192.168.3.243 | gaia-dmp-ubuntu-2204-kube-v1.26.7 | gaia.vm.26vcpu |
+ > | f380cceb-6385-48e5-bf70-030ace07b8e7 | somerville-jade-20240219-work-control-plane-ac9af912-v42dq | ACTIVE | k8s-clusterapi-cluster-default-somerville-jade-20240219-work=192.168.3.171 | gaia-dmp-ubuntu-2204-kube-v1.26.7 | gaia.vm.2vcpu |
+ > | 2ff8c8d9-34a1-444d-b32c-8db1a806e833 | somerville-jade-20240219-work-control-plane-ac9af912-m4vdt | ACTIVE | k8s-clusterapi-cluster-default-somerville-jade-20240219-work=192.168.3.47 | gaia-dmp-ubuntu-2204-kube-v1.26.7 | gaia.vm.2vcpu |
+ > | d64cc19f-f0eb-4044-b28d-476fc39208f9 | somerville-jade-20240219-work-md-0-fb50a5e8-ntqbd | ACTIVE | k8s-clusterapi-cluster-default-somerville-jade-20240219-work=192.168.3.129 | gaia-dmp-ubuntu-2204-kube-v1.26.7 | gaia.vm.26vcpu |
+ > | d05a0082-33f7-4d8d-bc57-b33757c67cd2 | somerville-jade-20240219-work-md-0-fb50a5e8-whft4 | ACTIVE | k8s-clusterapi-cluster-default-somerville-jade-20240219-work=192.168.3.113 | gaia-dmp-ubuntu-2204-kube-v1.26.7 | gaia.vm.26vcpu |
+ > | 3eca33f6-b2f0-414a-b7ca-c2a35541022e | somerville-jade-20240219-work-control-plane-ac9af912-gjv45 | ACTIVE | k8s-clusterapi-cluster-default-somerville-jade-20240219-work=192.168.3.186 | gaia-dmp-ubuntu-2204-kube-v1.26.7 | gaia.vm.2vcpu |
+ > | d8d63532-0ca9-4a0c-9e84-93644df8af49 | somerville-jade-20240219-bootstrap-node | ACTIVE | somerville-jade-20240219-bootstrap-network=10.10.0.211, 192.41.122.174 | gaia-dmp-fedora-cloud-38-1.6 | gaia.vm.2vcpu |
+ > +--------------------------------------+------------------------------------------------------------+--------+----------------------------------------------------------------------------+-----------------------------------+----------------+
+
+
+ openstack \
+ --os-cloud "${cloudname:?}" \
+ server show \
+ somerville-jade-20240219-work-md-0-fb50a5e8-fhxtc
+
+ > +-------------------------------------+----------------------------------------------------------------------------+
+ > | Field | Value |
+ > +-------------------------------------+----------------------------------------------------------------------------+
+ > | OS-DCF:diskConfig | MANUAL |
+ > | OS-EXT-AZ:availability_zone | nova |
+ > | OS-EXT-SRV-ATTR:host | sv-hpe-0-6 |
+ > | OS-EXT-SRV-ATTR:hypervisor_hostname | sv-hpe-0-6 |
+ > | OS-EXT-SRV-ATTR:instance_name | instance-000075e9 |
+ > | OS-EXT-STS:power_state | Running |
+ > | OS-EXT-STS:task_state | None |
+ > | OS-EXT-STS:vm_state | active |
+ > | OS-SRV-USG:launched_at | 2024-02-19T17:13:10.000000 |
+ > | OS-SRV-USG:terminated_at | None |
+ > | accessIPv4 | |
+ > | accessIPv6 | |
+ > | addresses | k8s-clusterapi-cluster-default-somerville-jade-20240219-work=192.168.3.157 |
+ > | config_drive | |
+ > | created | 2024-02-19T17:13:06Z |
+ > | flavor | gaia.vm.26vcpu (f5bf7c55-d6aa-4ef7-ba91-6e15683ab557) |
+ > | hostId | f790b78efb6cb4355ad73dd6a6f953627fb3e8c2a0457196852611a8 |
+ > | id | 32bdadee-9ab1-4f7e-ade4-463908234aa5 |
+ > | image | gaia-dmp-ubuntu-2204-kube-v1.26.7 (2bfecf33-9fd4-4687-bf6a-569e43c47999) |
+ > | key_name | somerville-jade-20240219-keypair |
+ > | name | somerville-jade-20240219-work-md-0-fb50a5e8-fhxtc |
+ > | progress | 0 |
+ > | project_id | be227fe0300b4ce5b03f44264df615df |
+ > | properties | |
+ > | security_groups | name='k8s-cluster-default-somerville-jade-20240219-work-secgroup-worker' |
+ > | status | ACTIVE |
+ > | updated | 2024-02-19T17:13:10Z |
+ > | user_id | c4aad146ab7acaf44819e90e3e67a4d0490c164fbb02d388823c1ac9f0ae2e13 |
+ > | volumes_attached | |
+ > +-------------------------------------+----------------------------------------------------------------------------+
+
+
+ openstack \
+ --os-cloud "${cloudname:?}" \
+ image show \
+ gaia-dmp-ubuntu-2204-kube-v1.26.7
+
+ > +------------------+---------------------------------------------------------------------------------+
+ > | Field | Value |
+ > +------------------+---------------------------------------------------------------------------------+
+ > | checksum | eb33d889f410ee521e87d313f1b200ce |
+ > | container_format | bare |
+ > | created_at | 2024-01-06T03:39:13Z |
+ > | disk_format | qcow2 |
+ > | file | /v2/images/2bfecf33-9fd4-4687-bf6a-569e43c47999/file |
+ > | id | 2bfecf33-9fd4-4687-bf6a-569e43c47999 |
+ > | min_disk | 0 |
+ > | min_ram | 0 |
+ > | name | gaia-dmp-ubuntu-2204-kube-v1.26.7 |
+ > | owner | be227fe0300b4ce5b03f44264df615df |
+ > | properties | direct_url='rbd://84c5........7999/snap', |
+ > | | os_hash_algo='sha512', |
+ > | | os_hash_value='7015........147e', |
+ > | | os_hidden='False', |
+ > | | owner_specified.openstack.md5='', |
+ > | | owner_specified.openstack.object='images/gaia-dmp-ubuntu-2204-kube-v1.26.7', |
+ > | | owner_specified.openstack.sha256='', |
+ > | | stores='rbd' |
+ > | protected | False |
+ > | schema | /v2/schemas/image |
+ > | size | 10737418240 |
+ > | status | active |
+ > | tags | |
+ > | updated_at | 2024-01-06T05:45:22Z |
+ > | visibility | shared |
+ > +------------------+---------------------------------------------------------------------------------+
+
+
+ openstack \
+ --os-cloud "${cloudname:?}" \
+ flavor show \
+ gaia.vm.26vcpu
+
+ > +----------------------------+--------------------------------------+
+ > | Field | Value |
+ > +----------------------------+--------------------------------------+
+ > | OS-FLV-DISABLED:disabled | False |
+ > | OS-FLV-EXT-DATA:ephemeral | 0 |
+ > | access_project_ids | None |
+ > | description | None |
+ > | disk | 20 |
+ > | id | f5bf7c55-d6aa-4ef7-ba91-6e15683ab557 |
+ > | name | gaia.vm.26vcpu |
+ > | os-flavor-access:is_public | True |
+ > | properties | trait:CUSTOM_SSD_DEV='forbidden' |
+ > | ram | 44032 |
+ > | rxtx_factor | 1.0 |
+ > | swap | |
+ > | vcpus | 26 |
+ > +----------------------------+--------------------------------------+
+
+
+# -----------------------------------------------------
+# List the kube-system Pods in the tenant (work) cluster.
+#[root@ansibler]
+
+ ssh bootstrap -t \
+ '
+ source loadconfig
+ kubectl \
+ --kubeconfig "${workclusterconf:?}" \
+ get pods \
+ --namespace kube-system
+ '
+
+ > NAME READY STATUS RESTARTS AGE
+ > coredns-787d4945fb-dv6px 1/1 Running 0 18h
+ > coredns-787d4945fb-svl9q 1/1 Running 0 18h
+ > etcd-somerville-jade-20240219-work-control-plane-ac9af912-gjv45 1/1 Running 0 18h
+ > etcd-somerville-jade-20240219-work-control-plane-ac9af912-m4vdt 1/1 Running 0 18h
+ > etcd-somerville-jade-20240219-work-control-plane-ac9af912-v42dq 1/1 Running 0 18h
+ > kube-apiserver-somerville-jade-20240219-work-control-plane-ac9af912-gjv45 1/1 Running 0 18h
+ > kube-apiserver-somerville-jade-20240219-work-control-plane-ac9af912-m4vdt 1/1 Running 0 18h
+ > kube-apiserver-somerville-jade-20240219-work-control-plane-ac9af912-v42dq 1/1 Running 0 18h
+ > kube-controller-manager-somerville-jade-20240219-work-control-plane-ac9af912-gjv45 1/1 Running 4 (18h ago) 18h
+ > kube-controller-manager-somerville-jade-20240219-work-control-plane-ac9af912-m4vdt 1/1 Running 2 (18h ago) 18h
+ > kube-controller-manager-somerville-jade-20240219-work-control-plane-ac9af912-v42dq 1/1 Running 0 18h
+ > kube-proxy-6ccvh 1/1 Running 0 18h
+ > kube-proxy-6vr7b 1/1 Running 0 18h
+ > kube-proxy-7qwtb 1/1 Running 0 18h
+ > kube-proxy-8pn9v 1/1 Running 0 18h
+ > kube-proxy-dpzg8 1/1 Running 0 18h
+ > kube-proxy-ppr9v 1/1 Running 0 18h
+ > kube-proxy-qn22t 1/1 Running 0 18h
+ > kube-proxy-rj9qh 1/1 Running 0 18h
+ > kube-proxy-vpskm 1/1 Running 0 18h
+ > kube-scheduler-somerville-jade-20240219-work-control-plane-ac9af912-gjv45 1/1 Running 4 (18h ago) 18h
+ > kube-scheduler-somerville-jade-20240219-work-control-plane-ac9af912-m4vdt 1/1 Running 2 (18h ago) 18h
+ > kube-scheduler-somerville-jade-20240219-work-control-plane-ac9af912-v42dq 1/1 Running 0 18h
+ > metrics-server-65cccfc7bb-k594p 1/1 Running 0 18h
+
+
+# -----------------------------------------------------
+# Check the 'kube-apiserver' Pod in the 'kube-system' namespace.
+#[root@ansibler]
+
+ ssh bootstrap -t \
+ '
+ source loadconfig
+ kubectl \
+ --kubeconfig "${workclusterconf:?}" \
+ get pods \
+ --namespace kube-system \
+ --output json \
+ | jq -r ".items[].metadata.name | select(. | startswith(\"kube-apiserver\"))"
+ '
+
+ > kube-apiserver-somerville-jade-20240219-work-control-plane-ac9af912-gjv45
+ > kube-apiserver-somerville-jade-20240219-work-control-plane-ac9af912-m4vdt
+ > kube-apiserver-somerville-jade-20240219-work-control-plane-ac9af912-v42dq
+
+
+ ssh bootstrap -t \
+ '
+ source loadconfig
+ for podname in $(
+ kubectl \
+ --kubeconfig "${workclusterconf:?}" \
+ get pods \
+ --namespace kube-system \
+ --output json \
+ | jq -r ".items[].metadata.name | select(. | startswith(\"kube-apiserver\"))"
+ )
+ do
+ echo ""
+ echo "---- ---- ---- ----"
+ echo "Podname [${podname}]"
+ kubectl \
+ --kubeconfig "${workclusterconf:?}" \
+ describe pod \
+ --namespace kube-system \
+ "${podname}"
+ done
+ '
+
+ > ---- ---- ---- ----
+ > Podname [kube-apiserver-somerville-jade-20240219-work-control-plane-ac9af912-gjv45]
+ > Name: kube-apiserver-somerville-jade-20240219-work-control-plane-ac9af912-gjv45
+ > Namespace: kube-system
+ > Priority: 2000001000
+ > Priority Class Name: system-node-critical
+ > Node: somerville-jade-20240219-work-control-plane-ac9af912-gjv45/192.168.3.186
+ > Start Time: Mon, 19 Feb 2024 17:02:41 +0000
+ > Labels: component=kube-apiserver
+ > tier=control-plane
+ > Annotations: kubeadm.kubernetes.io/kube-apiserver.advertise-address.endpoint: 192.168.3.186:6443
+ > kubernetes.io/config.hash: 46c32cf496946e8498634f34e761d972
+ > kubernetes.io/config.mirror: 46c32cf496946e8498634f34e761d972
+ > kubernetes.io/config.seen: 2024-02-19T17:02:40.871951952Z
+ > kubernetes.io/config.source: file
+ > Status: Running
+ > SeccompProfile: RuntimeDefault
+ > IP: 192.168.3.186
+ > IPs:
+ > IP: 192.168.3.186
+ > Controlled By: Node/somerville-jade-20240219-work-control-plane-ac9af912-gjv45
+ > Containers:
+ > kube-apiserver:
+ > Container ID: containerd://620263b18a07caf23fd79658055a6d5ef32ca555c8c203ae054322aa7afc0adf
+ > Image: registry.k8s.io/kube-apiserver:v1.26.7
+ > Image ID: registry.k8s.io/kube-apiserver@sha256:c3b8fbd0418e29e8a3d49fbeebc187ffba6d0b2e437fc6c4db2cfb69b19163bf
+ > Port:
+ > Host Port:
+ > Command:
+ > kube-apiserver
+ > --advertise-address=192.168.3.186
+ > --allow-privileged=true
+ > --authorization-mode=Node,RBAC
+ > --client-ca-file=/etc/kubernetes/pki/ca.crt
+ > --cloud-provider=external
+ > --enable-admission-plugins=NodeRestriction
+ > --enable-bootstrap-token-auth=true
+ > --etcd-cafile=/etc/kubernetes/pki/etcd/ca.crt
+ > --etcd-certfile=/etc/kubernetes/pki/apiserver-etcd-client.crt
+ > --etcd-keyfile=/etc/kubernetes/pki/apiserver-etcd-client.key
+ > --etcd-servers=https://127.0.0.1:2379
+ > --kubelet-client-certificate=/etc/kubernetes/pki/apiserver-kubelet-client.crt
+ > --kubelet-client-key=/etc/kubernetes/pki/apiserver-kubelet-client.key
+ > --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
+ > --proxy-client-cert-file=/etc/kubernetes/pki/front-proxy-client.crt
+ > --proxy-client-key-file=/etc/kubernetes/pki/front-proxy-client.key
+ > --requestheader-allowed-names=front-proxy-client
+ > --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.crt
+ > --requestheader-extra-headers-prefix=X-Remote-Extra-
+ > --requestheader-group-headers=X-Remote-Group
+ > --requestheader-username-headers=X-Remote-User
+ > --secure-port=6443
+ > --service-account-issuer=https://kubernetes.default.svc.cluster.local
+ > --service-account-key-file=/etc/kubernetes/pki/sa.pub
+ > --service-account-signing-key-file=/etc/kubernetes/pki/sa.key
+ > --service-cluster-ip-range=172.24.0.0/13
+ > --tls-cert-file=/etc/kubernetes/pki/apiserver.crt
+ > --tls-private-key-file=/etc/kubernetes/pki/apiserver.key
+ > State: Running
+ > Started: Mon, 19 Feb 2024 17:01:51 +0000
+ > Ready: True
+ > Restart Count: 0
+ > Requests:
+ > cpu: 250m
+ > Liveness: http-get https://192.168.3.186:6443/livez delay=10s timeout=15s period=10s #success=1 #failure=8
+ > Readiness: http-get https://192.168.3.186:6443/readyz delay=0s timeout=15s period=1s #success=1 #failure=3
+ > Startup: http-get https://192.168.3.186:6443/livez delay=10s timeout=15s period=10s #success=1 #failure=24
+ > Environment:
+ > Mounts:
+ > /etc/ca-certificates from etc-ca-certificates (ro)
+ > /etc/kubernetes/pki from k8s-certs (ro)
+ > /etc/ssl/certs from ca-certs (ro)
+ > /usr/local/share/ca-certificates from usr-local-share-ca-certificates (ro)
+ > /usr/share/ca-certificates from usr-share-ca-certificates (ro)
+ > Conditions:
+ > Type Status
+ > Initialized True
+ > Ready True
+ > ContainersReady True
+ > PodScheduled True
+ > Volumes:
+ > ca-certs:
+ > Type: HostPath (bare host directory volume)
+ > Path: /etc/ssl/certs
+ > HostPathType: DirectoryOrCreate
+ > etc-ca-certificates:
+ > Type: HostPath (bare host directory volume)
+ > Path: /etc/ca-certificates
+ > HostPathType: DirectoryOrCreate
+ > k8s-certs:
+ > Type: HostPath (bare host directory volume)
+ > Path: /etc/kubernetes/pki
+ > HostPathType: DirectoryOrCreate
+ > usr-local-share-ca-certificates:
+ > Type: HostPath (bare host directory volume)
+ > Path: /usr/local/share/ca-certificates
+ > HostPathType: DirectoryOrCreate
+ > usr-share-ca-certificates:
+ > Type: HostPath (bare host directory volume)
+ > Path: /usr/share/ca-certificates
+ > HostPathType: DirectoryOrCreate
+ > QoS Class: Burstable
+ > Node-Selectors:
+ > Tolerations: :NoExecute op=Exists
+ > Events:
+ > Type Reason Age From Message
+ > ---- ------ ---- ---- -------
+ > Warning Unhealthy 93m (x21 over 18h) kubelet Liveness probe failed: HTTP probe failed with statuscode: 500
+ > Warning Unhealthy 4m10s (x251 over 18h) kubelet Readiness probe failed: HTTP probe failed with statuscode: 500
+ >
+ > ---- ---- ---- ----
+ > Podname [kube-apiserver-somerville-jade-20240219-work-control-plane-ac9af912-m4vdt]
+ > Name: kube-apiserver-somerville-jade-20240219-work-control-plane-ac9af912-m4vdt
+ > Namespace: kube-system
+ > Priority: 2000001000
+ > Priority Class Name: system-node-critical
+ > Node: somerville-jade-20240219-work-control-plane-ac9af912-m4vdt/192.168.3.47
+ > Start Time: Mon, 19 Feb 2024 17:05:09 +0000
+ > Labels: component=kube-apiserver
+ > tier=control-plane
+ > Annotations: kubeadm.kubernetes.io/kube-apiserver.advertise-address.endpoint: 192.168.3.47:6443
+ > kubernetes.io/config.hash: f6125d635ed29a6fe511cb611ace8915
+ > kubernetes.io/config.mirror: f6125d635ed29a6fe511cb611ace8915
+ > kubernetes.io/config.seen: 2024-02-19T17:05:08.276846295Z
+ > kubernetes.io/config.source: file
+ > Status: Running
+ > SeccompProfile: RuntimeDefault
+ > IP: 192.168.3.47
+ > IPs:
+ > IP: 192.168.3.47
+ > Controlled By: Node/somerville-jade-20240219-work-control-plane-ac9af912-m4vdt
+ > Containers:
+ > kube-apiserver:
+ > Container ID: containerd://5d8bc6b204199714638784346538cd226ebd6ae9f9321d3181fbae2e4b429733
+ > Image: registry.k8s.io/kube-apiserver:v1.26.7
+ > Image ID: registry.k8s.io/kube-apiserver@sha256:c3b8fbd0418e29e8a3d49fbeebc187ffba6d0b2e437fc6c4db2cfb69b19163bf
+ > Port:
+ > Host Port:
+ > Command:
+ > kube-apiserver
+ > --advertise-address=192.168.3.47
+ > --allow-privileged=true
+ > --authorization-mode=Node,RBAC
+ > --client-ca-file=/etc/kubernetes/pki/ca.crt
+ > --cloud-provider=external
+ > --enable-admission-plugins=NodeRestriction
+ > --enable-bootstrap-token-auth=true
+ > --etcd-cafile=/etc/kubernetes/pki/etcd/ca.crt
+ > --etcd-certfile=/etc/kubernetes/pki/apiserver-etcd-client.crt
+ > --etcd-keyfile=/etc/kubernetes/pki/apiserver-etcd-client.key
+ > --etcd-servers=https://127.0.0.1:2379
+ > --kubelet-client-certificate=/etc/kubernetes/pki/apiserver-kubelet-client.crt
+ > --kubelet-client-key=/etc/kubernetes/pki/apiserver-kubelet-client.key
+ > --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
+ > --proxy-client-cert-file=/etc/kubernetes/pki/front-proxy-client.crt
+ > --proxy-client-key-file=/etc/kubernetes/pki/front-proxy-client.key
+ > --requestheader-allowed-names=front-proxy-client
+ > --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.crt
+ > --requestheader-extra-headers-prefix=X-Remote-Extra-
+ > --requestheader-group-headers=X-Remote-Group
+ > --requestheader-username-headers=X-Remote-User
+ > --secure-port=6443
+ > --service-account-issuer=https://kubernetes.default.svc.cluster.local
+ > --service-account-key-file=/etc/kubernetes/pki/sa.pub
+ > --service-account-signing-key-file=/etc/kubernetes/pki/sa.key
+ > --service-cluster-ip-range=172.24.0.0/13
+ > --tls-cert-file=/etc/kubernetes/pki/apiserver.crt
+ > --tls-private-key-file=/etc/kubernetes/pki/apiserver.key
+ > State: Running
+ > Started: Mon, 19 Feb 2024 17:05:24 +0000
+ > Ready: True
+ > Restart Count: 0
+ > Requests:
+ > cpu: 250m
+ > Liveness: http-get https://192.168.3.47:6443/livez delay=10s timeout=15s period=10s #success=1 #failure=8
+ > Readiness: http-get https://192.168.3.47:6443/readyz delay=0s timeout=15s period=1s #success=1 #failure=3
+ > Startup: http-get https://192.168.3.47:6443/livez delay=10s timeout=15s period=10s #success=1 #failure=24
+ > Environment:
+ > Mounts:
+ > /etc/ca-certificates from etc-ca-certificates (ro)
+ > /etc/kubernetes/pki from k8s-certs (ro)
+ > /etc/ssl/certs from ca-certs (ro)
+ > /usr/local/share/ca-certificates from usr-local-share-ca-certificates (ro)
+ > /usr/share/ca-certificates from usr-share-ca-certificates (ro)
+ > Conditions:
+ > Type Status
+ > Initialized True
+ > Ready True
+ > ContainersReady True
+ > PodScheduled True
+ > Volumes:
+ > ca-certs:
+ > Type: HostPath (bare host directory volume)
+ > Path: /etc/ssl/certs
+ > HostPathType: DirectoryOrCreate
+ > etc-ca-certificates:
+ > Type: HostPath (bare host directory volume)
+ > Path: /etc/ca-certificates
+ > HostPathType: DirectoryOrCreate
+ > k8s-certs:
+ > Type: HostPath (bare host directory volume)
+ > Path: /etc/kubernetes/pki
+ > HostPathType: DirectoryOrCreate
+ > usr-local-share-ca-certificates:
+ > Type: HostPath (bare host directory volume)
+ > Path: /usr/local/share/ca-certificates
+ > HostPathType: DirectoryOrCreate
+ > usr-share-ca-certificates:
+ > Type: HostPath (bare host directory volume)
+ > Path: /usr/share/ca-certificates
+ > HostPathType: DirectoryOrCreate
+ > QoS Class: Burstable
+ > Node-Selectors:
+ > Tolerations: :NoExecute op=Exists
+ > Events:
+ > Type Reason Age From Message
+ > ---- ------ ---- ---- -------
+ > Normal Pulled 18h kubelet Container image "registry.k8s.io/kube-apiserver:v1.26.7" already present on machine
+ > Normal Created 18h kubelet Created container kube-apiserver
+ > Normal Started 18h kubelet Started container kube-apiserver
+ > Warning Unhealthy 18h kubelet Startup probe failed: HTTP probe failed with statuscode: 403
+ > Warning Unhealthy 18h (x4 over 18h) kubelet Startup probe failed: HTTP probe failed with statuscode: 500
+ > Warning Unhealthy 13m (x139 over 18h) kubelet Readiness probe failed: HTTP probe failed with statuscode: 500
+ > Warning Unhealthy 3m17s (x21 over 18h) kubelet Liveness probe failed: HTTP probe failed with statuscode: 500
+ >
+ > ---- ---- ---- ----
+ > Podname [kube-apiserver-somerville-jade-20240219-work-control-plane-ac9af912-v42dq]
+ > Name: kube-apiserver-somerville-jade-20240219-work-control-plane-ac9af912-v42dq
+ > Namespace: kube-system
+ > Priority: 2000001000
+ > Priority Class Name: system-node-critical
+ > Node: somerville-jade-20240219-work-control-plane-ac9af912-v42dq/192.168.3.171
+ > Start Time: Mon, 19 Feb 2024 17:09:40 +0000
+ > Labels: component=kube-apiserver
+ > tier=control-plane
+ > Annotations: kubeadm.kubernetes.io/kube-apiserver.advertise-address.endpoint: 192.168.3.171:6443
+ > kubernetes.io/config.hash: 94787ecd0b24bc15df444d00e1ead91e
+ > kubernetes.io/config.mirror: 94787ecd0b24bc15df444d00e1ead91e
+ > kubernetes.io/config.seen: 2024-02-19T17:09:35.786020973Z
+ > kubernetes.io/config.source: file
+ > Status: Running
+ > SeccompProfile: RuntimeDefault
+ > IP: 192.168.3.171
+ > IPs:
+ > IP: 192.168.3.171
+ > Controlled By: Node/somerville-jade-20240219-work-control-plane-ac9af912-v42dq
+ > Containers:
+ > kube-apiserver:
+ > Container ID: containerd://b6ecd2f7c06f03576508e0617ac9e8ce93b5321b1905079da9a97a015d0869c7
+ > Image: registry.k8s.io/kube-apiserver:v1.26.7
+ > Image ID: registry.k8s.io/kube-apiserver@sha256:c3b8fbd0418e29e8a3d49fbeebc187ffba6d0b2e437fc6c4db2cfb69b19163bf
+ > Port:
+ > Host Port:
+ > Command:
+ > kube-apiserver
+ > --advertise-address=192.168.3.171
+ > --allow-privileged=true
+ > --authorization-mode=Node,RBAC
+ > --client-ca-file=/etc/kubernetes/pki/ca.crt
+ > --cloud-provider=external
+ > --enable-admission-plugins=NodeRestriction
+ > --enable-bootstrap-token-auth=true
+ > --etcd-cafile=/etc/kubernetes/pki/etcd/ca.crt
+ > --etcd-certfile=/etc/kubernetes/pki/apiserver-etcd-client.crt
+ > --etcd-keyfile=/etc/kubernetes/pki/apiserver-etcd-client.key
+ > --etcd-servers=https://127.0.0.1:2379
+ > --kubelet-client-certificate=/etc/kubernetes/pki/apiserver-kubelet-client.crt
+ > --kubelet-client-key=/etc/kubernetes/pki/apiserver-kubelet-client.key
+ > --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname
+ > --proxy-client-cert-file=/etc/kubernetes/pki/front-proxy-client.crt
+ > --proxy-client-key-file=/etc/kubernetes/pki/front-proxy-client.key
+ > --requestheader-allowed-names=front-proxy-client
+ > --requestheader-client-ca-file=/etc/kubernetes/pki/front-proxy-ca.crt
+ > --requestheader-extra-headers-prefix=X-Remote-Extra-
+ > --requestheader-group-headers=X-Remote-Group
+ > --requestheader-username-headers=X-Remote-User
+ > --secure-port=6443
+ > --service-account-issuer=https://kubernetes.default.svc.cluster.local
+ > --service-account-key-file=/etc/kubernetes/pki/sa.pub
+ > --service-account-signing-key-file=/etc/kubernetes/pki/sa.key
+ > --service-cluster-ip-range=172.24.0.0/13
+ > --tls-cert-file=/etc/kubernetes/pki/apiserver.crt
+ > --tls-private-key-file=/etc/kubernetes/pki/apiserver.key
+ > State: Running
+ > Started: Mon, 19 Feb 2024 17:09:56 +0000
+ > Ready: True
+ > Restart Count: 0
+ > Requests:
+ > cpu: 250m
+ > Liveness: http-get https://192.168.3.171:6443/livez delay=10s timeout=15s period=10s #success=1 #failure=8
+ > Readiness: http-get https://192.168.3.171:6443/readyz delay=0s timeout=15s period=1s #success=1 #failure=3
+ > Startup: http-get https://192.168.3.171:6443/livez delay=10s timeout=15s period=10s #success=1 #failure=24
+ > Environment:
+ > Mounts:
+ > /etc/ca-certificates from etc-ca-certificates (ro)
+ > /etc/kubernetes/pki from k8s-certs (ro)
+ > /etc/ssl/certs from ca-certs (ro)
+ > /usr/local/share/ca-certificates from usr-local-share-ca-certificates (ro)
+ > /usr/share/ca-certificates from usr-share-ca-certificates (ro)
+ > Conditions:
+ > Type Status
+ > Initialized True
+ > Ready True
+ > ContainersReady True
+ > PodScheduled True
+ > Volumes:
+ > ca-certs:
+ > Type: HostPath (bare host directory volume)
+ > Path: /etc/ssl/certs
+ > HostPathType: DirectoryOrCreate
+ > etc-ca-certificates:
+ > Type: HostPath (bare host directory volume)
+ > Path: /etc/ca-certificates
+ > HostPathType: DirectoryOrCreate
+ > k8s-certs:
+ > Type: HostPath (bare host directory volume)
+ > Path: /etc/kubernetes/pki
+ > HostPathType: DirectoryOrCreate
+ > usr-local-share-ca-certificates:
+ > Type: HostPath (bare host directory volume)
+ > Path: /usr/local/share/ca-certificates
+ > HostPathType: DirectoryOrCreate
+ > usr-share-ca-certificates:
+ > Type: HostPath (bare host directory volume)
+ > Path: /usr/share/ca-certificates
+ > HostPathType: DirectoryOrCreate
+ > QoS Class: Burstable
+ > Node-Selectors:
+ > Tolerations: :NoExecute op=Exists
+ > Events:
+ > Type Reason Age From Message
+ > ---- ------ ---- ---- -------
+ > Normal Pulled 18h kubelet Container image "registry.k8s.io/kube-apiserver:v1.26.7" already present on machine
+ > Normal Created 18h kubelet Created container kube-apiserver
+ > Normal Started 18h kubelet Started container kube-apiserver
+ > Warning Unhealthy 18h (x5 over 18h) kubelet Startup probe failed: Get "https://192.168.3.171:6443/livez": dial tcp 192.168.3.171:6443: connect: connection refused
+ > Warning Unhealthy 18h kubelet Startup probe failed: Get "https://192.168.3.171:6443/livez": net/http: TLS handshake timeout
+ > Warning Unhealthy 18h kubelet Startup probe failed: HTTP probe failed with statuscode: 403
+ > Warning Unhealthy 27m (x28 over 18h) kubelet Liveness probe failed: HTTP probe failed with statuscode: 500
+ > Warning Unhealthy 9m40s (x144 over 18h) kubelet Readiness probe failed: HTTP probe failed with statuscode: 500
+
+
+
+# -----------------------------------------------------
+# Slow down and read Scott's comment again.
+# He actually tells us how to find out what the IP address is.
+#[root@ansibler]
+
+ ssh bootstrap -t \
+ '
+ source loadconfig
+ kubectl \
+ --kubeconfig "${workclusterconf:?}" \
+ get services \
+ --namespace default
+ '
+
+ > NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
+ > kubernetes ClusterIP 172.24.0.1 443/TCP 19h
+
+
+# -----------------------------------------------------
+# Found some documentation about debugging services.
+# https://kubernetes.io/docs/tasks/debug/debug-application/debug-service/
+#[root@ansibler]
+
+ ssh bootstrap -t \
+ '
+ source loadconfig
+ kubectl \
+ --kubeconfig "${workclusterconf:?}" \
+ describe service \
+ --namespace default \
+ kubernetes
+ '
+
+ > Name: kubernetes
+ > Namespace: default
+ > Labels: component=apiserver
+ > provider=kubernetes
+ > Annotations:
+ > Selector:
+ > Type: ClusterIP
+ > IP Family Policy: SingleStack
+ > IP Families: IPv4
+ > IP: 172.24.0.1
+ > IPs: 172.24.0.1
+ > Port: https 443/TCP
+ > TargetPort: 6443/TCP
+ > Endpoints: 192.168.3.171:6443,192.168.3.186:6443,192.168.3.47:6443
+ > Session Affinity: None
+ > Events:
+
+
+ ssh bootstrap -t \
+ '
+ source loadconfig
+ kubectl \
+ --kubeconfig "${workclusterconf:?}" \
+ get service \
+ --namespace default \
+ kubernetes \
+ --output json
+ '
+
+ > {
+ > "apiVersion": "v1",
+ > "kind": "Service",
+ > "metadata": {
+ > "creationTimestamp": "2024-02-19T17:01:57Z",
+ > "labels": {
+ > "component": "apiserver",
+ > "provider": "kubernetes"
+ > },
+ > "name": "kubernetes",
+ > "namespace": "default",
+ > "resourceVersion": "191",
+ > "uid": "7080949b-a9fc-48bf-89fd-bc9098ed2132"
+ > },
+ > "spec": {
+ > "clusterIP": "172.24.0.1",
+ > "clusterIPs": [
+ > "172.24.0.1"
+ > ],
+ > "internalTrafficPolicy": "Cluster",
+ > "ipFamilies": [
+ > "IPv4"
+ > ],
+ > "ipFamilyPolicy": "SingleStack",
+ > "ports": [
+ > {
+ > "name": "https",
+ > "port": 443,
+ > "protocol": "TCP",
+ > "targetPort": 6443
+ > }
+ > ],
+ > "sessionAffinity": "None",
+ > "type": "ClusterIP"
+ > },
+ > "status": {
+ > "loadBalancer": {}
+ > }
+ > }
+
diff --git a/notes/zrq/20240220-02-bookmarks.txt b/notes/zrq/20240220-02-bookmarks.txt
new file mode 100644
index 00000000..5c6bdc0c
--- /dev/null
+++ b/notes/zrq/20240220-02-bookmarks.txt
@@ -0,0 +1,56 @@
+#
+#
+#
+# Copyright (c) 2024, ROE (http://www.roe.ac.uk/)
+#
+# This information is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This information is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#
+#
+#zrq-notes-time
+#zrq-notes-indent
+#zrq-notes-crypto
+#zrq-notes-ansible
+#zrq-notes-osformat
+#zrq-notes-zeppelin
+#
+# AIMetrics: []
+#
+
+ Target:
+
+ Just some bookmarks that might be useful.
+
+ Result:
+
+ Work in progress ...
+
+# -----------------------------------------------------
+
+ Cinder volumes and Kubernetes - slow
+ https://stackhpc.github.io/azimuth-config/configuration/01-prerequisites/#cinder-volumes-and-kubernetes
+
+ Accessing the Kubernetes API from a Pod
+ https://kubernetes.io/docs/tasks/run-application/access-api-from-pod/
+
+ Kubernetes Debugging Services
+ https://kubernetes.io/docs/tasks/debug/debug-application/debug-service/
+
+ Using Kubectl Logs | Complete Guide to viewing Kubernetes Pod Logs
+ https://signoz.io/blog/kubectl-logs/
+
+ StackHPC capi-helm-charts Monitoring and logging
+ https://github.com/stackhpc/capi-helm-charts/tree/main/charts/cluster-addons#monitoring-and-logging
+
+
diff --git a/notes/zrq/20240221-01-jade-deploy.txt b/notes/zrq/20240221-01-jade-deploy.txt
new file mode 100644
index 00000000..49f71932
--- /dev/null
+++ b/notes/zrq/20240221-01-jade-deploy.txt
@@ -0,0 +1,1476 @@
+#
+#
+#
+# Copyright (c) 2024, ROE (http://www.roe.ac.uk/)
+#
+# This information is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This information is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#
+#
+#zrq-notes-time
+#zrq-notes-indent
+#zrq-notes-crypto
+#zrq-notes-ansible
+#zrq-notes-osformat
+#zrq-notes-zeppelin
+#
+# AIMetrics: []
+#
+
+ Target:
+
+ Try a new deployment, keeping track of how long it takes to become healthy.
+
+ Result:
+
+ Deploy failed.
+ Left it for 30hrs and still not resolved.
+
+
+# -----------------------------------------------------
+# Run our local client.
+#[user@desktop]
+
+ source "${HOME:?}/aglais.env"
+ export PATH=${PATH}:${AGLAIS_CODE}/bin
+
+ kube-client jade
+
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Delete and create everything.
+#[root@ansibler]
+
+ export cloudsite=somerville-jade
+
+ /deployments/openstack/bin/delete-all.sh \
+ "${cloudname:?}"
+
+ ansible-playbook \
+ --inventory 'bootstrap,' \
+ '/deployments/cluster-api/ansible/00-create-all.yml'
+
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Check the deployment configuration.
+#[root@ansibler]
+
+ cat /opt/aglais/aglais-status.yml
+
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Check the cluster status.
+#[root@ansibler]
+
+ ssh bootstrap -t \
+ '
+ source loadconfig
+ clusterctl \
+ --kubeconfig "${kindclusterconf:?}" \
+ describe cluster \
+ --grouping=false \
+ --show-conditions all \
+ "${workclustername:?}"
+ '
+
+ > NAME READY SEVERITY REASON SINCE MESSAGE
+ > Cluster/somerville-jade-20240221-work False Warning ScalingUp 12m Scaling up control plane to 3 replicas (actual 1)
+ > │ ├─ControlPlaneInitialized True 10m
+ > │ ├─ControlPlaneReady False Warning ScalingUp 12m Scaling up control plane to 3 replicas (actual 1)
+ > │ └─InfrastructureReady True 13m
+ > ├─ClusterInfrastructure - OpenStackCluster/somerville-jade-20240221-work
+ > ├─ControlPlane - KubeadmControlPlane/somerville-jade-20240221-work-control-plane False Warning ScalingUp 12m Scaling up control plane to 3 replicas (actual 1)
+ > │ │ ├─Available True 10m
+ > │ │ ├─CertificatesAvailable True 13m
+ > │ │ ├─MachinesReady False Warning NodeStartupTimeout @ /somerville-jade-20240221-work-control-plane-bdfrr 52s Node failed to report startup in 10m0s
+ > │ │ └─Resized False Warning ScalingUp 12m Scaling up control plane to 3 replicas (actual 1)
+ > │ └─Machine/somerville-jade-20240221-work-control-plane-bdfrr False Warning NodeStartupTimeout 55s Node failed to report startup in 10m0s
+ > │ ├─BootstrapReady True 13m
+ > │ ├─HealthCheckSucceeded False Warning NodeStartupTimeout 55s Node failed to report startup in 10m0s
+ > │ ├─InfrastructureReady True 12m
+ > │ ├─NodeHealthy False Warning NodeProvisioning 10m
+ > │ └─OwnerRemediated False Warning WaitingForRemediation 53s KCP can't remediate if current replicas are less or equal to 1
+ > └─Workers
+ > └─MachineDeployment/somerville-jade-20240221-work-md-0 False Warning WaitingForAvailableMachines 14m Minimum availability requires 5 replicas, current 0 available
+ > │ └─Available False Warning WaitingForAvailableMachines 14m Minimum availability requires 5 replicas, current 0 available
+ > ├─Machine/somerville-jade-20240221-work-md-0-bc4ps-jqcjp True 14s
+ > │ ├─BootstrapReady True 27s
+ > │ ├─InfrastructureReady True 14s
+ > │ └─NodeHealthy False Warning NodeProvisioning 13s
+ > ├─Machine/somerville-jade-20240221-work-md-0-bc4ps-sc5pc True 17s
+ > │ ├─BootstrapReady True 29s
+ > │ ├─InfrastructureReady True 17s
+ > │ └─NodeHealthy False Warning NodeProvisioning 16s
+ > ├─Machine/somerville-jade-20240221-work-md-0-bc4ps-sc5xt True 14s
+ > │ ├─BootstrapReady True 26s
+ > │ ├─InfrastructureReady True 14s
+ > │ └─NodeHealthy False Warning NodeProvisioning 14s
+ > ├─Machine/somerville-jade-20240221-work-md-0-bc4ps-spnmh True 23s
+ > │ ├─BootstrapReady True 35s
+ > │ ├─InfrastructureReady True 23s
+ > │ └─NodeHealthy False Warning NodeProvisioning 22s
+ > ├─Machine/somerville-jade-20240221-work-md-0-bc4ps-xccp4 True 11s
+ > │ ├─BootstrapReady True 24s
+ > │ ├─InfrastructureReady True 11s
+ > │ └─NodeHealthy False Warning NodeProvisioning 10s
+ > └─Machine/somerville-jade-20240221-work-md-0-bc4ps-xmx67 True 18s
+ > ├─BootstrapReady True 32s
+ > ├─InfrastructureReady True 18s
+ > └─NodeHealthy False Warning NodeProvisioning 18s
+
+
+# -----------------------------------------------------
+# List our machines in Openstack.
+#[root@ansibler]
+
+ openstack \
+ --os-cloud "${cloudname:?}" \
+ server list
+
+ > +--------------------------------------+------------------------------------------------------------+--------+----------------------------------------------------------------------------+-----------------------------------+----------------+
+ > | ID | Name | Status | Networks | Image | Flavor |
+ > +--------------------------------------+------------------------------------------------------------+--------+----------------------------------------------------------------------------+-----------------------------------+----------------+
+ > | 9930656b-57eb-4455-b697-5e135a55679a | somerville-jade-20240221-work-md-0-f538b732-x9gr9 | ACTIVE | k8s-clusterapi-cluster-default-somerville-jade-20240221-work=192.168.3.115 | gaia-dmp-ubuntu-2204-kube-v1.26.7 | gaia.vm.26vcpu |
+ > | b21a5408-3ceb-45de-89ff-ed50d2bdf2e3 | somerville-jade-20240221-work-md-0-f538b732-9cflr | ACTIVE | k8s-clusterapi-cluster-default-somerville-jade-20240221-work=192.168.3.96 | gaia-dmp-ubuntu-2204-kube-v1.26.7 | gaia.vm.26vcpu |
+ > | eb4702e1-29d8-43f4-bfb5-b8da42edcc54 | somerville-jade-20240221-work-md-0-f538b732-s46n8 | ACTIVE | k8s-clusterapi-cluster-default-somerville-jade-20240221-work=192.168.3.39 | gaia-dmp-ubuntu-2204-kube-v1.26.7 | gaia.vm.26vcpu |
+ > | 0babc155-fc63-483b-8df2-e027cd751110 | somerville-jade-20240221-work-md-0-f538b732-6xkbt | ACTIVE | k8s-clusterapi-cluster-default-somerville-jade-20240221-work=192.168.3.119 | gaia-dmp-ubuntu-2204-kube-v1.26.7 | gaia.vm.26vcpu |
+ > | d5d50923-dcbe-4d9f-958e-6991fb595d42 | somerville-jade-20240221-work-md-0-f538b732-ptksh | ACTIVE | k8s-clusterapi-cluster-default-somerville-jade-20240221-work=192.168.3.31 | gaia-dmp-ubuntu-2204-kube-v1.26.7 | gaia.vm.26vcpu |
+ > | bd6abaff-27ad-4010-bba2-e78d29c94f6b | somerville-jade-20240221-work-md-0-f538b732-9x7bk | ACTIVE | k8s-clusterapi-cluster-default-somerville-jade-20240221-work=192.168.3.191 | gaia-dmp-ubuntu-2204-kube-v1.26.7 | gaia.vm.26vcpu |
+ > | 073c0051-8805-4719-a589-3769c66339b1 | somerville-jade-20240221-work-control-plane-c6b6f2d1-f5k5w | ACTIVE | k8s-clusterapi-cluster-default-somerville-jade-20240221-work=192.168.3.176 | gaia-dmp-ubuntu-2204-kube-v1.26.7 | gaia.vm.2vcpu |
+ > | 086da559-fea9-4875-aa79-53cc0dbe7011 | somerville-jade-20240221-bootstrap-node | ACTIVE | somerville-jade-20240221-bootstrap-network=10.10.2.166, 192.41.122.71 | gaia-dmp-fedora-cloud-38-1.6 | gaia.vm.2vcpu |
+ > +--------------------------------------+------------------------------------------------------------+--------+----------------------------------------------------------------------------+-----------------------------------+----------------+
+
+
+# -----------------------------------------------------
+# List our machines in the KinD cluster.
+#[root@ansibler]
+
+ ssh bootstrap -t \
+ '
+ source loadconfig
+ kubectl \
+ --kubeconfig "${kindclusterconf:?}" \
+ get machines \
+ --all-namespaces
+ '
+
+ > NAMESPACE NAME CLUSTER NODENAME PROVIDERID PHASE AGE VERSION
+ > default somerville-jade-20240221-work-control-plane-bdfrr somerville-jade-20240221-work openstack:///073c0051-8805-4719-a589-3769c66339b1 Provisioned 19m v1.26.7
+ > default somerville-jade-20240221-work-md-0-bc4ps-jqcjp somerville-jade-20240221-work openstack:///eb4702e1-29d8-43f4-bfb5-b8da42edcc54 Provisioned 6m32s v1.26.7
+ > default somerville-jade-20240221-work-md-0-bc4ps-sc5pc somerville-jade-20240221-work openstack:///0babc155-fc63-483b-8df2-e027cd751110 Provisioned 6m34s v1.26.7
+ > default somerville-jade-20240221-work-md-0-bc4ps-sc5xt somerville-jade-20240221-work openstack:///b21a5408-3ceb-45de-89ff-ed50d2bdf2e3 Provisioned 6m31s v1.26.7
+ > default somerville-jade-20240221-work-md-0-bc4ps-spnmh somerville-jade-20240221-work openstack:///bd6abaff-27ad-4010-bba2-e78d29c94f6b Provisioned 6m39s v1.26.7
+ > default somerville-jade-20240221-work-md-0-bc4ps-xccp4 somerville-jade-20240221-work openstack:///9930656b-57eb-4455-b697-5e135a55679a Provisioned 6m29s v1.26.7
+ > default somerville-jade-20240221-work-md-0-bc4ps-xmx67 somerville-jade-20240221-work openstack:///d5d50923-dcbe-4d9f-958e-6991fb595d42 Provisioned 6m36s v1.26.7
+
+
+# -----------------------------------------------------
+# List our nodes in the work cluster.
+#[root@ansibler]
+
+ ssh bootstrap -t \
+ '
+ source loadconfig
+ kubectl \
+ --kubeconfig "${workclusterconf:?}" \
+ get nodes \
+ --all-namespaces
+ '
+
+ > E0221 07:03:35.168872 15478 memcache.go:287] couldn't get resource list for metrics.k8s.io/v1beta1: the server is currently unable to handle the request
+ > E0221 07:03:35.210309 15478 memcache.go:121] couldn't get resource list for metrics.k8s.io/v1beta1: the server is currently unable to handle the request
+ > E0221 07:03:35.220496 15478 memcache.go:121] couldn't get resource list for metrics.k8s.io/v1beta1: the server is currently unable to handle the request
+ > E0221 07:03:35.227506 15478 memcache.go:121] couldn't get resource list for metrics.k8s.io/v1beta1: the server is currently unable to handle the request
+ > NAME STATUS ROLES AGE VERSION
+ > somerville-jade-20240221-work-control-plane-c6b6f2d1-f5k5w Ready control-plane 17m v1.26.7
+ > somerville-jade-20240221-work-md-0-f538b732-474d8 NotReady 15m v1.26.7
+ > somerville-jade-20240221-work-md-0-f538b732-5tgnn NotReady 15m v1.26.7
+ > somerville-jade-20240221-work-md-0-f538b732-6xkbt Ready 6m5s v1.26.7
+ > somerville-jade-20240221-work-md-0-f538b732-8d8n2 NotReady 16m v1.26.7
+ > somerville-jade-20240221-work-md-0-f538b732-9cflr Ready 5m34s v1.26.7
+ > somerville-jade-20240221-work-md-0-f538b732-9x7bk Ready 5m50s v1.26.7
+ > somerville-jade-20240221-work-md-0-f538b732-fbpln NotReady 15m v1.26.7
+ > somerville-jade-20240221-work-md-0-f538b732-phgtl NotReady 16m v1.26.7
+ > somerville-jade-20240221-work-md-0-f538b732-ptksh Ready 6m30s v1.26.7
+ > somerville-jade-20240221-work-md-0-f538b732-s46n8 Ready 5m4s v1.26.7
+ > somerville-jade-20240221-work-md-0-f538b732-vfckp NotReady 16m v1.26.7
+ > somerville-jade-20240221-work-md-0-f538b732-x9gr9 Ready 6m18s v1.26.7
+
+
+
+
+# -----------------------------------------------------
+# Get the Kubernetes service status
+# https://kubernetes.io/docs/tasks/debug/debug-application/debug-service/
+#[root@ansibler]
+
+ ssh bootstrap -t \
+ '
+ source loadconfig
+ kubectl \
+ --kubeconfig "${workclusterconf:?}" \
+ describe service \
+ --namespace default \
+ kubernetes
+ '
+
+ > E0221 07:13:43.703538 15770 memcache.go:287] couldn't get resource list for metrics.k8s.io/v1beta1: the server is currently unable to handle the request
+ > E0221 07:13:43.732918 15770 memcache.go:121] couldn't get resource list for metrics.k8s.io/v1beta1: the server is currently unable to handle the request
+ > E0221 07:13:43.737366 15770 memcache.go:121] couldn't get resource list for metrics.k8s.io/v1beta1: the server is currently unable to handle the request
+ > E0221 07:13:43.741154 15770 memcache.go:121] couldn't get resource list for metrics.k8s.io/v1beta1: the server is currently unable to handle the request
+ > Name: kubernetes
+ > Namespace: default
+ > Labels: component=apiserver
+ > provider=kubernetes
+ > Annotations:
+ > Selector:
+ > Type: ClusterIP
+ > IP Family Policy: SingleStack
+ > IP Families: IPv4
+ > IP: 172.24.0.1
+ > IPs: 172.24.0.1
+ > Port: https 443/TCP
+ > TargetPort: 6443/TCP
+ > Endpoints: 192.168.3.176:6443
+ > Session Affinity: None
+ > Events:
+
+
+ ssh bootstrap -t \
+ '
+ source loadconfig
+ kubectl \
+ --kubeconfig "${workclusterconf:?}" \
+ get service \
+ --namespace default \
+ kubernetes \
+ --output json
+ '
+ > {
+ > "apiVersion": "v1",
+ > "kind": "Service",
+ > "metadata": {
+ > "creationTimestamp": "2024-02-21T06:45:23Z",
+ > "labels": {
+ > "component": "apiserver",
+ > "provider": "kubernetes"
+ > },
+ > "name": "kubernetes",
+ > "namespace": "default",
+ > "resourceVersion": "195",
+ > "uid": "12c7b70c-cff8-4cf3-9f37-2a01ddf50193"
+ > },
+ > "spec": {
+ > "clusterIP": "172.24.0.1",
+ > "clusterIPs": [
+ > "172.24.0.1"
+ > ],
+ > "internalTrafficPolicy": "Cluster",
+ > "ipFamilies": [
+ > "IPv4"
+ > ],
+ > "ipFamilyPolicy": "SingleStack",
+ > "ports": [
+ > {
+ > "name": "https",
+ > "port": 443,
+ > "protocol": "TCP",
+ > "targetPort": 6443
+ > }
+ > ],
+ > "sessionAffinity": "None",
+ > "type": "ClusterIP"
+ > },
+ > "status": {
+ > "loadBalancer": {}
+ > }
+ > }
+
+
+# -----------------------------------------------------
+# List stuff in the KinD cluster.
+#[root@ansibler]
+
+ ssh bootstrap -t \
+ '
+ source loadconfig
+ kubectl \
+ --kubeconfig "${kindclusterconf:?}" \
+ api-resources
+ '
+
+ > NAME SHORTNAMES APIVERSION NAMESPACED KIND
+ > bindings v1 true Binding
+ > componentstatuses cs v1 false ComponentStatus
+ > configmaps cm v1 true ConfigMap
+ > endpoints ep v1 true Endpoints
+ > events ev v1 true Event
+ > limitranges limits v1 true LimitRange
+ > namespaces ns v1 false Namespace
+ > nodes no v1 false Node
+ > persistentvolumeclaims pvc v1 true PersistentVolumeClaim
+ > persistentvolumes pv v1 false PersistentVolume
+ > pods po v1 true Pod
+ > podtemplates v1 true PodTemplate
+ > replicationcontrollers rc v1 true ReplicationController
+ > resourcequotas quota v1 true ResourceQuota
+ > secrets v1 true Secret
+ > serviceaccounts sa v1 true ServiceAccount
+ > services svc v1 true Service
+ > challenges acme.cert-manager.io/v1 true Challenge
+ > orders acme.cert-manager.io/v1 true Order
+ > clusterresourcesetbindings addons.cluster.x-k8s.io/v1beta1 true ClusterResourceSetBinding
+ > clusterresourcesets addons.cluster.x-k8s.io/v1beta1 true ClusterResourceSet
+ > helmreleases addons.stackhpc.com/v1alpha1 true HelmRelease
+ > manifests addons.stackhpc.com/v1alpha1 true Manifests
+ > mutatingwebhookconfigurations admissionregistration.k8s.io/v1 false MutatingWebhookConfiguration
+ > validatingwebhookconfigurations admissionregistration.k8s.io/v1 false ValidatingWebhookConfiguration
+ > customresourcedefinitions crd,crds apiextensions.k8s.io/v1 false CustomResourceDefinition
+ > apiservices apiregistration.k8s.io/v1 false APIService
+ > controllerrevisions apps/v1 true ControllerRevision
+ > daemonsets ds apps/v1 true DaemonSet
+ > deployments deploy apps/v1 true Deployment
+ > replicasets rs apps/v1 true ReplicaSet
+ > statefulsets sts apps/v1 true StatefulSet
+ > tokenreviews authentication.k8s.io/v1 false TokenReview
+ > localsubjectaccessreviews authorization.k8s.io/v1 true LocalSubjectAccessReview
+ > selfsubjectaccessreviews authorization.k8s.io/v1 false SelfSubjectAccessReview
+ > selfsubjectrulesreviews authorization.k8s.io/v1 false SelfSubjectRulesReview
+ > subjectaccessreviews authorization.k8s.io/v1 false SubjectAccessReview
+ > horizontalpodautoscalers hpa autoscaling/v2 true HorizontalPodAutoscaler
+ > cronjobs cj batch/v1 true CronJob
+ > jobs batch/v1 true Job
+ > kubeadmconfigs bootstrap.cluster.x-k8s.io/v1beta1 true KubeadmConfig
+ > kubeadmconfigtemplates bootstrap.cluster.x-k8s.io/v1beta1 true KubeadmConfigTemplate
+ > certificaterequests cr,crs cert-manager.io/v1 true CertificateRequest
+ > certificates cert,certs cert-manager.io/v1 true Certificate
+ > clusterissuers cert-manager.io/v1 false ClusterIssuer
+ > issuers cert-manager.io/v1 true Issuer
+ > certificatesigningrequests csr certificates.k8s.io/v1 false CertificateSigningRequest
+ > clusterclasses cc cluster.x-k8s.io/v1beta1 true ClusterClass
+ > clusters cl cluster.x-k8s.io/v1beta1 true Cluster
+ > machinedeployments md cluster.x-k8s.io/v1beta1 true MachineDeployment
+ > machinehealthchecks mhc,mhcs cluster.x-k8s.io/v1beta1 true MachineHealthCheck
+ > machinepools mp cluster.x-k8s.io/v1beta1 true MachinePool
+ > machines ma cluster.x-k8s.io/v1beta1 true Machine
+ > machinesets ms cluster.x-k8s.io/v1beta1 true MachineSet
+ > providers clusterctl.cluster.x-k8s.io/v1alpha3 true Provider
+ > kubeadmcontrolplanes kcp controlplane.cluster.x-k8s.io/v1beta1 true KubeadmControlPlane
+ > kubeadmcontrolplanetemplates controlplane.cluster.x-k8s.io/v1beta1 true KubeadmControlPlaneTemplate
+ > leases coordination.k8s.io/v1 true Lease
+ > endpointslices discovery.k8s.io/v1 true EndpointSlice
+ > events ev events.k8s.io/v1 true Event
+ > flowschemas flowcontrol.apiserver.k8s.io/v1beta3 false FlowSchema
+ > prioritylevelconfigurations flowcontrol.apiserver.k8s.io/v1beta3 false PriorityLevelConfiguration
+ > openstackclusters osc infrastructure.cluster.x-k8s.io/v1alpha7 true OpenStackCluster
+ > openstackclustertemplates osct infrastructure.cluster.x-k8s.io/v1alpha7 true OpenStackClusterTemplate
+ > openstackmachines osm infrastructure.cluster.x-k8s.io/v1alpha7 true OpenStackMachine
+ > openstackmachinetemplates osmt infrastructure.cluster.x-k8s.io/v1alpha7 true OpenStackMachineTemplate
+ > ipaddressclaims ipam.cluster.x-k8s.io/v1beta1 true IPAddressClaim
+ > ipaddresses ipam.cluster.x-k8s.io/v1beta1 true IPAddress
+ > ingressclasses networking.k8s.io/v1 false IngressClass
+ > ingresses ing networking.k8s.io/v1 true Ingress
+ > networkpolicies netpol networking.k8s.io/v1 true NetworkPolicy
+ > runtimeclasses node.k8s.io/v1 false RuntimeClass
+ > poddisruptionbudgets pdb policy/v1 true PodDisruptionBudget
+ > clusterrolebindings rbac.authorization.k8s.io/v1 false ClusterRoleBinding
+ > clusterroles rbac.authorization.k8s.io/v1 false ClusterRole
+ > rolebindings rbac.authorization.k8s.io/v1 true RoleBinding
+ > roles rbac.authorization.k8s.io/v1 true Role
+ > extensionconfigs ext runtime.cluster.x-k8s.io/v1alpha1 false ExtensionConfig
+ > priorityclasses pc scheduling.k8s.io/v1 false PriorityClass
+ > csidrivers storage.k8s.io/v1 false CSIDriver
+ > csinodes storage.k8s.io/v1 false CSINode
+ > csistoragecapacities storage.k8s.io/v1 true CSIStorageCapacity
+ > storageclasses sc storage.k8s.io/v1 false StorageClass
+ > volumeattachments storage.k8s.io/v1 false VolumeAttachment
+
+
+ ssh bootstrap -t \
+ '
+ source loadconfig
+ kubectl \
+ --kubeconfig "${kindclusterconf:?}" \
+ get clusters \
+ --all-namespaces
+ '
+
+ > NAMESPACE NAME CLUSTERCLASS PHASE AGE VERSION
+ > default somerville-jade-20240221-work Provisioned 38m
+
+
+ ssh bootstrap -t \
+ '
+ source loadconfig
+ kubectl \
+ --kubeconfig "${kindclusterconf:?}" \
+ get cluster \
+ --namespace default \
+ 'somerville-jade-20240221-work'
+ '
+
+ > NAME CLUSTERCLASS PHASE AGE VERSION
+ > somerville-jade-20240221-work Provisioned 39m
+
+
+ ssh bootstrap -t \
+ '
+ source loadconfig
+ kubectl \
+ --kubeconfig "${kindclusterconf:?}" \
+ get cluster \
+ --namespace default \
+ --output json \
+ 'somerville-jade-20240221-work'
+ '
+
+ > {
+ > "apiVersion": "cluster.x-k8s.io/v1beta1",
+ > "kind": "Cluster",
+ > "metadata": {
+ > "annotations": {
+ > "meta.helm.sh/release-name": "somerville-jade-20240221-work",
+ > "meta.helm.sh/release-namespace": "default"
+ > },
+ > "creationTimestamp": "2024-02-21T06:42:08Z",
+ > "finalizers": [
+ > "cluster.cluster.x-k8s.io"
+ > ],
+ > "generation": 3,
+ > "labels": {
+ > "app.kubernetes.io/managed-by": "Helm",
+ > "capi.stackhpc.com/cluster": "somerville-jade-20240221-work",
+ > "capi.stackhpc.com/infrastructure-provider": "openstack",
+ > "capi.stackhpc.com/managed-by": "Helm",
+ > "helm.sh/chart": "openstack-cluster-0.1.0"
+ > },
+ > "name": "somerville-jade-20240221-work",
+ > "namespace": "default",
+ > "resourceVersion": "4059",
+ > "uid": "5d43f46b-4b5f-4a88-a27c-f2b446434a89"
+ > },
+ > "spec": {
+ > "clusterNetwork": {
+ > "pods": {
+ > "cidrBlocks": [
+ > "172.16.0.0/13"
+ > ]
+ > },
+ > "serviceDomain": "cluster.local",
+ > "services": {
+ > "cidrBlocks": [
+ > "172.24.0.0/13"
+ > ]
+ > }
+ > },
+ > "controlPlaneEndpoint": {
+ > "host": "192.41.122.207",
+ > "port": 6443
+ > },
+ > "controlPlaneRef": {
+ > "apiVersion": "controlplane.cluster.x-k8s.io/v1beta1",
+ > "kind": "KubeadmControlPlane",
+ > "name": "somerville-jade-20240221-work-control-plane",
+ > "namespace": "default"
+ > },
+ > "infrastructureRef": {
+ > "apiVersion": "infrastructure.cluster.x-k8s.io/v1alpha7",
+ > "kind": "OpenStackCluster",
+ > "name": "somerville-jade-20240221-work",
+ > "namespace": "default"
+ > }
+ > },
+ > "status": {
+ > "conditions": [
+ > {
+ > "lastTransitionTime": "2024-02-21T06:44:13Z",
+ > "message": "Scaling up control plane to 3 replicas (actual 1)",
+ > "reason": "ScalingUp",
+ > "severity": "Warning",
+ > "status": "False",
+ > "type": "Ready"
+ > },
+ > {
+ > "lastTransitionTime": "2024-02-21T06:45:58Z",
+ > "status": "True",
+ > "type": "ControlPlaneInitialized"
+ > },
+ > {
+ > "lastTransitionTime": "2024-02-21T06:44:13Z",
+ > "message": "Scaling up control plane to 3 replicas (actual 1)",
+ > "reason": "ScalingUp",
+ > "severity": "Warning",
+ > "status": "False",
+ > "type": "ControlPlaneReady"
+ > },
+ > {
+ > "lastTransitionTime": "2024-02-21T06:43:49Z",
+ > "status": "True",
+ > "type": "InfrastructureReady"
+ > }
+ > ],
+ > "controlPlaneReady": true,
+ > "failureDomains": {
+ > "nova": {},
+ > "testbed": {}
+ > },
+ > "infrastructureReady": true,
+ > "observedGeneration": 3,
+ > "phase": "Provisioned"
+ > }
+ > }
+
+
+ ssh bootstrap -t \
+ '
+ source loadconfig
+ kubectl \
+ --kubeconfig "${kindclusterconf:?}" \
+ get openstackcluster \
+ --namespace default \
+ --output json \
+ 'somerville-jade-20240221-work'
+ '
+
+ > {
+ > "apiVersion": "infrastructure.cluster.x-k8s.io/v1alpha7",
+ > "kind": "OpenStackCluster",
+ > "metadata": {
+ > "annotations": {
+ > "helm.sh/resource-policy": "keep",
+ > "meta.helm.sh/release-name": "somerville-jade-20240221-work",
+ > "meta.helm.sh/release-namespace": "default"
+ > },
+ > "creationTimestamp": "2024-02-21T06:42:10Z",
+ > "finalizers": [
+ > "openstackcluster.infrastructure.cluster.x-k8s.io"
+ > ],
+ > "generation": 3,
+ > "labels": {
+ > "app.kubernetes.io/managed-by": "Helm",
+ > "capi.stackhpc.com/cluster": "somerville-jade-20240221-work",
+ > "capi.stackhpc.com/infrastructure-provider": "openstack",
+ > "capi.stackhpc.com/managed-by": "Helm",
+ > "cluster.x-k8s.io/cluster-name": "somerville-jade-20240221-work",
+ > "helm.sh/chart": "openstack-cluster-0.1.0"
+ > },
+ > "name": "somerville-jade-20240221-work",
+ > "namespace": "default",
+ > "ownerReferences": [
+ > {
+ > "apiVersion": "cluster.x-k8s.io/v1beta1",
+ > "blockOwnerDeletion": true,
+ > "controller": true,
+ > "kind": "Cluster",
+ > "name": "somerville-jade-20240221-work",
+ > "uid": "5d43f46b-4b5f-4a88-a27c-f2b446434a89"
+ > }
+ > ],
+ > "resourceVersion": "3818",
+ > "uid": "13e936cc-625b-40f5-8cac-1773818a067f"
+ > },
+ > "spec": {
+ > "allowAllInClusterTraffic": true,
+ > "apiServerLoadBalancer": {
+ > "allowedCidrs": [
+ > "192.41.122.71/32",
+ > "90.155.51.57/32"
+ > ],
+ > "enabled": true
+ > },
+ > "apiServerPort": 6443,
+ > "cloudName": "openstack",
+ > "controlPlaneEndpoint": {
+ > "host": "192.41.122.207",
+ > "port": 6443
+ > },
+ > "controlPlaneOmitAvailabilityZone": true,
+ > "disableAPIServerFloatingIP": false,
+ > "externalNetworkId": "1875828a-ccc3-419b-87fd-856aaa781492",
+ > "identityRef": {
+ > "kind": "Secret",
+ > "name": "somerville-jade-20240221-work-cloud-credentials"
+ > },
+ > "managedSecurityGroups": true,
+ > "network": {},
+ > "nodeCidr": "192.168.3.0/24",
+ > "subnet": {}
+ > },
+ > "status": {
+ > "apiServerLoadBalancer": {
+ > "allowedCIDRs": [
+ > "192.168.3.0/24",
+ > "192.41.122.71/32",
+ > "192.41.122.98/32",
+ > "90.155.51.57/32"
+ > ],
+ > "id": "71b5524d-a508-496b-9f49-06edbf8ed358",
+ > "internalIP": "192.168.3.46",
+ > "ip": "192.41.122.207",
+ > "name": "k8s-clusterapi-cluster-default-somerville-jade-20240221-work-kubeapi"
+ > },
+ > "controlPlaneSecurityGroup": {
+ > "id": "02053952-b7da-4e4a-a659-31c0ead42ef9",
+ > "name": "k8s-cluster-default-somerville-jade-20240221-work-secgroup-controlplane",
+ > "rules": [
+ > {
+ > "description": "Full open",
+ > "direction": "egress",
+ > "etherType": "IPv4",
+ > "name": "93649003-4943-4480-8218-0ea309640a49",
+ > "portRangeMax": 0,
+ > "portRangeMin": 0,
+ > "protocol": "",
+ > "remoteGroupID": "",
+ > "remoteIPPrefix": "",
+ > "securityGroupID": "02053952-b7da-4e4a-a659-31c0ead42ef9"
+ > },
+ > {
+ > "description": "Full open",
+ > "direction": "egress",
+ > "etherType": "IPv6",
+ > "name": "5285f124-624f-420e-9a80-2035b0351715",
+ > "portRangeMax": 0,
+ > "portRangeMin": 0,
+ > "protocol": "",
+ > "remoteGroupID": "",
+ > "remoteIPPrefix": "",
+ > "securityGroupID": "02053952-b7da-4e4a-a659-31c0ead42ef9"
+ > },
+ > {
+ > "description": "Kubernetes API",
+ > "direction": "ingress",
+ > "etherType": "IPv4",
+ > "name": "7bcfc55e-4a0b-4be0-b89f-663444420860",
+ > "portRangeMax": 6443,
+ > "portRangeMin": 6443,
+ > "protocol": "tcp",
+ > "remoteGroupID": "",
+ > "remoteIPPrefix": "",
+ > "securityGroupID": "02053952-b7da-4e4a-a659-31c0ead42ef9"
+ > },
+ > {
+ > "description": "In-cluster Ingress",
+ > "direction": "ingress",
+ > "etherType": "IPv4",
+ > "name": "3c813479-0661-4ae2-bc69-02338ef64c12",
+ > "portRangeMax": 0,
+ > "portRangeMin": 0,
+ > "protocol": "",
+ > "remoteGroupID": "02053952-b7da-4e4a-a659-31c0ead42ef9",
+ > "remoteIPPrefix": "",
+ > "securityGroupID": "02053952-b7da-4e4a-a659-31c0ead42ef9"
+ > },
+ > {
+ > "description": "In-cluster Ingress",
+ > "direction": "ingress",
+ > "etherType": "IPv4",
+ > "name": "e3b737e6-4928-4b05-900c-320ce89f664a",
+ > "portRangeMax": 0,
+ > "portRangeMin": 0,
+ > "protocol": "",
+ > "remoteGroupID": "416d64bf-8b76-4602-a497-6773b48dc3df",
+ > "remoteIPPrefix": "",
+ > "securityGroupID": "02053952-b7da-4e4a-a659-31c0ead42ef9"
+ > }
+ > ]
+ > },
+ > "externalNetwork": {
+ > "id": "1875828a-ccc3-419b-87fd-856aaa781492",
+ > "name": "external"
+ > },
+ > "failureDomains": {
+ > "nova": {},
+ > "testbed": {}
+ > },
+ > "network": {
+ > "id": "401237d6-1385-4b2b-9167-c0abd0d0fe48",
+ > "name": "k8s-clusterapi-cluster-default-somerville-jade-20240221-work",
+ > "subnets": [
+ > {
+ > "cidr": "192.168.3.0/24",
+ > "id": "94f4ece7-4439-4de9-a863-903c6428b6e5",
+ > "name": "k8s-clusterapi-cluster-default-somerville-jade-20240221-work"
+ > }
+ > ]
+ > },
+ > "ready": true,
+ > "router": {
+ > "id": "a242ea47-3d3d-4215-9350-453da310614b",
+ > "ips": [
+ > "192.41.122.98"
+ > ],
+ > "name": "k8s-clusterapi-cluster-default-somerville-jade-20240221-work"
+ > },
+ > "workerSecurityGroup": {
+ > "id": "416d64bf-8b76-4602-a497-6773b48dc3df",
+ > "name": "k8s-cluster-default-somerville-jade-20240221-work-secgroup-worker",
+ > "rules": [
+ > {
+ > "description": "Full open",
+ > "direction": "egress",
+ > "etherType": "IPv4",
+ > "name": "59056e61-a9d7-4797-b2ba-210740d3ab0d",
+ > "portRangeMax": 0,
+ > "portRangeMin": 0,
+ > "protocol": "",
+ > "remoteGroupID": "",
+ > "remoteIPPrefix": "",
+ > "securityGroupID": "416d64bf-8b76-4602-a497-6773b48dc3df"
+ > },
+ > {
+ > "description": "Full open",
+ > "direction": "egress",
+ > "etherType": "IPv6",
+ > "name": "8f1feb2b-6514-4b53-8c00-dd5b1176fca5",
+ > "portRangeMax": 0,
+ > "portRangeMin": 0,
+ > "protocol": "",
+ > "remoteGroupID": "",
+ > "remoteIPPrefix": "",
+ > "securityGroupID": "416d64bf-8b76-4602-a497-6773b48dc3df"
+ > },
+ > {
+ > "description": "Node Port Services",
+ > "direction": "ingress",
+ > "etherType": "IPv4",
+ > "name": "272f1fce-ec60-43e1-a885-3ec119006a0c",
+ > "portRangeMax": 32767,
+ > "portRangeMin": 30000,
+ > "protocol": "tcp",
+ > "remoteGroupID": "",
+ > "remoteIPPrefix": "",
+ > "securityGroupID": "416d64bf-8b76-4602-a497-6773b48dc3df"
+ > },
+ > {
+ > "description": "Node Port Services",
+ > "direction": "ingress",
+ > "etherType": "IPv4",
+ > "name": "848147ab-df94-4b59-aecc-55bc61e4ca6a",
+ > "portRangeMax": 32767,
+ > "portRangeMin": 30000,
+ > "protocol": "udp",
+ > "remoteGroupID": "",
+ > "remoteIPPrefix": "",
+ > "securityGroupID": "416d64bf-8b76-4602-a497-6773b48dc3df"
+ > },
+ > {
+ > "description": "In-cluster Ingress",
+ > "direction": "ingress",
+ > "etherType": "IPv4",
+ > "name": "93075903-fbcd-4819-b348-f0a086cd093e",
+ > "portRangeMax": 0,
+ > "portRangeMin": 0,
+ > "protocol": "",
+ > "remoteGroupID": "416d64bf-8b76-4602-a497-6773b48dc3df",
+ > "remoteIPPrefix": "",
+ > "securityGroupID": "416d64bf-8b76-4602-a497-6773b48dc3df"
+ > },
+ > {
+ > "description": "In-cluster Ingress",
+ > "direction": "ingress",
+ > "etherType": "IPv4",
+ > "name": "145a6db0-18f6-4ed4-b274-39f215790616",
+ > "portRangeMax": 0,
+ > "portRangeMin": 0,
+ > "protocol": "",
+ > "remoteGroupID": "02053952-b7da-4e4a-a659-31c0ead42ef9",
+ > "remoteIPPrefix": "",
+ > "securityGroupID": "416d64bf-8b76-4602-a497-6773b48dc3df"
+ > }
+ > ]
+ > }
+ > }
+ > }
+
+
+
+ ssh bootstrap -t \
+ '
+ source loadconfig
+ kubectl \
+ --kubeconfig "${kindclusterconf:?}" \
+ get machinehealthchecks \
+ --namespace default \
+ --output json
+ '
+
+ > {
+ > "apiVersion": "v1",
+ > "items": [
+ > {
+ > "apiVersion": "cluster.x-k8s.io/v1beta1",
+ > "kind": "MachineHealthCheck",
+ > "metadata": {
+ > "annotations": {
+ > "meta.helm.sh/release-name": "somerville-jade-20240221-work",
+ > "meta.helm.sh/release-namespace": "default"
+ > },
+ > "creationTimestamp": "2024-02-21T06:42:10Z",
+ > "generation": 1,
+ > "labels": {
+ > "app.kubernetes.io/managed-by": "Helm",
+ > "capi.stackhpc.com/cluster": "somerville-jade-20240221-work",
+ > "capi.stackhpc.com/component": "control-plane",
+ > "capi.stackhpc.com/infrastructure-provider": "openstack",
+ > "capi.stackhpc.com/managed-by": "Helm",
+ > "cluster.x-k8s.io/cluster-name": "somerville-jade-20240221-work",
+ > "helm.sh/chart": "openstack-cluster-0.1.0"
+ > },
+ > "name": "somerville-jade-20240221-work-control-plane",
+ > "namespace": "default",
+ > "ownerReferences": [
+ > {
+ > "apiVersion": "cluster.x-k8s.io/v1beta1",
+ > "kind": "Cluster",
+ > "name": "somerville-jade-20240221-work",
+ > "uid": "5d43f46b-4b5f-4a88-a27c-f2b446434a89"
+ > }
+ > ],
+ > "resourceVersion": "2273",
+ > "uid": "c8992d33-517e-4d9b-b54d-6d2546640d3e"
+ > },
+ > "spec": {
+ > "clusterName": "somerville-jade-20240221-work",
+ > "maxUnhealthy": "100%",
+ > "nodeStartupTimeout": "10m0s",
+ > "selector": {
+ > "matchLabels": {
+ > "capi.stackhpc.com/cluster": "somerville-jade-20240221-work",
+ > "capi.stackhpc.com/component": "control-plane"
+ > }
+ > },
+ > "unhealthyConditions": [
+ > {
+ > "status": "Unknown",
+ > "timeout": "5m0s",
+ > "type": "Ready"
+ > },
+ > {
+ > "status": "False",
+ > "timeout": "5m0s",
+ > "type": "Ready"
+ > }
+ > ]
+ > },
+ > "status": {
+ > "conditions": [
+ > {
+ > "lastTransitionTime": "2024-02-21T06:42:10Z",
+ > "status": "True",
+ > "type": "RemediationAllowed"
+ > }
+ > ],
+ > "expectedMachines": 1,
+ > "observedGeneration": 1,
+ > "targets": [
+ > "somerville-jade-20240221-work-control-plane-bdfrr"
+ > ]
+ > }
+ > },
+ > {
+ > "apiVersion": "cluster.x-k8s.io/v1beta1",
+ > "kind": "MachineHealthCheck",
+ > "metadata": {
+ > "annotations": {
+ > "meta.helm.sh/release-name": "somerville-jade-20240221-work",
+ > "meta.helm.sh/release-namespace": "default"
+ > },
+ > "creationTimestamp": "2024-02-21T06:42:10Z",
+ > "generation": 1,
+ > "labels": {
+ > "app.kubernetes.io/managed-by": "Helm",
+ > "capi.stackhpc.com/cluster": "somerville-jade-20240221-work",
+ > "capi.stackhpc.com/component": "worker",
+ > "capi.stackhpc.com/infrastructure-provider": "openstack",
+ > "capi.stackhpc.com/managed-by": "Helm",
+ > "capi.stackhpc.com/node-group": "md-0",
+ > "cluster.x-k8s.io/cluster-name": "somerville-jade-20240221-work",
+ > "helm.sh/chart": "openstack-cluster-0.1.0"
+ > },
+ > "name": "somerville-jade-20240221-work-md-0",
+ > "namespace": "default",
+ > "ownerReferences": [
+ > {
+ > "apiVersion": "cluster.x-k8s.io/v1beta1",
+ > "kind": "Cluster",
+ > "name": "somerville-jade-20240221-work",
+ > "uid": "5d43f46b-4b5f-4a88-a27c-f2b446434a89"
+ > }
+ > ],
+ > "resourceVersion": "11655",
+ > "uid": "de48c1ad-7401-4027-bc7a-6f5b385c8b63"
+ > },
+ > "spec": {
+ > "clusterName": "somerville-jade-20240221-work",
+ > "maxUnhealthy": "100%",
+ > "nodeStartupTimeout": "10m0s",
+ > "selector": {
+ > "matchLabels": {
+ > "capi.stackhpc.com/cluster": "somerville-jade-20240221-work",
+ > "capi.stackhpc.com/component": "worker",
+ > "capi.stackhpc.com/node-group": "md-0"
+ > }
+ > },
+ > "unhealthyConditions": [
+ > {
+ > "status": "Unknown",
+ > "timeout": "5m0s",
+ > "type": "Ready"
+ > },
+ > {
+ > "status": "False",
+ > "timeout": "5m0s",
+ > "type": "Ready"
+ > }
+ > ]
+ > },
+ > "status": {
+ > "conditions": [
+ > {
+ > "lastTransitionTime": "2024-02-21T06:42:10Z",
+ > "status": "True",
+ > "type": "RemediationAllowed"
+ > }
+ > ],
+ > "expectedMachines": 6,
+ > "observedGeneration": 1,
+ > "targets": [
+ > "somerville-jade-20240221-work-md-0-bc4ps-2vwsp",
+ > "somerville-jade-20240221-work-md-0-bc4ps-64l4h",
+ > "somerville-jade-20240221-work-md-0-bc4ps-bn992",
+ > "somerville-jade-20240221-work-md-0-bc4ps-dqbpb",
+ > "somerville-jade-20240221-work-md-0-bc4ps-jxnvk",
+ > "somerville-jade-20240221-work-md-0-bc4ps-zvgb7"
+ > ]
+ > }
+ > }
+ > ],
+ > "kind": "List",
+ > "metadata": {
+ > "resourceVersion": ""
+ > }
+ > }
+
+
+ ssh bootstrap -t \
+ '
+ source loadconfig
+ kubectl \
+ --kubeconfig "${kindclusterconf:?}" \
+ get machinedeployments \
+ --namespace default \
+ --output json
+ '
+
+ > {
+ > "apiVersion": "v1",
+ > "items": [
+ > {
+ > "apiVersion": "cluster.x-k8s.io/v1beta1",
+ > "kind": "MachineDeployment",
+ > "metadata": {
+ > "annotations": {
+ > "machinedeployment.clusters.x-k8s.io/revision": "1",
+ > "meta.helm.sh/release-name": "somerville-jade-20240221-work",
+ > "meta.helm.sh/release-namespace": "default"
+ > },
+ > "creationTimestamp": "2024-02-21T06:42:09Z",
+ > "generation": 2,
+ > "labels": {
+ > "app.kubernetes.io/managed-by": "Helm",
+ > "capi.stackhpc.com/cluster": "somerville-jade-20240221-work",
+ > "capi.stackhpc.com/component": "worker",
+ > "capi.stackhpc.com/infrastructure-provider": "openstack",
+ > "capi.stackhpc.com/managed-by": "Helm",
+ > "capi.stackhpc.com/node-group": "md-0",
+ > "cluster.x-k8s.io/cluster-name": "somerville-jade-20240221-work",
+ > "helm.sh/chart": "openstack-cluster-0.1.0"
+ > },
+ > "name": "somerville-jade-20240221-work-md-0",
+ > "namespace": "default",
+ > "ownerReferences": [
+ > {
+ > "apiVersion": "cluster.x-k8s.io/v1beta1",
+ > "kind": "Cluster",
+ > "name": "somerville-jade-20240221-work",
+ > "uid": "5d43f46b-4b5f-4a88-a27c-f2b446434a89"
+ > }
+ > ],
+ > "resourceVersion": "11702",
+ > "uid": "e9d2ee01-aded-4f47-af74-f4d93aca4cdc"
+ > },
+ > "spec": {
+ > "clusterName": "somerville-jade-20240221-work",
+ > "minReadySeconds": 0,
+ > "progressDeadlineSeconds": 600,
+ > "replicas": 6,
+ > "revisionHistoryLimit": 1,
+ > "selector": {
+ > "matchLabels": {
+ > "capi.stackhpc.com/cluster": "somerville-jade-20240221-work",
+ > "capi.stackhpc.com/component": "worker",
+ > "capi.stackhpc.com/node-group": "md-0",
+ > "cluster.x-k8s.io/cluster-name": "somerville-jade-20240221-work"
+ > }
+ > },
+ > "strategy": {
+ > "rollingUpdate": {
+ > "deletePolicy": "Random",
+ > "maxSurge": 0,
+ > "maxUnavailable": 1
+ > },
+ > "type": "RollingUpdate"
+ > },
+ > "template": {
+ > "metadata": {
+ > "labels": {
+ > "capi.stackhpc.com/cluster": "somerville-jade-20240221-work",
+ > "capi.stackhpc.com/component": "worker",
+ > "capi.stackhpc.com/node-group": "md-0",
+ > "cluster.x-k8s.io/cluster-name": "somerville-jade-20240221-work"
+ > }
+ > },
+ > "spec": {
+ > "bootstrap": {
+ > "configRef": {
+ > "apiVersion": "bootstrap.cluster.x-k8s.io/v1beta1",
+ > "kind": "KubeadmConfigTemplate",
+ > "name": "somerville-jade-20240221-work-md-0-99910806"
+ > }
+ > },
+ > "clusterName": "somerville-jade-20240221-work",
+ > "infrastructureRef": {
+ > "apiVersion": "infrastructure.cluster.x-k8s.io/v1alpha7",
+ > "kind": "OpenStackMachineTemplate",
+ > "name": "somerville-jade-20240221-work-md-0-f538b732"
+ > },
+ > "nodeDrainTimeout": "5m0s",
+ > "version": "v1.26.7"
+ > }
+ > }
+ > },
+ > "status": {
+ > "conditions": [
+ > {
+ > "lastTransitionTime": "2024-02-21T06:42:12Z",
+ > "message": "Minimum availability requires 5 replicas, current 0 available",
+ > "reason": "WaitingForAvailableMachines",
+ > "severity": "Warning",
+ > "status": "False",
+ > "type": "Ready"
+ > },
+ > {
+ > "lastTransitionTime": "2024-02-21T06:42:12Z",
+ > "message": "Minimum availability requires 5 replicas, current 0 available",
+ > "reason": "WaitingForAvailableMachines",
+ > "severity": "Warning",
+ > "status": "False",
+ > "type": "Available"
+ > }
+ > ],
+ > "observedGeneration": 2,
+ > "phase": "ScalingUp",
+ > "replicas": 6,
+ > "selector": "capi.stackhpc.com/cluster=somerville-jade-20240221-work,capi.stackhpc.com/component=worker,capi.stackhpc.com/node-group=md-0,cluster.x-k8s.io/cluster-name=somerville-jade-20240221-work",
+ > "unavailableReplicas": 6,
+ > "updatedReplicas": 6
+ > }
+ > }
+ > ],
+ > "kind": "List",
+ > "metadata": {
+ > "resourceVersion": ""
+ > }
+ > }
+
+
+ ssh bootstrap -t \
+ '
+ source loadconfig
+ kubectl \
+ --kubeconfig "${kindclusterconf:?}" \
+ get machinedeployment \
+ --namespace default \
+ --output json \
+ "somerville-jade-20240221-work-md-0" \
+ | jq ".status"
+ '
+
+ > {
+ > "conditions": [
+ > {
+ > "lastTransitionTime": "2024-02-21T06:42:12Z",
+ > "message": "Minimum availability requires 5 replicas, current 0 available",
+ > "reason": "WaitingForAvailableMachines",
+ > "severity": "Warning",
+ > "status": "False",
+ > "type": "Ready"
+ > },
+ > {
+ > "lastTransitionTime": "2024-02-21T06:42:12Z",
+ > "message": "Minimum availability requires 5 replicas, current 0 available",
+ > "reason": "WaitingForAvailableMachines",
+ > "severity": "Warning",
+ > "status": "False",
+ > "type": "Available"
+ > }
+ > ],
+ > "observedGeneration": 2,
+ > "phase": "ScalingUp",
+ > "replicas": 6,
+ > "selector": "capi.stackhpc.com/cluster=somerville-jade-20240221-work,capi.stackhpc.com/component=worker,capi.stackhpc.com/node-group=md-0,cluster.x-k8s.io/cluster-name=somerville-jade-20240221-work",
+ > "unavailableReplicas": 6,
+ > "updatedReplicas": 6
+ > }
+
+
+
+ ssh bootstrap -t \
+ '
+ source loadconfig
+ kubectl \
+ --kubeconfig "${kindclusterconf:?}" \
+ get KubeadmControlPlanes \
+ --namespace default \
+ --output json
+ '
+
+ > {
+ > "apiVersion": "v1",
+ > "items": [
+ > {
+ > "apiVersion": "controlplane.cluster.x-k8s.io/v1beta1",
+ > "kind": "KubeadmControlPlane",
+ > "metadata": {
+ > "annotations": {
+ > "helm.sh/resource-policy": "keep",
+ > "meta.helm.sh/release-name": "somerville-jade-20240221-work",
+ > "meta.helm.sh/release-namespace": "default"
+ > },
+ > "creationTimestamp": "2024-02-21T06:42:09Z",
+ > "finalizers": [
+ > "kubeadm.controlplane.cluster.x-k8s.io"
+ > ],
+ > "generation": 2,
+ > "labels": {
+ > "app.kubernetes.io/managed-by": "Helm",
+ > "capi.stackhpc.com/cluster": "somerville-jade-20240221-work",
+ > "capi.stackhpc.com/component": "control-plane",
+ > "capi.stackhpc.com/infrastructure-provider": "openstack",
+ > "capi.stackhpc.com/managed-by": "Helm",
+ > "cluster.x-k8s.io/cluster-name": "somerville-jade-20240221-work",
+ > "helm.sh/chart": "openstack-cluster-0.1.0"
+ > },
+ > "name": "somerville-jade-20240221-work-control-plane",
+ > "namespace": "default",
+ > "ownerReferences": [
+ > {
+ > "apiVersion": "cluster.x-k8s.io/v1beta1",
+ > "blockOwnerDeletion": true,
+ > "controller": true,
+ > "kind": "Cluster",
+ > "name": "somerville-jade-20240221-work",
+ > "uid": "5d43f46b-4b5f-4a88-a27c-f2b446434a89"
+ > }
+ > ],
+ > "resourceVersion": "4570",
+ > "uid": "2cf20dae-b637-41d6-9097-5d004aff1206"
+ > },
+ > "spec": {
+ > "kubeadmConfigSpec": {
+ > "clusterConfiguration": {
+ > "apiServer": {
+ > "extraArgs": {
+ > "cloud-provider": "external"
+ > }
+ > },
+ > "controllerManager": {
+ > "extraArgs": {
+ > "bind-address": "0.0.0.0",
+ > "cloud-provider": "external"
+ > }
+ > },
+ > "dns": {},
+ > "etcd": {
+ > "local": {
+ > "extraArgs": {
+ > "listen-metrics-urls": "http://0.0.0.0:2381"
+ > }
+ > }
+ > },
+ > "networking": {},
+ > "scheduler": {
+ > "extraArgs": {
+ > "bind-address": "0.0.0.0"
+ > }
+ > }
+ > },
+ > "files": [
+ > {
+ > "content": "# This file is created by the capi-helm-chart to\n# ensure that its parent directory exists. *.toml\n# files in this directory are included in containerd\n# config when /etc/containerd/config.toml is parsed.\n",
+ > "owner": "root:root",
+ > "path": "/etc/containerd/conf.d/.keepdir",
+ > "permissions": "0644"
+ > },
+ > {
+ > "content": "---\napiVersion: kubeproxy.config.k8s.io/v1alpha1\nkind: KubeProxyConfiguration\nmetricsBindAddress: 0.0.0.0:10249\n",
+ > "owner": "root:root",
+ > "path": "/run/kubeadm/kube-proxy-configuration.yaml",
+ > "permissions": "0644"
+ > }
+ > ],
+ > "format": "cloud-config",
+ > "initConfiguration": {
+ > "localAPIEndpoint": {},
+ > "nodeRegistration": {
+ > "imagePullPolicy": "IfNotPresent",
+ > "kubeletExtraArgs": {
+ > "cloud-provider": "external"
+ > },
+ > "name": "{{ local_hostname }}"
+ > }
+ > },
+ > "joinConfiguration": {
+ > "discovery": {},
+ > "nodeRegistration": {
+ > "imagePullPolicy": "IfNotPresent",
+ > "kubeletExtraArgs": {
+ > "cloud-provider": "external"
+ > },
+ > "name": "{{ local_hostname }}"
+ > }
+ > },
+ > "preKubeadmCommands": [
+ > "cat /run/kubeadm/kube-proxy-configuration.yaml \u003e\u003e /run/kubeadm/kubeadm.yaml"
+ > ]
+ > },
+ > "machineTemplate": {
+ > "infrastructureRef": {
+ > "apiVersion": "infrastructure.cluster.x-k8s.io/v1alpha7",
+ > "kind": "OpenStackMachineTemplate",
+ > "name": "somerville-jade-20240221-work-control-plane-c6b6f2d1",
+ > "namespace": "default"
+ > },
+ > "metadata": {
+ > "labels": {
+ > "capi.stackhpc.com/cluster": "somerville-jade-20240221-work",
+ > "capi.stackhpc.com/component": "control-plane"
+ > }
+ > },
+ > "nodeDrainTimeout": "5m0s"
+ > },
+ > "replicas": 3,
+ > "rolloutStrategy": {
+ > "rollingUpdate": {
+ > "maxSurge": 1
+ > },
+ > "type": "RollingUpdate"
+ > },
+ > "version": "v1.26.7"
+ > },
+ > "status": {
+ > "conditions": [
+ > {
+ > "lastTransitionTime": "2024-02-21T06:44:12Z",
+ > "message": "Scaling up control plane to 3 replicas (actual 1)",
+ > "reason": "ScalingUp",
+ > "severity": "Warning",
+ > "status": "False",
+ > "type": "Ready"
+ > },
+ > {
+ > "lastTransitionTime": "2024-02-21T06:45:58Z",
+ > "status": "True",
+ > "type": "Available"
+ > },
+ > {
+ > "lastTransitionTime": "2024-02-21T06:43:51Z",
+ > "status": "True",
+ > "type": "CertificatesAvailable"
+ > },
+ > {
+ > "lastTransitionTime": "2024-02-21T06:56:02Z",
+ > "message": "Node failed to report startup in 10m0s",
+ > "reason": "NodeStartupTimeout @ /somerville-jade-20240221-work-control-plane-bdfrr",
+ > "severity": "Warning",
+ > "status": "False",
+ > "type": "MachinesReady"
+ > },
+ > {
+ > "lastTransitionTime": "2024-02-21T06:44:12Z",
+ > "message": "Scaling up control plane to 3 replicas (actual 1)",
+ > "reason": "ScalingUp",
+ > "severity": "Warning",
+ > "status": "False",
+ > "type": "Resized"
+ > }
+ > ],
+ > "initialized": true,
+ > "observedGeneration": 2,
+ > "ready": true,
+ > "readyReplicas": 1,
+ > "replicas": 1,
+ > "selector": "cluster.x-k8s.io/cluster-name=somerville-jade-20240221-work,cluster.x-k8s.io/control-plane",
+ > "unavailableReplicas": 0,
+ > "updatedReplicas": 1
+ > }
+ > }
+ > ],
+ > "kind": "List",
+ > "metadata": {
+ > "resourceVersion": ""
+ > }
+ > }
+
+
+ ssh bootstrap -t \
+ '
+ source loadconfig
+ kubectl \
+ --kubeconfig "${kindclusterconf:?}" \
+ get KubeadmControlPlane \
+ --namespace default \
+ --output json \
+ "somerville-jade-20240221-work-control-plane" \
+ | jq ".status"
+ '
+
+ > {
+ > "conditions": [
+ > {
+ > "lastTransitionTime": "2024-02-21T06:44:12Z",
+ > "message": "Scaling up control plane to 3 replicas (actual 1)",
+ > "reason": "ScalingUp",
+ > "severity": "Warning",
+ > "status": "False",
+ > "type": "Ready"
+ > },
+ > {
+ > "lastTransitionTime": "2024-02-21T06:45:58Z",
+ > "status": "True",
+ > "type": "Available"
+ > },
+ > {
+ > "lastTransitionTime": "2024-02-21T06:43:51Z",
+ > "status": "True",
+ > "type": "CertificatesAvailable"
+ > },
+ > {
+ > "lastTransitionTime": "2024-02-21T06:56:02Z",
+ > "message": "Node failed to report startup in 10m0s",
+ > "reason": "NodeStartupTimeout @ /somerville-jade-20240221-work-control-plane-bdfrr",
+ > "severity": "Warning",
+ > "status": "False",
+ > "type": "MachinesReady"
+ > },
+ > {
+ > "lastTransitionTime": "2024-02-21T06:44:12Z",
+ > "message": "Scaling up control plane to 3 replicas (actual 1)",
+ > "reason": "ScalingUp",
+ > "severity": "Warning",
+ > "status": "False",
+ > "type": "Resized"
+ > }
+ > ],
+ > "initialized": true,
+ > "observedGeneration": 2,
+ > "ready": true,
+ > "readyReplicas": 1,
+ > "replicas": 1,
+ > "selector": "cluster.x-k8s.io/cluster-name=somerville-jade-20240221-work,cluster.x-k8s.io/control-plane",
+ > "unavailableReplicas": 0,
+ > "updatedReplicas": 1
+ > }
+
+
+# -----------------------------------------------------
+# List stuff in the work cluster ....
+#[root@ansibler]
+
+
+
+# -----------------------------------------------------
+# Try connect to the metrics endpoint.
+# https://github.com/stackhpc/capi-helm-charts/tree/main/charts/cluster-addons#monitoring-and-logging
+#[root@ansibler]
+
+ source /deployments/cluster-api/ansible/files/aglais/bin/loadconfig
+
+ kubectl \
+ --kubeconfig "${workclusterconf:?}" \
+ --namespace monitoring-system \
+ port-forward \
+ svc/kube-prometheus-stack-grafana \
+ 3000:80
+
+ #
+ # Need to use ssh port forwarding as well.
+ # podman -> client
+ # ssh socks desktop -> bootstrap
+ # kubectl bootstrap -> work
+ # ....
+ #
+ # Added port 3001 to the podman command
+ #
+
+# -----------------------------------------------------
+# Watch the cluster status ...
+#[root@bootstrap]
+
+ clusterctl \
+ --kubeconfig "${kindclusterconf:?}" \
+ describe cluster \
+ "${workclustername:?}"
+
+ > NAME READY SEVERITY REASON SINCE MESSAGE
+ > Cluster/somerville-jade-20240221-work False Warning ScalingUp 3h33m Scaling up control plane to 3 replicas (actual 1)
+ > ├─ClusterInfrastructure - OpenStackCluster/somerville-jade-20240221-work
+ > ├─ControlPlane - KubeadmControlPlane/somerville-jade-20240221-work-control-plane False Warning ScalingUp 3h33m Scaling up control plane to 3 replicas (actual 1)
+ > │ └─Machine/somerville-jade-20240221-work-control-plane-bdfrr False Warning NodeStartupTimeout 3h22m Node failed to report startup in 10m0s
+ > └─Workers
+ > └─MachineDeployment/somerville-jade-20240221-work-md-0 False Warning WaitingForAvailableMachines 3h35m Minimum availability requires 5 replicas, current 0 available
+ > └─6 Machines... True 6m21s See somerville-jade-20240221-work-md-0-bc4ps-5bvnv, somerville-jade-20240221-work-md-0-bc4ps-dd6rs, ...
+
+ #
+ # 5hrs and counting ...
+ # 6hrs and counting ...
+ # 30hrs ....
+ #
+
+ clusterctl \
+ --kubeconfig "${kindclusterconf:?}" \
+ describe cluster \
+ "${workclustername:?}"
+
+ > NAME READY SEVERITY REASON SINCE MESSAGE
+ > Cluster/somerville-jade-20240221-work False Warning ScalingUp 30h Scaling up control plane to 3 replicas (actual 1)
+ > ├─ClusterInfrastructure - OpenStackCluster/somerville-jade-20240221-work
+ > ├─ControlPlane - KubeadmControlPlane/somerville-jade-20240221-work-control-plane False Warning ScalingUp 30h Scaling up control plane to 3 replicas (actual 1)
+ > │ └─Machine/somerville-jade-20240221-work-control-plane-bdfrr False Warning NodeStartupTimeout 30h Node failed to report startup in 10m0s
+ > └─Workers
+ > └─MachineDeployment/somerville-jade-20240221-work-md-0 False Warning WaitingForAvailableMachines 30h Minimum availability requires 5 replicas, current 0 available
+ > └─6 Machines... True 5m50s See somerville-jade-20240221-work-md-0-bc4ps-6x8lk, somerville-jade-20240221-work-md-0-bc4ps-nlwb7, ...
+
+
+
diff --git a/notes/zrq/20240222-01-jade-deploy.txt b/notes/zrq/20240222-01-jade-deploy.txt
new file mode 100644
index 00000000..15b55331
--- /dev/null
+++ b/notes/zrq/20240222-01-jade-deploy.txt
@@ -0,0 +1,282 @@
+#
+#
+#
+# Copyright (c) 2024, ROE (http://www.roe.ac.uk/)
+#
+# This information is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This information is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#
+#
+#zrq-notes-time
+#zrq-notes-indent
+#zrq-notes-crypto
+#zrq-notes-ansible
+#zrq-notes-osformat
+#zrq-notes-zeppelin
+#
+# AIMetrics: []
+#
+
+ Target:
+
+ Try a new deployment, adding proxies to access the metrics.
+
+ Result:
+
+ Nope, still broken.
+
+
+# -----------------------------------------------------
+# Run our local client.
+#[user@desktop]
+
+ source "${HOME:?}/aglais.env"
+ export PATH=${PATH}:${AGLAIS_CODE}/bin
+
+ kube-client jade
+
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Delete and create everything.
+#[root@ansibler]
+
+ export cloudsite=somerville-jade
+
+ /deployments/openstack/bin/delete-all.sh \
+ "${cloudname:?}"
+
+ ansible-playbook \
+ --inventory 'bootstrap,' \
+ '/deployments/cluster-api/ansible/00-create-all.yml'
+
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Check the deployment configuration.
+#[root@ansibler]
+
+ cat /opt/aglais/aglais-status.yml
+
+ > aglais:
+ > ansibler:
+ > external:
+ > ipv4: 90.155.51.57
+ > deployment:
+ > date: 20240222
+ > debug:
+ > started: '2024-02-22 17:44:10.779956'
+ > name: somerville-jade-20240222
+ > type: cluster-api
+ > kubernetes:
+ > cluster:
+ > kind:
+ > conf: /opt/aglais/somerville-jade-20240222-kind.yml
+ > debug:
+ > created: '2024-02-22 17:50:39.471363'
+ > name: somerville-jade-20240222-kind
+ > work:
+ > conf: /opt/aglais/somerville-jade-20240222-work.yml
+ > debug:
+ > created: '2024-02-22 17:58:09.520860'
+ > name: somerville-jade-20240222-work
+ > version: 1.26.7
+ > openstack:
+ > cloud:
+ > name: somerville-jade
+ > site: somerville-jade
+ > keypair:
+ > fingerprint: 2e:84:98:98:df:70:06:0e:4c:ed:bd:d4:d6:6b:eb:16
+ > id: somerville-jade-20240222-keypair
+ > name: somerville-jade-20240222-keypair
+ > networks:
+ > bootstrap:
+ > network:
+ > id: 63675b2c-6491-4f43-a678-d45245b531d7
+ > name: somerville-jade-20240222-bootstrap-network
+ > router:
+ > id: 1abb80d5-09a7-4cc6-9693-51554140a915
+ > name: somerville-jade-20240222-bootstrap-network-router
+ > subnet:
+ > cidr: 10.10.0.0/16
+ > id: 1f97702c-ace1-4cde-b97d-27f531931419
+ > name: somerville-jade-20240222-bootstrap-network-subnet
+ > external:
+ > network:
+ > id: 1875828a-ccc3-419b-87fd-856aaa781492
+ > name: external
+ > project:
+ > id: be227fe0300b4ce5b03f44264df615df,
+ > name: Somerville-Gaia-Jade
+ > servers:
+ > bootstrap:
+ > float:
+ > external: 192.41.122.188
+ > id: a33d9502-6af3-45ed-8278-448f16fe8b67
+ > internal: 10.10.0.235
+ > server:
+ > address:
+ > ipv4: 10.10.0.235
+ > flavor:
+ > name: gaia.vm.2vcpu
+ > hostname: bootstrap
+ > id: 799411c4-000b-46c7-b32f-3971715958ef
+ > image:
+ > id: ce533fcf-75a6-4267-a622-d0227e6940b0
+ > name: gaia-dmp-fedora-cloud-38-1.6
+ > name: somerville-jade-20240222-bootstrap-node
+ > user:
+ > id: c4aad146ab7acaf44819e90e3e67a4d0490c164fbb02d388823c1ac9f0ae2e13,
+ > name: Dave Morris
+
+
+# -----------------------------------------------------
+# Check the cluster status.
+#[root@ansibler]
+
+ ssh bootstrap -t \
+ '
+ source loadconfig
+ watch \
+ clusterctl \
+ --kubeconfig "${kindclusterconf:?}" \
+ describe cluster \
+ --grouping=false \
+ --show-conditions all \
+ "${workclustername:?}"
+ '
+
+ > NAME READY SEVERITY REASON SINCE MESSAGE
+ > Cluster/somerville-jade-20240222-work False Warning ScalingUp 4m28s Scaling up control plane to 3 replicas (actual 1)
+ > │ ├─ControlPlaneInitialized True 3m51s
+ > │ ├─ControlPlaneReady False Warning ScalingUp 4m28s Scaling up control plane to 3 replicas (actual 1)
+ > │ └─InfrastructureReady True 4m53s
+ > ├─ClusterInfrastructure - OpenStackCluster/somerville-jade-20240222-work
+ > ├─ControlPlane - KubeadmControlPlane/somerville-jade-20240222-work-control-plane False Warning ScalingUp 4m28s Scaling up control plane to 3 replicas (actual 1)
+ > │ │ ├─Available True 3m51s
+ > │ │ ├─CertificatesAvailable True 4m50s
+ > │ │ ├─MachinesReady True 4m18s
+ > │ │ └─Resized False Warning ScalingUp 4m28s Scaling up control plane to 3 replicas (actual 1)
+ > │ └─Machine/somerville-jade-20240222-work-control-plane-7vdtz True 4m31s
+ > │ ├─BootstrapReady True 4m49s
+ > │ ├─InfrastructureReady True 4m31s
+ > │ └─NodeHealthy False Warning NodeProvisioning 3m52s
+ > └─Workers
+ > └─MachineDeployment/somerville-jade-20240222-work-md-0 False Warning WaitingForAvailableMachines 6m33s Minimum availability requires 5 replicas, current 0 available
+ > │ └─Available False Warning WaitingForAvailableMachines 6m33s Minimum availability requires 5 replicas, current 0 available
+ > ├─Machine/somerville-jade-20240222-work-md-0-fb2mb-52csp True 3m8s
+ > │ ├─BootstrapReady True 3m21s
+ > │ ├─InfrastructureReady True 3m8s
+ > │ └─NodeHealthy False Warning NodeProvisioning 3m8s
+ > ├─Machine/somerville-jade-20240222-work-md-0-fb2mb-9fxhc True 3m8s
+ > │ ├─BootstrapReady True 3m21s
+ > │ ├─InfrastructureReady True 3m8s
+ > │ └─NodeHealthy False Warning NodeProvisioning 3m7s
+ > ├─Machine/somerville-jade-20240222-work-md-0-fb2mb-gbs2n True 3m8s
+ > │ ├─BootstrapReady True 3m21s
+ > │ ├─InfrastructureReady True 3m8s
+ > │ └─NodeHealthy False Warning NodeProvisioning 3m8s
+ > ├─Machine/somerville-jade-20240222-work-md-0-fb2mb-gxkq7 True 3m7s
+ > │ ├─BootstrapReady True 3m21s
+ > │ ├─InfrastructureReady True 3m7s
+ > │ └─NodeHealthy False Warning NodeProvisioning 3m6s
+ > ├─Machine/somerville-jade-20240222-work-md-0-fb2mb-j7q9g True 3m6s
+ > │ ├─BootstrapReady True 3m21s
+ > │ ├─InfrastructureReady True 3m6s
+ > │ └─NodeHealthy False Warning NodeProvisioning 3m6s
+ > └─Machine/somerville-jade-20240222-work-md-0-fb2mb-qptvh True 3m6s
+ > ├─BootstrapReady True 3m21s
+ > ├─InfrastructureReady True 3m6s
+ > └─NodeHealthy False Warning NodeProvisioning 3m6s
+
+
+# -----------------------------------------------------
+# Setup the proxies to access the monitoring endpoint.
+# https://github.com/stackhpc/capi-helm-charts/tree/main/charts/cluster-addons#monitoring-and-logging
+#[root@ansibler]
+
+ #
+ # Based on the monitoring-and-logging section in their documentation,
+ # we should be looking for a service called 'kube-prometheus-stack-grafana'.
+ #
+
+ ssh bootstrap \
+ '
+ source loadconfig
+ kubectl \
+ --kubeconfig "${workclusterconf:?}" \
+ get services \
+ --all-namespaces
+ '
+
+ > E0222 18:03:18.397556 13404 memcache.go:287] couldn't get resource list for metrics.k8s.io/v1beta1: the server is currently unable to handle the request
+ > E0222 18:03:18.439676 13404 memcache.go:121] couldn't get resource list for metrics.k8s.io/v1beta1: the server is currently unable to handle the request
+ > E0222 18:03:18.444345 13404 memcache.go:121] couldn't get resource list for metrics.k8s.io/v1beta1: the server is currently unable to handle the request
+ > E0222 18:03:18.448025 13404 memcache.go:121] couldn't get resource list for metrics.k8s.io/v1beta1: the server is currently unable to handle the request
+ > NAMESPACE NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
+ > calico-system calico-typha ClusterIP 172.31.125.139 5473/TCP 3m15s
+ > default kubernetes ClusterIP 172.24.0.1 443/TCP 5m15s
+ > kube-system kube-dns ClusterIP 172.24.0.10 53/UDP,53/TCP,9153/TCP 4m54s
+ > kube-system metrics-server ClusterIP 172.24.139.107 443/TCP 4m50s
+ > kubernetes-dashboard kubernetes-dashboard ClusterIP 172.30.195.58 443/TCP 4m47s
+ > monitoring-system loki-stack ClusterIP 172.24.244.80 3100/TCP 4m35s
+ > monitoring-system loki-stack-headless ClusterIP None 3100/TCP 4m35s
+ > monitoring-system loki-stack-memberlist ClusterIP None 7946/TCP 4m35s
+ > node-feature-discovery node-feature-discovery-master ClusterIP 172.24.77.107 8080/TCP 4m47s
+
+ #
+ # .... but there isn't one.
+ #
+
+ ssh bootstrap \
+ '
+ source loadconfig
+ kubectl \
+ --kubeconfig "${workclusterconf:?}" \
+ get deployments \
+ --all-namespaces
+ '
+
+ > E0222 18:10:31.007049 13650 memcache.go:287] couldn't get resource list for metrics.k8s.io/v1beta1: the server is currently unable to handle the request
+ > E0222 18:10:31.009879 13650 memcache.go:121] couldn't get resource list for metrics.k8s.io/v1beta1: the server is currently unable to handle the request
+ > E0222 18:10:31.013548 13650 memcache.go:121] couldn't get resource list for metrics.k8s.io/v1beta1: the server is currently unable to handle the request
+ > E0222 18:10:31.016791 13650 memcache.go:121] couldn't get resource list for metrics.k8s.io/v1beta1: the server is currently unable to handle the request
+ > NAMESPACE NAME READY UP-TO-DATE AVAILABLE AGE
+ > calico-system calico-kube-controllers 0/1 1 0 10m
+ > calico-system calico-typha 0/3 3 0 10m
+ > gpu-operator gpu-operator 0/1 1 0 12m
+ > kube-system coredns 0/2 2 0 12m
+ > kube-system metrics-server 0/1 1 0 12m
+ > kubernetes-dashboard kubernetes-dashboard 0/1 1 0 12m
+ > network-operator mellanox-network-operator 0/1 1 0 12m
+ > node-feature-discovery node-feature-discovery-master 0/1 1 0 12m
+ > tigera-operator tigera-operator 0/1 1 0 10m
+
+ #
+ # I'd assume there is a limit to which components we can run in the cluster
+ # when the cluster itself is broken ?
+ # I understand the kube-system and calico-system deployments should be there
+ # because they are part of the cluster infrastructure.
+ # But I'm wondering how much of the Prometheus and Grafana deployments
+ # we can run on a broken cluster ?
+ #
+ # One way to find out is to deploy this on the Cambridge Arcus system,
+ # get the proxies connected up and learn how it works on a working
+ # cluster, and then come back to deploy it on the Somerville system.
+ #
+
diff --git a/notes/zrq/20240222-02-jade-deploy.txt b/notes/zrq/20240222-02-jade-deploy.txt
new file mode 100644
index 00000000..1fce572b
--- /dev/null
+++ b/notes/zrq/20240222-02-jade-deploy.txt
@@ -0,0 +1,104 @@
+#
+#
+#
+# Copyright (c) 2024, ROE (http://www.roe.ac.uk/)
+#
+# This information is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This information is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#
+#
+#zrq-notes-time
+#zrq-notes-indent
+#zrq-notes-crypto
+#zrq-notes-ansible
+#zrq-notes-osformat
+#zrq-notes-zeppelin
+#
+# AIMetrics: []
+#
+
+ Target:
+
+ We added the firewall rules because Cambridge don't want us to leave the kubectl endpoint public.
+    But no such rules apply to Somerville (despite what they do to the OpenStack API).
+ So might as well try the deploy without the firewall rules patch on the kubectl endpoint.
+
+ Result:
+
+ Work in progress ...
+
+
+# -----------------------------------------------------
+# Remove the firewall rules patch.
+#[user@desktop]
+
+ source "${HOME:?}/aglais.env"
+ pushd "${AGLAIS_CODE}"
+
+ gedit deployments/cluster-api/ansible/00-create-all.yml &
+
+ - import_playbook: 25-create-work-cluster.yml
+ ~ # import_playbook: 26-secure-work-cluster.yml
+
+ popd
+
+# -----------------------------------------------------
+# Run our local client.
+#[user@desktop]
+
+ source "${HOME:?}/aglais.env"
+ export PATH=${PATH}:${AGLAIS_CODE}/bin
+
+ kube-client jade
+
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Delete and create everything.
+#[root@ansibler]
+
+ export cloudsite=somerville-jade
+
+ /deployments/openstack/bin/delete-all.sh \
+ "${cloudname:?}"
+
+ ansible-playbook \
+ --inventory 'bootstrap,' \
+ '/deployments/cluster-api/ansible/00-create-all.yml'
+
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Watch the cluster status.
+#[root@ansibler]
+
+ ssh bootstrap -t \
+ '
+ source loadconfig
+ watch \
+ clusterctl \
+ --kubeconfig "${kindclusterconf:?}" \
+ describe cluster \
+ --grouping=false \
+ --show-conditions all \
+ "${workclustername:?}"
+ '
+
+ > ....
+ > ....
+
diff --git a/notes/zrq/20240222-03-arcus-deploy.txt b/notes/zrq/20240222-03-arcus-deploy.txt
new file mode 100644
index 00000000..97ad1b3d
--- /dev/null
+++ b/notes/zrq/20240222-03-arcus-deploy.txt
@@ -0,0 +1,95 @@
+#
+#
+#
+# Copyright (c) 2024, ROE (http://www.roe.ac.uk/)
+#
+# This information is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This information is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#
+#
+#zrq-notes-time
+#zrq-notes-indent
+#zrq-notes-crypto
+#zrq-notes-ansible
+#zrq-notes-osformat
+#zrq-notes-zeppelin
+#
+# AIMetrics: []
+#
+
+ Target:
+
+ Comparison test.
+ Run the ClusterAPI deploy on Arcus blue.
+ First to check that it still works on the Cambridge platform.
+ Also to learn how to use the monitoring components on a working cluster
+ before we try to use them on a broken cluster.
+
+ Result:
+
+ Work in progress ...
+
+
+# -----------------------------------------------------
+# Run our local client.
+#[user@desktop]
+
+ source "${HOME:?}/aglais.env"
+ export PATH=${PATH}:${AGLAIS_CODE}/bin
+
+ kube-client blue
+
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Delete and create everything.
+#[root@ansibler]
+
+ /deployments/openstack/bin/delete-all.sh \
+ "${cloudname:?}"
+
+ ansible-playbook \
+ --inventory 'bootstrap,' \
+ '/deployments/cluster-api/ansible/00-create-all.yml'
+
+ > ....
+ > ....
+
+
+# -----------------------------------------------------
+# Watch the cluster status.
+#[root@ansibler]
+
+ ssh bootstrap -t \
+ '
+ source loadconfig
+ watch \
+ clusterctl \
+ --kubeconfig "${kindclusterconf:?}" \
+ describe cluster \
+ "${workclustername:?}"
+ '
+
+ > ....
+ > ....
+
+
+
+ #
+ # create delete create delete .......
+ # create on red, blue, jade
+    # It works, it doesn't, it doesn't, it does ... then it doesn't.
+