-
Notifications
You must be signed in to change notification settings - Fork 103
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
docs: document procedure to fix PVs broken with v2.0.0 (#340)
See #333
- Loading branch information
Showing
2 changed files
with
213 additions
and
0 deletions.
There are no files selected for viewing
123 changes: 123 additions & 0 deletions
123
docs/v2.0.0-fix-volume-topology/fix-persistentvolume-topology.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
#!/usr/bin/env bash | ||
set -e -o pipefail | ||
|
||
# Turn on command tracing when the DEBUG environment variable is non-empty. | ||
if [[ -n "${DEBUG}" ]]; then | ||
  set -x | ||
fi | ||
|
||
# Name of the PersistentVolume to fix, taken from the first argument. | ||
PV_NAME="$1" | ||
|
||
# Abort early when no name was given: with an empty name the log and backup | ||
# files would be written directly into the shared parent directory. | ||
if [ "${PV_NAME}" = "" ]; | ||
then | ||
echo "[ERR] Usage: $0 <persistentvolume-name>" >&2 | ||
exit 1 | ||
fi | ||
|
||
# Prepare directory (one sub-directory per PersistentVolume holds its backups and logs) | ||
DIR="./hcloud-csi-fix-topology/${PV_NAME}" | ||
echo "[INFO] Creating a new directory to backup objects: ${DIR}" | ||
mkdir --parents "${DIR}" | ||
|
||
# Logging utility | ||
LOG_FILE="${DIR}/logs.txt" | ||
# Print a message to stdout and append the same line to LOG_FILE. | ||
# Arguments: $1 - the message to log | ||
write_log() { | ||
  local message="$1" | ||
  echo "${message}" | ||
  echo "${message}" >> "${LOG_FILE}" | ||
} | ||
|
||
# Verify dependencies | ||
# Abort the script unless the given command can be resolved by `command -v`. | ||
# Arguments: $1 - name of the command to look up | ||
# Outputs:   an [ERR] message via write_log when the command is missing | ||
# Exits:     with status 1 when the command is missing | ||
verify_installed() { | ||
  local cmd="$1" | ||
  if ! command -v "$cmd" &> /dev/null | ||
  then | ||
    write_log "[ERR] For the script to run successfully, \"${cmd}\" is required, but it could not be found. Please make sure it is installed." | ||
    # A bare `exit` would reuse the status of write_log and report success. | ||
    exit 1 | ||
  fi | ||
} | ||
|
||
verify_installed kubectl | ||
verify_installed hcloud | ||
|
||
# [kubectl] Get PersistentVolume (PV) and verify it fulfills criteria | ||
PV_FILE_ORIG="${DIR}/persistentvolume.orig.json" | ||
|
||
kubectl get persistentvolume "${PV_NAME}" -o=json > "$PV_FILE_ORIG" | ||
# Read three fields of the PersistentVolume in a single kubectl call. The | ||
# jsonpath prints them separated by spaces, and the unquoted command | ||
# substitution splits that output on whitespace into the array: | ||
#   [0] pv.kubernetes.io/provisioned-by annotation | ||
#   [1] key(s) of the node affinity match expressions | ||
#   [2] CSI volume handle | ||
# NOTE(review): the split is positional, so an empty field or more than one | ||
# match expression key would shift the later indexes - confirm this cannot | ||
# happen for the affected PersistentVolumes. | ||
PV_INFO=( | ||
$(kubectl get persistentvolume "${PV_NAME}" \ | ||
-o=jsonpath='{.metadata.annotations.pv\.kubernetes\.io\/provisioned-by} {.spec.nodeAffinity.required.nodeSelectorTerms[*].matchExpressions[*].key} {.spec.csi.volumeHandle}' | ||
)) | ||
PV_PROVISIONED_BY="${PV_INFO[0]}" | ||
PV_TOPOLOGY_LABEL="${PV_INFO[1]}" | ||
PV_VOLUME_ID="${PV_INFO[2]}" | ||
|
||
if [ "${PV_PROVISIONED_BY}" != "csi.hetzner.cloud" ]; | ||
then | ||
write_log "[ERR] PersistentVolume with name \"${PV_NAME}\" was not provisioned by hcloud-csi-driver." | ||
exit 1 | ||
fi | ||
|
||
if [ "${PV_TOPOLOGY_LABEL}" != "topology.kubernetes.io/region" ]; | ||
then | ||
write_log "[ERR] PersistentVolume with name \"${PV_NAME}\" does not use the invalid topology label." | ||
exit 1 | ||
fi | ||
|
||
# [kubectl] Verify that no volume attachment exists | ||
ATTACHMENTS=$(kubectl get volumeattachment -o jsonpath="{.items[?(@.spec.source.persistentVolumeName==\"${PV_NAME}\")].metadata.name}") | ||
if [ "${ATTACHMENTS}" != "" ]; | ||
then | ||
write_log "[ERR] PersistentVolume with name \"${PV_NAME}\" is still attached according to kubernetes VolumeAttachment: ${ATTACHMENTS}" | ||
exit 1 | ||
fi | ||
|
||
# [hcloud] Get Volume | ||
hcloud volume describe "${PV_VOLUME_ID}" -o=json > "${DIR}"/volume.orig.json | ||
VOLUME_INFO=($(hcloud volume describe "${PV_VOLUME_ID}" -o=format='{{.Protection.Delete}} {{if .Server }}{{.Server.ID}}{{end}}')) | ||
|
||
VOLUME_DELETION_PROTECTION="${VOLUME_INFO[0]}" | ||
VOLUME_SERVER="${VOLUME_INFO[1]}" | ||
|
||
# [hcloud] Verify that the Volume is not assigned to a server | ||
if [ "${VOLUME_SERVER}" != "" ]; | ||
then | ||
write_log "[ERR] Hetzner Cloud Volume with ID \"${PV_VOLUME_ID}\" is still attached to server \"${VOLUME_SERVER}\" according to Hetzner Cloud API." | ||
exit 1 | ||
fi | ||
|
||
# [hcloud] Enable deletion protection | ||
write_log "[INFO] Current state of Volume deletion protection: ${VOLUME_DELETION_PROTECTION}" | ||
|
||
if [ "${VOLUME_DELETION_PROTECTION}" != "true" ]; | ||
then | ||
write_log "[INFO] Enabling Volume deletion protection" | ||
hcloud volume enable-protection "${PV_VOLUME_ID}" delete | ||
fi | ||
|
||
# [kubectl] Remove finalizers | ||
write_log "[INFO] Removing finalizers from PersistentVolume" | ||
kubectl patch persistentvolume "${PV_NAME}" --type=json -p='[{"op":"replace", "path": "/metadata/finalizers", "value": []}]' | ||
|
||
# Prepare PersistentVolume JSON | ||
# The backed-up manifest is patched with --dry-run=client, so the result is | ||
# only written to PV_FILE_FIXED; the patch replaces the key of the first match | ||
# expression of the first node selector term with the driver's own label. | ||
# The output format is JSON so that the content matches the ".json" file name | ||
# and the format of the original backup. | ||
PV_FILE_FIXED="${DIR}/persistentvolume.fixed.json" | ||
kubectl patch \ | ||
--dry-run=client \ | ||
--filename="$PV_FILE_ORIG" \ | ||
--type=json \ | ||
--patch='[{"op":"replace", "path": "/spec/nodeAffinity/required/nodeSelectorTerms/0/matchExpressions/0/key", "value": "csi.hetzner.cloud/location"}]' \ | ||
--output=json > "${PV_FILE_FIXED}" | ||
|
||
# [kubectl] Delete Persistent Volume | ||
write_log "[INFO] Deleting current PersistentVolume" | ||
# The delete is started in the background (&) so that the script can continue | ||
# and remove the finalizers while the delete request is still pending. | ||
# NOTE(review): the exit status of the background job is never collected with | ||
# `wait`, so a failing delete is only noticed through the `kubectl wait` below. | ||
kubectl delete persistentvolume "${PV_NAME}" & | ||
# The pv-protection finalizer is added right back. For the deletion to work, | ||
# we need to remove it again. | ||
kubectl patch persistentvolume "${PV_NAME}" --type=json -p='[{"op":"replace", "path": "/metadata/finalizers", "value": []}]' | ||
|
||
# Wait until the object is gone, giving up after the 10 second timeout. | ||
write_log "[INFO] Waiting for deletion to finish" | ||
kubectl wait --for=delete persistentvolume "${PV_NAME}" --timeout=10s | ||
|
||
# [kubectl] Create new Persistent Volume | ||
write_log "[INFO] Creating new PersistentVolume" | ||
kubectl create --filename="${PV_FILE_FIXED}" | ||
|
||
# [hcloud] Disable deletion protection (if previously enabled) | ||
if [ "${VOLUME_DELETION_PROTECTION}" != "true" ]; | ||
then | ||
write_log "[INFO] Disabling Volume deletion protection which was added for migration" | ||
hcloud volume disable-protection "${PV_VOLUME_ID}" delete | ||
fi |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
# Updating the Topology selection for `PersistentVolumes` created with csi-driver v2.0.0 | ||
|
||
This guide is intended for Kubernetes Cluster operators that installed the hcloud-csi-driver v2.0.0 and created Volumes. Unfortunately, this version included a change that we had to revert; for details, see [issue #333](https://github.com/hetznercloud/csi-driver/issues/333). | ||
|
||
The affected `PersistentVolumes` reference a wrong label in the `spec.nodeAffinity.required` fields. The volumes work as they should, but for consistency and future compatibility we recommend that the `PersistentVolumes` should be fixed. | ||
|
||
Unfortunately, the affected field is immutable, so we need to recreate the `PersistentVolume` in Kubernetes. The actual Volume with the data will not be touched, so no data loss is expected. | ||
This is only possible if it is not attached to a node, so any workload using the `PersistentVolume` needs to be paused for this. | ||
|
||
## Pre-requisites | ||
|
||
You need to have `kubectl` and the `hcloud` cli tool installed for this guide. | ||
|
||
To avoid creating any new broken `PersistentVolumes` while you are still fixing old ones, you should upgrade to `v2.0.1` or `v2.1.0` before starting this guide. After all `PersistentVolumes` have been migrated, you should upgrade to `v2.1.0`. | ||
|
||
## Find affected `PersistentVolumes` | ||
|
||
To find out if you are affected by this, and which `PersistentVolumes` need to be recreated, you can run the following command, which will output a list of affected `PersistentVolumes`. | ||
|
||
```shell | ||
kubectl get persistentvolume -o=custom-columns="NAME:.metadata.name,CLAIM:.spec.claimRef.name,TOPOLOGY:.spec.nodeAffinity.required.nodeSelectorTerms[*].matchExpressions[*].key,DRIVER:.metadata.annotations.pv\.kubernetes\.io/provisioned-by" | grep -e NAME -e "topology.kubernetes.io/region.*csi.hetzner.cloud" --color=never | ||
|
||
NAME CLAIM TOPOLOGY DRIVER | ||
pvc-0409d3b7-46c8-4a95-8475-dfbb053559c0 csi-test-6 topology.kubernetes.io/region csi.hetzner.cloud | ||
pvc-194d750a-bc28-4911-9618-8c6f3e61c404 csi-test-9 topology.kubernetes.io/region csi.hetzner.cloud | ||
pvc-2d1a5015-74c1-4746-b523-e1ce8d91705e csi-test-5 topology.kubernetes.io/region csi.hetzner.cloud | ||
pvc-35cff6bc-dc4e-4f3d-9524-31d3217a77c4 csi-test-10 topology.kubernetes.io/region csi.hetzner.cloud | ||
pvc-44623554-8491-4ee0-a55b-92e9b6a5fa78 csi-test-8 topology.kubernetes.io/region csi.hetzner.cloud | ||
pvc-92b2e92d-d079-4df2-860d-b715996d9f86 csi-test-2 topology.kubernetes.io/region csi.hetzner.cloud | ||
pvc-c157fbd7-e26f-4ab6-8587-aa0ac737ee93 csi-test-4 topology.kubernetes.io/region csi.hetzner.cloud | ||
pvc-ca9a8389-596b-404a-85f8-56aa4362c00f csi-test-3 topology.kubernetes.io/region csi.hetzner.cloud | ||
pvc-e7eccb3f-a842-452d-b10f-f8f88a40c267 csi-test-1 topology.kubernetes.io/region csi.hetzner.cloud | ||
pvc-eff7592a-616d-4188-9963-8c2640093d32 csi-test-7 topology.kubernetes.io/region csi.hetzner.cloud | ||
``` | ||
|
||
This (example) output means that 10 `PersistentVolumes` are affected. You should save this output somewhere, as you will need the names for the next steps. | ||
|
||
## Re-create `PersistentVolume` | ||
|
||
You need to repeat these steps for every `PersistentVolume`. | ||
|
||
Start by pausing the application (if any) using the `PersistentVolume`. For example, if you have a Deployment that uses the `PersistentVolume` (through a `PersistentVolumeClaim`, see table above), you can scale it down to 0. | ||
|
||
For the following step we provide a migration script that makes a backup and then deletes and re-creates the `PersistentVolume`. You can use this script or execute the steps it contains yourself. | ||
|
||
Download the script: | ||
|
||
```shell | ||
$ curl --location --output ./fix-persistentvolume-topology.sh TODO_LINK_TO_SCRIPT | ||
$ chmod +x ./fix-persistentvolume-topology.sh | ||
``` | ||
|
||
Make sure that you have the right Kubernetes and hcloud contexts selected in the current shell. | ||
|
||
Now you can run the script for a single `PersistentVolume`: | ||
|
||
```shell | ||
$ ./fix-persistentvolume-topology.sh pvc-e7eccb3f-a842-452d-b10f-f8f88a40c267 | ||
[INFO] Creating a new directory to backup objects: ./hcloud-csi-fix-topology/pvc-e7eccb3f-a842-452d-b10f-f8f88a40c267 | ||
[INFO] Current state of Volume deletion protection: false | ||
[INFO] Enabling Volume deletion protection | ||
1.1s [===================================] 100.00% | ||
Resource protection enabled for volume 123456789 | ||
[INFO] Removing finalizers from PersistentVolume | ||
persistentvolume/pvc-e7eccb3f-a842-452d-b10f-f8f88a40c267 patched | ||
[INFO] Deleting current PersistentVolume | ||
persistentvolume "pvc-e7eccb3f-a842-452d-b10f-f8f88a40c267" deleted | ||
persistentvolume/pvc-e7eccb3f-a842-452d-b10f-f8f88a40c267 patched | ||
[INFO] Waiting for deletion to finish | ||
[INFO] Creating new PersistentVolume | ||
persistentvolume/pvc-e7eccb3f-a842-452d-b10f-f8f88a40c267 created | ||
[INFO] Disabling Volume deletion protection which was added for migration | ||
600ms [==================================] 100.00% | ||
Resource protection disabled for volume 123456789 | ||
``` | ||
|
||
Once the script has successfully finished, you can scale up the workload again. | ||
|
||
In case the script encountered an error and shows a message prefixed with `[ERR]`, some pre-condition was not met: | ||
|
||
- You are missing the `kubectl` or `hcloud` binaries | ||
- The volume is not affected | ||
- The volume does not belong to hcloud-csi-driver | ||
- The volume is still attached to a server | ||
|
||
You can fix these errors and then re-run the script. | ||
|
||
In case something else goes wrong, the script makes backups of all resources in the directory `./hcloud-csi-fix-topology/$PERSISTENT_VOLUME_NAME`, as logged by the script. You can use these to manually re-create the `PersistentVolume`. | ||
|
||
If you have any issues, please feel free to open an issue on the [GitHub Repository](https://github.com/hetznercloud/csi-driver) or through the [Hetzner Ticket System](https://console.hetzner.cloud/support). |