From 7464a22d99515a08ff9aed64d282b83139989a12 Mon Sep 17 00:00:00 2001 From: Jeremy Lewi Date: Tue, 28 Jan 2020 10:59:40 -0800 Subject: [PATCH] Update playbook for dealing with NFS out of space and use kustomize. (#583) * Instead of using ksonnet to manage the PV and PVC for NFS switch to using kustomize and update the playbook. Fix #582 --- playbook.md | 9 +- test-infra/base/kustomization.yaml | 6 ++ test-infra/base/nfs-external.yaml | 16 ++++ test-infra/base/pvc.yaml | 33 ++++++++ test-infra/base/statefulset.yaml | 50 +++++++++++ .../ks_app/components/debug-worker.jsonnet | 83 ------------------- .../ks_app/components/nfs-external.jsonnet | 65 --------------- 7 files changed, 109 insertions(+), 153 deletions(-) create mode 100644 test-infra/base/kustomization.yaml create mode 100644 test-infra/base/nfs-external.yaml create mode 100644 test-infra/base/pvc.yaml create mode 100644 test-infra/base/statefulset.yaml delete mode 100644 test-infra/ks_app/components/debug-worker.jsonnet delete mode 100644 test-infra/ks_app/components/nfs-external.jsonnet diff --git a/playbook.md b/playbook.md index 5945dc3f003..7d8a48e4013 100644 --- a/playbook.md +++ b/playbook.md @@ -166,15 +166,14 @@ kubectl config set-context $(kubectl config current-context) --namespace=kubeflo 1. Set the IP address in the PV - ``` - cd test-infra/ks_app - ks param set --env=kubeflow-ci nfs-external nfsServer - ``` + * Edit `test-infra/base/pvc.yaml` + * Change the `server` address of the persistent volume to the IP address of the new NFS server 1. Recreate the PV and PVC ``` - ks apply kubeflow-ci -c nfs-external + cd test-infra/ + kustomize build base | kubectl apply -f - ``` 1. 
Make sure the `debug-worker-0` pod is able to successfully mount the PV diff --git a/test-infra/base/kustomization.yaml b/test-infra/base/kustomization.yaml new file mode 100644 index 00000000000..3e3364ef62e --- /dev/null +++ b/test-infra/base/kustomization.yaml @@ -0,0 +1,6 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +namespace: kubeflow-test-infra +resources: +- pvc.yaml +- statefulset.yaml \ No newline at end of file diff --git a/test-infra/base/nfs-external.yaml b/test-infra/base/nfs-external.yaml new file mode 100644 index 00000000000..e8a60b602c2 --- /dev/null +++ b/test-infra/base/nfs-external.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + labels: + name: nfs-external + namespace: kubeflow-test-infra +spec: + accessModes: + - ReadWriteMany + dataSource: null + resources: + requests: + storage: 500Mi + storageClassName: gcfs-storage + volumeMode: Filesystem + volumeName: gcfs diff --git a/test-infra/base/pvc.yaml b/test-infra/base/pvc.yaml new file mode 100644 index 00000000000..ed9e91caf40 --- /dev/null +++ b/test-infra/base/pvc.yaml @@ -0,0 +1,33 @@ +apiVersion: "v1" +kind: "PersistentVolume" +metadata: + name: gcfs +spec: + accessModes: + - "ReadWriteMany" + capacity: + storage: "5Gi" + nfs: + path: "/kubeflow" + server: "10.86.7.242" + persistentVolumeReclaimPolicy: "Retain" + storageClassName: gcfs-storage + +--- + +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: nfs-external + annotations: + "volume.beta.kubernetes.io/storage-class": gcfs-storage +spec: + accessModes: + - ReadWriteMany + dataSource: null + resources: + requests: + storage: 500Mi + storageClassName: gcfs-storage + volumeMode: Filesystem + volumeName: gcfs diff --git a/test-infra/base/statefulset.yaml b/test-infra/base/statefulset.yaml new file mode 100644 index 00000000000..7e424fbd0c9 --- /dev/null +++ b/test-infra/base/statefulset.yaml @@ -0,0 +1,50 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: 
debug-worker +spec: + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app: debug-worker + serviceName: "" + template: + metadata: + creationTimestamp: null + labels: + app: debug-worker + spec: + containers: + - command: + - tail + - -f + - /dev/null + env: + - name: GOOGLE_APPLICATION_CREDENTIALS + value: /secret/gcp-credentials/key.json + image: gcr.io/kubeflow-ci/test-worker:latest + imagePullPolicy: Always + name: test-container + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /mnt/test-data-volume + name: nfs-external + - mountPath: /secret/gcp-credentials + name: gcp-credentials + dnsPolicy: ClusterFirst + restartPolicy: Always + schedulerName: default-scheduler + securityContext: {} + terminationGracePeriodSeconds: 30 + volumes: + - name: nfs-external + persistentVolumeClaim: + claimName: nfs-external + - name: gcp-credentials + secret: + defaultMode: 420 + secretName: kubeflow-testing-credentials + updateStrategy: + type: RollingUpdate diff --git a/test-infra/ks_app/components/debug-worker.jsonnet b/test-infra/ks_app/components/debug-worker.jsonnet deleted file mode 100644 index ec5731d2935..00000000000 --- a/test-infra/ks_app/components/debug-worker.jsonnet +++ /dev/null @@ -1,83 +0,0 @@ -// This component defines a stateful set that just starts a pod using the same image -// as the workers used by our Argo workflows and mounts the same NFS volumes. -// This is useful for looking at the files because you can just do kubectl exec. -local params = std.extVar("__ksonnet/params").components["debug-worker"]; -// TODO(https://github.com/ksonnet/ksonnet/issues/222): We have to add namespace as an explicit parameter -// because ksonnet doesn't support inheriting it from the environment yet. 
- -local k = import "k.libsonnet"; - -local name = params.name; -local namespace = params.namespace; - - -local ss = { - apiVersion: "apps/v1beta1", - kind: "StatefulSet", - metadata: { - name: "debug-worker", - namespace: namespace, - }, - spec: { - replicas: 1, - serviceName: "", - template: { - metadata: { - labels: { - app: "debug-worker", - }, - }, - spec: { - containers: [ - { - command: [ - "tail", - "-f", - "/dev/null", - ], - env: [ - { - name: "GOOGLE_APPLICATION_CREDENTIALS", - value: "/secret/gcp-credentials/key.json", - }, - ], - image: "gcr.io/kubeflow-ci/test-worker:latest", - name: "test-container", - volumeMounts: [ - { - mountPath: "/mnt/test-data-volume", - name: "nfs-external", - }, - { - mountPath: "/secret/gcp-credentials", - name: "gcp-credentials", - }, - ], - }, - ], - volumes: [ - { - name: "nfs-external", - persistentVolumeClaim: { - claimName: "nfs-external", - }, - }, - { - name: "gcp-credentials", - secret: { - secretName: params.gcpCredentialsSecretName, - }, - }, - ], - - }, - }, - updateStrategy: { - type: "RollingUpdate", - }, - }, -}; - -std.prune(k.core.v1.list.new([ - ss, -])) diff --git a/test-infra/ks_app/components/nfs-external.jsonnet b/test-infra/ks_app/components/nfs-external.jsonnet deleted file mode 100644 index 8fa5dd21404..00000000000 --- a/test-infra/ks_app/components/nfs-external.jsonnet +++ /dev/null @@ -1,65 +0,0 @@ -// This component defines persistent volumes and claims for an external -// NFS file system. -local params = std.extVar("__ksonnet/params").components["nfs-external"]; -// TODO(https://github.com/ksonnet/ksonnet/issues/222): We have to add namespace as an explicit parameter -// because ksonnet doesn't support inheriting it from the environment yet. 
- -local k = import "k.libsonnet"; - -local name = params.name; -local namespace = params.namespace; -local nfsServer = params.nfsServer; - -local storageClassName = "gcfs-storage"; - -local pv = { - apiVersion: "v1", - kind: "PersistentVolume", - metadata: { - name: "gcfs", - namespace: namespace, - }, - spec: { - accessModes: [ - "ReadWriteMany", - ], - capacity: { - storage: "5Gi", - }, - nfs: { - path: "/kubeflow", - server: nfsServer, - }, - persistentVolumeReclaimPolicy: "Retain", - storageClassName: storageClassName, - }, -}; - -local pvc = { - apiVersion: "v1", - kind: "PersistentVolumeClaim", - metadata: { - annotations: { - "volume.beta.kubernetes.io/storage-class": storageClassName, - }, - name: "nfs-external", - namespace: namespace, - }, - spec: { - accessModes: [ - "ReadWriteMany", - ], - resources: { - requests: { - storage: "500Mi", - }, - }, - storageClassName: storageClassName, - volumeName: "gcfs", - }, -}; - -std.prune(k.core.v1.list.new([ - pv, - pvc, -]))