From df692424b69f51f3f1496807cc4c34e619545e20 Mon Sep 17 00:00:00 2001 From: Matthew B <106352182+artntek@users.noreply.github.com> Date: Tue, 6 Feb 2024 15:04:00 -0800 Subject: [PATCH 01/10] create new logging directory, with config files for prototyped solutions --- .../logging-agents/filebeat-kubernetes.yaml | 258 ++++++++++++++++++ .../fluentbit-values-dev-cluster.yaml | 83 ++++++ .../ek-values-dev-cluster.yaml | 17 ++ 3 files changed, 358 insertions(+) create mode 100644 logging/logging-agents/filebeat-kubernetes.yaml create mode 100644 logging/logging-agents/fluentbit-values-dev-cluster.yaml create mode 100644 logging/search-analytics/ek-values-dev-cluster.yaml diff --git a/logging/logging-agents/filebeat-kubernetes.yaml b/logging/logging-agents/filebeat-kubernetes.yaml new file mode 100644 index 0000000..c5b506b --- /dev/null +++ b/logging/logging-agents/filebeat-kubernetes.yaml @@ -0,0 +1,258 @@ +## Create a daemonset for FileBeat +## +## install: $ kubectl create -f filebeat-kubernetes.yaml -n namespace +## delete: $ kubectl delete -f filebeat-kubernetes.yaml -n namespace +## +## grep for "MB" to see edits +## +apiVersion: v1 +kind: ServiceAccount +metadata: + name: filebeat + namespace: kube-system + labels: + k8s-app: filebeat +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: filebeat + labels: + k8s-app: filebeat +rules: +- apiGroups: [""] # "" indicates the core API group + resources: + - namespaces + - pods + - nodes + verbs: + - get + - watch + - list +- apiGroups: ["apps"] + resources: + - replicasets + verbs: ["get", "list", "watch"] +- apiGroups: ["batch"] + resources: + - jobs + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: filebeat + # should be the namespace where filebeat is running + namespace: kube-system + labels: + k8s-app: filebeat +rules: + - apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: ["get", "create", "update"] 
+--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: filebeat-kubeadm-config + namespace: kube-system + labels: + k8s-app: filebeat +rules: + - apiGroups: [""] + resources: + - configmaps + resourceNames: + - kubeadm-config + verbs: ["get"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: filebeat +subjects: +- kind: ServiceAccount + name: filebeat + namespace: kube-system +roleRef: + kind: ClusterRole + name: filebeat + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: filebeat + namespace: kube-system +subjects: + - kind: ServiceAccount + name: filebeat + namespace: kube-system +roleRef: + kind: Role + name: filebeat + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: filebeat-kubeadm-config + namespace: kube-system +subjects: + - kind: ServiceAccount + name: filebeat + namespace: kube-system +roleRef: + kind: Role + name: filebeat-kubeadm-config + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: filebeat-config + namespace: kube-system + labels: + k8s-app: filebeat +data: + filebeat.yml: |- + filebeat.inputs: + - type: filestream + paths: + - /var/log/containers/*.log + parsers: + - container: ~ + prospector: + scanner: + fingerprint.enabled: true + symlinks: true + file_identity.fingerprint: ~ + processors: + - add_kubernetes_metadata: + host: ${NODE_NAME} + matchers: + - logs_path: + logs_path: "/var/log/containers/" + + # To enable hints based autodiscover, remove `filebeat.inputs` configuration and uncomment this: + # filebeat.autodiscover: + # providers: + # - type: kubernetes + # node: ${NODE_NAME} + # hints.enabled: true + # hints.default_config: + # type: filestream + # id: kubernetes-container-logs-${data.kubernetes.pod.name}-${data.kubernetes.container.id} + # paths: + # - 
/var/log/containers/*-${data.kubernetes.container.id}.log + # parsers: + # - container: ~ + # prospector: + # scanner: + # fingerprint.enabled: true + # symlinks: true + # file_identity.fingerprint: ~ + + processors: + - add_cloud_metadata: + - add_host_metadata: + + cloud.id: ${ELASTIC_CLOUD_ID} + cloud.auth: ${ELASTIC_CLOUD_AUTH} + + output.elasticsearch: + hosts: ['${ELASTICSEARCH_HOST:elasticsearch}:${ELASTICSEARCH_PORT:9200}'] + username: ${ELASTICSEARCH_USERNAME} + password: ${ELASTICSEARCH_PASSWORD} +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: filebeat + namespace: kube-system + labels: + k8s-app: filebeat +spec: + selector: + matchLabels: + k8s-app: filebeat + template: + metadata: + labels: + k8s-app: filebeat + spec: + serviceAccountName: filebeat + terminationGracePeriodSeconds: 30 + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + ## MB added nodeSelector - experiment with just the node containing metacat (and others) + # nodeSelector: + # kubernetes.io/hostname: k8s-dev-node-3 + containers: + - name: filebeat + image: docker.elastic.co/beats/filebeat:8.12.0 + args: [ + "-c", "/etc/filebeat.yml", + "-e", + ] + env: + - name: ELASTICSEARCH_HOST + value: elkbrooke-elasticsearch.brooke.svc.cluster.local + ## MB edited ELASTICSEARCH_HOST value + - name: ELASTICSEARCH_PORT + value: "9200" + - name: ELASTICSEARCH_USERNAME + value: elastic + - name: ELASTICSEARCH_PASSWORD + value: changeme + - name: ELASTIC_CLOUD_ID + value: + - name: ELASTIC_CLOUD_AUTH + value: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + securityContext: + runAsUser: 0 + # If using Red Hat OpenShift uncomment this: + #privileged: true + resources: + limits: + memory: 200Mi + requests: + cpu: 100m + memory: 100Mi + volumeMounts: + - name: config + mountPath: /etc/filebeat.yml + readOnly: true + subPath: filebeat.yml + - name: data + mountPath: /usr/share/filebeat/data + - name: varlibdockercontainers + mountPath: /var/lib/docker/containers 
+ readOnly: true + - name: varlog + mountPath: /var/log + readOnly: true + volumes: + - name: config + configMap: + defaultMode: 0640 + name: filebeat-config + - name: varlibdockercontainers + hostPath: + path: /var/lib/docker/containers + - name: varlog + hostPath: + path: /var/log + # data folder stores a registry of read status for all files, so we don't send everything again on a Filebeat pod restart + - name: data + hostPath: + # When filebeat runs as non-root user, this directory needs to be writable by group (g+w). + path: /var/lib/filebeat-data + type: DirectoryOrCreate +--- diff --git a/logging/logging-agents/fluentbit-values-dev-cluster.yaml b/logging/logging-agents/fluentbit-values-dev-cluster.yaml new file mode 100644 index 0000000..8abf636 --- /dev/null +++ b/logging/logging-agents/fluentbit-values-dev-cluster.yaml @@ -0,0 +1,83 @@ +## Example Value overrides for deploying the bitnami Fluent Bit chart +## (https://github.com/bitnami/charts/tree/main/bitnami/fluent-bit/) on the NCEAS dev cluster +## +## To install: +## $ helm install <release-name> -n <namespace> \ +## -f fluentbit-values-dev-cluster.yaml \ +## oci://registry-1.docker.io/bitnamicharts/fluent-bit +## +global: + ## Use a dynamically-created PV + storageClass: csi-rbd-sc + +image: + debug: true + +daemonset: + ## @param daemonset.enabled Use a daemonset instead of a deployment. `replicaCount` will not take effect. + ## + enabled: true + +config: + logLevel: debug + + ## @param config.service [string] Defines the global behaviour of the Fluent Bit engine. 
+ ## + ## NOTE: must override here, to add location for Parsers_File + service: | + [SERVICE] + Flush {{ .Values.config.flush }} + Daemon Off + LogLevel {{ .Values.config.logLevel }} + Config_Watch On + HTTP_Server On + HTTP_Listen 0.0.0.0 + HTTP_Port {{ .Values.containerPorts.http }} + Parsers_File /opt/bitnami/fluent-bit/conf/custom_parsers.conf + + ## @param config.inputs [string] Inputs from which to collect data + ## https://docs.fluentbit.io/manual/pipeline/inputs + inputs: | + [INPUT] + Name tail + Path /var/log/containers/*.log + Path_Key filename + + ## @param config.filters [string] Set of plugins that can be used to filter, modify, or enrich log data that is processed by Fluent Bit. + ## https://docs.fluentbit.io/manual/pipeline/filters + filters: | + [FILTER] + name parser + match * + key_name log + Reserve_Data True + Preserve_Key On + parser named_capture + + ## @param config.outputs [string] Outputs to send the collected data to different destinations + ## https://docs.fluentbit.io/manual/pipeline/outputs + outputs: | + [OUTPUT] + Name es + Match * + Host ekbrooke-elasticsearch.brooke.svc.cluster.local + Port 9200 + Index fluentbit + Type flb_type + Suppress_Type_Name On + + + ## @param config.customParsers [string] Custom-defined Parsers + ## https://docs.fluentbit.io/manual/pipeline/parsers + customParsers: | + [MULTILINE_PARSER] + name multiline_java + type regex + flush_timeout 1000 + rule "start_state" "/^[_\-0-9a-zA-Z]+ (\d+\-\d+\:\d+\:\d+)\:?\s?(.*)/" "cont" + rule "cont" "/^\s+at.*/" "cont" + + [PARSER] + Name named_capture + Format regex + Regex /(?<message>.*)/m diff --git a/logging/search-analytics/ek-values-dev-cluster.yaml b/logging/search-analytics/ek-values-dev-cluster.yaml new file mode 100644 index 0000000..1b1b7fe --- /dev/null +++ b/logging/search-analytics/ek-values-dev-cluster.yaml @@ -0,0 +1,17 @@ +## Example Value overrides for deploying the bitnami Elasticsearch + Kibana chart +## 
(https://github.com/bitnami/charts/blob/main/bitnami/elasticsearch/README.md) +## on the NCEAS dev cluster +## +## $ helm install <release-name> -n <namespace> \ +## -f ek-values-dev-cluster.yaml \ +## oci://registry-1.docker.io/bitnamicharts/elasticsearch +## +## $ kubectl port-forward -n <namespace> svc/<release-name>-kibana 5601:5601 +## (e.g: kubectl port-forward -n brooke svc/elkbrooke-kibana 5601:5601 ) +## +global: + ## Use a dynamically-created PV + storageClass: csi-rbd-sc + + ## enable the Kibana subchart + kibanaEnabled: true From 94aa59a00fcd4065fd1cf799151362eed3fdddf4 Mon Sep 17 00:00:00 2001 From: Matthew B <106352182+artntek@users.noreply.github.com> Date: Tue, 6 Feb 2024 15:23:27 -0800 Subject: [PATCH 02/10] added readme --- logging/logging.md | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 logging/logging.md diff --git a/logging/logging.md b/logging/logging.md new file mode 100644 index 0000000..8337ed5 --- /dev/null +++ b/logging/logging.md @@ -0,0 +1,36 @@ +# Logging Infrastructure + +## ** IMPORTANT NOTE: STILL A WORK IN PROGRESS ** + +In a modern k8s environment, the ELK stack (Elasticsearch - Logstash - Kibana) has been +superseded by the EFK stack, where Logstash is replaced by Fluentd, FluentBit or FileBeats. + +* `Elasticsearch` is a free, open-source search and analytics engine (similar in concept to solr), +based on the Apache Lucene library. +* `Kibana` is a data visualization and exploration tool, used for log and time-series analytics, +application monitoring, and operational intelligence. +* `FluentBit` & `FileBeats` are logging agents (see below) + +## Simple overview + +1. Our k8s applications log to stdout, and this output is collected on the host Node machine, + under `/var/log/containers/*.log`. (This is standard k8s functionality). +2. We need to install logging agents that do 3 things: + 1. aggregate logs from each of the node machines + 2. 
parse and transform them into the required structured logging format + 3. 
send copies to elasticsearch and to the central syslog archive. +3. Then we can use the tools provided by Kibana to view and analyze the logs that are in + elasticsearch. + +## Steps to deploy +Note there is some config that needs to be changed in each case: +1. install the Elasticsearch Bitnami chart, with the Kibana subchart enabled + * see [./search-analytics/ek-values-dev-cluster.yaml](./search-analytics/ek-values-dev-cluster.yaml) +2. install the Fluent Bit Bitnami chart + * see [./logging-agents/fluentbit-values-dev-cluster.yaml](./logging-agents/fluentbit-values-dev-cluster.yaml) + * [Fluent Bit](https://docs.fluentbit.io/manual/) is essentially a faster, smaller-footprint + replacement for Fluentd, and is much better suited to k8s. + +(Experiments with FileBeat were largely unsuccessful. The yaml file for installing the FileBeat +daemonset is also included for completeness: +[./logging-agents/filebeat-kubernetes.yaml](./logging-agents/filebeat-kubernetes.yaml)) From 5fbba4f2d85ebc0c6f39243ff0c0bf45be9d0ff5 Mon Sep 17 00:00:00 2001 From: Matthew B <106352182+artntek@users.noreply.github.com> Date: Wed, 7 Feb 2024 10:36:46 -0800 Subject: [PATCH 03/10] add link to issue and next steps --- logging/logging.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/logging/logging.md b/logging/logging.md index 8337ed5..d7bc52b 100644 --- a/logging/logging.md +++ b/logging/logging.md @@ -1,15 +1,19 @@ # Logging Infrastructure -## ** IMPORTANT NOTE: STILL A WORK IN PROGRESS ** +> # ** IMPORTANT: STILL A WORK IN PROGRESS ** +> ## * * * Suggested next steps * * * +> ### see [Issue #43 - "Logging Infrastructure for K8s Clusters"](https://github.com/DataONEorg/k8s-cluster/issues/43#issuecomment-1932643235) + +## Introduction In a modern k8s environment, the ELK stack (Elasticsearch - Logstash - Kibana) has been -superseded by the EFK stack, where Logstash is replaced by Fluentd, FluentBit or FileBeats. 
+superseded by the EFK stack, where Logstash is replaced by Fluentd, FluentBit or FileBeat. * `Elasticsearch` is a free, open-source search and analytics engine (similar in concept to solr), based on the Apache Lucene library. * `Kibana` is a data visualization and exploration tool, used for log and time-series analytics, application monitoring, and operational intelligence. -* `FluentBit` & `FileBeats` are logging agents (see below) +* `FluentBit` & `FileBeat` are logging agents (see below) ## Simple overview From 365a288fd4336da06e87a533a4c0facedc807116 Mon Sep 17 00:00:00 2001 From: Matthew B <106352182+artntek@users.noreply.github.com> Date: Thu, 8 Feb 2024 11:45:16 -0800 Subject: [PATCH 04/10] Update README.md --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c8cdd70..62f49f6 100644 --- a/README.md +++ b/README.md @@ -67,8 +67,10 @@ Documentation is organized into an overview, and then separate config files and - [Persistent Volume Claim](./storage/Ceph/Ceph-CSI-RBD.md#Persistent-Volume-Claim) - [Using The Persistent Volume Claim](./storage/Ceph/Ceph-CSI-RBD.md#Using-The-Persistent-Volume-Claim) +## [Logging Infrastructure](./logging/logging.md) + ## [Data Recovery](./storage/data-recovery.md#Data-Recovery) - [Data Recovery For RBD Based PVs](./storage/data-recovery.md#Data-Recovery-For-RBD-Based-PVs) - [Data Recovery For CephFS Based PVs](./storage/data-recovery.md#Data-Recovery-For-CephFS-Based-PVs) -[![dataone_footer](https://www.dataone.org/sites/all/images/DataONE_LOGO.jpg)](https://www.dataone.org) +[![dataone_footer](https://user-images.githubusercontent.com/6643222/162324180-b5cf0f5f-ae7a-4ca6-87c3-9733a2590634.png)](https://www.dataone.org) From fbfe6189ca9ffb0c88699137c5354cbbf5e9923c Mon Sep 17 00:00:00 2001 From: Matthew B <106352182+artntek@users.noreply.github.com> Date: Thu, 8 Feb 2024 12:14:00 -0800 Subject: [PATCH 05/10] change 'Important Notes'->'Important Notes on Secrets and 
Credentials' --- storage/Ceph/Ceph-CSI-CephFS.md | 4 ++-- storage/Ceph/Ceph-CSI.md | 17 +++++++++-------- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/storage/Ceph/Ceph-CSI-CephFS.md b/storage/Ceph/Ceph-CSI-CephFS.md index 29ee382..755e314 100644 --- a/storage/Ceph/Ceph-CSI-CephFS.md +++ b/storage/Ceph/Ceph-CSI-CephFS.md @@ -162,7 +162,7 @@ spec: persistentVolumeReclaimPolicy: Retain volumeMode: Filesystem ``` -> **NOTE**: there are some potential gotchas involved with creating the Secret (e.g. `csi-cephfs-secret`, above). Please refer to [this section of Ceph-CSI.md](./Ceph-CSI.md#important-notes) +> **NOTE**: there are some potential gotchas involved with creating the Secret (e.g. `csi-cephfs-secret`, above). Please refer to [Ceph CSI - Important Notes on Secrets and Credentials](./Ceph-CSI.md#important-notes-on-secrets-and-credentials) This PV is created with the command: @@ -278,7 +278,7 @@ status 32) occurred while running mount args: [-t ceph 10.0.3.197:6789,10.0.3.20 mds_namespace=cephfs,_netdev] stderr: mount error: no mds server is up or the cluster is laggy ``` -...the message `no mds server is up or the cluster is laggy` is potentially misleading. It is more likely that the `userID` is missing or incorrect, in your `secret.yaml` file. See [Ceph CSI - Important Notes](https://github.com/DataONEorg/k8s-cluster/blob/main/storage/Ceph/Ceph-CSI.md#important-notes). +...the message `no mds server is up or the cluster is laggy` is potentially misleading. It is more likely that the `userID` is missing or incorrect, in your `secret.yaml` file. See [Ceph CSI - Important Notes on Secrets and Credentials](./Ceph-CSI.md#important-notes-on-secrets-and-credentials). 
diff --git a/storage/Ceph/Ceph-CSI.md b/storage/Ceph/Ceph-CSI.md index 448a5bc..75603f8 100644 --- a/storage/Ceph/Ceph-CSI.md +++ b/storage/Ceph/Ceph-CSI.md @@ -19,7 +19,7 @@ Command line options to helm supply most of the information that is needed for t Here is an example `csi-config-map.yaml` file: -```yaml +```json --- # This is a sample configmap that helps define a Ceph cluster configuration # as required by the CSI plugins. @@ -173,13 +173,7 @@ Command line options to helm supply most of the information that is needed for t The `secret.yaml` file contains the ceph storage cluster login credentials needed for ceph-csi to mount CephFS subvolumes that are statically provisioned. These CephFS subvolumes must be created manually with the Linux `ceph` utility before they can be accessed by ceph-csi. -The `userId` and `userKey` values provide the needed authorization for this. - -### Important Notes -1. ceph-generated usernames are typically of the form: `client.k8s-dev-releasename-subvol-user`. Note that you must omit the `client.` prefix when adding to the `secret.yaml` file (i.e. use only: `k8s-dev-myreleasename-subvol-user`). - * (However, when mounting the volume via `fstab`, the `client.` prefix should be retained for the keyring file.) -1. The example [`secret.yaml`](https://github.com/DataONEorg/k8s-cluster/blob/main/storage/Ceph/CephFS/secret.yaml) file contains plaintext credentials (listed under `stringData:`), that are automatically base64-encoded at runtime. If you prefer to base64-encode the userID and userKey before adding to the `secret.yaml` file, be sure to use the `-n` option with the `echo` command, (i.e.: `echo -n k8s-dev-myreleasename-subvol-user | base64`), to suppress the trailing newline character. Failure to do so will cause authentication to fail (see also: [CephFS Troubleshooting](https://github.com/DataONEorg/k8s-cluster/blob/main/storage/Ceph/Ceph-CSI-CephFS.md#troubleshooting)). 
If they are already base64 encoded in this way, values should be added to the `secret.yaml` file under `data:` instead of `stringData:`. -1. for dynamically provisioned (ceph-csi provisions them) CephFS volumes and subvolumes, the `adminId` and `adminKey` values are required. +The `userId` and `userKey` values provide the needed authorization for this. Some of the ceph-csi functionality is only in Alpha release state, so is not ready for production use. Please refer to the [Ceph-CSI Support Matrix](https://github.com/ceph/ceph-csi#support-matrix) for more information. @@ -341,3 +335,10 @@ reclaimPolicy: Delete volumeBindingMode: Immediate ``` +### Important Notes on Secrets and Credentials + +1. In the Ceph client file configurations, the userid will likely contain a prefix; for example: `client.k8s-dev-releasename-subvol-user`. Note that you must omit the `client.` prefix when adding to the `secret.yaml` file (i.e. use only: `k8s-dev-myreleasename-subvol-user`). + * (However, when mounting the volume via `fstab`, the `client.` prefix should be retained for the keyring file.) +1. In the Ceph user configuration files, the userKey is already base64 encoded, but ***it needs to be base64-encoded again*** when the kubernetes Secret is created. Put the Ceph-provided base64 string in the `stringData.userKey` field, and it will automatically be base64-encoded again, upon creation. +1. If you prefer to manually base64-encode the userID and userKey before adding to the `secret.yaml` file, be sure to use the `-n` option with the `echo` command, (i.e.: `echo -n k8s-dev-myreleasename-subvol-user | base64`), to suppress the trailing newline character. Failure to do so will cause authentication to fail (see also: [CephFS Troubleshooting](https://github.com/DataONEorg/k8s-cluster/blob/main/storage/Ceph/Ceph-CSI-CephFS.md#troubleshooting)). 
If they are already (double-)base64 encoded in this way, values should be added to the `secret.yaml` file under `data:` instead of `stringData:`. +1. for dynamically provisioned CephFS volumes and subvolumes (ceph-csi provisions them), the `adminId` and `adminKey` values are required. From 27e94c745637374c561fbf8f892e3a9e245f1587 Mon Sep 17 00:00:00 2001 From: Matthew B <106352182+artntek@users.noreply.github.com> Date: Thu, 8 Feb 2024 12:14:49 -0800 Subject: [PATCH 06/10] Add to TOC: 'Important Notes on Secrets and Credentials' --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 62f49f6..9718057 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,7 @@ Documentation is organized into an overview, and then separate config files and ### [Ceph CSI](./storage/Ceph/Ceph-CSI.md#) - [Installing Ceph CSI RBD Plugin](./storage/Ceph/Ceph-CSI.md#Installing-Ceph-CSI-RBD-Plugin) - [Installing Ceph CSI CephFS Plugin](./storage/Ceph/Ceph-CSI.md#Installing-Ceph-CSI-CephFS-Plugin) +- [Important Notes on Secrets and Credentials](./Ceph-CSI.md#important-notes-on-secrets-and-credentials) #### [Ceph CSI CephFS](./storage/Ceph/Ceph-CSI-CephFS.md) - [Provisioning Static CephFS Volumes](./storage/Ceph/Ceph-CSI-CephFS.md#Provisioning-Static-CephFS-Volumes) From dfa07e8b977254252003a97f0a3ae667e27df85e Mon Sep 17 00:00:00 2001 From: Matthew B <106352182+artntek@users.noreply.github.com> Date: Thu, 8 Feb 2024 12:18:09 -0800 Subject: [PATCH 07/10] code block formatting --- storage/Ceph/Ceph-CSI.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/Ceph/Ceph-CSI.md b/storage/Ceph/Ceph-CSI.md index 75603f8..8f03563 100644 --- a/storage/Ceph/Ceph-CSI.md +++ b/storage/Ceph/Ceph-CSI.md @@ -19,7 +19,7 @@ Command line options to helm supply most of the information that is needed for t Here is an example `csi-config-map.yaml` file: -```json +```yaml --- # This is a sample configmap that helps define a Ceph cluster configuration # as 
required by the CSI plugins. From e9f7c18d095226c3342691c731fa6eeeb01b7432 Mon Sep 17 00:00:00 2001 From: Matthew B <106352182+artntek@users.noreply.github.com> Date: Thu, 8 Feb 2024 12:33:01 -0800 Subject: [PATCH 08/10] Revert "code block formatting" This reverts commit dfa07e8b977254252003a97f0a3ae667e27df85e. --- storage/Ceph/Ceph-CSI.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/Ceph/Ceph-CSI.md b/storage/Ceph/Ceph-CSI.md index 8f03563..75603f8 100644 --- a/storage/Ceph/Ceph-CSI.md +++ b/storage/Ceph/Ceph-CSI.md @@ -19,7 +19,7 @@ Command line options to helm supply most of the information that is needed for t Here is an example `csi-config-map.yaml` file: -```yaml +```json --- # This is a sample configmap that helps define a Ceph cluster configuration # as required by the CSI plugins. From 4a2bf64cb47bcbe9e65072d7da5e0c0889e670af Mon Sep 17 00:00:00 2001 From: Matthew B <106352182+artntek@users.noreply.github.com> Date: Thu, 8 Feb 2024 12:33:19 -0800 Subject: [PATCH 09/10] Revert "Add to TOC: 'Important Notes on Secrets and Credentials'" This reverts commit 27e94c745637374c561fbf8f892e3a9e245f1587. 
--- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 9718057..62f49f6 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,6 @@ Documentation is organized into an overview, and then separate config files and ### [Ceph CSI](./storage/Ceph/Ceph-CSI.md#) - [Installing Ceph CSI RBD Plugin](./storage/Ceph/Ceph-CSI.md#Installing-Ceph-CSI-RBD-Plugin) - [Installing Ceph CSI CephFS Plugin](./storage/Ceph/Ceph-CSI.md#Installing-Ceph-CSI-CephFS-Plugin) -- [Important Notes on Secrets and Credentials](./Ceph-CSI.md#important-notes-on-secrets-and-credentials) #### [Ceph CSI CephFS](./storage/Ceph/Ceph-CSI-CephFS.md) - [Provisioning Static CephFS Volumes](./storage/Ceph/Ceph-CSI-CephFS.md#Provisioning-Static-CephFS-Volumes) From c04f272ef23f47384ae945dfd6c3b7eefb9d638a Mon Sep 17 00:00:00 2001 From: Matthew B <106352182+artntek@users.noreply.github.com> Date: Thu, 8 Feb 2024 12:33:36 -0800 Subject: [PATCH 10/10] Revert "change 'Important Notes'->'Important Notes on Secrets and Credentials'" This reverts commit fbfe6189ca9ffb0c88699137c5354cbbf5e9923c. --- storage/Ceph/Ceph-CSI-CephFS.md | 4 ++-- storage/Ceph/Ceph-CSI.md | 17 ++++++++--------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/storage/Ceph/Ceph-CSI-CephFS.md b/storage/Ceph/Ceph-CSI-CephFS.md index 755e314..29ee382 100644 --- a/storage/Ceph/Ceph-CSI-CephFS.md +++ b/storage/Ceph/Ceph-CSI-CephFS.md @@ -162,7 +162,7 @@ spec: persistentVolumeReclaimPolicy: Retain volumeMode: Filesystem ``` -> **NOTE**: there are some potential gotchas involved with creating the Secret (e.g. `csi-cephfs-secret`, above). Please refer to [Ceph CSI - Important Notes on Secrets and Credentials](./Ceph-CSI.md#important-notes-on-secrets-and-credentials) +> **NOTE**: there are some potential gotchas involved with creating the Secret (e.g. `csi-cephfs-secret`, above). 
Please refer to [this section of Ceph-CSI.md](./Ceph-CSI.md#important-notes) This PV is created with the command: @@ -278,7 +278,7 @@ status 32) occurred while running mount args: [-t ceph 10.0.3.197:6789,10.0.3.20 mds_namespace=cephfs,_netdev] stderr: mount error: no mds server is up or the cluster is laggy ``` -...the message `no mds server is up or the cluster is laggy` is potentially misleading. It is more likely that the `userID` is missing or incorrect, in your `secret.yaml` file. See [Ceph CSI - Important Notes on Secrets and Credentials](./Ceph-CSI.md#important-notes-on-secrets-and-credentials). +...the message `no mds server is up or the cluster is laggy` is potentially misleading. It is more likely that the `userID` is missing or incorrect, in your `secret.yaml` file. See [Ceph CSI - Important Notes](https://github.com/DataONEorg/k8s-cluster/blob/main/storage/Ceph/Ceph-CSI.md#important-notes). diff --git a/storage/Ceph/Ceph-CSI.md b/storage/Ceph/Ceph-CSI.md index 75603f8..448a5bc 100644 --- a/storage/Ceph/Ceph-CSI.md +++ b/storage/Ceph/Ceph-CSI.md @@ -19,7 +19,7 @@ Command line options to helm supply most of the information that is needed for t Here is an example `csi-config-map.yaml` file: -```json +```yaml --- # This is a sample configmap that helps define a Ceph cluster configuration # as required by the CSI plugins. @@ -173,7 +173,13 @@ Command line options to helm supply most of the information that is needed for t The `secret.yaml` file contains the ceph storage cluster login credentials needed for ceph-csi to mount CephFS subvolumes that are statically provisioned. These CephFS subvolumes must be created manually with the Linux `ceph` utility before they can be accessed by ceph-csi. -The `userId` and `userKey` values provide the needed authorization for this. +The `userId` and `userKey` values provide the needed authorization for this. + +### Important Notes +1. 
ceph-generated usernames are typically of the form: `client.k8s-dev-releasename-subvol-user`. Note that you must omit the `client.` prefix when adding to the `secret.yaml` file (i.e. use only: `k8s-dev-myreleasename-subvol-user`). + * (However, when mounting the volume via `fstab`, the `client.` prefix should be retained for the keyring file.) +1. The example [`secret.yaml`](https://github.com/DataONEorg/k8s-cluster/blob/main/storage/Ceph/CephFS/secret.yaml) file contains plaintext credentials (listed under `stringData:`), that are automatically base64-encoded at runtime. If you prefer to base64-encode the userID and userKey before adding to the `secret.yaml` file, be sure to use the `-n` option with the `echo` command, (i.e.: `echo -n k8s-dev-myreleasename-subvol-user | base64`), to suppress the trailing newline character. Failure to do so will cause authentication to fail (see also: [CephFS Troubleshooting](https://github.com/DataONEorg/k8s-cluster/blob/main/storage/Ceph/Ceph-CSI-CephFS.md#troubleshooting)). If they are already base64 encoded in this way, values should be added to the `secret.yaml` file under `data:` instead of `stringData:`. +1. for dynamically provisioned (ceph-csi provisions them) CephFS volumes and subvolumes, the `adminId` and `adminKey` values are required. Some of the ceph-csi functionality is only in Alpha release state, so is not ready for production use. Please refer to the [Ceph-CSI Support Matrix](https://github.com/ceph/ceph-csi#support-matrix) for more information. @@ -335,10 +341,3 @@ reclaimPolicy: Delete volumeBindingMode: Immediate ``` -### Important Notes on Secrets and Credentials - -1. In the Ceph client file configurations, the userid will likely contain a prefix; for example: `client.k8s-dev-releasename-subvol-user`. Note that you must omit the `client.` prefix when adding to the `secret.yaml` file (i.e. use only: `k8s-dev-myreleasename-subvol-user`). 
- * (However, when mounting the volume via `fstab`, the `client.` prefix should be retained for the keyring file.) -1. In the Ceph user configuration files, the userKey is already base64 encoded, but ***it needs to be base64-encoded again*** when the kubernetes Secret is created. Put the Ceph-provided base64 string in the `stringData.userKey` field, and it will automatically be base64-encoded again, upon creation. -1. If you prefer to manually base64-encode the userID and userKey before adding to the `secret.yaml` file, be sure to use the `-n` option with the `echo` command, (i.e.: `echo -n k8s-dev-myreleasename-subvol-user | base64`), to suppress the trailing newline character. Failure to do so will cause authentication to fail (see also: [CephFS Troubleshooting](https://github.com/DataONEorg/k8s-cluster/blob/main/storage/Ceph/Ceph-CSI-CephFS.md#troubleshooting)). If they are already (double-)base64 encoded in this way, values should be added to the `secret.yaml` file under `data:` instead of `stringData:`. -1. for dynamically provisioned CephFS volumes and subvolumes (ceph-csi provisions them), the `adminId` and `adminKey` values are required.