create new logging directory, with config files for prototyped solutions #45

Open · wants to merge 12 commits into base: main
2 changes: 2 additions & 0 deletions README.md
@@ -68,6 +68,8 @@ Documentation is organized into an overview, and then separate config files and
- [Persistent Volume Claim](./storage/Ceph/Ceph-CSI-RBD.md#Persistent-Volume-Claim)
- [Using The Persistent Volume Claim](./storage/Ceph/Ceph-CSI-RBD.md#Using-The-Persistent-Volume-Claim)

## [Logging Infrastructure](./logging/logging.md)

## [Data Recovery](./storage/data-recovery.md#Data-Recovery)
- [Data Recovery For RBD Based PVs](./storage/data-recovery.md#Data-Recovery-For-RBD-Based-PVs)
- [Data Recovery For CephFS Based PVs](./storage/data-recovery.md#Data-Recovery-For-CephFS-Based-PVs)
258 changes: 258 additions & 0 deletions logging/logging-agents/filebeat-kubernetes.yaml
@@ -0,0 +1,258 @@
## Create a daemonset for FileBeat
##
## install: $ kubectl create -f filebeat-kubernetes.yaml -n namespace
## delete: $ kubectl delete -f filebeat-kubernetes.yaml -n namespace
##
## grep for "MB" to see edits
##
apiVersion: v1
kind: ServiceAccount
metadata:
name: filebeat
namespace: kube-system
labels:
k8s-app: filebeat
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: filebeat
labels:
k8s-app: filebeat
rules:
- apiGroups: [""] # "" indicates the core API group
resources:
- namespaces
- pods
- nodes
verbs:
- get
- watch
- list
- apiGroups: ["apps"]
resources:
- replicasets
verbs: ["get", "list", "watch"]
- apiGroups: ["batch"]
resources:
- jobs
verbs: ["get", "list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: filebeat
# should be the namespace where filebeat is running
namespace: kube-system
labels:
k8s-app: filebeat
rules:
- apiGroups:
- coordination.k8s.io
resources:
- leases
verbs: ["get", "create", "update"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: filebeat-kubeadm-config
namespace: kube-system
labels:
k8s-app: filebeat
rules:
- apiGroups: [""]
resources:
- configmaps
resourceNames:
- kubeadm-config
verbs: ["get"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: filebeat
subjects:
- kind: ServiceAccount
name: filebeat
namespace: kube-system
roleRef:
kind: ClusterRole
name: filebeat
apiGroup: rbac.authorization.k8s.io
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: filebeat
namespace: kube-system
subjects:
- kind: ServiceAccount
name: filebeat
namespace: kube-system
roleRef:
kind: Role
name: filebeat
apiGroup: rbac.authorization.k8s.io
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: filebeat-kubeadm-config
namespace: kube-system
subjects:
- kind: ServiceAccount
name: filebeat
namespace: kube-system
roleRef:
kind: Role
name: filebeat-kubeadm-config
apiGroup: rbac.authorization.k8s.io
---
apiVersion: v1
kind: ConfigMap
metadata:
name: filebeat-config
namespace: kube-system
labels:
k8s-app: filebeat
data:
filebeat.yml: |-
filebeat.inputs:
- type: filestream
paths:
- /var/log/containers/*.log
parsers:
- container: ~
prospector:
scanner:
fingerprint.enabled: true
symlinks: true
file_identity.fingerprint: ~
processors:
- add_kubernetes_metadata:
host: ${NODE_NAME}
matchers:
- logs_path:
logs_path: "/var/log/containers/"

# To enable hints based autodiscover, remove `filebeat.inputs` configuration and uncomment this:
# filebeat.autodiscover:
# providers:
# - type: kubernetes
# node: ${NODE_NAME}
# hints.enabled: true
# hints.default_config:
# type: filestream
# id: kubernetes-container-logs-${data.kubernetes.pod.name}-${data.kubernetes.container.id}
# paths:
# - /var/log/containers/*-${data.kubernetes.container.id}.log
# parsers:
# - container: ~
# prospector:
# scanner:
# fingerprint.enabled: true
# symlinks: true
# file_identity.fingerprint: ~

processors:
- add_cloud_metadata:
- add_host_metadata:

cloud.id: ${ELASTIC_CLOUD_ID}
cloud.auth: ${ELASTIC_CLOUD_AUTH}

output.elasticsearch:
hosts: ['${ELASTICSEARCH_HOST:elasticsearch}:${ELASTICSEARCH_PORT:9200}']
username: ${ELASTICSEARCH_USERNAME}
password: ${ELASTICSEARCH_PASSWORD}
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: filebeat
namespace: kube-system
labels:
k8s-app: filebeat
spec:
selector:
matchLabels:
k8s-app: filebeat
template:
metadata:
labels:
k8s-app: filebeat
spec:
serviceAccountName: filebeat
terminationGracePeriodSeconds: 30
hostNetwork: true
dnsPolicy: ClusterFirstWithHostNet
## MB added nodeSelector - experiment with just the node containing metacat (and others)
# nodeSelector:
# kubernetes.io/hostname: k8s-dev-node-3
containers:
- name: filebeat
image: docker.elastic.co/beats/filebeat:8.12.0
args: [
"-c", "/etc/filebeat.yml",
"-e",
]
env:
- name: ELASTICSEARCH_HOST
value: elkbrooke-elasticsearch.brooke.svc.cluster.local
## MB edited ELASTICSEARCH_HOST value
- name: ELASTICSEARCH_PORT
value: "9200"
- name: ELASTICSEARCH_USERNAME
value: elastic
- name: ELASTICSEARCH_PASSWORD
value: changeme
- name: ELASTIC_CLOUD_ID
value:
- name: ELASTIC_CLOUD_AUTH
value:
- name: NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
securityContext:
runAsUser: 0
# If using Red Hat OpenShift uncomment this:
#privileged: true
resources:
limits:
memory: 200Mi
requests:
cpu: 100m
memory: 100Mi
volumeMounts:
- name: config
mountPath: /etc/filebeat.yml
readOnly: true
subPath: filebeat.yml
- name: data
mountPath: /usr/share/filebeat/data
- name: varlibdockercontainers
mountPath: /var/lib/docker/containers
readOnly: true
- name: varlog
mountPath: /var/log
readOnly: true
volumes:
- name: config
configMap:
defaultMode: 0640
name: filebeat-config
- name: varlibdockercontainers
hostPath:
path: /var/lib/docker/containers
- name: varlog
hostPath:
path: /var/log
# data folder stores a registry of read status for all files, so we don't send everything again on a Filebeat pod restart
- name: data
hostPath:
# When filebeat runs as non-root user, this directory needs to be writable by group (g+w).
path: /var/lib/filebeat-data
type: DirectoryOrCreate
---
83 changes: 83 additions & 0 deletions logging/logging-agents/fluentbit-values-dev-cluster.yaml
@@ -0,0 +1,83 @@
## Example Value overrides for deploying the bitnami Fluent Bit chart
## (https://github.com/bitnami/charts/tree/main/bitnami/fluent-bit/) on the NCEAS dev cluster
##
## To install:
## $ helm install <myrelease> -n <namespace> \
## -f fluentbit-values-dev-cluster.yaml \
## oci://registry-1.docker.io/bitnamicharts/fluent-bit
##
global:
## Use a dynamically-created PV
storageClass: csi-rbd-sc

image:
debug: true

daemonset:
## @param daemonset.enabled Use a daemonset instead of a deployment. `replicaCount` will not take effect.
##
enabled: true

config:
logLevel: debug

## @param config.service [string] Defines the global behaviour of the Fluent Bit engine.
##
## NOTE: must override here, to add location for Parsers_File
service: |
[SERVICE]
Flush {{ .Values.config.flush }}
Daemon Off
LogLevel {{ .Values.config.logLevel }}
Config_Watch On
HTTP_Server On
HTTP_Listen 0.0.0.0
HTTP_Port {{ .Values.containerPorts.http }}
Parsers_File /opt/bitnami/fluent-bit/conf/custom_parsers.conf

## @param config.inputs [string] Inputs from which to collect data
## https://docs.fluentbit.io/manual/pipeline/inputs
inputs: |
[INPUT]
Name tail
Path /var/log/containers/*.log
Path_Key filename

## @param config.filters [string] Set of plugins that can be used to filter, modify, or enrich log data that is processed by Fluent Bit.
## https://docs.fluentbit.io/manual/pipeline/filters
filters: |
[FILTER]
name parser
match *
key_name log
Reserve_Data True
Preserve_Key On
parser named_capture

## @param config.outputs [string] Outputs to send the collected data to different destinations
## https://docs.fluentbit.io/manual/pipeline/outputs
outputs: |
[OUTPUT]
Name es
Match *
Host ekbrooke-elasticsearch.brooke.svc.cluster.local
Port 9200
Index fluentbit
Type flb_type
Suppress_Type_Name On


## @param config.customParsers [string] Custom-defined Parsers
## https://docs.fluentbit.io/manual/pipeline/parsers
customParsers: |
[MULTILINE_PARSER]
name multiline_java
type regex
flush_timeout 1000
rule "start_state" "/^[_\-0-9a-zA-Z]+ (\d+\-\d+\:\d+\:\d+)\:?\s?(.*)/" "cont"
rule "cont" "/^\s+at.*/" "cont"

[PARSER]
Name named_capture
Format regex
Regex /(?<message>.*)/m
40 changes: 40 additions & 0 deletions logging/logging.md
@@ -0,0 +1,40 @@
# Logging Infrastructure

> # **IMPORTANT: STILL A WORK IN PROGRESS**
> ## Suggested next steps
> ### See [Issue #43 - "Logging Infrastructure for K8s Clusters"](https://github.com/DataONEorg/k8s-cluster/issues/43#issuecomment-1932643235)

## Introduction

In a modern k8s environment, the ELK stack (Elasticsearch - Logstash - Kibana) has been
superseded by the EFK stack, where Logstash is replaced by Fluentd, Fluent Bit, or Filebeat.

* `Elasticsearch` is a free, open-source search and analytics engine (similar in concept to Solr),
  based on the Apache Lucene library.
* `Kibana` is a data visualization and exploration tool, used for log and time-series analytics,
  application monitoring, and operational intelligence.
* `Fluent Bit` & `Filebeat` are logging agents (see below).

## Simple overview

1. Our k8s applications log to stdout, and this output is collected on the host node machine,
   under `/var/log/containers/*.log`. (This is standard k8s functionality.)
2. We need to install logging agents that do three things (see the Fluent Bit sketch after this list):
   1. aggregate logs from each of the node machines
   2. parse and transform them into the required structured logging format
   3. send copies to Elasticsearch and to the central syslog archive.
3. Then we can use the tools provided by Kibana to view and analyze the logs that are in
   Elasticsearch.
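
To make those three agent tasks concrete, here is a minimal sketch of a Fluent Bit pipeline, written
in the same Helm values-override style as
[./logging-agents/fluentbit-values-dev-cluster.yaml](./logging-agents/fluentbit-values-dev-cluster.yaml).
The Elasticsearch and syslog hostnames are placeholders, and the syslog output is an illustrative
assumption about the central archive; it is not part of the prototyped dev-cluster config.

```yaml
config:
  ## (1) aggregate: tail the container logs collected on each node
  inputs: |
    [INPUT]
        Name      tail
        Path      /var/log/containers/*.log
        Path_Key  filename

  ## (2) parse/transform: apply a parser to the raw "log" field
  filters: |
    [FILTER]
        Name          parser
        Match         *
        Key_Name      log
        Parser        named_capture
        Reserve_Data  True

  ## (3) ship copies to Elasticsearch and to a central syslog archive
  outputs: |
    # Elasticsearch service name below is a placeholder
    [OUTPUT]
        Name                es
        Match               *
        Host                elasticsearch.example.svc.cluster.local
        Port                9200
        Index               fluentbit
        Suppress_Type_Name  On

    # syslog hostname below is a placeholder
    [OUTPUT]
        Name                syslog
        Match               *
        Host                syslog.example.org
        Port                514
        Mode                udp
        Syslog_Format       rfc5424
        Syslog_Message_Key  log

  ## simple catch-all parser, as in the dev-cluster values file
  customParsers: |
    [PARSER]
        Name    named_capture
        Format  regex
        Regex   /(?<message>.*)/m
```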

## Steps to deploy
Note that some configuration needs to be changed in each case (example install commands are shown after this list):
1. install the Elasticsearch Bitnami chart, with the Kibana subchart enabled
* see [./search-analytics/ek-values-dev-cluster.yaml](./search-analytics/ek-values-dev-cluster.yaml)
2. install the Fluent Bit Bitnami chart
* see [./logging-agents/fluentbit-values-dev-cluster.yaml](./logging-agents/fluentbit-values-dev-cluster.yaml)
* [Fluent Bit](https://docs.fluentbit.io/manual/) is essentially a faster, smaller-footprint
replacement for Fluentd, and is much better suited to k8s.
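
The commands below are gathered from the comment headers of the values files referenced above. The
release names (`es-logging`, `fluentbit`), the `logging` namespace, and the final sanity check
(including the Elasticsearch service name in the port-forward) are illustrative assumptions, not the
tested dev-cluster names.

```bash
# 1. Elasticsearch, with the Kibana subchart enabled
helm install es-logging -n logging --create-namespace \
  -f ./search-analytics/ek-values-dev-cluster.yaml \
  oci://registry-1.docker.io/bitnamicharts/elasticsearch

# 2. Fluent Bit daemonset
helm install fluentbit -n logging \
  -f ./logging-agents/fluentbit-values-dev-cluster.yaml \
  oci://registry-1.docker.io/bitnamicharts/fluent-bit

# Optional sanity check: confirm the "fluentbit" index is receiving documents
kubectl port-forward -n logging svc/es-logging-elasticsearch 9200:9200 &
curl -s 'http://localhost:9200/_cat/indices/fluentbit?v'
```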

(Experiments with Filebeat were largely unsuccessful. The YAML file for installing the Filebeat
daemonset is also included for completeness:
[./logging-agents/filebeat-kubernetes.yaml](./logging-agents/filebeat-kubernetes.yaml))